diff --git a/ibis-server/app/custom_sqlglot/__init__.py b/ibis-server/app/custom_sqlglot/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ibis-server/app/custom_sqlglot/dialects/__init__.py b/ibis-server/app/custom_sqlglot/dialects/__init__.py new file mode 100644 index 000000000..b40c47a3a --- /dev/null +++ b/ibis-server/app/custom_sqlglot/dialects/__init__.py @@ -0,0 +1,3 @@ +# ruff: noqa: F401 + +from app.custom_sqlglot.dialects.mysql import MySQL diff --git a/ibis-server/app/custom_sqlglot/dialects/mysql.py b/ibis-server/app/custom_sqlglot/dialects/mysql.py new file mode 100644 index 000000000..b80439b6b --- /dev/null +++ b/ibis-server/app/custom_sqlglot/dialects/mysql.py @@ -0,0 +1,10 @@ +from sqlglot import exp +from sqlglot.dialects import MySQL as OriginalMySQL + + +class MySQL(OriginalMySQL): + class Generator(OriginalMySQL.Generator): + TYPE_MAPPING = { + **OriginalMySQL.Generator.TYPE_MAPPING, + exp.DataType.Type.VARBINARY: "BINARY", + } diff --git a/ibis-server/app/mdl/rewriter.py b/ibis-server/app/mdl/rewriter.py index 3d7f466a7..19e93e654 100644 --- a/ibis-server/app/mdl/rewriter.py +++ b/ibis-server/app/mdl/rewriter.py @@ -15,6 +15,9 @@ # To register custom dialects from ibis library for sqlglot importlib.import_module("ibis.backends.sql.dialects") +# Register custom dialects +importlib.import_module("app.custom_sqlglot.dialects") + class Rewriter(ABC): def __init__(self, manifest_str: str, data_source: DataSource = None): diff --git a/ibis-server/app/util.py b/ibis-server/app/util.py index 616514c2c..615c5ff86 100644 --- a/ibis-server/app/util.py +++ b/ibis-server/app/util.py @@ -30,6 +30,8 @@ def default(obj): return None if isinstance(obj, decimal.Decimal): return str(obj) + if isinstance(obj, (bytes, bytearray)): + return obj.hex() raise TypeError json_obj = orjson.loads( diff --git a/ibis-server/tests/routers/v2/connector/test_bigquery.py b/ibis-server/tests/routers/v2/connector/test_bigquery.py index 6bc6765ce..f24ff56b0 100644 --- a/ibis-server/tests/routers/v2/connector/test_bigquery.py +++ b/ibis-server/tests/routers/v2/connector/test_bigquery.py @@ -60,6 +60,11 @@ "expression": "cast(NULL as timestamp)", "type": "timestamp", }, + { + "name": "bytea_column", + "expression": "cast('abc' as bytea)", + "type": "bytea", + }, ], "primaryKey": "orderkey", }, @@ -92,6 +97,7 @@ def test_query(): "2024-01-01 23:59:59.000000", "2024-01-01 23:59:59.000000 UTC", None, + "616263", ] assert result["dtypes"] == { "orderkey": "int64", @@ -103,6 +109,7 @@ def test_query(): "timestamp": "object", "timestamptz": "object", "test_null_time": "datetime64[ns]", + "bytea_column": "object", } diff --git a/ibis-server/tests/routers/v2/connector/test_clickhouse.py b/ibis-server/tests/routers/v2/connector/test_clickhouse.py index fd1b2b009..58a214951 100644 --- a/ibis-server/tests/routers/v2/connector/test_clickhouse.py +++ b/ibis-server/tests/routers/v2/connector/test_clickhouse.py @@ -57,6 +57,11 @@ "expression": "toDateTime64(NULL, 9)", "type": "timestamp", }, + { + "name": "bytea_column", + "expression": "cast('abc' as bytea)", + "type": "bytea", + }, { "name": "customer", "type": "Customer", @@ -167,7 +172,7 @@ def test_query(clickhouse: ClickHouseContainer): ) assert response.status_code == 200 result = response.json() - assert len(result["columns"]) == 9 + assert len(result["columns"]) == 10 assert len(result["data"]) == 1 assert result["data"][0] == [ 1, @@ -179,6 +184,7 @@ def test_query(clickhouse: ClickHouseContainer): "2024-01-01 23:59:59.000000", "2024-01-01 23:59:59.000000 UTC", None, + "abc", # Clickhouse does not support bytea, so it is returned as string ] assert result["dtypes"] == { "orderkey": "int32", @@ -190,6 +196,7 @@ def test_query(clickhouse: ClickHouseContainer): "timestamp": "object", "timestamptz": "object", "test_null_time": "object", + "bytea_column": "object", } @@ -205,7 +212,7 @@ def test_query_with_connection_url(clickhouse: ClickHouseContainer): ) assert response.status_code == 200 result = response.json() - assert len(result["columns"]) == 9 + assert len(result["columns"]) == 10 assert len(result["data"]) == 1 assert result["data"][0][0] == 1 assert result["dtypes"] is not None diff --git a/ibis-server/tests/routers/v2/connector/test_mssql.py b/ibis-server/tests/routers/v2/connector/test_mssql.py index 1fc3b1819..c46f9e876 100644 --- a/ibis-server/tests/routers/v2/connector/test_mssql.py +++ b/ibis-server/tests/routers/v2/connector/test_mssql.py @@ -57,6 +57,11 @@ "expression": "cast(NULL as timestamp)", "type": "timestamp", }, + { + "name": "bytea_column", + "expression": "cast('abc' as bytea)", + "type": "bytea", + }, ], "primaryKey": "orderkey", }, @@ -103,6 +108,7 @@ def test_query(mssql: SqlServerContainer): "2024-01-01 23:59:59.000000", "2024-01-01 23:59:59.000000 UTC", None, + "616263", ] assert result["dtypes"] == { "orderkey": "int32", @@ -114,6 +120,7 @@ def test_query(mssql: SqlServerContainer): "timestamp": "object", "timestamptz": "object", "test_null_time": "datetime64[ns]", + "bytea_column": "object", } diff --git a/ibis-server/tests/routers/v2/connector/test_mysql.py b/ibis-server/tests/routers/v2/connector/test_mysql.py index 8051ce1a7..23b4af573 100644 --- a/ibis-server/tests/routers/v2/connector/test_mysql.py +++ b/ibis-server/tests/routers/v2/connector/test_mysql.py @@ -58,6 +58,11 @@ "expression": "cast(NULL as timestamp)", "type": "timestamp", }, + { + "name": "bytea_column", + "expression": "cast('abc' as bytea)", + "type": "bytea", + }, ], "primaryKey": "orderkey", }, @@ -123,6 +128,7 @@ def test_query(mysql: MySqlContainer): "2024-01-01 23:59:59.000000", "2024-01-01 23:59:59.000000", None, + "616263", ] assert result["dtypes"] == { "orderkey": "int32", @@ -134,6 +140,7 @@ def test_query(mysql: MySqlContainer): "timestamp": "object", "timestamptz": "object", "test_null_time": "datetime64[ns]", + "bytea_column": "object", } diff --git a/ibis-server/tests/routers/v2/connector/test_postgres.py b/ibis-server/tests/routers/v2/connector/test_postgres.py index 336178560..8266fc249 100644 --- a/ibis-server/tests/routers/v2/connector/test_postgres.py +++ b/ibis-server/tests/routers/v2/connector/test_postgres.py @@ -59,6 +59,11 @@ "expression": "cast(NULL as timestamp)", "type": "timestamp", }, + { + "name": "bytea_column", + "expression": "cast('abc' as bytea)", + "type": "bytea", + }, ], "primaryKey": "orderkey", }, @@ -91,7 +96,7 @@ def test_query(postgres: PostgresContainer): ) assert response.status_code == 200 result = response.json() - assert len(result["columns"]) == 9 + assert len(result["columns"]) == len(manifest["models"][0]["columns"]) assert len(result["data"]) == 1 assert result["data"][0] == [ 1, @@ -103,6 +108,7 @@ def test_query(postgres: PostgresContainer): "2024-01-01 23:59:59.000000", "2024-01-01 23:59:59.000000 UTC", None, + "616263", ] assert result["dtypes"] == { "orderkey": "int32", @@ -114,6 +120,7 @@ def test_query(postgres: PostgresContainer): "timestamp": "object", "timestamptz": "object", "test_null_time": "datetime64[ns]", + "bytea_column": "object", } @@ -129,7 +136,7 @@ def test_query_with_connection_url(postgres: PostgresContainer): ) assert response.status_code == 200 result = response.json() - assert len(result["columns"]) == 9 + assert len(result["columns"]) == len(manifest["models"][0]["columns"]) assert len(result["data"]) == 1 assert result["data"][0][0] == 1 assert result["dtypes"] is not None diff --git a/ibis-server/tests/routers/v2/connector/test_trino.py b/ibis-server/tests/routers/v2/connector/test_trino.py index a931a5af5..669aa2b47 100644 --- a/ibis-server/tests/routers/v2/connector/test_trino.py +++ b/ibis-server/tests/routers/v2/connector/test_trino.py @@ -54,6 +54,11 @@ "expression": "cast(NULL as timestamp)", "type": "timestamp", }, + { + "name": "bytea_column", + "expression": "cast('abc' as bytea)", + "type": "bytea", + }, ], "primaryKey": "orderkey", }, @@ -94,6 +99,7 @@ def test_query(trino: TrinoContainer): "2024-01-01 23:59:59.000000", "2024-01-01 23:59:59.000000 UTC", None, + "616263", ] assert result["dtypes"] == { "orderkey": "int64", @@ -105,6 +111,7 @@ def test_query(trino: TrinoContainer): "timestamp": "object", "timestamptz": "object", "test_null_time": "datetime64[ns]", + "bytea_column": "object", }