diff --git a/.github/workflows/ibis-ci.yml b/.github/workflows/ibis-ci.yml index 829c8577b..6758cdad0 100644 --- a/.github/workflows/ibis-ci.yml +++ b/.github/workflows/ibis-ci.yml @@ -71,7 +71,7 @@ jobs: AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.AWS_REGION }} AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }} - run: poetry run pytest -m "not bigquery and not snowflake and not canner and not s3_file" + run: poetry run pytest -m "not bigquery and not snowflake and not canner and not s3_file and not gcs_file" - name: Test bigquery if need if: contains(github.event.pull_request.labels.*.name, 'bigquery') env: diff --git a/ibis-server/app/mdl/rewriter.py b/ibis-server/app/mdl/rewriter.py index cbb8515aa..ec3161256 100644 --- a/ibis-server/app/mdl/rewriter.py +++ b/ibis-server/app/mdl/rewriter.py @@ -76,6 +76,7 @@ def _get_write_dialect(cls, data_source: DataSource) -> str: DataSource.local_file, DataSource.s3_file, DataSource.minio_file, + DataSource.gcs_file, }: return "duckdb" return data_source.name diff --git a/ibis-server/app/model/__init__.py b/ibis-server/app/model/__init__.py index a1251590f..800d3bdd7 100644 --- a/ibis-server/app/model/__init__.py +++ b/ibis-server/app/model/__init__.py @@ -64,6 +64,10 @@ class QueryMinioFileDTO(QueryDTO): connection_info: MinioFileConnectionInfo = connection_info_field +class QueryGcsFileDTO(QueryDTO): + connection_info: GcsFileConnectionInfo = connection_info_field + + class BigQueryConnectionInfo(BaseModel): project_id: SecretStr dataset_id: SecretStr @@ -167,7 +171,7 @@ class S3FileConnectionInfo(BaseModel): class MinioFileConnectionInfo(BaseModel): - url: SecretStr = Field(description="the root path of the s3 bucket", default="/") + url: SecretStr = Field(description="the root path of the minio bucket", default="/") format: str = Field( description="File format", default="csv", examples=["csv", "parquet", "json"] ) @@ -180,6 +184,17 @@ class MinioFileConnectionInfo(BaseModel): 
secret_key: SecretStr +class GcsFileConnectionInfo(BaseModel): + url: SecretStr = Field(description="the root path of the gcs bucket", default="/") + format: str = Field( + description="File format", default="csv", examples=["csv", "parquet", "json"] + ) + bucket: SecretStr + key_id: SecretStr + secret_key: SecretStr + credentials: SecretStr = Field(description="Base64 encode `credentials.json`") + + ConnectionInfo = ( BigQueryConnectionInfo | CannerConnectionInfo @@ -192,6 +207,7 @@ class MinioFileConnectionInfo(BaseModel): | LocalFileConnectionInfo | S3FileConnectionInfo | MinioFileConnectionInfo + | GcsFileConnectionInfo ) diff --git a/ibis-server/app/model/connector.py b/ibis-server/app/model/connector.py index 515efadf3..43249fd74 100644 --- a/ibis-server/app/model/connector.py +++ b/ibis-server/app/model/connector.py @@ -18,13 +18,14 @@ from app.model import ( ConnectionInfo, + GcsFileConnectionInfo, MinioFileConnectionInfo, S3FileConnectionInfo, UnknownIbisError, UnprocessableEntityError, ) from app.model.data_source import DataSource -from app.model.utils import init_duckdb_minio, init_duckdb_s3 +from app.model.utils import init_duckdb_gcs, init_duckdb_minio, init_duckdb_s3 # Override datatypes of ibis importlib.import_module("app.custom_ibis.backends.sql.datatypes") @@ -42,6 +43,7 @@ def __init__(self, data_source: DataSource, connection_info: ConnectionInfo): DataSource.local_file, DataSource.s3_file, DataSource.minio_file, + DataSource.gcs_file, }: self._connector = DuckDBConnector(connection_info) else: @@ -167,6 +169,8 @@ def __init__(self, connection_info: ConnectionInfo): init_duckdb_s3(self.connection, connection_info) if isinstance(connection_info, MinioFileConnectionInfo): init_duckdb_minio(self.connection, connection_info) + if isinstance(connection_info, GcsFileConnectionInfo): + init_duckdb_gcs(self.connection, connection_info) def query(self, sql: str, limit: int) -> pd.DataFrame: try: diff --git a/ibis-server/app/model/data_source.py 
b/ibis-server/app/model/data_source.py index fba27571f..6006607cb 100644 --- a/ibis-server/app/model/data_source.py +++ b/ibis-server/app/model/data_source.py @@ -22,6 +22,7 @@ QueryCannerDTO, QueryClickHouseDTO, QueryDTO, + QueryGcsFileDTO, QueryLocalFileDTO, QueryMinioFileDTO, QueryMSSqlDTO, @@ -48,6 +49,7 @@ class DataSource(StrEnum): local_file = auto() s3_file = auto() minio_file = auto() + gcs_file = auto() def get_connection(self, info: ConnectionInfo) -> BaseBackend: try: @@ -74,6 +76,7 @@ class DataSourceExtension(Enum): local_file = QueryLocalFileDTO s3_file = QueryS3FileDTO minio_file = QueryMinioFileDTO + gcs_file = QueryGcsFileDTO def __init__(self, dto: QueryDTO): self.dto = dto diff --git a/ibis-server/app/model/metadata/factory.py b/ibis-server/app/model/metadata/factory.py index 3df9c3751..267ddbb33 100644 --- a/ibis-server/app/model/metadata/factory.py +++ b/ibis-server/app/model/metadata/factory.py @@ -6,6 +6,7 @@ from app.model.metadata.mssql import MSSQLMetadata from app.model.metadata.mysql import MySQLMetadata from app.model.metadata.object_storage import ( + GcsFileMetadata, LocalFileMetadata, MinioFileMetadata, S3FileMetadata, @@ -26,6 +27,7 @@ DataSource.local_file: LocalFileMetadata, DataSource.s3_file: S3FileMetadata, DataSource.minio_file: MinioFileMetadata, + DataSource.gcs_file: GcsFileMetadata, } diff --git a/ibis-server/app/model/metadata/object_storage.py b/ibis-server/app/model/metadata/object_storage.py index ff2003f62..d28baa65d 100644 --- a/ibis-server/app/model/metadata/object_storage.py +++ b/ibis-server/app/model/metadata/object_storage.py @@ -5,6 +5,7 @@ from loguru import logger from app.model import ( + GcsFileConnectionInfo, LocalFileConnectionInfo, MinioFileConnectionInfo, S3FileConnectionInfo, @@ -17,7 +18,7 @@ TableProperties, ) from app.model.metadata.metadata import Metadata -from app.model.utils import init_duckdb_minio, init_duckdb_s3 +from app.model.utils import init_duckdb_gcs, init_duckdb_minio, init_duckdb_s3 
class ObjectStorageMetadata(Metadata): @@ -240,3 +241,33 @@ def _get_full_path(self, path): path = path[1:] return f"s3://{self.connection_info.bucket.get_secret_value()}/{path}" + + +class GcsFileMetadata(ObjectStorageMetadata): + def __init__(self, connection_info: GcsFileConnectionInfo): + super().__init__(connection_info) + + def get_version(self): + return "GCS" + + def _get_connection(self): + conn = duckdb.connect() + init_duckdb_gcs(conn, self.connection_info) + logger.debug("Initialized duckdb gcs") + return conn + + def _get_dal_operator(self): + info: GcsFileConnectionInfo = self.connection_info + + return opendal.Operator( + "gcs", + root=info.url.get_secret_value(), + bucket=info.bucket.get_secret_value(), + credential=info.credentials.get_secret_value(), + ) + + def _get_full_path(self, path): + if path.startswith("/"): + path = path[1:] + + return f"gs://{self.connection_info.bucket.get_secret_value()}/{path}" diff --git a/ibis-server/app/model/utils.py b/ibis-server/app/model/utils.py index f3f9750c0..c1c95c935 100644 --- a/ibis-server/app/model/utils.py +++ b/ibis-server/app/model/utils.py @@ -1,6 +1,10 @@ from duckdb import DuckDBPyConnection, HTTPException -from app.model import MinioFileConnectionInfo, S3FileConnectionInfo +from app.model import ( + GcsFileConnectionInfo, + MinioFileConnectionInfo, + S3FileConnectionInfo, +) def init_duckdb_s3( @@ -44,3 +48,21 @@ def init_duckdb_minio( connection.execute("SET s3_use_ssl=?", [connection_info.ssl_enabled]) except HTTPException as e: raise Exception("Failed to create secret", e) + + +def init_duckdb_gcs( + connection: DuckDBPyConnection, connection_info: GcsFileConnectionInfo +): + create_secret = f""" + CREATE SECRET wren_gcs ( + TYPE GCS, + KEY_ID '{connection_info.key_id.get_secret_value()}', + SECRET '{connection_info.secret_key.get_secret_value()}' + ) + """ + try: + result = connection.execute(create_secret).fetchone() + if result is None or not result[0]: + raise Exception("Failed to 
create secret") + except HTTPException as e: + raise Exception("Failed to create secret", e) diff --git a/ibis-server/pyproject.toml b/ibis-server/pyproject.toml index 02b91438e..7b952cb54 100644 --- a/ibis-server/pyproject.toml +++ b/ibis-server/pyproject.toml @@ -67,6 +67,7 @@ markers = [ "local_file: mark a test as a local file test", "s3_file: mark a test as a s3 file test", "minio_file: mark a test as a minio file test", + "gcs_file: mark a test as a gcs file test", "beta: mark a test as a test for beta versions of the engine", ] diff --git a/ibis-server/tests/routers/v2/connector/test_gcs_file.py b/ibis-server/tests/routers/v2/connector/test_gcs_file.py new file mode 100644 index 000000000..47d53ce9a --- /dev/null +++ b/ibis-server/tests/routers/v2/connector/test_gcs_file.py @@ -0,0 +1,507 @@ +import base64 +import os + +import orjson +import pytest + +pytestmark = pytest.mark.gcs_file + +key_id = os.getenv("GCS_KEY_ID") +secret_key = os.getenv("GCS_SECRET_ACCESS_KEY") +bucket = os.getenv("GCS_BUCKET") +credentials = os.getenv("GCS_CREDENTIALS_BASE64_JSON") + +base_url = "/v2/connector/gcs_file" +manifest = { + "catalog": "my_calalog", + "schema": "my_schema", + "models": [ + { + "name": "Orders", + "tableReference": { + "table": f"gs://{bucket}/wren-private-test/tpch/data/orders.parquet", + }, + "columns": [ + {"name": "orderkey", "expression": "o_orderkey", "type": "integer"}, + {"name": "custkey", "expression": "o_custkey", "type": "integer"}, + { + "name": "orderstatus", + "expression": "o_orderstatus", + "type": "varchar", + }, + { + "name": "totalprice", + "expression": "o_totalprice", + "type": "float", + }, + {"name": "orderdate", "expression": "o_orderdate", "type": "date"}, + { + "name": "order_cust_key", + "expression": "concat(o_orderkey, '_', o_custkey)", + "type": "varchar", + }, + ], + "primaryKey": "orderkey", + }, + { + "name": "Customer", + "tableReference": { + "table": f"gs://{bucket}/wren-private-test/tpch/data/customer.parquet", + }, + 
"columns": [ + { + "name": "custkey", + "type": "integer", + "expression": "c_custkey", + }, + { + "name": "orders", + "type": "Orders", + "relationship": "CustomerOrders", + }, + { + "name": "sum_totalprice", + "type": "float", + "isCalculated": True, + "expression": "sum(orders.totalprice)", + }, + ], + "primaryKey": "custkey", + }, + ], + "relationships": [ + { + "name": "CustomerOrders", + "models": ["Customer", "Orders"], + "joinType": "ONE_TO_MANY", + "condition": "Customer.custkey = Orders.custkey", + } + ], +} + + +@pytest.fixture(scope="module") +def manifest_str(): + return base64.b64encode(orjson.dumps(manifest)).decode("utf-8") + + +@pytest.fixture(scope="module") +def connection_info() -> dict[str, str]: + return { + "url": "/wren-private-test/tpch/data", + "format": "parquet", + "bucket": bucket, + "key_id": key_id, + "secret_key": secret_key, + "credentials": credentials, + } + + +async def test_query(client, manifest_str, connection_info): + response = await client.post( + f"{base_url}/query", + json={ + "manifestStr": manifest_str, + "sql": 'SELECT * FROM "Orders" LIMIT 1', + "connectionInfo": connection_info, + }, + ) + assert response.status_code == 200 + result = response.json() + assert len(result["columns"]) == len(manifest["models"][0]["columns"]) + assert len(result["data"]) == 1 + assert result["data"][0] == [ + 1, + 370, + "O", + "172799.49", + "1996-01-02 00:00:00.000000", + "1_370", + ] + assert result["dtypes"] == { + "orderkey": "int32", + "custkey": "int32", + "orderstatus": "object", + "totalprice": "float64", + "orderdate": "object", + "order_cust_key": "object", + } + + +async def test_query_with_limit(client, manifest_str, connection_info): + response = await client.post( + f"{base_url}/query", + params={"limit": 1}, + json={ + "manifestStr": manifest_str, + "sql": 'SELECT * FROM "Orders" limit 2', + "connectionInfo": connection_info, + }, + ) + assert response.status_code == 200 + result = response.json() + assert 
len(result["data"]) == 1 + + +async def test_query_calculated_field(client, manifest_str, connection_info): + response = await client.post( + f"{base_url}/query", + json={ + "manifestStr": manifest_str, + "sql": 'SELECT custkey, sum_totalprice FROM "Customer" WHERE custkey = 370', + "connectionInfo": connection_info, + }, + ) + assert response.status_code == 200 + result = response.json() + assert len(result["columns"]) == 2 + assert len(result["data"]) == 1 + assert result["data"][0] == [ + 370, + "2860895.79", + ] + assert result["dtypes"] == { + "custkey": "int32", + "sum_totalprice": "float64", + } + + +async def test_dry_run(client, manifest_str, connection_info): + response = await client.post( + f"{base_url}/query", + params={"dryRun": True}, + json={ + "manifestStr": manifest_str, + "sql": 'SELECT * FROM "Orders" LIMIT 1', + "connectionInfo": connection_info, + }, + ) + assert response.status_code == 204 + + response = await client.post( + f"{base_url}/query", + params={"dryRun": True}, + json={ + "manifestStr": manifest_str, + "sql": 'SELECT * FROM "NotFound" LIMIT 1', + "connectionInfo": connection_info, + }, + ) + assert response.status_code == 422 + assert response.text is not None + + +async def test_query_with_invalid_connection_info(client, manifest_str): + response = await client.post( + f"{base_url}/query", + json={ + "manifestStr": manifest_str, + "sql": 'SELECT * FROM "Orders" LIMIT 1', + "connectionInfo": { + "url": "/tpch/data", + "format": "parquet", + "bucket": bucket, + "key_id": "invalid", + "secret_key": "invalid", + "credentials": "invalid", + }, + }, + ) + assert response.status_code == 422 + + response = await client.post( + url=f"{base_url}/metadata/tables", + json={ + "connectionInfo": { + "url": "/tpch/data", + "format": "parquet", + "bucket": bucket, + "key_id": "invalid", + "secret_key": "invalid", + "credentials": "invalid", + }, + }, + ) + assert response.status_code == 422 + + +async def test_metadata_list_tables(client, 
connection_info): + response = await client.post( + url=f"{base_url}/metadata/tables", + json={ + "connectionInfo": connection_info, + }, + ) + assert response.status_code == 200 + + result = next(filter(lambda x: x["name"] == "orders", response.json())) + assert result["name"] == "orders" + assert result["primaryKey"] is None + assert result["description"] is None + assert result["properties"] == { + "catalog": None, + "schema": None, + "table": "orders", + "path": f"gs://{bucket}/wren-private-test/tpch/data/orders.parquet", + } + assert len(result["columns"]) == 9 + assert result["columns"][8] == { + "name": "o_comment", + "nestedColumns": None, + "type": "STRING", + "notNull": False, + "description": None, + "properties": None, + } + + +async def test_metadata_list_constraints(client, connection_info): + response = await client.post( + url=f"{base_url}/metadata/constraints", + json={ + "connectionInfo": connection_info, + }, + ) + assert response.status_code == 200 + + +async def test_metadata_db_version(client, connection_info): + response = await client.post( + url=f"{base_url}/metadata/version", + json={ + "connectionInfo": connection_info, + }, + ) + assert response.status_code == 200 + assert "GCS" in response.text + + +async def test_unsupported_format(client): + response = await client.post( + url=f"{base_url}/metadata/tables", + json={ + "connectionInfo": { + "url": "/wren-private-test/tpch/data", + "format": "unsupported", + "bucket": bucket, + "key_id": key_id, + "secret_key": secret_key, + "credentials": credentials, + }, + }, + ) + assert response.status_code == 422 + assert response.text == "Failed to list files: Unsupported format: unsupported" + + +async def test_list_parquet_files(client): + response = await client.post( + url=f"{base_url}/metadata/tables", + json={ + "connectionInfo": { + "url": "/wren-private-test/test_file_source", + "format": "parquet", + "bucket": bucket, + "key_id": key_id, + "secret_key": secret_key, + "credentials": 
credentials, + }, + }, + ) + assert response.status_code == 200 + result = response.json() + assert len(result) == 2 + table_names = [table["name"] for table in result] + assert "type-test-parquet" in table_names + assert "type-test" in table_names + columns = result[0]["columns"] + assert len(columns) == 23 + assert columns[0]["name"] == "c_bigint" + assert columns[0]["type"] == "INT64" + assert columns[1]["name"] == "c_bit" + assert columns[1]["type"] == "STRING" + assert columns[2]["name"] == "c_blob" + assert columns[2]["type"] == "BYTES" + assert columns[3]["name"] == "c_boolean" + assert columns[3]["type"] == "BOOL" + assert columns[4]["name"] == "c_date" + assert columns[4]["type"] == "DATE" + assert columns[5]["name"] == "c_double" + assert columns[5]["type"] == "DOUBLE" + assert columns[6]["name"] == "c_float" + assert columns[6]["type"] == "FLOAT" + assert columns[7]["name"] == "c_integer" + assert columns[7]["type"] == "INT" + assert columns[8]["name"] == "c_hugeint" + assert columns[8]["type"] == "DOUBLE" + assert columns[9]["name"] == "c_interval" + assert columns[9]["type"] == "INTERVAL" + assert columns[10]["name"] == "c_json" + assert columns[10]["type"] == "JSON" + assert columns[11]["name"] == "c_smallint" + assert columns[11]["type"] == "INT2" + assert columns[12]["name"] == "c_time" + assert columns[12]["type"] == "TIME" + assert columns[13]["name"] == "c_timestamp" + assert columns[13]["type"] == "TIMESTAMP" + assert columns[14]["name"] == "c_timestamptz" + assert columns[14]["type"] == "TIMESTAMPTZ" + assert columns[15]["name"] == "c_tinyint" + assert columns[15]["type"] == "INT2" + assert columns[16]["name"] == "c_ubigint" + assert columns[16]["type"] == "INT64" + assert columns[17]["name"] == "c_uhugeint" + assert columns[17]["type"] == "DOUBLE" + assert columns[18]["name"] == "c_uinteger" + assert columns[18]["type"] == "INT" + assert columns[19]["name"] == "c_usmallint" + assert columns[19]["type"] == "INT2" + assert columns[20]["name"] == 
"c_utinyint" + assert columns[20]["type"] == "INT2" + assert columns[21]["name"] == "c_uuid" + assert columns[21]["type"] == "UUID" + assert columns[22]["name"] == "c_varchar" + assert columns[22]["type"] == "STRING" + + +async def test_list_csv_files(client): + response = await client.post( + url=f"{base_url}/metadata/tables", + json={ + "connectionInfo": { + "url": "/wren-private-test/test_file_source", + "format": "csv", + "bucket": bucket, + "key_id": key_id, + "secret_key": secret_key, + "credentials": credentials, + }, + }, + ) + assert response.status_code == 200 + result = response.json() + assert len(result) == 3 + table_names = [table["name"] for table in result] + assert "type-test-csv" in table_names + assert "type-test" in table_names + # `invalid` will be considered as a one column csv file + assert "invalid" in table_names + columns = result[0]["columns"] + assert columns[0]["name"] == "c_bigint" + assert columns[0]["type"] == "INT64" + assert columns[1]["name"] == "c_bit" + assert columns[1]["type"] == "STRING" + assert columns[2]["name"] == "c_blob" + assert columns[2]["type"] == "STRING" + assert columns[3]["name"] == "c_boolean" + assert columns[3]["type"] == "BOOL" + assert columns[4]["name"] == "c_date" + assert columns[4]["type"] == "DATE" + assert columns[5]["name"] == "c_double" + assert columns[5]["type"] == "DOUBLE" + assert columns[6]["name"] == "c_float" + assert columns[6]["type"] == "DOUBLE" + assert columns[7]["name"] == "c_integer" + assert columns[7]["type"] == "INT64" + assert columns[8]["name"] == "c_hugeint" + assert columns[8]["type"] == "INT64" + assert columns[9]["name"] == "c_interval" + assert columns[9]["type"] == "STRING" + assert columns[10]["name"] == "c_json" + assert columns[10]["type"] == "STRING" + assert columns[11]["name"] == "c_smallint" + assert columns[11]["type"] == "INT64" + assert columns[12]["name"] == "c_time" + assert columns[12]["type"] == "TIME" + assert columns[13]["name"] == "c_timestamp" + assert 
columns[13]["type"] == "TIMESTAMP" + assert columns[14]["name"] == "c_timestamptz" + assert columns[14]["type"] == "TIMESTAMP" + assert columns[15]["name"] == "c_tinyint" + assert columns[15]["type"] == "INT64" + assert columns[16]["name"] == "c_ubigint" + assert columns[16]["type"] == "INT64" + assert columns[17]["name"] == "c_uhugeint" + assert columns[17]["type"] == "INT64" + assert columns[18]["name"] == "c_uinteger" + assert columns[18]["type"] == "INT64" + assert columns[19]["name"] == "c_usmallint" + assert columns[19]["type"] == "INT64" + assert columns[20]["name"] == "c_utinyint" + assert columns[20]["type"] == "INT64" + assert columns[21]["name"] == "c_uuid" + assert columns[21]["type"] == "STRING" + assert columns[22]["name"] == "c_varchar" + assert columns[22]["type"] == "STRING" + + +async def test_list_json_files(client): + response = await client.post( + url=f"{base_url}/metadata/tables", + json={ + "connectionInfo": { + "url": "/wren-private-test/test_file_source", + "format": "json", + "bucket": bucket, + "key_id": key_id, + "secret_key": secret_key, + "credentials": credentials, + }, + }, + ) + assert response.status_code == 200 + result = response.json() + assert len(result) == 2 + table_names = [table["name"] for table in result] + assert "type-test-json" in table_names + assert "type-test" in table_names + + columns = result[0]["columns"] + assert columns[0]["name"] == "c_bigint" + assert columns[0]["type"] == "INT64" + # `c_bit` is a string in json which value is `00000000000000000000000000000001` + # It's considered as a UUID by DuckDB json reader. 
+ assert columns[1]["name"] == "c_bit" + assert columns[1]["type"] == "UUID" + assert columns[2]["name"] == "c_blob" + assert columns[2]["type"] == "STRING" + assert columns[3]["name"] == "c_boolean" + assert columns[3]["type"] == "BOOL" + assert columns[4]["name"] == "c_date" + assert columns[4]["type"] == "DATE" + assert columns[5]["name"] == "c_double" + assert columns[5]["type"] == "DOUBLE" + assert columns[6]["name"] == "c_float" + assert columns[6]["type"] == "DOUBLE" + assert columns[7]["name"] == "c_integer" + assert columns[7]["type"] == "INT64" + assert columns[8]["name"] == "c_hugeint" + assert columns[8]["type"] == "DOUBLE" + assert columns[9]["name"] == "c_interval" + assert columns[9]["type"] == "STRING" + assert columns[10]["name"] == "c_json" + assert columns[10]["type"] == "UNKNOWN" + assert columns[11]["name"] == "c_smallint" + assert columns[11]["type"] == "INT64" + assert columns[12]["name"] == "c_time" + assert columns[12]["type"] == "TIME" + assert columns[13]["name"] == "c_timestamp" + assert columns[13]["type"] == "TIMESTAMP" + assert columns[14]["name"] == "c_timestamptz" + assert columns[14]["type"] == "STRING" + assert columns[15]["name"] == "c_tinyint" + assert columns[15]["type"] == "INT64" + assert columns[16]["name"] == "c_ubigint" + assert columns[16]["type"] == "INT64" + assert columns[17]["name"] == "c_uhugeint" + assert columns[17]["type"] == "DOUBLE" + assert columns[18]["name"] == "c_uinteger" + assert columns[18]["type"] == "INT64" + assert columns[19]["name"] == "c_usmallint" + assert columns[19]["type"] == "INT64" + assert columns[20]["name"] == "c_utinyint" + assert columns[20]["type"] == "INT64" + assert columns[21]["name"] == "c_uuid" + assert columns[21]["type"] == "UUID" + assert columns[22]["name"] == "c_varchar" + assert columns[22]["type"] == "STRING"