diff --git a/ibis-server/app/model/metadata/athena.py b/ibis-server/app/model/metadata/athena.py index ccf727446..b67c7e0c0 100644 --- a/ibis-server/app/model/metadata/athena.py +++ b/ibis-server/app/model/metadata/athena.py @@ -2,6 +2,7 @@ from contextlib import closing import pandas as pd +from loguru import logger from app.model import AthenaConnectionInfo from app.model.data_source import DataSource @@ -14,6 +15,41 @@ ) from app.model.metadata.metadata import Metadata +# Athena-specific type mapping +ATHENA_TYPE_MAPPING = { + # String Types (ignore Binary and Spatial Types for now) + "char": RustWrenEngineColumnType.CHAR, + "varchar": RustWrenEngineColumnType.VARCHAR, + "tinytext": RustWrenEngineColumnType.TEXT, + "text": RustWrenEngineColumnType.TEXT, + "mediumtext": RustWrenEngineColumnType.TEXT, + "longtext": RustWrenEngineColumnType.TEXT, + "enum": RustWrenEngineColumnType.VARCHAR, + "set": RustWrenEngineColumnType.VARCHAR, + # Integer Types + "bit": RustWrenEngineColumnType.TINYINT, + "tinyint": RustWrenEngineColumnType.TINYINT, + "smallint": RustWrenEngineColumnType.SMALLINT, + "mediumint": RustWrenEngineColumnType.INTEGER, + "int": RustWrenEngineColumnType.INTEGER, + "integer": RustWrenEngineColumnType.INTEGER, + "bigint": RustWrenEngineColumnType.BIGINT, + # Boolean Types + "bool": RustWrenEngineColumnType.BOOL, + "boolean": RustWrenEngineColumnType.BOOL, + # Decimal Types + "float": RustWrenEngineColumnType.FLOAT4, + "double": RustWrenEngineColumnType.DOUBLE, + "decimal": RustWrenEngineColumnType.DECIMAL, + "numeric": RustWrenEngineColumnType.NUMERIC, + # Date/Time Types + "date": RustWrenEngineColumnType.DATE, + "datetime": RustWrenEngineColumnType.TIMESTAMP, + "timestamp": RustWrenEngineColumnType.TIMESTAMPTZ, + # JSON Type + "json": RustWrenEngineColumnType.JSON, +} + class AthenaMetadata(Metadata): def __init__(self, connection_info: AthenaConnectionInfo): @@ -101,38 +137,24 @@ def get_version(self) -> str: def 
_format_athena_compact_table_name(self, schema: str, table: str) -> str: return f"{schema}.{table}" - def _transform_column_type(self, data_type): - data_type = re.sub(r"\(.*\)", "", data_type).strip() - switcher = { - # String Types (ignore Binary and Spatial Types for now) - "char": RustWrenEngineColumnType.CHAR, - "varchar": RustWrenEngineColumnType.VARCHAR, - "tinytext": RustWrenEngineColumnType.TEXT, - "text": RustWrenEngineColumnType.TEXT, - "mediumtext": RustWrenEngineColumnType.TEXT, - "longtext": RustWrenEngineColumnType.TEXT, - "enum": RustWrenEngineColumnType.VARCHAR, - "set": RustWrenEngineColumnType.VARCHAR, - "bit": RustWrenEngineColumnType.TINYINT, - "tinyint": RustWrenEngineColumnType.TINYINT, - "smallint": RustWrenEngineColumnType.SMALLINT, - "mediumint": RustWrenEngineColumnType.INTEGER, - "int": RustWrenEngineColumnType.INTEGER, - "integer": RustWrenEngineColumnType.INTEGER, - "bigint": RustWrenEngineColumnType.BIGINT, - # boolean - "bool": RustWrenEngineColumnType.BOOL, - "boolean": RustWrenEngineColumnType.BOOL, - # Decimal - "float": RustWrenEngineColumnType.FLOAT4, - "double": RustWrenEngineColumnType.DOUBLE, - "decimal": RustWrenEngineColumnType.DECIMAL, - "numeric": RustWrenEngineColumnType.NUMERIC, - "date": RustWrenEngineColumnType.DATE, - "datetime": RustWrenEngineColumnType.TIMESTAMP, - "timestamp": RustWrenEngineColumnType.TIMESTAMPTZ, - # JSON Type - "json": RustWrenEngineColumnType.JSON, - } - - return switcher.get(data_type.lower(), RustWrenEngineColumnType.UNKNOWN) + def _transform_column_type(self, data_type: str) -> RustWrenEngineColumnType: + """Transform Athena data type to RustWrenEngineColumnType. 
+ + Args: + data_type: The Athena data type string + + Returns: + The corresponding RustWrenEngineColumnType + """ + # Remove parameter specifications like VARCHAR(255) -> VARCHAR + normalized_type = re.sub(r"\(.*\)", "", data_type).strip().lower() + + # Use the module-level mapping table + mapped_type = ATHENA_TYPE_MAPPING.get( + normalized_type, RustWrenEngineColumnType.UNKNOWN + ) + + if mapped_type == RustWrenEngineColumnType.UNKNOWN: + logger.warning(f"Unknown Athena data type: {data_type}") + + return mapped_type diff --git a/ibis-server/app/model/metadata/bigquery.py b/ibis-server/app/model/metadata/bigquery.py index dbcf92a18..654abbc37 100644 --- a/ibis-server/app/model/metadata/bigquery.py +++ b/ibis-server/app/model/metadata/bigquery.py @@ -1,3 +1,5 @@ +from loguru import logger + from app.model import BigQueryConnectionInfo from app.model.data_source import DataSource from app.model.metadata.dto import ( @@ -10,6 +12,25 @@ ) from app.model.metadata.metadata import Metadata +# BigQuery-specific type mapping +BIGQUERY_TYPE_MAPPING = { + # GEOGRAPHY and RANGE columns were filtered out + "bool": RustWrenEngineColumnType.BOOL, + "boolean": RustWrenEngineColumnType.BOOL, + "bytes": RustWrenEngineColumnType.BYTES, + "date": RustWrenEngineColumnType.DATE, + "datetime": RustWrenEngineColumnType.DATETIME, + "interval": RustWrenEngineColumnType.INTERVAL, + "json": RustWrenEngineColumnType.JSON, + "int64": RustWrenEngineColumnType.INT64, + "numeric": RustWrenEngineColumnType.NUMERIC, + "bignumeric": RustWrenEngineColumnType.BIGNUMERIC, + "float64": RustWrenEngineColumnType.FLOAT64, + "string": RustWrenEngineColumnType.STRING, + "time": RustWrenEngineColumnType.TIME, + "timestamp": RustWrenEngineColumnType.TIMESTAMPTZ, +} + class BigQueryMetadata(Metadata): def __init__(self, connection_info: BigQueryConnectionInfo): @@ -173,29 +194,28 @@ def get_constraints(self) -> list[Constraint]: def get_version(self) -> str: return "Follow BigQuery release version" - def 
_transform_column_type(self, data_type): - # lower case the data_type - data_type = data_type.lower() + def _transform_column_type(self, data_type: str) -> str | RustWrenEngineColumnType: + """Transform BigQuery data type to RustWrenEngineColumnType. + + Args: + data_type: The BigQuery data type string - # if data_type start with "array" or "struct", by pass it - if data_type.startswith(("array", "struct")): + Returns: + The corresponding RustWrenEngineColumnType or original string for complex types + """ + # Convert to lowercase for comparison + normalized_type = data_type.lower() + + # Handle complex types (array, struct) by returning as-is + if normalized_type.startswith(("array", "struct")): return data_type - # Map BigQuery types to RustWrenEngineColumnType - switcher = { - # GEOGRAPHY and RANGE columns were filtered out - "bytes": RustWrenEngineColumnType.BYTES, - "date": RustWrenEngineColumnType.DATE, - "datetime": RustWrenEngineColumnType.DATETIME, - "interval": RustWrenEngineColumnType.INTERVAL, - "json": RustWrenEngineColumnType.JSON, - "int64": RustWrenEngineColumnType.INT64, - "numeric": RustWrenEngineColumnType.NUMERIC, - "bignumeric": RustWrenEngineColumnType.BIGNUMERIC, - "float64": RustWrenEngineColumnType.FLOAT64, - "string": RustWrenEngineColumnType.STRING, - "time": RustWrenEngineColumnType.TIME, - "timestamp": RustWrenEngineColumnType.TIMESTAMPTZ, - } - - return switcher.get(data_type, RustWrenEngineColumnType.UNKNOWN) + # Map to RustWrenEngineColumnType using module-level mapping + mapped_type = BIGQUERY_TYPE_MAPPING.get( + normalized_type, RustWrenEngineColumnType.UNKNOWN + ) + + if mapped_type == RustWrenEngineColumnType.UNKNOWN: + logger.warning(f"Unknown BigQuery data type: {data_type}") + + return mapped_type diff --git a/ibis-server/app/model/metadata/canner.py b/ibis-server/app/model/metadata/canner.py index ed6cfda24..40564bdc9 100644 --- a/ibis-server/app/model/metadata/canner.py +++ b/ibis-server/app/model/metadata/canner.py @@ -3,6 
+3,7 @@ from gql import Client, gql from gql.transport.aiohttp import AIOHTTPTransport +from loguru import logger from app.model import CannerConnectionInfo from app.model.error import ErrorCode, WrenError @@ -15,6 +16,40 @@ ) from app.model.metadata.metadata import Metadata +CANNER_TYPE_MAPPING = { + # String Types (ignore Binary and Spatial Types for now) + "char": RustWrenEngineColumnType.CHAR, + "varchar": RustWrenEngineColumnType.VARCHAR, + "tinytext": RustWrenEngineColumnType.TEXT, + "text": RustWrenEngineColumnType.TEXT, + "mediumtext": RustWrenEngineColumnType.TEXT, + "longtext": RustWrenEngineColumnType.TEXT, + "enum": RustWrenEngineColumnType.VARCHAR, + "set": RustWrenEngineColumnType.VARCHAR, + # Numeric Types(https://dev.mysql.com/doc/refman/8.4/en/numeric-types.html) + "bit": RustWrenEngineColumnType.TINYINT, + "tinyint": RustWrenEngineColumnType.TINYINT, + "smallint": RustWrenEngineColumnType.SMALLINT, + "mediumint": RustWrenEngineColumnType.INTEGER, + "int": RustWrenEngineColumnType.INTEGER, + "integer": RustWrenEngineColumnType.INTEGER, + "bigint": RustWrenEngineColumnType.BIGINT, + # boolean + "bool": RustWrenEngineColumnType.BOOL, + "boolean": RustWrenEngineColumnType.BOOL, + # Decimal + "float": RustWrenEngineColumnType.FLOAT8, + "double": RustWrenEngineColumnType.DOUBLE, + "decimal": RustWrenEngineColumnType.DECIMAL, + "numeric": RustWrenEngineColumnType.NUMERIC, + # Date and Time Types(https://dev.mysql.com/doc/refman/8.4/en/date-and-time-types.html) + "date": RustWrenEngineColumnType.DATE, + "datetime": RustWrenEngineColumnType.TIMESTAMP, + "timestamp": RustWrenEngineColumnType.TIMESTAMPTZ, + # JSON Type + "json": RustWrenEngineColumnType.JSON, +} + class CannerMetadata(Metadata): def __init__(self, connection_info: CannerConnectionInfo): @@ -202,38 +237,15 @@ def _transform_column_type(self, data_type): # trim the (all characters) at the end of the data_type if exists data_type = re.sub(r"\(.*\)", "", data_type).strip() - switcher = { - # 
String Types (ignore Binary and Spatial Types for now) - "char": RustWrenEngineColumnType.CHAR, - "varchar": RustWrenEngineColumnType.VARCHAR, - "tinytext": RustWrenEngineColumnType.TEXT, - "text": RustWrenEngineColumnType.TEXT, - "mediumtext": RustWrenEngineColumnType.TEXT, - "longtext": RustWrenEngineColumnType.TEXT, - "enum": RustWrenEngineColumnType.VARCHAR, - "set": RustWrenEngineColumnType.VARCHAR, - # Numeric Types(https://dev.mysql.com/doc/refman/8.4/en/numeric-types.html) - "bit": RustWrenEngineColumnType.TINYINT, - "tinyint": RustWrenEngineColumnType.TINYINT, - "smallint": RustWrenEngineColumnType.SMALLINT, - "mediumint": RustWrenEngineColumnType.INTEGER, - "int": RustWrenEngineColumnType.INTEGER, - "integer": RustWrenEngineColumnType.INTEGER, - "bigint": RustWrenEngineColumnType.BIGINT, - # boolean - "bool": RustWrenEngineColumnType.BOOL, - "boolean": RustWrenEngineColumnType.BOOL, - # Decimal - "float": RustWrenEngineColumnType.FLOAT8, - "double": RustWrenEngineColumnType.DOUBLE, - "decimal": RustWrenEngineColumnType.DECIMAL, - "numeric": RustWrenEngineColumnType.NUMERIC, - # Date and Time Types(https://dev.mysql.com/doc/refman/8.4/en/date-and-time-types.html) - "date": RustWrenEngineColumnType.DATE, - "datetime": RustWrenEngineColumnType.TIMESTAMP, - "timestamp": RustWrenEngineColumnType.TIMESTAMPTZ, - # JSON Type - "json": RustWrenEngineColumnType.JSON, - } - - return switcher.get(data_type.lower(), RustWrenEngineColumnType.UNKNOWN) + # Convert to lowercase for comparison + normalized_type = data_type.lower() + + # Use the module-level mapping table + mapped_type = CANNER_TYPE_MAPPING.get( + normalized_type, RustWrenEngineColumnType.UNKNOWN + ) + + if mapped_type == RustWrenEngineColumnType.UNKNOWN: + logger.warning(f"Unknown Canner data type: {data_type}") + + return mapped_type diff --git a/ibis-server/app/model/metadata/clickhouse.py b/ibis-server/app/model/metadata/clickhouse.py index 07dc335a9..8121cf6f0 100644 --- 
a/ibis-server/app/model/metadata/clickhouse.py +++ b/ibis-server/app/model/metadata/clickhouse.py @@ -1,3 +1,5 @@ +from loguru import logger + from app.model import ClickHouseConnectionInfo from app.model.data_source import DataSource from app.model.metadata.dto import ( @@ -9,6 +11,37 @@ ) from app.model.metadata.metadata import Metadata +# ClickHouse-specific type mapping +CLICKHOUSE_TYPE_MAPPING = { + # Boolean Types + "boolean": RustWrenEngineColumnType.BOOL, + # Integer Types + "int8": RustWrenEngineColumnType.TINYINT, + "uint8": RustWrenEngineColumnType.INT2, + "int16": RustWrenEngineColumnType.INT2, + "uint16": RustWrenEngineColumnType.INT2, + "int32": RustWrenEngineColumnType.INT4, + "uint32": RustWrenEngineColumnType.INT4, + "int64": RustWrenEngineColumnType.INT8, + "uint64": RustWrenEngineColumnType.INT8, + # Float Types + "float32": RustWrenEngineColumnType.FLOAT4, + "float64": RustWrenEngineColumnType.FLOAT8, + "decimal": RustWrenEngineColumnType.DECIMAL, + # Date/Time Types + "date": RustWrenEngineColumnType.DATE, + "datetime": RustWrenEngineColumnType.TIMESTAMP, + # String Types + "string": RustWrenEngineColumnType.VARCHAR, + "fixedstring": RustWrenEngineColumnType.CHAR, + # Special Types + "uuid": RustWrenEngineColumnType.UUID, + "enum8": RustWrenEngineColumnType.STRING, # Enums can be mapped to strings + "enum16": RustWrenEngineColumnType.STRING, # Enums can be mapped to strings + "ipv4": RustWrenEngineColumnType.INET, + "ipv6": RustWrenEngineColumnType.INET, +} + class ClickHouseMetadata(Metadata): def __init__(self, connection_info: ClickHouseConnectionInfo): @@ -76,33 +109,24 @@ def get_version(self) -> str: def _format_compact_table_name(self, schema: str, table: str): return f"{schema}.{table}" - def _transform_column_type(self, data_type): - # lower case the data_type - data_type = data_type.lower() - - # Map ClickHouse types to RustWrenEngineColumnType - switcher = { - "boolean": RustWrenEngineColumnType.BOOL, - "int8": 
RustWrenEngineColumnType.TINYINT, - "uint8": RustWrenEngineColumnType.INT2, - "int16": RustWrenEngineColumnType.INT2, - "uint16": RustWrenEngineColumnType.INT2, - "int32": RustWrenEngineColumnType.INT4, - "uint32": RustWrenEngineColumnType.INT4, - "int64": RustWrenEngineColumnType.INT8, - "uint64": RustWrenEngineColumnType.INT8, - "float32": RustWrenEngineColumnType.FLOAT4, - "float64": RustWrenEngineColumnType.FLOAT8, - "decimal": RustWrenEngineColumnType.DECIMAL, - "date": RustWrenEngineColumnType.DATE, - "datetime": RustWrenEngineColumnType.TIMESTAMP, - "string": RustWrenEngineColumnType.VARCHAR, - "fixedstring": RustWrenEngineColumnType.CHAR, - "uuid": RustWrenEngineColumnType.UUID, - "enum8": RustWrenEngineColumnType.STRING, # Enums can be mapped to strings - "enum16": RustWrenEngineColumnType.STRING, # Enums can be mapped to strings - "ipv4": RustWrenEngineColumnType.INET, - "ipv6": RustWrenEngineColumnType.INET, - } - - return switcher.get(data_type, RustWrenEngineColumnType.UNKNOWN) + def _transform_column_type(self, data_type: str) -> RustWrenEngineColumnType: + """Transform ClickHouse data type to RustWrenEngineColumnType. 
+ + Args: + data_type: The ClickHouse data type string + + Returns: + The corresponding RustWrenEngineColumnType + """ + # Convert to lowercase for comparison + normalized_type = data_type.lower() + + # Use the module-level mapping table + mapped_type = CLICKHOUSE_TYPE_MAPPING.get( + normalized_type, RustWrenEngineColumnType.UNKNOWN + ) + + if mapped_type == RustWrenEngineColumnType.UNKNOWN: + logger.warning(f"Unknown ClickHouse data type: {data_type}") + + return mapped_type diff --git a/ibis-server/app/model/metadata/mssql.py b/ibis-server/app/model/metadata/mssql.py index c3a135a84..ab96e5e01 100644 --- a/ibis-server/app/model/metadata/mssql.py +++ b/ibis-server/app/model/metadata/mssql.py @@ -1,3 +1,5 @@ +from loguru import logger + from app.model import MSSqlConnectionInfo from app.model.data_source import DataSource from app.model.metadata.dto import ( @@ -10,6 +12,42 @@ ) from app.model.metadata.metadata import Metadata +# MSSQL-specific type mapping +# Reference: https://learn.microsoft.com/en-us/sql/t-sql/data-types/data-types-transact-sql?view=sql-server-ver15#exact-numerics +MSSQL_TYPE_MAPPING = { + # String Types + "char": RustWrenEngineColumnType.CHAR, + "varchar": RustWrenEngineColumnType.VARCHAR, + "text": RustWrenEngineColumnType.TEXT, + "nchar": RustWrenEngineColumnType.CHAR, + "nvarchar": RustWrenEngineColumnType.VARCHAR, + "ntext": RustWrenEngineColumnType.TEXT, + # Numeric Types + "bit": RustWrenEngineColumnType.TINYINT, + "tinyint": RustWrenEngineColumnType.TINYINT, + "smallint": RustWrenEngineColumnType.SMALLINT, + "int": RustWrenEngineColumnType.INTEGER, + "bigint": RustWrenEngineColumnType.BIGINT, + # Boolean + "boolean": RustWrenEngineColumnType.BOOL, + # Decimal + "float": RustWrenEngineColumnType.FLOAT8, + "real": RustWrenEngineColumnType.FLOAT8, + "decimal": RustWrenEngineColumnType.DECIMAL, + "numeric": RustWrenEngineColumnType.NUMERIC, + "money": RustWrenEngineColumnType.DECIMAL, + "smallmoney": RustWrenEngineColumnType.DECIMAL, + # 
Date and Time Types + "date": RustWrenEngineColumnType.DATE, + "datetime": RustWrenEngineColumnType.TIMESTAMP, + "datetime2": RustWrenEngineColumnType.TIMESTAMP, + "smalldatetime": RustWrenEngineColumnType.TIMESTAMP, + "time": RustWrenEngineColumnType.INTERVAL, + "datetimeoffset": RustWrenEngineColumnType.TIMESTAMPTZ, + # JSON Type (Note: MSSQL supports JSON natively as a string type) + "json": RustWrenEngineColumnType.JSON, +} + class MSSQLMetadata(Metadata): def __init__(self, connection_info: MSSqlConnectionInfo): @@ -172,41 +210,24 @@ def _format_constraint_name( ): return f"{table_name}_{column_name}_{referenced_table_name}_{referenced_column_name}" - def _transform_column_type(self, data_type): - # Define the mapping of MSSQL data types to RustWrenEngineColumnType - # ref: https://learn.microsoft.com/en-us/sql/t-sql/data-types/data-types-transact-sql?view=sql-server-ver15#exact-numerics - switcher = { - # String Types - "char": RustWrenEngineColumnType.CHAR, - "varchar": RustWrenEngineColumnType.VARCHAR, - "text": RustWrenEngineColumnType.TEXT, - "nchar": RustWrenEngineColumnType.CHAR, - "nvarchar": RustWrenEngineColumnType.VARCHAR, - "ntext": RustWrenEngineColumnType.TEXT, - # Numeric Types - "bit": RustWrenEngineColumnType.TINYINT, - "tinyint": RustWrenEngineColumnType.TINYINT, - "smallint": RustWrenEngineColumnType.SMALLINT, - "int": RustWrenEngineColumnType.INTEGER, - "bigint": RustWrenEngineColumnType.BIGINT, - # Boolean - "boolean": RustWrenEngineColumnType.BOOL, - # Decimal - "float": RustWrenEngineColumnType.FLOAT8, - "real": RustWrenEngineColumnType.FLOAT8, - "decimal": RustWrenEngineColumnType.DECIMAL, - "numeric": RustWrenEngineColumnType.NUMERIC, - "money": RustWrenEngineColumnType.DECIMAL, - "smallmoney": RustWrenEngineColumnType.DECIMAL, - # Date and Time Types - "date": RustWrenEngineColumnType.DATE, - "datetime": RustWrenEngineColumnType.TIMESTAMP, - "datetime2": RustWrenEngineColumnType.TIMESTAMP, - "smalldatetime": 
RustWrenEngineColumnType.TIMESTAMP, - "time": RustWrenEngineColumnType.INTERVAL, - "datetimeoffset": RustWrenEngineColumnType.TIMESTAMPTZ, - # JSON Type (Note: MSSQL supports JSON natively as a string type) - "json": RustWrenEngineColumnType.JSON, - } - - return switcher.get(data_type.lower(), RustWrenEngineColumnType.UNKNOWN) + def _transform_column_type(self, data_type: str) -> RustWrenEngineColumnType: + """Transform MSSQL data type to RustWrenEngineColumnType. + + Args: + data_type: The MSSQL data type string + + Returns: + The corresponding RustWrenEngineColumnType + """ + # Convert to lowercase for comparison + normalized_type = data_type.lower() + + # Use the module-level mapping table + mapped_type = MSSQL_TYPE_MAPPING.get( + normalized_type, RustWrenEngineColumnType.UNKNOWN + ) + + if mapped_type == RustWrenEngineColumnType.UNKNOWN: + logger.warning(f"Unknown MSSQL data type: {data_type}") + + return mapped_type diff --git a/ibis-server/app/model/metadata/mysql.py b/ibis-server/app/model/metadata/mysql.py index 13f34d357..0eab110b8 100644 --- a/ibis-server/app/model/metadata/mysql.py +++ b/ibis-server/app/model/metadata/mysql.py @@ -1,3 +1,5 @@ +from loguru import logger + from app.model import MySqlConnectionInfo from app.model.data_source import DataSource from app.model.metadata.dto import ( @@ -10,6 +12,42 @@ ) from app.model.metadata.metadata import Metadata +# MySQL-specific type mapping +# All possible types listed here: https://dev.mysql.com/doc/refman/8.4/en/data-types.html +MYSQL_TYPE_MAPPING = { + # String Types (ignore Binary and Spatial Types for now) + "char": RustWrenEngineColumnType.CHAR, + "varchar": RustWrenEngineColumnType.VARCHAR, + "tinytext": RustWrenEngineColumnType.TEXT, + "text": RustWrenEngineColumnType.TEXT, + "mediumtext": RustWrenEngineColumnType.TEXT, + "longtext": RustWrenEngineColumnType.TEXT, + "enum": RustWrenEngineColumnType.VARCHAR, + "set": RustWrenEngineColumnType.VARCHAR, + # Numeric Types 
(https://dev.mysql.com/doc/refman/8.4/en/numeric-types.html) + "bit": RustWrenEngineColumnType.TINYINT, + "tinyint": RustWrenEngineColumnType.TINYINT, + "smallint": RustWrenEngineColumnType.SMALLINT, + "mediumint": RustWrenEngineColumnType.INTEGER, + "int": RustWrenEngineColumnType.INTEGER, + "integer": RustWrenEngineColumnType.INTEGER, + "bigint": RustWrenEngineColumnType.BIGINT, + # Boolean Types + "bool": RustWrenEngineColumnType.BOOL, + "boolean": RustWrenEngineColumnType.BOOL, + # Decimal Types + "float": RustWrenEngineColumnType.FLOAT8, + "double": RustWrenEngineColumnType.DOUBLE, + "decimal": RustWrenEngineColumnType.DECIMAL, + "numeric": RustWrenEngineColumnType.NUMERIC, + # Date and Time Types (https://dev.mysql.com/doc/refman/8.4/en/date-and-time-types.html) + "date": RustWrenEngineColumnType.DATE, + "datetime": RustWrenEngineColumnType.TIMESTAMP, + "timestamp": RustWrenEngineColumnType.TIMESTAMPTZ, + # JSON Type + "json": RustWrenEngineColumnType.JSON, +} + class MySQLMetadata(Metadata): def __init__(self, connection_info: MySqlConnectionInfo): @@ -128,40 +166,24 @@ def _format_constraint_name( ): return f"{table_name}_{column_name}_{referenced_table_name}_{referenced_column_name}" - def _transform_column_type(self, data_type): - # all possible types listed here: https://dev.mysql.com/doc/refman/8.4/en/data-types.html - switcher = { - # String Types (ignore Binary and Spatial Types for now) - "char": RustWrenEngineColumnType.CHAR, - "varchar": RustWrenEngineColumnType.VARCHAR, - "tinytext": RustWrenEngineColumnType.TEXT, - "text": RustWrenEngineColumnType.TEXT, - "mediumtext": RustWrenEngineColumnType.TEXT, - "longtext": RustWrenEngineColumnType.TEXT, - "enum": RustWrenEngineColumnType.VARCHAR, - "set": RustWrenEngineColumnType.VARCHAR, - # Numeric Types(https://dev.mysql.com/doc/refman/8.4/en/numeric-types.html) - "bit": RustWrenEngineColumnType.TINYINT, - "tinyint": RustWrenEngineColumnType.TINYINT, - "smallint": RustWrenEngineColumnType.SMALLINT, - 
"mediumint": RustWrenEngineColumnType.INTEGER, - "int": RustWrenEngineColumnType.INTEGER, - "integer": RustWrenEngineColumnType.INTEGER, - "bigint": RustWrenEngineColumnType.BIGINT, - # boolean - "bool": RustWrenEngineColumnType.BOOL, - "boolean": RustWrenEngineColumnType.BOOL, - # Decimal - "float": RustWrenEngineColumnType.FLOAT8, - "double": RustWrenEngineColumnType.DOUBLE, - "decimal": RustWrenEngineColumnType.DECIMAL, - "numeric": RustWrenEngineColumnType.NUMERIC, - # Date and Time Types(https://dev.mysql.com/doc/refman/8.4/en/date-and-time-types.html) - "date": RustWrenEngineColumnType.DATE, - "datetime": RustWrenEngineColumnType.TIMESTAMP, - "timestamp": RustWrenEngineColumnType.TIMESTAMPTZ, - # JSON Type - "json": RustWrenEngineColumnType.JSON, - } - - return switcher.get(data_type.lower(), RustWrenEngineColumnType.UNKNOWN) + def _transform_column_type(self, data_type: str) -> RustWrenEngineColumnType: + """Transform MySQL data type to RustWrenEngineColumnType. + + Args: + data_type: The MySQL data type string + + Returns: + The corresponding RustWrenEngineColumnType + """ + # Trim surrounding whitespace and lowercase the type name for lookup + normalized_type = data_type.strip().lower() + + # Use the module-level mapping table + mapped_type = MYSQL_TYPE_MAPPING.get( + normalized_type, RustWrenEngineColumnType.UNKNOWN + ) + + if mapped_type == RustWrenEngineColumnType.UNKNOWN: + logger.warning(f"Unknown MySQL data type: {data_type}") + + return mapped_type diff --git a/ibis-server/app/model/metadata/object_storage.py b/ibis-server/app/model/metadata/object_storage.py index 87197b6d4..5aa65a615 100644 --- a/ibis-server/app/model/metadata/object_storage.py +++ b/ibis-server/app/model/metadata/object_storage.py @@ -22,6 +22,34 @@ from app.model.metadata.metadata import Metadata from app.model.utils import init_duckdb_gcs, init_duckdb_minio, init_duckdb_s3 +DUCKDB_TYPE_MAPPING = { + "bigint": RustWrenEngineColumnType.INT64, + "bit": RustWrenEngineColumnType.INT2, +
"blob": RustWrenEngineColumnType.BYTES, + "boolean": RustWrenEngineColumnType.BOOL, + "date": RustWrenEngineColumnType.DATE, + "double": RustWrenEngineColumnType.DOUBLE, + "float": RustWrenEngineColumnType.FLOAT, + "integer": RustWrenEngineColumnType.INT, + # TODO: Wren engine does not support HUGEINT. Map to INT64 for now. + "hugeint": RustWrenEngineColumnType.INT64, + "interval": RustWrenEngineColumnType.INTERVAL, + "json": RustWrenEngineColumnType.JSON, + "smallint": RustWrenEngineColumnType.INT2, + "time": RustWrenEngineColumnType.TIME, + "timestamp": RustWrenEngineColumnType.TIMESTAMP, + "timestamp with time zone": RustWrenEngineColumnType.TIMESTAMPTZ, + "tinyint": RustWrenEngineColumnType.INT2, + "ubigint": RustWrenEngineColumnType.INT64, + # TODO: Wren engine does not support UHUGEINT. Map to INT64 for now. + "uhugeint": RustWrenEngineColumnType.INT64, + "uinteger": RustWrenEngineColumnType.INT, + "usmallint": RustWrenEngineColumnType.INT2, + "utinyint": RustWrenEngineColumnType.INT2, + "uuid": RustWrenEngineColumnType.UUID, + "varchar": RustWrenEngineColumnType.STRING, +} + class ObjectStorageMetadata(Metadata): def __init__(self, connection_info): @@ -121,6 +149,14 @@ def _read_df(self, conn, path): ) def _to_column_type(self, col_type: str) -> RustWrenEngineColumnType: + """Transform DuckDB data type to RustWrenEngineColumnType. 
+ + Args: + col_type: The DuckDB data type string + + Returns: + The corresponding RustWrenEngineColumnType + """ if col_type.startswith("DECIMAL"): return RustWrenEngineColumnType.DECIMAL @@ -131,36 +167,18 @@ def _to_column_type(self, col_type: str) -> RustWrenEngineColumnType: # TODO: support array if col_type.endswith("[]"): return RustWrenEngineColumnType.UNKNOWN + # Convert to lowercase for comparison + normalized_type = col_type.lower() + + # Use the module-level mapping table + mapped_type = DUCKDB_TYPE_MAPPING.get( + normalized_type, RustWrenEngineColumnType.UNKNOWN + ) + + if mapped_type == RustWrenEngineColumnType.UNKNOWN: + logger.warning(f"Unknown DuckDB data type: {col_type}") - # refer to https://duckdb.org/docs/sql/data_types/overview#general-purpose-data-types - switcher = { - "BIGINT": RustWrenEngineColumnType.INT64, - "BIT": RustWrenEngineColumnType.INT2, - "BLOB": RustWrenEngineColumnType.BYTES, - "BOOLEAN": RustWrenEngineColumnType.BOOL, - "DATE": RustWrenEngineColumnType.DATE, - "DOUBLE": RustWrenEngineColumnType.DOUBLE, - "FLOAT": RustWrenEngineColumnType.FLOAT, - "INTEGER": RustWrenEngineColumnType.INT, - # TODO: Wren engine does not support HUGEINT. Map to INT64 for now. - "HUGEINT": RustWrenEngineColumnType.INT64, - "INTERVAL": RustWrenEngineColumnType.INTERVAL, - "JSON": RustWrenEngineColumnType.JSON, - "SMALLINT": RustWrenEngineColumnType.INT2, - "TIME": RustWrenEngineColumnType.TIME, - "TIMESTAMP": RustWrenEngineColumnType.TIMESTAMP, - "TIMESTAMP WITH TIME ZONE": RustWrenEngineColumnType.TIMESTAMPTZ, - "TINYINT": RustWrenEngineColumnType.INT2, - "UBIGINT": RustWrenEngineColumnType.INT64, - # TODO: Wren engine does not support UHUGEINT. Map to INT64 for now. 
- "UHUGEINT": RustWrenEngineColumnType.INT64, - "UINTEGER": RustWrenEngineColumnType.INT, - "USMALLINT": RustWrenEngineColumnType.INT2, - "UTINYINT": RustWrenEngineColumnType.INT2, - "UUID": RustWrenEngineColumnType.UUID, - "VARCHAR": RustWrenEngineColumnType.STRING, - } - return switcher.get(col_type, RustWrenEngineColumnType.UNKNOWN) + return mapped_type def _get_connection(self): return duckdb.connect() diff --git a/ibis-server/app/model/metadata/oracle.py b/ibis-server/app/model/metadata/oracle.py index ea536da7d..c6b2ed8e3 100644 --- a/ibis-server/app/model/metadata/oracle.py +++ b/ibis-server/app/model/metadata/oracle.py @@ -1,4 +1,5 @@ import ibis +from loguru import logger from app.model import OracleConnectionInfo from app.model.data_source import DataSource @@ -12,6 +13,37 @@ ) from app.model.metadata.metadata import Metadata +# Oracle-specific type mapping +ORACLE_TYPE_MAPPING = { + "CHAR": RustWrenEngineColumnType.CHAR, + "NCHAR": RustWrenEngineColumnType.CHAR, + "VARCHAR2": RustWrenEngineColumnType.VARCHAR, + "NVARCHAR2": RustWrenEngineColumnType.VARCHAR, + "CLOB": RustWrenEngineColumnType.TEXT, + "NCLOB": RustWrenEngineColumnType.TEXT, + "NUMBER": RustWrenEngineColumnType.DECIMAL, + "FLOAT": RustWrenEngineColumnType.FLOAT8, + "BINARY_FLOAT": RustWrenEngineColumnType.FLOAT8, + "BINARY_DOUBLE": RustWrenEngineColumnType.DOUBLE, + "DATE": RustWrenEngineColumnType.TIMESTAMP, # Oracle DATE includes time. 
+ "TIMESTAMP": RustWrenEngineColumnType.TIMESTAMP, + "TIMESTAMP WITH TIME ZONE": RustWrenEngineColumnType.TIMESTAMPTZ, + "TIMESTAMP WITH LOCAL TIME ZONE": RustWrenEngineColumnType.TIMESTAMPTZ, + "INTERVAL YEAR TO MONTH": RustWrenEngineColumnType.INTERVAL, + "INTERVAL DAY TO SECOND": RustWrenEngineColumnType.INTERVAL, + "BLOB": RustWrenEngineColumnType.BYTEA, + "BFILE": RustWrenEngineColumnType.BYTEA, + "RAW": RustWrenEngineColumnType.BYTEA, + "LONG RAW": RustWrenEngineColumnType.BYTEA, + "ROWID": RustWrenEngineColumnType.CHAR, + "UROWID": RustWrenEngineColumnType.CHAR, + "JSON": RustWrenEngineColumnType.JSON, + "OSON": RustWrenEngineColumnType.JSON, + "VARCHAR2 WITH JSON": RustWrenEngineColumnType.JSON, + "BLOB WITH JSON": RustWrenEngineColumnType.JSON, + "CLOB WITH JSON": RustWrenEngineColumnType.JSON, +} + class OracleMetadata(Metadata): def __init__(self, connection_info: OracleConnectionInfo): @@ -190,34 +222,24 @@ def _format_constraint_name( ): return f"{table_name}_{column_name}_{referenced_table_name}_{referenced_column_name}" - def _transform_column_type(self, data_type): - switcher = { - "CHAR": RustWrenEngineColumnType.CHAR, - "NCHAR": RustWrenEngineColumnType.CHAR, - "VARCHAR2": RustWrenEngineColumnType.VARCHAR, - "NVARCHAR2": RustWrenEngineColumnType.VARCHAR, - "CLOB": RustWrenEngineColumnType.TEXT, - "NCLOB": RustWrenEngineColumnType.TEXT, - "NUMBER": RustWrenEngineColumnType.DECIMAL, - "FLOAT": RustWrenEngineColumnType.FLOAT8, - "BINARY_FLOAT": RustWrenEngineColumnType.FLOAT8, - "BINARY_DOUBLE": RustWrenEngineColumnType.DOUBLE, - "DATE": RustWrenEngineColumnType.TIMESTAMP, # Oracle DATE includes time. 
- "TIMESTAMP": RustWrenEngineColumnType.TIMESTAMP, - "TIMESTAMP WITH TIME ZONE": RustWrenEngineColumnType.TIMESTAMPTZ, - "TIMESTAMP WITH LOCAL TIME ZONE": RustWrenEngineColumnType.TIMESTAMPTZ, - "INTERVAL YEAR TO MONTH": RustWrenEngineColumnType.INTERVAL, - "INTERVAL DAY TO SECOND": RustWrenEngineColumnType.INTERVAL, - "BLOB": RustWrenEngineColumnType.BYTEA, - "BFILE": RustWrenEngineColumnType.BYTEA, - "RAW": RustWrenEngineColumnType.BYTEA, - "LONG RAW": RustWrenEngineColumnType.BYTEA, - "ROWID": RustWrenEngineColumnType.CHAR, - "UROWID": RustWrenEngineColumnType.CHAR, - "JSON": RustWrenEngineColumnType.JSON, - "OSON": RustWrenEngineColumnType.JSON, - "VARCHAR2 WITH JSON": RustWrenEngineColumnType.JSON, - "BLOB WITH JSON": RustWrenEngineColumnType.JSON, - "CLOB WITH JSON": RustWrenEngineColumnType.JSON, - } - return switcher.get(data_type.upper(), RustWrenEngineColumnType.UNKNOWN) + def _transform_column_type(self, data_type: str) -> RustWrenEngineColumnType: + """Transform Oracle data type to RustWrenEngineColumnType. 
+ + Args: + data_type: The Oracle data type string + + Returns: + The corresponding RustWrenEngineColumnType + """ + # Convert to uppercase for Oracle type comparison + normalized_type = data_type.upper() + + # Use the module-level mapping table + mapped_type = ORACLE_TYPE_MAPPING.get( + normalized_type, RustWrenEngineColumnType.UNKNOWN + ) + + if mapped_type == RustWrenEngineColumnType.UNKNOWN: + logger.warning(f"Unknown Oracle data type: {data_type}") + + return mapped_type diff --git a/ibis-server/app/model/metadata/postgres.py b/ibis-server/app/model/metadata/postgres.py index 9a4324a98..1d25e6ac9 100644 --- a/ibis-server/app/model/metadata/postgres.py +++ b/ibis-server/app/model/metadata/postgres.py @@ -1,3 +1,5 @@ +from loguru import logger + from app.model import PostgresConnectionInfo from app.model.data_source import DataSource from app.model.metadata.dto import ( @@ -10,6 +12,51 @@ ) from app.model.metadata.metadata import Metadata +# PostgreSQL-specific type mappings +# All possible types listed here: https://www.postgresql.org/docs/current/datatype.html#DATATYPE-TABLE +POSTGRES_TYPE_MAPPING = { + "text": RustWrenEngineColumnType.TEXT, + "char": RustWrenEngineColumnType.CHAR, + "character": RustWrenEngineColumnType.CHAR, + "bpchar": RustWrenEngineColumnType.CHAR, + "name": RustWrenEngineColumnType.CHAR, + "character varying": RustWrenEngineColumnType.VARCHAR, + "varchar": RustWrenEngineColumnType.VARCHAR, + "bigint": RustWrenEngineColumnType.BIGINT, + "int": RustWrenEngineColumnType.INTEGER, + "int4": RustWrenEngineColumnType.INTEGER, + "integer": RustWrenEngineColumnType.INTEGER, + "smallint": RustWrenEngineColumnType.SMALLINT, + "int2": RustWrenEngineColumnType.SMALLINT, + "real": RustWrenEngineColumnType.REAL, + "float4": RustWrenEngineColumnType.REAL, + "double precision": RustWrenEngineColumnType.DOUBLE, + "float8": RustWrenEngineColumnType.DOUBLE, + "numeric": RustWrenEngineColumnType.DECIMAL, + "decimal": RustWrenEngineColumnType.DECIMAL, + 
"boolean": RustWrenEngineColumnType.BOOL, + "bool": RustWrenEngineColumnType.BOOL, + "timestamp": RustWrenEngineColumnType.TIMESTAMP, + "timestamp without time zone": RustWrenEngineColumnType.TIMESTAMP, + "timestamp with time zone": RustWrenEngineColumnType.TIMESTAMPTZ, + "timestamptz": RustWrenEngineColumnType.TIMESTAMPTZ, + "date": RustWrenEngineColumnType.DATE, + "time": RustWrenEngineColumnType.TIME, + "interval": RustWrenEngineColumnType.INTERVAL, + "json": RustWrenEngineColumnType.JSON, + "jsonb": RustWrenEngineColumnType.JSON, + "bytea": RustWrenEngineColumnType.BYTEA, + "uuid": RustWrenEngineColumnType.UUID, + "inet": RustWrenEngineColumnType.INET, + "oid": RustWrenEngineColumnType.OID, +} + +# PostgreSQL extension type mappings +POSTGRES_EXTENSION_TYPE_MAPPING = { + "geometry": RustWrenEngineColumnType.GEOMETRY, + "geography": RustWrenEngineColumnType.GEOGRAPHY, +} + class ExtensionHandler: def __init__(self, connection): @@ -84,17 +131,24 @@ def postgis_handler(self, tables: list[Table], schema_name: str) -> list[Table]: return tables - def _transform_postgres_column_type(self, data_type): - # lower case the data_type - data_type = data_type.lower() + def _transform_postgres_column_type( + self, data_type: str + ) -> RustWrenEngineColumnType: + """Transform PostgreSQL extension column type. 
- # Extension types - switcher = { - "geometry": RustWrenEngineColumnType.GEOMETRY, - "geography": RustWrenEngineColumnType.GEOGRAPHY, - } + Args: + data_type: The PostgreSQL extension data type string + + Returns: + The corresponding RustWrenEngineColumnType + """ + # Convert to lowercase for comparison + normalized_type = data_type.lower() - return switcher.get(data_type, RustWrenEngineColumnType.UNKNOWN) + # Use the module-level extension mapping table + return POSTGRES_EXTENSION_TYPE_MAPPING.get( + normalized_type, RustWrenEngineColumnType.UNKNOWN + ) class PostgresMetadata(Metadata): @@ -222,38 +276,26 @@ def _format_constraint_name( ): return f"{table_name}_{column_name}_{foreign_table_name}_{foreign_column_name}" - def _transform_postgres_column_type(self, data_type): - # lower case the data_type - data_type = data_type.lower() - - # all possible types listed here: https://www.postgresql.org/docs/current/datatype.html#DATATYPE-TABLE - - switcher = { - "text": RustWrenEngineColumnType.TEXT, - "char": RustWrenEngineColumnType.CHAR, - "character": RustWrenEngineColumnType.CHAR, - "bpchar": RustWrenEngineColumnType.CHAR, - "name": RustWrenEngineColumnType.CHAR, - "character varying": RustWrenEngineColumnType.VARCHAR, - "bigint": RustWrenEngineColumnType.BIGINT, - "int": RustWrenEngineColumnType.INTEGER, - "integer": RustWrenEngineColumnType.INTEGER, - "smallint": RustWrenEngineColumnType.SMALLINT, - "real": RustWrenEngineColumnType.REAL, - "double precision": RustWrenEngineColumnType.DOUBLE, - "numeric": RustWrenEngineColumnType.DECIMAL, - "decimal": RustWrenEngineColumnType.DECIMAL, - "boolean": RustWrenEngineColumnType.BOOL, - "timestamp": RustWrenEngineColumnType.TIMESTAMP, - "timestamp without time zone": RustWrenEngineColumnType.TIMESTAMP, - "timestamp with time zone": RustWrenEngineColumnType.TIMESTAMPTZ, - "date": RustWrenEngineColumnType.DATE, - "interval": RustWrenEngineColumnType.INTERVAL, - "json": RustWrenEngineColumnType.JSON, - "bytea": 
RustWrenEngineColumnType.BYTEA, - "uuid": RustWrenEngineColumnType.UUID, - "inet": RustWrenEngineColumnType.INET, - "oid": RustWrenEngineColumnType.OID, - } + def _transform_postgres_column_type( + self, data_type: str + ) -> RustWrenEngineColumnType: + """Transform PostgreSQL data type to RustWrenEngineColumnType. + + Args: + data_type: The PostgreSQL data type string + + Returns: + The corresponding RustWrenEngineColumnType + """ + # Convert to lowercase for comparison + normalized_type = data_type.lower() + + # Use the module-level mapping table + mapped_type = POSTGRES_TYPE_MAPPING.get( + normalized_type, RustWrenEngineColumnType.UNKNOWN + ) + + if mapped_type == RustWrenEngineColumnType.UNKNOWN: + logger.warning(f"Unknown Postgres data type: {data_type}") - return switcher.get(data_type, RustWrenEngineColumnType.UNKNOWN) + return mapped_type diff --git a/ibis-server/app/model/metadata/redshift.py b/ibis-server/app/model/metadata/redshift.py index d6b3ea8d4..7598507be 100644 --- a/ibis-server/app/model/metadata/redshift.py +++ b/ibis-server/app/model/metadata/redshift.py @@ -1,3 +1,5 @@ +from loguru import logger + from app.model import RedshiftConnectionInfo from app.model.connector import Connector from app.model.metadata.dto import ( @@ -10,6 +12,44 @@ ) from app.model.metadata.metadata import Metadata +# Redshift-specific type mapping +# Reference: https://docs.aws.amazon.com/redshift/latest/dg/c_Supported_data_types.html +REDSHIFT_TYPE_MAPPING = { + "text": RustWrenEngineColumnType.TEXT, + "char": RustWrenEngineColumnType.CHAR, + "character": RustWrenEngineColumnType.CHAR, + "bpchar": RustWrenEngineColumnType.CHAR, + "name": RustWrenEngineColumnType.CHAR, + "character varying": RustWrenEngineColumnType.VARCHAR, + "varchar": RustWrenEngineColumnType.VARCHAR, + "bigint": RustWrenEngineColumnType.BIGINT, + "int": RustWrenEngineColumnType.INTEGER, + "int4": RustWrenEngineColumnType.INTEGER, + "integer": RustWrenEngineColumnType.INTEGER, + "smallint": 
RustWrenEngineColumnType.SMALLINT, + "int2": RustWrenEngineColumnType.SMALLINT, + "real": RustWrenEngineColumnType.REAL, + "float4": RustWrenEngineColumnType.REAL, + "double precision": RustWrenEngineColumnType.DOUBLE, + "float8": RustWrenEngineColumnType.DOUBLE, + "numeric": RustWrenEngineColumnType.DECIMAL, + "decimal": RustWrenEngineColumnType.DECIMAL, + "boolean": RustWrenEngineColumnType.BOOL, + "bool": RustWrenEngineColumnType.BOOL, + "timestamp": RustWrenEngineColumnType.TIMESTAMP, + "timestamp without time zone": RustWrenEngineColumnType.TIMESTAMP, + "timestamp with time zone": RustWrenEngineColumnType.TIMESTAMPTZ, + "timestamptz": RustWrenEngineColumnType.TIMESTAMPTZ, + "date": RustWrenEngineColumnType.DATE, + "time": RustWrenEngineColumnType.TIME, + "interval": RustWrenEngineColumnType.INTERVAL, + "json": RustWrenEngineColumnType.JSON, + "bytea": RustWrenEngineColumnType.BYTEA, + "uuid": RustWrenEngineColumnType.UUID, + "inet": RustWrenEngineColumnType.INET, + "oid": RustWrenEngineColumnType.OID, +} + class RedshiftMetadata(Metadata): def __init__(self, connection_info: RedshiftConnectionInfo): @@ -133,38 +173,26 @@ def _format_constraint_name( ): return f"{table_name}_{column_name}_{foreign_table_name}_{foreign_column_name}" - def _transform_redshift_column_type(self, data_type): - data_type = data_type.lower() - - # Redshift doc - # https://docs.aws.amazon.com/redshift/latest/dg/c_Supported_data_types.html - - switcher = { - "text": RustWrenEngineColumnType.TEXT, - "char": RustWrenEngineColumnType.CHAR, - "character": RustWrenEngineColumnType.CHAR, - "bpchar": RustWrenEngineColumnType.CHAR, - "name": RustWrenEngineColumnType.CHAR, - "character varying": RustWrenEngineColumnType.VARCHAR, - "bigint": RustWrenEngineColumnType.BIGINT, - "int": RustWrenEngineColumnType.INTEGER, - "integer": RustWrenEngineColumnType.INTEGER, - "smallint": RustWrenEngineColumnType.SMALLINT, - "real": RustWrenEngineColumnType.REAL, - "double precision": 
RustWrenEngineColumnType.DOUBLE, - "numeric": RustWrenEngineColumnType.DECIMAL, - "decimal": RustWrenEngineColumnType.DECIMAL, - "boolean": RustWrenEngineColumnType.BOOL, - "timestamp": RustWrenEngineColumnType.TIMESTAMP, - "timestamp without time zone": RustWrenEngineColumnType.TIMESTAMP, - "timestamp with time zone": RustWrenEngineColumnType.TIMESTAMPTZ, - "date": RustWrenEngineColumnType.DATE, - "interval": RustWrenEngineColumnType.INTERVAL, - "json": RustWrenEngineColumnType.JSON, - "bytea": RustWrenEngineColumnType.BYTEA, - "uuid": RustWrenEngineColumnType.UUID, - "inet": RustWrenEngineColumnType.INET, - "oid": RustWrenEngineColumnType.OID, - } - - return switcher.get(data_type, RustWrenEngineColumnType.UNKNOWN) + def _transform_redshift_column_type( + self, data_type: str + ) -> RustWrenEngineColumnType: + """Transform Redshift data type to RustWrenEngineColumnType. + + Args: + data_type: The Redshift data type string + + Returns: + The corresponding RustWrenEngineColumnType + """ + # Convert to lowercase for comparison + normalized_type = data_type.lower() + + # Use the module-level mapping table + mapped_type = REDSHIFT_TYPE_MAPPING.get( + normalized_type, RustWrenEngineColumnType.UNKNOWN + ) + + if mapped_type == RustWrenEngineColumnType.UNKNOWN: + logger.warning(f"Unknown Redshift data type: {data_type}") + + return mapped_type diff --git a/ibis-server/app/model/metadata/snowflake.py b/ibis-server/app/model/metadata/snowflake.py index 7998b9f09..e864d8016 100644 --- a/ibis-server/app/model/metadata/snowflake.py +++ b/ibis-server/app/model/metadata/snowflake.py @@ -1,5 +1,7 @@ from contextlib import closing +from loguru import logger + from app.model import SnowflakeConnectionInfo from app.model.data_source import DataSource from app.model.metadata.dto import ( @@ -12,6 +14,42 @@ ) from app.model.metadata.metadata import Metadata +# Snowflake-specific type mapping +# All possible types listed here: 
https://docs.snowflake.com/en/sql-reference/intro-summary-data-types +SNOWFLAKE_TYPE_MAPPING = { + # Numeric Types + "number": RustWrenEngineColumnType.NUMERIC, + "decimal": RustWrenEngineColumnType.NUMERIC, + "numeric": RustWrenEngineColumnType.NUMERIC, + "int": RustWrenEngineColumnType.INTEGER, + "integer": RustWrenEngineColumnType.INTEGER, + "bigint": RustWrenEngineColumnType.BIGINT, + "smallint": RustWrenEngineColumnType.SMALLINT, + "tinyint": RustWrenEngineColumnType.TINYINT, + "byteint": RustWrenEngineColumnType.TINYINT, + # Float Types + "float4": RustWrenEngineColumnType.FLOAT4, + "float": RustWrenEngineColumnType.FLOAT8, + "float8": RustWrenEngineColumnType.FLOAT8, + "double": RustWrenEngineColumnType.DOUBLE, + "double precision": RustWrenEngineColumnType.DOUBLE, + "real": RustWrenEngineColumnType.REAL, + # String Types + "varchar": RustWrenEngineColumnType.VARCHAR, + "char": RustWrenEngineColumnType.CHAR, + "character": RustWrenEngineColumnType.CHAR, + "string": RustWrenEngineColumnType.STRING, + "text": RustWrenEngineColumnType.TEXT, + # Boolean Types + "boolean": RustWrenEngineColumnType.BOOL, + # Date and Time Types + "date": RustWrenEngineColumnType.DATE, + "datetime": RustWrenEngineColumnType.TIMESTAMP, + "timestamp": RustWrenEngineColumnType.TIMESTAMP, + "timestamp_ntz": RustWrenEngineColumnType.TIMESTAMP, + "timestamp_tz": RustWrenEngineColumnType.TIMESTAMPTZ, +} + class SnowflakeMetadata(Metadata): def __init__(self, connection_info: SnowflakeConnectionInfo): @@ -122,40 +160,24 @@ def _format_constraint_name( ): return f"{table_name}_{column_name}_{referenced_table_name}_{referenced_column_name}" - def _transform_column_type(self, data_type): - # all possible types listed here: https://docs.snowflake.com/en/sql-reference/intro-summary-data-types - switcher = { - # Numeric Types - "number": RustWrenEngineColumnType.NUMERIC, - "decimal": RustWrenEngineColumnType.NUMERIC, - "numeric": RustWrenEngineColumnType.NUMERIC, - "int": 
RustWrenEngineColumnType.INTEGER, - "integer": RustWrenEngineColumnType.INTEGER, - "bigint": RustWrenEngineColumnType.BIGINT, - "smallint": RustWrenEngineColumnType.SMALLINT, - "tinyint": RustWrenEngineColumnType.TINYINT, - "byteint": RustWrenEngineColumnType.TINYINT, - # Float - "float4": RustWrenEngineColumnType.FLOAT4, - "float": RustWrenEngineColumnType.FLOAT8, - "float8": RustWrenEngineColumnType.FLOAT8, - "double": RustWrenEngineColumnType.DOUBLE, - "double precision": RustWrenEngineColumnType.DOUBLE, - "real": RustWrenEngineColumnType.REAL, - # String Types - "varchar": RustWrenEngineColumnType.VARCHAR, - "char": RustWrenEngineColumnType.CHAR, - "character": RustWrenEngineColumnType.CHAR, - "string": RustWrenEngineColumnType.STRING, - "text": RustWrenEngineColumnType.TEXT, - # Boolean - "boolean": RustWrenEngineColumnType.BOOL, - # Date and Time Types - "date": RustWrenEngineColumnType.DATE, - "datetime": RustWrenEngineColumnType.TIMESTAMP, - "timestamp": RustWrenEngineColumnType.TIMESTAMP, - "timestamp_ntz": RustWrenEngineColumnType.TIMESTAMP, - "timestamp_tz": RustWrenEngineColumnType.TIMESTAMPTZ, - } - - return switcher.get(data_type.lower(), RustWrenEngineColumnType.UNKNOWN) + def _transform_column_type(self, data_type: str) -> RustWrenEngineColumnType: + """Transform Snowflake data type to RustWrenEngineColumnType. 
+ + Args: + data_type: The Snowflake data type string + + Returns: + The corresponding RustWrenEngineColumnType + """ + # Convert to lowercase for comparison + normalized_type = data_type.lower() + + # Use the module-level mapping table + mapped_type = SNOWFLAKE_TYPE_MAPPING.get( + normalized_type, RustWrenEngineColumnType.UNKNOWN + ) + + if mapped_type == RustWrenEngineColumnType.UNKNOWN: + logger.warning(f"Unknown Snowflake data type: {data_type}") + + return mapped_type diff --git a/ibis-server/app/model/metadata/trino.py b/ibis-server/app/model/metadata/trino.py index 784d9306d..fae7cf600 100644 --- a/ibis-server/app/model/metadata/trino.py +++ b/ibis-server/app/model/metadata/trino.py @@ -1,6 +1,8 @@ import re from urllib.parse import urlparse +from loguru import logger + from app.model import TrinoConnectionInfo from app.model.data_source import DataSource from app.model.metadata.dto import ( @@ -12,6 +14,42 @@ ) from app.model.metadata.metadata import Metadata +# Trino-specific type mapping +# All possible types listed here: https://trino.io/docs/current/language/types.html +TRINO_TYPE_MAPPING = { + # String Types (ignore Binary and Spatial Types for now) + "char": RustWrenEngineColumnType.CHAR, + "varchar": RustWrenEngineColumnType.VARCHAR, + "tinytext": RustWrenEngineColumnType.TEXT, + "text": RustWrenEngineColumnType.TEXT, + "mediumtext": RustWrenEngineColumnType.TEXT, + "longtext": RustWrenEngineColumnType.TEXT, + "enum": RustWrenEngineColumnType.VARCHAR, + "set": RustWrenEngineColumnType.VARCHAR, + # Numeric Types + "bit": RustWrenEngineColumnType.TINYINT, + "tinyint": RustWrenEngineColumnType.TINYINT, + "smallint": RustWrenEngineColumnType.SMALLINT, + "mediumint": RustWrenEngineColumnType.INTEGER, + "int": RustWrenEngineColumnType.INTEGER, + "integer": RustWrenEngineColumnType.INTEGER, + "bigint": RustWrenEngineColumnType.BIGINT, + # Boolean Types + "bool": RustWrenEngineColumnType.BOOL, + "boolean": RustWrenEngineColumnType.BOOL, + # Decimal Types + 
"float": RustWrenEngineColumnType.FLOAT4, + "double": RustWrenEngineColumnType.DOUBLE, + "decimal": RustWrenEngineColumnType.DECIMAL, + "numeric": RustWrenEngineColumnType.NUMERIC, + # Date and Time Types + "date": RustWrenEngineColumnType.DATE, + "datetime": RustWrenEngineColumnType.TIMESTAMP, + "timestamp": RustWrenEngineColumnType.TIMESTAMPTZ, + # JSON Type + "json": RustWrenEngineColumnType.JSON, +} + class TrinoMetadata(Metadata): def __init__(self, connection_info: TrinoConnectionInfo): @@ -97,42 +135,24 @@ def _get_schema_name(self): else: return self.connection_info.trino_schema.get_secret_value() - def _transform_column_type(self, data_type): - # all possible types listed here: https://trino.io/docs/current/language/types.html - # trim the (all characters) at the end of the data_type if exists - data_type = re.sub(r"\(.*\)", "", data_type).strip() - switcher = { - # String Types (ignore Binary and Spatial Types for now) - "char": RustWrenEngineColumnType.CHAR, - "varchar": RustWrenEngineColumnType.VARCHAR, - "tinytext": RustWrenEngineColumnType.TEXT, - "text": RustWrenEngineColumnType.TEXT, - "mediumtext": RustWrenEngineColumnType.TEXT, - "longtext": RustWrenEngineColumnType.TEXT, - "enum": RustWrenEngineColumnType.VARCHAR, - "set": RustWrenEngineColumnType.VARCHAR, - # Numeric Types(https://dev.mysql.com/doc/refman/8.4/en/numeric-types.html) - "bit": RustWrenEngineColumnType.TINYINT, - "tinyint": RustWrenEngineColumnType.TINYINT, - "smallint": RustWrenEngineColumnType.SMALLINT, - "mediumint": RustWrenEngineColumnType.INTEGER, - "int": RustWrenEngineColumnType.INTEGER, - "integer": RustWrenEngineColumnType.INTEGER, - "bigint": RustWrenEngineColumnType.BIGINT, - # boolean - "bool": RustWrenEngineColumnType.BOOL, - "boolean": RustWrenEngineColumnType.BOOL, - # Decimal - "float": RustWrenEngineColumnType.FLOAT4, - "double": RustWrenEngineColumnType.DOUBLE, - "decimal": RustWrenEngineColumnType.DECIMAL, - "numeric": RustWrenEngineColumnType.NUMERIC, - # Date 
and Time Types(https://dev.mysql.com/doc/refman/8.4/en/date-and-time-types.html) - "date": RustWrenEngineColumnType.DATE, - "datetime": RustWrenEngineColumnType.TIMESTAMP, - "timestamp": RustWrenEngineColumnType.TIMESTAMPTZ, - # JSON Type - "json": RustWrenEngineColumnType.JSON, - } - - return switcher.get(data_type.lower(), RustWrenEngineColumnType.UNKNOWN) + def _transform_column_type(self, data_type: str) -> RustWrenEngineColumnType: + """Transform Trino data type to RustWrenEngineColumnType. + + Args: + data_type: The Trino data type string + + Returns: + The corresponding RustWrenEngineColumnType + """ + # Remove parameter specifications like VARCHAR(255) -> VARCHAR + normalized_type = re.sub(r"\(.*\)", "", data_type).strip().lower() + + # Use the module-level mapping table + mapped_type = TRINO_TYPE_MAPPING.get( + normalized_type, RustWrenEngineColumnType.UNKNOWN + ) + + if mapped_type == RustWrenEngineColumnType.UNKNOWN: + logger.warning(f"Unknown Trino data type: {data_type}") + + return mapped_type