Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 57 additions & 35 deletions ibis-server/app/model/metadata/athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from contextlib import closing

import pandas as pd
from loguru import logger

from app.model import AthenaConnectionInfo
from app.model.data_source import DataSource
Expand All @@ -14,6 +15,41 @@
)
from app.model.metadata.metadata import Metadata

# Lookup table from a lower-case Athena type name to its
# RustWrenEngineColumnType. Binary and spatial types are left out for now.
ATHENA_TYPE_MAPPING = {
    **{  # string families
        "char": RustWrenEngineColumnType.CHAR,
        "varchar": RustWrenEngineColumnType.VARCHAR,
        "tinytext": RustWrenEngineColumnType.TEXT,
        "text": RustWrenEngineColumnType.TEXT,
        "mediumtext": RustWrenEngineColumnType.TEXT,
        "longtext": RustWrenEngineColumnType.TEXT,
        "enum": RustWrenEngineColumnType.VARCHAR,
        "set": RustWrenEngineColumnType.VARCHAR,
    },
    **{  # integers
        "bit": RustWrenEngineColumnType.TINYINT,
        "tinyint": RustWrenEngineColumnType.TINYINT,
        "smallint": RustWrenEngineColumnType.SMALLINT,
        "mediumint": RustWrenEngineColumnType.INTEGER,
        "int": RustWrenEngineColumnType.INTEGER,
        "integer": RustWrenEngineColumnType.INTEGER,
        "bigint": RustWrenEngineColumnType.BIGINT,
    },
    **{  # booleans
        "bool": RustWrenEngineColumnType.BOOL,
        "boolean": RustWrenEngineColumnType.BOOL,
    },
    **{  # floating point and fixed point
        "float": RustWrenEngineColumnType.FLOAT4,
        "double": RustWrenEngineColumnType.DOUBLE,
        "decimal": RustWrenEngineColumnType.DECIMAL,
        "numeric": RustWrenEngineColumnType.NUMERIC,
    },
    **{  # dates and times
        "date": RustWrenEngineColumnType.DATE,
        "datetime": RustWrenEngineColumnType.TIMESTAMP,
        "timestamp": RustWrenEngineColumnType.TIMESTAMPTZ,
    },
    **{  # JSON
        "json": RustWrenEngineColumnType.JSON,
    },
}


class AthenaMetadata(Metadata):
def __init__(self, connection_info: AthenaConnectionInfo):
Expand Down Expand Up @@ -101,38 +137,24 @@ def get_version(self) -> str:
def _format_athena_compact_table_name(self, schema: str, table: str) -> str:
return f"{schema}.{table}"

def _transform_column_type(self, data_type):
    """Map an Athena column type string to a RustWrenEngineColumnType.

    A parenthesised suffix such as the ``(255)`` in ``varchar(255)`` is
    removed and the lookup ignores case. Names without an entry yield
    ``RustWrenEngineColumnType.UNKNOWN``.
    """
    wren_type = RustWrenEngineColumnType
    lookup_key = re.sub(r"\(.*\)", "", data_type).strip().lower()

    type_table = {
        # Strings (binary and spatial types are not handled for now)
        "char": wren_type.CHAR,
        "varchar": wren_type.VARCHAR,
        "tinytext": wren_type.TEXT,
        "text": wren_type.TEXT,
        "mediumtext": wren_type.TEXT,
        "longtext": wren_type.TEXT,
        "enum": wren_type.VARCHAR,
        "set": wren_type.VARCHAR,
        # Integers
        "bit": wren_type.TINYINT,
        "tinyint": wren_type.TINYINT,
        "smallint": wren_type.SMALLINT,
        "mediumint": wren_type.INTEGER,
        "int": wren_type.INTEGER,
        "integer": wren_type.INTEGER,
        "bigint": wren_type.BIGINT,
        # Booleans
        "bool": wren_type.BOOL,
        "boolean": wren_type.BOOL,
        # Floating point and fixed point
        "float": wren_type.FLOAT4,
        "double": wren_type.DOUBLE,
        "decimal": wren_type.DECIMAL,
        "numeric": wren_type.NUMERIC,
        # Dates and times
        "date": wren_type.DATE,
        "datetime": wren_type.TIMESTAMP,
        "timestamp": wren_type.TIMESTAMPTZ,
        # JSON
        "json": wren_type.JSON,
    }

    return type_table.get(lookup_key, wren_type.UNKNOWN)
def _transform_column_type(self, data_type: str) -> RustWrenEngineColumnType:
    """Resolve an Athena type string through ``ATHENA_TYPE_MAPPING``.

    Args:
        data_type: The Athena data type string, optionally carrying a
            parenthesised parameter list (e.g. ``VARCHAR(255)``).

    Returns:
        The mapped RustWrenEngineColumnType, or ``UNKNOWN`` (after logging a
        warning) when the normalised name has no entry.
    """
    unknown = RustWrenEngineColumnType.UNKNOWN

    # "VARCHAR(255)" -> "varchar": drop the parameter list, trim, lower-case.
    without_params = re.sub(r"\(.*\)", "", data_type)
    lookup_key = without_params.strip().lower()

    resolved = ATHENA_TYPE_MAPPING.get(lookup_key, unknown)
    if resolved != unknown:
        return resolved

    logger.warning(f"Unknown Athena data type: {data_type}")
    return resolved
66 changes: 43 additions & 23 deletions ibis-server/app/model/metadata/bigquery.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from loguru import logger

from app.model import BigQueryConnectionInfo
from app.model.data_source import DataSource
from app.model.metadata.dto import (
Expand All @@ -10,6 +12,25 @@
)
from app.model.metadata.metadata import Metadata

# Lookup table from a lower-case BigQuery type name to its
# RustWrenEngineColumnType. GEOGRAPHY and RANGE have no entry here; per the
# original note, columns of those types were filtered out.
BIGQUERY_TYPE_MAPPING = dict(
    (
        ("bool", RustWrenEngineColumnType.BOOL),
        ("boolean", RustWrenEngineColumnType.BOOL),
        ("bytes", RustWrenEngineColumnType.BYTES),
        ("date", RustWrenEngineColumnType.DATE),
        ("datetime", RustWrenEngineColumnType.DATETIME),
        ("interval", RustWrenEngineColumnType.INTERVAL),
        ("json", RustWrenEngineColumnType.JSON),
        ("int64", RustWrenEngineColumnType.INT64),
        ("numeric", RustWrenEngineColumnType.NUMERIC),
        ("bignumeric", RustWrenEngineColumnType.BIGNUMERIC),
        ("float64", RustWrenEngineColumnType.FLOAT64),
        ("string", RustWrenEngineColumnType.STRING),
        ("time", RustWrenEngineColumnType.TIME),
        ("timestamp", RustWrenEngineColumnType.TIMESTAMPTZ),
    )
)


class BigQueryMetadata(Metadata):
def __init__(self, connection_info: BigQueryConnectionInfo):
Expand Down Expand Up @@ -173,29 +194,28 @@ def get_constraints(self) -> list[Constraint]:
def get_version(self) -> str:
    """Return a fixed descriptive string; no version number is looked up."""
    version_note = "Follow BigQuery release version"
    return version_note

def _transform_column_type(self, data_type: str) -> str | RustWrenEngineColumnType:
    """Transform a BigQuery data type into a RustWrenEngineColumnType.

    Args:
        data_type: The BigQuery data type string.

    Returns:
        The matching RustWrenEngineColumnType, ``UNKNOWN`` (after logging a
        warning) when the name has no entry in ``BIGQUERY_TYPE_MAPPING``, or
        ``data_type`` itself when it names an array or struct type.
    """
    # Matching is case-insensitive; keep the caller's spelling for the
    # pass-through return and for the log message.
    normalized_type = data_type.lower()

    # Complex types have no single enum value, so hand them back as received.
    # NOTE(review): the superseded body returned the lower-cased string here;
    # confirm no consumer relies on a lower-case array/struct type string.
    if normalized_type.startswith(("array", "struct")):
        return data_type

    # Single lookup against the module-level table, not a dict rebuilt
    # on every call.
    mapped_type = BIGQUERY_TYPE_MAPPING.get(
        normalized_type, RustWrenEngineColumnType.UNKNOWN
    )

    if mapped_type == RustWrenEngineColumnType.UNKNOWN:
        logger.warning(f"Unknown BigQuery data type: {data_type}")

    return mapped_type
82 changes: 47 additions & 35 deletions ibis-server/app/model/metadata/canner.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from gql import Client, gql
from gql.transport.aiohttp import AIOHTTPTransport
from loguru import logger

from app.model import CannerConnectionInfo
from app.model.error import ErrorCode, WrenError
Expand All @@ -15,6 +16,40 @@
)
from app.model.metadata.metadata import Metadata

# Lookup table from a lower-case Canner type name to its
# RustWrenEngineColumnType. Binary and spatial types are left out for now.
CANNER_TYPE_MAPPING = (
    {  # string types
        "char": RustWrenEngineColumnType.CHAR,
        "varchar": RustWrenEngineColumnType.VARCHAR,
        "tinytext": RustWrenEngineColumnType.TEXT,
        "text": RustWrenEngineColumnType.TEXT,
        "mediumtext": RustWrenEngineColumnType.TEXT,
        "longtext": RustWrenEngineColumnType.TEXT,
        "enum": RustWrenEngineColumnType.VARCHAR,
        "set": RustWrenEngineColumnType.VARCHAR,
    }
    | {  # numeric types (https://dev.mysql.com/doc/refman/8.4/en/numeric-types.html)
        "bit": RustWrenEngineColumnType.TINYINT,
        "tinyint": RustWrenEngineColumnType.TINYINT,
        "smallint": RustWrenEngineColumnType.SMALLINT,
        "mediumint": RustWrenEngineColumnType.INTEGER,
        "int": RustWrenEngineColumnType.INTEGER,
        "integer": RustWrenEngineColumnType.INTEGER,
        "bigint": RustWrenEngineColumnType.BIGINT,
    }
    | {  # booleans
        "bool": RustWrenEngineColumnType.BOOL,
        "boolean": RustWrenEngineColumnType.BOOL,
    }
    | {  # floating point and fixed point
        "float": RustWrenEngineColumnType.FLOAT8,
        "double": RustWrenEngineColumnType.DOUBLE,
        "decimal": RustWrenEngineColumnType.DECIMAL,
        "numeric": RustWrenEngineColumnType.NUMERIC,
    }
    | {  # date and time types (https://dev.mysql.com/doc/refman/8.4/en/date-and-time-types.html)
        "date": RustWrenEngineColumnType.DATE,
        "datetime": RustWrenEngineColumnType.TIMESTAMP,
        "timestamp": RustWrenEngineColumnType.TIMESTAMPTZ,
    }
    | {  # JSON
        "json": RustWrenEngineColumnType.JSON,
    }
)


class CannerMetadata(Metadata):
def __init__(self, connection_info: CannerConnectionInfo):
def _transform_column_type(self, data_type):
    """Transform a Canner data type into a RustWrenEngineColumnType.

    Args:
        data_type: The Canner data type string, optionally carrying a
            parenthesised parameter list (e.g. ``varchar(255)``).

    Returns:
        The matching RustWrenEngineColumnType, or ``UNKNOWN`` (after logging
        a warning) when the normalised name has no entry in
        ``CANNER_TYPE_MAPPING``.
    """
    # Trim a parenthesised parameter list if present, then compare
    # case-insensitively: "VARCHAR(255)" -> "varchar".
    normalized_type = re.sub(r"\(.*\)", "", data_type).strip().lower()

    # Single lookup against the module-level table. The inline copy of the
    # table returned early, which kept the warning below from ever running.
    mapped_type = CANNER_TYPE_MAPPING.get(
        normalized_type, RustWrenEngineColumnType.UNKNOWN
    )

    if mapped_type == RustWrenEngineColumnType.UNKNOWN:
        # Log the value exactly as received so the offending column type can
        # be identified, parameters included.
        logger.warning(f"Unknown Canner data type: {data_type}")

    return mapped_type
84 changes: 54 additions & 30 deletions ibis-server/app/model/metadata/clickhouse.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from loguru import logger

from app.model import ClickHouseConnectionInfo
from app.model.data_source import DataSource
from app.model.metadata.dto import (
Expand All @@ -9,6 +11,37 @@
)
from app.model.metadata.metadata import Metadata

# ClickHouse-specific type mapping.
# Keys are lower-case ClickHouse type names; values are the
# RustWrenEngineColumnType each one is exposed as.
CLICKHOUSE_TYPE_MAPPING = {
    # Boolean Types
    # ClickHouse names its boolean type "Bool", which lower-cases to "bool";
    # "boolean" is kept for backward compatibility.
    "bool": RustWrenEngineColumnType.BOOL,
    "boolean": RustWrenEngineColumnType.BOOL,
    # Integer Types
    # NOTE(review): UInt16/UInt32/UInt64 maxima exceed the signed range of the
    # same-width target used below; left unchanged, confirm this is intended.
    "int8": RustWrenEngineColumnType.TINYINT,
    "uint8": RustWrenEngineColumnType.INT2,
    "int16": RustWrenEngineColumnType.INT2,
    "uint16": RustWrenEngineColumnType.INT2,
    "int32": RustWrenEngineColumnType.INT4,
    "uint32": RustWrenEngineColumnType.INT4,
    "int64": RustWrenEngineColumnType.INT8,
    "uint64": RustWrenEngineColumnType.INT8,
    # Float Types
    "float32": RustWrenEngineColumnType.FLOAT4,
    "float64": RustWrenEngineColumnType.FLOAT8,
    "decimal": RustWrenEngineColumnType.DECIMAL,
    # Date/Time Types
    "date": RustWrenEngineColumnType.DATE,
    "datetime": RustWrenEngineColumnType.TIMESTAMP,
    # String Types
    "string": RustWrenEngineColumnType.VARCHAR,
    "fixedstring": RustWrenEngineColumnType.CHAR,
    # Special Types
    "uuid": RustWrenEngineColumnType.UUID,
    "enum8": RustWrenEngineColumnType.STRING,  # Enums can be mapped to strings
    "enum16": RustWrenEngineColumnType.STRING,  # Enums can be mapped to strings
    "ipv4": RustWrenEngineColumnType.INET,
    "ipv6": RustWrenEngineColumnType.INET,
}


class ClickHouseMetadata(Metadata):
def __init__(self, connection_info: ClickHouseConnectionInfo):
Expand Down Expand Up @@ -76,33 +109,24 @@ def get_version(self) -> str:
def _format_compact_table_name(self, schema: str, table: str):
return f"{schema}.{table}"

def _transform_column_type(self, data_type):
    """Map a ClickHouse column type string to a RustWrenEngineColumnType.

    The lookup ignores case. Names without an entry yield
    ``RustWrenEngineColumnType.UNKNOWN``.
    """
    wren_type = RustWrenEngineColumnType

    type_table = {
        "boolean": wren_type.BOOL,
        # Integers
        "int8": wren_type.TINYINT,
        "uint8": wren_type.INT2,
        "int16": wren_type.INT2,
        "uint16": wren_type.INT2,
        "int32": wren_type.INT4,
        "uint32": wren_type.INT4,
        "int64": wren_type.INT8,
        "uint64": wren_type.INT8,
        # Floating point and fixed point
        "float32": wren_type.FLOAT4,
        "float64": wren_type.FLOAT8,
        "decimal": wren_type.DECIMAL,
        # Dates and times
        "date": wren_type.DATE,
        "datetime": wren_type.TIMESTAMP,
        # Strings
        "string": wren_type.VARCHAR,
        "fixedstring": wren_type.CHAR,
        # Miscellaneous
        "uuid": wren_type.UUID,
        "enum8": wren_type.STRING,  # enums are exposed as strings
        "enum16": wren_type.STRING,  # enums are exposed as strings
        "ipv4": wren_type.INET,
        "ipv6": wren_type.INET,
    }

    return type_table.get(data_type.lower(), wren_type.UNKNOWN)
def _transform_column_type(self, data_type: str) -> RustWrenEngineColumnType:
    """Transform a ClickHouse data type into a RustWrenEngineColumnType.

    The type string is matched case-insensitively after two normalisation
    steps: ``Nullable(...)`` / ``LowCardinality(...)`` wrappers are peeled
    off, and any parameter list is dropped, so ``Nullable(Decimal(18, 4))``
    is looked up as ``decimal`` and ``FixedString(16)`` as ``fixedstring``.

    Args:
        data_type: The ClickHouse data type string.

    Returns:
        The matching RustWrenEngineColumnType, or ``UNKNOWN`` (after logging
        a warning) when the normalised name has no entry in
        ``CLICKHOUSE_TYPE_MAPPING``.
    """
    normalized_type = data_type.strip().lower()

    # Wrappers only modify storage/nullability of the inner type, so the
    # inner type is what should be mapped. They can nest, hence the loop.
    wrapper_prefixes = ("nullable(", "lowcardinality(")
    while normalized_type.endswith(")"):
        prefix = next(
            (p for p in wrapper_prefixes if normalized_type.startswith(p)), None
        )
        if prefix is None:
            break
        normalized_type = normalized_type[len(prefix) : -1].strip()

    # Drop a parameter list: "decimal(18, 4)" -> "decimal". Without this the
    # "decimal", "fixedstring", "enum8" and "enum16" table entries could not
    # match a parameterised type string.
    base_type = normalized_type.partition("(")[0].strip()

    # Use the module-level mapping table
    mapped_type = CLICKHOUSE_TYPE_MAPPING.get(
        base_type, RustWrenEngineColumnType.UNKNOWN
    )

    if mapped_type == RustWrenEngineColumnType.UNKNOWN:
        # Log the value exactly as received, wrappers and parameters included.
        logger.warning(f"Unknown ClickHouse data type: {data_type}")

    return mapped_type
Loading
Loading