Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 45 additions & 4 deletions ibis-server/app/model/metadata/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
CLICKHOUSE_TYPE_MAPPING = {
# Boolean Types
"boolean": RustWrenEngineColumnType.BOOL,
"bool": RustWrenEngineColumnType.BOOL,
# Integer Types
"int8": RustWrenEngineColumnType.TINYINT,
"uint8": RustWrenEngineColumnType.INT2,
Expand All @@ -24,14 +25,20 @@
"uint32": RustWrenEngineColumnType.INT4,
"int64": RustWrenEngineColumnType.INT8,
"uint64": RustWrenEngineColumnType.INT8,
"int128": RustWrenEngineColumnType.NUMERIC,
"int256": RustWrenEngineColumnType.NUMERIC,
"uint128": RustWrenEngineColumnType.NUMERIC,
"uint256": RustWrenEngineColumnType.NUMERIC,
# Float Types
"float32": RustWrenEngineColumnType.FLOAT4,
"float64": RustWrenEngineColumnType.FLOAT8,
"decimal": RustWrenEngineColumnType.DECIMAL,
"numeric": RustWrenEngineColumnType.NUMERIC,
# Date/Time Types
"date": RustWrenEngineColumnType.DATE,
"date32": RustWrenEngineColumnType.DATE,
"datetime": RustWrenEngineColumnType.TIMESTAMP,
"datetime64": RustWrenEngineColumnType.TIMESTAMP,
# String Types
"string": RustWrenEngineColumnType.VARCHAR,
"fixedstring": RustWrenEngineColumnType.CHAR,
Expand All @@ -41,6 +48,8 @@
"enum16": RustWrenEngineColumnType.STRING, # Enums can be mapped to strings
"ipv4": RustWrenEngineColumnType.INET,
"ipv6": RustWrenEngineColumnType.INET,
"nothing": RustWrenEngineColumnType.NULL,
"json": RustWrenEngineColumnType.JSON,
}


Expand Down Expand Up @@ -113,29 +122,61 @@ def _format_compact_table_name(self, schema: str, table: str):
def _transform_column_type(self, data_type: str) -> RustWrenEngineColumnType:
"""Transform ClickHouse data type to RustWrenEngineColumnType.

Handles wrapper types (LowCardinality, Nullable) by recursively
unwrapping them to extract the inner type.

Args:
data_type: The ClickHouse data type string

Returns:
The corresponding RustWrenEngineColumnType
"""
# Convert to lowercase for comparison
normalized_type = data_type.lower()
normalized_type = data_type.lower().strip()

# Decimal type with precision and scale
# Decimal type with precision and scale, e.g. Decimal(15,2)
if normalized_type.startswith("decimal"):
return RustWrenEngineColumnType.DECIMAL

# Numeric type with precision and scale
if normalized_type.startswith("numeric"):
return RustWrenEngineColumnType.NUMERIC

# Support to Nullable wrapper
# Support LowCardinality wrapper — unwrap and recurse
if normalized_type.startswith("lowcardinality("):
inner_type = normalized_type[len("lowcardinality(") : -1]
return self._transform_column_type(inner_type)

# Support Nullable wrapper — unwrap and recurse
if normalized_type.startswith("nullable("):
inner_type = normalized_type[9:-1]
return self._transform_column_type(inner_type)

# Use the module-level mapping table
# Support FixedString(N) — e.g. FixedString(32)
if normalized_type.startswith("fixedstring("):
return RustWrenEngineColumnType.CHAR

# Support DateTime64(precision[, timezone]) — e.g. DateTime64(3, 'UTC')
if normalized_type.startswith("datetime64("):
return RustWrenEngineColumnType.TIMESTAMP

# Support DateTime([timezone]) — e.g. DateTime('UTC')
if normalized_type.startswith("datetime("):
return RustWrenEngineColumnType.TIMESTAMP

# Support JSON(...) with options — e.g. JSON(max_dynamic_paths=1024)
if normalized_type.startswith("json("):
return RustWrenEngineColumnType.JSON

# Support Enum8/Enum16 with values — e.g. Enum8('a'=1, 'b'=2)
if normalized_type.startswith(("enum8(", "enum16(")):
return RustWrenEngineColumnType.STRING

# Support Array, Map, Tuple — treat as VARCHAR (serialized as strings)
if normalized_type.startswith(("array(", "map(", "tuple(")):
return RustWrenEngineColumnType.VARCHAR

# Use the module-level mapping table for simple types
mapped_type = CLICKHOUSE_TYPE_MAPPING.get(
normalized_type, RustWrenEngineColumnType.UNKNOWN
)
Expand Down
278 changes: 278 additions & 0 deletions ibis-server/tests/test_clickhouse_type_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
"""Unit tests for ClickHouse type mapping in _transform_column_type.

These tests validate that ClickHouse-specific data types are correctly
mapped to RustWrenEngineColumnType, including wrapper types like
LowCardinality() and Nullable().
"""

import pytest

from app.model.metadata.clickhouse import ClickHouseMetadata
from app.model.metadata.dto import RustWrenEngineColumnType


@pytest.mark.clickhouse
class TestTransformColumnType:
    """Exercise ClickHouseMetadata._transform_column_type on raw type strings.

    Every test feeds one ClickHouse type string to the method and checks
    which RustWrenEngineColumnType member comes back.
    """

    @pytest.fixture
    def metadata(self):
        """Provide a bare ClickHouseMetadata without running its __init__.

        Allocating through object.__new__ bypasses the constructor; the
        tests below only call _transform_column_type on the result.
        """
        return object.__new__(ClickHouseMetadata)

    # --- Basic type mapping ---

    def test_string(self, metadata):
        """String maps to VARCHAR."""
        mapped = metadata._transform_column_type("String")
        assert mapped == RustWrenEngineColumnType.VARCHAR

    def test_int32(self, metadata):
        """Int32 maps to INT4."""
        mapped = metadata._transform_column_type("Int32")
        assert mapped == RustWrenEngineColumnType.INT4

    def test_int64(self, metadata):
        """Int64 maps to INT8."""
        mapped = metadata._transform_column_type("Int64")
        assert mapped == RustWrenEngineColumnType.INT8

    def test_uint8(self, metadata):
        """UInt8 maps to INT2."""
        mapped = metadata._transform_column_type("UInt8")
        assert mapped == RustWrenEngineColumnType.INT2

    def test_uint16(self, metadata):
        """UInt16 maps to INT2."""
        mapped = metadata._transform_column_type("UInt16")
        assert mapped == RustWrenEngineColumnType.INT2

    def test_uint32(self, metadata):
        """UInt32 maps to INT4."""
        mapped = metadata._transform_column_type("UInt32")
        assert mapped == RustWrenEngineColumnType.INT4

    def test_float32(self, metadata):
        """Float32 maps to FLOAT4."""
        mapped = metadata._transform_column_type("Float32")
        assert mapped == RustWrenEngineColumnType.FLOAT4

    def test_float64(self, metadata):
        """Float64 maps to FLOAT8."""
        mapped = metadata._transform_column_type("Float64")
        assert mapped == RustWrenEngineColumnType.FLOAT8

    def test_uuid(self, metadata):
        """UUID maps to UUID."""
        mapped = metadata._transform_column_type("UUID")
        assert mapped == RustWrenEngineColumnType.UUID

    def test_date(self, metadata):
        """Date maps to DATE."""
        mapped = metadata._transform_column_type("Date")
        assert mapped == RustWrenEngineColumnType.DATE

    def test_datetime(self, metadata):
        """DateTime without parameters maps to TIMESTAMP."""
        mapped = metadata._transform_column_type("DateTime")
        assert mapped == RustWrenEngineColumnType.TIMESTAMP

    # --- Bool type (the missing alias) ---

    def test_bool(self, metadata):
        """The short Bool spelling maps to BOOL."""
        mapped = metadata._transform_column_type("Bool")
        assert mapped == RustWrenEngineColumnType.BOOL

    def test_boolean(self, metadata):
        """The long Boolean spelling maps to BOOL."""
        mapped = metadata._transform_column_type("Boolean")
        assert mapped == RustWrenEngineColumnType.BOOL

    # --- Date32 and DateTime64 ---

    def test_date32(self, metadata):
        """Date32 maps to DATE."""
        mapped = metadata._transform_column_type("Date32")
        assert mapped == RustWrenEngineColumnType.DATE

    def test_datetime64_no_tz(self, metadata):
        """DateTime64 with only a precision maps to TIMESTAMP."""
        mapped = metadata._transform_column_type("DateTime64(3)")
        assert mapped == RustWrenEngineColumnType.TIMESTAMP

    def test_datetime64_with_tz(self, metadata):
        """DateTime64 with precision and timezone maps to TIMESTAMP."""
        mapped = metadata._transform_column_type("DateTime64(3, 'UTC')")
        assert mapped == RustWrenEngineColumnType.TIMESTAMP

    # --- Large integer types ---

    def test_int128(self, metadata):
        """Int128 maps to NUMERIC."""
        mapped = metadata._transform_column_type("Int128")
        assert mapped == RustWrenEngineColumnType.NUMERIC

    def test_int256(self, metadata):
        """Int256 maps to NUMERIC."""
        mapped = metadata._transform_column_type("Int256")
        assert mapped == RustWrenEngineColumnType.NUMERIC

    def test_uint128(self, metadata):
        """UInt128 maps to NUMERIC."""
        mapped = metadata._transform_column_type("UInt128")
        assert mapped == RustWrenEngineColumnType.NUMERIC

    def test_uint256(self, metadata):
        """UInt256 maps to NUMERIC."""
        mapped = metadata._transform_column_type("UInt256")
        assert mapped == RustWrenEngineColumnType.NUMERIC

    # --- Decimal ---

    def test_decimal(self, metadata):
        """Decimal with precision and scale maps to DECIMAL."""
        mapped = metadata._transform_column_type("Decimal(15,2)")
        assert mapped == RustWrenEngineColumnType.DECIMAL

    def test_decimal128(self, metadata):
        """Decimal128 with a scale maps to DECIMAL."""
        mapped = metadata._transform_column_type("Decimal128(9)")
        assert mapped == RustWrenEngineColumnType.DECIMAL

    # --- LowCardinality wrapper (the main fix) ---

    def test_lowcardinality_string(self, metadata):
        """LowCardinality(String) resolves to the inner type, VARCHAR."""
        mapped = metadata._transform_column_type("LowCardinality(String)")
        assert mapped == RustWrenEngineColumnType.VARCHAR

    def test_lowcardinality_fixedstring(self, metadata):
        """LowCardinality around a sized FixedString resolves to CHAR."""
        mapped = metadata._transform_column_type("LowCardinality(FixedString(32))")
        assert mapped == RustWrenEngineColumnType.CHAR

    # --- Nullable wrapper ---

    def test_nullable_uuid(self, metadata):
        """Nullable(UUID) resolves to UUID."""
        mapped = metadata._transform_column_type("Nullable(UUID)")
        assert mapped == RustWrenEngineColumnType.UUID

    def test_nullable_float32(self, metadata):
        """Nullable(Float32) resolves to FLOAT4."""
        mapped = metadata._transform_column_type("Nullable(Float32)")
        assert mapped == RustWrenEngineColumnType.FLOAT4

    def test_nullable_uint16(self, metadata):
        """Nullable(UInt16) resolves to INT2."""
        mapped = metadata._transform_column_type("Nullable(UInt16)")
        assert mapped == RustWrenEngineColumnType.INT2

    def test_nullable_string(self, metadata):
        """Nullable(String) resolves to VARCHAR."""
        mapped = metadata._transform_column_type("Nullable(String)")
        assert mapped == RustWrenEngineColumnType.VARCHAR

    # --- Nested wrappers (LowCardinality + Nullable) ---

    def test_lowcardinality_nullable_string(self, metadata):
        """Both wrappers stacked around String still resolve to VARCHAR."""
        mapped = metadata._transform_column_type("LowCardinality(Nullable(String))")
        assert mapped == RustWrenEngineColumnType.VARCHAR

    # --- FixedString with size ---

    def test_fixedstring_with_size(self, metadata):
        """FixedString with an explicit length maps to CHAR."""
        mapped = metadata._transform_column_type("FixedString(32)")
        assert mapped == RustWrenEngineColumnType.CHAR

    # --- Complex types ---

    def test_array(self, metadata):
        """Array(...) maps to VARCHAR."""
        mapped = metadata._transform_column_type("Array(String)")
        assert mapped == RustWrenEngineColumnType.VARCHAR

    def test_map(self, metadata):
        """Map(...) maps to VARCHAR."""
        mapped = metadata._transform_column_type("Map(String, String)")
        assert mapped == RustWrenEngineColumnType.VARCHAR

    def test_tuple(self, metadata):
        """Tuple(...) maps to VARCHAR."""
        mapped = metadata._transform_column_type("Tuple(String, Int32)")
        assert mapped == RustWrenEngineColumnType.VARCHAR

    # --- Enum with values ---

    def test_enum8_with_values(self, metadata):
        """Enum8 with an explicit value list maps to STRING."""
        mapped = metadata._transform_column_type("Enum8('a'=1, 'b'=2)")
        assert mapped == RustWrenEngineColumnType.STRING

    def test_enum16_with_values(self, metadata):
        """Enum16 with an explicit value list maps to STRING."""
        mapped = metadata._transform_column_type("Enum16('x'=1, 'y'=2)")
        assert mapped == RustWrenEngineColumnType.STRING

    # --- Special ---

    def test_nothing(self, metadata):
        """Nothing maps to NULL."""
        mapped = metadata._transform_column_type("Nothing")
        assert mapped == RustWrenEngineColumnType.NULL

    def test_unknown_type_returns_unknown(self, metadata):
        """An unrecognised type name falls back to UNKNOWN."""
        mapped = metadata._transform_column_type("SomeWeirdType")
        assert mapped == RustWrenEngineColumnType.UNKNOWN

    # --- Parameterized DateTime (with timezone) ---

    def test_datetime_with_timezone(self, metadata):
        """DateTime('UTC') maps to TIMESTAMP."""
        mapped = metadata._transform_column_type("DateTime('UTC')")
        assert mapped == RustWrenEngineColumnType.TIMESTAMP

    def test_datetime_with_named_timezone(self, metadata):
        """DateTime('Europe/Berlin') maps to TIMESTAMP."""
        mapped = metadata._transform_column_type("DateTime('Europe/Berlin')")
        assert mapped == RustWrenEngineColumnType.TIMESTAMP

    # --- Parameterized JSON ---

    def test_json_with_options(self, metadata):
        """JSON(max_dynamic_paths=1024) maps to JSON."""
        mapped = metadata._transform_column_type("JSON(max_dynamic_paths=1024)")
        assert mapped == RustWrenEngineColumnType.JSON

    def test_json_plain(self, metadata):
        """JSON without parameters maps to JSON."""
        mapped = metadata._transform_column_type("JSON")
        assert mapped == RustWrenEngineColumnType.JSON
Loading