Skip to content
This repository was archived by the owner on May 7, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 39 additions & 4 deletions ibis-server/app/model/metadata/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
CLICKHOUSE_TYPE_MAPPING = {
# Boolean Types
"boolean": RustWrenEngineColumnType.BOOL,
"bool": RustWrenEngineColumnType.BOOL,
# Integer Types
"int8": RustWrenEngineColumnType.TINYINT,
"uint8": RustWrenEngineColumnType.INT2,
Expand All @@ -24,14 +25,20 @@
"uint32": RustWrenEngineColumnType.INT4,
"int64": RustWrenEngineColumnType.INT8,
"uint64": RustWrenEngineColumnType.INT8,
"int128": RustWrenEngineColumnType.NUMERIC,
"int256": RustWrenEngineColumnType.NUMERIC,
"uint128": RustWrenEngineColumnType.NUMERIC,
"uint256": RustWrenEngineColumnType.NUMERIC,
# Float Types
"float32": RustWrenEngineColumnType.FLOAT4,
"float64": RustWrenEngineColumnType.FLOAT8,
"decimal": RustWrenEngineColumnType.DECIMAL,
"numeric": RustWrenEngineColumnType.NUMERIC,
# Date/Time Types
"date": RustWrenEngineColumnType.DATE,
"date32": RustWrenEngineColumnType.DATE,
"datetime": RustWrenEngineColumnType.TIMESTAMP,
"datetime64": RustWrenEngineColumnType.TIMESTAMP,
# String Types
"string": RustWrenEngineColumnType.VARCHAR,
"fixedstring": RustWrenEngineColumnType.CHAR,
Expand All @@ -41,6 +48,8 @@
"enum16": RustWrenEngineColumnType.STRING, # Enums can be mapped to strings
"ipv4": RustWrenEngineColumnType.INET,
"ipv6": RustWrenEngineColumnType.INET,
"nothing": RustWrenEngineColumnType.NULL,
"json": RustWrenEngineColumnType.JSON,
}


Expand Down Expand Up @@ -113,29 +122,55 @@ def _format_compact_table_name(self, schema: str, table: str):
def _transform_column_type(self, data_type: str) -> RustWrenEngineColumnType:
"""Transform ClickHouse data type to RustWrenEngineColumnType.

Handles wrapper types (LowCardinality, Nullable) by recursively
unwrapping them to extract the inner type.

Args:
data_type: The ClickHouse data type string

Returns:
The corresponding RustWrenEngineColumnType
"""
# Convert to lowercase for comparison
normalized_type = data_type.lower()
normalized_type = data_type.lower().strip()

# Decimal type with precision and scale
# Decimal type with precision and scale, e.g. Decimal(15,2)
if normalized_type.startswith("decimal"):
return RustWrenEngineColumnType.DECIMAL

# Numeric type with precision and scale
if normalized_type.startswith("numeric"):
return RustWrenEngineColumnType.NUMERIC

# Support to Nullable wrapper
# Support LowCardinality wrapper — unwrap and recurse
if normalized_type.startswith("lowcardinality("):
inner_type = normalized_type[len("lowcardinality("):-1]
return self._transform_column_type(inner_type)

# Support Nullable wrapper — unwrap and recurse
if normalized_type.startswith("nullable("):
inner_type = normalized_type[9:-1]
return self._transform_column_type(inner_type)

# Use the module-level mapping table
# Support FixedString(N) — e.g. FixedString(32)
if normalized_type.startswith("fixedstring("):
return RustWrenEngineColumnType.CHAR

# Support DateTime64(precision[, timezone]) — e.g. DateTime64(3, 'UTC')
if normalized_type.startswith("datetime64("):
return RustWrenEngineColumnType.TIMESTAMP
Comment thread
ahmedjawedaj marked this conversation as resolved.

# Support Enum8/Enum16 with values — e.g. Enum8('a'=1, 'b'=2)
if normalized_type.startswith("enum8(") or normalized_type.startswith(
"enum16("
):
return RustWrenEngineColumnType.STRING

# Support Array, Map, Tuple — treat as VARCHAR (serialized as strings)
if normalized_type.startswith(("array(", "map(", "tuple(")):
return RustWrenEngineColumnType.VARCHAR

# Use the module-level mapping table for simple types
mapped_type = CLICKHOUSE_TYPE_MAPPING.get(
normalized_type, RustWrenEngineColumnType.UNKNOWN
)
Expand Down
162 changes: 162 additions & 0 deletions ibis-server/tests/test_clickhouse_type_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
"""Unit tests for ClickHouse type mapping in _transform_column_type.

These tests validate that ClickHouse-specific data types are correctly
mapped to RustWrenEngineColumnType, including wrapper types like
LowCardinality() and Nullable().
"""

import pytest

from app.model.metadata.clickhouse import ClickHouseMetadata
from app.model.metadata.dto import RustWrenEngineColumnType


class TestTransformColumnType:
    """Tests for ClickHouseMetadata._transform_column_type.

    Each test passes a ClickHouse type string exactly as it would be spelled
    in a column definition (mixed case, with or without parameters) and checks
    the RustWrenEngineColumnType member returned for it.
    """

    @pytest.fixture
    def metadata(self):
        """Create a ClickHouseMetadata instance for testing.

        We use __new__ to skip __init__ since we only need the method.
        """
        return object.__new__(ClickHouseMetadata)

    # --- Basic type mapping ---

    def test_string(self, metadata):
        assert (
            metadata._transform_column_type("String")
            == RustWrenEngineColumnType.VARCHAR
        )

    def test_int32(self, metadata):
        assert (
            metadata._transform_column_type("Int32")
            == RustWrenEngineColumnType.INT4
        )

    def test_int64(self, metadata):
        assert (
            metadata._transform_column_type("Int64")
            == RustWrenEngineColumnType.INT8
        )

    def test_uint8(self, metadata):
        assert (
            metadata._transform_column_type("UInt8")
            == RustWrenEngineColumnType.INT2
        )

    def test_uint16(self, metadata):
        assert (
            metadata._transform_column_type("UInt16")
            == RustWrenEngineColumnType.INT2
        )

    def test_uint32(self, metadata):
        assert (
            metadata._transform_column_type("UInt32")
            == RustWrenEngineColumnType.INT4
        )

    def test_float32(self, metadata):
        assert (
            metadata._transform_column_type("Float32")
            == RustWrenEngineColumnType.FLOAT4
        )

    def test_float64(self, metadata):
        assert (
            metadata._transform_column_type("Float64")
            == RustWrenEngineColumnType.FLOAT8
        )

    def test_uuid(self, metadata):
        assert (
            metadata._transform_column_type("UUID")
            == RustWrenEngineColumnType.UUID
        )

    def test_date(self, metadata):
        assert (
            metadata._transform_column_type("Date")
            == RustWrenEngineColumnType.DATE
        )

    def test_datetime(self, metadata):
        assert (
            metadata._transform_column_type("DateTime")
            == RustWrenEngineColumnType.TIMESTAMP
        )

    # --- Bool type (the missing alias) ---

    def test_bool(self, metadata):
        """Bool is a common ClickHouse type that was previously unmapped."""
        assert (
            metadata._transform_column_type("Bool")
            == RustWrenEngineColumnType.BOOL
        )

    def test_boolean(self, metadata):
        assert (
            metadata._transform_column_type("Boolean")
            == RustWrenEngineColumnType.BOOL
        )

    # --- Date32 and DateTime64 ---

    def test_date32(self, metadata):
        assert (
            metadata._transform_column_type("Date32")
            == RustWrenEngineColumnType.DATE
        )

    def test_datetime64_no_tz(self, metadata):
        assert (
            metadata._transform_column_type("DateTime64(3)")
            == RustWrenEngineColumnType.TIMESTAMP
        )

    def test_datetime64_with_tz(self, metadata):
        assert (
            metadata._transform_column_type("DateTime64(3, 'UTC')")
            == RustWrenEngineColumnType.TIMESTAMP
        )

    # --- Large integer types ---

    def test_int128(self, metadata):
        assert (
            metadata._transform_column_type("Int128")
            == RustWrenEngineColumnType.NUMERIC
        )

    def test_int256(self, metadata):
        assert (
            metadata._transform_column_type("Int256")
            == RustWrenEngineColumnType.NUMERIC
        )

    def test_uint128(self, metadata):
        assert (
            metadata._transform_column_type("UInt128")
            == RustWrenEngineColumnType.NUMERIC
        )

    def test_uint256(self, metadata):
        assert (
            metadata._transform_column_type("UInt256")
            == RustWrenEngineColumnType.NUMERIC
        )

    # --- Decimal ---

    def test_decimal(self, metadata):
        assert (
            metadata._transform_column_type("Decimal(15,2)")
            == RustWrenEngineColumnType.DECIMAL
        )

    def test_decimal128(self, metadata):
        assert (
            metadata._transform_column_type("Decimal128(9)")
            == RustWrenEngineColumnType.DECIMAL
        )

    # --- LowCardinality wrapper (the main fix) ---

    def test_lowcardinality_string(self, metadata):
        """LowCardinality(String) should unwrap to VARCHAR."""
        assert (
            metadata._transform_column_type("LowCardinality(String)")
            == RustWrenEngineColumnType.VARCHAR
        )

    def test_lowcardinality_fixedstring(self, metadata):
        assert (
            metadata._transform_column_type("LowCardinality(FixedString(32))")
            == RustWrenEngineColumnType.CHAR
        )

    # --- Nullable wrapper ---

    def test_nullable_uuid(self, metadata):
        assert (
            metadata._transform_column_type("Nullable(UUID)")
            == RustWrenEngineColumnType.UUID
        )

    def test_nullable_float32(self, metadata):
        assert (
            metadata._transform_column_type("Nullable(Float32)")
            == RustWrenEngineColumnType.FLOAT4
        )

    def test_nullable_uint16(self, metadata):
        assert (
            metadata._transform_column_type("Nullable(UInt16)")
            == RustWrenEngineColumnType.INT2
        )

    def test_nullable_string(self, metadata):
        assert (
            metadata._transform_column_type("Nullable(String)")
            == RustWrenEngineColumnType.VARCHAR
        )

    # --- Nested wrappers (LowCardinality + Nullable) ---

    def test_lowcardinality_nullable_string(self, metadata):
        """LowCardinality(Nullable(String)) should recursively unwrap."""
        assert (
            metadata._transform_column_type("LowCardinality(Nullable(String))")
            == RustWrenEngineColumnType.VARCHAR
        )

    # --- FixedString with size ---

    def test_fixedstring_with_size(self, metadata):
        assert (
            metadata._transform_column_type("FixedString(32)")
            == RustWrenEngineColumnType.CHAR
        )

    # --- Complex types ---

    def test_array(self, metadata):
        assert (
            metadata._transform_column_type("Array(String)")
            == RustWrenEngineColumnType.VARCHAR
        )

    def test_map(self, metadata):
        assert (
            metadata._transform_column_type("Map(String, String)")
            == RustWrenEngineColumnType.VARCHAR
        )

    def test_tuple(self, metadata):
        assert (
            metadata._transform_column_type("Tuple(String, Int32)")
            == RustWrenEngineColumnType.VARCHAR
        )

    # --- Enum with values ---

    def test_enum8_with_values(self, metadata):
        assert (
            metadata._transform_column_type("Enum8('a'=1, 'b'=2)")
            == RustWrenEngineColumnType.STRING
        )

    def test_enum16_with_values(self, metadata):
        assert (
            metadata._transform_column_type("Enum16('x'=1, 'y'=2)")
            == RustWrenEngineColumnType.STRING
        )

    # --- Special ---

    def test_nothing(self, metadata):
        assert (
            metadata._transform_column_type("Nothing")
            == RustWrenEngineColumnType.NULL
        )

    def test_unknown_type_returns_unknown(self, metadata):
        assert (
            metadata._transform_column_type("SomeWeirdType")
            == RustWrenEngineColumnType.UNKNOWN
        )
Loading