Skip to content

v0.6.1 Can't parse Clickhouse argMaxIf function #53

@smoothml

Description

@smoothml

Since v0.6.1 SQLMock is unable to parse Clickhouse's argMaxIf function.

Consider the following example:

from datetime import datetime

from sql_mock.clickhouse import column_mocks as col
from sql_mock.clickhouse.table_mocks import ClickHouseTableMock
from sql_mock.table_mocks import table_meta

query = """SELECT
    user_id,
    count() AS num_sessions,
    countIf(valid = 1) AS num_valid_sessions,
    argMaxIf(in_trial, dt, isNotNull(in_trial)) AS in_trial
FROM sessions
GROUP BY user_id
"""


@table_meta(table_ref="sessions")
class SessionsMock(ClickHouseTableMock):
    dt = col.Datetime(default=datetime(2024, 1, 1 ,10, 30))
    valid = col.Boolean(default=True)
    user_id = col.String(default="foo")
    in_trial = col.Int(default=0, nullable=True)


@table_meta(query=query)
class ResultMock(ClickHouseTableMock):
    user_id = col.String(default="foo")
    num_sessions = col.Int(default=0)
    num_valid_sessions = col.Int(default=0)
    in_trial = col.Int(default=0)


def test_example() -> None:
    sessions_mock = SessionsMock.from_dicts(
        [
            dict(
                dt=datetime(2024, 1, 1, 10),
                valid=1,
                user_id="foo",
                in_trial=1,
            ),
            dict(
                dt=datetime(2024, 1, 2, 10),
                valid=0,
                user_id="foo",
                in_trial=1,
            ),
            dict(
                dt=datetime(2024, 1, 3, 10),
                valid=1,
                user_id="foo",
                in_trial=1,
            ),
            dict(
                dt=datetime(2024, 1, 4, 10),
                valid=1,
                user_id="foo",
                in_trial=None,
            ),
            dict(
                dt=datetime(2024, 1, 5, 10),
                valid=1,
                user_id="foo",
                in_trial=0,
            ),
        ]
    )

    result = ResultMock.from_mocks(input_data=[sessions_mock])

    expected = [
        dict(
            user_id="foo",
            num_sessions=5,
            num_valid_sessions=4,
            in_trial=0,
        )
    ]

    result.assert_equal(expected)

Running test_example results in the following exception:

self = <sqlglot.generator.Generator object at 0x107920d60>
expression = ('argMax', 'If'), key = None, comment = True

    def sql(
        self,
        expression: t.Optional[str | exp.Expression],
        key: t.Optional[str] = None,
        comment: bool = True,
    ) -> str:
        if not expression:
            return ""
    
        if isinstance(expression, str):
            return expression
    
        if key:
            value = expression.args.get(key)
            if value:
                return self.sql(value)
            return ""
    
        transform = self.TRANSFORMS.get(expression.__class__)
    
        if callable(transform):
            sql = transform(self, expression)
        elif transform:
            sql = transform
        elif isinstance(expression, exp.Expression):
            exp_handler_name = f"{expression.key}_sql"
    
            if hasattr(self, exp_handler_name):
                sql = getattr(self, exp_handler_name)(expression)
            elif isinstance(expression, exp.Func):
                sql = self.function_fallback_sql(expression)
            elif isinstance(expression, exp.Property):
                sql = self.property_sql(expression)
            else:
                raise ValueError(f"Unsupported expression type {expression.__class__.__name__}")
        else:
>           raise ValueError(f"Expected an Expression. Received {type(expression)}: {expression}")
E           ValueError: Expected an Expression. Received <class 'tuple'>: ('argMax', 'If')

We do not see this error for other xIf functions (e.g. the countIf in the example query) or for functions without it If (e.g. plain argMax). My initial thought was a change in sqlglot, but the version specified in poetry.lock has not changed in this release.

Metadata

Metadata

Assignees

Labels

bugSomething isn't working

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions