Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions ibis-server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,75 @@ docker compose down -v
```


### Running Doris Tests Locally
Doris-related tests require a running Apache Doris instance.
Our GitHub CI already handles this automatically, but you must start Doris manually when running tests locally.

Prerequisites

- Docker & Docker Compose
- Python dependencies installed (`just install`)
- `pymysql` installed in the dev environment (already included in dev dependencies)

#### Configure the Doris Cluster

1. Start the Doris Container

From the `ibis-server` directory:
```bash
cd tests/routers/v3/connector/doris
docker compose up -d
```

The container uses `apache/doris:4.0.3-all-slim` (all-in-one image with FE + BE).

> ⚠️ The all-in-one Doris image requires sufficient memory (at least 8 GB recommended).
> If you see `MEM_ALLOC_FAILED` errors, increase Docker's memory limit.

Wait until Doris is healthy. Check the backend status with the `mysql` command-line client:
```bash
mysql -h 127.0.0.1 -P 9030 -uroot -e "SHOW BACKENDS\G" | grep "Alive"
# Alive: true
```

2. Update Connection Info (if needed)

The default connection in `tests/routers/v3/connector/doris/conftest.py`:
```python
DORIS_HOST = "127.0.0.1"
DORIS_PORT = 9030
DORIS_USER = "root"
DORIS_PASSWORD = ""
```


Adjust these values if your Doris instance has different credentials.

To run the tests against an existing remote Doris cluster instead of the local container, point the connection constants in `conftest.py` at that cluster:
```python
DORIS_HOST = "<your-doris-host>"
DORIS_PORT = 9030
DORIS_USER = "<user>"
DORIS_PASSWORD = "<password>"
```

#### Run Doris Tests

Go back to the `ibis-server` directory and run:
```bash
just test doris
```

⚠️ Doris tests will fail if the Doris instance is not reachable.

#### Cleanup (Local Docker)

After tests finish:
```bash
cd tests/routers/v3/connector/doris
docker compose down -v
```


### Start with Python Interactive Mode
Install the dependencies
Expand Down
1 change: 1 addition & 0 deletions ibis-server/app/custom_sqlglot/dialects/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# ruff: noqa: F401

from app.custom_sqlglot.dialects.doris import Doris
from app.custom_sqlglot.dialects.mysql import MySQL
10 changes: 10 additions & 0 deletions ibis-server/app/custom_sqlglot/dialects/doris.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from sqlglot import exp
from sqlglot.dialects import Doris as OriginalDoris


class Doris(OriginalDoris):
    """sqlglot Doris dialect with a project-specific type-name override."""

    class Generator(OriginalDoris.Generator):
        """Generator that inherits the stock type mapping and overrides one entry."""

        # Start from the upstream mapping and replace only the VARBINARY
        # entry, so that type is emitted under the name "BINARY".
        TYPE_MAPPING = OriginalDoris.Generator.TYPE_MAPPING | {
            exp.DataType.Type.VARBINARY: "BINARY",
        }
28 changes: 28 additions & 0 deletions ibis-server/app/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ class QueryMySqlDTO(QueryDTO):
connection_info: ConnectionUrl | MySqlConnectionInfo = connection_info_field


class QueryDorisDTO(QueryDTO):
    # Query request payload for the Doris data source. Unlike some sibling
    # DTOs, only the structured DorisConnectionInfo is accepted here (no
    # ConnectionUrl alternative).
    connection_info: DorisConnectionInfo = connection_info_field


class QueryOracleDTO(QueryDTO):
connection_info: ConnectionUrl | OracleConnectionInfo = connection_info_field

Expand Down Expand Up @@ -322,6 +326,29 @@ class MySqlConnectionInfo(BaseConnectionInfo):
)


class DorisConnectionInfo(BaseConnectionInfo):
    # Connection settings for a Doris frontend (FE).
    # host/port/database/user are required secrets; password and kwargs are
    # optional and default to None.
    host: SecretStr = Field(
        examples=["localhost"], description="the hostname of your Doris FE"
    )
    # Kept as a secret string; callers convert it to an int when connecting.
    port: SecretStr = Field(
        examples=["9030"], description="the query port of your Doris FE"
    )
    database: SecretStr = Field(
        examples=["default"], description="the database name of your Doris database"
    )
    user: SecretStr = Field(
        examples=["root"], description="the username of your Doris database"
    )
    password: SecretStr | None = Field(
        default=None,
        examples=["password"],
        description="the password of your Doris database",
    )
    # Free-form driver options merged into the connect() call.
    kwargs: dict[str, str] | None = Field(
        default=None, description="Additional keyword arguments to pass to PyMySQL"
    )


class PostgresConnectionInfo(BaseConnectionInfo):
host: SecretStr = Field(
examples=["localhost"], description="the hostname of your database"
Expand Down Expand Up @@ -636,6 +663,7 @@ class GcsFileConnectionInfo(BaseConnectionInfo):
| ConnectionUrl
| MSSqlConnectionInfo
| MySqlConnectionInfo
| DorisConnectionInfo
| OracleConnectionInfo
| PostgresConnectionInfo
| RedshiftConnectionInfo
Expand Down
39 changes: 39 additions & 0 deletions ibis-server/app/model/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ def __init__(self, data_source: DataSource, connection_info: ConnectionInfo):
self._connector = DatabricksConnector(connection_info)
elif data_source == DataSource.mysql:
self._connector = MySqlConnector(connection_info)
elif data_source == DataSource.doris:
self._connector = DorisConnector(connection_info)
else:
self._connector = IbisConnector(data_source, connection_info)

Expand Down Expand Up @@ -357,6 +359,43 @@ def _cast_json_columns(self, result_table: Table, col_name: str) -> Table:
return result_table.mutate(**{col_name: casted_col})


class DorisConnector(IbisConnector):
    """Connector for Doris, which is reached through the ibis MySQL backend.

    Doris speaks the MySQL protocol, so the connection itself is created by
    get_doris_connection() via ibis.mysql.connect(). That factory also
    overrides get_autocommit() on the connection so it always reports True;
    otherwise ibis's raw_sql() would wrap every query in BEGIN/ROLLBACK,
    because Doris may not reflect the SERVER_STATUS_AUTOCOMMIT flag.
    """

    def __init__(self, connection_info: ConnectionInfo):
        super().__init__(DataSource.doris, connection_info)

    def _handle_pyarrow_unsupported_type(self, ibis_table: Table, **kwargs) -> Table:
        """Rewrite Decimal, UUID and JSON columns via the matching helper.

        Columns of any other type are left untouched. Extra keyword arguments
        are forwarded only to the decimal-rounding helper.
        """
        converted = ibis_table
        for column, column_type in ibis_table.schema().items():
            if isinstance(column_type, Decimal):
                converted = self._round_decimal_columns(
                    result_table=converted, col_name=column, **kwargs
                )
                continue
            if isinstance(column_type, UUID):
                converted = self._cast_uuid_columns(
                    result_table=converted, col_name=column
                )
                continue
            if isinstance(column_type, dt.JSON):
                # Doris JSON columns need the same handling as MySQL
                converted = self._cast_json_columns(
                    result_table=converted, col_name=column
                )
        return converted

    def _cast_json_columns(self, result_table: Table, col_name: str) -> Table:
        """Return the table with column `col_name` replaced by its string cast."""
        as_string = result_table[col_name].cast("string")
        return result_table.mutate(**{col_name: as_string})


class MSSqlConnector(IbisConnector):
def __init__(self, connection_info: ConnectionInfo):
super().__init__(DataSource.mssql, connection_info)
Expand Down
42 changes: 42 additions & 0 deletions ibis-server/app/model/data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
ConnectionUrl,
DatabricksServicePrincipalConnectionInfo,
DatabricksTokenConnectionInfo,
DorisConnectionInfo,
GcsFileConnectionInfo,
LocalFileConnectionInfo,
MinioFileConnectionInfo,
Expand All @@ -36,6 +37,7 @@
QueryCannerDTO,
QueryClickHouseDTO,
QueryDatabricksDTO,
QueryDorisDTO,
QueryDTO,
QueryGcsFileDTO,
QueryLocalFileDTO,
Expand Down Expand Up @@ -69,6 +71,7 @@ class DataSource(StrEnum):
clickhouse = auto()
mssql = auto()
mysql = auto()
doris = auto()
oracle = auto()
postgres = auto()
redshift = auto()
Expand Down Expand Up @@ -167,6 +170,8 @@ def _build_connection_info(self, data: dict) -> ConnectionInfo:
return MSSqlConnectionInfo.model_validate(data)
case DataSource.mysql:
return MySqlConnectionInfo.model_validate(data)
case DataSource.doris:
return DorisConnectionInfo.model_validate(data)
case DataSource.oracle:
return OracleConnectionInfo.model_validate(data)
case DataSource.postgres:
Expand Down Expand Up @@ -236,6 +241,7 @@ class DataSourceExtension(Enum):
clickhouse = QueryClickHouseDTO
mssql = QueryMSSqlDTO
mysql = QueryMySqlDTO
doris = QueryDorisDTO
oracle = QueryOracleDTO
postgres = QueryPostgresDTO
redshift = QueryRedshiftDTO
Expand Down Expand Up @@ -397,6 +403,42 @@ def get_mysql_connection(cls, info: MySqlConnectionInfo) -> BaseBackend:
**kwargs,
)

@classmethod
def get_doris_connection(cls, info: DorisConnectionInfo) -> BaseBackend:
    """Open an ibis MySQL-backend connection to a Doris frontend.

    Doris is MySQL-protocol compatible, so ibis.mysql.connect() is reused.
    The charset defaults to utf8mb4 (the charset Doris actually uses); any
    entry in ``info.kwargs`` is merged on top and may override it. A missing
    password is sent as an empty string.

    Returns the ibis backend, with ``get_autocommit`` on its underlying
    driver connection overridden to always report True (see below).
    """
    connect_options = {"charset": "utf8mb4"}
    if info.kwargs:
        connect_options.update(info.kwargs)

    host = info.host.get_secret_value()
    port = int(info.port.get_secret_value())
    database = info.database.get_secret_value()
    user = info.user.get_secret_value()
    password = info.password.get_secret_value() if info.password else ""

    backend = ibis.mysql.connect(
        host=host,
        port=port,
        database=database,
        user=user,
        password=password,
        **connect_options,
    )

    # Why the override below exists:
    #   * Doris does not properly reflect SERVER_STATUS_AUTOCOMMIT in its
    #     MySQL-protocol handshake/OK packets, so the mysqlclient driver's
    #     get_autocommit() always returns False, even after autocommit(True).
    #   * ibis's raw_sql() consults get_autocommit() and, on False, wraps
    #     every query in BEGIN/ROLLBACK.
    #   * Doris (an OLAP engine) rejects a SELECT inside BEGIN with: "This is
    #     in a transaction, only insert, update, delete, commit, rollback is
    #     acceptable."
    # The attribute is set on this one connection object only; the MySQLdb
    # class, other MySQL connections and other data-source drivers are not
    # affected.
    def _report_autocommit_enabled():
        return True

    backend.con.get_autocommit = _report_autocommit_enabled
    return backend

@staticmethod
def get_postgres_connection(info: PostgresConnectionInfo) -> BaseBackend:
return ibis.postgres.connect(
Expand Down
Loading