diff --git a/.github/workflows/ibis-ci-2.yml b/.github/workflows/ibis-ci-2.yml new file mode 100644 index 000000000..a0ff3d161 --- /dev/null +++ b/.github/workflows/ibis-ci-2.yml @@ -0,0 +1,66 @@ +name: ibis CI 2 +permissions: + contents: read + pull-requests: write + +on: + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.number }} + cancel-in-progress: true + +defaults: + run: + working-directory: ibis-server + +jobs: + ci: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Ruff check + uses: chartboost/ruff-action@v1 + with: + src: './ibis-server' + args: 'format --check' + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: '21' + cache: 'maven' + - name: Start Wren JAVA engine + working-directory: ./wren-core-legacy + run: | + mkdir etc + echo "node.environment=production" >> etc/config.properties + echo "wren.directory=./etc/mdl" >> etc/config.properties + echo "wren.experimental-enable-dynamic-fields=true" >> etc/config.properties + ./mvnw clean install -B -DskipTests -P exec-jar + java -Dconfig=etc/config.properties \ + --add-opens=java.base/java.nio=ALL-UNNAMED \ + -jar ./wren-server/target/wren-server-*-executable.jar & + - name: Install poetry + run: pipx install poetry + - uses: actions/setup-python@v5 + with: + python-version-file: ./ibis-server/pyproject.toml + cache: 'poetry' + - uses: extractions/setup-just@v2 + - name: Cache Cargo + uses: actions/cache@v3 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + wren-core-py/target/ + key: ${{ runner.os }}-cargo-${{ hashFiles('wren-core-py/Cargo.lock') }} + - name: Install dependencies + run: | + just install --with dev + - name: Run tests + env: + WREN_ENGINE_ENDPOINT: http://localhost:8080 + run: poetry run pytest -m "oracle" diff --git a/.github/workflows/ibis-ci.yml b/.github/workflows/ibis-ci.yml index 318e67234..786c763cd 100644 --- a/.github/workflows/ibis-ci.yml +++ b/.github/workflows/ibis-ci.yml @@ -74,7 +74,7 @@ jobs: AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.AWS_REGION }} AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }} - run: poetry run pytest -m "not bigquery and not snowflake and not canner and not s3_file and not gcs_file and not athena and not redshift and not databricks" + run: poetry run pytest -m "not oracle and not bigquery and not snowflake and not canner and not s3_file and not gcs_file and not athena and not redshift and not databricks" - name: Test bigquery if need if: contains(github.event.pull_request.labels.*.name, 'bigquery') env: diff --git a/ibis-server/tests/routers/v3/connector/oracle/conftest.py b/ibis-server/tests/routers/v3/connector/oracle/conftest.py index 1ea70e25d..cb53966c2 100644 --- a/ibis-server/tests/routers/v3/connector/oracle/conftest.py +++ b/ibis-server/tests/routers/v3/connector/oracle/conftest.py @@ -1,9 +1,10 @@ import pathlib +import time import pandas as pd import pytest import sqlalchemy -from sqlalchemy import text +from sqlalchemy import NullPool, text from testcontainers.oracle import OracleDbContainer from app.config import get_config @@ -24,11 +25,38 @@ def pytest_collection_modifyitems(items): item.add_marker(pytestmark) -@pytest.fixture(scope="module") +@pytest.fixture(scope="session") def oracle(request) -> OracleDbContainer: oracle = OracleDbContainer( "gvenzl/oracle-free:23.6-slim-faststart", oracle_password=f"{oracle_password}" ).start() + + max_retries = 30 + retry_interval = 10 + engine = None + + for i in range(max_retries): + try: + engine = sqlalchemy.create_engine( + oracle.get_connection_url(), + poolclass=NullPool, + pool_pre_ping=True, + ) + with engine.connect() as conn: + result = conn.execute(text("SELECT 1")) + result.fetchone() + break + except Exception: + if i == max_retries - 1: + oracle.stop() + raise TimeoutError( + f"Oracle container failed to start after {max_retries * retry_interval}s" + ) + time.sleep(retry_interval) + finally: + if engine: + engine.dispose() + orders_schema = { "o_orderkey": sqlalchemy.Integer(), "o_custkey": sqlalchemy.Integer(), @@ -50,7 +78,7 @@ def oracle(request) -> OracleDbContainer: "c_mktsegment": sqlalchemy.Text(), "c_comment": sqlalchemy.Text(), } - engine = sqlalchemy.create_engine(oracle.get_connection_url()) + engine = sqlalchemy.create_engine(oracle.get_connection_url(), poolclass=NullPool) with engine.begin() as conn: # assign dtype to avoid to create CLOB column for text columns pd.read_parquet(file_path("resource/tpch/data/orders.parquet")).to_sql( diff --git a/ibis-server/tests/routers/v3/connector/oracle/test_function.py b/ibis-server/tests/routers/v3/connector/oracle/test_function.py deleted file mode 100644 index 26cff20cc..000000000 --- a/ibis-server/tests/routers/v3/connector/oracle/test_function.py +++ /dev/null @@ -1,105 +0,0 @@ -import base64 - -import orjson -import pytest - -from app.config import get_config -from app.dependencies import X_WREN_FALLBACK_DISABLE -from tests.conftest import DATAFUSION_FUNCTION_COUNT -from tests.routers.v3.connector.oracle.conftest import base_url, function_list_path - -manifest = { - "dataSource": "oracle", - "catalog": "my_catalog", - "schema": "my_schema", - "models": [ - { - "name": "orders", - "tableReference": { - "schema": "SYSTEM", - "table": "ORDERS", - }, - "columns": [ - {"name": "orderkey", "expression": '"O_ORDERKEY"', "type": "number"}, - ], - }, - ], -} - - -@pytest.fixture(scope="module") -def manifest_str(): - return base64.b64encode(orjson.dumps(manifest)).decode("utf-8") - - -async def test_function_list(client): - config = get_config() - - config.set_remote_function_list_path(None) - response = await client.get(url=f"{base_url}/functions") - assert response.status_code == 200 - result = response.json() - assert len(result) == DATAFUSION_FUNCTION_COUNT - - config.set_remote_function_list_path(function_list_path) - response = await client.get(url=f"{base_url}/functions") - assert response.status_code == 200 - result = response.json() - assert len(result) == DATAFUSION_FUNCTION_COUNT + 1 - the_func = next(filter(lambda x: x["name"] == "to_timestamp_tz", result)) - assert the_func == { - "name": "to_timestamp_tz", - "description": "Convert string to timestamp with time zone", - "function_type": "scalar", - "param_names": None, - "param_types": None, - "return_type": None, - } - - config.set_remote_function_list_path(None) - response = await client.get(url=f"{base_url}/functions") - assert response.status_code == 200 - result = response.json() - assert len(result) == DATAFUSION_FUNCTION_COUNT - - -async def test_scalar_function(client, manifest_str: str, connection_info): - response = await client.post( - url=f"{base_url}/query", - json={ - "connectionInfo": connection_info, - "manifestStr": manifest_str, - "sql": "SELECT ABS(-1) AS col", - }, - headers={ - X_WREN_FALLBACK_DISABLE: "true", - }, - ) - assert response.status_code == 200 - result = response.json() - assert result == { - "columns": ["col"], - "data": [[1]], - "dtypes": {"col": "int64"}, - } - - -async def test_aggregate_function(client, manifest_str: str, connection_info): - response = await client.post( - url=f"{base_url}/query", - json={ - "connectionInfo": connection_info, - "manifestStr": manifest_str, - "sql": "SELECT COUNT(*) AS col FROM (SELECT 1) AS temp_table", - }, - headers={ - X_WREN_FALLBACK_DISABLE: "true", - }, - ) - assert response.status_code == 200 - result = response.json() - assert result == { - "columns": ["col"], - "data": [[1]], - "dtypes": {"col": "int64"}, - } diff --git a/ibis-server/tests/routers/v3/connector/oracle/test_query.py b/ibis-server/tests/routers/v3/connector/oracle/test_query.py index 283a052a1..da30f3328 100644 --- a/ibis-server/tests/routers/v3/connector/oracle/test_query.py +++ b/ibis-server/tests/routers/v3/connector/oracle/test_query.py @@ -3,8 +3,10 @@ import orjson import pytest +from app.config import get_config from app.dependencies import X_WREN_FALLBACK_DISABLE -from tests.routers.v3.connector.oracle.conftest import base_url +from tests.conftest import DATAFUSION_FUNCTION_COUNT +from tests.routers.v3.connector.oracle.conftest import base_url, function_list_path manifest = { "catalog": "my_catalog", @@ -258,3 +260,76 @@ async def test_order_by_nulls_last(client, manifest_str, connection_info): assert result["data"][0][0] == "two" assert result["data"][1][0] == "one" assert result["data"][2][0] == "three" + + +async def test_function_list(client): + config = get_config() + + config.set_remote_function_list_path(None) + response = await client.get(url=f"{base_url}/functions") + assert response.status_code == 200 + result = response.json() + assert len(result) == DATAFUSION_FUNCTION_COUNT + + config.set_remote_function_list_path(function_list_path) + response = await client.get(url=f"{base_url}/functions") + assert response.status_code == 200 + result = response.json() + assert len(result) == DATAFUSION_FUNCTION_COUNT + 1 + the_func = next(filter(lambda x: x["name"] == "to_timestamp_tz", result)) + assert the_func == { + "name": "to_timestamp_tz", + "description": "Convert string to timestamp with time zone", + "function_type": "scalar", + "param_names": None, + "param_types": None, + "return_type": None, + } + + config.set_remote_function_list_path(None) + response = await client.get(url=f"{base_url}/functions") + assert response.status_code == 200 + result = response.json() + assert len(result) == DATAFUSION_FUNCTION_COUNT + + +async def test_scalar_function(client, manifest_str: str, connection_info): + response = await client.post( + url=f"{base_url}/query", + json={ + "connectionInfo": connection_info, + "manifestStr": manifest_str, + "sql": "SELECT ABS(-1) AS col", + }, + headers={ + X_WREN_FALLBACK_DISABLE: "true", + }, + ) + assert response.status_code == 200 + result = response.json() + assert result == { + "columns": ["col"], + "data": [[1]], + "dtypes": {"col": "int64"}, + } + + +async def test_aggregate_function(client, manifest_str: str, connection_info): + response = await client.post( + url=f"{base_url}/query", + json={ + "connectionInfo": connection_info, + "manifestStr": manifest_str, + "sql": "SELECT COUNT(*) AS col FROM (SELECT 1) AS temp_table", + }, + headers={ + X_WREN_FALLBACK_DISABLE: "true", + }, + ) + assert response.status_code == 200 + result = response.json() + assert result == { + "columns": ["col"], + "data": [[1]], + "dtypes": {"col": "int64"}, + }