From 3296098b574d5e9726622dc848efaf0d47dfb484 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Tue, 2 Sep 2025 14:43:28 +0800 Subject: [PATCH 1/3] disable ident normalization --- wren-core/core/src/mdl/context.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wren-core/core/src/mdl/context.rs b/wren-core/core/src/mdl/context.rs index 7aca83d3b..54d3e9e47 100644 --- a/wren-core/core/src/mdl/context.rs +++ b/wren-core/core/src/mdl/context.rs @@ -57,6 +57,10 @@ pub async fn create_ctx_with_mdl( "datafusion.sql_parser.default_null_ordering", &ScalarValue::Utf8(Some("nulls_last".to_string())), ) + .set( + "datafusion.sql_parser.enable_ident_normalization", + &ScalarValue::Utf8(Some("false".to_string())), + ) .with_create_default_catalog_and_schema(false) .with_default_catalog_and_schema( analyzed_mdl.wren_mdl.catalog(), From 92d10b7df3fd731afcc457ab4f49e1fa2d5608bf Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Tue, 2 Sep 2025 16:17:56 +0800 Subject: [PATCH 2/3] modified and enhance tests --- .../routers/v3/connector/postgres/conftest.py | 2 + .../v3/connector/postgres/test_query.py | 42 +++++++++ wren-core/core/src/mdl/mod.rs | 93 +++++++++++++++---- 3 files changed, 121 insertions(+), 16 deletions(-) diff --git a/ibis-server/tests/routers/v3/connector/postgres/conftest.py b/ibis-server/tests/routers/v3/connector/postgres/conftest.py index 20388e7cf..dd5a604c8 100644 --- a/ibis-server/tests/routers/v3/connector/postgres/conftest.py +++ b/ibis-server/tests/routers/v3/connector/postgres/conftest.py @@ -40,6 +40,8 @@ def postgres(request) -> PostgresContainer: "INSERT INTO null_test (id, letter) VALUES (1, 'one'), (2, 'two'), (NULL, 'three')" ) ) + conn.execute(sqlalchemy.text("CREATE TABLE 中文表 (欄位1 int, 欄位2 int)")) + conn.execute(sqlalchemy.text("INSERT INTO 中文表 (欄位1, 欄位2) VALUES (1, 2), (3, 4)")) request.addfinalizer(pg.stop) return pg diff --git a/ibis-server/tests/routers/v3/connector/postgres/test_query.py b/ibis-server/tests/routers/v3/connector/postgres/test_query.py index c1b31abd5..e5a312b56 100644 --- a/ibis-server/tests/routers/v3/connector/postgres/test_query.py +++ b/ibis-server/tests/routers/v3/connector/postgres/test_query.py @@ -1181,3 +1181,45 @@ async def test_cache_with_mixed_relevant_irrelevant_headers( assert ( response3.headers["X-Cache-Hit"] == "false" ) # Should miss cache due to changed relevant header + +async def test_query_unicode_table(client, connection_info): + manifest = { + "catalog": "wrenai", + "schema": "public", + "models": [ + { + "name": "中文表", + "tableReference": { + "schema": "public", + "table": "中文表" + }, + "columns": [ + {"name": "欄位1", "type": "int"}, + {"name": "欄位2", "type": "int"} + ] + } + ], + "dataSource": "postgres" + } + + manifest_str = base64.b64encode(orjson.dumps(manifest)).decode("utf-8") + + response = await client.post( + url=f"{base_url}/query?cacheEnable=true", + json={ + "connectionInfo": connection_info, + "manifestStr": manifest_str, + "sql": "SELECT 欄位1, 欄位2 FROM 中文表 LIMIT 1", + }, + headers={ + X_WREN_FALLBACK_DISABLE: "true" + }, + ) + + assert response.status_code == 200 + result = response.json() + assert result["data"][0] == [1, 2] + assert result["dtypes"] == { + "欄位1": "int32", + "欄位2": "int32", + } \ No newline at end of file diff --git a/wren-core/core/src/mdl/mod.rs b/wren-core/core/src/mdl/mod.rs index ee6680a72..5c4e9dc45 100644 --- a/wren-core/core/src/mdl/mod.rs +++ b/wren-core/core/src/mdl/mod.rs @@ -379,8 +379,6 @@ pub fn transform_sql( } /// Transform the SQL based on the MDL with the SessionContext -/// Wren engine will normalize the SQL to the lower case to solve the case-sensitive -/// issue for the Wren view pub async fn transform_sql_with_ctx( ctx: &SessionContext, analyzed_mdl: Arc, @@ -736,7 +734,7 @@ mod test { Arc::new(HashMap::default()), Mode::Unparse, )?); - let sql = r#"select * from "CTest"."STest"."Customer""#; + let sql = r#"select * from CTest.STest.Customer"#; let actual = mdl::transform_sql_with_ctx( &SessionContext::new(), Arc::clone(&analyzed_mdl), @@ -786,7 +784,7 @@ mod test { Arc::clone(&analyzed_mdl), &functions, Arc::new(HashMap::new()), - r#"select add_two("Custkey") from "Customer""#, + r#"select add_two(Custkey) from Customer"#, ) .await?; assert_snapshot!(actual, @"SELECT add_two(\"Customer\".\"Custkey\") FROM (SELECT \"Customer\".\"Custkey\" \ @@ -829,22 +827,22 @@ mod test { .column(ColumnBuilder::new("名字", "string").build()) .column( ColumnBuilder::new("name_append", "string") - .expression(r#""名字" || "名字""#) + .expression(r#"名字 || 名字"#) .build(), ) .column( ColumnBuilder::new("group", "string") - .expression(r#""組別""#) + .expression(r#"組別"#) .build(), ) .column( ColumnBuilder::new("subscribe", "int") - .expression(r#""訂閱數""#) + .expression(r#"訂閱數"#) .build(), ) .column( ColumnBuilder::new("subscribe_plus", "int") - .expression(r#""訂閱數" + 1"#) + .expression(r#"訂閱數 + 1"#) .build(), ) .build(), @@ -910,12 +908,12 @@ mod test { .table_reference("artist") .column( ColumnBuilder::new("name_append", "string") - .expression(r#""名字" || "名字""#) + .expression(r#"名字 || 名字"#) .build(), ) .column( ColumnBuilder::new("lower_name", "string") - .expression(r#"lower("名字")"#) + .expression(r#"lower(名字)"#) .build(), ) .build(), @@ -974,7 +972,7 @@ mod test { .column(ColumnBuilder::new("名字", "string").hidden(true).build()) .column( ColumnBuilder::new("串接名字", "string") - .expression(r#""名字" || "名字""#) + .expression(r#"名字 || 名字"#) .build(), ) .build(), @@ -986,7 +984,7 @@ mod test { Arc::new(HashMap::default()), Mode::Unparse, )?); - let sql = r#"select "串接名字" from wren.test.artist"#; + let sql = r#"select 串接名字 from wren.test.artist"#; let actual = transform_sql_with_ctx( &SessionContext::new(), Arc::clone(&analyzed_mdl), @@ -1059,7 +1057,7 @@ mod test { Arc::new(HashMap::default()), Mode::Unparse, )?); - let sql = r#"select * from wren.test.artist where "名字" in (SELECT "名字" FROM wren.test.artist)"#; + let sql = r#"select * from wren.test.artist where 名字 in (SELECT 名字 FROM wren.test.artist)"#; let actual = transform_sql_with_ctx( &SessionContext::new(), Arc::clone(&analyzed_mdl), @@ -1360,7 +1358,7 @@ mod test { ) .column( ColumnBuilder::new("cast_timestamptz", "timestamptz") - .expression(r#"cast("出道時間" as timestamp with time zone)"#) + .expression(r#"cast(出道時間 as timestamp with time zone)"#) .build(), ) .build(), @@ -2622,7 +2620,7 @@ mod test { .add_row_level_access_control( "rule", vec![SessionProperty::new_required("預定組別A")], - "\"組別\" = @預定組別A", + "組別 = @預定組別A", ) .build(), ) @@ -2638,7 +2636,7 @@ mod test { Mode::Unparse, )?); - let sql = r#"SELECT "名字", "組別", "訂閱數" FROM "VTU藝人""#; + let sql = r#"SELECT 名字, 組別, 訂閱數 FROM VTU藝人"#; assert_snapshot!( transform_sql_with_ctx(&ctx, Arc::clone(&analyzed_mdl), &[], headers, sql) .await?, @@ -3464,6 +3462,69 @@ mod test { Ok(()) } + #[tokio::test] + async fn test_ambiguous_table_name() -> Result<()> { + let ctx = SessionContext::new(); + let manifest = ManifestBuilder::new() + .catalog("wren") + .schema("test") + .model( + ModelBuilder::new("customer") + .table_reference("customer") + .column(ColumnBuilder::new("c_name", "int").build()) + .column(ColumnBuilder::new("C_name", "string").build()) + .build(), + ) + .model( + ModelBuilder::new("Customer") + .table_reference("customer") + .column(ColumnBuilder::new("c_name", "int").build()) + .column(ColumnBuilder::new("C_name", "string").build()) + .build(), + ) + .build(); + + let headers = Arc::new(HashMap::default()); + let analyzed_mdl = Arc::new(AnalyzedWrenMDL::analyze( + manifest, + Arc::clone(&headers), + Mode::Unparse, + )?); + + let sql = "select c_name, C_name from customer"; + assert_snapshot!( + transform_sql_with_ctx(&ctx, Arc::clone(&analyzed_mdl), &[], Arc::clone(&headers), sql).await?, + @r#"SELECT customer.c_name, customer."C_name" FROM (SELECT customer."C_name", customer.c_name FROM (SELECT __source."C_name" AS "C_name", __source.c_name AS c_name FROM customer AS __source) AS customer) AS customer"# + ); + + let sql = "select c_name, C_name from Customer"; + assert_snapshot!( + transform_sql_with_ctx(&ctx, Arc::clone(&analyzed_mdl), &[], Arc::clone(&headers), sql).await?, + @r#"SELECT "Customer".c_name, "Customer"."C_name" FROM (SELECT "Customer"."C_name", "Customer".c_name FROM (SELECT __source."C_name" AS "C_name", __source.c_name AS c_name FROM customer AS __source) AS "Customer") AS "Customer""# + ); + + let sql = "select * from CUSTOMER"; + match transform_sql_with_ctx( + &ctx, + Arc::clone(&analyzed_mdl), + &[], + Arc::clone(&headers), + sql, + ) + .await + { + Ok(_) => { + panic!("Expected error, but got SQL"); + } + Err(e) => assert_snapshot!( + e.to_string(), + @"Error during planning: table 'wren.test.CUSTOMER' not found" + ), + } + + Ok(()) + } + /// Return a RecordBatch with made up data about customer fn customer() -> RecordBatch { let custkey: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 3])); From 9d6ec4bff07a8ccb5e8cd01c0cad354c8637377c Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Tue, 2 Sep 2025 16:23:45 +0800 Subject: [PATCH 3/3] fix fmt --- .../routers/v3/connector/postgres/conftest.py | 4 +++- .../v3/connector/postgres/test_query.py | 18 +++++++----------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/ibis-server/tests/routers/v3/connector/postgres/conftest.py b/ibis-server/tests/routers/v3/connector/postgres/conftest.py index dd5a604c8..7643d6b1a 100644 --- a/ibis-server/tests/routers/v3/connector/postgres/conftest.py +++ b/ibis-server/tests/routers/v3/connector/postgres/conftest.py @@ -41,7 +41,9 @@ def postgres(request) -> PostgresContainer: ) ) conn.execute(sqlalchemy.text("CREATE TABLE 中文表 (欄位1 int, 欄位2 int)")) - conn.execute(sqlalchemy.text("INSERT INTO 中文表 (欄位1, 欄位2) VALUES (1, 2), (3, 4)")) + conn.execute( + sqlalchemy.text("INSERT INTO 中文表 (欄位1, 欄位2) VALUES (1, 2), (3, 4)") + ) request.addfinalizer(pg.stop) return pg diff --git a/ibis-server/tests/routers/v3/connector/postgres/test_query.py b/ibis-server/tests/routers/v3/connector/postgres/test_query.py index e5a312b56..f9fd53d24 100644 --- a/ibis-server/tests/routers/v3/connector/postgres/test_query.py +++ b/ibis-server/tests/routers/v3/connector/postgres/test_query.py @@ -1182,6 +1182,7 @@ async def test_cache_with_mixed_relevant_irrelevant_headers( response3.headers["X-Cache-Hit"] == "false" ) # Should miss cache due to changed relevant header + async def test_query_unicode_table(client, connection_info): manifest = { "catalog": "wrenai", @@ -1189,17 +1190,14 @@ async def test_query_unicode_table(client, connection_info): "models": [ { "name": "中文表", - "tableReference": { - "schema": "public", - "table": "中文表" - }, + "tableReference": {"schema": "public", "table": "中文表"}, "columns": [ {"name": "欄位1", "type": "int"}, - {"name": "欄位2", "type": "int"} - ] + {"name": "欄位2", "type": "int"}, + ], } ], - "dataSource": "postgres" + "dataSource": "postgres", } manifest_str = base64.b64encode(orjson.dumps(manifest)).decode("utf-8") @@ -1211,9 +1209,7 @@ async def test_query_unicode_table(client, connection_info): "manifestStr": manifest_str, "sql": "SELECT 欄位1, 欄位2 FROM 中文表 LIMIT 1", }, - headers={ - X_WREN_FALLBACK_DISABLE: "true" - }, + headers={X_WREN_FALLBACK_DISABLE: "true"}, ) assert response.status_code == 200 @@ -1222,4 +1218,4 @@ async def test_query_unicode_table(client, connection_info): assert result["dtypes"] == { "欄位1": "int32", "欄位2": "int32", - } \ No newline at end of file + }