From 8906a7a32d34a64dd602408d92510347d87c18e5 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Fri, 2 Aug 2024 21:03:39 +0800 Subject: [PATCH 1/6] support map literal syntax --- datafusion/sql/src/expr/mod.rs | 42 +++++++++++++++++++--- datafusion/sqllogictest/test_files/map.slt | 33 +++++++++++++++++ 2 files changed, 71 insertions(+), 4 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 71ff7c03bea2..05c53b20d590 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -21,13 +21,13 @@ use datafusion_expr::planner::PlannerResult; use datafusion_expr::planner::RawDictionaryExpr; use datafusion_expr::planner::RawFieldAccessExpr; use sqlparser::ast::{ - CastKind, DictionaryField, Expr as SQLExpr, StructField, Subscript, TrimWhereField, - Value, + CastKind, DictionaryField, Expr as SQLExpr, MapEntry, StructField, Subscript, + TrimWhereField, Value, }; use datafusion_common::{ - internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result, - ScalarValue, + internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, + DataFusionError, Result, ScalarValue, }; use datafusion_expr::expr::InList; use datafusion_expr::expr::ScalarFunction; @@ -628,6 +628,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLExpr::Dictionary(fields) => { self.try_plan_dictionary_literal(fields, schema, planner_context) } + SQLExpr::Map(map) => { + self.try_plan_map_literal(map.entries, schema, planner_context) + } _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), } } @@ -714,6 +717,37 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { not_impl_err!("Unsupported dictionary literal: {raw_expr:?}") } + fn try_plan_map_literal( + &self, + entries: Vec, + schema: &DFSchema, + planner_context: &mut PlannerContext, + ) -> Result { + let mut exprs = vec![]; + entries.into_iter().try_for_each(|entry| { + exprs.push(self.sql_expr_to_logical_expr( + *entry.key, + schema, + planner_context, + )?); + exprs.push(self.sql_expr_to_logical_expr( + *entry.value, + schema, + planner_context, + )?); + Ok::<_, DataFusionError>(()) + })?; + for planner in self.context_provider.get_expr_planners() { + match planner.plan_make_map(exprs)? { + PlannerResult::Planned(expr) => { + return Ok(expr); + } + PlannerResult::Original(expr) => exprs = expr, + } + } + not_impl_err!("Unsupported map literal: {exprs:?}") + } + // Handles a call to struct(...) where the arguments are named. For example // `struct (v as foo, v2 as bar)` by creating a call to the `named_struct` function fn create_named_struct_expr( diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index e530e14df66e..44362cf27cd3 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -310,3 +310,36 @@ VALUES (MAP(['a'], [1])), (MAP(['b'], [2])), (MAP(['c', 'a'], [3, 1])) {a: 1} {b: 2} {c: 3, a: 1} + +query ? +SELECT MAP {'a':1, 'b':2, 'c':3}; +---- +{a: 1, b: 2, c: 3} + +query ? +SELECT MAP {'a':1, 'b':2, 'c':3 } FROM t; +---- +{a: 1, b: 2, c: 3} +{a: 1, b: 2, c: 3} +{a: 1, b: 2, c: 3} + +query I +SELECT MAP {'a':1, 'b':2, 'c':3}['a']; +---- +1 + +query I +SELECT MAP {'a':1, 'b':2, 'c':3 }['a'] FROM t; +---- +1 +1 +1 + +# TODO: support parsing an empty map +# query ? +# SELECT MAP {}; +# ---- +# {} + +query error DataFusion error: Execution error: map key cannot be null +SELECT MAP {'a': 1, null: 2} From 014deb26985c12c1583850f14f5d9547955ed994 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sat, 3 Aug 2024 01:16:17 +0800 Subject: [PATCH 2/6] address comment and enhance tests --- datafusion/sql/src/expr/mod.rs | 2 +- datafusion/sqllogictest/test_files/map.slt | 116 +++++++++++++++++++++ 2 files changed, 117 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 05c53b20d590..d2d5acae246c 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -745,7 +745,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { PlannerResult::Original(expr) => exprs = expr, } } - not_impl_err!("Unsupported map literal: {exprs:?}") + not_impl_err!("Unsupported MAP literal: {exprs:?}") } // Handles a call to struct(...) where the arguments are named. For example diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 44362cf27cd3..abef16a4dcf5 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -341,5 +341,121 @@ SELECT MAP {'a':1, 'b':2, 'c':3 }['a'] FROM t; # ---- # {} +# values contain null +query ? +SELECT MAP {'a': 1, 'b': null}; +---- +{a: 1, b: } + +# keys contain null query error DataFusion error: Execution error: map key cannot be null SELECT MAP {'a': 1, null: 2} + +# array as key +query ? +SELECT MAP {[1,2,3]:1, [2,4]:2}; +---- + {[1, 2, 3]: 1, [2, 4]: 2} + +# array with different type as key +# expect to fail due to type coercion error +query error +SELECT MAP {[1,2,3]:1, ['a', 'b']:2}; + +# array as value +query ? +SELECT MAP {'a':[1,2,3], 'b':[2,4]}; +---- +{a: [1, 2, 3], b: [2, 4]} + +# array with different type as value +# expect to fail due to type coercion error +query error +SELECT MAP {'a':[1,2,3], 'b':['a', 'b']}; + +# struct as key +query ? +SELECT MAP {{'a':1, 'b':2}:1, {'a':3, 'b':4}:2}; +---- +{{a: 1, b: 2}: 1, {a: 3, b: 4}: 2} + +# struct with different fields as key +# expect to fail due to type coercion error +query error +SELECT MAP {{'a':1, 'b':2}:1, {'c':3, 'd':4}:2}; + +# struct as value +query ? +SELECT MAP {'a':{'b':1, 'c':2}, 'b':{'b':3, 'c':4}}; +---- +{a: {b: 1, c: 2}, b: {b: 3, c: 4}} + +# struct with different fields as value +# expect to fail due to type coercion error +query error +SELECT MAP {'a':{'b':1, 'c':2}, 'b':{'c':3, 'd':4}}; + +# map as key +query ? +SELECT MAP { MAP {1:'a', 2:'b'}:1, MAP {1:'c', 2:'d'}:2 }; +---- +{{1: a, 2: b}: 1, {1: c, 2: d}: 2} + +# map with different keys as key +query ? +SELECT MAP { MAP {1:'a', 2:'b', 3:'c'}:1, MAP {2:'c', 4:'d'}:2 }; +---- + {{1: a, 2: b, 3: c}: 1, {2: c, 4: d}: 2} + +# map as value +query ? +SELECT MAP {1: MAP {1:'a', 2:'b'}, 2: MAP {1:'c', 2:'d'} }; +---- +{1: {1: a, 2: b}, 2: {1: c, 2: d}} + +# map with different keys as value +query ? +SELECT MAP {'a': MAP {1:'a', 2:'b', 3:'c'}, 'b': MAP {2:'c', 4:'d'} }; +---- +{a: {1: a, 2: b, 3: c}, b: {2: c, 4: d}} + +# complex map for each row +query ? +SELECT MAP {'a': MAP {1:'a', 2:'b', 3:'c'}, 'b': MAP {2:'c', 4:'d'} } from t; +---- +{a: {1: a, 2: b, 3: c}, b: {2: c, 4: d}} +{a: {1: a, 2: b, 3: c}, b: {2: c, 4: d}} +{a: {1: a, 2: b, 3: c}, b: {2: c, 4: d}} + +# access map with non-existent key +query ? +SELECT MAP {'a': MAP {1:'a', 2:'b', 3:'c'}, 'b': MAP {2:'c', 4:'d'} }['c']; +---- +NULL + +# access map with null key +query error +SELECT MAP {'a': MAP {1:'a', 2:'b', 3:'c'}, 'b': MAP {2:'c', 4:'d'} }[NULL]; + +query ? +SELECT MAP { 'a': 1, 2: 3 }; +---- +{a: 1, 2: 3} + +# TODO: fix accessing map with non-string key +# query ? +# SELECT MAP { 1: 'a', 2: 'b', 3: 'c' }[1]; +# ---- +# a + +# TODO: fix accessing map with non-string key +# query ? +# SELECT MAP { MAP {1:'a', 2:'b'}:1, MAP {1:'c', 2:'d'}:2 }[MAP {1:'a', 2:'b'}]; +# ---- +# 1 + +# TODO: fix accessing map with non-string key +# query ? +# SELECT MAKE_MAP(1, null, 2, 33, 3, null)[2]; +# ---- +# 33 From ea38d6b22c8c0e4d22fcd063653bd6eba0619950 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sat, 3 Aug 2024 01:51:45 +0800 Subject: [PATCH 3/6] align error messages --- datafusion/sql/src/expr/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index d2d5acae246c..b7ee53dde85a 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -714,7 +714,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { PlannerResult::Original(expr) => raw_expr = expr, } } - not_impl_err!("Unsupported dictionary literal: {raw_expr:?}") + not_impl_err!("Dictionary not supported by ExprPlanner: {raw_expr:?}") } fn try_plan_map_literal( @@ -745,7 +745,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { PlannerResult::Original(expr) => exprs = expr, } } - not_impl_err!("Unsupported MAP literal: {exprs:?}") + not_impl_err!("MAP not supported by ExprPlanner: {exprs:?}") } // Handles a call to struct(...) where the arguments are named. For example From a40f819e49a6a07e685d6c7dffa9bbc49a0ab47d Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sat, 3 Aug 2024 12:05:15 +0800 Subject: [PATCH 4/6] add issue references --- datafusion/sqllogictest/test_files/map.slt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index abef16a4dcf5..11998eea9044 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -335,7 +335,7 @@ SELECT MAP {'a':1, 'b':2, 'c':3 }['a'] FROM t; 1 1 -# TODO: support parsing an empty map +# TODO(https://github.com/sqlparser-rs/sqlparser-rs/pull/1361): support parsing an empty map. Enable this after upgrading sqlparser-rs. # query ? # SELECT MAP {}; # ---- @@ -442,19 +442,19 @@ SELECT MAP { 'a': 1, 2: 3 }; ---- {a: 1, 2: 3} -# TODO: fix accessing map with non-string key +# TODO(https://github.com/apache/datafusion/issues/11785): fix accessing map with non-string key # query ? # SELECT MAP { 1: 'a', 2: 'b', 3: 'c' }[1]; # ---- # a -# TODO: fix accessing map with non-string key +# TODO(https://github.com/apache/datafusion/issues/11785): fix accessing map with non-string key # query ? # SELECT MAP { MAP {1:'a', 2:'b'}:1, MAP {1:'c', 2:'d'}:2 }[MAP {1:'a', 2:'b'}]; # ---- # 1 -# TODO: fix accessing map with non-string key +# TODO(https://github.com/apache/datafusion/issues/11785): fix accessing map with non-string key # query ? # SELECT MAKE_MAP(1, null, 2, 33, 3, null)[2]; # ---- From e8a34778425474cf46396b4696502c5bc2eccd4f Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sat, 3 Aug 2024 12:17:41 +0800 Subject: [PATCH 5/6] using iterators collecting to build the exprs --- datafusion/sql/src/expr/mod.rs | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index b7ee53dde85a..fd03fac325f8 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -27,7 +27,7 @@ use sqlparser::ast::{ use datafusion_common::{ internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, - DataFusionError, Result, ScalarValue, + Result, ScalarValue, }; use datafusion_expr::expr::InList; use datafusion_expr::expr::ScalarFunction; @@ -723,20 +723,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { schema: &DFSchema, planner_context: &mut PlannerContext, ) -> Result { - let mut exprs = vec![]; - entries.into_iter().try_for_each(|entry| { - exprs.push(self.sql_expr_to_logical_expr( - *entry.key, - schema, - planner_context, - )?); - exprs.push(self.sql_expr_to_logical_expr( - *entry.value, - schema, - planner_context, - )?); - Ok::<_, DataFusionError>(()) - })?; + let mut exprs: Vec<_> = entries.into_iter() + .flat_map(|entry| { + vec![entry.key, entry.value].into_iter() + }) + .map(|expr| { + self.sql_expr_to_logical_expr(*expr, schema, planner_context) + }) + .collect::>>()?; for planner in self.context_provider.get_expr_planners() { match planner.plan_make_map(exprs)? { PlannerResult::Planned(expr) => { From 50500ea75237dee34b5067acad7e5a8be1388fbc Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sat, 3 Aug 2024 12:18:15 +0800 Subject: [PATCH 6/6] cargo fmt --- datafusion/sql/src/expr/mod.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index fd03fac325f8..b80ffb6aed3f 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -26,8 +26,8 @@ use sqlparser::ast::{ }; use datafusion_common::{ - internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, - Result, ScalarValue, + internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result, + ScalarValue, }; use datafusion_expr::expr::InList; use datafusion_expr::expr::ScalarFunction; @@ -723,13 +723,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { schema: &DFSchema, planner_context: &mut PlannerContext, ) -> Result { - let mut exprs: Vec<_> = entries.into_iter() - .flat_map(|entry| { - vec![entry.key, entry.value].into_iter() - }) - .map(|expr| { - self.sql_expr_to_logical_expr(*expr, schema, planner_context) - }) + let mut exprs: Vec<_> = entries + .into_iter() + .flat_map(|entry| vec![entry.key, entry.value].into_iter()) + .map(|expr| self.sql_expr_to_logical_expr(*expr, schema, planner_context)) .collect::>>()?; for planner in self.context_provider.get_expr_planners() { match planner.plan_make_map(exprs)? {