From 7393c64c803886bc2bbae3538b87a6c0c5c319a8 Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 May 2024 07:48:45 +0800 Subject: [PATCH 01/33] build(deps): upgrade sqlparser to 0.46.0 Signed-off-by: tison --- Cargo.toml | 2 +- datafusion/sql/src/statement.rs | 72 +++++++++++++-------------------- 2 files changed, 29 insertions(+), 45 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3ca3af284675..9227958b4a8c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -103,7 +103,7 @@ parquet = { version = "51.0.0", default-features = false, features = ["arrow", " rand = "0.8" rstest = "0.19.0" serde_json = "1" -sqlparser = { version = "0.45.0", features = ["visitor"] } +sqlparser = { version = "0.46.0", features = ["visitor"] } tempfile = "3" thiserror = "1.0.44" tokio = { version = "1.36", features = ["macros", "rt", "sync"] } diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 137bb5fb20b7..c4bb5f1afebf 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -53,18 +53,30 @@ use datafusion_expr::{ Volatility, WriteOp, }; use sqlparser::ast; -use sqlparser::ast::{ - Assignment, ColumnDef, CreateTableOptions, DescribeAlias, Expr as SQLExpr, Expr, - FromTable, Ident, ObjectName, ObjectType, Query, SchemaName, SetExpr, - ShowCreateObject, ShowStatementFilter, Statement, TableConstraint, TableFactor, - TableWithJoins, TransactionMode, UnaryOperator, Value, -}; +use sqlparser::ast::{Assignment, ColumnDef, CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, Expr, FromTable, Ident, Insert, ObjectName, ObjectType, Query, SchemaName, SetExpr, ShowCreateObject, ShowStatementFilter, Statement, TableConstraint, TableFactor, TableWithJoins, TransactionMode, UnaryOperator, Value}; use sqlparser::parser::ParserError::ParserError; fn ident_to_string(ident: &Ident) -> String { normalize_ident(ident.to_owned()) } +fn value_to_string(value: &Value) -> Option { + match value { + Value::SingleQuotedString(s) => Some(s.to_string()), + Value::DollarQuotedString(s) => Some(s.to_string()), + Value::Number(_, _) | Value::Boolean(_) => Some(value.to_string()), + Value::DoubleQuotedString(_) + | Value::EscapedStringLiteral(_) + | Value::NationalStringLiteral(_) + | Value::SingleQuotedByteStringLiteral(_) + | Value::DoubleQuotedByteStringLiteral(_) + | Value::RawStringLiteral(_) + | Value::HexStringLiteral(_) + | Value::Null + | Value::Placeholder(_) => None, + } +} + fn object_name_to_string(object_name: &ObjectName) -> String { object_name .0 @@ -463,7 +475,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { filter, } => self.show_columns_to_plan(extended, full, table_name, filter), - Statement::Insert { + Statement::Insert(Insert { or, into, table_name, @@ -480,7 +492,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { replace_into, priority, insert_alias, - } => { + }) => { if or.is_some() { plan_err!("Inserts with or clauses not supported")?; } @@ -537,7 +549,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.update_to_plan(table, assignments, from, selection) } - Statement::Delete { + Statement::Delete(Delete { tables, using, selection, @@ -545,7 +557,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { from, order_by, limit, - } => { + }) => { if !tables.is_empty() { plan_err!("DELETE not supported")?; } @@ -851,23 +863,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let mut options = HashMap::new(); for (key, value) in statement.options { - let value_string = match value { - Value::SingleQuotedString(s) => s.to_string(), - Value::DollarQuotedString(s) => s.to_string(), - Value::UnQuotedString(s) => s.to_string(), - Value::Number(_, _) | Value::Boolean(_) => value.to_string(), - Value::DoubleQuotedString(_) - | Value::EscapedStringLiteral(_) - | Value::NationalStringLiteral(_) - | Value::SingleQuotedByteStringLiteral(_) - | Value::DoubleQuotedByteStringLiteral(_) - | Value::RawStringLiteral(_) - | Value::HexStringLiteral(_) - | Value::Null - | Value::Placeholder(_) => { - return plan_err!("Unsupported Value in COPY statement {}", value); - } - }; + let value_string = value_to_string(&value).ok_or_else(|| { + plan_err!("Unsupported Value in COPY statement {}", value) + })?; if !(&key.contains('.')) { // If config does not belong to any namespace, assume it is // a format option and apply the format prefix for backwards @@ -1132,23 +1130,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // parse value string from Expr let value_string = match &value[0] { SQLExpr::Identifier(i) => ident_to_string(i), - SQLExpr::Value(v) => match v { - Value::SingleQuotedString(s) => s.to_string(), - Value::DollarQuotedString(s) => s.to_string(), - Value::Number(_, _) | Value::Boolean(_) => v.to_string(), - Value::DoubleQuotedString(_) - | Value::UnQuotedString(_) - | Value::EscapedStringLiteral(_) - | Value::NationalStringLiteral(_) - | Value::SingleQuotedByteStringLiteral(_) - | Value::DoubleQuotedByteStringLiteral(_) - | Value::RawStringLiteral(_) - | Value::HexStringLiteral(_) - | Value::Null - | Value::Placeholder(_) => { - return plan_err!("Unsupported Value {}", value[0]); - } - }, + SQLExpr::Value(v) => value_to_string(v).ok_or_else(|| { + plan_err!("Unsupported Value {}", value[0]) + })?, // for capture signed number e.g. +8, -8 SQLExpr::UnaryOp { op, expr } => match op { UnaryOperator::Plus => format!("+{expr}"), From d64ac06adbb76aec2af95f00fac4d26d0b28e6e4 Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Mon, 6 May 2024 09:07:45 -0700 Subject: [PATCH 02/33] function and cast fixups --- datafusion/sql/src/expr/binary_op.rs | 2 ++ datafusion/sql/src/expr/mod.rs | 24 ++++++------------ datafusion/sql/src/select.rs | 1 + datafusion/sql/src/unparser/ast.rs | 2 ++ datafusion/sql/src/unparser/expr.rs | 37 ++++++++++++++++++---------- 5 files changed, 36 insertions(+), 30 deletions(-) diff --git a/datafusion/sql/src/expr/binary_op.rs b/datafusion/sql/src/expr/binary_op.rs index 0d37742e5b07..fcb57e8a82e4 100644 --- a/datafusion/sql/src/expr/binary_op.rs +++ b/datafusion/sql/src/expr/binary_op.rs @@ -51,6 +51,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { BinaryOperator::PGBitwiseShiftRight => Ok(Operator::BitwiseShiftRight), BinaryOperator::PGBitwiseShiftLeft => Ok(Operator::BitwiseShiftLeft), BinaryOperator::StringConcat => Ok(Operator::StringConcat), + BinaryOperator::ArrowAt => Ok(Operator::ArrowAt), + BinaryOperator::AtArrow => Ok(Operator::AtArrow), _ => not_impl_err!("Unsupported SQL binary operator {op:?}"), } } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index ed5421edfbb0..1bcfd8a20e30 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -17,7 +17,9 @@ use arrow_schema::DataType; use arrow_schema::TimeUnit; -use sqlparser::ast::{ArrayAgg, Expr as SQLExpr, JsonOperator, TrimWhereField, Value}; +use sqlparser::ast::{ + ArrayAgg, CastKind, Expr as SQLExpr, JsonOperator, TrimWhereField, Value, +}; use sqlparser::parser::ParserError::ParserError; use datafusion_common::{ @@ -76,16 +78,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { stack.push(StackEntry::SQLExpr(right)); stack.push(StackEntry::SQLExpr(left)); } - SQLExpr::JsonAccess { - left, - operator, - right, - } => { - let op = self.parse_sql_json_access(operator)?; - stack.push(StackEntry::Operator(op)); - stack.push(StackEntry::SQLExpr(right)); - stack.push(StackEntry::SQLExpr(left)); - } _ => { let expr = self.sql_expr_to_logical_expr_internal( *sql_expr, @@ -267,6 +259,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ), SQLExpr::Cast { + kind: CastKind::Cast | CastKind::DoubleColon, expr, data_type, format, @@ -296,7 +289,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(Expr::Cast(Cast::new(Box::new(expr), dt))) } - SQLExpr::TryCast { + SQLExpr::Cast { + kind: CastKind::TryCast | CastKind::SafeCast, expr, data_type, format, @@ -943,11 +937,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ) => GetFieldAccess::NamedStructField { name: ScalarValue::from(s), }, - SQLExpr::JsonAccess { - left, - operator: JsonOperator::Colon, - right, - } => { + SQLExpr::JsonAccess { value, path } => { let (start, stop, stride) = if let SQLExpr::JsonAccess { left: l, operator: JsonOperator::Colon, diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 730e84cd094b..006b9d79d7cc 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -527,6 +527,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { opt_except: _opt_except, opt_rename, opt_replace: _opt_replace, + opt_ilike: _opt_ilike, } = options; if opt_rename.is_some() { diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index 0a76aee2e066..b0b699a56d90 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -258,6 +258,8 @@ impl SelectBuilder { named_window: self.named_window.clone(), qualify: self.qualify.clone(), value_table_mode: self.value_table_mode, + connect_by: None, + window_before_qualify: false, }) } fn create_empty() -> Self { diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index c619c62668cc..30774d5d638d 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -129,13 +129,15 @@ impl Unparser<'_> { value: func_name.to_string(), quote_style: None, }]), - args, + args: ast::FunctionArguments::List(ast::FunctionArgumentList { + duplicate_treatment: None, + args, + clauses: vec![], + }), filter: None, null_treatment: None, over: None, - distinct: false, - special: false, - order_by: vec![], + within_group: vec![], })) } Expr::Between(Between { @@ -200,6 +202,7 @@ impl Unparser<'_> { Expr::Cast(Cast { expr, data_type }) => { let inner_expr = self.expr_to_sql(expr)?; Ok(ast::Expr::Cast { + kind: ast::CastKind::Cast, expr: Box::new(inner_expr), data_type: self.arrow_dtype_to_ast_dtype(data_type)?, format: None, @@ -256,13 +259,15 @@ impl Unparser<'_> { value: func_name.to_string(), quote_style: None, }]), - args, + args: ast::FunctionArguments::List(ast::FunctionArgumentList { + duplicate_treatment: None, + args, + clauses: vec![], + }), filter: None, null_treatment: None, over, - distinct: false, - special: false, - order_by: vec![], + within_group: vec![], })) } Expr::SimilarTo(Like { @@ -282,7 +287,7 @@ impl Unparser<'_> { negated: *negated, expr: Box::new(self.expr_to_sql(expr)?), pattern: Box::new(self.expr_to_sql(pattern)?), - escape_char: *escape_char, + escape_char: escape_char.map(|c| c.to_string()), }), Expr::AggregateFunction(agg) => { let func_name = agg.func_def.name(); @@ -297,13 +302,17 @@ impl Unparser<'_> { value: func_name.to_string(), quote_style: None, }]), - args, + args: ast::FunctionArguments::List(ast::FunctionArgumentList { + duplicate_treatment: agg + .distinct + .then_some(ast::DuplicateTreatment::Distinct), + args, + clauses: vec![], + }), filter, null_treatment: None, over: None, - distinct: agg.distinct, - special: false, - order_by: vec![], + within_group: vec![], })) } Expr::ScalarSubquery(subq) => { @@ -643,6 +652,7 @@ impl Unparser<'_> { ))?; Ok(ast::Expr::Cast { + kind: ast::CastKind::Cast, expr: Box::new(ast::Expr::Value(ast::Value::SingleQuotedString( date.to_string(), ))), @@ -665,6 +675,7 @@ impl Unparser<'_> { ))?; Ok(ast::Expr::Cast { + kind: ast::CastKind::Cast, expr: Box::new(ast::Expr::Value(ast::Value::SingleQuotedString( datetime.to_string(), ))), From c11222272f0368d1a9896af5c3959bbbe29e3afb Mon Sep 17 00:00:00 2001 From: tison Date: Wed, 8 May 2024 18:10:25 +0800 Subject: [PATCH 03/33] catchup refactors Signed-off-by: tison --- datafusion/sql/src/expr/json_access.rs | 31 -------------------- datafusion/sql/src/expr/mod.rs | 25 ++++++++++++---- datafusion/sql/src/parser.rs | 1 - datafusion/sql/src/planner.rs | 2 +- datafusion/sql/src/select.rs | 15 +++++++--- datafusion/sql/src/statement.rs | 40 +++++++++++++++++--------- 6 files changed, 57 insertions(+), 57 deletions(-) delete mode 100644 datafusion/sql/src/expr/json_access.rs diff --git a/datafusion/sql/src/expr/json_access.rs b/datafusion/sql/src/expr/json_access.rs deleted file mode 100644 index b24482f88297..000000000000 --- a/datafusion/sql/src/expr/json_access.rs +++ /dev/null @@ -1,31 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use crate::planner::{ContextProvider, SqlToRel}; -use datafusion_common::{not_impl_err, Result}; -use datafusion_expr::Operator; -use sqlparser::ast::JsonOperator; - -impl<'a, S: ContextProvider> SqlToRel<'a, S> { - pub(crate) fn parse_sql_json_access(&self, op: JsonOperator) -> Result { - match op { - JsonOperator::AtArrow => Ok(Operator::AtArrow), - JsonOperator::ArrowAt => Ok(Operator::ArrowAt), - _ => not_impl_err!("Unsupported SQL json operator {op:?}"), - } - } -} diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 1bcfd8a20e30..6cc62384cd61 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -17,9 +17,7 @@ use arrow_schema::DataType; use arrow_schema::TimeUnit; -use sqlparser::ast::{ - ArrayAgg, CastKind, Expr as SQLExpr, JsonOperator, TrimWhereField, Value, -}; +use sqlparser::ast::{CastKind, Expr as SQLExpr, TrimWhereField, Value}; use sqlparser::parser::ParserError::ParserError; use datafusion_common::{ @@ -40,7 +38,6 @@ mod binary_op; mod function; mod grouping_set; mod identifier; -mod json_access; mod order_by; mod subquery; mod substring; @@ -748,7 +745,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { negated: bool, expr: SQLExpr, pattern: SQLExpr, - escape_char: Option, + escape_char: Option, schema: &DFSchema, planner_context: &mut PlannerContext, case_insensitive: bool, @@ -758,6 +755,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if pattern_type != DataType::Utf8 && pattern_type != DataType::Null { return plan_err!("Invalid pattern in LIKE expression"); } + let escape_char = if let Some(char) = escape_char { + if char.len() != 1 { + return plan_err!("Invalid escape character in LIKE expression"); + } + Some(char.chars().next().unwrap()) + } else { + None + }; Ok(Expr::Like(Like::new( negated, Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?), @@ -772,7 +777,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { negated: bool, expr: SQLExpr, pattern: SQLExpr, - escape_char: Option, + escape_char: Option, schema: &DFSchema, planner_context: &mut PlannerContext, ) -> Result { @@ -781,6 +786,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if pattern_type != DataType::Utf8 && pattern_type != DataType::Null { return plan_err!("Invalid pattern in SIMILAR TO expression"); } + let escape_char = if let Some(char) = escape_char { + if char.len() != 1 { + return plan_err!("Invalid escape character in SIMILAR TO expression"); + } + Some(char.chars().next().unwrap()) + } else { + None + }; Ok(Expr::SimilarTo(Like::new( negated, Box::new(self.sql_expr_to_logical_expr(expr, schema, planner_context)?), diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 5a999ab21d30..14ceebdeaee1 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -498,7 +498,6 @@ impl<'a> DFParser<'a> { pub fn parse_option_value(&mut self) -> Result { let next_token = self.parser.next_token(); match next_token.token { - Token::Word(Word { value, .. }) => Ok(Value::UnQuotedString(value)), Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)), Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)), Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)), diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 0066f75f0d30..0804bce24a6f 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -366,7 +366,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { pub(crate) fn convert_data_type(&self, sql_type: &SQLDataType) -> Result { match sql_type { SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type)) - | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_sql_type)) => { + | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_sql_type, _)) => { // Arrays may be multi-dimensional. let inner_data_type = self.convert_data_type(inner_sql_type)?; Ok(DataType::new_list(inner_data_type, true)) diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 006b9d79d7cc..3e2a2b97559b 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -39,8 +39,8 @@ use datafusion_expr::{ Expr, Filter, GroupingSet, LogicalPlan, LogicalPlanBuilder, Partitioning, }; use sqlparser::ast::{ - Distinct, Expr as SQLExpr, GroupByExpr, OrderByExpr, ReplaceSelectItem, - WildcardAdditionalOptions, WindowType, + Distinct, Expr as SQLExpr, GroupByExpr, NamedWindowExpr, OrderByExpr, + ReplaceSelectItem, WildcardAdditionalOptions, WindowType, }; use sqlparser::ast::{NamedWindowDefinition, Select, SelectItem, TableWithJoins}; @@ -727,10 +727,17 @@ fn match_window_definitions( } | SelectItem::UnnamedExpr(SQLExpr::Function(f)) = proj { - for NamedWindowDefinition(window_ident, window_spec) in named_windows.iter() { + for NamedWindowDefinition(window_ident, window_expr) in named_windows.iter() { if let Some(WindowType::NamedWindow(ident)) = &f.over { if ident.eq(window_ident) { - f.over = Some(WindowType::WindowSpec(window_spec.clone())) + f.over = Some(match window_expr { + NamedWindowExpr::NamedWindow(ident) => { + WindowType::NamedWindow(ident.clone()) + } + NamedWindowExpr::WindowSpec(spec) => { + WindowType::WindowSpec(spec.clone()) + } + }) } } } diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index c4bb5f1afebf..816b57ac04bd 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -53,7 +53,12 @@ use datafusion_expr::{ Volatility, WriteOp, }; use sqlparser::ast; -use sqlparser::ast::{Assignment, ColumnDef, CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, Expr, FromTable, Ident, Insert, ObjectName, ObjectType, Query, SchemaName, SetExpr, ShowCreateObject, ShowStatementFilter, Statement, TableConstraint, TableFactor, TableWithJoins, TransactionMode, UnaryOperator, Value}; +use sqlparser::ast::{ + Assignment, ColumnDef, CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, + Expr, FromTable, Ident, Insert, ObjectName, ObjectType, Query, SchemaName, SetExpr, + ShowCreateObject, ShowStatementFilter, Statement, TableConstraint, TableFactor, + TableWithJoins, TransactionMode, UnaryOperator, Value, +}; use sqlparser::parser::ParserError::ParserError; fn ident_to_string(ident: &Ident) -> String { @@ -417,18 +422,19 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } ObjectType::Schema => { let name = match name { - TableReference::Bare { table } => Ok(SchemaReference::Bare { schema: table } ) , - TableReference::Partial { schema, table } => Ok(SchemaReference::Full { schema: table,catalog: schema }), + TableReference::Bare { table } => Ok(SchemaReference::Bare { schema: table }), + TableReference::Partial { schema, table } => Ok(SchemaReference::Full { schema: table, catalog: schema }), TableReference::Full { catalog: _, schema: _, table: _ } => { Err(ParserError("Invalid schema specifier (has 3 parts)".to_string())) - }, + } }?; Ok(LogicalPlan::Ddl(DdlStatement::DropCatalogSchema(DropCatalogSchema { name, if_exists, cascade, schema: DFSchemaRef::new(DFSchema::empty()), - })))}, + }))) + } _ => not_impl_err!( "Only `DROP TABLE/VIEW/SCHEMA ...` statement is supported currently" ), @@ -863,9 +869,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let mut options = HashMap::new(); for (key, value) in statement.options { - let value_string = value_to_string(&value).ok_or_else(|| { - plan_err!("Unsupported Value in COPY statement {}", value) - })?; + let value_string = match value_to_string(&value) { + None => { + return plan_err!("Unsupported Value in COPY statement {}", value) + } + Some(v) => v, + }; if !(&key.contains('.')) { // If config does not belong to any namespace, assume it is // a format option and apply the format prefix for backwards @@ -885,9 +894,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } else { let e = || { DataFusionError::Configuration( - "Format not explicitly set and unable to get file extension! Use STORED AS to define file format." - .to_string(), - ) + "Format not explicitly set and unable to get file extension! Use STORED AS to define file format." + .to_string(), + ) }; // try to infer file format from file extension let extension: &str = &Path::new(&statement.target) @@ -1130,9 +1139,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // parse value string from Expr let value_string = match &value[0] { SQLExpr::Identifier(i) => ident_to_string(i), - SQLExpr::Value(v) => value_to_string(v).ok_or_else(|| { - plan_err!("Unsupported Value {}", value[0]) - })?, + SQLExpr::Value(v) => match value_to_string(v) { + None => { + return plan_err!("Unsupported Value {}", value[0]); + } + Some(v) => v, + }, // for capture signed number e.g. +8, -8 SQLExpr::UnaryOp { op, expr } => match op { UnaryOperator::Plus => format!("+{expr}"), From f29af9d2ae19c4209d39f90cd7c5d6331a1eb6bd Mon Sep 17 00:00:00 2001 From: tison Date: Tue, 14 May 2024 10:36:12 +0800 Subject: [PATCH 04/33] try migrate json expr Signed-off-by: tison --- datafusion/sql/src/expr/mod.rs | 93 ++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 45 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 6cc62384cd61..f148900bdaae 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -17,7 +17,7 @@ use arrow_schema::DataType; use arrow_schema::TimeUnit; -use sqlparser::ast::{CastKind, Expr as SQLExpr, TrimWhereField, Value}; +use sqlparser::ast::{CastKind, Expr as SQLExpr, JsonPathElem, TrimWhereField, Value}; use sqlparser::parser::ParserError::ParserError; use datafusion_common::{ @@ -212,7 +212,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { agg_func.order_by.clone(), agg_func.null_treatment, )), true) - }, + } _ => (expr, false), } } @@ -916,7 +916,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { distinct, order_by, null_treatment, - filter: None, // filter is passed in + filter: _, // filter is passed in }) => Ok(Expr::AggregateFunction(expr::AggregateFunction::new( fun, args, @@ -944,65 +944,68 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { schema: &DFSchema, planner_context: &mut PlannerContext, ) -> Result { - let field = match expr.clone() { + match expr.clone() { SQLExpr::Value( Value::SingleQuotedString(s) | Value::DoubleQuotedString(s), - ) => GetFieldAccess::NamedStructField { - name: ScalarValue::from(s), - }, + ) => { + return Ok(GetFieldAccess::NamedStructField { + name: ScalarValue::from(s), + }) + } SQLExpr::JsonAccess { value, path } => { - let (start, stop, stride) = if let SQLExpr::JsonAccess { - left: l, - operator: JsonOperator::Colon, - right: r, - } = *left - { - let start = Box::new(self.sql_expr_to_logical_expr( - *l, - schema, - planner_context, - )?); - let stop = Box::new(self.sql_expr_to_logical_expr( - *r, - schema, - planner_context, - )?); - let stride = Box::new(self.sql_expr_to_logical_expr( - *right, - schema, - planner_context, - )?); - (start, stop, stride) - } else { - let start = Box::new(self.sql_expr_to_logical_expr( - *left, + let start = Box::new(self.sql_expr_to_logical_expr( + *value, + schema, + planner_context, + )?); + + fn json_path_elem_to_expr(expr: JsonPathElem) -> SQLExpr { + match expr { + JsonPathElem::Dot { key, .. } => SQLExpr::Value(Value::SingleQuotedString(key)), + JsonPathElem::Bracket { key } => key, + } + } + + let mut path_iter = path.path.into_iter(); + let stop = match path_iter.next() { + None => { + return Ok(GetFieldAccess::ListIndex { + key: Box::new(self.sql_expr_to_logical_expr( + expr, + schema, + planner_context, + )?), + }) + } + Some(expr) => Box::new(self.sql_expr_to_logical_expr( + json_path_elem_to_expr(expr), schema, planner_context, - )?); - let stop = Box::new(self.sql_expr_to_logical_expr( - *right, + )?), + }; + let stride = match path_iter.next() { + None => Box::new(Expr::Literal(ScalarValue::Int64(Some(1)))), + Some(expr) => Box::new(self.sql_expr_to_logical_expr( + json_path_elem_to_expr(expr), schema, planner_context, - )?); - let stride = Box::new(Expr::Literal(ScalarValue::Int64(Some(1)))); - (start, stop, stride) + )?), }; - GetFieldAccess::ListRange { + + Ok(GetFieldAccess::ListRange { start, stop, stride, - } + }) } - _ => GetFieldAccess::ListIndex { + _ => Ok(GetFieldAccess::ListIndex { key: Box::new(self.sql_expr_to_logical_expr( expr, schema, planner_context, )?), - }, - }; - - Ok(field) + }), + } } fn plan_indexed( From 1ac63fa2a3bf6f97b1f97a54166f3ffc164900a2 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 14 May 2024 16:34:02 -0400 Subject: [PATCH 05/33] Update for changes in sqlparser --- datafusion/sql/src/expr/function.rs | 66 ++++++++++- datafusion/sql/src/expr/mod.rs | 107 ++---------------- datafusion/sql/src/parser.rs | 4 +- datafusion/sqllogictest/test_files/array.slt | 12 +- datafusion/sqllogictest/test_files/errors.slt | 2 +- datafusion/sqllogictest/test_files/struct.slt | 24 ++-- 6 files changed, 89 insertions(+), 126 deletions(-) diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 3adf2960784d..eceee912f9d2 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -30,7 +30,9 @@ use datafusion_expr::{ BuiltInWindowFunction, }; use sqlparser::ast::{ - Expr as SQLExpr, Function as SQLFunction, FunctionArg, FunctionArgExpr, WindowType, + DuplicateTreatment, Expr as SQLExpr, Function as SQLFunction, FunctionArg, + FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, + WindowType, }; use std::str::FromStr; use strum::IntoEnumIterator; @@ -90,13 +92,63 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { name, args, over, - distinct, filter, - null_treatment, - special: _, // true if not called with trailing parens - order_by, + mut null_treatment, + within_group, } = function; + // todo handle case when args is none (for current_timestamp) + let FunctionArguments::List(args) = args else { + return not_impl_err!("Unsupported function argument {args:?}"); + }; + + let FunctionArgumentList { + duplicate_treatment, + args, + clauses, + } = args; + + let distinct = match duplicate_treatment { + Some(DuplicateTreatment::Distinct) => true, + Some(DuplicateTreatment::All) => false, + None => false, + }; + + // Pull out argument handling + let mut order_by = None; + for clause in clauses { + match clause { + FunctionArgumentClause::IgnoreOrRespectNulls(nt) => { + if null_treatment.is_some() { + return not_impl_err!( + "Calling {name}: Duplicated null treatment clause" + ); + } + null_treatment = Some(nt); + } + FunctionArgumentClause::OrderBy(oby) => { + if order_by.is_some() { + return not_impl_err!("Calling {name}: Duplicated ORDER BY clause in function arguments"); + } + order_by = Some(oby); + } + FunctionArgumentClause::Limit(limit) => { + return not_impl_err!( + "Calling {name}: LIMIT not supported in function arguments: {limit}" + ) + } + FunctionArgumentClause::OnOverflow(overflow) => { + return not_impl_err!( + "Calling {name}: ON OVERFLOW not supported in function arguments: {overflow}" + ) + } + } + } + + if !within_group.is_empty() { + return not_impl_err!("WITHIN GROUP is not supported yet: {within_group:?}"); + } + // If function is a window function (it has an OVER clause), // it shouldn't have ordering requirement as function argument // required ordering should be defined in OVER clause. @@ -128,12 +180,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { return Ok(Expr::Unnest(Unnest::new(expr))); } - if !order_by.is_empty() && is_function_window { + if order_by.is_some() && is_function_window { return plan_err!( "Aggregate ORDER BY is not implemented for window functions" ); } + let order_by = order_by.unwrap_or_default(); + // then, window function if let Some(WindowType::WindowSpec(window)) = over { let partition_by = window diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index f148900bdaae..cc87c30be8f6 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -24,11 +24,10 @@ use datafusion_common::{ internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue, }; -use datafusion_expr::expr::AggregateFunctionDefinition; use datafusion_expr::expr::InList; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{ - col, expr, lit, AggregateFunction, Between, BinaryExpr, Cast, Expr, ExprSchemable, + col, lit, AggregateFunction, Between, BinaryExpr, Cast, Expr, ExprSchemable, GetFieldAccess, GetIndexedField, Like, Literal, Operator, TryCast, }; @@ -488,10 +487,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { planner_context, ), - SQLExpr::AggregateExpressionWithFilter { expr, filter } => { - self.sql_agg_with_filter_to_expr(*expr, *filter, schema, planner_context) - } - SQLExpr::Function(function) => { self.sql_function_to_expr(function, schema, planner_context) } @@ -543,10 +538,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.parse_scalar_subquery(*subquery, schema, planner_context) } - SQLExpr::ArrayAgg(array_agg) => { - self.parse_array_agg(array_agg, schema, planner_context) - } - SQLExpr::Struct { values, fields } => { self.parse_struct(values, fields, schema, planner_context) } @@ -670,55 +661,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ))) } - fn parse_array_agg( - &self, - array_agg: ArrayAgg, - input_schema: &DFSchema, - planner_context: &mut PlannerContext, - ) -> Result { - // Some dialects have special syntax for array_agg. DataFusion only supports it like a function. - let ArrayAgg { - distinct, - expr, - order_by, - limit, - within_group, - } = array_agg; - let order_by = if let Some(order_by) = order_by { - Some(self.order_by_to_sort_expr( - &order_by, - input_schema, - planner_context, - true, - None, - )?) - } else { - None - }; - - if let Some(limit) = limit { - return not_impl_err!("LIMIT not supported in ARRAY_AGG: {limit}"); - } - - if within_group { - return not_impl_err!("WITHIN GROUP not supported in ARRAY_AGG"); - } - - let args = - vec![self.sql_expr_to_logical_expr(*expr, input_schema, planner_context)?]; - - // next, aggregate built-ins - Ok(Expr::AggregateFunction(expr::AggregateFunction::new( - AggregateFunction::ArrayAgg, - args, - distinct, - None, - order_by, - None, - ))) - // see if we can rewrite it into NTH-VALUE - } - fn sql_in_list_to_expr( &self, expr: SQLExpr, @@ -902,41 +844,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let args = vec![fullstr, substr]; Ok(Expr::ScalarFunction(ScalarFunction::new_udf(fun, args))) } - fn sql_agg_with_filter_to_expr( - &self, - expr: SQLExpr, - filter: SQLExpr, - schema: &DFSchema, - planner_context: &mut PlannerContext, - ) -> Result { - match self.sql_expr_to_logical_expr(expr, schema, planner_context)? { - Expr::AggregateFunction(expr::AggregateFunction { - func_def: AggregateFunctionDefinition::BuiltIn(fun), - args, - distinct, - order_by, - null_treatment, - filter: _, // filter is passed in - }) => Ok(Expr::AggregateFunction(expr::AggregateFunction::new( - fun, - args, - distinct, - Some(Box::new(self.sql_expr_to_logical_expr( - filter, - schema, - planner_context, - )?)), - order_by, - null_treatment, - ))), - Expr::AggregateFunction(..) => { - internal_err!("Expected null filter clause in aggregate function") - } - _ => internal_err!( - "AggregateExpressionWithFilter expression was not an AggregateFunction" - ), - } - } fn plan_indices( &self, @@ -947,11 +854,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { match expr.clone() { SQLExpr::Value( Value::SingleQuotedString(s) | Value::DoubleQuotedString(s), - ) => { - return Ok(GetFieldAccess::NamedStructField { - name: ScalarValue::from(s), - }) - } + ) => Ok(GetFieldAccess::NamedStructField { + name: ScalarValue::from(s), + }), SQLExpr::JsonAccess { value, path } => { let start = Box::new(self.sql_expr_to_logical_expr( *value, @@ -961,7 +866,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { fn json_path_elem_to_expr(expr: JsonPathElem) -> SQLExpr { match expr { - JsonPathElem::Dot { key, .. } => SQLExpr::Value(Value::SingleQuotedString(key)), + JsonPathElem::Dot { key, .. } => { + SQLExpr::Value(Value::SingleQuotedString(key)) + } JsonPathElem::Bracket { key } => key, } } diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 14ceebdeaee1..58866189b0cb 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -498,6 +498,8 @@ impl<'a> DFParser<'a> { pub fn parse_option_value(&mut self) -> Result { let next_token = self.parser.next_token(); match next_token.token { + // e.g. things like "snappy" or "gzip" that may be keywords + Token::Word(word) => Ok(Value::SingleQuotedString(word.value)), Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)), Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)), Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)), @@ -1580,7 +1582,7 @@ mod tests { ), ( "format.compression".to_string(), - Value::UnQuotedString("snappy".to_string()), + Value::SingleQuotedString("snappy".to_string()), ), ]; diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 3b90187f07e0..0871c11396be 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -689,7 +689,7 @@ select column1, column2, column3, column4, column5 from nested_arrays; # values table query IIIRT -select a, b, c, d, e from values; +select a, b, c, d, e from "values"; ---- 1 1 2 1.1 Lorem 2 3 4 2.2 ipsum @@ -1074,7 +1074,7 @@ select make_array(NULL), make_array(NULL, NULL, NULL), make_array(make_array(NUL # make_array with 1 columns query ??? -select make_array(a), make_array(d), make_array(e) from values; +select make_array(a), make_array(d), make_array(e) from "values"; ---- [1] [1.1] [Lorem] [2] [2.2] [ipsum] @@ -1088,7 +1088,7 @@ select make_array(a), make_array(d), make_array(e) from values; # make_array with 2 columns #1 query ?? -select make_array(b, c), make_array(e, f) from values; +select make_array(b, c), make_array(e, f) from "values"; ---- [1, 2] [Lorem, A] [3, 4] [ipsum, ] @@ -1102,7 +1102,7 @@ select make_array(b, c), make_array(e, f) from values; # make_array with 4 columns query ? -select make_array(a, b, c, d) from values; +select make_array(a, b, c, d) from "values"; ---- [1.0, 1.0, 2.0, 1.1] [2.0, 3.0, 4.0, 2.2] @@ -6321,7 +6321,7 @@ SELECT string_to_array('abc def', ' ', 'def') [abc, ] query ? -select string_to_array(e, ',') from values; +select string_to_array(e, ',') from "values"; ---- [Lorem] [ipsum] @@ -6334,7 +6334,7 @@ select string_to_array(e, ',') from values; NULL query ? -select string_to_list(e, 'm') from values; +select string_to_list(e, 'm') from "values"; ---- [Lore, ] [ipsu, ] diff --git a/datafusion/sqllogictest/test_files/errors.slt b/datafusion/sqllogictest/test_files/errors.slt index ab281eac31f5..f8bb36dddd67 100644 --- a/datafusion/sqllogictest/test_files/errors.slt +++ b/datafusion/sqllogictest/test_files/errors.slt @@ -46,7 +46,7 @@ statement error DataFusion error: Arrow error: Cast error: Cannot cast string 'c SELECT CAST(c1 AS INT) FROM aggregate_test_100 # aggregation_with_bad_arguments -statement error DataFusion error: Error during planning: No function matches the given name and argument types 'COUNT\(\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tCOUNT\(Any, .., Any\) +statement error DataFusion error: SQL error: ParserError\("Expected an expression:, found: \)"\) SELECT COUNT(DISTINCT) FROM aggregate_test_100 # query_cte_incorrect diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index 3e685cbb45a0..a61d1e8734cd 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -39,7 +39,7 @@ select struct(1, 3.14, 'h')['c0'], struct(3, 2.55, 'b')['c1'], struct(2, 6.43, ' # struct[i] with columns query R -select struct(a, b, c)['c1'] from values; +select struct(a, b, c)['c1'] from "values"; ---- 1.1 2.2 @@ -65,7 +65,7 @@ select struct(1, 3.14 as name1, 'e', true); # struct scalar function with columns #1 query ? -select struct(a, b, c) from values; +select struct(a, b, c) from "values"; ---- {c0: 1, c1: 1.1, c2: a} {c0: 2, c1: 2.2, c2: b} @@ -73,7 +73,7 @@ select struct(a, b, c) from values; # struct scalar function with columns and scalars query ? -select struct(a, 'foo') from values; +select struct(a, 'foo') from "values"; ---- {c0: 1, c1: foo} {c0: 2, c1: foo} @@ -82,7 +82,7 @@ select struct(a, 'foo') from values; # explain struct scalar function with columns #1 query TT -explain select struct(a, b, c) from values; +explain select struct(a, b, c) from "values"; ---- logical_plan 01)Projection: struct(values.a, values.b, values.c) @@ -105,7 +105,7 @@ select named_struct(1); # error on odd number of arguments #3 query error DataFusion error: Execution error: named_struct requires an even number of arguments, got 1 instead -select named_struct(values.a) from values; +select named_struct(values.a) from "values"; # error on odd number of arguments #4 query error DataFusion error: Execution error: named_struct requires an even number of arguments, got 3 instead @@ -121,15 +121,15 @@ select named_struct('corret', 1, 0, 'wrong'); # error on even argument not a string literal #3 query error DataFusion error: Execution error: named_struct even arguments must be string literals, got values\.a instead at position 0 -select named_struct(values.a, 'a') from values; +select named_struct(values.a, 'a') from "values"; # error on even argument not a string literal #4 query error DataFusion error: Execution error: named_struct even arguments must be string literals, got values\.c instead at position 0 -select named_struct(values.c, 'c') from values; +select named_struct(values.c, 'c') from "values"; # named_struct with mixed scalar and array values #1 query ? -select named_struct('scalar', 27, 'array', values.a, 'null', NULL) from values; +select named_struct('scalar', 27, 'array', values.a, 'null', NULL) from "values"; ---- {scalar: 27, array: 1, null: } {scalar: 27, array: 2, null: } @@ -137,7 +137,7 @@ select named_struct('scalar', 27, 'array', values.a, 'null', NULL) from values; # named_struct with mixed scalar and array values #2 query ? -select named_struct('array', values.a, 'scalar', 27, 'null', NULL) from values; +select named_struct('array', values.a, 'scalar', 27, 'null', NULL) from "values"; ---- {array: 1, scalar: 27, null: } {array: 2, scalar: 27, null: } @@ -145,7 +145,7 @@ select named_struct('array', values.a, 'scalar', 27, 'null', NULL) from values; # named_struct with mixed scalar and array values #3 query ? -select named_struct('null', NULL, 'array', values.a, 'scalar', 27) from values; +select named_struct('null', NULL, 'array', values.a, 'scalar', 27) from "values"; ---- {null: , array: 1, scalar: 27} {null: , array: 2, scalar: 27} @@ -153,7 +153,7 @@ select named_struct('null', NULL, 'array', values.a, 'scalar', 27) from values; # named_struct with mixed scalar and array values #4 query ? -select named_struct('null_array', values.n, 'array', values.a, 'scalar', 27, 'null', NULL) from values; +select named_struct('null_array', values.n, 'array', values.a, 'scalar', 27, 'null', NULL) from "values"; ---- {null_array: , array: 1, scalar: 27, null: } {null_array: , array: 2, scalar: 27, null: } @@ -161,7 +161,7 @@ select named_struct('null_array', values.n, 'array', values.a, 'scalar', 27, 'nu # named_struct arrays only query ? -select named_struct('field_a', a, 'field_b', b) from values; +select named_struct('field_a', a, 'field_b', b) from "values"; ---- {field_a: 1, field_b: 1.1} {field_a: 2, field_b: 2.2} From d6da6fc7d9a9ebe18ac109692948dc220cc88829 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 14 May 2024 16:45:29 -0400 Subject: [PATCH 06/33] Update dependencies --- datafusion-cli/Cargo.lock | 79 +++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index fd471e750194..00bb0e90fbe0 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -363,9 +363,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e9eabd7a98fe442131a17c316bd9349c43695e49e730c3c8e12cfb5f4da2693" +checksum = "9c90a406b4495d129f00461241616194cb8a032c8d1c53c657f0961d5f8e0498" dependencies = [ "bzip2", "flate2", @@ -387,7 +387,7 @@ checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.63", ] [[package]] @@ -1093,7 +1093,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.61", + "syn 2.0.63", ] [[package]] @@ -1529,9 +1529,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" dependencies = [ "libc", "windows-sys 0.52.0", @@ -1679,7 +1679,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.63", ] [[package]] @@ -2283,9 +2283,9 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" [[package]] name = "num" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3135b08af27d103b0a51f2ae0f8632117b7b185ccf931445affa8df530576a41" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" dependencies = [ "num-bigint", "num-complex", @@ -2307,9 +2307,9 @@ dependencies = [ [[package]] name = "num-complex" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" dependencies = [ "num-traits", ] @@ -2342,11 +2342,10 @@ dependencies = [ [[package]] name = "num-rational" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" dependencies = [ - "autocfg", "num-bigint", "num-integer", "num-traits", @@ -2526,9 +2525,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "petgraph" -version = "0.6.4" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", "indexmap 2.2.6", @@ -2589,7 +2588,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.63", ] [[package]] @@ -2995,9 +2994,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51f344d206c5e1b010eec27349b815a4805f70a778895959d70b74b9b529b30a" +checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" [[package]] name = "rustls-webpki" @@ -3115,29 +3114,29 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.200" +version = "1.0.201" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddc6f9cc94d67c0e21aaf7eda3a010fd3af78ebf6e096aa6e2e13c79749cce4f" +checksum = "780f1cebed1629e4753a1a38a3c72d30b97ec044f0aef68cb26650a3c5cf363c" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.200" +version = "1.0.201" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "856f046b9400cee3c8c94ed572ecdb752444c24528c035cd35882aad6f492bcb" +checksum = "c5e405930b9796f1c00bee880d03fc7e0bb4b9a11afc776885ffe84320da2865" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.63", ] [[package]] name = "serde_json" -version = "1.0.116" +version = "1.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" dependencies = [ "itoa", "ryu", @@ -3249,9 +3248,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.45.0" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7bbffee862a796d67959a89859d6b1046bb5016d63e23835ad0da182777bbe0" +checksum = "11a81a8cad9befe4cf1b9d2d4b9c6841c76f0882a3fec00d95133953c13b3d3d" dependencies = [ "log", "sqlparser_derive", @@ -3265,7 +3264,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.63", ] [[package]] @@ -3311,7 +3310,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.61", + "syn 2.0.63", ] [[package]] @@ -3324,7 +3323,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.61", + "syn 2.0.63", ] [[package]] @@ -3346,9 +3345,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.61" +version = "2.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c993ed8ccba56ae856363b1845da7266a7cb78e1d146c8a32d54b45a8b831fc9" +checksum = "bf5be731623ca1a1fb7d8be6f261a3be6d3e2337b8a1f97be944d020c8fcb704" dependencies = [ "proc-macro2", "quote", @@ -3432,7 +3431,7 @@ checksum = "e2470041c06ec3ac1ab38d0356a6119054dedaea53e12fbefc0de730a1c08524" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.63", ] [[package]] @@ -3527,7 +3526,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.63", ] [[package]] @@ -3623,7 +3622,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.63", ] [[package]] @@ -3668,7 +3667,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.63", ] [[package]] @@ -3822,7 +3821,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.63", "wasm-bindgen-shared", ] @@ -3856,7 +3855,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.63", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4121,7 +4120,7 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.63", ] [[package]] From f553243fab44e27eb46e08e4f9e7ef3a114a37cd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 14 May 2024 17:04:14 -0400 Subject: [PATCH 07/33] handle zero argument form --- datafusion/sql/src/expr/function.rs | 81 +++++++++++++++---- .../sqllogictest/test_files/timestamps.slt | 7 ++ 2 files changed, 71 insertions(+), 17 deletions(-) diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index eceee912f9d2..dc8a79f4481a 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -29,11 +29,7 @@ use datafusion_expr::{ expr::{ScalarFunction, Unnest}, BuiltInWindowFunction, }; -use sqlparser::ast::{ - DuplicateTreatment, Expr as SQLExpr, Function as SQLFunction, FunctionArg, - FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, - WindowType, -}; +use sqlparser::ast::{DuplicateTreatment, Expr as SQLExpr, Function as SQLFunction, FunctionArg, FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, NullTreatment, ObjectName, OrderByExpr, WindowType}; use std::str::FromStr; use strum::IntoEnumIterator; @@ -81,13 +77,27 @@ fn find_closest_match(candidates: Vec, target: &str) -> String { .expect("No candidates provided.") // Panic if `candidates` argument is empty } -impl<'a, S: ContextProvider> SqlToRel<'a, S> { - pub(super) fn sql_function_to_expr( - &self, - function: SQLFunction, - schema: &DFSchema, - planner_context: &mut PlannerContext, - ) -> Result { +/// Arguments to for a function call extracted from the SQL AST +#[derive(Debug)] +struct FunctionArgs { + /// Function name + name: ObjectName, + /// Argument expressions + args: Vec, + /// ORDER BY clause, if any + order_by: Vec, + /// OVER clause, if any + over: Option, + /// FILTER clause, if any + filter: Option>, + /// NULL treatment clause, if any + null_treatment: Option, + /// DISTINCT + distinct: bool, +} + +impl FunctionArgs { + fn try_new(function: SQLFunction) -> Result { let SQLFunction { name, args, @@ -97,9 +107,17 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { within_group, } = function; - // todo handle case when args is none (for current_timestamp) + // Handle no argument form (aka `current_time` as opposed to `current_time()`) let FunctionArguments::List(args) = args else { - return not_impl_err!("Unsupported function argument {args:?}"); + return Ok(Self { + name, + args: vec![], + order_by: vec![], + over, + filter, + null_treatment, + distinct: false, + }); }; let FunctionArgumentList { @@ -149,6 +167,37 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { return not_impl_err!("WITHIN GROUP is not supported yet: {within_group:?}"); } + let order_by = order_by.unwrap_or_default(); + + Ok(Self { + name, + args, + order_by, + over, + filter, + null_treatment, + distinct, + }) + } +} + +impl<'a, S: ContextProvider> SqlToRel<'a, S> { + pub(super) fn sql_function_to_expr( + &self, + function: SQLFunction, + schema: &DFSchema, + planner_context: &mut PlannerContext, + ) -> Result { + let function_args = FunctionArgs::try_new(function)?; + let FunctionArgs { + name, + args, + order_by, over, + filter, + null_treatment, + distinct, + } = function_args; + // If function is a window function (it has an OVER clause), // it shouldn't have ordering requirement as function argument // required ordering should be defined in OVER clause. @@ -180,14 +229,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { return Ok(Expr::Unnest(Unnest::new(expr))); } - if order_by.is_some() && is_function_window { + if !order_by.is_empty() && is_function_window { return plan_err!( "Aggregate ORDER BY is not implemented for window functions" ); } - let order_by = order_by.unwrap_or_default(); - // then, window function if let Some(WindowType::WindowSpec(window)) = over { let partition_by = window diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 13fb8fba0d31..389c36a4a47d 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -2795,3 +2795,10 @@ SELECT '2000-12-01 04:04:12' AT TIME ZONE 'America/New York'; # abbreviated timezone is not supported statement error SELECT '2023-03-12 02:00:00' AT TIME ZONE 'EDT'; + + +# Test current_time without parentheses +query B +select current_time = current_time; +---- +true From f8eed01659db6c8eb4e2388bb2b9e70e6d8cd010 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 14 May 2024 17:04:42 -0400 Subject: [PATCH 08/33] fmt --- datafusion/sql/src/expr/function.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index dc8a79f4481a..786f0cc3663c 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -29,7 +29,11 @@ use datafusion_expr::{ expr::{ScalarFunction, Unnest}, BuiltInWindowFunction, }; -use sqlparser::ast::{DuplicateTreatment, Expr as SQLExpr, Function as SQLFunction, FunctionArg, FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, NullTreatment, ObjectName, OrderByExpr, WindowType}; +use sqlparser::ast::{ + DuplicateTreatment, Expr as SQLExpr, Function as SQLFunction, FunctionArg, + FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, + NullTreatment, ObjectName, OrderByExpr, WindowType, +}; use std::str::FromStr; use strum::IntoEnumIterator; @@ -192,7 +196,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let FunctionArgs { name, args, - order_by, over, + order_by, + over, filter, null_treatment, distinct, From 35d66e7b1b5b13b0f847ce2692d689868965d6b6 Mon Sep 17 00:00:00 2001 From: tison Date: Sun, 19 May 2024 13:02:48 +0800 Subject: [PATCH 09/33] fixup more Signed-off-by: tison --- datafusion/sql/src/unparser/expr.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 693231ea096d..8127d4237539 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -425,7 +425,8 @@ impl Unparser<'_> { } Expr::TryCast(TryCast { expr, data_type }) => { let inner_expr = self.expr_to_sql(expr)?; - Ok(ast::Expr::TryCast { + Ok(ast::Expr::Cast { + kind: ast::CastKind::TryCast, expr: Box::new(inner_expr), data_type: self.arrow_dtype_to_ast_dtype(data_type)?, format: None, From 60ecc27d634e521084a046823f4409555bd8602e Mon Sep 17 00:00:00 2001 From: tison Date: Sun, 19 May 2024 13:22:24 +0800 Subject: [PATCH 10/33] fixup more Signed-off-by: tison --- datafusion/sqllogictest/test_files/aggregate.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 983f8a085ba9..bcb2991a3031 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -194,7 +194,7 @@ select array_sort(c1), array_sort(c2) from ( statement ok drop table array_agg_distinct_list_table; -statement error This feature is not implemented: LIMIT not supported in ARRAY_AGG: 1 +statement error This feature is not implemented: Calling array_agg: LIMIT not supported in function arguments: 1 SELECT array_agg(c13 LIMIT 1) FROM aggregate_test_100 From 4fec8ee46dff23c3c2ce504f548d2521b1114ff7 Mon Sep 17 00:00:00 2001 From: tison Date: Wed, 29 May 2024 06:20:02 +0800 Subject: [PATCH 11/33] try use jmhain's branch Signed-off-by: tison --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 73efc213f899..8fc51c195468 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -111,7 +111,7 @@ rand = "0.8" regex = "1.8" rstest = "0.19.0" serde_json = "1" -sqlparser = { version = "0.46.0", features = ["visitor"] } +sqlparser = { git = "https://github.com/jmhain/sqlparser-rs", branch = "subscript", features = ["visitor"] } tempfile = "3" thiserror = "1.0.44" tokio = { version = "1.36", features = ["macros", "rt", "sync"] } From 7e665e9e0dcb89ebe6cb4d1adc93845ad237e3b4 Mon Sep 17 00:00:00 2001 From: tison Date: Wed, 29 May 2024 06:29:48 +0800 Subject: [PATCH 12/33] fix compile FunctionArgumentClause exhausted Signed-off-by: tison --- datafusion/sql/src/expr/function.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 9ad3f34817f1..7faf73b50df3 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -164,6 +164,16 @@ impl FunctionArgs { "Calling {name}: ON OVERFLOW not supported in function arguments: {overflow}" ) } + FunctionArgumentClause::Having(having) => { + return not_impl_err!( + "Calling {name}: HAVING not supported in function arguments: {having}" + ) + } + FunctionArgumentClause::Separator(sep) => { + return not_impl_err!( + "Calling {name}: SEPARATOR not supported in function arguments: {sep}" + ) + } } } From a5f85688bf0e9d1666c6cfd5c0afe98e33e702dc Mon Sep 17 00:00:00 2001 From: tison Date: Wed, 29 May 2024 06:32:11 +0800 Subject: [PATCH 13/33] fix compile set multi vars Signed-off-by: tison --- datafusion/sql/src/statement.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index ff3dea7533a9..29d817120c22 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -19,6 +19,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::path::Path; use std::str::FromStr; use std::sync::Arc; +use arrow_array::Array; use crate::parser::{ CopyToSource, CopyToStatement, CreateExternalTable, DFParser, ExplainStatement, @@ -53,12 +54,7 @@ use datafusion_expr::{ Volatility, WriteOp, }; use sqlparser::ast; -use sqlparser::ast::{ - Assignment, ColumnDef, CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, - Expr, FromTable, Ident, Insert, ObjectName, ObjectType, Query, SchemaName, SetExpr, - ShowCreateObject, ShowStatementFilter, Statement, TableConstraint, TableFactor, - TableWithJoins, TransactionMode, UnaryOperator, Value, -}; +use sqlparser::ast::{Assignment, ColumnDef, CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, Expr, FromTable, Ident, Insert, ObjectName, ObjectType, OneOrManyWithParens, Query, SchemaName, SetExpr, ShowCreateObject, ShowStatementFilter, Statement, TableConstraint, TableFactor, TableWithJoins, TransactionMode, UnaryOperator, Value}; use sqlparser::parser::ParserError::ParserError; fn ident_to_string(ident: &Ident) -> String { @@ -229,9 +225,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Statement::SetVariable { local, hivevar, - variable, + variables, value, - } => self.set_variable_to_plan(local, hivevar, &variable, value), + } => self.set_variable_to_plan(local, hivevar, &variables, value), Statement::CreateTable { query, @@ -1140,8 +1136,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { &self, local: bool, hivevar: bool, - variable: &ObjectName, - value: Vec, + variables: &OneOrManyWithParens, + value: Vec, ) -> Result { if local { return not_impl_err!("LOCAL is not supported"); @@ -1151,7 +1147,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { return not_impl_err!("HIVEVAR is not supported"); } - let variable = object_name_to_string(variable); + if variables.len() != 1 { + return not_impl_err!("SET only supports single variable assignment"); + } + + let variable = object_name_to_string(variables[0]); let mut variable_lower = variable.to_lowercase(); if variable_lower == "timezone" || variable_lower == "time.zone" { From 545abae6cba61916aa3bc1584080ef8d9c980f48 Mon Sep 17 00:00:00 2001 From: tison Date: Wed, 29 May 2024 06:33:46 +0800 Subject: [PATCH 14/33] fix compile new string values Signed-off-by: tison --- datafusion/sql/src/statement.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 29d817120c22..485f6d688103 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -71,7 +71,14 @@ fn value_to_string(value: &Value) -> Option { | Value::NationalStringLiteral(_) | Value::SingleQuotedByteStringLiteral(_) | Value::DoubleQuotedByteStringLiteral(_) - | Value::RawStringLiteral(_) + | Value::TripleSingleQuotedString(_) + | Value::TripleDoubleQuotedString(_) + | Value::TripleSingleQuotedByteStringLiteral(_) + | Value::TripleDoubleQuotedByteStringLiteral(_) + | Value::SingleQuotedRawStringLiteral(_) + | Value::DoubleQuotedRawStringLiteral(_) + | Value::TripleSingleQuotedRawStringLiteral(_) + | Value::TripleDoubleQuotedRawStringLiteral(_) | Value::HexStringLiteral(_) | Value::Null | Value::Placeholder(_) => None, From c8d5ea075032a46f7f13190349894c028879de67 Mon Sep 17 00:00:00 2001 From: tison Date: Wed, 29 May 2024 06:37:57 +0800 Subject: [PATCH 15/33] fix compile set multi vars Signed-off-by: tison --- datafusion/sql/src/statement.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 485f6d688103..797aa292f502 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -19,7 +19,6 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::path::Path; use std::str::FromStr; use std::sync::Arc; -use arrow_array::Array; use crate::parser::{ CopyToSource, CopyToStatement, CreateExternalTable, DFParser, ExplainStatement, @@ -1154,11 +1153,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { return not_impl_err!("HIVEVAR is not supported"); } - if variables.len() != 1 { - return not_impl_err!("SET only supports single variable assignment"); - } - - let variable = object_name_to_string(variables[0]); + let variable = match variables { + OneOrManyWithParens::One(v) => object_name_to_string(v), + OneOrManyWithParens::Many(vs) => { + return not_impl_err!("SET only supports single variable assignment: {vs:?}"); + } + }; let mut variable_lower = variable.to_lowercase(); if variable_lower == "timezone" || variable_lower == "time.zone" { From 598260fe25c63cc56c156844dbcd9cd4c0913468 Mon Sep 17 00:00:00 2001 From: tison Date: Wed, 29 May 2024 06:43:27 +0800 Subject: [PATCH 16/33] fix compile Subscript Signed-off-by: tison --- datafusion/sql/src/expr/mod.rs | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 81415c93b983..36c5f22f656a 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -17,7 +17,7 @@ use arrow_schema::DataType; use arrow_schema::TimeUnit; -use sqlparser::ast::{CastKind, Expr as SQLExpr, JsonPathElem, TrimWhereField, Value}; +use sqlparser::ast::{CastKind, Expr as SQLExpr, JsonPathElem, Subscript, TrimWhereField, Value}; use sqlparser::parser::ParserError::ParserError; use datafusion_common::{ @@ -194,17 +194,18 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } } - SQLExpr::ArrayIndex { obj, indexes } => { + SQLExpr::Subscript { expr, subscript } => { fn is_unsupported(expr: &SQLExpr) -> bool { matches!(expr, SQLExpr::JsonAccess { .. }) } + fn simplify_array_index_expr(expr: Expr, index: Expr) -> (Expr, bool) { match &expr { Expr::AggregateFunction(agg_func) if agg_func.func_def == datafusion_expr::expr::AggregateFunctionDefinition::BuiltIn(AggregateFunction::ArrayAgg) => { let mut new_args = agg_func.args.clone(); new_args.push(index.clone()); (Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction::new( - datafusion_expr::AggregateFunction::NthValue, + AggregateFunction::NthValue, new_args, agg_func.distinct, agg_func.filter.clone(), @@ -215,11 +216,28 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { _ => (expr, false), } } + let expr = - self.sql_expr_to_logical_expr(*obj, schema, planner_context)?; + self.sql_expr_to_logical_expr(*expr, schema, planner_context)?; + + let indexes = match *subscript { + Subscript::Index { index } => vec![index], + Subscript::Slice { lower_bound, upper_bound } => { + let mut indexes = vec![]; + if let Some(lower_bound) = lower_bound { + indexes.push(lower_bound); + } + if let Some(upper_bound) = upper_bound { + indexes.push(upper_bound); + } + indexes + } + }; + if indexes.len() > 1 || is_unsupported(&indexes[0]) { return self.plan_indexed(expr, indexes, schema, planner_context); } + let (new_expr, changed) = simplify_array_index_expr( expr, self.sql_expr_to_logical_expr( From e3994162c60cccf7404e2d34ed2df518e32b7af7 Mon Sep 17 00:00:00 2001 From: tison Date: Wed, 29 May 2024 06:45:57 +0800 Subject: [PATCH 17/33] cargo fmt Signed-off-by: tison --- datafusion/sql/src/expr/mod.rs | 9 +++++++-- datafusion/sql/src/statement.rs | 11 +++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 36c5f22f656a..0966724ac2d5 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -17,7 +17,9 @@ use arrow_schema::DataType; use arrow_schema::TimeUnit; -use sqlparser::ast::{CastKind, Expr as SQLExpr, JsonPathElem, Subscript, TrimWhereField, Value}; +use sqlparser::ast::{ + CastKind, Expr as SQLExpr, JsonPathElem, Subscript, TrimWhereField, Value, +}; use sqlparser::parser::ParserError::ParserError; use datafusion_common::{ @@ -222,7 +224,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let indexes = match *subscript { Subscript::Index { index } => vec![index], - Subscript::Slice { lower_bound, upper_bound } => { + Subscript::Slice { + lower_bound, + upper_bound, + } => { let mut indexes = vec![]; if let Some(lower_bound) = lower_bound { indexes.push(lower_bound); diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 797aa292f502..0f608e347d29 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -53,7 +53,12 @@ use datafusion_expr::{ Volatility, WriteOp, }; use sqlparser::ast; -use sqlparser::ast::{Assignment, ColumnDef, CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, Expr, FromTable, Ident, Insert, ObjectName, ObjectType, OneOrManyWithParens, Query, SchemaName, SetExpr, ShowCreateObject, ShowStatementFilter, Statement, TableConstraint, TableFactor, TableWithJoins, TransactionMode, UnaryOperator, Value}; +use sqlparser::ast::{ + Assignment, ColumnDef, CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, + Expr, FromTable, Ident, Insert, ObjectName, ObjectType, OneOrManyWithParens, Query, + SchemaName, SetExpr, ShowCreateObject, ShowStatementFilter, Statement, + TableConstraint, TableFactor, TableWithJoins, TransactionMode, UnaryOperator, Value, +}; use sqlparser::parser::ParserError::ParserError; fn ident_to_string(ident: &Ident) -> String { @@ -1156,7 +1161,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let variable = match variables { OneOrManyWithParens::One(v) => object_name_to_string(v), OneOrManyWithParens::Many(vs) => { - return not_impl_err!("SET only supports single variable assignment: {vs:?}"); + return not_impl_err!( + "SET only supports single variable assignment: {vs:?}" + ); } }; let mut variable_lower = variable.to_lowercase(); From a4aca5adee8c73a0edef4626ebf1a39b396db5f6 Mon Sep 17 00:00:00 2001 From: tison Date: Wed, 29 May 2024 07:12:19 +0800 Subject: [PATCH 18/33] revert workaround on values Signed-off-by: tison --- datafusion/sqllogictest/test_files/array.slt | 12 +++++----- datafusion/sqllogictest/test_files/struct.slt | 24 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 20c7b680a7f7..9b8b50201243 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -689,7 +689,7 @@ select column1, column2, column3, column4, column5 from nested_arrays; # values table query IIIRT -select a, b, c, d, e from "values"; +select a, b, c, d, e from values; ---- 1 1 2 1.1 Lorem 2 3 4 2.2 ipsum @@ -1074,7 +1074,7 @@ select make_array(NULL), make_array(NULL, NULL, NULL), make_array(make_array(NUL # make_array with 1 columns query ??? -select make_array(a), make_array(d), make_array(e) from "values"; +select make_array(a), make_array(d), make_array(e) from values; ---- [1] [1.1] [Lorem] [2] [2.2] [ipsum] @@ -1088,7 +1088,7 @@ select make_array(a), make_array(d), make_array(e) from "values"; # make_array with 2 columns #1 query ?? -select make_array(b, c), make_array(e, f) from "values"; +select make_array(b, c), make_array(e, f) from values; ---- [1, 2] [Lorem, A] [3, 4] [ipsum, ] @@ -1102,7 +1102,7 @@ select make_array(b, c), make_array(e, f) from "values"; # make_array with 4 columns query ? -select make_array(a, b, c, d) from "values"; +select make_array(a, b, c, d) from values; ---- [1.0, 1.0, 2.0, 1.1] [2.0, 3.0, 4.0, 2.2] @@ -6333,7 +6333,7 @@ SELECT string_to_array('abc def', ' ', 'def') [abc, ] query ? -select string_to_array(e, ',') from "values"; +select string_to_array(e, ',') from values; ---- [Lorem] [ipsum] @@ -6346,7 +6346,7 @@ select string_to_array(e, ',') from "values"; NULL query ? -select string_to_list(e, 'm') from "values"; +select string_to_list(e, 'm') from values; ---- [Lore, ] [ipsu, ] diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index 430945701a76..46a08709c3a3 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -39,7 +39,7 @@ select struct(1, 3.14, 'h')['c0'], struct(3, 2.55, 'b')['c1'], struct(2, 6.43, ' # struct[i] with columns query R -select struct(a, b, c)['c1'] from "values"; +select struct(a, b, c)['c1'] from values; ---- 1.1 2.2 @@ -65,7 +65,7 @@ select struct(1, 3.14 as name1, 'e', true); # struct scalar function with columns #1 query ? -select struct(a, b, c) from "values"; +select struct(a, b, c) from values; ---- {c0: 1, c1: 1.1, c2: a} {c0: 2, c1: 2.2, c2: b} @@ -73,7 +73,7 @@ select struct(a, b, c) from "values"; # struct scalar function with columns and scalars query ? -select struct(a, 'foo') from "values"; +select struct(a, 'foo') from values; ---- {c0: 1, c1: foo} {c0: 2, c1: foo} @@ -82,7 +82,7 @@ select struct(a, 'foo') from "values"; # explain struct scalar function with columns #1 query TT -explain select struct(a, b, c) from "values"; +explain select struct(a, b, c) from values; ---- logical_plan 01)Projection: struct(values.a, values.b, values.c) @@ -105,7 +105,7 @@ select named_struct(1); # error on odd number of arguments #3 query error DataFusion error: Execution error: named_struct requires an even number of arguments, got 1 instead -select named_struct(values.a) from "values"; +select named_struct(values.a) from values; # error on odd number of arguments #4 query error DataFusion error: Execution error: named_struct requires an even number of arguments, got 3 instead @@ -121,15 +121,15 @@ select named_struct('corret', 1, 0, 'wrong'); # error on even argument not a string literal #3 query error DataFusion error: Execution error: named_struct even arguments must be string literals, got values\.a instead at position 0 -select named_struct(values.a, 'a') from "values"; +select named_struct(values.a, 'a') from values; # error on even argument not a string literal #4 query error DataFusion error: Execution error: named_struct even arguments must be string literals, got values\.c instead at position 0 -select named_struct(values.c, 'c') from "values"; +select named_struct(values.c, 'c') from values; # named_struct with mixed scalar and array values #1 query ? -select named_struct('scalar', 27, 'array', values.a, 'null', NULL) from "values"; +select named_struct('scalar', 27, 'array', values.a, 'null', NULL) from values; ---- {scalar: 27, array: 1, null: } {scalar: 27, array: 2, null: } @@ -137,7 +137,7 @@ select named_struct('scalar', 27, 'array', values.a, 'null', NULL) from "values" # named_struct with mixed scalar and array values #2 query ? -select named_struct('array', values.a, 'scalar', 27, 'null', NULL) from "values"; +select named_struct('array', values.a, 'scalar', 27, 'null', NULL) from values; ---- {array: 1, scalar: 27, null: } {array: 2, scalar: 27, null: } @@ -145,7 +145,7 @@ select named_struct('array', values.a, 'scalar', 27, 'null', NULL) from "values" # named_struct with mixed scalar and array values #3 query ? -select named_struct('null', NULL, 'array', values.a, 'scalar', 27) from "values"; +select named_struct('null', NULL, 'array', values.a, 'scalar', 27) from values; ---- {null: , array: 1, scalar: 27} {null: , array: 2, scalar: 27} @@ -153,7 +153,7 @@ select named_struct('null', NULL, 'array', values.a, 'scalar', 27) from "values" # named_struct with mixed scalar and array values #4 query ? -select named_struct('null_array', values.n, 'array', values.a, 'scalar', 27, 'null', NULL) from "values"; +select named_struct('null_array', values.n, 'array', values.a, 'scalar', 27, 'null', NULL) from values; ---- {null_array: , array: 1, scalar: 27, null: } {null_array: , array: 2, scalar: 27, null: } @@ -161,7 +161,7 @@ select named_struct('null_array', values.n, 'array', values.a, 'scalar', 27, 'nu # named_struct arrays only query ? -select named_struct('field_a', a, 'field_b', b) from "values"; +select named_struct('field_a', a, 'field_b', b) from values; ---- {field_a: 1, field_b: 1.1} {field_a: 2, field_b: 2.2} From 3cffded26626a1da7bb0515bad71adc359ad4e49 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 30 May 2024 09:35:22 -0400 Subject: [PATCH 19/33] Rework field access --- .../core/src/datasource/listing/helpers.rs | 2 +- datafusion/sql/src/expr/mod.rs | 260 +++++++----------- 2 files changed, 106 insertions(+), 156 deletions(-) diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index b531cf8369cf..822a66783819 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -786,7 +786,7 @@ mod tests { assert_eq!( evaluate_partition_prefix( partitions, - &[col("a").eq(lit("foo")).and((col("b").eq(lit("bar"))))], + &[col("a").eq(lit("foo")).and(col("b").eq(lit("bar")))], ), Some(Path::from("a=foo/b=bar")), ); diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 0966724ac2d5..295f7c88f779 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -17,10 +17,7 @@ use arrow_schema::DataType; use arrow_schema::TimeUnit; -use sqlparser::ast::{ - CastKind, Expr as SQLExpr, JsonPathElem, Subscript, TrimWhereField, Value, -}; -use sqlparser::parser::ParserError::ParserError; +use sqlparser::ast::{CastKind, Expr as SQLExpr, Subscript, TrimWhereField, Value}; use datafusion_common::{ internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result, @@ -29,7 +26,7 @@ use datafusion_common::{ use datafusion_expr::expr::InList; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{ - col, lit, AggregateFunction, Between, BinaryExpr, Cast, Expr, ExprSchemable, + lit, AggregateFunction, Between, BinaryExpr, Cast, Expr, ExprSchemable, GetFieldAccess, Like, Literal, Operator, TryCast, }; @@ -180,83 +177,85 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.sql_identifier_to_expr(id, schema, planner_context) } - SQLExpr::MapAccess { column, keys } => { - if let SQLExpr::Identifier(id) = *column { - let keys = keys.into_iter().map(|mak| mak.key).collect(); - self.plan_indexed( - col(self.normalizer.normalize(id)), - keys, - schema, - planner_context, - ) - } else { - not_impl_err!( - "map access requires an identifier, found column {column} instead" - ) - } + SQLExpr::MapAccess { .. } => { + not_impl_err!("Map Access") } + // ["foo"], [4] or [4:5] SQLExpr::Subscript { expr, subscript } => { - fn is_unsupported(expr: &SQLExpr) -> bool { - matches!(expr, SQLExpr::JsonAccess { .. }) - } - - fn simplify_array_index_expr(expr: Expr, index: Expr) -> (Expr, bool) { - match &expr { - Expr::AggregateFunction(agg_func) if agg_func.func_def == datafusion_expr::expr::AggregateFunctionDefinition::BuiltIn(AggregateFunction::ArrayAgg) => { - let mut new_args = agg_func.args.clone(); - new_args.push(index.clone()); - (Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction::new( - AggregateFunction::NthValue, - new_args, - agg_func.distinct, - agg_func.filter.clone(), - agg_func.order_by.clone(), - agg_func.null_treatment, - )), true) - } - _ => (expr, false), - } - } - let expr = self.sql_expr_to_logical_expr(*expr, schema, planner_context)?; - let indexes = match *subscript { - Subscript::Index { index } => vec![index], + let get_field_access = match *subscript { + Subscript::Index { index } => { + // index can be a name, in which case it is a named field access + match index { + SQLExpr::Value( + Value::SingleQuotedString(s) + | Value::DoubleQuotedString(s), + ) => GetFieldAccess::NamedStructField { + name: ScalarValue::from(s), + }, + SQLExpr::JsonAccess { .. } => { + return not_impl_err!("JsonAccess"); + } + // otherwise treat like a list index + _ => GetFieldAccess::ListIndex { + key: Box::new(self.sql_expr_to_logical_expr( + index, + schema, + planner_context, + )?), + }, + } + } Subscript::Slice { lower_bound, upper_bound, + stride, } => { - let mut indexes = vec![]; - if let Some(lower_bound) = lower_bound { - indexes.push(lower_bound); - } - if let Some(upper_bound) = upper_bound { - indexes.push(upper_bound); + // Means access like [:2] + let lower_bound = if let Some(lower_bound) = lower_bound { + self.sql_expr_to_logical_expr( + lower_bound, + schema, + planner_context, + ) + } else { + not_impl_err!("Slice subscript requires a lower bound") + }?; + + // means access like [2:] + let upper_bound = if let Some(upper_bound) = upper_bound { + self.sql_expr_to_logical_expr( + upper_bound, + schema, + planner_context, + ) + } else { + not_impl_err!("Slice subscript requires an upper bound") + }?; + + // stride, default to 1 + let stride = if let Some(stride) = stride { + self.sql_expr_to_logical_expr( + stride, + schema, + planner_context, + )? + } else { + lit(1i64) + }; + + GetFieldAccess::ListRange { + start: Box::new(lower_bound), + stop: Box::new(upper_bound), + stride: Box::new(stride), } - indexes } }; - if indexes.len() > 1 || is_unsupported(&indexes[0]) { - return self.plan_indexed(expr, indexes, schema, planner_context); - } - - let (new_expr, changed) = simplify_array_index_expr( - expr, - self.sql_expr_to_logical_expr( - indexes[0].clone(), - schema, - planner_context, - )?, - ); - - if changed { - Ok(new_expr) - } else { - self.plan_indexed(new_expr, indexes, schema, planner_context) - } + self.plan_field_access(expr, get_field_access) } SQLExpr::CompoundIdentifier(ids) => { @@ -582,6 +581,36 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } } + /// Simplifies an expresssion like ARRAY_AGG(expr)[index] to NTH_VALUE(expr, index) + /// + /// returns Some(Expr) if the expression was simplified, otherwise None + /// TODO: this should likely be done in ArrayAgg::simplify when it is moved to a UDAF + fn simplify_array_index_expr(expr: &Expr, index: &Expr) -> Option { + fn is_array_agg(agg_func: &datafusion_expr::expr::AggregateFunction) -> bool { + agg_func.func_def + == datafusion_expr::expr::AggregateFunctionDefinition::BuiltIn( + AggregateFunction::ArrayAgg, + ) + } + match expr { + Expr::AggregateFunction(agg_func) if is_array_agg(agg_func) => { + let mut new_args = agg_func.args.clone(); + new_args.push(index.clone()); + Some(Expr::AggregateFunction( + datafusion_expr::expr::AggregateFunction::new( + AggregateFunction::NthValue, + new_args, + agg_func.distinct, + agg_func.filter.clone(), + agg_func.order_by.clone(), + agg_func.null_treatment, + ), + )) + } + _ => None, + } + } + /// Parses a struct(..) expression fn parse_struct( &self, @@ -868,95 +897,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(Expr::ScalarFunction(ScalarFunction::new_udf(fun, args))) } - fn plan_indices( - &self, - expr: SQLExpr, - schema: &DFSchema, - planner_context: &mut PlannerContext, - ) -> Result { - match expr.clone() { - SQLExpr::Value( - Value::SingleQuotedString(s) | Value::DoubleQuotedString(s), - ) => Ok(GetFieldAccess::NamedStructField { - name: ScalarValue::from(s), - }), - SQLExpr::JsonAccess { value, path } => { - let start = Box::new(self.sql_expr_to_logical_expr( - *value, - schema, - planner_context, - )?); - - fn json_path_elem_to_expr(expr: JsonPathElem) -> SQLExpr { - match expr { - JsonPathElem::Dot { key, .. } => { - SQLExpr::Value(Value::SingleQuotedString(key)) - } - JsonPathElem::Bracket { key } => key, - } - } - - let mut path_iter = path.path.into_iter(); - let stop = match path_iter.next() { - None => { - return Ok(GetFieldAccess::ListIndex { - key: Box::new(self.sql_expr_to_logical_expr( - expr, - schema, - planner_context, - )?), - }) - } - Some(expr) => Box::new(self.sql_expr_to_logical_expr( - json_path_elem_to_expr(expr), - schema, - planner_context, - )?), - }; - let stride = match path_iter.next() { - None => Box::new(Expr::Literal(ScalarValue::Int64(Some(1)))), - Some(expr) => Box::new(self.sql_expr_to_logical_expr( - json_path_elem_to_expr(expr), - schema, - planner_context, - )?), - }; - - Ok(GetFieldAccess::ListRange { - start, - stop, - stride, - }) - } - _ => Ok(GetFieldAccess::ListIndex { - key: Box::new(self.sql_expr_to_logical_expr( - expr, - schema, - planner_context, - )?), - }), - } - } - - fn plan_indexed( + /// Given an expression and the field to access, creates a new expression for accessing that field + fn plan_field_access( &self, expr: Expr, - mut keys: Vec, - schema: &DFSchema, - planner_context: &mut PlannerContext, + get_field_access: GetFieldAccess, ) -> Result { - let indices = keys.pop().ok_or_else(|| { - ParserError("Internal error: Missing index key expression".to_string()) - })?; - - let expr = if !keys.is_empty() { - self.plan_indexed(expr, keys, schema, planner_context)? - } else { - expr - }; - - let field = self.plan_indices(indices, schema, planner_context)?; - match field { + match get_field_access { GetFieldAccess::NamedStructField { name } => { if let Some(udf) = self.context_provider.get_function_meta("get_field") { Ok(Expr::ScalarFunction(ScalarFunction::new_udf( @@ -969,7 +916,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } // expr[idx] ==> array_element(expr, idx) GetFieldAccess::ListIndex { key } => { - if let Some(udf) = + // Special case for array_agg(expr)[index] to NTH_VALUE(expr, index) + if let Some(simplified) = Self::simplify_array_index_expr(&expr, &key) { + Ok(simplified) + } else if let Some(udf) = self.context_provider.get_function_meta("array_element") { Ok(Expr::ScalarFunction(ScalarFunction::new_udf( From 1bd355a9e7875b39465004b3140ca9c1250fd5c2 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 30 May 2024 12:17:39 -0400 Subject: [PATCH 20/33] update lock --- datafusion-cli/Cargo.lock | 128 +++++++++++++++++++------------------- 1 file changed, 63 insertions(+), 65 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index f2f2ccafdd01..06ced1829fdc 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "addr2line" -version = "0.21.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" dependencies = [ "gimli", ] @@ -363,9 +363,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c90a406b4495d129f00461241616194cb8a032c8d1c53c657f0961d5f8e0498" +checksum = "cd066d0b4ef8ecb03a55319dc13aa6910616d0f44008a045bb1835af830abff5" dependencies = [ "bzip2", "flate2", @@ -387,7 +387,7 @@ checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.66", ] [[package]] @@ -708,9 +708,9 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.71" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" +checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" dependencies = [ "addr2line", "cc", @@ -869,9 +869,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.97" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4" +checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" dependencies = [ "jobserver", "libc", @@ -1042,9 +1042,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.4.0" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" dependencies = [ "cfg-if", ] @@ -1093,7 +1093,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.63", + "syn 2.0.66", ] [[package]] @@ -1495,9 +1495,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "either" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "encoding_rs" @@ -1685,7 +1685,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.66", ] [[package]] @@ -1747,9 +1747,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.1" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" [[package]] name = "glob" @@ -1987,9 +1987,9 @@ dependencies = [ [[package]] name = "instant" -version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" dependencies = [ "cfg-if", "js-sys", @@ -2114,9 +2114,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.154" +version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] name = "libflate" @@ -2150,9 +2150,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libmimalloc-sys" -version = "0.1.37" +version = "0.1.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81eb4061c0582dedea1cbc7aff2240300dd6982e0239d1c99e65c1dbf4a30ba7" +checksum = "0e7bb23d733dfcc8af652a78b7bf232f0e967710d044732185e561e47c0336b6" dependencies = [ "cc", "libc", @@ -2170,9 +2170,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "lock_api" @@ -2228,9 +2228,9 @@ checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "mimalloc" -version = "0.1.41" +version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f41a2280ded0da56c8cf898babb86e8f10651a34adcfff190ae9a1159c6908d" +checksum = "e9186d86b79b52f4a77af65604b51225e8db1d6ee7e3f41aec1e40829c71a176" dependencies = [ "libmimalloc-sys", ] @@ -2243,9 +2243,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" dependencies = [ "adler", ] @@ -2379,9 +2379,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.2" +version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" dependencies = [ "memchr", ] @@ -2452,9 +2452,9 @@ checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" [[package]] name = "parking_lot" -version = "0.12.2" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" dependencies = [ "lock_api", "parking_lot_core", @@ -2594,7 +2594,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.66", ] [[package]] @@ -2683,9 +2683,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.82" +version = "1.0.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ad3d49ab951a01fbaafe34f2ec74122942fe18a3f9814c3268f1bb72042131b" +checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6" dependencies = [ "unicode-ident", ] @@ -3016,9 +3016,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "092474d1a01ea8278f69e6a358998405fae5b8b963ddaeb2b0b04a128bf1dfb0" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" [[package]] name = "rustyline" @@ -3120,22 +3120,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.201" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "780f1cebed1629e4753a1a38a3c72d30b97ec044f0aef68cb26650a3c5cf363c" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.201" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5e405930b9796f1c00bee880d03fc7e0bb4b9a11afc776885ffe84320da2865" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.66", ] [[package]] @@ -3255,8 +3255,7 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" version = "0.46.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11a81a8cad9befe4cf1b9d2d4b9c6841c76f0882a3fec00d95133953c13b3d3d" +source = "git+https://github.com/jmhain/sqlparser-rs?branch=subscript#4df6dc8ef343a7bace9d0cb43996d32ebc44f75c" dependencies = [ "log", "sqlparser_derive", @@ -3265,12 +3264,11 @@ dependencies = [ [[package]] name = "sqlparser_derive" version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +source = "git+https://github.com/jmhain/sqlparser-rs?branch=subscript#4df6dc8ef343a7bace9d0cb43996d32ebc44f75c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.66", ] [[package]] @@ -3316,7 +3314,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.63", + "syn 2.0.66", ] [[package]] @@ -3329,7 +3327,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.63", + "syn 2.0.66", ] [[package]] @@ -3351,9 +3349,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.63" +version = "2.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf5be731623ca1a1fb7d8be6f261a3be6d3e2337b8a1f97be944d020c8fcb704" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" dependencies = [ "proc-macro2", "quote", @@ -3422,22 +3420,22 @@ checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" [[package]] name = "thiserror" -version = "1.0.60" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "579e9083ca58dd9dcf91a9923bb9054071b9ebbd800b342194c9feb0ee89fc18" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.60" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2470041c06ec3ac1ab38d0356a6119054dedaea53e12fbefc0de730a1c08524" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.66", ] [[package]] @@ -3532,7 +3530,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.66", ] [[package]] @@ -3628,7 +3626,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.66", ] [[package]] @@ -3673,7 +3671,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.66", ] [[package]] @@ -3827,7 +3825,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.66", "wasm-bindgen-shared", ] @@ -3861,7 +3859,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.66", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4126,14 +4124,14 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.66", ] [[package]] name = "zeroize" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" [[package]] name = "zstd" From 98b539b31d0cabf456195521bce4968746950782 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 30 May 2024 12:48:35 -0400 Subject: [PATCH 21/33] fix doc --- datafusion/sql/src/expr/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 295f7c88f779..1cf6b470ac61 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -581,7 +581,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } } - /// Simplifies an expresssion like ARRAY_AGG(expr)[index] to NTH_VALUE(expr, index) + /// Simplifies an expresssion like `ARRAY_AGG(expr)[index]` to `NTH_VALUE(expr, index)` /// /// returns Some(Expr) if the expression was simplified, otherwise None /// TODO: this should likely be done in ArrayAgg::simplify when it is moved to a UDAF From 53c72f58516335431736a1b7eedfb8c647766cca Mon Sep 17 00:00:00 2001 From: tison Date: Sat, 1 Jun 2024 15:22:21 +0800 Subject: [PATCH 22/33] try catchup new sqlparser version Signed-off-by: tison --- Cargo.toml | 2 +- datafusion/expr/src/logical_plan/ddl.rs | 25 ++----------------------- datafusion/expr/src/logical_plan/mod.rs | 2 +- datafusion/sql/src/statement.rs | 22 ++++++++++++++-------- 4 files changed, 18 insertions(+), 33 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8fc51c195468..4f5a5882b42c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -111,7 +111,7 @@ rand = "0.8" regex = "1.8" rstest = "0.19.0" serde_json = "1" -sqlparser = { git = "https://github.com/jmhain/sqlparser-rs", branch = "subscript", features = ["visitor"] } +sqlparser = { git = "https://github.com/sqlparser-rs/sqlparser-rs", rev = "afa5f08", features = ["visitor"] } tempfile = "3" thiserror = "1.0.44" tokio = { version = "1.36", features = ["macros", "rt", "sync"] } diff --git a/datafusion/expr/src/logical_plan/ddl.rs b/datafusion/expr/src/logical_plan/ddl.rs index 4538ff52c052..45ddbafecfd7 100644 --- a/datafusion/expr/src/logical_plan/ddl.rs +++ b/datafusion/expr/src/logical_plan/ddl.rs @@ -341,29 +341,8 @@ pub struct CreateFunctionBody { pub language: Option, /// IMMUTABLE | STABLE | VOLATILE pub behavior: Option, - /// AS 'definition' - pub as_: Option, - /// RETURN expression - pub return_: Option, -} - -#[derive(Clone, PartialEq, Eq, Hash, Debug)] -pub enum DefinitionStatement { - SingleQuotedDef(String), - DoubleDollarDef(String), -} - -impl From for DefinitionStatement { - fn from(value: sqlparser::ast::FunctionDefinition) -> Self { - match value { - sqlparser::ast::FunctionDefinition::SingleQuotedDef(s) => { - Self::SingleQuotedDef(s) - } - sqlparser::ast::FunctionDefinition::DoubleDollarDef(s) => { - Self::DoubleDollarDef(s) - } - } - } + /// RETURN or AS function body + pub function_body: Option, } #[derive(Clone, PartialEq, Eq, Hash, Debug)] diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs index 034440643e51..6c376088aa37 100644 --- a/datafusion/expr/src/logical_plan/mod.rs +++ b/datafusion/expr/src/logical_plan/mod.rs @@ -30,7 +30,7 @@ pub use builder::{ }; pub use ddl::{ CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateFunction, - CreateFunctionBody, CreateMemoryTable, CreateView, DdlStatement, DefinitionStatement, + CreateFunctionBody, CreateMemoryTable, CreateView, DdlStatement, DropCatalogSchema, DropFunction, DropTable, DropView, OperateFunctionArg, }; pub use dml::{DmlStatement, WriteOp}; diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 0f608e347d29..d10956efb66c 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -677,7 +677,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { name, args, return_type, - params, + function_body, + behavior, + language, + .. } => { let return_type = match return_type { Some(t) => Some(self.convert_data_type(&t)?), @@ -727,9 +730,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let mut planner_context = PlannerContext::new() .with_prepare_param_data_types(arg_types.unwrap_or_default()); - let result_expression = match params.return_ { + let function_body = match function_body { Some(r) => Some(self.sql_to_expr( - r, + match r { + ast::CreateFunctionBody::AsBeforeOptions(expr) => expr, + ast::CreateFunctionBody::AsAfterOptions(expr) => expr, + ast::CreateFunctionBody::Return(expr) => expr, + }, &DFSchema::empty(), &mut planner_context, )?), @@ -737,14 +744,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }; let params = CreateFunctionBody { - language: params.language, - behavior: params.behavior.map(|b| match b { + language, + behavior: behavior.map(|b| match b { ast::FunctionBehavior::Immutable => Volatility::Immutable, ast::FunctionBehavior::Stable => Volatility::Stable, ast::FunctionBehavior::Volatile => Volatility::Volatile, }), - as_: params.as_.map(|m| m.into()), - return_: result_expression, + function_body, }; let statement = DdlStatement::CreateFunction(CreateFunction { @@ -878,7 +884,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { for (key, value) in statement.options { let value_string = match value_to_string(&value) { None => { - return plan_err!("Unsupported Value in COPY statement {}", value) + return plan_err!("Unsupported Value in COPY statement {}", value); } Some(v) => v, }; From 4d74ef6af2e501887a4b4c70f7909d2c46522bd4 Mon Sep 17 00:00:00 2001 From: tison Date: Sat, 1 Jun 2024 15:28:46 +0800 Subject: [PATCH 23/33] fixup timezone expr Signed-off-by: tison --- datafusion/expr/src/logical_plan/mod.rs | 4 ++-- datafusion/sql/src/expr/mod.rs | 13 +++++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs index 6c376088aa37..8928f70cd5d2 100644 --- a/datafusion/expr/src/logical_plan/mod.rs +++ b/datafusion/expr/src/logical_plan/mod.rs @@ -30,8 +30,8 @@ pub use builder::{ }; pub use ddl::{ CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateFunction, - CreateFunctionBody, CreateMemoryTable, CreateView, DdlStatement, - DropCatalogSchema, DropFunction, DropTable, DropView, OperateFunctionArg, + CreateFunctionBody, CreateMemoryTable, CreateView, DdlStatement, DropCatalogSchema, + DropFunction, DropTable, DropView, OperateFunctionArg, }; pub use dml::{DmlStatement, WriteOp}; pub use plan::{ diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 1cf6b470ac61..8b64ccfb52cb 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -575,13 +575,22 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { schema, planner_context, )?), - DataType::Timestamp(TimeUnit::Nanosecond, Some(time_zone.into())), + match *time_zone { + SQLExpr::Value(Value::SingleQuotedString(s)) => { + DataType::Timestamp(TimeUnit::Nanosecond, Some(s.into())) + } + _ => { + return not_impl_err!( + "Unsupported ast node in sqltorel: {time_zone:?}" + ) + } + }, ))), _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), } } - /// Simplifies an expresssion like `ARRAY_AGG(expr)[index]` to `NTH_VALUE(expr, index)` + /// Simplifies an expression like `ARRAY_AGG(expr)[index]` to `NTH_VALUE(expr, index)` /// /// returns Some(Expr) if the expression was simplified, otherwise None /// TODO: this should likely be done in ArrayAgg::simplify when it is moved to a UDAF From c1163adb975e3769469a2cf7de9fabbce75de59c Mon Sep 17 00:00:00 2001 From: tison Date: Sat, 1 Jun 2024 15:32:11 +0800 Subject: [PATCH 24/33] fixup params Signed-off-by: tison --- datafusion-examples/examples/function_factory.rs | 2 +- .../core/tests/user_defined/user_defined_scalar_functions.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion-examples/examples/function_factory.rs b/datafusion-examples/examples/function_factory.rs index d61c19af47a4..f57b3bf60404 100644 --- a/datafusion-examples/examples/function_factory.rs +++ b/datafusion-examples/examples/function_factory.rs @@ -212,7 +212,7 @@ impl TryFrom for ScalarFunctionWrapper { name: definition.name, expr: definition .params - .return_ + .function_body .expect("Expression has to be defined!"), return_type: definition .return_type diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index df41cab7bf02..f6ce31a2e127 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -828,7 +828,7 @@ impl TryFrom for ScalarFunctionWrapper { name: definition.name, expr: definition .params - .return_ + .function_body .expect("Expression has to be defined!"), return_type: definition .return_type From e1d0c6b5b7b7649248ac567ac5fe55b6ad569a1e Mon Sep 17 00:00:00 2001 From: tison Date: Sat, 1 Jun 2024 15:40:07 +0800 Subject: [PATCH 25/33] lock Signed-off-by: tison --- datafusion-cli/Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 06ced1829fdc..87c1802d6fa7 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -3255,7 +3255,7 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" version = "0.46.0" -source = "git+https://github.com/jmhain/sqlparser-rs?branch=subscript#4df6dc8ef343a7bace9d0cb43996d32ebc44f75c" +source = "git+https://github.com/sqlparser-rs/sqlparser-rs?rev=afa5f08#afa5f08db9b1f3a4805f21fea6b1e72710cdb138" dependencies = [ "log", "sqlparser_derive", @@ -3264,7 +3264,7 @@ dependencies = [ [[package]] name = "sqlparser_derive" version = "0.2.2" -source = "git+https://github.com/jmhain/sqlparser-rs?branch=subscript#4df6dc8ef343a7bace9d0cb43996d32ebc44f75c" +source = "git+https://github.com/sqlparser-rs/sqlparser-rs?rev=afa5f08#afa5f08db9b1f3a4805f21fea6b1e72710cdb138" dependencies = [ "proc-macro2", "quote", From 685025a3d02e9cc817982cc4ca0efc86388dc868 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 1 Jun 2024 06:30:57 -0400 Subject: [PATCH 26/33] Update to sqlparser 0.47.0 --- Cargo.toml | 2 +- datafusion-cli/Cargo.lock | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4f5a5882b42c..57c9d3aaa415 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -111,7 +111,7 @@ rand = "0.8" regex = "1.8" rstest = "0.19.0" serde_json = "1" -sqlparser = { git = "https://github.com/sqlparser-rs/sqlparser-rs", rev = "afa5f08", features = ["visitor"] } +sqlparser = { version = "0.47", features = ["visitor"] } tempfile = "3" thiserror = "1.0.44" tokio = { version = "1.36", features = ["macros", "rt", "sync"] } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 87c1802d6fa7..d91bed38247c 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -3254,8 +3254,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.46.0" -source = "git+https://github.com/sqlparser-rs/sqlparser-rs?rev=afa5f08#afa5f08db9b1f3a4805f21fea6b1e72710cdb138" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "295e9930cd7a97e58ca2a070541a3ca502b17f5d1fa7157376d0fabd85324f25" dependencies = [ "log", "sqlparser_derive", @@ -3264,7 +3265,8 @@ dependencies = [ [[package]] name = "sqlparser_derive" version = "0.2.2" -source = "git+https://github.com/sqlparser-rs/sqlparser-rs?rev=afa5f08#afa5f08db9b1f3a4805f21fea6b1e72710cdb138" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", @@ -3505,9 +3507,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.37.0" +version = "1.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" +checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" dependencies = [ "backtrace", "bytes", @@ -3524,9 +3526,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", From b5743d55a3992880a8d5c6f359c28aa4fa17c98a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 1 Jun 2024 06:42:06 -0400 Subject: [PATCH 27/33] Update rust stack size on windows --- .github/workflows/rust.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index ce4b4b06cf44..e15800b07197 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -334,6 +334,8 @@ jobs: run: | export PATH=$PATH:$HOME/d/protoc/bin cargo test --lib --tests --bins --features avro,json,backtrace + # 8MB stack size otherwise the cli_integration test fails + export RUST_MIN_STACK=8000000 cd datafusion-cli cargo test --lib --tests --bins --all-features From d655ad61237e1ddbe297a56e24f2260c5c08baf3 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 1 Jun 2024 15:38:54 -0400 Subject: [PATCH 28/33] Revert "Update rust stack size on windows" This reverts commit b5743d55a3992880a8d5c6f359c28aa4fa17c98a. --- .github/workflows/rust.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index e15800b07197..ce4b4b06cf44 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -334,8 +334,6 @@ jobs: run: | export PATH=$PATH:$HOME/d/protoc/bin cargo test --lib --tests --bins --features avro,json,backtrace - # 8MB stack size otherwise the cli_integration test fails - export RUST_MIN_STACK=8000000 cd datafusion-cli cargo test --lib --tests --bins --all-features From 84468515808991159632aac8db0f68e7e039419b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 4 Jun 2024 14:12:55 -0400 Subject: [PATCH 29/33] Add test + support for `$$` function definition --- .../user_defined_scalar_functions.rs | 107 +++++++++++++++++- datafusion/sql/src/expr/value.rs | 1 + 2 files changed, 105 insertions(+), 3 deletions(-) diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index d0aa7d56ce28..475fd847d75a 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -28,14 +28,18 @@ use datafusion_common::cast::{as_float64_array, as_int32_array}; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{ assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_err, internal_err, - not_impl_err, plan_err, DataFusionError, ExprSchema, Result, ScalarValue, + not_impl_err, plan_err, DFSchema, DataFusionError, ExprSchema, Result, ScalarValue, }; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ - Accumulator, ColumnarValue, CreateFunction, ExprSchemable, LogicalPlanBuilder, - ScalarUDF, ScalarUDFImpl, Signature, Volatility, + Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody, ExprSchemable, + LogicalPlanBuilder, OperateFunctionArg, ScalarUDF, ScalarUDFImpl, Signature, + Volatility, }; +use datafusion_functions_array::range::range_udf; +use parking_lot::Mutex; +use sqlparser::ast::Ident; /// test that casting happens on udfs. /// c11 is f32, but `custom_sqrt` requires f64. Casting happens but the logical plan and @@ -926,6 +930,103 @@ async fn create_scalar_function_from_sql_statement() -> Result<()> { Ok(()) } +/// Saves whatever is passed to it as a scalar function +#[derive(Debug, Default)] +struct RecordingFunctonFactory { + calls: Mutex>, +} + +impl RecordingFunctonFactory { + fn new() -> Self { + Self::default() + } + + /// return all the calls made to the factory + fn calls(&self) -> Vec { + self.calls.lock().clone() + } +} + +#[async_trait::async_trait] +impl FunctionFactory for RecordingFunctonFactory { + async fn create( + &self, + _state: &SessionState, + statement: CreateFunction, + ) -> Result { + self.calls.lock().push(statement); + + let udf = range_udf(); + Ok(RegisterFunction::Scalar(udf)) + } +} + +#[tokio::test] +async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<()> { + let function_factory = Arc::new(RecordingFunctonFactory::new()); + let runtime_config = RuntimeConfig::new(); + let runtime_environment = RuntimeEnv::new(runtime_config)?; + + let session_config = SessionConfig::new(); + let state = + SessionState::new_with_config_rt(session_config, Arc::new(runtime_environment)) + .with_function_factory(function_factory.clone()); + + let ctx = SessionContext::new_with_state(state); + + let sql = r#" + CREATE FUNCTION strlen(name TEXT) + RETURNS int LANGUAGE plrust AS + $$ + Ok(Some(name.unwrap().len() as i32)) + $$; + "#; + + let body = " + Ok(Some(name.unwrap().len() as i32)) + "; + + match ctx.sql(sql).await { + Ok(_) => {} + Err(e) => { + panic!("Error creating function: {}", e); + } + } + + // verify that the call was passed through + let calls = function_factory.calls(); + let schema = DFSchema::try_from(Schema::empty())?; + assert_eq!(calls.len(), 1); + let call = &calls[0]; + let expected = CreateFunction { + or_replace: false, + temporary: false, + name: "strlen".into(), + args: Some(vec![OperateFunctionArg { + name: Some(Ident { + value: "name".into(), + quote_style: None, + }), + data_type: DataType::Utf8, + default_expr: None, + }]), + return_type: Some(DataType::Int32), + params: CreateFunctionBody { + language: Some(Ident { + value: "plrust".into(), + quote_style: None, + }), + behavior: None, + function_body: Some(lit(body)), + }, + schema: Arc::new(schema), + }; + + assert_eq!(call, &expected); + + Ok(()) +} + fn create_udf_context() -> SessionContext { let ctx = SessionContext::new(); // register a custom UDF diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs index 25857db839c8..fa95fc2e051d 100644 --- a/datafusion/sql/src/expr/value.rs +++ b/datafusion/sql/src/expr/value.rs @@ -50,6 +50,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { plan_err!("Invalid HexStringLiteral '{s}'") } } + Value::DollarQuotedString(s) => Ok(lit(s.value)), Value::EscapedStringLiteral(s) => Ok(lit(s)), _ => plan_err!("Unsupported Value '{value:?}'"), } From 72f61b082c156ef2d760ba7d8edf0c7c35b3ea1b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 4 Jun 2024 14:16:46 -0400 Subject: [PATCH 30/33] Disable failing windows CI test --- datafusion-cli/tests/cli_integration.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 119a0aa39d3c..b58789346304 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -28,6 +28,8 @@ fn init() { let _ = env_logger::try_init(); } +// Disabled due to https://github.com/apache/datafusion/issues/10793 +#[cfg(not(target_family = "windows"))] #[rstest] #[case::exec_from_commands( ["--command", "select 1", "--format", "json", "-q"], @@ -45,6 +47,7 @@ fn init() { ["--command", "show datafusion.execution.batch_size", "--format", "json", "-q", "-b", "1"], "[{\"name\":\"datafusion.execution.batch_size\",\"value\":\"1\"}]\n" )] + #[test] fn cli_quick_test<'a>( #[case] args: impl IntoIterator, From 089b5717dae3fa7e1a977a09b98ea87c79bb8c59 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 4 Jun 2024 14:18:20 -0400 Subject: [PATCH 31/33] fmt --- datafusion-cli/tests/cli_integration.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index b58789346304..27cabf15afec 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -47,7 +47,6 @@ fn init() { ["--command", "show datafusion.execution.batch_size", "--format", "json", "-q", "-b", "1"], "[{\"name\":\"datafusion.execution.batch_size\",\"value\":\"1\"}]\n" )] - #[test] fn cli_quick_test<'a>( #[case] args: impl IntoIterator, From c3e475b1855b43b6c9cc47b5d0dea282fef1d0c9 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 4 Jun 2024 14:23:56 -0400 Subject: [PATCH 32/33] simplify test --- .../user_defined_scalar_functions.rs | 20 ++----------------- 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 475fd847d75a..efea30b41e29 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -30,7 +30,6 @@ use datafusion_common::{ assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_err, internal_err, not_impl_err, plan_err, DFSchema, DataFusionError, ExprSchema, Result, ScalarValue, }; -use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody, ExprSchemable, @@ -856,15 +855,7 @@ impl TryFrom for ScalarFunctionWrapper { #[tokio::test] async fn create_scalar_function_from_sql_statement() -> Result<()> { let function_factory = Arc::new(CustomFunctionFactory::default()); - let runtime_config = RuntimeConfig::new(); - let runtime_environment = RuntimeEnv::new(runtime_config)?; - - let session_config = SessionConfig::new(); - let state = - SessionState::new_with_config_rt(session_config, Arc::new(runtime_environment)) - .with_function_factory(function_factory.clone()); - - let ctx = SessionContext::new_with_state(state); + let ctx = SessionContext::new().with_function_factory(function_factory.clone()); let options = SQLOptions::new().with_allow_ddl(false); let sql = r#" @@ -964,15 +955,8 @@ impl FunctionFactory for RecordingFunctonFactory { #[tokio::test] async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<()> { let function_factory = Arc::new(RecordingFunctonFactory::new()); - let runtime_config = RuntimeConfig::new(); - let runtime_environment = RuntimeEnv::new(runtime_config)?; - - let session_config = SessionConfig::new(); - let state = - SessionState::new_with_config_rt(session_config, Arc::new(runtime_environment)) - .with_function_factory(function_factory.clone()); + let ctx = SessionContext::new().with_function_factory(function_factory.clone()); - let ctx = SessionContext::new_with_state(state); let sql = r#" CREATE FUNCTION strlen(name TEXT) From 50a5a0f940ed7bab0c85ee410d5dda330a798753 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 4 Jun 2024 14:54:08 -0400 Subject: [PATCH 33/33] fmt --- .../core/tests/user_defined/user_defined_scalar_functions.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index efea30b41e29..a81fc9159e52 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -957,7 +957,6 @@ async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<( let function_factory = Arc::new(RecordingFunctonFactory::new()); let ctx = SessionContext::new().with_function_factory(function_factory.clone()); - let sql = r#" CREATE FUNCTION strlen(name TEXT) RETURNS int LANGUAGE plrust AS