diff --git a/rust/datafusion/Cargo.toml b/rust/datafusion/Cargo.toml index 2cd75923dfe..186a3bdb1f1 100644 --- a/rust/datafusion/Cargo.toml +++ b/rust/datafusion/Cargo.toml @@ -49,7 +49,7 @@ ahash = "0.6" hashbrown = "0.9" arrow = { path = "../arrow", version = "3.0.0-SNAPSHOT", features = ["prettyprint"] } parquet = { path = "../parquet", version = "3.0.0-SNAPSHOT", features = ["arrow"] } -sqlparser = "0.6.1" +sqlparser = "0.7.0" clap = "2.33" rustyline = {version = "7.0", optional = true} crossbeam = "0.8" diff --git a/rust/datafusion/src/sql/parser.rs b/rust/datafusion/src/sql/parser.rs index fb421f8c338..dc31581a0e6 100644 --- a/rust/datafusion/src/sql/parser.rs +++ b/rust/datafusion/src/sql/parser.rs @@ -20,7 +20,7 @@ //! Declares a SQL parser based on sqlparser that handles custom formats that we need. use sqlparser::{ - ast::{ColumnDef, Statement as SQLStatement, TableConstraint}, + ast::{ColumnDef, ColumnOptionDef, Statement as SQLStatement, TableConstraint}, dialect::{keywords::Keyword, Dialect, GenericDialect}, parser::{Parser, ParserError}, tokenizer::{Token, Tokenizer}, @@ -59,15 +59,6 @@ pub struct CreateExternalTable { pub location: String, } -/// DataFusion extension DDL for `EXPLAIN` and `EXPLAIN VERBOSE` -#[derive(Debug, Clone, PartialEq)] -pub struct ExplainPlan { - /// If true, dumps more intermediate plans and results of optimizaton passes - pub verbose: bool, - /// The statement for which to generate an planning explanation - pub statement: Box, -} - /// DataFusion Statement representations. /// /// Tokens parsed by `DFParser` are converted into these values. @@ -77,16 +68,14 @@ pub enum Statement { Statement(SQLStatement), /// Extension: `CREATE EXTERNAL TABLE` CreateExternalTable(CreateExternalTable), - /// Extension: `EXPLAIN ` - Explain(ExplainPlan), } /// SQL Parser -pub struct DFParser { - parser: Parser, +pub struct DFParser<'a> { + parser: Parser<'a>, } -impl DFParser { +impl<'a> DFParser<'a> { /// Parse the specified tokens pub fn new(sql: &str) -> Result { let dialect = &GenericDialect {}; @@ -96,12 +85,13 @@ impl DFParser { /// Parse the specified tokens with dialect pub fn new_with_dialect( sql: &str, - dialect: &dyn Dialect, + dialect: &'a dyn Dialect, ) -> Result { let mut tokenizer = Tokenizer::new(dialect, sql); let tokens = tokenizer.tokenize()?; + Ok(DFParser { - parser: Parser::new(tokens), + parser: Parser::new(tokens, dialect), }) } @@ -155,10 +145,6 @@ impl DFParser { // use custom parsing self.parse_create() } - Keyword::NoKeyword if w.value.to_uppercase() == "EXPLAIN" => { - self.parser.next_token(); - self.parse_explain() - } _ => { // use the native parser Ok(Statement::Statement(self.parser.parse_statement()?)) @@ -181,26 +167,6 @@ impl DFParser { } } - /// Parse an SQL EXPLAIN statement. - pub fn parse_explain(&mut self) -> Result { - // Parser is at the token immediately after EXPLAIN - // Check for EXPLAIN VERBOSE - let verbose = match self.parser.peek_token() { - Token::Word(w) => match w.keyword { - Keyword::NoKeyword if w.value.to_uppercase() == "VERBOSE" => { - self.parser.next_token(); - true - } - _ => false, - }, - _ => false, - }; - - let statement = Box::new(self.parse_statement()?); - let explain_plan = ExplainPlan { statement, verbose }; - Ok(Statement::Explain(explain_plan)) - } - // This is a copy of the equivalent implementation in sqlparser. fn parse_columns( &mut self, @@ -250,10 +216,21 @@ impl DFParser { }; let mut options = vec![]; loop { - match self.parser.peek_token() { - Token::EOF | Token::Comma | Token::RParen => break, - _ => options.push(self.parser.parse_column_option_def()?), - } + if self.parser.parse_keyword(Keyword::CONSTRAINT) { + let name = Some(self.parser.parse_identifier()?); + if let Some(option) = self.parser.parse_optional_column_option()? { + options.push(ColumnOptionDef { name, option }); + } else { + return self.expected( + "constraint details after CONSTRAINT ", + self.parser.peek_token(), + ); + } + } else if let Some(option) = self.parser.parse_optional_column_option()? { + options.push(ColumnOptionDef { name: None, option }); + } else { + break; + }; } Ok(ColumnDef { name, diff --git a/rust/datafusion/src/sql/planner.rs b/rust/datafusion/src/sql/planner.rs index 53f7abcd23c..78f3b8fab45 100644 --- a/rust/datafusion/src/sql/planner.rs +++ b/rust/datafusion/src/sql/planner.rs @@ -39,12 +39,11 @@ use crate::{ use arrow::datatypes::*; -use super::parser::ExplainPlan; use crate::prelude::JoinType; use sqlparser::ast::{ - BinaryOperator, DataType as SQLDataType, Expr as SQLExpr, Join, JoinConstraint, - JoinOperator, Query, Select, SelectItem, SetExpr, TableFactor, TableWithJoins, - UnaryOperator, Value, + BinaryOperator, DataType as SQLDataType, Expr as SQLExpr, FunctionArg, Join, + JoinConstraint, JoinOperator, Query, Select, SelectItem, SetExpr, TableFactor, + TableWithJoins, UnaryOperator, Value, }; use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption}; use sqlparser::ast::{OrderByExpr, Statement}; @@ -85,13 +84,17 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { match statement { DFStatement::CreateExternalTable(s) => self.external_table_to_plan(&s), DFStatement::Statement(s) => self.sql_statement_to_plan(&s), - DFStatement::Explain(s) => self.explain_statement_to_plan(&(*s)), } } /// Generate a logical plan from an SQL statement pub fn sql_statement_to_plan(&self, sql: &Statement) -> Result { match sql { + Statement::Explain { + verbose, + statement, + analyze: _, + } => self.explain_statement_to_plan(*verbose, &statement), Statement::Query(query) => self.query_to_plan(&query), _ => Err(DataFusionError::NotImplemented( "Only SELECT statements are implemented".to_string(), @@ -162,10 +165,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { /// pub fn explain_statement_to_plan( &self, - explain_plan: &ExplainPlan, + verbose: bool, + statement: &Statement, ) -> Result { - let verbose = explain_plan.verbose; - let plan = self.statement_to_plan(&explain_plan.statement)?; + let plan = self.sql_statement_to_plan(&statement)?; let stringified_plans = vec![StringifiedPlan::new( PlanType::LogicalPlan, @@ -328,6 +331,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { TableFactor::NestedJoin(table_with_joins) => { self.plan_table_with_joins(table_with_joins) } + // @todo Support TableFactory::TableFunction? + _ => Err(DataFusionError::NotImplemented(format!( + "Unsupported ast node {:?} in create_relation", + relation + ))), } } @@ -608,6 +616,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(expr) } + fn sql_fn_arg_to_logical_expr(&self, sql: &FunctionArg) -> Result { + match sql { + FunctionArg::Named { name: _, arg } => self.sql_expr_to_logical_expr(arg), + FunctionArg::Unnamed(value) => self.sql_expr_to_logical_expr(value), + } + } + fn sql_expr_to_logical_expr(&self, sql: &SQLExpr) -> Result { match sql { SQLExpr::Value(Value::Number(n)) => match n.parse::() { @@ -728,6 +743,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { _ => Ok(Expr::Negative(Box::new(self.sql_expr_to_logical_expr(expr)?))), } } + _ => Err(DataFusionError::NotImplemented(format!( + "Unsupported SQL unary operator {:?}", + op + ))), }, SQLExpr::Between { @@ -784,7 +803,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let args = function .args .iter() - .map(|a| self.sql_expr_to_logical_expr(a)) + .map(|a| self.sql_fn_arg_to_logical_expr(a)) .collect::>>()?; return Ok(Expr::ScalarFunction { fun, args }); @@ -797,16 +816,18 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .args .iter() .map(|a| match a { - SQLExpr::Value(Value::Number(_)) => Ok(lit(1_u8)), - SQLExpr::Wildcard => Ok(lit(1_u8)), - _ => self.sql_expr_to_logical_expr(a), + FunctionArg::Unnamed(SQLExpr::Value(Value::Number( + _, + ))) => Ok(lit(1_u8)), + FunctionArg::Unnamed(SQLExpr::Wildcard) => Ok(lit(1_u8)), + _ => self.sql_fn_arg_to_logical_expr(a), }) .collect::>>()? } else { function .args .iter() - .map(|a| self.sql_expr_to_logical_expr(a)) + .map(|a| self.sql_fn_arg_to_logical_expr(a)) .collect::>>()? }; @@ -823,7 +844,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let args = function .args .iter() - .map(|a| self.sql_expr_to_logical_expr(a)) + .map(|a| self.sql_fn_arg_to_logical_expr(a)) .collect::>>()?; Ok(Expr::ScalarUDF { fun: fm, args }) @@ -833,7 +854,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let args = function .args .iter() - .map(|a| self.sql_expr_to_logical_expr(a)) + .map(|a| self.sql_fn_arg_to_logical_expr(a)) .collect::>>()?; Ok(Expr::AggregateUDF { fun: fm, args })