Skip to content

Commit

Permalink
feat: support prql (#14922)
Browse files Browse the repository at this point in the history
* feat: support prql

* taplo format

* fix logic tests

---------

Co-authored-by: tangjiaqi <[email protected]>
  • Loading branch information
ncuwaln and tangjiaqi authored Mar 18, 2024
1 parent 19b1fd3 commit e4e0ebe
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 12 deletions.
9 changes: 5 additions & 4 deletions src/query/ast/src/parser/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ pub enum Dialect {
PostgreSQL,
MySQL,
Hive,
PRQL,
Experimental,
}

Expand All @@ -95,31 +96,31 @@ impl Dialect {
Dialect::MySQL => c == '`',
Dialect::Hive => c == '`',
// TODO: remove '`' quote support once mysql handler correctly set mysql dialect.
Dialect::Experimental | Dialect::PostgreSQL => c == '"' || c == '`',
Dialect::Experimental | Dialect::PostgreSQL | Dialect::PRQL => c == '"' || c == '`',
}
}

pub fn is_string_quote(&self, c: char) -> bool {
match self {
Dialect::MySQL => c == '\'' || c == '"',
Dialect::Hive => c == '\'' || c == '"',
Dialect::Experimental | Dialect::PostgreSQL => c == '\'',
Dialect::Experimental | Dialect::PostgreSQL | Dialect::PRQL => c == '\'',
}
}

pub fn is_null_biggest(&self) -> bool {
match self {
Dialect::MySQL => false,
Dialect::Hive => false,
Dialect::Experimental | Dialect::PostgreSQL => true,
Dialect::Experimental | Dialect::PostgreSQL | Dialect::PRQL => true,
}
}

pub fn substr_index_zero_literal_as_one(&self) -> bool {
match self {
Dialect::MySQL => false,
Dialect::Hive => true,
Dialect::Experimental | Dialect::PostgreSQL => false,
Dialect::Experimental | Dialect::PostgreSQL | Dialect::PRQL => false,
}
}
}
2 changes: 1 addition & 1 deletion src/query/settings/src/settings_default.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ impl DefaultSettings {
value: UserSettingValue::String("PostgreSQL".to_owned()),
desc: "Sets the SQL dialect. Available values include \"PostgreSQL\", \"MySQL\", \"Experimental\", and \"Hive\".",
mode: SettingMode::Both,
range: Some(SettingRange::String(vec!["PostgreSQL".into(), "MySQL".into(), "Experimental".into(), "Hive".into()])),
range: Some(SettingRange::String(vec!["PostgreSQL".into(), "MySQL".into(), "Experimental".into(), "Hive".into(), "Prql".into()])),
}),
("enable_dphyp", DefaultSettingValue {
value: UserSettingValue::UInt64(1),
Expand Down
1 change: 1 addition & 0 deletions src/query/settings/src/settings_getter_setter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ impl Settings {
"hive" => Ok(Dialect::Hive),
"mysql" => Ok(Dialect::MySQL),
"experimental" => Ok(Dialect::Experimental),
"prql" => Ok(Dialect::PRQL),
_ => Ok(Dialect::PostgreSQL),
}
}
Expand Down
1 change: 1 addition & 0 deletions src/query/sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ opendal = { workspace = true }
ordered-float = { workspace = true }
parking_lot = { workspace = true }
percent-encoding = "2"
prqlc = "0.11.3"
regex = { workspace = true }
roaring = "0.10.1"
serde = { workspace = true }
Expand Down
46 changes: 39 additions & 7 deletions src/query/sql/src/planner/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@ use databend_common_ast::parser::Dialect;
use databend_common_catalog::catalog::CatalogManager;
use databend_common_catalog::query_kind::QueryKind;
use databend_common_catalog::table_context::TableContext;
use databend_common_exception::ErrorCode;
use databend_common_exception::Result;
use derive_visitor::DriveMut;
use log::info;
use log::warn;
use parking_lot::RwLock;

use super::semantic::AggregateRewriter;
Expand Down Expand Up @@ -67,8 +70,31 @@ impl Planner {
pub async fn plan_sql(&mut self, sql: &str) -> Result<(Plan, PlanExtras)> {
let settings = self.ctx.get_settings();
let sql_dialect = settings.get_sql_dialect()?;
// compile prql to sql for prql dialect
let mut prql_converted = false;
let final_sql: String = match sql_dialect == Dialect::PRQL {
true => {
let options = prqlc::Options::default();
match prqlc::compile(sql, &options) {
Ok(res) => {
info!("Try convert prql to sql succeed, generated sql: {}", &res);
prql_converted = true;
res
}
Err(e) => {
warn!(
"Try convert prql to sql failed, still use raw sql to parse. error: {}",
e.to_string()
);
sql.to_string()
}
}
}
false => sql.to_string(),
};

// Step 1: Tokenize the SQL.
let mut tokenizer = Tokenizer::new(sql).peekable();
let mut tokenizer = Tokenizer::new(&final_sql).peekable();

// Only tokenize the beginning tokens for `INSERT INTO` statement because the rest tokens after `VALUES` is unused.
// Stop the tokenizer on unrecognized token because some values inputs (e.g. CSV) may not be valid for the tokenizer.
Expand All @@ -83,8 +109,8 @@ impl Planner {
(&mut tokenizer)
.take(PROBE_INSERT_INITIAL_TOKENS)
.take_while(|token| token.is_ok())
// Make sure the token stream is always ended with EOI.
.chain(std::iter::once(Ok(Token::new_eoi(sql))))
// Make sure the tokens stream is always ended with EOI.
.chain(std::iter::once(Ok(Token::new_eoi(&final_sql))))
.collect::<Result<_>>()
.unwrap()
} else {
Expand All @@ -101,6 +127,12 @@ impl Planner {
} else {
parse_sql(&tokens, sql_dialect)?
};
if !matches!(stmt, Statement::SetVariable { .. })
&& sql_dialect == Dialect::PRQL
&& !prql_converted
{
return Err(ErrorCode::SyntaxException("convert prql to sql failed."));
}

if matches!(stmt, Statement::CopyIntoLocation(_)) {
// Indicate binder there is no need to collect column statistics for the binding table.
Expand Down Expand Up @@ -161,15 +193,15 @@ impl Planner {
.take(tokens.len() * 2)
.take_while(|token| token.is_ok())
.map(|token| token.unwrap())
// Make sure the token stream is always ended with EOI.
.chain(std::iter::once(Token::new_eoi(sql)));
// Make sure the tokens stream is always ended with EOI.
.chain(std::iter::once(Token::new_eoi(&final_sql)));
tokens.extend(iter);
} else {
let iter = (&mut tokenizer)
.take_while(|token| token.is_ok())
.map(|token| token.unwrap())
// Make sure the token stream is always ended with EOI.
.chain(std::iter::once(Token::new_eoi(sql)));
// Make sure the tokens stream is always ended with EOI.
.chain(std::iter::once(Token::new_eoi(&final_sql)));
tokens.extend(iter);
};
} else {
Expand Down
56 changes: 56 additions & 0 deletions tests/sqllogictests/suites/query/02_function/02_0072_prql.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
statement ok
DROP DATABASE IF EXISTS prql

statement ok
CREATE DATABASE prql

statement ok
CREATE TABLE `prql`.`aboba` (`user_id` INT UNSIGNED NULL,`message` VARCHAR NULL,`creation_date` TIMESTAMP NULL,`metric` FLOAT NULL) ENGINE=FUSE CLUSTER BY (user_id)

statement ok
INSERT INTO `prql`.`aboba` (user_id, message, creation_date, metric) VALUES (101, 'xxx', to_datetime('2019-01-01 00:00:00'), -1.0), (102, 'yyy', to_datetime('2019-02-01 00:00:00'), 1.41421 ), (102, 'zzz', to_datetime('2019-03-01 00:00:00'), 2.718), (101, 'xyz', to_datetime('2019-05-01 00:00:00'), 3.14159), (103, 'qwer', to_datetime('2019-04-01 00:00:00'), 42)

statement ok
set sql_dialect = 'prql'

query ITIT
from `prql`.`aboba` | derive { a = 2, b = s"LEFT(message, 2)" } | select { user_id, message, a, b } | sort {user_id, message}
----
101 xxx 2 xx
101 xyz 2 xy
102 yyy 2 yy
102 zzz 2 zz
103 qwer 2 qw

query IR
from `prql`.`aboba` | filter user_id > 101 | group user_id ( aggregate { metrics = sum metric }) | sort {user_id}
----
102 4.132209897041321
103 42.0

query ITIR
from `prql`.`aboba` | select { user_id, message, metric } | derive creation_date = s"TO_UNIX_TIMESTAMP(creation_date)" | select { user_id, message, creation_date, metric} | sort { user_id, message }
----
101 xxx 1546300800 -1.0
101 xyz 1556668800 3.14159
102 yyy 1548979200 1.41421
102 zzz 1551398400 2.718
103 qwer 1554076800 42.0

statement error 1005
SELECT user_id, message, TO_UNIX_TIMESTAMP(creation_date) as creation_date, metric FROM `prql`.`aboba` order by user_id, message

statement ok
set sql_dialect = 'PostgreSQL'

query ITIR
SELECT user_id, message, TO_UNIX_TIMESTAMP(creation_date) as creation_date, metric FROM `prql`.`aboba` order by user_id, message
----
101 xxx 1546300800 -1.0
101 xyz 1556668800 3.14159
102 yyy 1548979200 1.41421
102 zzz 1551398400 2.718
103 qwer 1554076800 42.0

statement ok
DROP DATABASE prql

0 comments on commit e4e0ebe

Please sign in to comment.