Skip to content

Commit

Permalink
feat: copy option 'pattern' support variable. (#16525)
Browse files Browse the repository at this point in the history
* feat: copy option 'pattern' support variable.

* refactor: pattern use type LiteralStringOrVariable.

* fix

* fix
  • Loading branch information
youngsofun authored Oct 10, 2024
1 parent 131d2d8 commit 034ec72
Show file tree
Hide file tree
Showing 9 changed files with 138 additions and 28 deletions.
31 changes: 28 additions & 3 deletions src/query/ast/src/ast/statements/copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ pub struct CopyIntoTableStmt {

// files to load
pub files: Option<Vec<String>>,
pub pattern: Option<String>,
pub pattern: Option<LiteralStringOrVariable>,
pub force: bool,

// copy options
Expand Down Expand Up @@ -110,7 +110,7 @@ impl Display for CopyIntoTableStmt {
}

if let Some(pattern) = &self.pattern {
write!(f, " PATTERN = '{}'", pattern)?;
write!(f, " PATTERN = {}", pattern)?;
}

if !self.file_format.is_empty() {
Expand Down Expand Up @@ -438,9 +438,34 @@ impl Display for FileLocation {
}
}

/// A string-valued option given either as a quoted string literal or as a
/// reference to a variable.
///
/// Arbitrary expressions are intentionally not supported here, because:
/// 1. a variable can always be created first and then used directly;
/// 2. columns cannot be referred to in these positions.
///
/// Can be extended to all types of literals later if needed.
///
/// `Eq` is derived alongside `PartialEq` (both variants hold only a `String`),
/// so types embedding this enum are free to derive `Eq` as well.
#[derive(Debug, Clone, PartialEq, Eq, Drive, DriveMut)]
pub enum LiteralStringOrVariable {
    /// The value written directly as a string literal.
    Literal(String),
    /// The name of the variable that holds the value.
    Variable(String),
}

impl Display for LiteralStringOrVariable {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
LiteralStringOrVariable::Literal(s) => {
write!(f, "'{s}'")
}
LiteralStringOrVariable::Variable(s) => {
write!(f, "${s}")
}
}
}
}

pub enum CopyIntoTableOption {
Files(Vec<String>),
Pattern(String),
Pattern(LiteralStringOrVariable),
FileFormat(FileFormatOptions),
ValidationMode(String),
SizeLimit(usize),
Expand Down
11 changes: 6 additions & 5 deletions src/query/ast/src/ast/statements/stage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use crate::ast::write_comma_separated_string_list;
use crate::ast::write_comma_separated_string_map;
use crate::ast::CreateOption;
use crate::ast::FileFormatOptions;
use crate::ast::LiteralStringOrVariable;
use crate::ast::UriLocation;

#[derive(Debug, Clone, PartialEq, Eq, Drive, DriveMut)]
Expand Down Expand Up @@ -80,10 +81,10 @@ impl Display for CreateStageStmt {
}
}

#[derive(Debug, Clone, PartialEq, Eq, Drive, DriveMut)]
#[derive(Debug, Clone, PartialEq, Drive, DriveMut)]
pub enum SelectStageOption {
Files(Vec<String>),
Pattern(String),
Pattern(LiteralStringOrVariable),
FileFormat(String),
Connection(BTreeMap<String, String>),
}
Expand All @@ -103,10 +104,10 @@ impl SelectStageOptions {
}
}

#[derive(Debug, Clone, PartialEq, Eq, Default, Drive, DriveMut)]
#[derive(Debug, Clone, PartialEq, Default, Drive, DriveMut)]
pub struct SelectStageOptions {
pub files: Option<Vec<String>>,
pub pattern: Option<String>,
pub pattern: Option<LiteralStringOrVariable>,
pub file_format: Option<String>,
pub connection: BTreeMap<String, String>,
}
Expand Down Expand Up @@ -150,7 +151,7 @@ impl Display for SelectStageOptions {
}

if let Some(pattern) = self.pattern.as_ref() {
write!(f, " PATTERN => '{}',", pattern)?;
write!(f, " PATTERN => {},", pattern)?;
}

if !self.connection.is_empty() {
Expand Down
10 changes: 9 additions & 1 deletion src/query/ast/src/parser/copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use crate::ast::CopyIntoLocationStmt;
use crate::ast::CopyIntoTableOption;
use crate::ast::CopyIntoTableSource;
use crate::ast::CopyIntoTableStmt;
use crate::ast::LiteralStringOrVariable;
use crate::ast::Statement;
use crate::ast::Statement::CopyIntoLocation;
use crate::parser::common::comma_separated_list0;
Expand Down Expand Up @@ -138,14 +139,21 @@ pub fn copy_into(i: Input) -> IResult<Statement> {
)(i)
}

/// Parses an option value that is either a string literal or a variable.
///
/// The string-literal alternative is tried first; if it does not match, the
/// input is parsed with `variable_ident` instead.
pub fn literal_string_or_variable(i: Input) -> IResult<LiteralStringOrVariable> {
    let as_literal = map(literal_string, LiteralStringOrVariable::Literal);
    let as_variable = map(variable_ident, LiteralStringOrVariable::Variable);
    alt((as_literal, as_variable))(i)
}

fn copy_into_table_option(i: Input) -> IResult<CopyIntoTableOption> {
alt((
map(
rule! { FILES ~ "=" ~ "(" ~ #comma_separated_list0(literal_string) ~ ")" },
|(_, _, _, files, _)| CopyIntoTableOption::Files(files),
),
map(
rule! { PATTERN ~ "=" ~ #literal_string },
rule! { PATTERN ~ ^"=" ~ ^#literal_string_or_variable },
|(_, _, pattern)| CopyIntoTableOption::Pattern(pattern),
),
map(rule! { #file_format_clause }, |options| {
Expand Down
36 changes: 22 additions & 14 deletions src/query/ast/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ use crate::parser::query::*;
use crate::parser::token::*;
use crate::parser::Error;
use crate::parser::ErrorKind;
use crate::Span;

pub fn expr(i: Input) -> IResult<Expr> {
context("expression", subexpr(0))(i)
Expand Down Expand Up @@ -643,20 +644,10 @@ impl<'a, I: Iterator<Item = WithSpan<'a, ExprElement>>> PrattParser<I> for ExprP
span: transform_span(elem.span.tokens),
name,
},
ExprElement::VariableAccess(name) => Expr::FunctionCall {
span: transform_span(elem.span.tokens),
func: FunctionCall {
distinct: false,
name: Identifier::from_name(transform_span(elem.span.tokens), "getvariable"),
args: vec![Expr::Literal {
span: transform_span(elem.span.tokens),
value: Literal::String(name),
}],
params: vec![],
window: None,
lambda: None,
},
},
ExprElement::VariableAccess(name) => {
let span = transform_span(elem.span.tokens);
make_func_get_variable(span, name)
}
_ => unreachable!(),
};
Ok(expr)
Expand Down Expand Up @@ -1843,3 +1834,20 @@ pub fn parse_uint(text: &str, radix: u32) -> Result<Literal, ErrorKind> {
})
}
}

/// Builds the expression `getvariable('<name>')` as an `Expr::FunctionCall`.
///
/// The same `span` is attached to the call, to the function identifier and to
/// the string-literal argument carrying the variable name.
pub(crate) fn make_func_get_variable(span: Span, name: String) -> Expr {
    // The variable name is passed to the function as its single string argument.
    let name_arg = Expr::Literal {
        span,
        value: Literal::String(name),
    };
    let func = FunctionCall {
        distinct: false,
        name: Identifier::from_name(span, "getvariable"),
        args: vec![name_arg],
        params: vec![],
        window: None,
        lambda: None,
    };
    Expr::FunctionCall { span, func }
}
3 changes: 2 additions & 1 deletion src/query/ast/src/parser/stage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use crate::ast::FileLocation;
use crate::ast::SelectStageOption;
use crate::ast::UriLocation;
use crate::parser::common::*;
use crate::parser::copy::literal_string_or_variable;
use crate::parser::expr::*;
use crate::parser::input::Input;
use crate::parser::token::*;
Expand Down Expand Up @@ -256,7 +257,7 @@ pub fn select_stage_option(i: Input) -> IResult<SelectStageOption> {
|(_, _, _, files, _)| SelectStageOption::Files(files),
),
map(
rule! { PATTERN ~ ^"=>" ~ ^#literal_string },
rule! { PATTERN ~ ^"=>" ~ ^#literal_string_or_variable },
|(_, _, pattern)| SelectStageOption::Pattern(pattern),
),
map(
Expand Down
8 changes: 6 additions & 2 deletions src/query/ast/tests/it/testdata/stmt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9279,7 +9279,9 @@ Query(
options: SelectStageOptions {
files: None,
pattern: Some(
"[.]*parquet",
Literal(
"[.]*parquet",
),
),
file_format: Some(
"tsv",
Expand Down Expand Up @@ -16830,7 +16832,9 @@ Query(
options: SelectStageOptions {
files: None,
pattern: Some(
"*.parquet",
Literal(
"*.parquet",
),
),
file_format: None,
connection: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,14 @@ impl Binder {
_ => databend_common_base::runtime::block_on(self.ctx.get_file_format(f))?,
}
}
let pattern = match &options.pattern {
None => None,
Some(pattern) => Some(Self::resolve_copy_pattern(self.ctx.clone(), pattern)?),
};

let files_info = StageFilesInfo {
path,
pattern: options.pattern.clone(),
pattern,
files: options.files.clone(),
};
let table_ctx = self.ctx.clone();
Expand Down
29 changes: 28 additions & 1 deletion src/query/sql/src/planner/binder/copy_into_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use databend_common_ast::ast::Hint;
use databend_common_ast::ast::HintItem;
use databend_common_ast::ast::Identifier;
use databend_common_ast::ast::Literal;
use databend_common_ast::ast::LiteralStringOrVariable;
use databend_common_ast::ast::Query;
use databend_common_ast::ast::SelectTarget;
use databend_common_ast::ast::SetExpr;
Expand All @@ -42,6 +43,7 @@ use databend_common_config::GlobalConfig;
use databend_common_exception::ErrorCode;
use databend_common_exception::Result;
use databend_common_expression::infer_table_schema;
use databend_common_expression::shrink_scalar;
use databend_common_expression::types::DataType;
use databend_common_expression::DataSchema;
use databend_common_expression::DataSchemaRef;
Expand Down Expand Up @@ -111,6 +113,26 @@ impl<'a> Binder {
}
}

/// Resolves a `PATTERN` option into the concrete pattern string.
///
/// * A literal is returned as-is (cloned).
/// * A variable is looked up through `ctx.get_variable`; a missing variable
///   is treated as `Scalar::Null`. The value is passed through
///   `shrink_scalar` and must then be a `Scalar::String`.
///
/// # Errors
/// Returns `ErrorCode::BadArguments` when the variable's value is not a string
/// (which includes the case where the variable is not set).
pub(crate) fn resolve_copy_pattern(
    ctx: Arc<dyn TableContext>,
    pattern: &LiteralStringOrVariable,
) -> Result<String> {
    // Literals need no lookup; handle them up front.
    let var_name = match pattern {
        LiteralStringOrVariable::Literal(text) => return Ok(text.clone()),
        LiteralStringOrVariable::Variable(var_name) => var_name,
    };

    let resolved = shrink_scalar(ctx.get_variable(var_name).unwrap_or(Scalar::Null));
    match resolved {
        Scalar::String(text) => Ok(text),
        other => Err(ErrorCode::BadArguments(format!(
            "invalid pattern expr: {other}"
        ))),
    }
}

async fn bind_copy_into_table_common(
&mut self,
bind_context: &mut BindContext,
Expand All @@ -136,10 +158,15 @@ impl<'a> Binder {
let (mut stage_info, path) = resolve_file_location(self.ctx.as_ref(), location).await?;
self.apply_copy_into_table_options(stmt, &mut stage_info)
.await?;
let pattern = match &stmt.pattern {
None => None,
Some(pattern) => Some(Self::resolve_copy_pattern(self.ctx.clone(), pattern)?),
};

let files_info = StageFilesInfo {
path,
files: stmt.files.clone(),
pattern: stmt.pattern.clone(),
pattern,
};
let required_values_schema: DataSchemaRef = Arc::new(
match &stmt.dst_columns {
Expand Down
31 changes: 31 additions & 0 deletions tests/sqllogictests/suites/stage/options/pattern_variable.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
statement ok
create or replace table t1(c1 int, c2 string);

statement ok
set variable pt='it.csv';

query
select $1 from @data/csv/ (pattern => $pt, file_format=>'csv')
----
1
2

query
copy into t1 from @data/csv/ pattern= $pt file_format=(type=csv)
----
csv/it.csv 2 0 NULL NULL

query
select * from t1
----
1 b
2 d

statement ok
unset variable pt;

query error 1006
copy into t1 from @data/csv/ pattern= $p file_format=(type=csv)

query error 1005
copy into t1 from @data/csv/ pattern=x file_format=(type=csv)

0 comments on commit 034ec72

Please sign in to comment.