From e88992f7a4781304a6e7f1f54a09e592e593b0bf Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Tue, 27 Feb 2024 21:16:59 +0800 Subject: [PATCH] fix: force attachment to use EmptyFieldAs::FieldDefault. (#14767) force attachment to use EmptyFieldAs::FieldDefault. --- .../sql/src/planner/binder/copy_into_table.rs | 14 +++++++++++++- .../07_0003_insert_with_stage_file_format.result | 16 +++++++++------- .../07_0003_insert_with_stage_file_format.sh | 10 ++++++---- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index 2ab1484fa4c10..31af521193178 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -48,6 +48,7 @@ use databend_common_expression::DataSchemaRef; use databend_common_expression::Evaluator; use databend_common_expression::Scalar; use databend_common_functions::BUILTIN_FUNCTIONS; +use databend_common_meta_app::principal::EmptyFieldAs; use databend_common_meta_app::principal::FileFormatOptionsAst; use databend_common_meta_app::principal::FileFormatParams; use databend_common_meta_app::principal::NullAs; @@ -222,10 +223,21 @@ impl<'a> Binder { resolve_stage_location(self.ctx.as_ref(), &attachment.location[1..]).await?; if let Some(ref options) = attachment.file_format_options { - stage_info.file_format_params = FileFormatOptionsAst { + let mut params = FileFormatOptionsAst { options: options.clone(), } .try_into()?; + if let FileFormatParams::Csv(ref mut fmt) = &mut params { // TODO: remove this once (1) the old server is no longer supported and (2) drivers add the option "EmptyFieldAs=FieldDefault". // CSV attachments are mainly used by drivers for inserts. // In the future, clients should use EmptyFieldAs=STRING or FieldDefault to distinguish NULL from an empty string. // However, the old server does not support `empty_field_as`, so clients cannot add the option directly for now. 
+ // So we get empty_field_as = NULL, which raises an error if there is an empty string for a non-nullable string field. + if fmt.empty_field_as == EmptyFieldAs::Null { + fmt.empty_field_as = EmptyFieldAs::FieldDefault; + } + } + stage_info.file_format_params = params; } if let Some(ref options) = attachment.copy_options { stage_info.copy_options.apply(options, true)?; diff --git a/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.result b/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.result index 4e33642b01a25..967e7f28fcd80 100644 --- a/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.result +++ b/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.result @@ -1,16 +1,18 @@ >>>> drop table if exists t1 ->>>> create table t1 (a string, b string) +>>>> create table t1 (a string, b string, c string, d string not null) >>>> drop stage if exists s1 >>>> create stage s1 ->>>> copy into @s1 from (select 'Null', 'NULL') file_format = (type = csv) -1 14 14 +>>>> copy into @s1 from (select 'Null', 'NULL', '', '') file_format = (type = csv) +1 20 20 <<<< Succeeded -14 -38 +20 +71 null ->>>> select a is null, b is null from t1 -true false +>>>> list @s1 +<<<< +>>>> select a is null, b is null, c, d from t1 +true false NULL <<<< >>>> drop table if exists t1 >>>> drop stage if exists s1 diff --git a/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.sh b/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.sh index eb93b6c440c31..abe82c6ec3b8c 100755 --- a/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.sh +++ b/tests/suites/1_stateful/07_stage_attachment/07_0003_insert_with_stage_file_format.sh @@ -4,16 +4,18 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../../../shell_env.sh stmt "drop table if exists t1" -stmt "create table t1 (a string, b string)" +stmt "create table t1 (a string, b string, c string, d string not null)" stmt "drop stage if exists s1" stmt "create stage s1" -query "copy into @s1 from (select 'Null', 'NULL') file_format = (type = csv)" +query "copy into @s1 from (select 'Null', 'NULL', '', '') file_format = (type = csv)" -curl -s -u root: -XPOST "http://localhost:8000/v1/query" --header 'Content-Type: application/json' -d '{"sql": "insert into t1 (a, b) values", "stage_attachment": {"location": "@s1/", "copy_options": {"purge": "true"}, "file_format_options":{"Type": "csv","Binary_Format":"hex", "null_display": "Null"}}, "pagination": { "wait_time_secs": 8}}' | jq -r '.state, .stats.scan_progress.bytes, .stats.write_progress.bytes, .error' +curl -s -u root: -XPOST "http://localhost:8000/v1/query" --header 'Content-Type: application/json' -d '{"sql": "insert into t1 (a, b, c, d) values", "stage_attachment": {"location": "@s1/", "copy_options": {"purge": "true"}, "file_format_options":{"Type": "csv","Binary_Format":"hex", "null_display": "Null"}}, "pagination": { "wait_time_secs": 8}}' | jq -r '.state, .stats.scan_progress.bytes, .stats.write_progress.bytes, .error' -query "select a is null, b is null from t1" +query "list @s1" + +query "select a is null, b is null, c, d from t1" stmt "drop table if exists t1" stmt "drop stage if exists s1"