Skip to content

Commit

Permalink
fix: force attachment to use EmptyFieldAs::FieldDefault. (#14767)
Browse files Browse the repository at this point in the history
force attachment to use EmptyFieldAs::FieldDefault.
youngsofun authored Feb 27, 2024

Verified

This commit was signed with the committer’s verified signature.
booc0mtaco Holloway
1 parent e45d28a commit e88992f
Showing 3 changed files with 28 additions and 12 deletions.
14 changes: 13 additions & 1 deletion src/query/sql/src/planner/binder/copy_into_table.rs
Original file line number Diff line number Diff line change
@@ -48,6 +48,7 @@ use databend_common_expression::DataSchemaRef;
use databend_common_expression::Evaluator;
use databend_common_expression::Scalar;
use databend_common_functions::BUILTIN_FUNCTIONS;
use databend_common_meta_app::principal::EmptyFieldAs;
use databend_common_meta_app::principal::FileFormatOptionsAst;
use databend_common_meta_app::principal::FileFormatParams;
use databend_common_meta_app::principal::NullAs;
@@ -222,10 +223,21 @@ impl<'a> Binder {
resolve_stage_location(self.ctx.as_ref(), &attachment.location[1..]).await?;

if let Some(ref options) = attachment.file_format_options {
stage_info.file_format_params = FileFormatOptionsAst {
let mut params = FileFormatOptionsAst {
options: options.clone(),
}
.try_into()?;
if let FileFormatParams::Csv(ref mut fmt) = &mut params {
// TODO: remove this once (1) old servers are no longer supported and (2) drivers add the option "EmptyFieldAs=FieldDefault".
// CSV attachments are mainly used by drivers for inserts.
// In the future, clients should use EmptyFieldAs=STRING or FieldDefault to distinguish NULL from an empty string.
// However, old servers do not support `empty_field_as`, so clients cannot add the option directly for now.
// As a result we get empty_field_as = NULL here, which raises an error if there is an empty string for a non-nullable string field.
if fmt.empty_field_as == EmptyFieldAs::Null {
fmt.empty_field_as = EmptyFieldAs::FieldDefault;
}
}
stage_info.file_format_params = params;
}
if let Some(ref options) = attachment.copy_options {
stage_info.copy_options.apply(options, true)?;
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
>>>> drop table if exists t1
>>>> create table t1 (a string, b string)
>>>> create table t1 (a string, b string, c string, d string not null)
>>>> drop stage if exists s1
>>>> create stage s1
>>>> copy into @s1 from (select 'Null', 'NULL') file_format = (type = csv)
1 14 14
>>>> copy into @s1 from (select 'Null', 'NULL', '', '') file_format = (type = csv)
1 20 20
<<<<
Succeeded
14
38
20
71
null
>>>> select a is null, b is null from t1
true false
>>>> list @s1
<<<<
>>>> select a is null, b is null, c, d from t1
true false NULL
<<<<
>>>> drop table if exists t1
>>>> drop stage if exists s1
Original file line number Diff line number Diff line change
@@ -4,16 +4,18 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../../../shell_env.sh

stmt "drop table if exists t1"
stmt "create table t1 (a string, b string)"
stmt "create table t1 (a string, b string, c string, d string not null)"

stmt "drop stage if exists s1"
stmt "create stage s1"

query "copy into @s1 from (select 'Null', 'NULL') file_format = (type = csv)"
query "copy into @s1 from (select 'Null', 'NULL', '', '') file_format = (type = csv)"

curl -s -u root: -XPOST "http://localhost:8000/v1/query" --header 'Content-Type: application/json' -d '{"sql": "insert into t1 (a, b) values", "stage_attachment": {"location": "@s1/", "copy_options": {"purge": "true"}, "file_format_options":{"Type": "csv","Binary_Format":"hex", "null_display": "Null"}}, "pagination": { "wait_time_secs": 8}}' | jq -r '.state, .stats.scan_progress.bytes, .stats.write_progress.bytes, .error'
curl -s -u root: -XPOST "http://localhost:8000/v1/query" --header 'Content-Type: application/json' -d '{"sql": "insert into t1 (a, b, c, d) values", "stage_attachment": {"location": "@s1/", "copy_options": {"purge": "true"}, "file_format_options":{"Type": "csv","Binary_Format":"hex", "null_display": "Null"}}, "pagination": { "wait_time_secs": 8}}' | jq -r '.state, .stats.scan_progress.bytes, .stats.write_progress.bytes, .error'

query "select a is null, b is null from t1"
query "list @s1"

query "select a is null, b is null, c, d from t1"

stmt "drop table if exists t1"
stmt "drop stage if exists s1"

0 comments on commit e88992f

Please sign in to comment.