Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIx VALUES tuples type casts #12104

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions datafusion/expr/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,67 @@ impl LogicalPlanBuilder {
Ok(Self::from(LogicalPlan::Values(Values { schema, values })))
}

/// Create a values list based relation using the provided column datatypes.
///
/// By default, it assigns the names column1, column2, etc. to the columns of a VALUES table.
/// The column names are not specified by the SQL standard and different database systems do it differently,
/// so it's usually better to override the default names with a table alias list.
///
/// If the values include params/binders such as $1, $2, $3, etc, then the `param_data_types` should be provided.
pub fn values_with_types(
mut values: Vec<Vec<Expr>>,
field_types: &[DataType],
) -> Result<Self> {
if values.is_empty() {
return plan_err!("Values list cannot be empty");
}
let n_cols = values[0].len();
if n_cols == 0 {
return plan_err!("Values list cannot be zero length");
}
if n_cols != field_types.len() {
return plan_err!(
"Values list does not match the provided number of data types: got {} values but expected {}", n_cols,
field_types.len()
);
}
for (i, row) in values.iter().enumerate() {
if row.len() != n_cols {
return plan_err!(
"Inconsistent data length across values list: got {} values in row {} but expected {}",
row.len(),
i,
n_cols
);
}
}

let empty_schema = DFSchema::empty();
// wrap cast if data type is not same as common type.
for row in &mut values {
for (j, field_type) in field_types.iter().enumerate() {
if let Expr::Literal(ScalarValue::Null) = row[j] {
row[j] = Expr::Literal(ScalarValue::try_from(field_type.clone())?);
} else {
row[j] =
std::mem::take(&mut row[j]).cast_to(field_type, &empty_schema)?;
}
}
}
let fields = field_types
.iter()
.enumerate()
.map(|(j, data_type)| {
// naming is following convention https://www.postgresql.org/docs/current/queries-values.html
let name = &format!("column{}", j + 1);
Field::new(name, data_type.clone(), true)
})
.collect::<Vec<_>>();
let dfschema = DFSchema::from_unqualified_fields(fields.into(), HashMap::new())?;
let schema = DFSchemaRef::new(dfschema);
Ok(Self::from(LogicalPlan::Values(Values { schema, values })))
}

/// Convert a table provider into a builder with a TableScan
///
/// Note that if you pass a string as `table_name`, it is treated
Expand Down
16 changes: 16 additions & 0 deletions datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ pub struct PlannerContext {
/// Data types for numbered parameters ($1, $2, etc), if supplied
/// in `PREPARE` statement
prepare_param_data_types: Arc<Vec<DataType>>,
/// Column types for VALUES tuples during an INSERT.
values_column_data_types: Arc<Vec<DataType>>,
/// Map of CTE name to logical plan of the WITH clause.
/// Use `Arc<LogicalPlan>` to allow cheap cloning
ctes: HashMap<String, Arc<LogicalPlan>>,
Expand All @@ -151,6 +153,7 @@ impl PlannerContext {
pub fn new() -> Self {
Self {
prepare_param_data_types: Arc::new(vec![]),
values_column_data_types: Arc::new(vec![]),
ctes: HashMap::new(),
outer_query_schema: None,
outer_from_schema: None,
Expand All @@ -166,6 +169,14 @@ impl PlannerContext {
self
}

pub fn with_values_column_data_types(
mut self,
values_column_data_types: Vec<DataType>,
) -> Self {
self.values_column_data_types = values_column_data_types.into();
self
}

// return a reference to the outer queries schema
pub fn outer_query_schema(&self) -> Option<&DFSchema> {
self.outer_query_schema.as_ref().map(|s| s.as_ref())
Expand Down Expand Up @@ -209,6 +220,11 @@ impl PlannerContext {
&self.prepare_param_data_types
}

/// Return the types of columns in VALUES tuples if known
pub fn values_column_data_types(&self) -> &[DataType] {
&self.values_column_data_types
}

/// returns true if there is a Common Table Expression (CTE) /
/// Subquery for the specified name
pub fn contains_cte(&self, cte_name: &str) -> bool {
Expand Down
9 changes: 7 additions & 2 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1468,10 +1468,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
}
}
let prepare_param_data_types = prepare_param_data_types.into_values().collect();
let values_column_data_types = fields
.iter()
.map(|f| f.data_type().clone())
.collect::<Vec<_>>();

// Projection
let mut planner_context =
PlannerContext::new().with_prepare_param_data_types(prepare_param_data_types);
let mut planner_context = PlannerContext::new()
.with_prepare_param_data_types(prepare_param_data_types)
.with_values_column_data_types(values_column_data_types);
let source = self.query_to_plan(*source, &mut planner_context)?;
if fields.len() != source.schema().fields().len() {
plan_err!("Column count doesn't match insert query!")?;
Expand Down
7 changes: 6 additions & 1 deletion datafusion/sql/src/values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
.collect::<Result<Vec<_>>>()
})
.collect::<Result<Vec<_>>>()?;
LogicalPlanBuilder::values(values)?.build()
let column_types = planner_context.values_column_data_types();
if column_types.is_empty() {
LogicalPlanBuilder::values(values)?.build()
} else {
LogicalPlanBuilder::values_with_types(values, column_types)?.build()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not convinced this is a good idea, as it will lead to inconsistent behavior or statements that should be equivalent. See #12103 (comment)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with @findepi -- #12103 (comment)

Thank you @davisp for this PR -- let us know what you think about the alternate proposal

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm definitely on board in making this more general. It didn't feel super awesome to be fixing things this specifically in the first place, it was just the first thing that came to mind.

@alamb You mentioned in your comment on #12103 that there are other coercion patterns to follow, can you point me at anything vaguely similar to reference? Skimming the DataType docs, I'm not immediately seeing how I'd handle the expansion correctly.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}
}
}
2 changes: 1 addition & 1 deletion datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3195,7 +3195,7 @@ fn lateral_left_join() {

#[test]
fn lateral_nested_left_join() {
let sql = "SELECT * FROM
let sql = "SELECT * FROM
j1, \
(j2 LEFT JOIN LATERAL (SELECT * FROM j3 WHERE j1_id + j2_id = j3_id) AS j3 ON(true))";
let expected = "Projection: *\
Expand Down
16 changes: 15 additions & 1 deletion datafusion/sqllogictest/test_files/insert.slt
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ insert into table_without_values(id, id) values(3, 3);
statement error Arrow error: Cast error: Cannot cast string 'zoo' to value of Int64 type
insert into table_without_values(name, id) values(4, 'zoo');

statement error Error during planning: Column count doesn't match insert query!
statement error Error during planning: Values list does not match the provided number of data types: got 2 values but expected 1
insert into table_without_values(id) values(4, 'zoo');

# insert NULL values for the missing column (name)
Expand Down Expand Up @@ -433,3 +433,17 @@ drop table test_column_defaults

statement error DataFusion error: Error during planning: Column reference is not allowed in the DEFAULT expression : Schema error: No field named a.
create table test_column_defaults(a int, b int default a+1)

# test value casting
statement ok
create table test_column_insert_cast(
a BIGINT UNSIGNED
);

query I
insert into test_column_insert_cast(a) values (0), (12775823699315690233);
----
2

statement ok
drop table test_column_insert_cast
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/insert_to_external.slt
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ insert into table_without_values(id, id) values(3, 3);
statement error Arrow error: Cast error: Cannot cast string 'zoo' to value of Int64 type
insert into table_without_values(name, id) values(4, 'zoo');

statement error Error during planning: Column count doesn't match insert query!
statement error Error during planning: Values list does not match the provided number of data types: got 2 values but expected 1
insert into table_without_values(id) values(4, 'zoo');

# insert NULL values for the missing column (name)
Expand Down
Loading