From 85851c4ca5494ac59a40523b8a05c8d5e48698b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Villemain?= Date: Mon, 19 Aug 2024 19:20:00 +0200 Subject: [PATCH 1/6] Add more test for target indirection in UPDATE Queries of the form: ``` UPDATE ... SET (a, b) = (SELECT '1', '2') UPDATE ... SET (b, a) = (SELECT '2', '1') ``` Should do the same thing, but currently the order of the attributes in `SET (...)` as rewriten for pushdown is only based on the physical ordering of the attributes in the relation. This leads to several subtle problems including situation where a DROPped then reADDed attributes will change its placement in the attribute list. There are maybe more tests to add in other situation where a SET (MULTIEXPR) is possible, though UPDATE form is pretty unique as alternatives are not supported by citus: `(INSERT .. ON CONFLICT SET (a,b).....` --- .../regress/expected/multi_modifications.out | 191 ++++++++++++++++++ src/test/regress/sql/multi_modifications.sql | 101 +++++++++ 2 files changed, 292 insertions(+) diff --git a/src/test/regress/expected/multi_modifications.out b/src/test/regress/expected/multi_modifications.out index 887003a974b..b0db0f1a3ae 100644 --- a/src/test/regress/expected/multi_modifications.out +++ b/src/test/regress/expected/multi_modifications.out @@ -890,6 +890,16 @@ SELECT * FROM summary_table ORDER BY id; (2 rows) -- try different syntax +UPDATE summary_table SET (average_value, min_value) = + (SELECT avg(value), min(value) FROM raw_table WHERE id = 2) +WHERE id = 2; +SELECT * FROM summary_table ORDER BY id; + id | min_value | average_value | count | uniques +--------------------------------------------------------------------- + 1 | | 200.0000000000000000 | | + 2 | 400 | 450.0000000000000000 | | +(2 rows) + UPDATE summary_table SET (min_value, average_value) = (SELECT min(value), avg(value) FROM raw_table WHERE id = 2) WHERE id = 2; @@ -1297,6 +1307,187 @@ CREATE TABLE multi_modifications.local (a int default 1, b int); INSERT INTO multi_modifications.local VALUES (default, (SELECT min(id) FROM summary_table)); ERROR: subqueries are not supported within INSERT queries HINT: Try rewriting your queries with 'INSERT INTO ... SELECT' syntax. +-- test advanced UPDATE SET () with indirection and physical reordering. +CREATE TABLE updateset ( + id bigint primary key + , col_0 integer + , col_1 integer + , col_2 integer + , col_3 integer + ); +select create_reference_table('updateset'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +insert into updateset values (1, 0, 0, 0, 0); +-- default physical ordering +update updateset +SET (col_0, col_1, col_2, col_3) + = (SELECT 100, 111, 222, 333) +returning *; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 1 | 100 | 111 | 222 | 333 +(1 row) + +select * from updateset; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 1 | 100 | 111 | 222 | 333 +(1 row) + +-- check indirection +update updateset +SET (col_0, col_1, col_3, col_2) + = (SELECT 10, 11, 33, 22) +returning *; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 1 | 10 | 11 | 22 | 33 +(1 row) + +select * from updateset; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 1 | 10 | 11 | 22 | 33 +(1 row) + +update updateset +SET (col_0, col_3, col_1, col_2) + = (SELECT 100, 333, 111, 222) +returning *; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 1 | 100 | 111 | 222 | 333 +(1 row) + +select * from updateset; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 1 | 100 | 111 | 222 | 333 +(1 row) + +update updateset +SET (col_3, col_1) + = (SELECT 3, 1) +returning *; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 1 | 100 | 1 | 222 | 3 +(1 row) + +select * from updateset; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 1 | 100 | 1 | 222 | 3 +(1 row) + +-- check more complex queries with indirection +insert into updateset values (2, 0, 0, 0, 0); +update updateset +SET (col_0, col_1, col_3, col_2) + = (SELECT 10, 11, 33, 22) +where id = 2 +returning *; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 2 | 10 | 11 | 22 | 33 +(1 row) + +select * from updateset where id = 2; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 2 | 10 | 11 | 22 | 33 +(1 row) + +update updateset +SET (col_0, col_3, col_1, col_2) + = (SELECT 100, 333, 111, 222) +where id = 2 +returning *; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 2 | 100 | 111 | 222 | 333 +(1 row) + +select * from updateset where id = 2; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 2 | 100 | 111 | 222 | 333 +(1 row) + +update updateset +SET (col_3, col_1) + = (SELECT 3, 1) +where id = 2 +returning *; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 2 | 100 | 1 | 222 | 3 +(1 row) + +select * from updateset where id = 2; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 2 | 100 | 1 | 222 | 3 +(1 row) + +-- the single row update is expected behavior +insert into updateset values (3, 0, 1, 2, 3); +insert into updateset values (4, 0, 0, 0, 0); +with qq as ( + update updateset + SET (col_1, col_3) + = (SELECT 11, 33) + where id = 4 + returning * +) +update updateset +set (col_2, col_1, col_3) + = (select col_2, col_1, col_3 from qq) +where id in (3, 4) +returning *; + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 3 | 0 | 11 | 0 | 33 +(1 row) + +select * from updateset where id in (3, 4); + id | col_0 | col_1 | col_2 | col_3 +--------------------------------------------------------------------- + 3 | 0 | 11 | 0 | 33 + 4 | 0 | 11 | 0 | 33 +(2 rows) + +-- add more advanced queries ? +-- we want to ensure the reordering the targetlist +-- from indirection is not run when it should not +truncate updateset; +-- change physical ordering +alter table updateset drop col_2; +alter table updateset add col_2 integer; +insert into updateset values (1, 0, 0, 0, 0); +insert into updateset values (2, 0, 0, 0, 0); +update updateset +SET (col_0, col_1, col_2, col_3) + = (SELECT 10, 11, 22, 33) +returning *; + id | col_0 | col_1 | col_3 | col_2 +--------------------------------------------------------------------- + 1 | 10 | 11 | 33 | 22 + 2 | 10 | 11 | 33 | 22 +(2 rows) + +select * from updateset; + id | col_0 | col_1 | col_3 | col_2 +--------------------------------------------------------------------- + 1 | 10 | 11 | 33 | 22 + 2 | 10 | 11 | 33 | 22 +(2 rows) + +DROP TABLE updateset; DROP TABLE insufficient_shards; DROP TABLE raw_table; DROP TABLE summary_table; diff --git a/src/test/regress/sql/multi_modifications.sql b/src/test/regress/sql/multi_modifications.sql index 7977325ea65..071c670bdb6 100644 --- a/src/test/regress/sql/multi_modifications.sql +++ b/src/test/regress/sql/multi_modifications.sql @@ -556,6 +556,12 @@ WHERE id = 1; SELECT * FROM summary_table ORDER BY id; -- try different syntax +UPDATE summary_table SET (average_value, min_value) = + (SELECT avg(value), min(value) FROM raw_table WHERE id = 2) +WHERE id = 2; + +SELECT * FROM summary_table ORDER BY id; + UPDATE summary_table SET (min_value, average_value) = (SELECT min(value), avg(value) FROM raw_table WHERE id = 2) WHERE id = 2; @@ -875,6 +881,101 @@ DELETE FROM summary_table WHERE id < ( CREATE TABLE multi_modifications.local (a int default 1, b int); INSERT INTO multi_modifications.local VALUES (default, (SELECT min(id) FROM summary_table)); +-- test advanced UPDATE SET () with indirection and physical reordering. +CREATE TABLE updateset ( + id bigint primary key + , col_0 integer + , col_1 integer + , col_2 integer + , col_3 integer + ); +select create_reference_table('updateset'); + +insert into updateset values (1, 0, 0, 0, 0); + +-- default physical ordering +update updateset +SET (col_0, col_1, col_2, col_3) + = (SELECT 100, 111, 222, 333) +returning *; +select * from updateset; + +-- check indirection +update updateset +SET (col_0, col_1, col_3, col_2) + = (SELECT 10, 11, 33, 22) +returning *; +select * from updateset; + +update updateset +SET (col_0, col_3, col_1, col_2) + = (SELECT 100, 333, 111, 222) +returning *; +select * from updateset; + +update updateset +SET (col_3, col_1) + = (SELECT 3, 1) +returning *; +select * from updateset; + +-- check more complex queries with indirection +insert into updateset values (2, 0, 0, 0, 0); +update updateset +SET (col_0, col_1, col_3, col_2) + = (SELECT 10, 11, 33, 22) +where id = 2 +returning *; +select * from updateset where id = 2; + +update updateset +SET (col_0, col_3, col_1, col_2) + = (SELECT 100, 333, 111, 222) +where id = 2 +returning *; +select * from updateset where id = 2; + +update updateset +SET (col_3, col_1) + = (SELECT 3, 1) +where id = 2 +returning *; +select * from updateset where id = 2; + +-- the single row update is expected behavior +insert into updateset values (3, 0, 1, 2, 3); +insert into updateset values (4, 0, 0, 0, 0); +with qq as ( + update updateset + SET (col_1, col_3) + = (SELECT 11, 33) + where id = 4 + returning * +) +update updateset +set (col_2, col_1, col_3) + = (select col_2, col_1, col_3 from qq) +where id in (3, 4) +returning *; +select * from updateset where id in (3, 4); + +-- add more advanced queries ? +-- we want to ensure the reordering the targetlist +-- from indirection is not run when it should not +truncate updateset; + +-- change physical ordering +alter table updateset drop col_2; +alter table updateset add col_2 integer; +insert into updateset values (1, 0, 0, 0, 0); +insert into updateset values (2, 0, 0, 0, 0); +update updateset +SET (col_0, col_1, col_2, col_3) + = (SELECT 10, 11, 22, 33) +returning *; +select * from updateset; + +DROP TABLE updateset; DROP TABLE insufficient_shards; DROP TABLE raw_table; DROP TABLE summary_table; From 73f4f67d812449f77c61bd486311780e6c05e936 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Villemain?= Date: Thu, 22 Aug 2024 19:07:50 +0200 Subject: [PATCH 2/6] Fix issue #7674 related to Indirection in UPDATE SET() ruleutils in Citus is based on PostgreSQL source code, but in PostgreSQL ruleutils is not used at the planner stage. For instance, it is assumed after parser that targetList are ordered as they were read, but it's not true after rewriter, the resulting rewrite tree is then provided to planner (and citus), but the ordering of the list is not granted anymore. It's similar to others previous issues reported and still open, as well as to other bugfixes/improvment over time, the most noticable being the ProcessIndirection() which is for domain and similar. However, the implications of this bug are huge for users of `UPDATE SET (...)`: 1. if you used to order by columns order, you're maybe safe: `SET (col1, col2, col3, ...)` 2. if you used not to order by column order: `SET (col2, col1, col3, ...)` then you probably found a problem, or you have one. Note about 1. that despite appearance and your QA, you are at risk: if physical columns ordering is changed (for example after DROPping/ADDing some), the same query which use to apparently works well will silently update other columns... As it is this code is not optimized for performance, not sure it'll be needed. --- .../distributed/deparser/ruleutils_14.c | 153 ++++++++++++++++++ .../distributed/deparser/ruleutils_15.c | 153 ++++++++++++++++++ .../distributed/deparser/ruleutils_16.c | 153 ++++++++++++++++++ 3 files changed, 459 insertions(+) diff --git a/src/backend/distributed/deparser/ruleutils_14.c b/src/backend/distributed/deparser/ruleutils_14.c index 88948cff542..ac0cb269b95 100644 --- a/src/backend/distributed/deparser/ruleutils_14.c +++ b/src/backend/distributed/deparser/ruleutils_14.c @@ -440,6 +440,9 @@ static void get_tablesample_def(TableSampleClause *tablesample, deparse_context *context); static void get_opclass_name(Oid opclass, Oid actual_datatype, StringInfo buf); +static bool is_update_set_with_multiple_columns(List *targetList); +static List *processTargetsIndirection(List *targetList); +static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func); static Node *processIndirection(Node *node, deparse_context *context); static void printSubscripts(SubscriptingRef *aref, deparse_context *context); static char *get_relation_name(Oid relid); @@ -3464,6 +3467,9 @@ get_update_query_targetlist_def(Query *query, List *targetList, } } } + if (is_update_set_with_multiple_columns(targetList)) + targetList = processTargetsIndirection(targetList); + next_ma_cell = list_head(ma_sublinks); cur_ma_sublink = NULL; remaining_ma_columns = 0; @@ -8101,6 +8107,153 @@ get_opclass_name(Oid opclass, Oid actual_datatype, ReleaseSysCache(ht_opc); } +/* + * helper function to evaluate if we are in an SET (...) + * Caller is responsible to check the command type (UPDATE) + */ +static bool is_update_set_with_multiple_columns(List *targetList) +{ + ListCell *lc; + foreach(lc, targetList) { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + Node *expr; + + if (tle->resjunk) + continue; + + expr = strip_implicit_coercions((Node *) tle->expr); + + if (expr && IsA(expr, Param) && + ((Param *) expr)->paramkind == PARAM_MULTIEXPR) + { + return true; + } + } + + // No multi-column set expression found + return false; +} + +/* + * processTargetsIndirection - reorder targets list (from indirection) + * + * We don't change anything but the order the target list. + * The purpose here is to be able to deparse a query tree as if it was + * provided by the PostgreSQL parser, not the rewriter (which is the one + * received by the planner hook). + * + * It's required only for UPDATE SET (MULTIEXPR) queries, other candidates + * are not supported by Citus. + * + * Returns the new target list, reordered. +*/ +static List *processTargetsIndirection(List *targetList) +{ + int nAssignableCols; + int targetListPosition; + bool sawJunk = false; + List *newTargetList = NIL; + ListCell *lc; + + /* Count non-junk columns and ensure they precede junk columns */ + nAssignableCols = 0; + foreach(lc, targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + + if (tle->resjunk) + { + sawJunk = true; + } + else + { + if (sawJunk) + elog(ERROR, "Subplan target list is out of order"); + + nAssignableCols++; + } + } + + /* If no assignable columns, return the original target list */ + if (nAssignableCols == 0) + return targetList; + + /* Reorder the target list */ + /* we start from 1 */ + targetListPosition = 1; + while (nAssignableCols > 0) + { + nAssignableCols--; + + foreach(lc, targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + + if (IsA(tle->expr, FuncExpr)) + { + FuncExpr *funcexpr = (FuncExpr *) tle->expr; + AttrNumber attnum = extract_paramid_from_funcexpr(funcexpr); + + if (attnum == targetListPosition) + { + ereport(DEBUG1, (errmsg("Adding FuncExpr resno: %d", tle->resno))); + newTargetList = lappend(newTargetList, tle); + targetListPosition++; + break; + } + } + else if (IsA(tle->expr, Param)) + { + Param *param = (Param *) tle->expr; + AttrNumber attnum = param->paramid; + + if (attnum == targetListPosition) + { + newTargetList = lappend(newTargetList, tle); + targetListPosition++; + break; + } + } + } + } + + // TODO add check about what we did here ? + + /* Append any remaining junk columns */ + foreach(lc, targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + if (tle->resjunk) + newTargetList = lappend(newTargetList, tle); + } + + return newTargetList; +} + +/* Function to extract paramid from a FuncExpr node */ +static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func) +{ + AttrNumber targetAttnum = InvalidAttrNumber; + ListCell *lc; + + /* Iterate through the arguments of the FuncExpr */ + foreach(lc, func->args) + { + Node *arg = (Node *) lfirst(lc); + + /* Check if the argument is a PARAM node */ + if (IsA(arg, Param)) + { + Param *param = (Param *) arg; + targetAttnum = param->paramid; + + break; // Exit loop once we find the PARAM node + } + } + + return targetAttnum; +} + /* * processIndirection - take care of array and subfield assignment * diff --git a/src/backend/distributed/deparser/ruleutils_15.c b/src/backend/distributed/deparser/ruleutils_15.c index 018468d0bfb..339d3b3d1f1 100644 --- a/src/backend/distributed/deparser/ruleutils_15.c +++ b/src/backend/distributed/deparser/ruleutils_15.c @@ -452,6 +452,9 @@ static void get_tablesample_def(TableSampleClause *tablesample, deparse_context *context); static void get_opclass_name(Oid opclass, Oid actual_datatype, StringInfo buf); +static bool is_update_set_with_multiple_columns(List *targetList); +static List *processTargetsIndirection(List *targetList); +static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func); static Node *processIndirection(Node *node, deparse_context *context); static void printSubscripts(SubscriptingRef *aref, deparse_context *context); static char *get_relation_name(Oid relid); @@ -3529,6 +3532,9 @@ get_update_query_targetlist_def(Query *query, List *targetList, } } } + if (is_update_set_with_multiple_columns(targetList)) + targetList = processTargetsIndirection(targetList); + next_ma_cell = list_head(ma_sublinks); cur_ma_sublink = NULL; remaining_ma_columns = 0; @@ -8331,6 +8337,153 @@ get_opclass_name(Oid opclass, Oid actual_datatype, ReleaseSysCache(ht_opc); } +/* + * helper function to evaluate if we are in an SET (...) + * Caller is responsible to check the command type (UPDATE) + */ +static bool is_update_set_with_multiple_columns(List *targetList) +{ + ListCell *lc; + foreach(lc, targetList) { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + Node *expr; + + if (tle->resjunk) + continue; + + expr = strip_implicit_coercions((Node *) tle->expr); + + if (expr && IsA(expr, Param) && + ((Param *) expr)->paramkind == PARAM_MULTIEXPR) + { + return true; + } + } + + // No multi-column set expression found + return false; +} + +/* + * processTargetsIndirection - reorder targets list (from indirection) + * + * We don't change anything but the order the target list. + * The purpose here is to be able to deparse a query tree as if it was + * provided by the PostgreSQL parser, not the rewriter (which is the one + * received by the planner hook). + * + * It's required only for UPDATE SET (MULTIEXPR) queries, other candidates + * are not supported by Citus. + * + * Returns the new target list, reordered. +*/ +static List *processTargetsIndirection(List *targetList) +{ + int nAssignableCols; + int targetListPosition; + bool sawJunk = false; + List *newTargetList = NIL; + ListCell *lc; + + /* Count non-junk columns and ensure they precede junk columns */ + nAssignableCols = 0; + foreach(lc, targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + + if (tle->resjunk) + { + sawJunk = true; + } + else + { + if (sawJunk) + elog(ERROR, "Subplan target list is out of order"); + + nAssignableCols++; + } + } + + /* If no assignable columns, return the original target list */ + if (nAssignableCols == 0) + return targetList; + + /* Reorder the target list */ + /* we start from 1 */ + targetListPosition = 1; + while (nAssignableCols > 0) + { + nAssignableCols--; + + foreach(lc, targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + + if (IsA(tle->expr, FuncExpr)) + { + FuncExpr *funcexpr = (FuncExpr *) tle->expr; + AttrNumber attnum = extract_paramid_from_funcexpr(funcexpr); + + if (attnum == targetListPosition) + { + ereport(DEBUG1, (errmsg("Adding FuncExpr resno: %d", tle->resno))); + newTargetList = lappend(newTargetList, tle); + targetListPosition++; + break; + } + } + else if (IsA(tle->expr, Param)) + { + Param *param = (Param *) tle->expr; + AttrNumber attnum = param->paramid; + + if (attnum == targetListPosition) + { + newTargetList = lappend(newTargetList, tle); + targetListPosition++; + break; + } + } + } + } + + // TODO add check about what we did here ? + + /* Append any remaining junk columns */ + foreach(lc, targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + if (tle->resjunk) + newTargetList = lappend(newTargetList, tle); + } + + return newTargetList; +} + +/* Function to extract paramid from a FuncExpr node */ +static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func) +{ + AttrNumber targetAttnum = InvalidAttrNumber; + ListCell *lc; + + /* Iterate through the arguments of the FuncExpr */ + foreach(lc, func->args) + { + Node *arg = (Node *) lfirst(lc); + + /* Check if the argument is a PARAM node */ + if (IsA(arg, Param)) + { + Param *param = (Param *) arg; + targetAttnum = param->paramid; + + break; // Exit loop once we find the PARAM node + } + } + + return targetAttnum; +} + /* * processIndirection - take care of array and subfield assignment * diff --git a/src/backend/distributed/deparser/ruleutils_16.c b/src/backend/distributed/deparser/ruleutils_16.c index 7f2a41d75c3..cc0da165b28 100644 --- a/src/backend/distributed/deparser/ruleutils_16.c +++ b/src/backend/distributed/deparser/ruleutils_16.c @@ -469,6 +469,9 @@ static void get_tablesample_def(TableSampleClause *tablesample, deparse_context *context); static void get_opclass_name(Oid opclass, Oid actual_datatype, StringInfo buf); +static bool is_update_set_with_multiple_columns(List *targetList); +static List *processTargetsIndirection(List *targetList); +static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func); static Node *processIndirection(Node *node, deparse_context *context); static void printSubscripts(SubscriptingRef *aref, deparse_context *context); static char *get_relation_name(Oid relid); @@ -3545,6 +3548,9 @@ get_update_query_targetlist_def(Query *query, List *targetList, } } } + if (is_update_set_with_multiple_columns(targetList)) + targetList = processTargetsIndirection(targetList); + next_ma_cell = list_head(ma_sublinks); cur_ma_sublink = NULL; remaining_ma_columns = 0; @@ -8607,6 +8613,153 @@ get_opclass_name(Oid opclass, Oid actual_datatype, ReleaseSysCache(ht_opc); } +/* + * helper function to evaluate if we are in an SET (...) + * Caller is responsible to check the command type (UPDATE) + */ +static bool is_update_set_with_multiple_columns(List *targetList) +{ + ListCell *lc; + foreach(lc, targetList) { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + Node *expr; + + if (tle->resjunk) + continue; + + expr = strip_implicit_coercions((Node *) tle->expr); + + if (expr && IsA(expr, Param) && + ((Param *) expr)->paramkind == PARAM_MULTIEXPR) + { + return true; + } + } + + // No multi-column set expression found + return false; +} + +/* + * processTargetsIndirection - reorder targets list (from indirection) + * + * We don't change anything but the order the target list. + * The purpose here is to be able to deparse a query tree as if it was + * provided by the PostgreSQL parser, not the rewriter (which is the one + * received by the planner hook). + * + * It's required only for UPDATE SET (MULTIEXPR) queries, other candidates + * are not supported by Citus. + * + * Returns the new target list, reordered. +*/ +static List *processTargetsIndirection(List *targetList) +{ + int nAssignableCols; + int targetListPosition; + bool sawJunk = false; + List *newTargetList = NIL; + ListCell *lc; + + /* Count non-junk columns and ensure they precede junk columns */ + nAssignableCols = 0; + foreach(lc, targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + + if (tle->resjunk) + { + sawJunk = true; + } + else + { + if (sawJunk) + elog(ERROR, "Subplan target list is out of order"); + + nAssignableCols++; + } + } + + /* If no assignable columns, return the original target list */ + if (nAssignableCols == 0) + return targetList; + + /* Reorder the target list */ + /* we start from 1 */ + targetListPosition = 1; + while (nAssignableCols > 0) + { + nAssignableCols--; + + foreach(lc, targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + + if (IsA(tle->expr, FuncExpr)) + { + FuncExpr *funcexpr = (FuncExpr *) tle->expr; + AttrNumber attnum = extract_paramid_from_funcexpr(funcexpr); + + if (attnum == targetListPosition) + { + ereport(DEBUG1, (errmsg("Adding FuncExpr resno: %d", tle->resno))); + newTargetList = lappend(newTargetList, tle); + targetListPosition++; + break; + } + } + else if (IsA(tle->expr, Param)) + { + Param *param = (Param *) tle->expr; + AttrNumber attnum = param->paramid; + + if (attnum == targetListPosition) + { + newTargetList = lappend(newTargetList, tle); + targetListPosition++; + break; + } + } + } + } + + // TODO add check about what we did here ? + + /* Append any remaining junk columns */ + foreach(lc, targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + if (tle->resjunk) + newTargetList = lappend(newTargetList, tle); + } + + return newTargetList; +} + +/* Function to extract paramid from a FuncExpr node */ +static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func) +{ + AttrNumber targetAttnum = InvalidAttrNumber; + ListCell *lc; + + /* Iterate through the arguments of the FuncExpr */ + foreach(lc, func->args) + { + Node *arg = (Node *) lfirst(lc); + + /* Check if the argument is a PARAM node */ + if (IsA(arg, Param)) + { + Param *param = (Param *) arg; + targetAttnum = param->paramid; + + break; // Exit loop once we find the PARAM node + } + } + + return targetAttnum; +} + /* * processIndirection - take care of array and subfield assignment * From b0de95012c7173dbb272efc4c9af052f700cf80e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Villemain?= Date: Fri, 22 Nov 2024 10:44:05 +0100 Subject: [PATCH 3/6] Revert "Fix issue #7674 related to Indirection in UPDATE SET()" This reverts commit 73f4f67d812449f77c61bd486311780e6c05e936. --- .../distributed/deparser/ruleutils_14.c | 153 ------------------ .../distributed/deparser/ruleutils_15.c | 153 ------------------ .../distributed/deparser/ruleutils_16.c | 153 ------------------ 3 files changed, 459 deletions(-) diff --git a/src/backend/distributed/deparser/ruleutils_14.c b/src/backend/distributed/deparser/ruleutils_14.c index ac0cb269b95..88948cff542 100644 --- a/src/backend/distributed/deparser/ruleutils_14.c +++ b/src/backend/distributed/deparser/ruleutils_14.c @@ -440,9 +440,6 @@ static void get_tablesample_def(TableSampleClause *tablesample, deparse_context *context); static void get_opclass_name(Oid opclass, Oid actual_datatype, StringInfo buf); -static bool is_update_set_with_multiple_columns(List *targetList); -static List *processTargetsIndirection(List *targetList); -static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func); static Node *processIndirection(Node *node, deparse_context *context); static void printSubscripts(SubscriptingRef *aref, deparse_context *context); static char *get_relation_name(Oid relid); @@ -3467,9 +3464,6 @@ get_update_query_targetlist_def(Query *query, List *targetList, } } } - if (is_update_set_with_multiple_columns(targetList)) - targetList = processTargetsIndirection(targetList); - next_ma_cell = list_head(ma_sublinks); cur_ma_sublink = NULL; remaining_ma_columns = 0; @@ -8107,153 +8101,6 @@ get_opclass_name(Oid opclass, Oid actual_datatype, ReleaseSysCache(ht_opc); } -/* - * helper function to evaluate if we are in an SET (...) - * Caller is responsible to check the command type (UPDATE) - */ -static bool is_update_set_with_multiple_columns(List *targetList) -{ - ListCell *lc; - foreach(lc, targetList) { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - Node *expr; - - if (tle->resjunk) - continue; - - expr = strip_implicit_coercions((Node *) tle->expr); - - if (expr && IsA(expr, Param) && - ((Param *) expr)->paramkind == PARAM_MULTIEXPR) - { - return true; - } - } - - // No multi-column set expression found - return false; -} - -/* - * processTargetsIndirection - reorder targets list (from indirection) - * - * We don't change anything but the order the target list. - * The purpose here is to be able to deparse a query tree as if it was - * provided by the PostgreSQL parser, not the rewriter (which is the one - * received by the planner hook). - * - * It's required only for UPDATE SET (MULTIEXPR) queries, other candidates - * are not supported by Citus. - * - * Returns the new target list, reordered. -*/ -static List *processTargetsIndirection(List *targetList) -{ - int nAssignableCols; - int targetListPosition; - bool sawJunk = false; - List *newTargetList = NIL; - ListCell *lc; - - /* Count non-junk columns and ensure they precede junk columns */ - nAssignableCols = 0; - foreach(lc, targetList) - { - TargetEntry *tle = lfirst_node(TargetEntry, lc); - - if (tle->resjunk) - { - sawJunk = true; - } - else - { - if (sawJunk) - elog(ERROR, "Subplan target list is out of order"); - - nAssignableCols++; - } - } - - /* If no assignable columns, return the original target list */ - if (nAssignableCols == 0) - return targetList; - - /* Reorder the target list */ - /* we start from 1 */ - targetListPosition = 1; - while (nAssignableCols > 0) - { - nAssignableCols--; - - foreach(lc, targetList) - { - TargetEntry *tle = lfirst_node(TargetEntry, lc); - - if (IsA(tle->expr, FuncExpr)) - { - FuncExpr *funcexpr = (FuncExpr *) tle->expr; - AttrNumber attnum = extract_paramid_from_funcexpr(funcexpr); - - if (attnum == targetListPosition) - { - ereport(DEBUG1, (errmsg("Adding FuncExpr resno: %d", tle->resno))); - newTargetList = lappend(newTargetList, tle); - targetListPosition++; - break; - } - } - else if (IsA(tle->expr, Param)) - { - Param *param = (Param *) tle->expr; - AttrNumber attnum = param->paramid; - - if (attnum == targetListPosition) - { - newTargetList = lappend(newTargetList, tle); - targetListPosition++; - break; - } - } - } - } - - // TODO add check about what we did here ? - - /* Append any remaining junk columns */ - foreach(lc, targetList) - { - TargetEntry *tle = lfirst_node(TargetEntry, lc); - if (tle->resjunk) - newTargetList = lappend(newTargetList, tle); - } - - return newTargetList; -} - -/* Function to extract paramid from a FuncExpr node */ -static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func) -{ - AttrNumber targetAttnum = InvalidAttrNumber; - ListCell *lc; - - /* Iterate through the arguments of the FuncExpr */ - foreach(lc, func->args) - { - Node *arg = (Node *) lfirst(lc); - - /* Check if the argument is a PARAM node */ - if (IsA(arg, Param)) - { - Param *param = (Param *) arg; - targetAttnum = param->paramid; - - break; // Exit loop once we find the PARAM node - } - } - - return targetAttnum; -} - /* * processIndirection - take care of array and subfield assignment * diff --git a/src/backend/distributed/deparser/ruleutils_15.c b/src/backend/distributed/deparser/ruleutils_15.c index 339d3b3d1f1..018468d0bfb 100644 --- a/src/backend/distributed/deparser/ruleutils_15.c +++ b/src/backend/distributed/deparser/ruleutils_15.c @@ -452,9 +452,6 @@ static void get_tablesample_def(TableSampleClause *tablesample, deparse_context *context); static void get_opclass_name(Oid opclass, Oid actual_datatype, StringInfo buf); -static bool is_update_set_with_multiple_columns(List *targetList); -static List *processTargetsIndirection(List *targetList); -static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func); static Node *processIndirection(Node *node, deparse_context *context); static void printSubscripts(SubscriptingRef *aref, deparse_context *context); static char *get_relation_name(Oid relid); @@ -3532,9 +3529,6 @@ get_update_query_targetlist_def(Query *query, List *targetList, } } } - if (is_update_set_with_multiple_columns(targetList)) - targetList = processTargetsIndirection(targetList); - next_ma_cell = list_head(ma_sublinks); cur_ma_sublink = NULL; remaining_ma_columns = 0; @@ -8337,153 +8331,6 @@ get_opclass_name(Oid opclass, Oid actual_datatype, ReleaseSysCache(ht_opc); } -/* - * helper function to evaluate if we are in an SET (...) - * Caller is responsible to check the command type (UPDATE) - */ -static bool is_update_set_with_multiple_columns(List *targetList) -{ - ListCell *lc; - foreach(lc, targetList) { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - Node *expr; - - if (tle->resjunk) - continue; - - expr = strip_implicit_coercions((Node *) tle->expr); - - if (expr && IsA(expr, Param) && - ((Param *) expr)->paramkind == PARAM_MULTIEXPR) - { - return true; - } - } - - // No multi-column set expression found - return false; -} - -/* - * processTargetsIndirection - reorder targets list (from indirection) - * - * We don't change anything but the order the target list. - * The purpose here is to be able to deparse a query tree as if it was - * provided by the PostgreSQL parser, not the rewriter (which is the one - * received by the planner hook). - * - * It's required only for UPDATE SET (MULTIEXPR) queries, other candidates - * are not supported by Citus. - * - * Returns the new target list, reordered. -*/ -static List *processTargetsIndirection(List *targetList) -{ - int nAssignableCols; - int targetListPosition; - bool sawJunk = false; - List *newTargetList = NIL; - ListCell *lc; - - /* Count non-junk columns and ensure they precede junk columns */ - nAssignableCols = 0; - foreach(lc, targetList) - { - TargetEntry *tle = lfirst_node(TargetEntry, lc); - - if (tle->resjunk) - { - sawJunk = true; - } - else - { - if (sawJunk) - elog(ERROR, "Subplan target list is out of order"); - - nAssignableCols++; - } - } - - /* If no assignable columns, return the original target list */ - if (nAssignableCols == 0) - return targetList; - - /* Reorder the target list */ - /* we start from 1 */ - targetListPosition = 1; - while (nAssignableCols > 0) - { - nAssignableCols--; - - foreach(lc, targetList) - { - TargetEntry *tle = lfirst_node(TargetEntry, lc); - - if (IsA(tle->expr, FuncExpr)) - { - FuncExpr *funcexpr = (FuncExpr *) tle->expr; - AttrNumber attnum = extract_paramid_from_funcexpr(funcexpr); - - if (attnum == targetListPosition) - { - ereport(DEBUG1, (errmsg("Adding FuncExpr resno: %d", tle->resno))); - newTargetList = lappend(newTargetList, tle); - targetListPosition++; - break; - } - } - else if (IsA(tle->expr, Param)) - { - Param *param = (Param *) tle->expr; - AttrNumber attnum = param->paramid; - - if (attnum == targetListPosition) - { - newTargetList = lappend(newTargetList, tle); - targetListPosition++; - break; - } - } - } - } - - // TODO add check about what we did here ? - - /* Append any remaining junk columns */ - foreach(lc, targetList) - { - TargetEntry *tle = lfirst_node(TargetEntry, lc); - if (tle->resjunk) - newTargetList = lappend(newTargetList, tle); - } - - return newTargetList; -} - -/* Function to extract paramid from a FuncExpr node */ -static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func) -{ - AttrNumber targetAttnum = InvalidAttrNumber; - ListCell *lc; - - /* Iterate through the arguments of the FuncExpr */ - foreach(lc, func->args) - { - Node *arg = (Node *) lfirst(lc); - - /* Check if the argument is a PARAM node */ - if (IsA(arg, Param)) - { - Param *param = (Param *) arg; - targetAttnum = param->paramid; - - break; // Exit loop once we find the PARAM node - } - } - - return targetAttnum; -} - /* * processIndirection - take care of array and subfield assignment * diff --git a/src/backend/distributed/deparser/ruleutils_16.c b/src/backend/distributed/deparser/ruleutils_16.c index cc0da165b28..7f2a41d75c3 100644 --- a/src/backend/distributed/deparser/ruleutils_16.c +++ b/src/backend/distributed/deparser/ruleutils_16.c @@ -469,9 +469,6 @@ static void get_tablesample_def(TableSampleClause *tablesample, deparse_context *context); static void get_opclass_name(Oid opclass, Oid actual_datatype, StringInfo buf); -static bool is_update_set_with_multiple_columns(List *targetList); -static List *processTargetsIndirection(List *targetList); -static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func); static Node *processIndirection(Node *node, deparse_context *context); static void printSubscripts(SubscriptingRef *aref, deparse_context *context); static char *get_relation_name(Oid relid); @@ -3548,9 +3545,6 @@ get_update_query_targetlist_def(Query *query, List *targetList, } } } - if (is_update_set_with_multiple_columns(targetList)) - targetList = processTargetsIndirection(targetList); - next_ma_cell = list_head(ma_sublinks); cur_ma_sublink = NULL; remaining_ma_columns = 0; @@ -8613,153 +8607,6 @@ get_opclass_name(Oid opclass, Oid actual_datatype, ReleaseSysCache(ht_opc); } -/* - * helper function to evaluate if we are in an SET (...) - * Caller is responsible to check the command type (UPDATE) - */ -static bool is_update_set_with_multiple_columns(List *targetList) -{ - ListCell *lc; - foreach(lc, targetList) { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - Node *expr; - - if (tle->resjunk) - continue; - - expr = strip_implicit_coercions((Node *) tle->expr); - - if (expr && IsA(expr, Param) && - ((Param *) expr)->paramkind == PARAM_MULTIEXPR) - { - return true; - } - } - - // No multi-column set expression found - return false; -} - -/* - * processTargetsIndirection - reorder targets list (from indirection) - * - * We don't change anything but the order the target list. - * The purpose here is to be able to deparse a query tree as if it was - * provided by the PostgreSQL parser, not the rewriter (which is the one - * received by the planner hook). - * - * It's required only for UPDATE SET (MULTIEXPR) queries, other candidates - * are not supported by Citus. - * - * Returns the new target list, reordered. -*/ -static List *processTargetsIndirection(List *targetList) -{ - int nAssignableCols; - int targetListPosition; - bool sawJunk = false; - List *newTargetList = NIL; - ListCell *lc; - - /* Count non-junk columns and ensure they precede junk columns */ - nAssignableCols = 0; - foreach(lc, targetList) - { - TargetEntry *tle = lfirst_node(TargetEntry, lc); - - if (tle->resjunk) - { - sawJunk = true; - } - else - { - if (sawJunk) - elog(ERROR, "Subplan target list is out of order"); - - nAssignableCols++; - } - } - - /* If no assignable columns, return the original target list */ - if (nAssignableCols == 0) - return targetList; - - /* Reorder the target list */ - /* we start from 1 */ - targetListPosition = 1; - while (nAssignableCols > 0) - { - nAssignableCols--; - - foreach(lc, targetList) - { - TargetEntry *tle = lfirst_node(TargetEntry, lc); - - if (IsA(tle->expr, FuncExpr)) - { - FuncExpr *funcexpr = (FuncExpr *) tle->expr; - AttrNumber attnum = extract_paramid_from_funcexpr(funcexpr); - - if (attnum == targetListPosition) - { - ereport(DEBUG1, (errmsg("Adding FuncExpr resno: %d", tle->resno))); - newTargetList = lappend(newTargetList, tle); - targetListPosition++; - break; - } - } - else if (IsA(tle->expr, Param)) - { - Param *param = (Param *) tle->expr; - AttrNumber attnum = param->paramid; - - if (attnum == targetListPosition) - { - newTargetList = lappend(newTargetList, tle); - targetListPosition++; - break; - } - } - } - } - - // TODO add check about what we did here ? - - /* Append any remaining junk columns */ - foreach(lc, targetList) - { - TargetEntry *tle = lfirst_node(TargetEntry, lc); - if (tle->resjunk) - newTargetList = lappend(newTargetList, tle); - } - - return newTargetList; -} - -/* Function to extract paramid from a FuncExpr node */ -static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func) -{ - AttrNumber targetAttnum = InvalidAttrNumber; - ListCell *lc; - - /* Iterate through the arguments of the FuncExpr */ - foreach(lc, func->args) - { - Node *arg = (Node *) lfirst(lc); - - /* Check if the argument is a PARAM node */ - if (IsA(arg, Param)) - { - Param *param = (Param *) arg; - targetAttnum = param->paramid; - - break; // Exit loop once we find the PARAM node - } - } - - return targetAttnum; -} - /* * processIndirection - take care of array and subfield assignment * From af39a271971a235aa7a12451c07de70ebf364588 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Villemain?= Date: Fri, 22 Nov 2024 22:41:12 +0100 Subject: [PATCH 4/6] Add more testing for UPDATE with indirection It'll probably be interesting to add more test related to indirection here. --- src/test/regress/expected/indirections.out | 346 +++++++++++++++++++++ src/test/regress/sql/indirections.sql | 196 ++++++++++++ 2 files changed, 542 insertions(+) create mode 100644 src/test/regress/expected/indirections.out create mode 100644 src/test/regress/sql/indirections.sql diff --git a/src/test/regress/expected/indirections.out b/src/test/regress/expected/indirections.out new file mode 100644 index 00000000000..ad5479fd50e --- /dev/null +++ b/src/test/regress/expected/indirections.out @@ -0,0 +1,346 @@ +SET citus.shard_count TO 2; +SET citus.next_shard_id TO 750000; +SET citus.next_placement_id TO 750000; +CREATE SCHEMA indirections; +-- First on reference table +-- test advanced UPDATE SET () with indirection and physical reordering. +CREATE TABLE test_ref_indirection ( + id bigint primary key + , col_int integer + , col_bool bool + , col_text text + ); +select create_reference_table('test_ref_indirection'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +insert into test_ref_indirection values (1, 1, true, 'three'); +-- default physical ordering +update test_ref_indirection +SET (col_int, col_bool, col_text) + = (SELECT 2, false, 'thirty') +returning *; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 1 | 2 | f | thirty +(1 row) + +select * from test_ref_indirection; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 1 | 2 | f | thirty +(1 row) + +-- check indirection +update test_ref_indirection +SET (col_bool, col_text, col_int) + = (SELECT true, 'thirty-three', 11) +returning *; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 1 | 11 | t | thirty-three +(1 row) + +select * from test_ref_indirection; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 1 | 11 | t | thirty-three +(1 row) + +update test_ref_indirection +SET (col_bool, col_int) + = (SELECT false, 111) +returning *; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 1 | 111 | f | thirty-three +(1 row) + +select * from test_ref_indirection; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 1 | 111 | f | thirty-three +(1 row) + +-- check more complex queries with indirection +insert into test_ref_indirection values (2, 0, false, 'empty'); +update test_ref_indirection +SET (col_int, col_bool, col_text) + = (SELECT 22, true, 'full') +where id = 2 +returning *; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 2 | 22 | t | full +(1 row) + +select * from test_ref_indirection where id = 2; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 2 | 22 | t | full +(1 row) + +update test_ref_indirection +SET (col_bool, col_text, col_int) + = (SELECT false, 'really full', 222) +where id = 2 +returning *; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 2 | 222 | f | really full +(1 row) + +select * from test_ref_indirection where id = 2; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 2 | 222 | f | really full +(1 row) + +update test_ref_indirection +SET (col_bool, col_int) + = (SELECT true, 2222) +where id = 2 +returning *; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 2 | 2222 | t | really full +(1 row) + +select * from test_ref_indirection where id = 2; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 2 | 2222 | t | really full +(1 row) + +-- several updates +insert into test_ref_indirection values (3, 0, false, 'empty'); +insert into test_ref_indirection values (4, 0, false, 'empty'); +with qq3 as ( + update test_ref_indirection + SET (col_text, col_bool) + = (SELECT 'full', true) + where id = 3 + returning * +), +qq4 as ( + update test_ref_indirection + SET (col_text, col_bool) + = (SELECT 'fully', true) + where id = 4 + returning * +) +select * from qq3 union all select * from qq4; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 3 | 0 | t | full + 4 | 0 | t | fully +(2 rows) + +select * from test_ref_indirection where id in (3, 4); + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 3 | 0 | t | full + 4 | 0 | t | fully +(2 rows) + +-- add more advanced queries ? +-- we want to ensure the reordering the targetlist +-- from indirection is not run when it should not +truncate test_ref_indirection; +-- change physical ordering +alter table test_ref_indirection drop col_int; +alter table test_ref_indirection add col_int integer; +insert into test_ref_indirection values (3, true, 'three', 1); +insert into test_ref_indirection values (4, true, 'four', 1); +update test_ref_indirection +SET (col_int, col_bool, col_text) + = (SELECT 2, false, 'thirty') +returning *; + id | col_bool | col_text | col_int +--------------------------------------------------------------------- + 3 | f | thirty | 2 + 4 | f | thirty | 2 +(2 rows) + +select * from test_ref_indirection; + id | col_bool | col_text | col_int +--------------------------------------------------------------------- + 3 | f | thirty | 2 + 4 | f | thirty | 2 +(2 rows) + +-- then on distributed table +-- First on reference table +-- test advanced UPDATE SET () with indirection and physical reordering. +CREATE TABLE test_dist_indirection ( + id bigint primary key + , col_int integer + , col_bool bool + , col_text text + ); +SELECT create_distributed_table('test_dist_indirection', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +insert into test_dist_indirection values (1, 1, true, 'three'); +-- default physical ordering +update test_dist_indirection +SET (col_int, col_bool, col_text) + = (SELECT 2, false, 'thirty') +returning *; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 1 | 2 | f | thirty +(1 row) + +select * from test_dist_indirection; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 1 | 2 | f | thirty +(1 row) + +-- check indirection +update test_dist_indirection +SET (col_bool, col_text, col_int) + = (SELECT true, 'thirty-three', 11) +returning *; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 1 | 11 | t | thirty-three +(1 row) + +select * from test_dist_indirection; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 1 | 11 | t | thirty-three +(1 row) + +update test_dist_indirection +SET (col_bool, col_int) + = (SELECT false, 111) +returning *; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 1 | 111 | f | thirty-three +(1 row) + +select * from test_dist_indirection; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 1 | 111 | f | thirty-three +(1 row) + +-- check more complex queries with indirection +insert into test_dist_indirection values (2, 0, false, 'empty'); +update test_dist_indirection +SET (col_int, col_bool, col_text) + = (SELECT 22, true, 'full') +where id = 2 +returning *; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 2 | 22 | t | full +(1 row) + +select * from test_dist_indirection where id = 2; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 2 | 22 | t | full +(1 row) + +update test_dist_indirection +SET (col_bool, col_text, col_int) + = (SELECT false, 'really full', 222) +where id = 2 +returning *; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 2 | 222 | f | really full +(1 row) + +select * from test_dist_indirection where id = 2; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 2 | 222 | f | really full +(1 row) + +update test_dist_indirection +SET (col_bool, col_int) + = (SELECT true, 2222) +where id = 2 +returning *; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 2 | 2222 | t | really full +(1 row) + +select * from test_dist_indirection where id = 2; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 2 | 2222 | t | really full +(1 row) + +-- several updates +insert into test_dist_indirection values (3, 0, false, 'empty'); +insert into test_dist_indirection values (4, 0, false, 'empty'); +with qq3 as ( + update test_dist_indirection + SET (col_text, col_bool) + = (SELECT 'full', true) + where id = 3 + returning * +), +qq4 as ( + update test_dist_indirection + SET (col_text, col_bool) + = (SELECT 'fully', true) + where id = 4 + returning * +) +select * from qq3 union all select * from qq4; + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 3 | 0 | t | full + 4 | 0 | t | fully +(2 rows) + +select * from test_dist_indirection where id in (3, 4); + id | col_int | col_bool | col_text +--------------------------------------------------------------------- + 3 | 0 | t | full + 4 | 0 | t | fully +(2 rows) + +-- add more advanced queries ? +-- we want to ensure the reordering the targetlist +-- from indirection is not run when it should not +truncate test_dist_indirection; +-- change physical ordering +alter table test_dist_indirection drop col_int; +alter table test_dist_indirection add col_int integer; +insert into test_dist_indirection values (3, true, 'three', 1); +insert into test_dist_indirection values (4, true, 'four', 1); +update test_dist_indirection +SET (col_int, col_bool, col_text) + = (SELECT 2, false, 'thirty') +returning *; + id | col_bool | col_text | col_int +--------------------------------------------------------------------- + 3 | f | thirty | 2 + 4 | f | thirty | 2 +(2 rows) + +select * from test_dist_indirection; + id | col_bool | col_text | col_int +--------------------------------------------------------------------- + 3 | f | thirty | 2 + 4 | f | thirty | 2 +(2 rows) + +DROP TABLE test_dist_indirection; +DROP TABLE test_ref_indirection; +DROP SCHEMA indirections; diff --git a/src/test/regress/sql/indirections.sql b/src/test/regress/sql/indirections.sql new file mode 100644 index 00000000000..5c0e0fbe352 --- /dev/null +++ b/src/test/regress/sql/indirections.sql @@ -0,0 +1,196 @@ +SET citus.shard_count TO 2; +SET citus.next_shard_id TO 750000; +SET citus.next_placement_id TO 750000; + +CREATE SCHEMA indirections; + +-- First on reference table +-- test advanced UPDATE SET () with indirection and physical reordering. +CREATE TABLE test_ref_indirection ( + id bigint primary key + , col_int integer + , col_bool bool + , col_text text + ); +select create_reference_table('test_ref_indirection'); + +insert into test_ref_indirection values (1, 1, true, 'three'); + +-- default physical ordering +update test_ref_indirection +SET (col_int, col_bool, col_text) + = (SELECT 2, false, 'thirty') +returning *; +select * from test_ref_indirection; + +-- check indirection +update test_ref_indirection +SET (col_bool, col_text, col_int) + = (SELECT true, 'thirty-three', 11) +returning *; + +select * from test_ref_indirection; +update test_ref_indirection +SET (col_bool, col_int) + = (SELECT false, 111) +returning *; +select * from test_ref_indirection; + +-- check more complex queries with indirection +insert into test_ref_indirection values (2, 0, false, 'empty'); +update test_ref_indirection +SET (col_int, col_bool, col_text) + = (SELECT 22, true, 'full') +where id = 2 +returning *; +select * from test_ref_indirection where id = 2; + +update test_ref_indirection +SET (col_bool, col_text, col_int) + = (SELECT false, 'really full', 222) +where id = 2 +returning *; +select * from test_ref_indirection where id = 2; + +update test_ref_indirection +SET (col_bool, col_int) + = (SELECT true, 2222) +where id = 2 +returning *; +select * from test_ref_indirection where id = 2; + +-- several updates +insert into test_ref_indirection values (3, 0, false, 'empty'); +insert into test_ref_indirection values (4, 0, false, 'empty'); +with qq3 as ( + update test_ref_indirection + SET (col_text, col_bool) + = (SELECT 'full', true) + where id = 3 + returning * +), +qq4 as ( + update test_ref_indirection + SET (col_text, col_bool) + = (SELECT 'fully', true) + where id = 4 + returning * +) +select * from qq3 union all select * from qq4; +select * from test_ref_indirection where id in (3, 4); + +-- add more advanced queries ? +-- we want to ensure the reordering the targetlist +-- from indirection is not run when it should not +truncate test_ref_indirection; + +-- change physical ordering +alter table test_ref_indirection drop col_int; +alter table test_ref_indirection add col_int integer; +insert into test_ref_indirection values (3, true, 'three', 1); +insert into test_ref_indirection values (4, true, 'four', 1); +update test_ref_indirection +SET (col_int, col_bool, col_text) + = (SELECT 2, false, 'thirty') +returning *; +select * from test_ref_indirection; + + + +-- then on distributed table +-- First on reference table +-- test advanced UPDATE SET () with indirection and physical reordering. +CREATE TABLE test_dist_indirection ( + id bigint primary key + , col_int integer + , col_bool bool + , col_text text + ); +SELECT create_distributed_table('test_dist_indirection', 'id'); + +insert into test_dist_indirection values (1, 1, true, 'three'); + +-- default physical ordering +update test_dist_indirection +SET (col_int, col_bool, col_text) + = (SELECT 2, false, 'thirty') +returning *; +select * from test_dist_indirection; + +-- check indirection +update test_dist_indirection +SET (col_bool, col_text, col_int) + = (SELECT true, 'thirty-three', 11) +returning *; + +select * from test_dist_indirection; +update test_dist_indirection +SET (col_bool, col_int) + = (SELECT false, 111) +returning *; +select * from test_dist_indirection; + +-- check more complex queries with indirection +insert into test_dist_indirection values (2, 0, false, 'empty'); +update test_dist_indirection +SET (col_int, col_bool, col_text) + = (SELECT 22, true, 'full') +where id = 2 +returning *; +select * from test_dist_indirection where id = 2; + +update test_dist_indirection +SET (col_bool, col_text, col_int) + = (SELECT false, 'really full', 222) +where id = 2 +returning *; +select * from test_dist_indirection where id = 2; + +update test_dist_indirection +SET (col_bool, col_int) + = (SELECT true, 2222) +where id = 2 +returning *; +select * from test_dist_indirection where id = 2; + +-- several updates +insert into test_dist_indirection values (3, 0, false, 'empty'); +insert into test_dist_indirection values (4, 0, false, 'empty'); +with qq3 as ( + update test_dist_indirection + SET (col_text, col_bool) + = (SELECT 'full', true) + where id = 3 + returning * +), +qq4 as ( + update test_dist_indirection + SET (col_text, col_bool) + = (SELECT 'fully', true) + where id = 4 + returning * +) +select * from qq3 union all select * from qq4; +select * from test_dist_indirection where id in (3, 4); + +-- add more advanced queries ? +-- we want to ensure the reordering the targetlist +-- from indirection is not run when it should not +truncate test_dist_indirection; + +-- change physical ordering +alter table test_dist_indirection drop col_int; +alter table test_dist_indirection add col_int integer; +insert into test_dist_indirection values (3, true, 'three', 1); +insert into test_dist_indirection values (4, true, 'four', 1); +update test_dist_indirection +SET (col_int, col_bool, col_text) + = (SELECT 2, false, 'thirty') +returning *; +select * from test_dist_indirection; + + + +DROP TABLE test_dist_indirection; +DROP TABLE test_ref_indirection; +DROP SCHEMA indirections; From 1cf93c16de5b2a0d2e24dd9d3df10e504aa785b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Villemain?= Date: Fri, 22 Nov 2024 10:46:02 +0100 Subject: [PATCH 5/6] Add a new function to un-rewrite the planner tree We may need to reorder parts of the planner tree we are receiving in dsitributed_planner hook because it has been optimized by PostgreSQL. However we only need to rewrite tree which will be used to produce a query string, otherwise it's pointless. Proceed very early in the planner hook, which is probably a good place to pull up some similar fixes applied here and there. See the exported function from citus_ruleutils.c: void RebuildParserTreeFromPlannerTree(Query *query) Added a new include in citus_ruleutils.c: miscadmin.h from PostgreSQL, because it contains the check_stack_depth() function which is a good idea to use as we don't know the size of the query.. --- .../distributed/deparser/citus_ruleutils.c | 221 ++++++++++++++++++ .../distributed/planner/distributed_planner.c | 7 + src/include/distributed/citus_ruleutils.h | 2 + 3 files changed, 230 insertions(+) diff --git a/src/backend/distributed/deparser/citus_ruleutils.c b/src/backend/distributed/deparser/citus_ruleutils.c index f99462058d9..fabb26bb4ce 100644 --- a/src/backend/distributed/deparser/citus_ruleutils.c +++ b/src/backend/distributed/deparser/citus_ruleutils.c @@ -41,6 +41,7 @@ #include "commands/sequence.h" #include "foreign/foreign.h" #include "lib/stringinfo.h" +#include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "nodes/nodes.h" #include "nodes/parsenodes.h" @@ -1638,3 +1639,223 @@ RoleSpecString(RoleSpec *spec, bool withQuoteIdentifier) } } } + + +/* Function to extract paramid from a FuncExpr node */ +static AttrNumber +extract_paramid_from_funcexpr(FuncExpr *func) +{ + AttrNumber targetAttnum = InvalidAttrNumber; + ListCell *lc; + + /* Iterate through the arguments of the FuncExpr */ + foreach(lc, func->args) + { + Node *arg = (Node *) lfirst(lc); + + /* Check if the argument is a PARAM node */ + if (IsA(arg, Param)) + { + Param *param = (Param *) arg; + targetAttnum = param->paramid; + + break; /* Exit loop once we find the PARAM node */ + } + } + + return targetAttnum; +} + + +/* + * processTargetsIndirection - reorder targets list (from indirection) + * + * We don't change anything but the order of the target list. + * The purpose here is to be able to deparse a query tree as if it was + * provided by the PostgreSQL parser, not the rewriter (which is the one + * received by the planner hook). + * + * It's required only for UPDATE SET (MULTIEXPR) queries at the moment, other + * candidates are not supported by Citus. + */ +static void +processTargetsIndirection(List **targetList) +{ + int nAssignableCols; + int targetListPosition; + bool sawJunk = false; + List *newTargetList = NIL; + ListCell *lc; + + /* Count non-junk columns and ensure they precede junk columns */ + nAssignableCols = 0; + foreach(lc, *targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + + if (tle->resjunk) + { + sawJunk = true; + } + else + { + if (sawJunk) + { + elog(ERROR, "Subplan target list is out of order"); + } + + nAssignableCols++; + } + } + + /* If no assignable columns, return the original target list */ + if (nAssignableCols == 0) + { + return; + } + + /* Reorder the target list */ + /* we start from 1 */ + targetListPosition = 1; + while (nAssignableCols > 0) + { + nAssignableCols--; + + foreach(lc, *targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + + if (IsA(tle->expr, FuncExpr)) + { + FuncExpr *funcexpr = (FuncExpr *) tle->expr; + AttrNumber attnum = extract_paramid_from_funcexpr(funcexpr); + + if (attnum == targetListPosition) + { + ereport(DEBUG1, (errmsg("Adding FuncExpr resno: %d", tle->resno))); + newTargetList = lappend(newTargetList, tle); + targetListPosition++; + break; + } + } + else if (IsA(tle->expr, Param)) + { + Param *param = (Param *) tle->expr; + AttrNumber attnum = param->paramid; + + if (attnum == targetListPosition) + { + newTargetList = lappend(newTargetList, tle); + targetListPosition++; + break; + } + } + } + } + + /* Append any remaining junk columns */ + foreach(lc, *targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + if (tle->resjunk) + { + newTargetList = lappend(newTargetList, tle); + } + } + *targetList = newTargetList; +} + + +/* + * helper function to evaluate if we are in an SET (...) + * Caller is responsible to check the command type (UPDATE) + */ +static inline bool +is_update_set_multiexpr(Query *query) +{ + ListCell *lc; + + /* we're only interested by UPDATE */ + if (query->commandType != CMD_UPDATE) + { + return false; + } + + /* + * Then foreach target entry, check if one of the node or it's descendant + * is a PARAM_MULTIEXPR (i.e. a SET (a, b) = (...)) + */ + foreach(lc, query->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + Node *expr; + + if (tle->resjunk) + { + continue; + } + + expr = strip_implicit_coercions((Node *) tle->expr); + + if (expr && IsA(expr, Param) && + ((Param *) expr)->paramkind == PARAM_MULTIEXPR) + { + return true; + } + } + + /* No multi-column set expression found */ + return false; +} + + +/* + * helper function to evaluate if we are in SELECT with CTE. + */ +static inline bool +is_select_cte(Query *query) +{ + /* we are only looking for a SELECT query */ + if (query->commandType != CMD_SELECT) + { + return false; + } + + /* and it must contain CTE */ + if (query->cteList == NIL) + { + return false; + } + return true; +} + + +/* + * We may need to reorder parts of the planner tree we are receiving here. + * We expect to produce an SQL query text but our tree has been optimized by + * PostgreSL rewriter already... + */ +void +RebuildParserTreeFromPlannerTree(Query *query) +{ + /* Guard against excessively long or deeply-nested queries */ + CHECK_FOR_INTERRUPTS(); + + /* prevent unloyal defeat */ + check_stack_depth(); + + if (is_update_set_multiexpr(query)) + { + processTargetsIndirection(&query->targetList); + } + /* also match UPDATE in CTE, this one is recursive */ + else if (is_select_cte(query)) + { + ListCell *lc; + foreach(lc, query->cteList) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); + RebuildParserTreeFromPlannerTree((Query *) cte->ctequery); + } + } +} diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 1d6550afdb5..1df88f32fb0 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -217,6 +217,13 @@ distributed_planner(Query *parse, planContext.originalQuery = copyObject(parse); + /* + * We may need to reorder parts of the planner tree we are receiving here. + * We expect to produce an SQL query text but our tree has been optimized by + * PostgreSL rewriter already... + * FIXME is there conditions to reduce the number of calls ? + */ + RebuildParserTreeFromPlannerTree(planContext.originalQuery); if (!fastPathRouterQuery) { diff --git a/src/include/distributed/citus_ruleutils.h b/src/include/distributed/citus_ruleutils.h index 3a9c364824f..729d0b57940 100644 --- a/src/include/distributed/citus_ruleutils.h +++ b/src/include/distributed/citus_ruleutils.h @@ -60,5 +60,7 @@ extern char * generate_operator_name(Oid operid, Oid arg1, Oid arg2); extern List * getOwnedSequences_internal(Oid relid, AttrNumber attnum, char deptype); extern void AppendOptionListToString(StringInfo stringData, List *options); +extern void RebuildParserTreeFromPlannerTree(Query *query); + #endif /* CITUS_RULEUTILS_H */ From 0b434dca028bc095b6ca92047c9913c0c82bdc4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Villemain?= Date: Mon, 23 Dec 2024 16:01:46 +0100 Subject: [PATCH 6/6] Move the patch a bit later in RouterJob() --- src/backend/distributed/planner/distributed_planner.c | 8 -------- src/backend/distributed/planner/multi_router_planner.c | 7 +++++++ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 1df88f32fb0..8bbf1d6094c 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -217,14 +217,6 @@ distributed_planner(Query *parse, planContext.originalQuery = copyObject(parse); - /* - * We may need to reorder parts of the planner tree we are receiving here. - * We expect to produce an SQL query text but our tree has been optimized by - * PostgreSL rewriter already... - * FIXME is there conditions to reduce the number of calls ? - */ - RebuildParserTreeFromPlannerTree(planContext.originalQuery); - if (!fastPathRouterQuery) { /* diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index 44f955a3227..84352415fad 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -1869,6 +1869,13 @@ RouterJob(Query *originalQuery, PlannerRestrictionContext *plannerRestrictionCon } else { + /* + * We may need to reorder parts of the planner tree we are receiving here. + * We expect to produce an SQL query text but our tree has been optimized by + * PostgreSL rewriter already... + * FIXME is there conditions to reduce the number of calls ? + */ + RebuildParserTreeFromPlannerTree(originalQuery); (*planningError) = PlanRouterQuery(originalQuery, plannerRestrictionContext, &placementList, &shardId, &relationShardList, &prunedShardIntervalListList,