Skip to content

Commit 73f4f67

Browse files
committed
Fix issue #7674 related to Indirection in UPDATE SET()
ruleutils in Citus is based on PostgreSQL source code, but in PostgreSQL ruleutils is not used at the planner stage. For instance, it is assumed after the parser that the targetList entries are ordered as they were read, but that is not true after the rewriter: the resulting rewritten tree is then provided to the planner (and Citus), but the ordering of the list is no longer guaranteed. It's similar to other previously reported issues that are still open, as well as to other bugfixes/improvements over time, the most noticeable being ProcessIndirection(), which is for domains and similar. However, the implications of this bug are huge for users of `UPDATE SET (...)`: 1. if you used to list the columns in their physical order, you're maybe safe: `SET (col1, col2, col3, ...)` 2. if you did not list the columns in their physical order: `SET (col2, col1, col3, ...)` then you probably found a problem, or you have one. Note about 1. that despite appearances and your QA, you are at risk: if the physical column ordering is changed (for example after DROPping/ADDing some columns), the same query which used to apparently work well will silently update other columns... As it is, this code is not optimized for performance; not sure it'll be needed.
1 parent 85851c4 commit 73f4f67

File tree

3 files changed

+459
-0
lines changed

3 files changed

+459
-0
lines changed

src/backend/distributed/deparser/ruleutils_14.c

+153
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,9 @@ static void get_tablesample_def(TableSampleClause *tablesample,
440440
deparse_context *context);
441441
static void get_opclass_name(Oid opclass, Oid actual_datatype,
442442
StringInfo buf);
443+
static bool is_update_set_with_multiple_columns(List *targetList);
444+
static List *processTargetsIndirection(List *targetList);
445+
static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func);
443446
static Node *processIndirection(Node *node, deparse_context *context);
444447
static void printSubscripts(SubscriptingRef *aref, deparse_context *context);
445448
static char *get_relation_name(Oid relid);
@@ -3464,6 +3467,9 @@ get_update_query_targetlist_def(Query *query, List *targetList,
34643467
}
34653468
}
34663469
}
3470+
if (is_update_set_with_multiple_columns(targetList))
3471+
targetList = processTargetsIndirection(targetList);
3472+
34673473
next_ma_cell = list_head(ma_sublinks);
34683474
cur_ma_sublink = NULL;
34693475
remaining_ma_columns = 0;
@@ -8101,6 +8107,153 @@ get_opclass_name(Oid opclass, Oid actual_datatype,
81018107
ReleaseSysCache(ht_opc);
81028108
}
81038109

8110+
/*
8111+
* helper function to evaluate if we are in an SET (...)
8112+
* Caller is responsible to check the command type (UPDATE)
8113+
*/
8114+
static bool is_update_set_with_multiple_columns(List *targetList)
8115+
{
8116+
ListCell *lc;
8117+
foreach(lc, targetList) {
8118+
TargetEntry *tle = (TargetEntry *) lfirst(lc);
8119+
Node *expr;
8120+
8121+
if (tle->resjunk)
8122+
continue;
8123+
8124+
expr = strip_implicit_coercions((Node *) tle->expr);
8125+
8126+
if (expr && IsA(expr, Param) &&
8127+
((Param *) expr)->paramkind == PARAM_MULTIEXPR)
8128+
{
8129+
return true;
8130+
}
8131+
}
8132+
8133+
// No multi-column set expression found
8134+
return false;
8135+
}
8136+
8137+
/*
8138+
* processTargetsIndirection - reorder targets list (from indirection)
8139+
*
8140+
* We don't change anything but the order the target list.
8141+
* The purpose here is to be able to deparse a query tree as if it was
8142+
* provided by the PostgreSQL parser, not the rewriter (which is the one
8143+
* received by the planner hook).
8144+
*
8145+
* It's required only for UPDATE SET (MULTIEXPR) queries, other candidates
8146+
* are not supported by Citus.
8147+
*
8148+
* Returns the new target list, reordered.
8149+
*/
8150+
static List *processTargetsIndirection(List *targetList)
8151+
{
8152+
int nAssignableCols;
8153+
int targetListPosition;
8154+
bool sawJunk = false;
8155+
List *newTargetList = NIL;
8156+
ListCell *lc;
8157+
8158+
/* Count non-junk columns and ensure they precede junk columns */
8159+
nAssignableCols = 0;
8160+
foreach(lc, targetList)
8161+
{
8162+
TargetEntry *tle = lfirst_node(TargetEntry, lc);
8163+
8164+
if (tle->resjunk)
8165+
{
8166+
sawJunk = true;
8167+
}
8168+
else
8169+
{
8170+
if (sawJunk)
8171+
elog(ERROR, "Subplan target list is out of order");
8172+
8173+
nAssignableCols++;
8174+
}
8175+
}
8176+
8177+
/* If no assignable columns, return the original target list */
8178+
if (nAssignableCols == 0)
8179+
return targetList;
8180+
8181+
/* Reorder the target list */
8182+
/* we start from 1 */
8183+
targetListPosition = 1;
8184+
while (nAssignableCols > 0)
8185+
{
8186+
nAssignableCols--;
8187+
8188+
foreach(lc, targetList)
8189+
{
8190+
TargetEntry *tle = lfirst_node(TargetEntry, lc);
8191+
8192+
if (IsA(tle->expr, FuncExpr))
8193+
{
8194+
FuncExpr *funcexpr = (FuncExpr *) tle->expr;
8195+
AttrNumber attnum = extract_paramid_from_funcexpr(funcexpr);
8196+
8197+
if (attnum == targetListPosition)
8198+
{
8199+
ereport(DEBUG1, (errmsg("Adding FuncExpr resno: %d", tle->resno)));
8200+
newTargetList = lappend(newTargetList, tle);
8201+
targetListPosition++;
8202+
break;
8203+
}
8204+
}
8205+
else if (IsA(tle->expr, Param))
8206+
{
8207+
Param *param = (Param *) tle->expr;
8208+
AttrNumber attnum = param->paramid;
8209+
8210+
if (attnum == targetListPosition)
8211+
{
8212+
newTargetList = lappend(newTargetList, tle);
8213+
targetListPosition++;
8214+
break;
8215+
}
8216+
}
8217+
}
8218+
}
8219+
8220+
// TODO add check about what we did here ?
8221+
8222+
/* Append any remaining junk columns */
8223+
foreach(lc, targetList)
8224+
{
8225+
TargetEntry *tle = lfirst_node(TargetEntry, lc);
8226+
if (tle->resjunk)
8227+
newTargetList = lappend(newTargetList, tle);
8228+
}
8229+
8230+
return newTargetList;
8231+
}
8232+
8233+
/* Function to extract paramid from a FuncExpr node */
8234+
static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func)
8235+
{
8236+
AttrNumber targetAttnum = InvalidAttrNumber;
8237+
ListCell *lc;
8238+
8239+
/* Iterate through the arguments of the FuncExpr */
8240+
foreach(lc, func->args)
8241+
{
8242+
Node *arg = (Node *) lfirst(lc);
8243+
8244+
/* Check if the argument is a PARAM node */
8245+
if (IsA(arg, Param))
8246+
{
8247+
Param *param = (Param *) arg;
8248+
targetAttnum = param->paramid;
8249+
8250+
break; // Exit loop once we find the PARAM node
8251+
}
8252+
}
8253+
8254+
return targetAttnum;
8255+
}
8256+
81048257
/*
81058258
* processIndirection - take care of array and subfield assignment
81068259
*

src/backend/distributed/deparser/ruleutils_15.c

+153
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,9 @@ static void get_tablesample_def(TableSampleClause *tablesample,
452452
deparse_context *context);
453453
static void get_opclass_name(Oid opclass, Oid actual_datatype,
454454
StringInfo buf);
455+
static bool is_update_set_with_multiple_columns(List *targetList);
456+
static List *processTargetsIndirection(List *targetList);
457+
static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func);
455458
static Node *processIndirection(Node *node, deparse_context *context);
456459
static void printSubscripts(SubscriptingRef *aref, deparse_context *context);
457460
static char *get_relation_name(Oid relid);
@@ -3529,6 +3532,9 @@ get_update_query_targetlist_def(Query *query, List *targetList,
35293532
}
35303533
}
35313534
}
3535+
if (is_update_set_with_multiple_columns(targetList))
3536+
targetList = processTargetsIndirection(targetList);
3537+
35323538
next_ma_cell = list_head(ma_sublinks);
35333539
cur_ma_sublink = NULL;
35343540
remaining_ma_columns = 0;
@@ -8331,6 +8337,153 @@ get_opclass_name(Oid opclass, Oid actual_datatype,
83318337
ReleaseSysCache(ht_opc);
83328338
}
83338339

8340+
/*
8341+
* helper function to evaluate if we are in an SET (...)
8342+
* Caller is responsible to check the command type (UPDATE)
8343+
*/
8344+
static bool is_update_set_with_multiple_columns(List *targetList)
8345+
{
8346+
ListCell *lc;
8347+
foreach(lc, targetList) {
8348+
TargetEntry *tle = (TargetEntry *) lfirst(lc);
8349+
Node *expr;
8350+
8351+
if (tle->resjunk)
8352+
continue;
8353+
8354+
expr = strip_implicit_coercions((Node *) tle->expr);
8355+
8356+
if (expr && IsA(expr, Param) &&
8357+
((Param *) expr)->paramkind == PARAM_MULTIEXPR)
8358+
{
8359+
return true;
8360+
}
8361+
}
8362+
8363+
// No multi-column set expression found
8364+
return false;
8365+
}
8366+
8367+
/*
8368+
* processTargetsIndirection - reorder targets list (from indirection)
8369+
*
8370+
* We don't change anything but the order the target list.
8371+
* The purpose here is to be able to deparse a query tree as if it was
8372+
* provided by the PostgreSQL parser, not the rewriter (which is the one
8373+
* received by the planner hook).
8374+
*
8375+
* It's required only for UPDATE SET (MULTIEXPR) queries, other candidates
8376+
* are not supported by Citus.
8377+
*
8378+
* Returns the new target list, reordered.
8379+
*/
8380+
static List *processTargetsIndirection(List *targetList)
8381+
{
8382+
int nAssignableCols;
8383+
int targetListPosition;
8384+
bool sawJunk = false;
8385+
List *newTargetList = NIL;
8386+
ListCell *lc;
8387+
8388+
/* Count non-junk columns and ensure they precede junk columns */
8389+
nAssignableCols = 0;
8390+
foreach(lc, targetList)
8391+
{
8392+
TargetEntry *tle = lfirst_node(TargetEntry, lc);
8393+
8394+
if (tle->resjunk)
8395+
{
8396+
sawJunk = true;
8397+
}
8398+
else
8399+
{
8400+
if (sawJunk)
8401+
elog(ERROR, "Subplan target list is out of order");
8402+
8403+
nAssignableCols++;
8404+
}
8405+
}
8406+
8407+
/* If no assignable columns, return the original target list */
8408+
if (nAssignableCols == 0)
8409+
return targetList;
8410+
8411+
/* Reorder the target list */
8412+
/* we start from 1 */
8413+
targetListPosition = 1;
8414+
while (nAssignableCols > 0)
8415+
{
8416+
nAssignableCols--;
8417+
8418+
foreach(lc, targetList)
8419+
{
8420+
TargetEntry *tle = lfirst_node(TargetEntry, lc);
8421+
8422+
if (IsA(tle->expr, FuncExpr))
8423+
{
8424+
FuncExpr *funcexpr = (FuncExpr *) tle->expr;
8425+
AttrNumber attnum = extract_paramid_from_funcexpr(funcexpr);
8426+
8427+
if (attnum == targetListPosition)
8428+
{
8429+
ereport(DEBUG1, (errmsg("Adding FuncExpr resno: %d", tle->resno)));
8430+
newTargetList = lappend(newTargetList, tle);
8431+
targetListPosition++;
8432+
break;
8433+
}
8434+
}
8435+
else if (IsA(tle->expr, Param))
8436+
{
8437+
Param *param = (Param *) tle->expr;
8438+
AttrNumber attnum = param->paramid;
8439+
8440+
if (attnum == targetListPosition)
8441+
{
8442+
newTargetList = lappend(newTargetList, tle);
8443+
targetListPosition++;
8444+
break;
8445+
}
8446+
}
8447+
}
8448+
}
8449+
8450+
// TODO add check about what we did here ?
8451+
8452+
/* Append any remaining junk columns */
8453+
foreach(lc, targetList)
8454+
{
8455+
TargetEntry *tle = lfirst_node(TargetEntry, lc);
8456+
if (tle->resjunk)
8457+
newTargetList = lappend(newTargetList, tle);
8458+
}
8459+
8460+
return newTargetList;
8461+
}
8462+
8463+
/* Function to extract paramid from a FuncExpr node */
8464+
static AttrNumber extract_paramid_from_funcexpr(FuncExpr *func)
8465+
{
8466+
AttrNumber targetAttnum = InvalidAttrNumber;
8467+
ListCell *lc;
8468+
8469+
/* Iterate through the arguments of the FuncExpr */
8470+
foreach(lc, func->args)
8471+
{
8472+
Node *arg = (Node *) lfirst(lc);
8473+
8474+
/* Check if the argument is a PARAM node */
8475+
if (IsA(arg, Param))
8476+
{
8477+
Param *param = (Param *) arg;
8478+
targetAttnum = param->paramid;
8479+
8480+
break; // Exit loop once we find the PARAM node
8481+
}
8482+
}
8483+
8484+
return targetAttnum;
8485+
}
8486+
83348487
/*
83358488
* processIndirection - take care of array and subfield assignment
83368489
*

0 commit comments

Comments
 (0)