Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions data/test/vtexplain/multi-output/selectsharded-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,33 @@ select * from (select id from user) s /* scatter paren select */
1 ks_sharded/c0-: select * from (select id from user) as s limit 10001 /* scatter paren select */

----------------------------------------------------------------------
select name from user where id = (select id from t1) /* non-correlated subquery as value */

1 ks_unsharded/-: select id from t1 limit 10001 /* non-correlated subquery as value */
2 ks_sharded/-40: select name from user where id = 1 limit 10001 /* non-correlated subquery as value */

----------------------------------------------------------------------
select name from user where id in (select id from t1) /* non-correlated subquery in IN clause */

1 ks_unsharded/-: select id from t1 limit 10001 /* non-correlated subquery in IN clause */
2 ks_sharded/-40: select name from user where 1 = 1 and (id in (1)) limit 10001 /* non-correlated subquery in IN clause */

----------------------------------------------------------------------
select name from user where id not in (select id from t1) /* non-correlated subquery in NOT IN clause */

1 ks_unsharded/-: select id from t1 limit 10001 /* non-correlated subquery in NOT IN clause */
2 ks_sharded/-40: select name from user where (1 = 0 or (id not in (1))) limit 10001 /* non-correlated subquery in NOT IN clause */
2 ks_sharded/40-80: select name from user where (1 = 0 or (id not in (1))) limit 10001 /* non-correlated subquery in NOT IN clause */
2 ks_sharded/80-c0: select name from user where (1 = 0 or (id not in (1))) limit 10001 /* non-correlated subquery in NOT IN clause */
2 ks_sharded/c0-: select name from user where (1 = 0 or (id not in (1))) limit 10001 /* non-correlated subquery in NOT IN clause */

----------------------------------------------------------------------
select name from user where exists (select id from t1) /* non-correlated subquery as EXISTS */

1 ks_unsharded/-: select id from t1 limit 10001 /* non-correlated subquery as EXISTS */
2 ks_sharded/-40: select name from user where 1 limit 10001 /* non-correlated subquery as EXISTS */
2 ks_sharded/40-80: select name from user where 1 limit 10001 /* non-correlated subquery as EXISTS */
2 ks_sharded/80-c0: select name from user where 1 limit 10001 /* non-correlated subquery as EXISTS */
2 ks_sharded/c0-: select name from user where 1 limit 10001 /* non-correlated subquery as EXISTS */

----------------------------------------------------------------------
5 changes: 5 additions & 0 deletions data/test/vtexplain/selectsharded-queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@ select name, count(*) from user group by name /* scatter aggregate */;

select 1, "hello", 3.14 from user limit 10 /* select constant sql values */;
select * from (select id from user) s /* scatter paren select */;

select name from user where id = (select id from t1) /* non-correlated subquery as value */;
select name from user where id in (select id from t1) /* non-correlated subquery in IN clause */;
select name from user where id not in (select id from t1) /* non-correlated subquery in NOT IN clause */;
select name from user where exists (select id from t1) /* non-correlated subquery as EXISTS */;
174 changes: 173 additions & 1 deletion data/test/vtgate/filter_cases.txt
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,178 @@
}
}

# cross-shard subquery in IN clause.
# Note the improved Underlying plan as SelectIN.
"select id from user where id in (select col from user)"
{
"Original": "select id from user where id in (select col from user)",
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One thing that occurred to me in reading this -- can we push down a subquery to a single shard if specified in the query?

Specifically, suppose two tables are both sharded by a company_id column and we run a query like:

select * from user where company_id=123 and id in (select user_id from admins where company_id=123);

In this case the query can be pushed down to whichever shard has both the user and admins rows.

Does the current implementation handle this efficiently?

"Instructions": {
"Opcode": "PulloutIn",
"SubqueryResult": "__sq1",
"HasValues": "__sq_has_values1",
"Subquery": {
"Opcode": "SelectScatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Query": "select col from user",
"FieldQuery": "select col from user where 1 != 1"
},
"Underlying": {
"Opcode": "SelectIN",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Query": "select id from user where :__sq_has_values1 = 1 and (id in ::__vals)",
"FieldQuery": "select id from user where 1 != 1",
"Vindex": "user_index",
"Values": [
"::__sq1"
]
}
}
}

# cross-shard subquery in NOT IN clause.
"select id from user where id not in (select col from user)"
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also think it would be incredibly useful to include vtexplain test cases for each of these as well, so that a reviewer (or someone getting to know Vitess) can more easily see exactly which queries end up running for these various patterns.

{
"Original": "select id from user where id not in (select col from user)",
"Instructions": {
"Opcode": "PulloutNotIn",
"SubqueryResult": "__sq1",
"HasValues": "__sq_has_values1",
"Subquery": {
"Opcode": "SelectScatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Query": "select col from user",
"FieldQuery": "select col from user where 1 != 1"
},
"Underlying": {
"Opcode": "SelectScatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Query": "select id from user where (:__sq_has_values1 = 0 or (id not in ::__sq1))",
"FieldQuery": "select id from user where 1 != 1"
}
}
}

# cross-shard subquery in EXISTS clause.
"select id from user where exists (select col from user)"
{
"Original": "select id from user where exists (select col from user)",
"Instructions": {
"Opcode": "PulloutExists",
"SubqueryResult": "__sq1",
"HasValues": "__sq_has_values1",
"Subquery": {
"Opcode": "SelectScatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Query": "select col from user",
"FieldQuery": "select col from user where 1 != 1"
},
"Underlying": {
"Opcode": "SelectScatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Query": "select id from user where :__sq_has_values1",
"FieldQuery": "select id from user where 1 != 1"
}
}
}

# cross-shard subquery as expression
"select id from user where id = (select col from user)"
{
"Original": "select id from user where id = (select col from user)",
"Instructions": {
"Opcode": "PulloutValue",
"SubqueryResult": "__sq1",
"HasValues": "__sq_has_values1",
"Subquery": {
"Opcode": "SelectScatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Query": "select col from user",
"FieldQuery": "select col from user where 1 != 1"
},
"Underlying": {
"Opcode": "SelectEqualUnique",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Query": "select id from user where id = :__sq1",
"FieldQuery": "select id from user where 1 != 1",
"Vindex": "user_index",
"Values": [
":__sq1"
]
}
}
}

# multi-level pullout
"select id1 from user where id = (select id2 from user where id2 in (select id3 from user))"
{
"Original": "select id1 from user where id = (select id2 from user where id2 in (select id3 from user))",
"Instructions": {
"Opcode": "PulloutValue",
"SubqueryResult": "__sq2",
"HasValues": "__sq_has_values2",
"Subquery": {
"Opcode": "PulloutIn",
"SubqueryResult": "__sq1",
"HasValues": "__sq_has_values1",
"Subquery": {
"Opcode": "SelectScatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Query": "select id3 from user",
"FieldQuery": "select id3 from user where 1 != 1"
},
"Underlying": {
"Opcode": "SelectScatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Query": "select id2 from user where :__sq_has_values1 = 1 and (id2 in ::__sq1)",
"FieldQuery": "select id2 from user where 1 != 1"
}
},
"Underlying": {
"Opcode": "SelectEqualUnique",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Query": "select id1 from user where id = :__sq2",
"FieldQuery": "select id1 from user where 1 != 1",
"Vindex": "user_index",
"Values": [
":__sq2"
]
}
}
}

# Case preservation test
"select user_extra.Id from user join user_extra on user.iD = user_extra.User_Id where user.Id = 5"
{
Expand Down Expand Up @@ -766,4 +938,4 @@
# but they refer to different things. The first reference is to the outermost query,
# and the second reference is to the innermost 'from' subquery.
"select id2 from user uu where id in (select id from user where id = uu.id and user.col in (select col from (select id from user_extra where user_id = 5) uu where uu.user_id = uu.id))"
"unsupported: UNION or subquery on different shards: vindex values are different"
"unsupported: cross-shard correlated subquery"
Loading