Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
import static com.facebook.presto.SystemSessionProperties.OPTIMIZE_METADATA_QUERIES_IGNORE_STATS;
import static com.facebook.presto.SystemSessionProperties.PUSHDOWN_DEREFERENCE_ENABLED;
import static com.facebook.presto.SystemSessionProperties.PUSHDOWN_SUBFIELDS_ENABLED;
import static com.facebook.presto.SystemSessionProperties.PUSHDOWN_SUBFIELDS_FOR_MAP_SUBSET;
import static com.facebook.presto.common.function.OperatorType.EQUAL;
import static com.facebook.presto.common.predicate.Domain.create;
import static com.facebook.presto.common.predicate.Domain.multipleValues;
Expand Down Expand Up @@ -1461,6 +1462,44 @@ public void testPushdownSubfields()
assertUpdate("DROP TABLE test_pushdown_struct_subfields");
}

@Test
public void testPushdownSubfieldsForMapSubset()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a test where the specified keys to subset are not also the index of that key? Also for non-numeric map keys.

Copy link
Contributor Author

@feilong-liu feilong-liu Jun 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a test where the specified keys to subset are not also the index of that key

What do you mean by not also the index of that key? Do you mean the type of index in array is different from map key?
If the map key is of integer type and the array elements are of bigint type, the map will be wrapped in a cast and the subfield pushdown will not work (this needs to be resolved by PR #25395).
If the array element type is not compatible with the map key type, for example integer vs. varchar, the query will fail during parsing.

Also for non-numeric map keys.

Added tests for varchar keys too

{
// Every assertion below runs with the map_subset subfield pushdown session property set to "true".
Session mapSubset = Session.builder(getSession()).setSystemProperty(PUSHDOWN_SUBFIELDS_FOR_MAP_SUBSET, "true").build();

// Case 1: x is map(integer, double) and map_subset is applied directly to the column.
assertUpdate("CREATE TABLE test_pushdown_map_subfields(id integer, x map(integer, double))");
// All-literal key array: one subscript subfield is expected per key.
assertPushdownSubfields(mapSubset, "SELECT t.id, map_subset(x, array[1, 2, 3]) FROM test_pushdown_map_subfields t", "test_pushdown_map_subfields",
ImmutableMap.of("x", toSubfields("x[1]", "x[2]", "x[3]")));
// Negative literal keys are expected as subscripts too.
assertPushdownSubfields(mapSubset, "SELECT t.id, map_subset(x, array[-1, -2, 3]) FROM test_pushdown_map_subfields t", "test_pushdown_map_subfields",
ImmutableMap.of("x", toSubfields("x[-1]", "x[-2]", "x[3]")));
// A null element in the key array: the expected subfields are toSubfields() with no arguments.
assertPushdownSubfields(mapSubset, "SELECT t.id, map_subset(x, array[1, 2, null]) FROM test_pushdown_map_subfields t", "test_pushdown_map_subfields",
ImmutableMap.of("x", toSubfields()));
// The key array references the id column (mixed with literals, then alone): again toSubfields() with no arguments is expected.
assertPushdownSubfields(mapSubset, "SELECT t.id, map_subset(x, array[1, 2, 3, id]) FROM test_pushdown_map_subfields t", "test_pushdown_map_subfields",
ImmutableMap.of("x", toSubfields()));
assertPushdownSubfields(mapSubset, "SELECT t.id, map_subset(x, array[id]) FROM test_pushdown_map_subfields t", "test_pushdown_map_subfields",
ImmutableMap.of("x", toSubfields()));
assertUpdate("DROP TABLE test_pushdown_map_subfields");

// Case 2: x is array(map(integer, double)) and map_subset is applied inside a transform lambda.
assertUpdate("CREATE TABLE test_pushdown_map_subfields(id integer, x array(map(integer, double)))");
// All-literal keys inside the lambda are expected as x[*][key] subfields.
assertPushdownSubfields(mapSubset, "SELECT t.id, transform(x, mp -> map_subset(mp, array[1, 2, 3])) FROM test_pushdown_map_subfields t", "test_pushdown_map_subfields",
ImmutableMap.of("x", toSubfields("x[*][1]", "x[*][2]", "x[*][3]")));
// A null element or a column reference in the key array: toSubfields() with no arguments is expected.
assertPushdownSubfields(mapSubset, "SELECT t.id, transform(x, mp -> map_subset(mp, array[1, 2, null])) FROM test_pushdown_map_subfields t", "test_pushdown_map_subfields",
ImmutableMap.of("x", toSubfields()));
assertPushdownSubfields(mapSubset, "SELECT t.id, transform(x, mp -> map_subset(mp, array[1, 2, id])) FROM test_pushdown_map_subfields t", "test_pushdown_map_subfields",
ImmutableMap.of("x", toSubfields()));
assertUpdate("DROP TABLE test_pushdown_map_subfields");

// Case 3: x is map(varchar, double); the expected subfields use quoted string subscripts.
assertUpdate("CREATE TABLE test_pushdown_map_subfields(id varchar, x map(varchar, double))");
assertPushdownSubfields(mapSubset, "SELECT t.id, map_subset(x, array['ab', 'c', 'd']) FROM test_pushdown_map_subfields t", "test_pushdown_map_subfields",
ImmutableMap.of("x", toSubfields("x[\"ab\"]", "x[\"c\"]", "x[\"d\"]")));
// A null element or a column reference in the key array: toSubfields() with no arguments is expected.
assertPushdownSubfields(mapSubset, "SELECT t.id, map_subset(x, array['ab', 'c', null]) FROM test_pushdown_map_subfields t", "test_pushdown_map_subfields",
ImmutableMap.of("x", toSubfields()));
assertPushdownSubfields(mapSubset, "SELECT t.id, map_subset(x, array['ab', 'c', 'd', id]) FROM test_pushdown_map_subfields t", "test_pushdown_map_subfields",
ImmutableMap.of("x", toSubfields()));
assertPushdownSubfields(mapSubset, "SELECT t.id, map_subset(x, array[id]) FROM test_pushdown_map_subfields t", "test_pushdown_map_subfields",
ImmutableMap.of("x", toSubfields()));
assertUpdate("DROP TABLE test_pushdown_map_subfields");
}

@Test
public void testPushdownSubfieldsAssorted()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ public final class SystemSessionProperties
public static final String QUERY_CLIENT_TIMEOUT = "query_client_timeout";
public static final String REWRITE_MIN_MAX_BY_TO_TOP_N = "rewrite_min_max_by_to_top_n";
public static final String ADD_DISTINCT_BELOW_SEMI_JOIN_BUILD = "add_distinct_below_semi_join_build";
public static final String PUSHDOWN_SUBFIELDS_FOR_MAP_SUBSET = "pushdown_subfields_for_map_subset";

// TODO: Native execution related session properties that are temporarily put here. They will be relocated in the future.
public static final String NATIVE_AGGREGATION_SPILL_ALL = "native_aggregation_spill_all";
Expand Down Expand Up @@ -1910,6 +1911,10 @@ public SystemSessionProperties(
"Optimize out APPROX_DISTINCT operations over constant conditionals",
featuresConfig.isOptimizeConditionalApproxDistinct(),
false),
booleanProperty(PUSHDOWN_SUBFIELDS_FOR_MAP_SUBSET,
"Enable subfield pruning for map_subset function",
featuresConfig.isPushdownSubfieldForMapSubset(),
false),
new PropertyMetadata<>(
QUERY_CLIENT_TIMEOUT,
"Configures how long the query runs without contact from the client application, such as the CLI, before it's abandoned",
Expand Down Expand Up @@ -3259,6 +3264,11 @@ public static boolean isEnabledAddExchangeBelowGroupId(Session session)
return session.getSystemProperty(ADD_EXCHANGE_BELOW_PARTIAL_AGGREGATION_OVER_GROUP_ID, Boolean.class);
}

/**
 * Reads the {@code pushdown_subfields_for_map_subset} system property from the given session.
 *
 * @param session session whose system properties are consulted
 * @return the boolean value of the property for this session
 */
public static boolean isPushSubfieldsForMapSubsetEnabled(Session session)
{
    Boolean enabled = session.getSystemProperty(PUSHDOWN_SUBFIELDS_FOR_MAP_SUBSET, Boolean.class);
    return enabled;
}

public static boolean isAddDistinctBelowSemiJoinBuildEnabled(Session session)
{
return session.getSystemProperty(ADD_DISTINCT_BELOW_SEMI_JOIN_BUILD, Boolean.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ public class FeaturesConfig
private String expressionOptimizerName = DEFAULT_EXPRESSION_OPTIMIZER_NAME;
private boolean addExchangeBelowPartialAggregationOverGroupId;
private boolean addDistinctBelowSemiJoinBuild;
private boolean pushdownSubfieldForMapSubset = true;

public enum PartitioningPrecisionStrategy
{
Expand Down Expand Up @@ -3056,4 +3057,17 @@ public boolean isAddDistinctBelowSemiJoinBuild()
{
return addDistinctBelowSemiJoinBuild;
}

/**
 * Sets whether subfield pruning is applied for the map_subset function.
 * Bound to the {@code optimizer.pushdown-subfield-for-map-subset} configuration key.
 *
 * @param pushdownSubfieldForMapSubset the new value of the flag
 * @return this config instance, so calls can be chained
 */
@Config("optimizer.pushdown-subfield-for-map-subset")
@ConfigDescription("Enable subfield pruning for map_subset function")
public FeaturesConfig setPushdownSubfieldForMapSubset(boolean pushdownSubfieldForMapSubset)
{
    FeaturesConfig self = this;
    self.pushdownSubfieldForMapSubset = pushdownSubfieldForMapSubset;
    return self;
}

/**
 * Returns the current value of the flag stored by {@code setPushdownSubfieldForMapSubset}.
 *
 * @return the configured flag value
 */
public boolean isPushdownSubfieldForMapSubset()
{
    return this.pushdownSubfieldForMapSubset;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
import com.facebook.presto.common.Subfield;
import com.facebook.presto.common.Subfield.NestedField;
import com.facebook.presto.common.Subfield.PathElement;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.type.ArrayType;
import com.facebook.presto.common.type.MapType;
import com.facebook.presto.common.type.RowType;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.expressions.DefaultRowExpressionTraversalVisitor;
import com.facebook.presto.metadata.FunctionAndTypeManager;
import com.facebook.presto.metadata.Metadata;
Expand Down Expand Up @@ -89,10 +91,12 @@
import java.util.stream.IntStream;

import static com.facebook.presto.SystemSessionProperties.isLegacyUnnest;
import static com.facebook.presto.SystemSessionProperties.isPushSubfieldsForMapSubsetEnabled;
import static com.facebook.presto.SystemSessionProperties.isPushdownSubfieldsEnabled;
import static com.facebook.presto.SystemSessionProperties.isPushdownSubfieldsFromArrayLambdasEnabled;
import static com.facebook.presto.common.Subfield.allSubscripts;
import static com.facebook.presto.common.Subfield.noSubfield;
import static com.facebook.presto.common.type.TypeUtils.readNativeValue;
import static com.facebook.presto.common.type.Varchars.isVarcharType;
import static com.facebook.presto.metadata.BuiltInTypeAndFunctionNamespaceManager.JAVA_BUILTIN_NAMESPACE;
import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.DEREFERENCE;
Expand Down Expand Up @@ -151,7 +155,7 @@ private static class Rewriter
{
private final Session session;
private final Metadata metadata;
private final StandardFunctionResolution functionResolution;
private final FunctionResolution functionResolution;
private final ExpressionOptimizer expressionOptimizer;
private final SubfieldExtractor subfieldExtractor;
private static final QualifiedObjectName ARBITRARY_AGGREGATE_FUNCTION = QualifiedObjectName.valueOf(JAVA_BUILTIN_NAMESPACE, "arbitrary");
Expand All @@ -169,7 +173,7 @@ public Rewriter(Session session, Metadata metadata, ExpressionOptimizerProvider
expressionOptimizer,
session.toConnectorSession(),
metadata.getFunctionAndTypeManager(),
isPushdownSubfieldsFromArrayLambdasEnabled(session));
session);
}

public boolean isPlanChanged()
Expand Down Expand Up @@ -307,9 +311,9 @@ public PlanNode visitProject(ProjectNode node, RewriteContext<Context> context)
continue;
}

Optional<Subfield> subfield = toSubfield(expression, functionResolution, expressionOptimizer, session.toConnectorSession(), metadata.getFunctionAndTypeManager());
Optional<List<Subfield>> subfield = toSubfield(expression, functionResolution, expressionOptimizer, session.toConnectorSession(), metadata.getFunctionAndTypeManager(), isPushSubfieldsForMapSubsetEnabled(session));
if (subfield.isPresent()) {
context.get().addAssignment(variable, subfield.get());
subfield.get().forEach(element -> context.get().addAssignment(variable, element));
continue;
}

Expand Down Expand Up @@ -570,17 +574,18 @@ private static String getColumnName(Session session, Metadata metadata, TableHan
return metadata.getColumnMetadata(session, tableHandle, columnHandle).getName();
}

private static Optional<Subfield> toSubfield(
private static Optional<List<Subfield>> toSubfield(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

map_subset can return a list of subfields

RowExpression expression,
StandardFunctionResolution functionResolution,
FunctionResolution functionResolution,
ExpressionOptimizer expressionOptimizer,
ConnectorSession connectorSession,
FunctionAndTypeManager functionAndTypeManager)
FunctionAndTypeManager functionAndTypeManager,
boolean isPushdownSubfieldsForMapSubsetEnabled)
{
ImmutableList.Builder<Subfield.PathElement> elements = ImmutableList.builder();
while (true) {
if (expression instanceof VariableReferenceExpression) {
return Optional.of(new Subfield(((VariableReferenceExpression) expression).getName(), elements.build().reverse()));
return Optional.of(ImmutableList.of(new Subfield(((VariableReferenceExpression) expression).getName(), elements.build().reverse())));
}
if (expression instanceof CallExpression) {
ComplexTypeFunctionDescriptor functionDescriptor = functionAndTypeManager.getFunctionMetadata(((CallExpression) expression).getFunctionHandle()).getDescriptor();
Expand Down Expand Up @@ -650,6 +655,29 @@ private static Optional<Subfield> toSubfield(
}
return Optional.empty();
}
// map_subset(feature, constant_array) is only accessing fields specified in feature map.
// For example map_subset(feature, array[1, 2]) is equivalent to calling element_at(feature, 1) and element_at(feature, 2) for subfield extraction
if (isPushdownSubfieldsForMapSubsetEnabled && expression instanceof CallExpression && isMapSubSetWithConstantArray((CallExpression) expression, functionResolution)) {
CallExpression call = (CallExpression) expression;
ConstantExpression constantArray = (ConstantExpression) call.getArguments().get(1);
checkState(constantArray.getValue() instanceof Block && constantArray.getType() instanceof ArrayType);
Block arrayValue = (Block) constantArray.getValue();
Type arrayElementType = ((ArrayType) constantArray.getType()).getElementType();
ImmutableList.Builder<Subfield> arguments = ImmutableList.builder();
for (int i = 0; i < arrayValue.getPositionCount(); ++i) {
Object mapKey = readNativeValue(arrayElementType, arrayValue, i);
if (mapKey == null) {
return Optional.empty();
}
if (mapKey instanceof Number) {
arguments.add(new Subfield(((VariableReferenceExpression) call.getArguments().get(0)).getName(), ImmutableList.of(new Subfield.LongSubscript(((Number) mapKey).longValue()))));
}
if (isVarcharType(arrayElementType)) {
arguments.add(new Subfield(((VariableReferenceExpression) call.getArguments().get(0)).getName(), ImmutableList.of(new Subfield.StringSubscript(((Slice) mapKey).toStringUtf8()))));
}
}
return Optional.of(arguments.build());
}

return Optional.empty();
}
Expand All @@ -663,38 +691,41 @@ private static NestedField nestedField(String name)
private static final class SubfieldExtractor
extends DefaultRowExpressionTraversalVisitor<Context>
{
private final StandardFunctionResolution functionResolution;
private final FunctionResolution functionResolution;
private final ExpressionOptimizer expressionOptimizer;
private final ConnectorSession connectorSession;
private final FunctionAndTypeManager functionAndTypeManager;
private final boolean isPushDownSubfieldsFromLambdasEnabled;
private final boolean isPushdownSubfieldsForMapSubsetEnabled;

private SubfieldExtractor(
StandardFunctionResolution functionResolution,
FunctionResolution functionResolution,
ExpressionOptimizer expressionOptimizer,
ConnectorSession connectorSession,
FunctionAndTypeManager functionAndTypeManager,
boolean isPushDownSubfieldsFromLambdasEnabled)
Session session)
{
this.functionResolution = requireNonNull(functionResolution, "functionResolution is null");
this.expressionOptimizer = requireNonNull(expressionOptimizer, "expressionOptimizer is null");
this.connectorSession = connectorSession;
this.functionAndTypeManager = requireNonNull(functionAndTypeManager, "functionAndTypeManager is null");
this.isPushDownSubfieldsFromLambdasEnabled = isPushDownSubfieldsFromLambdasEnabled;
requireNonNull(session);
this.isPushDownSubfieldsFromLambdasEnabled = isPushdownSubfieldsFromArrayLambdasEnabled(session);
this.isPushdownSubfieldsForMapSubsetEnabled = isPushSubfieldsForMapSubsetEnabled(session);
}

@Override
public Void visitCall(CallExpression call, Context context)
{
ComplexTypeFunctionDescriptor functionDescriptor = functionAndTypeManager.getFunctionMetadata(call.getFunctionHandle()).getDescriptor();
if (isSubscriptOrElementAtFunction(call, functionResolution, functionAndTypeManager)) {
Optional<Subfield> subfield = toSubfield(call, functionResolution, expressionOptimizer, connectorSession, functionAndTypeManager);
if (isSubscriptOrElementAtFunction(call, functionResolution, functionAndTypeManager) || isMapSubSetWithConstantArray(call, functionResolution)) {
Optional<List<Subfield>> subfield = toSubfield(call, functionResolution, expressionOptimizer, connectorSession, functionAndTypeManager, isPushdownSubfieldsForMapSubsetEnabled);
if (subfield.isPresent()) {
if (context.isPruningLambdaSubfieldsPossible()) {
addRequiredLambdaSubfields(context, subfield.get());
subfield.get().forEach(item -> addRequiredLambdaSubfields(context, item));
}
else {
context.subfields.add(subfield.get());
context.subfields.addAll(subfield.get());
}
}
else {
Expand Down Expand Up @@ -847,14 +878,14 @@ else if (specialForm.getForm() != DEREFERENCE) {
return null;
}

Optional<Subfield> subfield = toSubfield(specialForm, functionResolution, expressionOptimizer, connectorSession, functionAndTypeManager);
Optional<List<Subfield>> subfield = toSubfield(specialForm, functionResolution, expressionOptimizer, connectorSession, functionAndTypeManager, isPushdownSubfieldsForMapSubsetEnabled);

if (subfield.isPresent()) {
if (context.isPruningLambdaSubfieldsPossible()) {
addRequiredLambdaSubfields(context, subfield.get());
subfield.get().forEach(item -> addRequiredLambdaSubfields(context, item));
}
else {
context.subfields.add(subfield.get());
context.subfields.addAll(subfield.get());
}
}
else {
Expand Down Expand Up @@ -887,7 +918,7 @@ private void addRequiredLambdaSubfields(Context context, Subfield input)
public Void visitVariableReference(VariableReferenceExpression reference, Context context)
{
if (context.isPruningLambdaSubfieldsPossible()) {
addRequiredLambdaSubfields(context, toSubfield(reference, functionResolution, expressionOptimizer, connectorSession, functionAndTypeManager).get());
toSubfield(reference, functionResolution, expressionOptimizer, connectorSession, functionAndTypeManager, isPushdownSubfieldsForMapSubsetEnabled).get().forEach(item -> addRequiredLambdaSubfields(context, item));
return null;
}
context.variables.add(reference);
Expand Down Expand Up @@ -978,4 +1009,11 @@ private static boolean isSubscriptOrElementAtFunction(CallExpression expression,
functionAndTypeManager.getFunctionAndTypeResolver().getFunctionMetadata(expression.getFunctionHandle()).getName()
.equals(functionAndTypeManager.getFunctionAndTypeResolver().qualifyObjectName(QualifiedName.of("element_at")));
}

/**
 * Tells whether {@code expression} is a map_subset call whose first argument is a plain
 * variable reference and whose second argument is a non-null constant.
 *
 * A constant whose value is null (a NULL key array) is rejected here. The subfield
 * extraction that follows this check asserts that the constant's value is a Block, so a
 * null value would otherwise fail that assertion instead of simply skipping the pushdown.
 *
 * @param expression the call to inspect
 * @param functionResolution used to recognise the map_subset function handle
 * @return true only when the call has the shape described above
 */
private static boolean isMapSubSetWithConstantArray(CallExpression expression, FunctionResolution functionResolution)
{
    if (!functionResolution.isMapSubSetFunction(expression.getFunctionHandle())) {
        return false;
    }
    if (!(expression.getArguments().get(0) instanceof VariableReferenceExpression)) {
        return false;
    }
    if (!(expression.getArguments().get(1) instanceof ConstantExpression)) {
        return false;
    }
    // A null-valued constant carries no keys to extract; treat it as "not a constant key array".
    return ((ConstantExpression) expression.getArguments().get(1)).getValue() != null;
}
}
Loading
Loading