diff --git a/presto-common/src/main/java/com/facebook/presto/common/Subfield.java b/presto-common/src/main/java/com/facebook/presto/common/Subfield.java index 5854fff8a8731..4d49c557fcaab 100644 --- a/presto-common/src/main/java/com/facebook/presto/common/Subfield.java +++ b/presto-common/src/main/java/com/facebook/presto/common/Subfield.java @@ -81,6 +81,31 @@ public String toString() } } + public static final class StructureOnly + implements PathElement + { + private static final StructureOnly STRUCTURE_ONLY = new StructureOnly(); + + private StructureOnly() {} + + public static StructureOnly getInstance() + { + return STRUCTURE_ONLY; + } + + @Override + public boolean isSubscript() + { + return true; + } + + @Override + public String toString() + { + return "[$]"; + } + } + public static final class NestedField implements PathElement { @@ -238,6 +263,11 @@ public static PathElement noSubfield() return NoSubfield.getInstance(); } + public static PathElement structureOnly() + { + return StructureOnly.getInstance(); + } + @JsonCreator public Subfield(String path) { diff --git a/presto-common/src/main/java/com/facebook/presto/common/SubfieldTokenizer.java b/presto-common/src/main/java/com/facebook/presto/common/SubfieldTokenizer.java index fe61bf6e73eaf..562a99e1c151a 100644 --- a/presto-common/src/main/java/com/facebook/presto/common/SubfieldTokenizer.java +++ b/presto-common/src/main/java/com/facebook/presto/common/SubfieldTokenizer.java @@ -106,7 +106,7 @@ private Subfield.PathElement computeNext() } if (tryMatch(OPEN_BRACKET)) { - Subfield.PathElement token = tryMatch(QUOTE) ? matchQuotedSubscript() : tryMatch(WILDCARD) ? matchWildcardSubscript() : matchUnquotedSubscript(); + Subfield.PathElement token = tryMatch(QUOTE) ? matchQuotedSubscript() : tryMatch(WILDCARD) ? matchWildcardSubscript() : tryMatch(DOLLAR) ? 
matchStructureOnlySubscript() : matchUnquotedSubscript(); match(CLOSE_BRACKET); firstSegment = false; @@ -151,6 +151,11 @@ private Subfield.PathElement matchDollarPathElement() return Subfield.noSubfield(); } + private Subfield.PathElement matchStructureOnlySubscript() + { + return Subfield.structureOnly(); + } + private static boolean isUnquotedPathCharacter(char c) { return c == ':' || c == '$' || c == '-' || c == '/' || c == '@' || c == '|' || c == '#' || c == ' ' || c == '<' || c == '>' || isUnquotedSubscriptCharacter(c); diff --git a/presto-docs/src/main/sphinx/admin/properties-session.rst b/presto-docs/src/main/sphinx/admin/properties-session.rst index ac02ca64df544..6d96213ba8404 100644 --- a/presto-docs/src/main/sphinx/admin/properties-session.rst +++ b/presto-docs/src/main/sphinx/admin/properties-session.rst @@ -446,6 +446,17 @@ Use this to optimize the ``map_filter()`` and ``map_subset()`` function. It controls if subfields access is executed at the data source or not. +``pushdown_subfields_for_cardinality`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* **Type:** ``boolean`` +* **Default value:** ``false`` + +Enable subfield pruning for the ``cardinality()`` function so that the data source can skip reading map keys and values or array elements. + +When enabled, the query optimizer can push down subfield pruning for ``cardinality()`` calls on map and array columns, +allowing the data source to skip reading the actual map keys and values (or array elements) when only the cardinality +(count of elements) is needed. 
+ ``schedule_splits_based_on_task_load`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * **Type:** ``boolean`` diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveLogicalPlanner.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveLogicalPlanner.java index 534beafe5a0b6..3fec92f66e088 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveLogicalPlanner.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveLogicalPlanner.java @@ -84,6 +84,7 @@ import static com.facebook.presto.SystemSessionProperties.OPTIMIZE_METADATA_QUERIES_IGNORE_STATS; import static com.facebook.presto.SystemSessionProperties.PUSHDOWN_DEREFERENCE_ENABLED; import static com.facebook.presto.SystemSessionProperties.PUSHDOWN_SUBFIELDS_ENABLED; +import static com.facebook.presto.SystemSessionProperties.PUSHDOWN_SUBFIELDS_FOR_CARDINALITY; import static com.facebook.presto.SystemSessionProperties.PUSHDOWN_SUBFIELDS_FOR_MAP_FUNCTIONS; import static com.facebook.presto.SystemSessionProperties.UTILIZE_UNIQUE_PROPERTY_IN_QUERY_PLANNING; import static com.facebook.presto.common.function.OperatorType.EQUAL; @@ -1637,6 +1638,56 @@ public void testPushdownSubfieldsForMapFilter() assertUpdate("DROP TABLE test_pushdown_map_subfields"); } + @Test + public void testPushdownSubfieldsForCardinality() + { + Session cardinalityPushdown = Session.builder(getSession()) + .setSystemProperty(PUSHDOWN_SUBFIELDS_FOR_CARDINALITY, "true") + .build(); + + // Test simple cardinality pushdown for MAP + assertUpdate("CREATE TABLE test_pushdown_cardinality_map(id integer, x map(integer, double))"); + assertPushdownSubfields(cardinalityPushdown, "SELECT t.id, cardinality(x) FROM test_pushdown_cardinality_map t", "test_pushdown_cardinality_map", + ImmutableMap.of("x", toSubfields("x[$]"))); + assertUpdate("DROP TABLE test_pushdown_cardinality_map"); + + // Test cardinality pushdown for ARRAY + assertUpdate("CREATE TABLE test_pushdown_cardinality_array(id integer, arr 
array(bigint))"); + assertPushdownSubfields(cardinalityPushdown, "SELECT t.id, cardinality(arr) FROM test_pushdown_cardinality_array t", "test_pushdown_cardinality_array", + ImmutableMap.of("arr", toSubfields("arr[$]"))); + assertUpdate("DROP TABLE test_pushdown_cardinality_array"); + + // Test cardinality in WHERE clause + assertUpdate("CREATE TABLE test_pushdown_cardinality_where(id integer, features map(varchar, double))"); + assertPushdownSubfields(cardinalityPushdown, "SELECT t.id FROM test_pushdown_cardinality_where t WHERE cardinality(features) > 10", "test_pushdown_cardinality_where", + ImmutableMap.of("features", toSubfields("features[$]"))); + assertUpdate("DROP TABLE test_pushdown_cardinality_where"); + + // Test cardinality in aggregation + assertUpdate("CREATE TABLE test_pushdown_cardinality_agg(id integer, data map(integer, varchar))"); + assertPushdownSubfields(cardinalityPushdown, "SELECT AVG(cardinality(data)) FROM test_pushdown_cardinality_agg", "test_pushdown_cardinality_agg", + ImmutableMap.of("data", toSubfields("data[$]"))); + assertUpdate("DROP TABLE test_pushdown_cardinality_agg"); + + // Test multiple cardinalities + assertUpdate("CREATE TABLE test_pushdown_cardinality_multi(id integer, map1 map(integer, double), map2 map(varchar, integer))"); + assertPushdownSubfields(cardinalityPushdown, "SELECT cardinality(map1), cardinality(map2) FROM test_pushdown_cardinality_multi", "test_pushdown_cardinality_multi", + ImmutableMap.of("map1", toSubfields("map1[$]"), "map2", toSubfields("map2[$]"))); + assertUpdate("DROP TABLE test_pushdown_cardinality_multi"); + + // Test cardinality with complex expression + assertUpdate("CREATE TABLE test_pushdown_cardinality_expr(id integer, tags map(varchar, varchar))"); + assertPushdownSubfields(cardinalityPushdown, "SELECT cardinality(tags) * 2 FROM test_pushdown_cardinality_expr", "test_pushdown_cardinality_expr", + ImmutableMap.of("tags", toSubfields("tags[$]"))); + assertUpdate("DROP TABLE 
test_pushdown_cardinality_expr"); + + // Test cardinality on ARRAY of maps + assertUpdate("CREATE TABLE test_pushdown_cardinality_nested(id integer, arr_of_maps array(map(integer, varchar)))"); + assertPushdownSubfields(cardinalityPushdown, "SELECT transform(arr_of_maps, m -> cardinality(m)) FROM test_pushdown_cardinality_nested", "test_pushdown_cardinality_nested", + ImmutableMap.of("arr_of_maps", toSubfields("arr_of_maps[*][$]"))); + assertUpdate("DROP TABLE test_pushdown_cardinality_nested"); + } + @Test public void testPushdownSubfieldsAssorted() { diff --git a/presto-main-base/src/main/java/com/facebook/presto/SystemSessionProperties.java b/presto-main-base/src/main/java/com/facebook/presto/SystemSessionProperties.java index 9726e14b7c40f..078747f34afef 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/SystemSessionProperties.java +++ b/presto-main-base/src/main/java/com/facebook/presto/SystemSessionProperties.java @@ -356,6 +356,7 @@ public final class SystemSessionProperties public static final String ADD_DISTINCT_BELOW_SEMI_JOIN_BUILD = "add_distinct_below_semi_join_build"; public static final String UTILIZE_UNIQUE_PROPERTY_IN_QUERY_PLANNING = "utilize_unique_property_in_query_planning"; public static final String PUSHDOWN_SUBFIELDS_FOR_MAP_FUNCTIONS = "pushdown_subfields_for_map_functions"; + public static final String PUSHDOWN_SUBFIELDS_FOR_CARDINALITY = "pushdown_subfields_for_cardinality"; public static final String MAX_SERIALIZABLE_OBJECT_SIZE = "max_serializable_object_size"; public static final String EXPRESSION_OPTIMIZER_IN_ROW_EXPRESSION_REWRITE = "expression_optimizer_in_row_expression_rewrite"; public static final String TABLE_SCAN_SHUFFLE_PARALLELISM_THRESHOLD = "table_scan_shuffle_parallelism_threshold"; @@ -2053,6 +2054,10 @@ public SystemSessionProperties( "Enable subfield pruning for map functions, currently include map_subset and map_filter", featuresConfig.isPushdownSubfieldForMapFunctions(), false), + 
booleanProperty(PUSHDOWN_SUBFIELDS_FOR_CARDINALITY, + "Enable subfield pruning for cardinality() function to skip reading keys and values", + featuresConfig.isPushdownSubfieldForCardinality(), + false), longProperty(MAX_SERIALIZABLE_OBJECT_SIZE, "Configure the maximum byte size of a serializable object in expression interpreters", featuresConfig.getMaxSerializableObjectSize(), @@ -3520,6 +3525,11 @@ public static boolean isPushSubfieldsForMapFunctionsEnabled(Session session) return session.getSystemProperty(PUSHDOWN_SUBFIELDS_FOR_MAP_FUNCTIONS, Boolean.class); } + public static boolean isPushSubfieldsForCardinalityEnabled(Session session) + { + return session.getSystemProperty(PUSHDOWN_SUBFIELDS_FOR_CARDINALITY, Boolean.class); + } + public static boolean isUtilizeUniquePropertyInQueryPlanningEnabled(Session session) { return session.getSystemProperty(UTILIZE_UNIQUE_PROPERTY_IN_QUERY_PLANNING, Boolean.class); diff --git a/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java b/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java index 0aea60a29b806..8ed226ad14652 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java +++ b/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java @@ -324,6 +324,7 @@ public class FeaturesConfig private boolean addExchangeBelowPartialAggregationOverGroupId; private boolean addDistinctBelowSemiJoinBuild; private boolean pushdownSubfieldForMapFunctions = true; + private boolean pushdownSubfieldForCardinality; private long maxSerializableObjectSize = 1000; private boolean utilizeUniquePropertyInQueryPlanning = true; private String expressionOptimizerUsedInRowExpressionRewrite = ""; @@ -3269,6 +3270,19 @@ public boolean isPushdownSubfieldForMapFunctions() return pushdownSubfieldForMapFunctions; } + @Config("optimizer.pushdown-subfield-for-cardinality") + @ConfigDescription("Enable subfield pruning for 
cardinality() function to skip reading keys and values") + public FeaturesConfig setPushdownSubfieldForCardinality(boolean pushdownSubfieldForCardinality) + { + this.pushdownSubfieldForCardinality = pushdownSubfieldForCardinality; + return this; + } + + public boolean isPushdownSubfieldForCardinality() + { + return pushdownSubfieldForCardinality; + } + @Config("optimizer.utilize-unique-property-in-query-planning") @ConfigDescription("Utilize the unique property of input columns in query planning") public FeaturesConfig setUtilizeUniquePropertyInQueryPlanning(boolean utilizeUniquePropertyInQueryPlanning) diff --git a/presto-main-base/src/main/java/com/facebook/presto/sql/planner/optimizations/PushdownSubfields.java b/presto-main-base/src/main/java/com/facebook/presto/sql/planner/optimizations/PushdownSubfields.java index 65f33486c9a15..9121a6ca44844 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/sql/planner/optimizations/PushdownSubfields.java +++ b/presto-main-base/src/main/java/com/facebook/presto/sql/planner/optimizations/PushdownSubfields.java @@ -93,11 +93,13 @@ import java.util.stream.IntStream; import static com.facebook.presto.SystemSessionProperties.isLegacyUnnest; +import static com.facebook.presto.SystemSessionProperties.isPushSubfieldsForCardinalityEnabled; import static com.facebook.presto.SystemSessionProperties.isPushSubfieldsForMapFunctionsEnabled; import static com.facebook.presto.SystemSessionProperties.isPushdownSubfieldsEnabled; import static com.facebook.presto.SystemSessionProperties.isPushdownSubfieldsFromArrayLambdasEnabled; import static com.facebook.presto.common.Subfield.allSubscripts; import static com.facebook.presto.common.Subfield.noSubfield; +import static com.facebook.presto.common.Subfield.structureOnly; import static com.facebook.presto.common.type.TypeUtils.readNativeValue; import static com.facebook.presto.common.type.Varchars.isVarcharType; import static 
com.facebook.presto.metadata.BuiltInTypeAndFunctionNamespaceManager.JAVA_BUILTIN_NAMESPACE; @@ -820,6 +822,7 @@ private static final class SubfieldExtractor private final FunctionAndTypeManager functionAndTypeManager; private final boolean isPushDownSubfieldsFromLambdasEnabled; private final boolean isPushdownSubfieldsForMapFunctionsEnabled; + private final boolean isPushdownSubfieldsForCardinalityEnabled; private SubfieldExtractor( FunctionResolution functionResolution, @@ -835,11 +838,26 @@ private SubfieldExtractor( requireNonNull(session); this.isPushDownSubfieldsFromLambdasEnabled = isPushdownSubfieldsFromArrayLambdasEnabled(session); this.isPushdownSubfieldsForMapFunctionsEnabled = isPushSubfieldsForMapFunctionsEnabled(session); + this.isPushdownSubfieldsForCardinalityEnabled = isPushSubfieldsForCardinalityEnabled(session); } @Override public Void visitCall(CallExpression call, Context context) { + if (isPushdownSubfieldsForCardinalityEnabled && functionResolution.isCardinalityFunction(call.getFunctionHandle()) && call.getArguments().size() == 1) { + RowExpression argument = call.getArguments().get(0); + if (argument instanceof VariableReferenceExpression) { + Type argumentType = argument.getType(); + if (argumentType instanceof MapType || argumentType instanceof ArrayType) { + VariableReferenceExpression variable = (VariableReferenceExpression) argument; + Subfield cardinalitySubfield = new Subfield( + variable.getName(), + ImmutableList.of(structureOnly())); + context.subfields.add(cardinalitySubfield); + return null; + } + } + } ComplexTypeFunctionDescriptor functionDescriptor = functionAndTypeManager.getFunctionMetadata(call.getFunctionHandle()).getDescriptor(); if (isSubscriptOrElementAtFunction(call, functionResolution, functionAndTypeManager) || isMapSubSetWithConstantArray(call, functionResolution) || isMapFilterWithConstantFilterInMapKey(call, functionResolution)) { Optional> subfield = toSubfield(call, functionResolution, expressionOptimizer, 
connectorSession, functionAndTypeManager, isPushdownSubfieldsForMapFunctionsEnabled); diff --git a/presto-main-base/src/main/java/com/facebook/presto/sql/relational/FunctionResolution.java b/presto-main-base/src/main/java/com/facebook/presto/sql/relational/FunctionResolution.java index bdde9065df34b..22a4e5d54473b 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/sql/relational/FunctionResolution.java +++ b/presto-main-base/src/main/java/com/facebook/presto/sql/relational/FunctionResolution.java @@ -439,6 +439,11 @@ public boolean isMapFilterFunction(FunctionHandle functionHandle) return functionAndTypeResolver.getFunctionMetadata(functionHandle).getName().equals(functionAndTypeResolver.qualifyObjectName(QualifiedName.of("map_filter"))); } + public boolean isCardinalityFunction(FunctionHandle functionHandle) + { + return functionAndTypeResolver.getFunctionMetadata(functionHandle).getName().equals(functionAndTypeResolver.qualifyObjectName(QualifiedName.of("cardinality"))); + } + @Override public FunctionHandle lookupBuiltInFunction(String functionName, List inputTypes) { diff --git a/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java b/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java index 257dc1ff5c148..8f79adf6d0b05 100644 --- a/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java +++ b/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java @@ -272,6 +272,7 @@ public void testDefaults() .setAddExchangeBelowPartialAggregationOverGroupId(false) .setAddDistinctBelowSemiJoinBuild(false) .setPushdownSubfieldForMapFunctions(true) + .setPushdownSubfieldForCardinality(false) .setUtilizeUniquePropertyInQueryPlanning(true) .setExpressionOptimizerUsedInRowExpressionRewrite("") .setInnerJoinPushdownEnabled(false) @@ -505,6 +506,7 @@ public void testExplicitPropertyMappings() 
.put("exclude-invalid-worker-session-properties", "true") .put("optimizer.add-distinct-below-semi-join-build", "true") .put("optimizer.pushdown-subfield-for-map-functions", "false") + .put("optimizer.pushdown-subfield-for-cardinality", "true") .put("optimizer.utilize-unique-property-in-query-planning", "false") .put("optimizer.expression-optimizer-used-in-expression-rewrite", "custom") .put("optimizer.add-exchange-below-partial-aggregation-over-group-id", "true") @@ -729,6 +731,7 @@ public void testExplicitPropertyMappings() .setAddExchangeBelowPartialAggregationOverGroupId(true) .setAddDistinctBelowSemiJoinBuild(true) .setPushdownSubfieldForMapFunctions(false) + .setPushdownSubfieldForCardinality(true) .setUtilizeUniquePropertyInQueryPlanning(false) .setExpressionOptimizerUsedInRowExpressionRewrite("custom") .setInEqualityJoinPushdownEnabled(true)