diff --git a/presto-docs/src/main/sphinx/functions/array.rst b/presto-docs/src/main/sphinx/functions/array.rst index 9d825d874b6be..e7a1c11c73e4d 100644 --- a/presto-docs/src/main/sphinx/functions/array.rst +++ b/presto-docs/src/main/sphinx/functions/array.rst @@ -44,6 +44,12 @@ Array Functions Remove duplicate values from the array ``x``. +.. function:: array_dupes(array(T)) -> array(bigint/varchar) + + Returns the distinct elements that occur more than once in ``array``. A null element is included if it occurs more than once. + + ``T`` must be coercible to ``bigint`` or ``varchar``. + .. function:: array_except(x, y) -> array Returns an array of elements in ``x`` but not in ``y``, without duplicates. @@ -58,6 +64,12 @@ Array Functions Returns a map: keys are the unique elements in the ``array``, values are how many times the key appears. Ignores null elements. Empty array returns empty map. +.. function:: array_has_dupes(array(T)) -> boolean + + Returns whether ``array`` contains any element that occurs more than once. + + ``T`` must be coercible to ``bigint`` or ``varchar``. + .. function:: array_intersect(x, y) -> array Returns an array of the elements in the intersection of ``x`` and ``y``, without duplicates.
diff --git a/presto-main/src/main/java/com/facebook/presto/metadata/BuiltInTypeAndFunctionNamespaceManager.java b/presto-main/src/main/java/com/facebook/presto/metadata/BuiltInTypeAndFunctionNamespaceManager.java index 59183c42f5cd2..bfbbb80711018 100644 --- a/presto-main/src/main/java/com/facebook/presto/metadata/BuiltInTypeAndFunctionNamespaceManager.java +++ b/presto-main/src/main/java/com/facebook/presto/metadata/BuiltInTypeAndFunctionNamespaceManager.java @@ -164,7 +164,7 @@ import com.facebook.presto.operator.scalar.VarbinaryFunctions; import com.facebook.presto.operator.scalar.WilsonInterval; import com.facebook.presto.operator.scalar.WordStemFunction; -import com.facebook.presto.operator.scalar.sql.ArrayArithmeticFunctions; +import com.facebook.presto.operator.scalar.sql.ArraySqlFunctions; import com.facebook.presto.operator.scalar.sql.MapNormalizeFunction; import com.facebook.presto.operator.window.CumulativeDistributionFunction; import com.facebook.presto.operator.window.DenseRankFunction; @@ -835,7 +835,7 @@ private List getBuildInFunctions(FeaturesConfig featuresC .functions(TDIGEST_AGG, TDIGEST_AGG_WITH_WEIGHT, TDIGEST_AGG_WITH_WEIGHT_AND_COMPRESSION) .function(MergeTDigestFunction.MERGE) .sqlInvokedScalar(MapNormalizeFunction.class) - .sqlInvokedScalars(ArrayArithmeticFunctions.class) + .sqlInvokedScalars(ArraySqlFunctions.class) .sqlInvokedScalars(ArrayIntersectFunction.class) .scalar(DynamicFilterPlaceholderFunction.class) .scalars(EnumCasts.class) diff --git a/presto-main/src/main/java/com/facebook/presto/operator/scalar/sql/ArrayArithmeticFunctions.java b/presto-main/src/main/java/com/facebook/presto/operator/scalar/sql/ArraySqlFunctions.java similarity index 64% rename from presto-main/src/main/java/com/facebook/presto/operator/scalar/sql/ArrayArithmeticFunctions.java rename to presto-main/src/main/java/com/facebook/presto/operator/scalar/sql/ArraySqlFunctions.java index be61800bc9bcb..39d80562b5f5f 100644 --- 
a/presto-main/src/main/java/com/facebook/presto/operator/scalar/sql/ArrayArithmeticFunctions.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/scalar/sql/ArraySqlFunctions.java @@ -18,9 +18,9 @@ import com.facebook.presto.spi.function.SqlParameter; import com.facebook.presto.spi.function.SqlType; -public class ArrayArithmeticFunctions +public class ArraySqlFunctions { - private ArrayArithmeticFunctions() {} + private ArraySqlFunctions() {} @SqlInvokedScalarFunction(value = "array_sum", deterministic = true, calledOnNullInput = false) @Description("Returns the sum of all array elements, or 0 if the array is empty. Ignores null elements.") @@ -78,4 +78,44 @@ public static String arrayFrequencyVarchar() "(m, x) -> IF (x IS NOT NULL, MAP_CONCAT(m,MAP_FROM_ENTRIES(ARRAY[ROW(x, COALESCE(ELEMENT_AT(m,x) + 1, 1))])), m)," + "m -> m)"; } + + @SqlInvokedScalarFunction(value = "array_dupes", deterministic = true, calledOnNullInput = false) + @Description("Returns set of elements that have duplicates") + @SqlParameter(name = "input", type = "array(varchar)") + @SqlType("array(varchar)") + public static String arrayDupesVarchar() + { + /* array_frequency ignores NULL elements, so a NULL that occurs more than once is detected separately and emitted first; the rest are the array_frequency keys with a count above 1. */ return "RETURN CONCAT(" + + "CAST(IF (cardinality(filter(input, x -> x is NULL)) > 1, ARRAY[NULL], ARRAY[]) AS ARRAY(VARCHAR))," + + "map_keys(map_filter(array_frequency(input), (k, v) -> v > 1)))"; + } + + @SqlInvokedScalarFunction(value = "array_dupes", deterministic = true, calledOnNullInput = false) + @Description("Returns set of elements that have duplicates") + @SqlParameter(name = "input", type = "array(bigint)") + @SqlType("array(bigint)") + public static String arrayDupesBigint() + { + /* Same construction as the varchar overload, with the NULL slot cast to ARRAY(BIGINT). */ return "RETURN CONCAT(" + + "CAST(IF (cardinality(filter(input, x -> x is NULL)) > 1, ARRAY[NULL], ARRAY[]) AS ARRAY(BIGINT))," + + "map_keys(map_filter(array_frequency(input), (k, v) -> v > 1)))"; + } + + @SqlInvokedScalarFunction(value = "array_has_dupes", deterministic = true, calledOnNullInput = false) + @Description("Returns whether
array has any duplicate element") + @SqlParameter(name = "input", type = "array(varchar)") + @SqlType("boolean") + public static String arrayHasDupesVarchar() + { + /* Duplicates exist exactly when array_dupes(input) is non-empty. */ return "RETURN cardinality(array_dupes(input)) > 0"; + } + + @SqlInvokedScalarFunction(value = "array_has_dupes", deterministic = true, calledOnNullInput = false) + @Description("Returns whether array has any duplicate element") + @SqlParameter(name = "input", type = "array(bigint)") + @SqlType("boolean") + public static String arrayHasDupesBigint() + { + /* Duplicates exist exactly when array_dupes(input) is non-empty. */ return "RETURN cardinality(array_dupes(input)) > 0"; + } } diff --git a/presto-main/src/test/java/com/facebook/presto/operator/scalar/sql/TestArrayArithmeticFunctions.java b/presto-main/src/test/java/com/facebook/presto/operator/scalar/sql/TestArraySqlFunctions.java similarity index 79% rename from presto-main/src/test/java/com/facebook/presto/operator/scalar/sql/TestArrayArithmeticFunctions.java rename to presto-main/src/test/java/com/facebook/presto/operator/scalar/sql/TestArraySqlFunctions.java index 35e60c61e10f9..68a15fa0efc49 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/scalar/sql/TestArrayArithmeticFunctions.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/scalar/sql/TestArraySqlFunctions.java @@ -13,22 +13,27 @@ */ package com.facebook.presto.operator.scalar.sql; +import com.facebook.presto.common.type.ArrayType; import com.facebook.presto.common.type.MapType; import com.facebook.presto.common.type.TestRowType; import com.facebook.presto.common.type.TypeSignature; import com.facebook.presto.metadata.FunctionAndTypeManager; import com.facebook.presto.operator.scalar.AbstractTestFunctions; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.testng.annotations.Test; +import java.util.Collections; + import static com.facebook.presto.common.block.MethodHandleUtil.methodHandle; import static com.facebook.presto.common.type.BigintType.BIGINT; +import static
com.facebook.presto.common.type.BooleanType.BOOLEAN; import static com.facebook.presto.common.type.DoubleType.DOUBLE; import static com.facebook.presto.common.type.IntegerType.INTEGER; import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager; -public class TestArrayArithmeticFunctions +public class TestArraySqlFunctions extends AbstractTestFunctions { @Test @@ -121,4 +126,40 @@ public void testArrayFrequencyVarchar() assertFunction("array_frequency(array[varchar 'a', varchar 'a', varchar 'p'])", functionAndTypeManager.getType(typeSignature), ImmutableMap.of("p", 1, "a", 2)); assertFunction("array_frequency(array[varchar 'z'])", functionAndTypeManager.getType(typeSignature), ImmutableMap.of("z", 1)); } + + @Test + public void testArrayHasDupes() + { + assertFunction("array_has_dupes(cast(null as array(varchar)))", BOOLEAN, null); + assertFunction("array_has_dupes(cast(array[] as array(varchar)))", BOOLEAN, false); + /* varchar elements */ + assertFunction("array_has_dupes(array[varchar 'a', varchar 'b', varchar 'a'])", BOOLEAN, true); + assertFunction("array_has_dupes(array[varchar 'a', varchar 'b'])", BOOLEAN, false); + assertFunction("array_has_dupes(array[varchar 'a', varchar 'a'])", BOOLEAN, true); + /* integer elements */ + assertFunction("array_has_dupes(array[1, 2, 1])", BOOLEAN, true); + assertFunction("array_has_dupes(array[1, 2])", BOOLEAN, false); + assertFunction("array_has_dupes(array[1, 1, 1])", BOOLEAN, true); + /* a NULL element counts as a duplicate only when it occurs more than once */ + assertFunction("array_has_dupes(array[0, null])", BOOLEAN, false); + assertFunction("array_has_dupes(array[0, null, null])", BOOLEAN, true); + } + + @Test + public void testArrayDupes() + { + assertFunction("array_dupes(cast(null as array(varchar)))", new ArrayType(VARCHAR), null); + assertFunction("array_dupes(cast(array[] as array(varchar)))", new ArrayType(VARCHAR), ImmutableList.of()); + /* varchar elements */ + assertFunction("array_dupes(array[varchar 'a', varchar 'b', varchar 'a'])", new
ArrayType(VARCHAR), ImmutableList.of("a")); + assertFunction("array_dupes(array[varchar 'a', varchar 'b'])", new ArrayType(VARCHAR), ImmutableList.of()); + assertFunction("array_dupes(array[varchar 'a', varchar 'a'])", new ArrayType(VARCHAR), ImmutableList.of("a")); + /* integer literals; the expected result type is array(bigint) */ + assertFunction("array_dupes(array[1, 2, 1])", new ArrayType(BIGINT), ImmutableList.of(1L)); + assertFunction("array_dupes(array[1, 2])", new ArrayType(BIGINT), ImmutableList.of()); + assertFunction("array_dupes(array[1, 1, 1])", new ArrayType(BIGINT), ImmutableList.of(1L)); + /* a duplicated NULL is reported as a single NULL element */ + assertFunction("array_dupes(array[0, null])", new ArrayType(BIGINT), ImmutableList.of()); + assertFunction("array_dupes(array[0, null, null])", new ArrayType(BIGINT), Collections.singletonList(null)); + } }