Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions presto-docs/src/main/sphinx/functions/array.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ Array Functions

Remove duplicate values from the array ``x``.

.. function:: array_dupes(array(T)) -> array(bigint/varchar)

Returns a set of elements that occur more than once in ``array``.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'repeated only once' is confusing. Just say the result is a set of elements that occur more than once in the original array.

``T`` must be coercible to ``bigint`` or ``varchar``.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need to repeat this for every type. Just declare it as ARRAY, like we do for the other functions.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because those are the only two versions of the functions that we spell out. It would probably be nice to support generic types, at least for these built-in SQL functions.

.. function:: array_except(x, y) -> array

Returns an array of elements in ``x`` but not in ``y``, without duplicates.
Expand All @@ -58,6 +64,12 @@ Array Functions
Returns a map: keys are the unique elements in the ``array``, values are how many times the key appears.
Ignores null elements. Empty array returns empty map.

.. function:: array_has_dupes(array(T)) -> boolean

Returns a boolean: whether ``array`` has any elements that occur more than once.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here

``T`` must be coercible to ``bigint`` or ``varchar``.

.. function:: array_intersect(x, y) -> array

Returns an array of the elements in the intersection of ``x`` and ``y``, without duplicates.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@
import com.facebook.presto.operator.scalar.VarbinaryFunctions;
import com.facebook.presto.operator.scalar.WilsonInterval;
import com.facebook.presto.operator.scalar.WordStemFunction;
import com.facebook.presto.operator.scalar.sql.ArrayArithmeticFunctions;
import com.facebook.presto.operator.scalar.sql.ArraySqlFunctions;
import com.facebook.presto.operator.scalar.sql.MapNormalizeFunction;
import com.facebook.presto.operator.window.CumulativeDistributionFunction;
import com.facebook.presto.operator.window.DenseRankFunction;
Expand Down Expand Up @@ -835,7 +835,7 @@ private List<? extends SqlFunction> getBuildInFunctions(FeaturesConfig featuresC
.functions(TDIGEST_AGG, TDIGEST_AGG_WITH_WEIGHT, TDIGEST_AGG_WITH_WEIGHT_AND_COMPRESSION)
.function(MergeTDigestFunction.MERGE)
.sqlInvokedScalar(MapNormalizeFunction.class)
.sqlInvokedScalars(ArrayArithmeticFunctions.class)
.sqlInvokedScalars(ArraySqlFunctions.class)
.sqlInvokedScalars(ArrayIntersectFunction.class)
.scalar(DynamicFilterPlaceholderFunction.class)
.scalars(EnumCasts.class)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
import com.facebook.presto.spi.function.SqlParameter;
import com.facebook.presto.spi.function.SqlType;

public class ArrayArithmeticFunctions
public class ArraySqlFunctions
{
private ArrayArithmeticFunctions() {}
private ArraySqlFunctions() {}

@SqlInvokedScalarFunction(value = "array_sum", deterministic = true, calledOnNullInput = false)
@Description("Returns the sum of all array elements, or 0 if the array is empty. Ignores null elements.")
Expand Down Expand Up @@ -78,4 +78,44 @@ public static String arrayFrequencyVarchar()
"(m, x) -> IF (x IS NOT NULL, MAP_CONCAT(m,MAP_FROM_ENTRIES(ARRAY[ROW(x, COALESCE(ELEMENT_AT(m,x) + 1, 1))])), m)," +
"m -> m)";
}

/**
 * SQL body of {@code array_dupes} for {@code array(varchar)} input.
 * <p>
 * The returned statement concatenates two arrays: a one-element NULL array when the input
 * contains more than one NULL (an empty array otherwise), and the keys of
 * {@code array_frequency(input)} whose count is greater than one.
 *
 * @return the {@code RETURN ...} statement used as the function body
 */
@SqlInvokedScalarFunction(value = "array_dupes", deterministic = true, calledOnNullInput = false)
@Description("Returns set of elements that have duplicates")
@SqlParameter(name = "input", type = "array(varchar)")
@SqlType("array(varchar)")
public static String arrayDupesVarchar()
{
    // NULLs get their own filter: more than one NULL contributes a single NULL element.
    final String repeatedNull = "CAST(IF (cardinality(filter(input, x -> x is NULL)) > 1, ARRAY[NULL], ARRAY[]) AS ARRAY(VARCHAR)),";
    // Keep only the frequency-map keys counted more than once; the last ')' closes CONCAT(.
    final String repeatedValues = "map_keys(map_filter(array_frequency(input), (k, v) -> v > 1)))";
    return "RETURN CONCAT(" + repeatedNull + repeatedValues;
}

/**
 * SQL body of {@code array_dupes} for {@code array(bigint)} input.
 * <p>
 * The returned statement concatenates two arrays: a one-element NULL array when the input
 * contains more than one NULL (an empty array otherwise), and the keys of
 * {@code array_frequency(input)} whose count is greater than one.
 *
 * @return the {@code RETURN ...} statement used as the function body
 */
@SqlInvokedScalarFunction(value = "array_dupes", deterministic = true, calledOnNullInput = false)
@Description("Returns set of elements that have duplicates")
@SqlParameter(name = "input", type = "array(bigint)")
@SqlType("array(bigint)")
public static String arrayDupesBigint()
{
    // NULLs get their own filter: more than one NULL contributes a single NULL element.
    final String repeatedNull = "CAST(IF (cardinality(filter(input, x -> x is NULL)) > 1, ARRAY[NULL], ARRAY[]) AS ARRAY(BIGINT)),";
    // Keep only the frequency-map keys counted more than once; the last ')' closes CONCAT(.
    final String repeatedValues = "map_keys(map_filter(array_frequency(input), (k, v) -> v > 1)))";
    return "RETURN CONCAT(" + repeatedNull + repeatedValues;
}

/**
 * SQL body of {@code array_has_dupes} for {@code array(varchar)} input.
 * <p>
 * Delegates to {@code array_dupes}: the result is true exactly when that function
 * returns a non-empty array.
 *
 * @return the {@code RETURN ...} statement used as the function body
 */
@SqlInvokedScalarFunction(value = "array_has_dupes", deterministic = true, calledOnNullInput = false)
@Description("Returns whether array has any duplicate element")
@SqlParameter(name = "input", type = "array(varchar)")
@SqlType("boolean")
public static String arrayHasDupesVarchar()
{
    final String anyDuplicateFound = "RETURN cardinality(array_dupes(input)) > 0";
    return anyDuplicateFound;
}

/**
 * SQL body of {@code array_has_dupes} for {@code array(bigint)} input.
 * <p>
 * Delegates to {@code array_dupes}: the result is true exactly when that function
 * returns a non-empty array.
 *
 * @return the {@code RETURN ...} statement used as the function body
 */
@SqlInvokedScalarFunction(value = "array_has_dupes", deterministic = true, calledOnNullInput = false)
@Description("Returns whether array has any duplicate element")
@SqlParameter(name = "input", type = "array(bigint)")
@SqlType("boolean")
public static String arrayHasDupesBigint()
{
    final String anyDuplicateFound = "RETURN cardinality(array_dupes(input)) > 0";
    return anyDuplicateFound;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,27 @@
*/
package com.facebook.presto.operator.scalar.sql;

import com.facebook.presto.common.type.ArrayType;
import com.facebook.presto.common.type.MapType;
import com.facebook.presto.common.type.TestRowType;
import com.facebook.presto.common.type.TypeSignature;
import com.facebook.presto.metadata.FunctionAndTypeManager;
import com.facebook.presto.operator.scalar.AbstractTestFunctions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.testng.annotations.Test;

import java.util.Collections;

import static com.facebook.presto.common.block.MethodHandleUtil.methodHandle;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.common.type.IntegerType.INTEGER;
import static com.facebook.presto.common.type.VarcharType.VARCHAR;
import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager;

public class TestArrayArithmeticFunctions
public class TestArraySqlFunctions
extends AbstractTestFunctions
{
@Test
Expand Down Expand Up @@ -121,4 +126,40 @@ public void testArrayFrequencyVarchar()
assertFunction("array_frequency(array[varchar 'a', varchar 'a', varchar 'p'])", functionAndTypeManager.getType(typeSignature), ImmutableMap.of("p", 1, "a", 2));
assertFunction("array_frequency(array[varchar 'z'])", functionAndTypeManager.getType(typeSignature), ImmutableMap.of("z", 1));
}

@Test
public void testArrayHasDupes()
{
    // A NULL array is expected to produce NULL rather than a boolean.
    assertFunction("array_has_dupes(cast(null as array(varchar)))", BOOLEAN, null);

    // Inputs in which at least one element (including NULL) appears more than once.
    String[] expectedTrue = {
            "array_has_dupes(array[varchar 'a', varchar 'b', varchar 'a'])",
            "array_has_dupes(array[varchar 'a', varchar 'a'])",
            "array_has_dupes(array[1, 2, 1])",
            "array_has_dupes(array[1, 1, 1])",
            "array_has_dupes(array[0, null, null])",
    };
    for (String projection : expectedTrue) {
        assertFunction(projection, BOOLEAN, true);
    }

    // Inputs that are empty or in which every element appears exactly once.
    String[] expectedFalse = {
            "array_has_dupes(cast(array[] as array(varchar)))",
            "array_has_dupes(array[varchar 'a', varchar 'b'])",
            "array_has_dupes(array[1, 2])",
            "array_has_dupes(array[0, null])",
    };
    for (String projection : expectedFalse) {
        assertFunction(projection, BOOLEAN, false);
    }
}

@Test
public void testArrayDupes()
{
    ArrayType varcharArray = new ArrayType(VARCHAR);
    ArrayType bigintArray = new ArrayType(BIGINT);

    // NULL input gives NULL; an empty array gives an empty result.
    assertFunction("array_dupes(cast(null as array(varchar)))", varcharArray, null);
    assertFunction("array_dupes(cast(array[] as array(varchar)))", varcharArray, ImmutableList.of());

    // varchar elements: only values seen more than once are returned.
    assertFunction("array_dupes(array[varchar 'a', varchar 'b', varchar 'a'])", varcharArray, ImmutableList.of("a"));
    assertFunction("array_dupes(array[varchar 'a', varchar 'b'])", varcharArray, ImmutableList.of());
    assertFunction("array_dupes(array[varchar 'a', varchar 'a'])", varcharArray, ImmutableList.of("a"));

    // Integer literals: expected result type is array(bigint); a value repeated three times is listed once.
    assertFunction("array_dupes(array[1, 2, 1])", bigintArray, ImmutableList.of(1L));
    assertFunction("array_dupes(array[1, 2])", bigintArray, ImmutableList.of());
    assertFunction("array_dupes(array[1, 1, 1])", bigintArray, ImmutableList.of(1L));

    // NULL elements: a single NULL is not a duplicate, two NULLs yield one NULL.
    // ImmutableList rejects null elements, hence Collections.singletonList.
    assertFunction("array_dupes(array[0, null])", bigintArray, ImmutableList.of());
    assertFunction("array_dupes(array[0, null, null])", bigintArray, Collections.singletonList(null));
}
}