Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,13 @@
import it.unimi.dsi.fastutil.ints.IntArrayList;
import org.openjdk.jol.info.ClassLayout;

import java.lang.invoke.MethodHandle;
import java.util.Optional;

import static com.facebook.presto.spi.StandardErrorCode.EXCEEDED_FUNCTION_MEMORY_LIMIT;
import static com.facebook.presto.spi.StandardErrorCode.GENERIC_INSUFFICIENT_RESOURCES;
import static com.facebook.presto.type.TypeUtils.hashPosition;
import static com.facebook.presto.type.TypeUtils.positionDistinctFromPosition;
import static com.facebook.presto.type.TypeUtils.positionEqualsPosition;
import static com.google.common.base.Preconditions.checkArgument;
import static io.airlift.units.DataSize.Unit.MEGABYTE;
Expand All @@ -43,6 +47,7 @@ public class TypedSet
static final long FOUR_MEGABYTES = MAX_FUNCTION_MEMORY.toBytes();

private final Type elementType;
private final Optional<MethodHandle> elementIsDistinctFrom;
private final IntArrayList blockPositionByHash;
private final BlockBuilder elementBlock;
private final String functionName;
Expand All @@ -55,9 +60,21 @@ public class TypedSet
private boolean containsNullElement;

public TypedSet(Type elementType, int expectedSize, String functionName)
{
// TODO revise other usages of TypedSet and determine whether they should use equality or distinctness semantics
this(elementType, Optional.empty(), expectedSize, functionName);
}

public TypedSet(Type elementType, MethodHandle elementIsDistinctFrom, int expectedSize, String functionName)
{
this(elementType, Optional.of(elementIsDistinctFrom), expectedSize, functionName);
}

private TypedSet(Type elementType, Optional<MethodHandle> elementIsDistinctFrom, int expectedSize, String functionName)
{
checkArgument(expectedSize >= 0, "expectedSize must not be negative");
this.elementType = requireNonNull(elementType, "elementType must not be null");
this.elementIsDistinctFrom = requireNonNull(elementIsDistinctFrom, "elementIsDistinctFrom is null");
this.elementBlock = elementType.createBlockBuilder(null, expectedSize);
this.functionName = functionName;

Expand Down Expand Up @@ -126,19 +143,27 @@ private int getHashPositionOfElement(Block block, int position)
int hashPosition = getMaskedHash(hashPosition(elementType, block, position));
while (true) {
int blockPosition = blockPositionByHash.get(hashPosition);
// Doesn't have this element
if (blockPosition == EMPTY_SLOT) {
// Doesn't have this element
return hashPosition;
}
// Already has this element
else if (positionEqualsPosition(elementType, elementBlock, blockPosition, block, position)) {
if (isContainedAt(block, position, blockPosition)) {
// Already has this element
return hashPosition;
}

hashPosition = getMaskedHash(hashPosition + 1);
}
}

private boolean isContainedAt(Block block, int position, int atPosition)
{
if (elementIsDistinctFrom.isPresent()) {
return !positionDistinctFromPosition(elementType, elementBlock, atPosition, block, position, elementIsDistinctFrom.get());
}
return positionEqualsPosition(elementType, elementBlock, atPosition, block, position);
}

private void addNewElement(int hashPosition, Block block, int position)
{
elementType.appendTo(block, position, elementBlock);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.block.BlockBuilder;
import com.facebook.presto.spi.function.Description;
import com.facebook.presto.spi.function.OperatorDependency;
import com.facebook.presto.spi.function.ScalarFunction;
import com.facebook.presto.spi.function.SqlType;
import com.facebook.presto.spi.function.TypeParameter;
Expand All @@ -26,7 +27,12 @@
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;

import java.lang.invoke.MethodHandle;

import static com.facebook.presto.spi.function.OperatorType.IS_DISTINCT_FROM;
import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.facebook.presto.spi.type.StandardTypes.BOOLEAN;
import static com.facebook.presto.type.TypeUtils.positionDistinctFromPosition;

@ScalarFunction("array_distinct")
@Description("Remove duplicate values from the given array")
Expand All @@ -42,22 +48,23 @@ public ArrayDistinctFunction(@TypeParameter("E") Type elementType)

@TypeParameter("E")
@SqlType("array(E)")
public Block distinct(@TypeParameter("E") Type type, @SqlType("array(E)") Block array)
public Block distinct(
@TypeParameter("E") Type type,
@OperatorDependency(operator = IS_DISTINCT_FROM, returnType = BOOLEAN, argumentTypes = {"E", "E"}) MethodHandle elementIsDistinctFrom,
@SqlType("array(E)") Block array)
{
if (array.getPositionCount() < 2) {
return array;
}

if (array.getPositionCount() == 2) {
if (type.equalTo(array, 0, array, 1)) {
return array.getSingleValueBlock(0);
}
else {
if (positionDistinctFromPosition(type, array, 0, array, 1, elementIsDistinctFrom)) {
return array;
}
return array.getSingleValueBlock(0);
}

TypedSet typedSet = new TypedSet(type, array.getPositionCount(), "array_distinct");
TypedSet typedSet = new TypedSet(type, elementIsDistinctFrom, array.getPositionCount(), "array_distinct");
int distinctCount = 0;

if (pageBuilder.isFull()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@

import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.facebook.presto.spi.type.TypeUtils.readNativeValue;
import static com.facebook.presto.util.Failures.internalError;
import static com.google.common.base.Defaults.defaultValue;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Throwables.throwIfUnchecked;
import static com.google.common.collect.ImmutableList.toImmutableList;
Expand Down Expand Up @@ -100,6 +103,20 @@ public static boolean positionEqualsPosition(Type type, Block leftBlock, int lef
return type.equalTo(leftBlock, leftPosition, rightBlock, rightPosition);
}

public static boolean positionDistinctFromPosition(Type type, Block leftBlock, int leftPosition, Block rightBlock, int rightPosition, MethodHandle isDistinctFrom)
{
boolean firstValueNull = leftBlock.isNull(leftPosition);
Object firstValue = firstValueNull ? defaultValue(type.getJavaType()) : readNativeValue(type, leftBlock, leftPosition);
boolean secondValueNull = rightBlock.isNull(rightPosition);
Object secondValue = secondValueNull ? defaultValue(type.getJavaType()) : readNativeValue(type, rightBlock, rightPosition);
try {
return (boolean) isDistinctFrom.invoke(firstValue, firstValueNull, secondValue, secondValueNull);
}
catch (Throwable t) {
throw internalError(t);
}
}

public static Type resolveType(TypeSignature typeName, TypeManager typeManager)
{
return typeManager.getType(typeName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,16 @@ public void testDistinct()
assertFunction("ARRAY_DISTINCT(ARRAY [NULL, NULL])", new ArrayType(UNKNOWN), asList((Object) null));
assertFunction("ARRAY_DISTINCT(ARRAY [NULL, NULL, NULL])", new ArrayType(UNKNOWN), asList((Object) null));

// Indeterminate values
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add the test cases mentioned in #11977 as well?

assertFunction(
"ARRAY_DISTINCT(ARRAY[(123, 'abc'), (123, NULL)])",
new ArrayType(RowType.anonymous(ImmutableList.of(INTEGER, createVarcharType(3)))),
ImmutableList.of(asList(123, "abc"), asList(123, null)));
assertFunction(
"ARRAY_DISTINCT(ARRAY[(NULL, NULL), (42, 'def'), (NULL, 'abc'), (123, NULL), (42, 'def'), (NULL, NULL), (NULL, 'abc'), (123, NULL)])",
new ArrayType(RowType.anonymous(ImmutableList.of(INTEGER, createVarcharType(3)))),
ImmutableList.of(asList(null, null), asList(42, "def"), asList(null, "abc"), asList(123, null)));

// Test for BIGINT-optimized implementation
assertFunction("ARRAY_DISTINCT(ARRAY [CAST(5 AS BIGINT), NULL, CAST(12 AS BIGINT), NULL])", new ArrayType(BIGINT), asList(5L, null, 12L));
assertFunction("ARRAY_DISTINCT(ARRAY [CAST(100 AS BIGINT), NULL, CAST(100 AS BIGINT), NULL, 0, -2, 0])", new ArrayType(BIGINT), asList(100L, null, 0L, -2L));
Expand Down