diff --git a/presto-main/src/main/java/com/facebook/presto/operator/scalar/HyperLogLogFunctions.java b/presto-main/src/main/java/com/facebook/presto/operator/scalar/HyperLogLogFunctions.java index 0ea897f054faf..fde3d30dccd17 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/scalar/HyperLogLogFunctions.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/scalar/HyperLogLogFunctions.java @@ -14,6 +14,7 @@ package com.facebook.presto.operator.scalar; import com.facebook.airlift.stats.cardinality.HyperLogLog; +import com.facebook.airlift.stats.cardinality.PrivateLpcaSketch; import com.facebook.presto.common.block.Block; import com.facebook.presto.common.type.StandardTypes; import com.facebook.presto.spi.function.Description; @@ -24,6 +25,9 @@ import static com.facebook.presto.operator.aggregation.ApproximateSetAggregation.DEFAULT_STANDARD_ERROR; import static com.facebook.presto.operator.aggregation.HyperLogLogUtils.standardErrorToBuckets; +import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; +import static com.facebook.presto.spi.function.SqlFunctionVisibility.EXPERIMENTAL; +import static com.facebook.presto.util.Failures.checkCondition; public final class HyperLogLogFunctions { @@ -37,6 +41,16 @@ public static long cardinality(@SqlType(StandardTypes.HYPER_LOG_LOG) Slice seria return HyperLogLog.newInstance(serializedHll).cardinality(); } + /** Experimental scalar function: deserializes the given HyperLogLog, wraps it in a PrivateLpcaSketch, and returns that sketch's cardinality as a BIGINT. The epsilon argument must satisfy epsilon > 0; zero, negative values and NaN fail that comparison and are reported through checkCondition with INVALID_FUNCTION_ARGUMENT. */ @ScalarFunction(visibility = EXPERIMENTAL) + @Description("compute the noisy cardinality of a HyperLogLog instance") + @SqlType(StandardTypes.BIGINT) + public static long noisyCardinality(@SqlType(StandardTypes.HYPER_LOG_LOG) Slice serializedHll, @SqlType(StandardTypes.DOUBLE) double epsilon) + { + checkCondition(epsilon > 0, INVALID_FUNCTION_ARGUMENT, "Epsilon must be greater than 0"); + /* epsilon is split 10% / 90% between the sketch's two numeric constructor arguments. NOTE(review): presumably these are two separate privacy budgets of PrivateLpcaSketch; confirm their meaning and order against the airlift class. */ PrivateLpcaSketch privacySketch = new PrivateLpcaSketch(HyperLogLog.newInstance(serializedHll), 0.1 * epsilon, 0.9 * epsilon); + return privacySketch.cardinality(); + } + 
@ScalarFunction @Description("an empty HyperLogLog instance") @SqlType(StandardTypes.HYPER_LOG_LOG) diff --git a/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestHyperLogLogFunctions.java b/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestHyperLogLogFunctions.java index eb20e1df18b5e..265aed658c7ea 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestHyperLogLogFunctions.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/scalar/TestHyperLogLogFunctions.java @@ -91,6 +91,21 @@ public void testCardinalitySingleColumn() functionAssertions.assertFunctionWithError(projection, BIGINT, uniqueElements, error); } + /** For each count in {0, 10000, 100000, 1000000}: builds a HyperLogLog with NUMBER_OF_BUCKETS, adds the integers 0..count-1, and checks the noisy_cardinality projection against the count with an allowed error of 5% of the count (so the allowed error is 0 for the empty sketch). The projection passes infinity() as epsilon; NOTE(review): presumably that makes the added noise negligible so the result is deterministic - confirm against PrivateLpcaSketch. */ @Test + public void testNoisyCardinalitySingleColumn() + { + int[] uniqueElementsCount = {0, 10000, 100000, 1000000}; + for (int uniqueElements : uniqueElementsCount) { + double error = uniqueElements * 0.05; + HyperLogLog hll = HyperLogLog.newInstance(NUMBER_OF_BUCKETS); + for (int i = 0; i < uniqueElements; i++) { + hll.add(i); + } + String projection = getNoisyCardinalityProjection(hll); + functionAssertions.assertFunctionWithError(projection, BIGINT, uniqueElements, error); + } + } + @Test public void testCardinalityTwoColumns() { @@ -206,4 +221,12 @@ private String getMergeProjection(List list) return projection; } + + /** Builds the SQL expression under test: serializes the sketch, encodes its bytes as lower-case base16, and embeds them as an X'...' literal cast to HyperLogLog, with infinity() as the epsilon argument of noisy_cardinality. */ private String getNoisyCardinalityProjection(HyperLogLog hll) + { + Slice serializedHll = hll.serialize(); + byte[] hllBytes = serializedHll.getBytes(); + String encodedHll = BaseEncoding.base16().lowerCase().encode(hllBytes); + return String.format("noisy_cardinality(CAST(X'%s' AS HyperLogLog), infinity())", encodedHll); + } }