Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ enum Format
DENSE_V1(1),
SPARSE_V2(2),
DENSE_V2(3),
PRIVATE_LPCA_V1(4),
SFM_V1(7);
PRIVATE_LPCA_V1(4);

private byte tag;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
@NotThreadSafe
public class PrivateLpcaSketch
{
private Bitmap bitmap;
private byte[] bitmap;
private final int threshold;
private final int numberOfBuckets;
private final double epsilonThreshold;
Expand Down Expand Up @@ -82,15 +82,18 @@ public PrivateLpcaSketch(Slice serialized, RandomizationStrategy randomizationSt
threshold = input.readInt();
epsilonThreshold = input.readDouble();
epsilonRandomizedResponse = input.readDouble();
bitmap = Bitmap.fromSliceInput(input, numberOfBuckets);
bitmap = new byte[numberOfBuckets / Byte.SIZE];
for (int i = 0; i < bitmap.length; i++) {
bitmap[i] = input.readByte();
}
}

private void applyRandomizedResponse()
{
double p = getFlipProbability();
for (int i = 0; i < numberOfBuckets; i++) {
if (randomizationStrategy.nextBoolean(p)) {
bitmap.flipBit(i);
flipBit(i);
}
}
}
Expand All @@ -99,10 +102,23 @@ private void applyRandomizedResponse(int bucket)
{
double p = getFlipProbability();
if (randomizationStrategy.nextBoolean(p)) {
bitmap.flipBit(bucket);
flipBit(bucket);
}
}

@VisibleForTesting
static int bitmapBitShift(int bucket)
{
    // Position of the bucket's bit inside its byte, always in [0, Byte.SIZE).
    // floorMod (rather than %) pairs with the floorDiv used by bitmapByteIndex, so that
    // byteIndex * Byte.SIZE + bitShift == bucket holds for every int, and the shift can
    // never go negative (a negative shift would turn (byte) (1 << shift) into an empty mask).
    return Math.floorMod(bucket, Byte.SIZE);
}

@VisibleForTesting
static int bitmapByteIndex(int bucket)
{
    // Maps a zero-based bucket number to the index of the byte that stores its bit:
    // Byte.SIZE consecutive buckets share one byte. Floor division rounds toward
    // negative infinity.
    final int bitsPerByte = Byte.SIZE;
    return Math.floorDiv(bucket, bitsPerByte);
}

public long cardinality()
{
double proportion = getDebiasedBitProportion();
Expand Down Expand Up @@ -139,7 +155,14 @@ private int findThreshold(HyperLogLog hll)
}

@VisibleForTesting
Bitmap getBitmap()
void flipBit(int bucket)
{
    // Toggle the single bit that belongs to the given bucket, leaving the other bits
    // of the containing byte untouched.
    int mask = 1 << bitmapBitShift(bucket);
    int byteIndex = bitmapByteIndex(bucket);
    // XOR with the mask inverts exactly that bit; the cast keeps only the low 8 bits.
    bitmap[byteIndex] = (byte) (bitmap[byteIndex] ^ mask);
}

@VisibleForTesting
byte[] getBitmap()
{
    // Hands back the backing array itself (no copy), so changes made by the caller
    // are visible to this sketch.
    return this.bitmap;
}
Expand All @@ -152,8 +175,8 @@ private double getDebiasedBitProportion()
// So the proportion of bits equal to 1 has expectation:
// p + (1-2p) T,
// where T is the true proportion.
double probability = getFlipProbability();
return (getRawBitProportion() - probability) / (1 - 2 * probability);
double effProbability = randomizationStrategy.effectiveProbability(getFlipProbability());
return (getRawBitProportion() - effProbability) / (1 - 2 * effProbability);
}

private double getFlipProbability()
Expand All @@ -169,7 +192,11 @@ public int getNumberOfBuckets()
@VisibleForTesting
double getRawBitProportion()
{
return (double) bitmap.getBitCount() / numberOfBuckets;
double count = 0;
for (byte b : bitmap) {
count += Integer.bitCount(b & BYTE_MASK);
}
return count / numberOfBuckets;
}

public int getThreshold()
Expand All @@ -187,11 +214,23 @@ public Slice serialize()
.appendInt(threshold)
.appendDouble(epsilonThreshold)
.appendDouble(epsilonRandomizedResponse)
.appendBytes(bitmap.toBytes());
.appendBytes(bitmap);

return output.slice();
}

@VisibleForTesting
void setBit(int bucket, boolean value)
{
    // Force the bit for the given bucket to 1 (value == true) or 0 (value == false),
    // leaving the other bits of the containing byte untouched.
    int mask = 1 << bitmapBitShift(bucket);
    int byteIndex = bitmapByteIndex(bucket);
    int current = bitmap[byteIndex];
    // OR with the mask raises the bit; AND with the inverted mask clears it.
    // The cast keeps only the low 8 bits.
    int updated = value ? (current | mask) : (current & ~mask);
    bitmap[byteIndex] = (byte) updated;
}

/**
* Updates current sketch by adding data from a second HyperLogLog
*
Expand All @@ -206,15 +245,15 @@ public void update(HyperLogLog hllOther)
// if the new HLL's bucket value is at or below threshold, we don't need to do anything
// if above threshold, we need to set to 1 and then re-apply randomized response on the bit
if (value > threshold) {
bitmap.setBit(i, true);
setBit(i, true);
applyRandomizedResponse(i);
}
});
}

private void writeBitmap(HyperLogLog hll)
{
bitmap = new Bitmap(numberOfBuckets);
hll.eachBucket((i, value) -> bitmap.setBit(i, value > threshold));
bitmap = new byte[numberOfBuckets / Byte.SIZE];
hll.eachBucket((i, value) -> setBit(i, value > threshold));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,11 @@
*/
package com.facebook.airlift.stats.cardinality;

public abstract class RandomizationStrategy
public interface RandomizationStrategy
{
abstract long getRetainedSizeInBytes();
double effectiveProbability(double probability);

public boolean nextBoolean(double probability)
{
return nextDouble() <= probability;
}
boolean nextBoolean(double probability);

abstract double nextDouble();

public double nextLaplace(double scale)
{
double quantile = nextDouble();
int z = nextDouble() <= 0.5 ? 1 : 0;
return (2 * z - 1) * scale * Math.log(quantile);
}
double nextLaplace(double scale);
}
Loading