Skip to content

Commit 1001994

Browse files
authored
PARQUET-1213: Column indexes: Limit index size (#480)
1 parent 6165a0c commit 1001994

File tree

10 files changed

+393
-231
lines changed

10 files changed

+393
-231
lines changed

parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryColumnIndexBuilder.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,4 +102,9 @@ int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int ind
102102
int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
103103
return comparator.compare(maxValues.get(index1), maxValues.get(index2));
104104
}
105+
106+
@Override
107+
int sizeOf(Object value) {
108+
return ((Binary) value).length();
109+
}
105110
}

parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BooleanColumnIndexBuilder.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,4 +103,9 @@ int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int ind
103103
int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
104104
return comparator.compare(maxValues.get(index1), maxValues.get(index2));
105105
}
106+
107+
@Override
108+
int sizeOf(Object value) {
109+
return 1;
110+
}
106111
}

parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,13 +195,19 @@ int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int ind
195195
int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
196196
return 0;
197197
}
198+
199+
@Override
200+
int sizeOf(Object value) {
201+
return 0;
202+
}
198203
};
199204

200205
private static final Map<PrimitiveTypeName, ColumnIndexBuilder> BUILDERS = new EnumMap<>(PrimitiveTypeName.class);
201206

202207
private PrimitiveType type;
203208
private final BooleanList nullPages = new BooleanArrayList();
204209
private final LongList nullCounts = new LongArrayList();
210+
private long minMaxSize;
205211

206212
/**
207213
* @return a no-op builder that does not collect statistics objects and therefore returns {@code null} at
@@ -293,7 +299,11 @@ public static ColumnIndex build(
293299
public void add(Statistics<?> stats) {
294300
if (stats.hasNonNullValue()) {
295301
nullPages.add(false);
296-
addMinMax(stats.genericGetMin(), stats.genericGetMax());
302+
Object min = stats.genericGetMin();
303+
Object max = stats.genericGetMax();
304+
addMinMax(min, max);
305+
minMaxSize += sizeOf(min);
306+
minMaxSize += sizeOf(max);
297307
} else {
298308
nullPages.add(true);
299309
addMinMax(null, null);
@@ -316,7 +326,7 @@ private void fill(List<Boolean> nullPages, List<Long> nullCounts, List<ByteBuffe
316326
nullPages.size(), nullCounts == null ? "null" : nullCounts.size(), minValues.size(), maxValues.size()));
317327
}
318328
this.nullPages.addAll(nullPages);
319-
// Null counts is optional in the format
329+
// Null counts are optional in the format
320330
if (nullCounts != null) {
321331
this.nullCounts.addAll(nullCounts);
322332
}
@@ -325,7 +335,11 @@ private void fill(List<Boolean> nullPages, List<Long> nullCounts, List<ByteBuffe
325335
if (nullPages.get(i)) {
326336
addMinMaxFromBytes(null, null);
327337
} else {
328-
addMinMaxFromBytes(minValues.get(i), maxValues.get(i));
338+
ByteBuffer min = minValues.get(i);
339+
ByteBuffer max = maxValues.get(i);
340+
addMinMaxFromBytes(min, max);
341+
minMaxSize += min.remaining();
342+
minMaxSize += max.remaining();
329343
}
330344
}
331345
}
@@ -421,9 +435,26 @@ private void clear() {
421435
nullPages.clear();
422436
nullCounts.clear();
423437
clearMinMax();
438+
minMaxSize = 0;
424439
}
425440

426441
abstract void clearMinMax();
427442

428443
abstract ColumnIndexBase createColumnIndex(PrimitiveType type);
444+
445+
abstract int sizeOf(Object value);
446+
447+
/**
448+
* @return the number of pages added so far to this builder
449+
*/
450+
public int getPageCount() {
451+
return nullPages.size();
452+
}
453+
454+
/**
455+
* @return the total size in bytes of the min/max values added so far to this builder
456+
*/
457+
public long getMinMaxSize() {
458+
return minMaxSize;
459+
}
429460
}

parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/DoubleColumnIndexBuilder.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ private static double convert(ByteBuffer buffer) {
6767
}
6868

6969
private static ByteBuffer convert(double value) {
70-
return ByteBuffer.allocate(Double.SIZE / 8).order(LITTLE_ENDIAN).putDouble(0, value);
70+
return ByteBuffer.allocate(Double.BYTES).order(LITTLE_ENDIAN).putDouble(0, value);
7171
}
7272

7373
@Override
@@ -105,4 +105,9 @@ int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int ind
105105
int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
106106
return comparator.compare(maxValues.get(index1), maxValues.get(index2));
107107
}
108+
109+
@Override
110+
int sizeOf(Object value) {
111+
return Double.BYTES;
112+
}
108113
}

parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/FloatColumnIndexBuilder.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ private static float convert(ByteBuffer buffer) {
6767
}
6868

6969
private static ByteBuffer convert(float value) {
70-
return ByteBuffer.allocate(Float.SIZE / 8).order(LITTLE_ENDIAN).putFloat(0, value);
70+
return ByteBuffer.allocate(Float.BYTES).order(LITTLE_ENDIAN).putFloat(0, value);
7171
}
7272

7373
@Override
@@ -105,4 +105,9 @@ int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int ind
105105
int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
106106
return comparator.compare(maxValues.get(index1), maxValues.get(index2));
107107
}
108+
109+
@Override
110+
int sizeOf(Object value) {
111+
return Float.BYTES;
112+
}
108113
}

parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/IntColumnIndexBuilder.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ private static int convert(ByteBuffer buffer) {
6767
}
6868

6969
private static ByteBuffer convert(int value) {
70-
return ByteBuffer.allocate(Integer.SIZE / 8).order(LITTLE_ENDIAN).putInt(0, value);
70+
return ByteBuffer.allocate(Integer.BYTES).order(LITTLE_ENDIAN).putInt(0, value);
7171
}
7272

7373
@Override
@@ -105,4 +105,9 @@ int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int ind
105105
int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
106106
return comparator.compare(maxValues.get(index1), maxValues.get(index2));
107107
}
108+
109+
@Override
110+
int sizeOf(Object value) {
111+
return Integer.BYTES;
112+
}
108113
}

parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/LongColumnIndexBuilder.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ private static long convert(ByteBuffer buffer) {
6767
}
6868

6969
private static ByteBuffer convert(long value) {
70-
return ByteBuffer.allocate(Long.SIZE / 8).order(LITTLE_ENDIAN).putLong(0, value);
70+
return ByteBuffer.allocate(Long.BYTES).order(LITTLE_ENDIAN).putLong(0, value);
7171
}
7272

7373
@Override
@@ -105,4 +105,9 @@ int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int ind
105105
int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
106106
return comparator.compare(maxValues.get(index1), maxValues.get(index2));
107107
}
108+
109+
@Override
110+
int sizeOf(Object value) {
111+
return Long.BYTES;
112+
}
108113
}

0 commit comments

Comments
 (0)