Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import io.trino.parquet.writer.repdef.RepLevelIterable;
import io.trino.parquet.writer.repdef.RepLevelIterables;
import io.trino.parquet.writer.valuewriter.PrimitiveValueWriter;
import io.trino.spi.block.Block;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to add a test?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ParquetTester#testRoundTrip already tests with and without nulls
AbstractTestParquetReader also has tests with flat as well as nested types

import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Encoding;
Expand Down Expand Up @@ -49,7 +50,6 @@
import static com.google.common.collect.ImmutableList.toImmutableList;
Comment thread
raunaqmorarka marked this conversation as resolved.
Outdated
import static io.trino.parquet.writer.ParquetCompressor.getCompressor;
import static io.trino.parquet.writer.ParquetDataOutput.createDataOutput;
import static io.trino.parquet.writer.repdef.RepLevelIterables.getIterator;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;
import static org.apache.parquet.bytes.BytesInput.copy;
Expand Down Expand Up @@ -113,36 +113,52 @@ public void writeBlock(ColumnChunk columnChunk)
throws IOException
{
checkState(!closed);

ColumnChunk current = new ColumnChunk(columnChunk.getBlock(),
ImmutableList.<DefLevelIterable>builder()
.addAll(columnChunk.getDefLevelIterables())
.add(DefLevelIterables.of(columnChunk.getBlock(), maxDefinitionLevel))
.build(),
ImmutableList.<RepLevelIterable>builder()
.addAll(columnChunk.getRepLevelIterables())
.add(RepLevelIterables.of(columnChunk.getBlock()))
.build());

// write values
primitiveValueWriter.write(columnChunk.getBlock());

// write definition levels
Iterator<Integer> defIterator = DefLevelIterables.getIterator(current.getDefLevelIterables());
while (defIterator.hasNext()) {
int next = defIterator.next();
definitionLevelWriter.writeInteger(next);
if (next != maxDefinitionLevel) {
currentPageNullCounts++;
if (columnChunk.getDefLevelIterables().isEmpty()) {
// write definition levels for flat data types
Block block = columnChunk.getBlock();
if (!block.mayHaveNull()) {
for (int position = 0; position < block.getPositionCount(); position++) {
Comment thread
raunaqmorarka marked this conversation as resolved.
Outdated
definitionLevelWriter.writeInteger(maxDefinitionLevel);
}
}
else {
for (int position = 0; position < block.getPositionCount(); position++) {
byte isNull = (byte) (block.isNull(position) ? 1 : 0);
definitionLevelWriter.writeInteger(maxDefinitionLevel - isNull);
Comment thread
raunaqmorarka marked this conversation as resolved.
Outdated
currentPageNullCounts += isNull;
}
}
valueCount += block.getPositionCount();
}
else {
// write definition levels for nested data types
Iterator<Integer> defIterator = DefLevelIterables.getIterator(ImmutableList.<DefLevelIterable>builder()
.addAll(columnChunk.getDefLevelIterables())
.add(DefLevelIterables.of(columnChunk.getBlock(), maxDefinitionLevel))
.build());
while (defIterator.hasNext()) {
int next = defIterator.next();
definitionLevelWriter.writeInteger(next);
if (next != maxDefinitionLevel) {
currentPageNullCounts++;
}
valueCount++;
}
valueCount++;
}

// write repetition levels
Iterator<Integer> repIterator = getIterator(current.getRepLevelIterables());
while (repIterator.hasNext()) {
int next = repIterator.next();
repetitionLevelWriter.writeInteger(next);
if (columnDescriptor.getMaxRepetitionLevel() > 0) {
// write repetition levels for nested types
Iterator<Integer> repIterator = RepLevelIterables.getIterator(ImmutableList.<RepLevelIterable>builder()
.addAll(columnChunk.getRepLevelIterables())
.add(RepLevelIterables.of(columnChunk.getBlock()))
.build());
while (repIterator.hasNext()) {
int next = repIterator.next();
repetitionLevelWriter.writeInteger(next);
}
}

updateBufferedBytes();
Expand Down