Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Verify.verify;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.airlift.slice.SizeOf.SIZE_OF_INT;
import static io.airlift.slice.Slices.wrappedBuffer;
Expand Down Expand Up @@ -212,6 +213,14 @@ private void flush()
}
List<BufferData> bufferDataList = builder.build();

if (rows == 0) {
// Avoid writing empty row groups as these are ignored by the reader
verify(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to write a test?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will be tested via #13246: validation fails there because the reader ignores the empty row group and counts one fewer row group than the writer wrote. That's how I noticed it.

bufferDataList.stream().allMatch(buffer -> buffer.getData().size() == 0),
Comment thread
raunaqmorarka marked this conversation as resolved.
Outdated
"Buffer should be empty when there are no rows");
return;
}

// update stats
long stripeStartOffset = outputStream.longSize();
List<ColumnMetaData> metadatas = bufferDataList.stream()
Expand Down Expand Up @@ -261,7 +270,6 @@ Slice getFooter(List<RowGroup> rowGroups, MessageType messageType)

private void updateRowGroups(List<ColumnMetaData> columnMetaData)
{
// TODO Avoid writing empty row group
long totalBytes = columnMetaData.stream().mapToLong(ColumnMetaData::getTotal_compressed_size).sum();
ImmutableList<org.apache.parquet.format.ColumnChunk> columnChunks = columnMetaData.stream().map(ParquetWriter::toColumnChunk).collect(toImmutableList());
rowGroupBuilder.add(new RowGroup(columnChunks, totalBytes, rows));
Expand Down