Skip to content

Commit 5750e29

Browse files
authored
GH-47664: [C++][Parquet] add num_rows_ before each call to RowGroupWriter::Close in FileSerializer (#47665)
### Rationale for this change Fix the wrong result of the `num_rows()` method in `FileSerializer`. ### What changes are included in this PR? 1. Accumulate the current row group's row count into `num_rows_` before each call to `RowGroupWriter::Close` in `FileSerializer`. ### Are these changes tested? Yes. ### Are there any user-facing changes? Now `num_rows()` will return the correct result, which is the number of rows in the RowGroups started so far. * GitHub Issue: #47664 Authored-by: Zehua Zou <[email protected]> Signed-off-by: Antoine Pitrou <[email protected]>
1 parent e1f727c commit 5750e29

File tree

2 files changed

+3
-0
lines changed

2 files changed

+3
-0
lines changed

cpp/src/parquet/file_serialize_test.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ class TestSerialize : public PrimitiveTypedTest<TestType> {
7676
for (int rg = 0; rg < num_rowgroups_ / 2; ++rg) {
7777
RowGroupWriter* row_group_writer;
7878
row_group_writer = file_writer->AppendRowGroup();
79+
EXPECT_EQ(rows_per_rowgroup_ * rg, file_writer->num_rows());
7980
for (int col = 0; col < num_columns_; ++col) {
8081
auto column_writer =
8182
static_cast<TypedColumnWriter<TestType>*>(row_group_writer->NextColumn());
@@ -97,6 +98,7 @@ class TestSerialize : public PrimitiveTypedTest<TestType> {
9798
for (int rg = 0; rg < num_rowgroups_ / 2; ++rg) {
9899
RowGroupWriter* row_group_writer;
99100
row_group_writer = file_writer->AppendBufferedRowGroup();
101+
EXPECT_EQ(rows_per_rowgroup_ * (rg + num_rowgroups_ / 2), file_writer->num_rows());
100102
for (int batch = 0; batch < (rows_per_rowgroup_ / rows_per_batch_); ++batch) {
101103
for (int col = 0; col < num_columns_; ++col) {
102104
auto column_writer =

cpp/src/parquet/file_writer.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ class FileSerializer : public ParquetFileWriter::Contents {
358358

359359
RowGroupWriter* AppendRowGroup(bool buffered_row_group) {
360360
if (row_group_writer_) {
361+
num_rows_ += row_group_writer_->num_rows();
361362
row_group_writer_->Close();
362363
}
363364
int16_t row_group_ordinal = -1; // row group ordinal not set

0 commit comments

Comments
 (0)