@@ -671,6 +671,7 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
671
671
(static_cast <size_t >(at->num_rows ()) + table.fragment_size - 1 - first_frag_size) /
672
672
table.fragment_size +
673
673
1 ;
674
+ size_t last_orig_frag_idx = fragments.empty () ? 0 : fragments.size () - 1 ;
674
675
// Pre-allocate fragment infos and table stats for each column for the following
675
676
// parallel data import.
676
677
fragments.resize (frag_count);
@@ -770,9 +771,9 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
770
771
if (col_type->isFixedLenArray ()) {
771
772
elems_count = col_type->size () / elem_type->size ();
772
773
}
773
- // Compute stats for each fragment.
774
+ // Compute stats for each added/modified fragment.
774
775
tbb::parallel_for (
775
- tbb::blocked_range (size_t ( 0 ) , frag_count), [&](auto frag_range) {
776
+ tbb::blocked_range (last_orig_frag_idx , frag_count), [&](auto frag_range) {
776
777
for (size_t frag_idx = frag_range.begin (); frag_idx != frag_range.end ();
777
778
++frag_idx) {
778
779
auto & frag = fragments[frag_idx];
@@ -810,17 +811,21 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
810
811
}
811
812
}); // each fragment
812
813
813
- // Merge fragment stats to the table stats.
814
+ // Merge added/mdodified fragment stats to the table stats.
814
815
auto & column_stats = table_stats.at (col_info->column_id );
815
- column_stats = fragments[0 ].metadata [col_idx]->chunkStats ();
816
- for (size_t frag_idx = 1 ; frag_idx < frag_count; ++frag_idx) {
816
+ if (!last_orig_frag_idx) {
817
+ column_stats = fragments[0 ].metadata [col_idx]->chunkStats ();
818
+ }
819
+ for (size_t frag_idx = last_orig_frag_idx ? last_orig_frag_idx : 1 ;
820
+ frag_idx < frag_count;
821
+ ++frag_idx) {
817
822
mergeStats (column_stats,
818
823
fragments[frag_idx].metadata [col_idx]->chunkStats (),
819
824
col_type);
820
825
}
821
826
} else {
822
827
bool has_nulls = false ;
823
- for (size_t frag_idx = 0 ; frag_idx < frag_count; ++frag_idx) {
828
+ for (size_t frag_idx = last_orig_frag_idx ; frag_idx < frag_count; ++frag_idx) {
824
829
auto & frag = fragments[frag_idx];
825
830
frag.offset =
826
831
frag_idx ? ((frag_idx - 1 ) * table.fragment_size + first_frag_size) : 0 ;
@@ -841,7 +846,8 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
841
846
}
842
847
843
848
auto & column_stats = table_stats.at (col_info->column_id );
844
- column_stats.has_nulls = has_nulls;
849
+ column_stats.has_nulls =
850
+ last_orig_frag_idx ? (has_nulls || column_stats.has_nulls ) : has_nulls;
845
851
column_stats.min .stringval = nullptr ;
846
852
column_stats.max .stringval = nullptr ;
847
853
}
0 commit comments