@@ -716,6 +716,7 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
716
716
(static_cast <size_t >(at->num_rows ()) + table.fragment_size - 1 - first_frag_size) /
717
717
table.fragment_size +
718
718
1 ;
719
+ size_t last_orig_frag_idx = fragments.empty () ? 0 : fragments.size () - 1 ;
719
720
// Pre-allocate fragment infos and table stats for each column for the following
720
721
// parallel data import.
721
722
fragments.resize (frag_count);
@@ -815,9 +816,9 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
815
816
if (col_type->isFixedLenArray ()) {
816
817
elems_count = col_type->size () / elem_type->size ();
817
818
}
818
- // Compute stats for each fragment.
819
+ // Compute stats for each added/modified fragment.
819
820
tbb::parallel_for (
820
- tbb::blocked_range (size_t ( 0 ) , frag_count), [&](auto frag_range) {
821
+ tbb::blocked_range (last_orig_frag_idx , frag_count), [&](auto frag_range) {
821
822
for (size_t frag_idx = frag_range.begin (); frag_idx != frag_range.end ();
822
823
++frag_idx) {
823
824
auto & frag = fragments[frag_idx];
@@ -855,17 +856,21 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
855
856
}
856
857
}); // each fragment
857
858
858
- // Merge fragment stats to the table stats.
859
+ // Merge added/modified fragment stats to the table stats.
859
860
auto & column_stats = table_stats.at (col_info->column_id );
860
- column_stats = fragments[0 ].metadata [col_idx]->chunkStats ();
861
- for (size_t frag_idx = 1 ; frag_idx < frag_count; ++frag_idx) {
861
+ if (!last_orig_frag_idx) {
862
+ column_stats = fragments[0 ].metadata [col_idx]->chunkStats ();
863
+ }
864
+ for (size_t frag_idx = last_orig_frag_idx ? last_orig_frag_idx : 1 ;
865
+ frag_idx < frag_count;
866
+ ++frag_idx) {
862
867
mergeStats (column_stats,
863
868
fragments[frag_idx].metadata [col_idx]->chunkStats (),
864
869
col_type);
865
870
}
866
871
} else {
867
872
bool has_nulls = false ;
868
- for (size_t frag_idx = 0 ; frag_idx < frag_count; ++frag_idx) {
873
+ for (size_t frag_idx = last_orig_frag_idx ; frag_idx < frag_count; ++frag_idx) {
869
874
auto & frag = fragments[frag_idx];
870
875
frag.offset =
871
876
frag_idx ? ((frag_idx - 1 ) * table.fragment_size + first_frag_size) : 0 ;
@@ -886,7 +891,8 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
886
891
}
887
892
888
893
auto & column_stats = table_stats.at (col_info->column_id );
889
- column_stats.has_nulls = has_nulls;
894
+ column_stats.has_nulls =
895
+ last_orig_frag_idx ? (has_nulls || column_stats.has_nulls ) : has_nulls;
890
896
column_stats.min .stringval = nullptr ;
891
897
column_stats.max .stringval = nullptr ;
892
898
}
0 commit comments