Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit d2c9887

Browse files
committed
Avoid unnecessary chunk stats recomputation on append.
Signed-off-by: ienkovich <[email protected]>
1 parent bfdb831 commit d2c9887

File tree

1 file changed

+13
-7
lines changed

1 file changed

+13
-7
lines changed

omniscidb/ArrowStorage/ArrowStorage.cpp

+13-7
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,7 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
671671
(static_cast<size_t>(at->num_rows()) + table.fragment_size - 1 - first_frag_size) /
672672
table.fragment_size +
673673
1;
674+
size_t last_orig_frag_idx = fragments.empty() ? 0 : fragments.size() - 1;
674675
// Pre-allocate fragment infos and table stats for each column for the following
675676
// parallel data import.
676677
fragments.resize(frag_count);
@@ -770,9 +771,9 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
770771
if (col_type->isFixedLenArray()) {
771772
elems_count = col_type->size() / elem_type->size();
772773
}
773-
// Compute stats for each fragment.
774+
// Compute stats for each added/modified fragment.
774775
tbb::parallel_for(
775-
tbb::blocked_range(size_t(0), frag_count), [&](auto frag_range) {
776+
tbb::blocked_range(last_orig_frag_idx, frag_count), [&](auto frag_range) {
776777
for (size_t frag_idx = frag_range.begin(); frag_idx != frag_range.end();
777778
++frag_idx) {
778779
auto& frag = fragments[frag_idx];
@@ -810,17 +811,21 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
810811
}
811812
}); // each fragment
812813

813-
// Merge fragment stats to the table stats.
814+
// Merge added/modified fragment stats to the table stats.
814815
auto& column_stats = table_stats.at(col_info->column_id);
815-
column_stats = fragments[0].metadata[col_idx]->chunkStats();
816-
for (size_t frag_idx = 1; frag_idx < frag_count; ++frag_idx) {
816+
if (!last_orig_frag_idx) {
817+
column_stats = fragments[0].metadata[col_idx]->chunkStats();
818+
}
819+
for (size_t frag_idx = last_orig_frag_idx ? last_orig_frag_idx : 1;
820+
frag_idx < frag_count;
821+
++frag_idx) {
817822
mergeStats(column_stats,
818823
fragments[frag_idx].metadata[col_idx]->chunkStats(),
819824
col_type);
820825
}
821826
} else {
822827
bool has_nulls = false;
823-
for (size_t frag_idx = 0; frag_idx < frag_count; ++frag_idx) {
828+
for (size_t frag_idx = last_orig_frag_idx; frag_idx < frag_count; ++frag_idx) {
824829
auto& frag = fragments[frag_idx];
825830
frag.offset =
826831
frag_idx ? ((frag_idx - 1) * table.fragment_size + first_frag_size) : 0;
@@ -841,7 +846,8 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
841846
}
842847

843848
auto& column_stats = table_stats.at(col_info->column_id);
844-
column_stats.has_nulls = has_nulls;
849+
column_stats.has_nulls =
850+
last_orig_frag_idx ? (has_nulls || column_stats.has_nulls) : has_nulls;
845851
column_stats.min.stringval = nullptr;
846852
column_stats.max.stringval = nullptr;
847853
}

0 commit comments

Comments
 (0)