This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 9d026ff

Avoid unnecessary chunk stats recomputation on append.
Signed-off-by: ienkovich <[email protected]>
1 parent: c603402 · commit: 9d026ff

File tree

1 file changed: +13 -7 lines

omniscidb/ArrowStorage/ArrowStorage.cpp

+13 -7
@@ -716,6 +716,7 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
           (static_cast<size_t>(at->num_rows()) + table.fragment_size - 1 - first_frag_size) /
               table.fragment_size +
           1;
+  size_t last_orig_frag_idx = fragments.empty() ? 0 : fragments.size() - 1;
   // Pre-allocate fragment infos and table stats for each column for the following
   // parallel data import.
   fragments.resize(frag_count);
@@ -815,9 +816,9 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
       if (col_type->isFixedLenArray()) {
         elems_count = col_type->size() / elem_type->size();
       }
-      // Compute stats for each fragment.
+      // Compute stats for each added/modified fragment.
       tbb::parallel_for(
-          tbb::blocked_range(size_t(0), frag_count), [&](auto frag_range) {
+          tbb::blocked_range(last_orig_frag_idx, frag_count), [&](auto frag_range) {
             for (size_t frag_idx = frag_range.begin(); frag_idx != frag_range.end();
                  ++frag_idx) {
               auto& frag = fragments[frag_idx];
@@ -855,17 +856,21 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
             }
           }); // each fragment
 
-      // Merge fragment stats to the table stats.
+      // Merge added/modified fragment stats to the table stats.
       auto& column_stats = table_stats.at(col_info->column_id);
-      column_stats = fragments[0].metadata[col_idx]->chunkStats();
-      for (size_t frag_idx = 1; frag_idx < frag_count; ++frag_idx) {
+      if (!last_orig_frag_idx) {
+        column_stats = fragments[0].metadata[col_idx]->chunkStats();
+      }
+      for (size_t frag_idx = last_orig_frag_idx ? last_orig_frag_idx : 1;
+           frag_idx < frag_count;
+           ++frag_idx) {
         mergeStats(column_stats,
                    fragments[frag_idx].metadata[col_idx]->chunkStats(),
                    col_type);
       }
     } else {
       bool has_nulls = false;
-      for (size_t frag_idx = 0; frag_idx < frag_count; ++frag_idx) {
+      for (size_t frag_idx = last_orig_frag_idx; frag_idx < frag_count; ++frag_idx) {
         auto& frag = fragments[frag_idx];
         frag.offset =
             frag_idx ? ((frag_idx - 1) * table.fragment_size + first_frag_size) : 0;
@@ -886,7 +891,8 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
       }
 
       auto& column_stats = table_stats.at(col_info->column_id);
-      column_stats.has_nulls = has_nulls;
+      column_stats.has_nulls =
+          last_orig_frag_idx ? (has_nulls || column_stats.has_nulls) : has_nulls;
       column_stats.min.stringval = nullptr;
       column_stats.max.stringval = nullptr;
     }
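
The change above makes appendArrowTable recompute per-fragment chunk stats only for the fragments an append can actually touch: the last pre-existing fragment (which may still have room and therefore receives new rows) and every newly created fragment. Those stats are then merged into the previously accumulated table stats instead of being rebuilt from fragment 0. The standalone sketch below illustrates just that merge pattern; the Stats struct and the mergeStats helper are simplified, hypothetical stand-ins for the real ChunkStats metadata and merge logic in ArrowStorage.cpp, not the actual types.

// Minimal sketch of the append-time stats merge, not the ArrowStorage code.
// Stats and mergeStats are simplified stand-ins; the real code merges
// fragments[frag_idx].metadata[col_idx]->chunkStats() with type information.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

struct Stats {
  int min;
  int max;
  bool has_nulls;
};

// Fold fragment-level stats into the running table-level stats.
void mergeStats(Stats& into, const Stats& from) {
  into.min = std::min(into.min, from.min);
  into.max = std::max(into.max, from.max);
  into.has_nulls = into.has_nulls || from.has_nulls;
}

int main() {
  // Two fragments already exist and table_stats already reflects them.
  std::vector<Stats> fragments = {{1, 5, false}, {2, 9, false}};
  Stats table_stats = {1, 9, false};

  // An append fills up the last existing fragment and creates one new one.
  size_t last_orig_frag_idx = fragments.empty() ? 0 : fragments.size() - 1;
  fragments[last_orig_frag_idx] = {2, 12, true};  // recomputed after append
  fragments.push_back({0, 7, false});             // newly created fragment

  if (!last_orig_frag_idx) {
    // The table was empty before the append: seed stats from fragment 0.
    table_stats = fragments[0];
  }
  // Merge only the recomputed/added fragments; untouched ones are skipped.
  for (size_t frag_idx = last_orig_frag_idx ? last_orig_frag_idx : 1;
       frag_idx < fragments.size();
       ++frag_idx) {
    mergeStats(table_stats, fragments[frag_idx]);
  }

  std::cout << "min=" << table_stats.min << " max=" << table_stats.max
            << " has_nulls=" << table_stats.has_nulls << "\n";  // min=0 max=12 has_nulls=1
  return 0;
}

Merging instead of recomputing from fragment 0 is sound because an append only adds rows: the stats of untouched fragments stay valid, and the last original fragment's value range can only widen, so folding its fresh stats into the existing table stats gives the same result as a full rebuild. The has_nulls handling in the second branch of the diff follows the same reasoning.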
