@@ -790,6 +790,10 @@ void RelAlgExecutor::executeStepWithPartitionedAggregation(const hdk::ir::Node*
790
790
proj.reset ();
791
791
}
792
792
793
+ // Currently, we merge shuffle node with simple projections only. Therefore, we can
794
+ // assign original table stats to shuffling results to avoid metadata computation.
795
+ maybeCopyTableStatsFromInput (shuffle_node.get ());
796
+
793
797
// Create new aggregation node and execute it.
794
798
auto part_agg = std::make_shared<hdk::ir::Aggregate>(
795
799
agg->getGroupByCount (), agg->getAggs (), agg->getFields (), agg_input_shared);
@@ -826,6 +830,74 @@ void RelAlgExecutor::executeStepWithPartitionedAggregation(const hdk::ir::Node*
826
830
temporary_tables_.erase (-new_root->getId ());
827
831
}
828
832
833
+ void RelAlgExecutor::maybeCopyTableStatsFromInput (const hdk::ir::Node* node) {
834
+ std::vector<int > col_mapping;
835
+ col_mapping.reserve (node->size ());
836
+ // Stats copy is supported for shuffle and simple projections only.
837
+ if (node->is <hdk::ir::Project>()) {
838
+ auto proj = node->as <hdk::ir::Project>();
839
+ if (!proj->isSimple ()) {
840
+ VLOG (1 ) << " Cannot copy table stats for non-simple projection." ;
841
+ return ;
842
+ }
843
+ for (auto & expr : proj->getExprs ()) {
844
+ col_mapping.push_back (expr->as <hdk::ir::ColumnRef>()->index ());
845
+ }
846
+ } else if (node->is <hdk::ir::Shuffle>()) {
847
+ for (auto & expr : node->as <hdk::ir::Shuffle>()->exprs ()) {
848
+ CHECK (expr->is <hdk::ir::ColumnRef>());
849
+ col_mapping.push_back (expr->as <hdk::ir::ColumnRef>()->index ());
850
+ }
851
+ } else {
852
+ VLOG (1 ) << " Cannot copy table stats for node " << node->toString ();
853
+ return ;
854
+ }
855
+
856
+ // We can traverse through a chain of simple projections to the original data source.
857
+ auto data_source = node->getInput (0 );
858
+ while (!data_source->getResult () && !data_source->is <hdk::ir::Scan>()) {
859
+ auto proj = data_source->as <hdk::ir::Project>();
860
+ if (!proj || !proj->isSimple ()) {
861
+ VLOG (1 ) << " Cannot copy table stats due to non-simple projection. "
862
+ << node->toString ();
863
+ return ;
864
+ }
865
+ for (size_t i = 0 ; i < col_mapping.size (); ++i) {
866
+ auto idx = static_cast <size_t >(col_mapping[i]);
867
+ CHECK_LT (idx, proj->size ());
868
+ col_mapping[i] = proj->getExpr (idx)->as <hdk::ir::ColumnRef>()->index ();
869
+ }
870
+ data_source = data_source->getInput (0 );
871
+ }
872
+
873
+ auto input_token =
874
+ data_source->getResult () ? data_source->getResult ()->getToken ().get () : nullptr ;
875
+ auto input_scan = data_source->getResult () ? nullptr : data_source->as <hdk::ir::Scan>();
876
+ int input_db_id = input_token ? input_token->dbId () : input_scan->getDatabaseId ();
877
+ int input_table_id = input_token ? input_token->tableId () : input_scan->getTableId ();
878
+ auto input_meta = data_provider_->getTableMetadata (input_db_id, input_table_id);
879
+ if (input_meta.hasComputedTableStats ()) {
880
+ auto & orig_stats = input_meta.getTableStats ();
881
+ auto target_token = node->getResult ()->getToken ();
882
+ TableStats stats;
883
+ for (size_t i = 0 ; i < col_mapping.size (); ++i) {
884
+ auto target_col_id = target_token->columnId (i);
885
+ auto input_col_id = input_token
886
+ ? input_token->columnId (col_mapping[i])
887
+ : input_scan->getColumnInfo (col_mapping[i])->column_id ;
888
+ CHECK (orig_stats.count (input_col_id))
889
+ << " Cannot find stats for column " << input_col_id
890
+ << " . data_source=" << data_source->toString ();
891
+ stats.emplace (target_col_id, orig_stats.at (input_col_id));
892
+ }
893
+ target_token->setTableStats (std::move (stats));
894
+ VLOG (1 ) << " Copy table stats from " << input_db_id << " :" << input_table_id << " to "
895
+ << target_token->dbId () << " :" << target_token->tableId ();
896
+ } else {
897
+ VLOG (1 ) << " Cannot copy table stats because original table stats are unavailable." ;
898
+ }
899
+ }
900
+
829
901
void RelAlgExecutor::executeStep (const hdk::ir::Node* step_root,
830
902
const CompilationOptions& co,
831
903
const ExecutionOptions& eo,
0 commit comments