From 8cc70f33eb8e1acf7ee3746a10ee772414827101 Mon Sep 17 00:00:00 2001 From: Jack Gerrits Date: Fri, 6 Jan 2023 15:23:28 -0500 Subject: [PATCH 1/4] refactor: migrate GD namespace --- python/pylibvw.cc | 2 +- .../core/include/vw/core/gd_predict.h | 120 ++++++++- .../core/include/vw/core/reductions/gd.h | 93 ++++++- vowpalwabbit/core/src/example.cc | 2 +- vowpalwabbit/core/src/reductions/automl.cc | 2 +- vowpalwabbit/core/src/reductions/bfgs.cc | 14 +- .../cb/cb_explore_adf_large_action_space.cc | 4 +- .../src/reductions/cb/cb_explore_adf_rnd.cc | 2 +- .../large_action/compute_dot_prod_scalar.h | 2 +- .../details/large_action/two_pass_svd_impl.cc | 8 +- vowpalwabbit/core/src/reductions/cbzo.cc | 8 +- vowpalwabbit/core/src/reductions/csoaa_ldf.cc | 4 +- .../core/src/reductions/epsilon_decay.cc | 2 +- vowpalwabbit/core/src/reductions/freegrad.cc | 20 +- vowpalwabbit/core/src/reductions/ftrl.cc | 42 ++-- vowpalwabbit/core/src/reductions/gd.cc | 238 +++++++++--------- vowpalwabbit/core/src/reductions/gd_mf.cc | 8 +- vowpalwabbit/core/src/reductions/lda_core.cc | 2 +- vowpalwabbit/core/src/reductions/mf.cc | 2 +- vowpalwabbit/core/src/reductions/mwt.cc | 2 +- vowpalwabbit/core/src/reductions/nn.cc | 4 +- .../core/src/reductions/oja_newton.cc | 16 +- vowpalwabbit/core/src/reductions/print.cc | 2 +- .../core/src/reductions/search/search.cc | 6 +- .../src/reductions/search/search_graph.cc | 4 +- .../core/src/reductions/stagewise_poly.cc | 4 +- vowpalwabbit/core/src/reductions/svrg.cc | 16 +- vowpalwabbit/core/src/vw.cc | 2 +- .../slim/include/vw/slim/vw_slim_predict.h | 4 +- 29 files changed, 404 insertions(+), 231 deletions(-) diff --git a/python/pylibvw.cc b/python/pylibvw.cc index c2dba2c4063..e2ab2b06077 100644 --- a/python/pylibvw.cc +++ b/python/pylibvw.cc @@ -358,7 +358,7 @@ py::object get_options(vw_ptr all, py::object py_class, bool enabled_only) return opt_manager.get_vw_option_pyobjects(enabled_only); } -void my_audit_example(vw_ptr all, example_ptr ec) { GD::print_audit_features(*all, *ec); } +void my_audit_example(vw_ptr all, example_ptr ec) { VW::details::print_audit_features(*all, *ec); } const char* get_model_id(vw_ptr all) { return all->id.c_str(); } diff --git a/vowpalwabbit/core/include/vw/core/gd_predict.h b/vowpalwabbit/core/include/vw/core/gd_predict.h index 2185ec2e61c..b2117a3bd71 100644 --- a/vowpalwabbit/core/include/vw/core/gd_predict.h +++ b/vowpalwabbit/core/include/vw/core/gd_predict.h @@ -11,8 +11,18 @@ #undef VW_DEBUG_LOG #define VW_DEBUG_LOG vw_dbg::GD_PREDICT -namespace GD +namespace VW { + namespace details + { + template +inline void dummy_func(DataT&, const VW::audit_strings*) +{ +} // should never be called due to call_audit overload + +inline void vec_add(float& p, float fx, float fw) { p += fw * fx; } + + } // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_index) template void foreach_feature(WeightsT& /*weights*/, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.) @@ -39,11 +49,6 @@ inline void foreach_feature( for (const auto& f : fs) { FuncT(dat, mult * f.value(), weights[static_cast(f.index() + offset)]); } } -template -inline void dummy_func(DataT&, const VW::audit_strings*) -{ -} // should never be called due to call_audit overload - template // nullptr func can't be used as template param in old // compilers @@ -54,7 +59,7 @@ inline void generate_interactions(const std::vector, WeightsT>( + VW::generate_interactions, WeightsT>( interactions, extent_interactions, permutations, ec, dat, weights, num_interacted_features, cache); } @@ -100,7 +105,7 @@ inline void foreach_feature(WeightsT& weights, bool ignore_some_linear, extent_interactions, permutations, ec, dat, num_interacted_features_ignored, cache); } -inline void vec_add(float& p, float fx, float fw) { p += fw * fx; } + template inline float inline_predict(WeightsT& weights, bool ignore_some_linear, @@ -109,7 +114,7 @@ inline float inline_predict(WeightsT& weights, bool ignore_some_linear, const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, VW::details::generate_interactions_object_cache& cache, float initial = 0.f) { - foreach_feature( + foreach_feature( weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, permutations, ec, initial, cache); return initial; } @@ -121,8 +126,101 @@ inline float inline_predict(WeightsT& weights, bool ignore_some_linear, const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, size_t& num_interacted_features, VW::details::generate_interactions_object_cache& cache, float initial = 0.f) { - foreach_feature(weights, ignore_some_linear, ignore_linear, interactions, + foreach_feature(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, permutations, ec, initial, num_interacted_features, cache); return initial; } -} // namespace GD +} + +// namespace GD +// { + +// // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_index) +// template +// VW_DEPRECATED("Moved to VW namespace") +// void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.) +// { +// VW::foreach_feature(weights, fs, dat, offset, mult); +// } + +// // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) +// template +// VW_DEPRECATED("Moved to VW namespace") +// inline void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.) +// { +// VW::foreach_feature(weights, fs, dat, offset, mult); +// } + +// // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) +// template +// VW_DEPRECATED("Moved to VW namespace") +// inline void foreach_feature( +// const WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.) +// { +// VW::foreach_feature(weights, fs, dat, offset, mult); +// } + +// template // nullptr func can't be used as template param in old +// // compilers +// VW_DEPRECATED("Moved to VW namespace") +// inline void generate_interactions(const std::vector>& interactions, +// const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, +// DataT& dat, WeightsT& weights, size_t& num_interacted_features, +// VW::details::generate_interactions_object_cache& cache) // default value removed to eliminate +// // ambiguity in old complers +// { +// VW::generate_interactions(interactions, extent_interactions, permutations, ec, +// dat, weights, num_interacted_features, cache); +// } + +// // iterate through all namespaces and quadratic&cubic features, callback function FuncT(some_data_R, feature_value_x, +// // WeightOrIndexT) where WeightOrIndexT is EITHER float& feature_weight OR uint64_t feature_index +// template +// VW_DEPRECATED("Moved to VW namespace") +// inline void foreach_feature(WeightsT& weights, bool ignore_some_linear, +// std::array& ignore_linear, +// const std::vector>& interactions, +// const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, +// DataT& dat, size_t& num_interacted_features, VW::details::generate_interactions_object_cache& cache) +// { +// VW::foreach_feature(weights, ignore_some_linear, ignore_linear, interactions, +// extent_interactions, permutations, ec, dat, num_interacted_features, cache); +// } + +// template +// VW_DEPRECATED("Moved to VW namespace") +// inline void foreach_feature(WeightsT& weights, bool ignore_some_linear, +// std::array& ignore_linear, +// const std::vector>& interactions, +// const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, +// DataT& dat, VW::details::generate_interactions_object_cache& cache) +// { +// VW::foreach_feature(weights, ignore_some_linear, ignore_linear, interactions, +// extent_interactions, permutations, ec, dat, cache); +// } + +// template +// VW_DEPRECATED("Moved to VW namespace") +// inline float inline_predict(WeightsT& weights, bool ignore_some_linear, +// std::array& ignore_linear, +// const std::vector>& interactions, +// const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, +// VW::details::generate_interactions_object_cache& cache, float initial = 0.f) +// { +// return VW::inline_predict(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, permutations, ec, +// cache, initial); +// } + +// template +// VW_DEPRECATED("Moved to VW namespace") +// inline float inline_predict(WeightsT& weights, bool ignore_some_linear, +// std::array& ignore_linear, +// const std::vector>& interactions, +// const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, +// size_t& num_interacted_features, VW::details::generate_interactions_object_cache& cache, float initial = 0.f) +// { +// return VW::inline_predict(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, permutations, ec, +// num_interacted_features, cache, initial); +// } +// } // namespace GD \ No newline at end of file diff --git a/vowpalwabbit/core/include/vw/core/reductions/gd.h b/vowpalwabbit/core/include/vw/core/reductions/gd.h index 009cda60456..d2eae5e2f00 100644 --- a/vowpalwabbit/core/include/vw/core/reductions/gd.h +++ b/vowpalwabbit/core/include/vw/core/reductions/gd.h @@ -20,20 +20,22 @@ namespace VW namespace reductions { VW::LEARNER::base_learner* gd_setup(VW::setup_base_i& stack_builder); -} -} // namespace VW -namespace GD + +namespace details { + class per_model_state { public: double normalized_sum_norm_x = 0.0; double total_weight = 0.0; }; +} // namespace details + class gd { public: - std::vector per_model_states; + std::vector per_model_states; size_t no_win_counter = 0; size_t early_stop_thres = 0; float initial_constant = 0.f; @@ -52,13 +54,17 @@ class gd bool adax = false; VW::workspace* all = nullptr; // parallel, features, parameters }; +} // namespace reductions + +namespace details +{ float finalize_prediction(VW::shared_data* sd, VW::io::logger& logger, float ret); void print_features(VW::workspace& all, VW::example& ec); void print_audit_features(VW::workspace&, VW::example& ec); -void save_load_regressor(VW::workspace& all, VW::io_buf& model_file, bool read, bool text); -void save_load_online_state(VW::workspace& all, VW::io_buf& model_file, bool read, bool text, double& total_weight, - double& normalized_sum_norm_x, GD::gd* g = nullptr, uint32_t ftrl_size = 0); +void save_load_regressor_gd(VW::workspace& all, VW::io_buf& model_file, bool read, bool text); +void save_load_online_state_gd(VW::workspace& all, VW::io_buf& model_file, bool read, bool text, double& total_weight, + double& normalized_sum_norm_x, VW::reductions::gd* g = nullptr, uint32_t ftrl_size = 0); template class multipredict_info @@ -99,6 +105,7 @@ inline void vec_add_multipredict(multipredict_info& mp, const float fx, uint6 } } } +} // namespace details // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) template @@ -180,7 +187,7 @@ inline float trunc_weight(const float w, const float gravity) return (gravity < fabsf(w)) ? w - VW::math::sign(w) * gravity : 0.f; } -} // namespace GD +} // namespace VW namespace VW { @@ -239,3 +246,73 @@ inline void generate_interactions(VW::workspace& all, VW::example_predict& ec, R } } // namespace INTERACTIONS + +// namespace GD +// { + +// using gd = VW::reductions::gd; + +// // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) +// template +// VW_DEPRECATED("Moved to VW namespace") +// inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat) +// { +// VW::foreach_feature(all, ec, dat); +// } + +// // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) +// template +// VW_DEPRECATED("Moved to VW namespace") +// inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat, size_t& num_interacted_features) +// { +// VW::foreach_feature(all, ec, dat, num_interacted_features); +// } + +// // iterate through all namespaces and quadratic&cubic features, callback function T(some_data_R, feature_value_x, +// // feature_weight) +// template +// VW_DEPRECATED("Moved to VW namespace") +// inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat) +// { +// VW::foreach_feature(all, ec, dat); +// } + +// template +// VW_DEPRECATED("Moved to VW namespace") +// inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat) +// { +// VW::foreach_feature(all, ec, dat); +// } + +// template +// VW_DEPRECATED("Moved to VW namespace") +// inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat, size_t& num_interacted_features) +// { +// VW::foreach_feature(all, ec, dat, num_interacted_features); +// } + +// template +// VW_DEPRECATED("Moved to VW namespace") +// inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat, size_t& num_interacted_features) +// { +// VW::foreach_feature(all, ec, dat, num_interacted_features); +// } + +// VW_DEPRECATED("Moved to VW namespace") +// inline float inline_predict(VW::workspace& all, VW::example& ec) +// { +// return VW::inline_predict(all, ec); +// } + +// VW_DEPRECATED("Moved to VW namespace") +// inline float inline_predict(VW::workspace& all, VW::example& ec, size_t& num_generated_features) +// { +// return VW::inline_predict(all, ec, num_generated_features); +// } + +// VW_DEPRECATED("Moved to VW namespace") +// inline float trunc_weight(const float w, const float gravity) +// { +// return VW::trunc_weight(w, gravity); +// } +// } \ No newline at end of file diff --git a/vowpalwabbit/core/src/example.cc b/vowpalwabbit/core/src/example.cc index cebe9c1f6b8..dbd671c825a 100644 --- a/vowpalwabbit/core/src/example.cc +++ b/vowpalwabbit/core/src/example.cc @@ -122,7 +122,7 @@ flat_example* flatten_example(VW::workspace& all, example* ec) ffs.mask = all.weights.mask() >> all.weights.stride_shift(); } else { ffs.mask = static_cast(LONG_MAX) >> all.weights.stride_shift(); } - GD::foreach_feature(all, *ec, ffs); + VW::foreach_feature(all, *ec, ffs); std::swap(fec.fs, ffs.fs); diff --git a/vowpalwabbit/core/src/reductions/automl.cc b/vowpalwabbit/core/src/reductions/automl.cc index dbe368aaccf..1a14a17765e 100644 --- a/vowpalwabbit/core/src/reductions/automl.cc +++ b/vowpalwabbit/core/src/reductions/automl.cc @@ -184,7 +184,7 @@ VW::LEARNER::base_learner* make_automl_with_impl(VW::setup_base_i& stack_builder auto ppw = max_live_configs; auto* persist_ptr = verbose_metrics ? persist : persist; data->adf_learner = as_multiline(base_learner->get_learner_by_name_prefix("cb_adf")); - GD::gd& gd = *static_cast( + VW::reductions::gd& gd = *static_cast( base_learner->get_learner_by_name_prefix("gd")->get_internal_type_erased_data_pointer_test_use_only()); auto& adf_data = *static_cast(data->adf_learner->get_internal_type_erased_data_pointer_test_use_only()); diff --git a/vowpalwabbit/core/src/reductions/bfgs.cc b/vowpalwabbit/core/src/reductions/bfgs.cc index 271f3c6a518..339ba579359 100644 --- a/vowpalwabbit/core/src/reductions/bfgs.cc +++ b/vowpalwabbit/core/src/reductions/bfgs.cc @@ -162,8 +162,8 @@ constexpr bool test_example(VW::example& ec) noexcept { return ec.l.simple.label float bfgs_predict(VW::workspace& all, VW::example& ec) { - ec.partial_prediction = GD::inline_predict(all, ec); - return GD::finalize_prediction(all.sd, all.logger, ec.partial_prediction); + ec.partial_prediction = VW::inline_predict(all, ec); + return VW::details::finalize_prediction(all.sd, all.logger, ec.partial_prediction); } inline void add_grad(float& d, float f, float& fw) { (&fw)[W_GT] += d * f; } @@ -175,7 +175,7 @@ float predict_and_gradient(VW::workspace& all, VW::example& ec) all.set_minmax(all.sd, ld.label); float loss_grad = all.loss->first_derivative(all.sd, fp, ld.label) * ec.weight; - GD::foreach_feature(all, ec, loss_grad); + VW::foreach_feature(all, ec, loss_grad); return fp; } @@ -185,7 +185,7 @@ inline void add_precond(float& d, float f, float& fw) { (&fw)[W_COND] += d * f * void update_preconditioner(VW::workspace& all, VW::example& ec) { float curvature = all.loss->second_derivative(all.sd, ec.pred.scalar, ec.l.simple.label) * ec.weight; - GD::foreach_feature(all, ec, curvature); + VW::foreach_feature(all, ec, curvature); } inline void add_dir(float& p, const float fx, float& fw) { p += (&fw)[W_DIR] * fx; } @@ -194,7 +194,7 @@ float dot_with_direction(VW::workspace& all, VW::example& ec) { const auto& simple_red_features = ec.ex_reduction_features.template get(); float temp = simple_red_features.initial; - GD::foreach_feature(all, ec, temp); + VW::foreach_feature(all, ec, temp); return temp; } @@ -982,7 +982,7 @@ void predict(bfgs& b, base_learner&, VW::example& ec) { VW::workspace* all = b.all; ec.pred.scalar = bfgs_predict(*all, ec); - if (audit) { GD::print_audit_features(*(b.all), ec); } + if (audit) { VW::details::print_audit_features(*(b.all), ec); } } template @@ -1103,7 +1103,7 @@ void save_load(bfgs& b, VW::io_buf& model_file, bool read, bool text) model_file, reinterpret_cast(®_vector), sizeof(reg_vector), read, msg, text); if (reg_vector) { save_load_regularizer(*all, b, model_file, read, text); } - else { GD::save_load_regressor(*all, model_file, read, text); } + else { VW::details::save_load_regressor_gd(*all, model_file, read, text); } } } diff --git a/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_large_action_space.cc b/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_large_action_space.cc index 28293313628..73bf987976c 100644 --- a/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_large_action_space.cc +++ b/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_large_action_space.cc @@ -72,7 +72,7 @@ bool _test_only_generate_A(VW::workspace* _all, const multi_ex& examples, std::v if (_all->weights.sparse) { A_triplet_constructor w(_all->weights.sparse_weights.mask(), row_index, _triplets, max_non_zero_col); - GD::foreach_feature( + VW::foreach_feature( _all->weights.sparse_weights, _all->ignore_some_linear, _all->ignore_linear, (red_features.generated_interactions ? *red_features.generated_interactions : *ex->interactions), (red_features.generated_extent_interactions ? *red_features.generated_extent_interactions @@ -83,7 +83,7 @@ bool _test_only_generate_A(VW::workspace* _all, const multi_ex& examples, std::v { A_triplet_constructor w(_all->weights.dense_weights.mask(), row_index, _triplets, max_non_zero_col); - GD::foreach_feature( + VW::foreach_feature( _all->weights.dense_weights, _all->ignore_some_linear, _all->ignore_linear, (red_features.generated_interactions ? *red_features.generated_interactions : *ex->interactions), (red_features.generated_extent_interactions ? *red_features.generated_extent_interactions diff --git a/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc b/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc index 4da6aa54071..b1b6b43ac0f 100644 --- a/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc +++ b/vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc @@ -161,7 +161,7 @@ float cb_explore_adf_rnd::get_initial_prediction(VW::example* ec) lazy_gaussian w; std::pair dotwithnorm(0.f, 0.f); - GD::foreach_feature, float, vec_add_with_norm, lazy_gaussian>(w, _all->ignore_some_linear, + VW::foreach_feature, float, vec_add_with_norm, lazy_gaussian>(w, _all->ignore_some_linear, _all->ignore_linear, _all->interactions, _all->extent_interactions, _all->permutations, *ec, dotwithnorm, _all->generate_interactions_object_cache_state); diff --git a/vowpalwabbit/core/src/reductions/cb/details/large_action/compute_dot_prod_scalar.h b/vowpalwabbit/core/src/reductions/cb/details/large_action/compute_dot_prod_scalar.h index dd0e59f7452..b5752c2bb87 100644 --- a/vowpalwabbit/core/src/reductions/cb/details/large_action/compute_dot_prod_scalar.h +++ b/vowpalwabbit/core/src/reductions/cb/details/large_action/compute_dot_prod_scalar.h @@ -55,7 +55,7 @@ inline float compute_dot_prod_scalar(uint64_t col, VW::workspace* _all, uint64_t AO_triplet_constructor tc(_all->weights.mask(), col, _seed, final_dot_prod); - GD::foreach_feature( + VW::foreach_feature( _all->weights.dense_weights, _all->ignore_some_linear, _all->ignore_linear, (red_features.generated_interactions ? *red_features.generated_interactions : *ex->interactions), (red_features.generated_extent_interactions ? *red_features.generated_extent_interactions diff --git a/vowpalwabbit/core/src/reductions/cb/details/large_action/two_pass_svd_impl.cc b/vowpalwabbit/core/src/reductions/cb/details/large_action/two_pass_svd_impl.cc index 6059bdd50dd..f0680426e8a 100644 --- a/vowpalwabbit/core/src/reductions/cb/details/large_action/two_pass_svd_impl.cc +++ b/vowpalwabbit/core/src/reductions/cb/details/large_action/two_pass_svd_impl.cc @@ -96,7 +96,7 @@ bool two_pass_svd_impl::generate_Y(const multi_ex& examples, const std::vectorweights.sparse_weights.mask(), row_index, col, _seed, _triplets, max_non_zero_col, non_zero_rows, shrink_factors); - GD::foreach_feature( + VW::foreach_feature( _all->weights.sparse_weights, _all->ignore_some_linear, _all->ignore_linear, (red_features.generated_interactions ? *red_features.generated_interactions : *ex->interactions), (red_features.generated_extent_interactions ? *red_features.generated_extent_interactions @@ -107,7 +107,7 @@ bool two_pass_svd_impl::generate_Y(const multi_ex& examples, const std::vectorweights.dense_weights.mask(), row_index, col, _seed, _triplets, max_non_zero_col, non_zero_rows, shrink_factors); - GD::foreach_feature( + VW::foreach_feature( _all->weights.dense_weights, _all->ignore_some_linear, _all->ignore_linear, (red_features.generated_interactions ? *red_features.generated_interactions : *ex->interactions), (red_features.generated_extent_interactions ? *red_features.generated_extent_interactions @@ -152,7 +152,7 @@ void two_pass_svd_impl::generate_B(const multi_ex& examples, const std::vectorweights.sparse) { B_triplet_constructor tc(_all->weights.sparse_weights.mask(), col, Y, final_dot_prod); - GD::foreach_feature( + VW::foreach_feature( _all->weights.sparse_weights, _all->ignore_some_linear, _all->ignore_linear, (red_features.generated_interactions ? *red_features.generated_interactions : *ex->interactions), (red_features.generated_extent_interactions ? *red_features.generated_extent_interactions @@ -162,7 +162,7 @@ void two_pass_svd_impl::generate_B(const multi_ex& examples, const std::vectorweights.dense_weights.mask(), col, Y, final_dot_prod); - GD::foreach_feature( + VW::foreach_feature( _all->weights.dense_weights, _all->ignore_some_linear, _all->ignore_linear, (red_features.generated_interactions ? *red_features.generated_interactions : *ex->interactions), (red_features.generated_extent_interactions ? *red_features.generated_extent_interactions diff --git a/vowpalwabbit/core/src/reductions/cbzo.cc b/vowpalwabbit/core/src/reductions/cbzo.cc index 04470787007..2d57dfc7f56 100644 --- a/vowpalwabbit/core/src/reductions/cbzo.cc +++ b/vowpalwabbit/core/src/reductions/cbzo.cc @@ -84,7 +84,7 @@ inline float constant_inference(VW::workspace& all) float linear_inference(VW::workspace& all, VW::example& ec) { float dotprod = 0; - GD::foreach_feature(all, ec, dotprod); + VW::foreach_feature(all, ec, dotprod); return dotprod; } @@ -139,7 +139,7 @@ void linear_update(cbzo& data, VW::example& ec) upd_data.part_grad = part_grad; upd_data.all = data.all; - GD::foreach_feature>( + VW::foreach_feature>( *data.all, ec, upd_data); } @@ -167,7 +167,7 @@ void print_audit_features(VW::workspace& all, VW::example& ec) VW::to_string(ec.pred.pdf, std::numeric_limits::max_digits10), ec.tag, all.logger); } - GD::print_features(all, ec); + VW::details::print_features(all, ec); } // Returns a value close to x and greater than it @@ -229,7 +229,7 @@ void NO_SANITIZE_UNDEFINED learn(cbzo& data, base_learner& base, VW::example& ec inline void save_load_regressor(VW::workspace& all, VW::io_buf& model_file, bool read, bool text) { - GD::save_load_regressor(all, model_file, read, text); + VW::details::save_load_regressor_gd(all, model_file, read, text); } void save_load(cbzo& data, VW::io_buf& model_file, bool read, bool text) diff --git a/vowpalwabbit/core/src/reductions/csoaa_ldf.cc b/vowpalwabbit/core/src/reductions/csoaa_ldf.cc index 321de353db6..cb3aec614ce 100644 --- a/vowpalwabbit/core/src/reductions/csoaa_ldf.cc +++ b/vowpalwabbit/core/src/reductions/csoaa_ldf.cc @@ -13,7 +13,7 @@ #include "vw/core/loss_functions.h" #include "vw/core/prediction_type.h" #include "vw/core/print_utils.h" -#include "vw/core/reductions/gd.h" // GD::foreach_feature() needed in subtract_example() +#include "vw/core/reductions/gd.h" // VW::foreach_feature() needed in subtract_example() #include "vw/core/scope_exit.h" #include "vw/core/setup_base.h" #include "vw/core/shared_data.h" @@ -78,7 +78,7 @@ void subtract_example(VW::workspace& all, VW::example* ec, VW::example* ecsub) { auto& wap_fs = ec->feature_space[VW::details::WAP_LDF_NAMESPACE]; wap_fs.sum_feat_sq = 0; - GD::foreach_feature(all, *ecsub, *ec); + VW::foreach_feature(all, *ecsub, *ec); ec->indices.push_back(VW::details::WAP_LDF_NAMESPACE); ec->num_features += wap_fs.size(); ec->reset_total_sum_feat_sq(); diff --git a/vowpalwabbit/core/src/reductions/epsilon_decay.cc b/vowpalwabbit/core/src/reductions/epsilon_decay.cc index 919309941cd..fc7449cd469 100644 --- a/vowpalwabbit/core/src/reductions/epsilon_decay.cc +++ b/vowpalwabbit/core/src/reductions/epsilon_decay.cc @@ -385,7 +385,7 @@ VW::LEARNER::base_learner* VW::reductions::epsilon_decay_setup(VW::setup_base_i& // to make sure there are not subtle bugs auto* base_learner = stack_builder.setup_base_learner(); - GD::gd& gd = *static_cast( + VW::reductions::gd& gd = *static_cast( base_learner->get_learner_by_name_prefix("gd")->get_internal_type_erased_data_pointer_test_use_only()); auto& adf_data = *static_cast(as_multiline(base_learner->get_learner_by_name_prefix("cb_adf")) diff --git a/vowpalwabbit/core/src/reductions/freegrad.cc b/vowpalwabbit/core/src/reductions/freegrad.cc index 8fcd8664ff9..8335ba384e7 100644 --- a/vowpalwabbit/core/src/reductions/freegrad.cc +++ b/vowpalwabbit/core/src/reductions/freegrad.cc @@ -69,10 +69,10 @@ template void predict(freegrad& b, base_learner& /* base */, VW::example& ec) { size_t num_features_from_interactions = 0; - ec.partial_prediction = GD::inline_predict(*b.all, ec, num_features_from_interactions); + ec.partial_prediction = VW::inline_predict(*b.all, ec, num_features_from_interactions); ec.num_features_from_interactions = num_features_from_interactions; - ec.pred.scalar = GD::finalize_prediction(b.all->sd, b.all->logger, ec.partial_prediction); - if (audit) { GD::print_audit_features(*(b.all), ec); } + ec.pred.scalar = VW::details::finalize_prediction(b.all->sd, b.all->logger, ec.partial_prediction); + if (audit) { VW::details::print_audit_features(*(b.all), ec); } } void inner_freegrad_predict(freegrad_update_data& d, float x, float& wref) @@ -109,7 +109,7 @@ void freegrad_predict(freegrad& fg, VW::example& ec) float projection_radius; // Compute the unprojected predict - GD::foreach_feature( + VW::foreach_feature( *fg.all, ec, fg.update_data, num_features_from_interactions); norm_w_pred = sqrtf(fg.update_data.squared_norm_prediction); @@ -124,7 +124,7 @@ void freegrad_predict(freegrad& fg, VW::example& ec) ec.partial_prediction = fg.update_data.predict; ec.num_features_from_interactions = num_features_from_interactions; - ec.pred.scalar = GD::finalize_prediction(fg.all->sd, fg.all->logger, ec.partial_prediction); + ec.pred.scalar = VW::details::finalize_prediction(fg.all->sd, fg.all->logger, ec.partial_prediction); } void gradient_dot_w(freegrad_update_data& d, float x, float& wref) @@ -253,10 +253,10 @@ void freegrad_update_after_prediction(freegrad& fg, VW::example& ec) fg.update_data.update = fg.all->loss->first_derivative(fg.all->sd, ec.pred.scalar, ec.l.simple.label); // Compute gradient norm - GD::foreach_feature(*fg.all, ec, fg.update_data); + VW::foreach_feature(*fg.all, ec, fg.update_data); // Performing the update - GD::foreach_feature(*fg.all, ec, fg.update_data); + VW::foreach_feature(*fg.all, ec, fg.update_data); // Update the maximum gradient norm value clipped_grad_norm = sqrtf(fg.update_data.squared_norm_clipped_grad); @@ -277,7 +277,7 @@ void learn_freegrad(freegrad& a, base_learner& /* base */, VW::example& ec) { // update state based on the example and predict freegrad_predict(a, ec); - if (audit) { GD::print_audit_features(*(a.all), ec); } + if (audit) { VW::details::print_audit_features(*(a.all), ec); } // update state based on the prediction freegrad_update_after_prediction(a, ec); @@ -298,10 +298,10 @@ void save_load(freegrad& fg, VW::io_buf& model_file, bool read, bool text) if (resume) { - GD::save_load_online_state( + VW::details::save_load_online_state_gd( *all, model_file, read, text, fg.total_weight, fg.normalized_sum_norm_x, nullptr, fg.freegrad_size); } - else { GD::save_load_regressor(*all, model_file, read, text); } + else { VW::details::save_load_regressor_gd(*all, model_file, read, text); } } } diff --git a/vowpalwabbit/core/src/reductions/ftrl.cc b/vowpalwabbit/core/src/reductions/ftrl.cc index ad6fada5305..7b44a810058 100644 --- a/vowpalwabbit/core/src/reductions/ftrl.cc +++ b/vowpalwabbit/core/src/reductions/ftrl.cc @@ -85,7 +85,7 @@ inline void predict_with_confidence(uncertainty& d, const float fx, float& fw) float sensitivity(ftrl& b, base_learner& /* base */, VW::example& ec) { uncertainty uncetain(b); - GD::foreach_feature(*(b.all), ec, uncetain); + VW::foreach_feature(*(b.all), ec, uncetain); return uncetain.score; } @@ -93,10 +93,10 @@ template void predict(ftrl& b, base_learner&, VW::example& ec) { size_t num_features_from_interactions = 0; - ec.partial_prediction = GD::inline_predict(*b.all, ec, num_features_from_interactions); + ec.partial_prediction = VW::inline_predict(*b.all, ec, num_features_from_interactions); ec.num_features_from_interactions = num_features_from_interactions; - ec.pred.scalar = GD::finalize_prediction(b.all->sd, b.all->logger, ec.partial_prediction); - if (audit) { GD::print_audit_features(*(b.all), ec); } + ec.pred.scalar = VW::details::finalize_prediction(b.all->sd, b.all->logger, ec.partial_prediction); + if (audit) { VW::details::print_audit_features(*(b.all), ec); } } template @@ -112,16 +112,16 @@ void multipredict(ftrl& b, base_learner&, VW::example& ec, size_t count, size_t size_t num_features_from_interactions = 0; if (b.all->weights.sparse) { - GD::multipredict_info mp = { + VW::details::multipredict_info mp = { count, step, pred, all.weights.sparse_weights, static_cast(all.sd->gravity)}; - GD::foreach_feature, uint64_t, GD::vec_add_multipredict>( + VW::foreach_feature, uint64_t, VW::details::vec_add_multipredict>( all, ec, mp, num_features_from_interactions); } else { - GD::multipredict_info mp = { + VW::details::multipredict_info mp = { count, step, pred, all.weights.dense_weights, static_cast(all.sd->gravity)}; - GD::foreach_feature, uint64_t, GD::vec_add_multipredict>( + VW::foreach_feature, uint64_t, VW::details::vec_add_multipredict>( all, ec, mp, num_features_from_interactions); } ec.num_features_from_interactions = num_features_from_interactions; @@ -131,14 +131,14 @@ void multipredict(ftrl& b, base_learner&, VW::example& ec, size_t count, size_t } if (finalize_predictions) { - for (size_t c = 0; c < count; c++) { pred[c].scalar = GD::finalize_prediction(all.sd, all.logger, pred[c].scalar); } + for (size_t c = 0; c < count; c++) { pred[c].scalar = VW::details::finalize_prediction(all.sd, all.logger, pred[c].scalar); } } if (audit) { for (size_t c = 0; c < count; c++) { ec.pred.scalar = pred[c].scalar; - GD::print_audit_features(all, ec); + VW::details::print_audit_features(all, ec); ec.ft_offset += static_cast(step); } ec.ft_offset -= static_cast(step * count); @@ -252,7 +252,7 @@ void coin_betting_predict(ftrl& b, base_learner&, VW::example& ec) b.data.normalized_squared_norm_x = 0; size_t num_features_from_interactions = 0; - GD::foreach_feature(*b.all, ec, b.data, num_features_from_interactions); + VW::foreach_feature(*b.all, ec, b.data, num_features_from_interactions); ec.num_features_from_interactions = num_features_from_interactions; b.normalized_sum_norm_x += (static_cast(ec.weight)) * b.data.normalized_squared_norm_x; @@ -261,7 +261,7 @@ void coin_betting_predict(ftrl& b, base_learner&, VW::example& ec) ec.partial_prediction = b.data.predict / b.data.average_squared_norm_x; - ec.pred.scalar = GD::finalize_prediction(b.all->sd, b.all->logger, ec.partial_prediction); + ec.pred.scalar = VW::details::finalize_prediction(b.all->sd, b.all->logger, ec.partial_prediction); } void update_state_and_predict_pistol(ftrl& b, base_learner&, VW::example& ec) @@ -269,30 +269,30 @@ void update_state_and_predict_pistol(ftrl& b, base_learner&, VW::example& ec) b.data.predict = 0; size_t num_features_from_interactions = 0; - GD::foreach_feature( + VW::foreach_feature( *b.all, ec, b.data, num_features_from_interactions); ec.num_features_from_interactions = num_features_from_interactions; ec.partial_prediction = b.data.predict; - ec.pred.scalar = GD::finalize_prediction(b.all->sd, b.all->logger, ec.partial_prediction); + ec.pred.scalar = VW::details::finalize_prediction(b.all->sd, b.all->logger, ec.partial_prediction); } void update_after_prediction_proximal(ftrl& b, VW::example& ec) { b.data.update = b.all->loss->first_derivative(b.all->sd, ec.pred.scalar, ec.l.simple.label) * ec.weight; - GD::foreach_feature(*b.all, ec, b.data); + VW::foreach_feature(*b.all, ec, b.data); } void update_after_prediction_pistol(ftrl& b, VW::example& ec) { b.data.update = b.all->loss->first_derivative(b.all->sd, ec.pred.scalar, ec.l.simple.label) * ec.weight; - GD::foreach_feature(*b.all, ec, b.data); + VW::foreach_feature(*b.all, ec, b.data); } void coin_betting_update_after_prediction(ftrl& b, VW::example& ec) { b.data.update = b.all->loss->first_derivative(b.all->sd, ec.pred.scalar, ec.l.simple.label) * ec.weight; - GD::foreach_feature(*b.all, ec, b.data); + VW::foreach_feature(*b.all, ec, b.data); } // NO_SANITIZE_UNDEFINED needed in learn functions because @@ -312,7 +312,7 @@ void NO_SANITIZE_UNDEFINED learn_pistol(ftrl& a, base_learner& base, VW::example { // update state based on the example and predict update_state_and_predict_pistol(a, base, ec); - if (audit) { GD::print_audit_features(*(a.all), ec); } + if (audit) { VW::details::print_audit_features(*(a.all), ec); } // update state based on the prediction update_after_prediction_pistol(a, ec); } @@ -322,7 +322,7 @@ void NO_SANITIZE_UNDEFINED learn_coin_betting(ftrl& a, base_learner& base, VW::e { // update state based on the example and predict coin_betting_predict(a, base, ec); - if (audit) { GD::print_audit_features(*(a.all), ec); } + if (audit) { VW::details::print_audit_features(*(a.all), ec); } // update state based on the prediction coin_betting_update_after_prediction(a, ec); } @@ -342,10 +342,10 @@ void save_load(ftrl& b, VW::io_buf& model_file, bool read, bool text) if (resume) { - GD::save_load_online_state( + VW::details::save_load_online_state_gd( *all, model_file, read, text, b.total_weight, b.normalized_sum_norm_x, nullptr, b.ftrl_size); } - else { GD::save_load_regressor(*all, model_file, read, text); } + else { VW::details::save_load_regressor_gd(*all, model_file, read, text); } } } diff --git a/vowpalwabbit/core/src/reductions/gd.cc b/vowpalwabbit/core/src/reductions/gd.cc index 4dbd8e9e7f8..ed7be22f68a 100644 --- a/vowpalwabbit/core/src/reductions/gd.cc +++ b/vowpalwabbit/core/src/reductions/gd.cc @@ -13,6 +13,8 @@ #include "vw/core/setup_base.h" #include +#include + #if !defined(VW_NO_INLINE_SIMD) # if !defined(__SSE2__) && (defined(_M_AMD64) || defined(_M_X64)) @@ -42,11 +44,11 @@ using namespace VW::LEARNER; using namespace VW::config; +namespace +{ constexpr double L1_STATE_DEFAULT = 0.; constexpr double L2_STATE_DEFAULT = 1.; -namespace -{ template void merge_weights_simple(size_t length, const std::vector>& source, const std::vector& per_model_weighting, WeightsT& weights) @@ -94,14 +96,37 @@ void copy_weights(WeightsT& dest, const WeightsT& source, size_t length) const size_t full_weights_size = length << dest.stride_shift(); for (size_t i = 0; i < full_weights_size; i++) { dest[i] = source[i]; } } -} // namespace -// todo: -// 4. Factor various state out of VW::workspace& -namespace GD + +void sync_weights(VW::workspace& all) { -void sync_weights(VW::workspace& all); + // todo, fix length dependence + if (all.sd->gravity == 0. && all.sd->contraction == 1.) + { // to avoid unnecessary weight synchronization + return; + } + if (all.weights.sparse) + { + for (VW::weight& w : all.weights.sparse_weights) + { + w = VW::trunc_weight(w, static_cast(all.sd->gravity)) * static_cast(all.sd->contraction); + } + } + else + { + for (VW::weight& w : all.weights.dense_weights) + { + w = VW::trunc_weight(w, static_cast(all.sd->gravity)) * static_cast(all.sd->contraction); + } + } + + all.sd->gravity = 0.; + all.sd->contraction = 1.; +} + +VW_WARNING_STATE_PUSH +VW_WARNING_DISABLE_UNUSED_FUNCTION inline float quake_inv_sqrt(float x) { // Carmack/Quake/SGI fast method: @@ -113,6 +138,7 @@ inline float quake_inv_sqrt(float x) x = x * (1.5f - xhalf * x * x); // One round of Newton's method return x; } +VW_WARNING_STATE_POP static inline float inv_sqrt(float x) { @@ -141,6 +167,7 @@ static inline float inv_sqrt(float x) return x; } + VW_WARNING_STATE_PUSH VW_WARNING_DISABLE_COND_CONST_EXPR template @@ -173,14 +200,14 @@ float average_update(float total_weight, float normalized_sum_norm_x, float neg_ } template -void train(gd& g, VW::example& ec, float update) +void train(VW::reductions::gd& g, VW::example& ec, float update) { if VW_STD17_CONSTEXPR (normalized != 0) { update *= g.update_multiplier; } VW_DBG(ec) << "gd: train() spare=" << spare << std::endl; - foreach_feature>(*g.all, ec, update); + VW::foreach_feature>(*g.all, ec, update); } -void end_pass(gd& g) +void end_pass(VW::reductions::gd& g) { VW::workspace& all = *g.all; @@ -209,7 +236,7 @@ void end_pass(gd& g) } void merge(const std::vector& per_model_weighting, const std::vector& all_workspaces, - const std::vector& all_data, VW::workspace& output_workspace, GD::gd& output_data) + const std::vector& all_data, VW::workspace& output_workspace, VW::reductions::gd& output_data) { const size_t length = static_cast(1) << output_workspace.num_bits; @@ -248,8 +275,8 @@ void merge(const std::vector& per_model_weighting, const std::vector(1) << ws_out.num_bits; // When adding, output the weights from the model delta (2nd arugment to addition) @@ -267,8 +294,8 @@ void add(const VW::workspace& /* ws1 */, const GD::gd& data1, const VW::workspac } } -void subtract(const VW::workspace& ws1, const GD::gd& data1, const VW::workspace& /* ws2 */, GD::gd& data2, - VW::workspace& ws_out, GD::gd& data_out) +void subtract(const VW::workspace& ws1, const VW::reductions::gd& data1, const VW::workspace& /* ws2 */, VW::reductions::gd& data2, + VW::workspace& ws_out, VW::reductions::gd& data_out) { const size_t length = static_cast(1) << ws_out.num_bits; // When subtracting, output the weights from the newer model (1st arugment to subtraction) @@ -286,8 +313,6 @@ void subtract(const VW::workspace& ws1, const GD::gd& data1, const VW::workspace } } -#include - class string_value { public: @@ -335,7 +360,7 @@ inline void audit_feature(audit_results& dat, const float ft_weight, const uint6 if (dat.all.audit) { tempstream << ':' << (index >> stride_shift) << ':' << ft_weight << ':' - << trunc_weight(weights[index], static_cast(dat.all.sd->gravity)) * + << VW::trunc_weight(weights[index], static_cast(dat.all.sd->gravity)) * static_cast(dat.all.sd->contraction); if (weights.adaptive) @@ -360,7 +385,6 @@ inline void audit_feature(audit_results& dat, const float ft_weight, const uint6 } } } - void print_lda_features(VW::workspace& all, VW::example& ec) { VW::parameters& weights = all.weights; @@ -379,8 +403,9 @@ void print_lda_features(VW::workspace& all, VW::example& ec) } std::cout << " total of " << count << " features." << std::endl; } +} -void print_features(VW::workspace& all, VW::example& ec) +void VW::details::print_features(VW::workspace& all, VW::example& ec) { if (all.lda > 0) { print_lda_features(all, ec); } else @@ -420,14 +445,14 @@ void print_features(VW::workspace& all, VW::example& ec) } } -void print_audit_features(VW::workspace& all, VW::example& ec) +void VW::details::print_audit_features(VW::workspace& all, VW::example& ec) { if (all.audit) { VW::details::print_result_by_ref(all.audit_writer.get(), ec.pred.scalar, -1, ec.tag, all.logger); } fflush(stdout); print_features(all, ec); } -float finalize_prediction(VW::shared_data* sd, VW::io::logger& logger, float ret) +float VW::details::finalize_prediction(VW::shared_data* sd, VW::io::logger& logger, float ret) { if (std::isnan(ret)) { @@ -440,6 +465,9 @@ float finalize_prediction(VW::shared_data* sd, VW::io::logger& logger, float ret return ret; } + +namespace +{ class trunc_data { public: @@ -449,26 +477,19 @@ class trunc_data inline void vec_add_trunc(trunc_data& p, const float fx, float& fw) { - p.prediction += trunc_weight(fw, p.gravity) * fx; + p.prediction += VW::trunc_weight(fw, p.gravity) * fx; } inline float trunc_predict(VW::workspace& all, VW::example& ec, double gravity, size_t& num_interacted_features) { const auto& simple_red_features = ec.ex_reduction_features.template get(); trunc_data temp = {simple_red_features.initial, static_cast(gravity)}; - foreach_feature(all, ec, temp, num_interacted_features); + VW::foreach_feature(all, ec, temp, num_interacted_features); return temp.prediction; } -inline void vec_add_print(float& p, const float fx, float& fw) -{ - // TODO: partial line logging. This function isn't actually called from anywhere though? - p += fw * fx; - std::cerr << " + " << fw << "*" << fx; -} - template -void predict(gd& g, base_learner&, VW::example& ec) +void predict(VW::reductions::gd& g, base_learner&, VW::example& ec) { VW_DBG(ec) << "gd.predict(): ex#=" << ec.example_counter << ", offset=" << ec.ft_offset << std::endl; @@ -479,26 +500,26 @@ void predict(gd& g, base_learner&, VW::example& ec) ec.num_features_from_interactions = num_interacted_features; ec.partial_prediction *= static_cast(all.sd->contraction); - ec.pred.scalar = finalize_prediction(all.sd, all.logger, ec.partial_prediction); + ec.pred.scalar = VW::details::finalize_prediction(all.sd, all.logger, ec.partial_prediction); VW_DBG(ec) << "gd: predict() " << VW::debug::scalar_pred_to_string(ec) << VW::debug::features_to_string(ec) << std::endl; - if (audit) { print_audit_features(all, ec); } + if (audit) { VW::details::print_audit_features(all, ec); } } template -inline void vec_add_trunc_multipredict(multipredict_info& mp, const float fx, uint64_t fi) +inline void vec_add_trunc_multipredict(VW::details::multipredict_info& mp, const float fx, uint64_t fi) { size_t index = fi; for (size_t c = 0; c < mp.count; c++, index += mp.step) { - mp.pred[c].scalar += fx * trunc_weight(mp.weights[index], mp.gravity); + mp.pred[c].scalar += fx * VW::trunc_weight(mp.weights[index], mp.gravity); } } template -void multipredict(gd& g, base_learner&, VW::example& ec, size_t count, size_t step, VW::polyprediction* pred, +void multipredict(VW::reductions::gd& g, base_learner&, VW::example& ec, size_t count, size_t step, VW::polyprediction* pred, bool finalize_predictions) { VW::workspace& all = *g.all; @@ -511,31 +532,31 @@ void multipredict(gd& g, base_learner&, VW::example& ec, size_t count, size_t st size_t num_features_from_interactions = 0; if (g.all->weights.sparse) { - multipredict_info mp = { + VW::details::multipredict_info mp = { count, step, pred, g.all->weights.sparse_weights, static_cast(all.sd->gravity)}; if (l1) { - foreach_feature, uint64_t, vec_add_trunc_multipredict>( + VW::foreach_feature, uint64_t, vec_add_trunc_multipredict>( all, ec, mp, num_features_from_interactions); } else { - foreach_feature, uint64_t, vec_add_multipredict>( + VW::foreach_feature, uint64_t, VW::details::vec_add_multipredict>( all, ec, mp, num_features_from_interactions); } } else { - multipredict_info mp = { + VW::details::multipredict_info mp = { count, step, pred, g.all->weights.dense_weights, static_cast(all.sd->gravity)}; if (l1) { - foreach_feature, uint64_t, vec_add_trunc_multipredict>( + VW::foreach_feature, uint64_t, vec_add_trunc_multipredict>( all, ec, mp, num_features_from_interactions); } else { - foreach_feature, uint64_t, vec_add_multipredict>( + VW::foreach_feature, uint64_t, VW::details::vec_add_multipredict>( all, ec, mp, num_features_from_interactions); } } @@ -547,14 +568,14 @@ void multipredict(gd& g, base_learner&, VW::example& ec, size_t count, size_t st } if (finalize_predictions) { - for (size_t c = 0; c < count; c++) { pred[c].scalar = finalize_prediction(all.sd, all.logger, pred[c].scalar); } + for (size_t c = 0; c < count; c++) { pred[c].scalar = VW::details::finalize_prediction(all.sd, all.logger, pred[c].scalar); } } if (audit) { for (size_t c = 0; c < count; c++) { ec.pred.scalar = pred[c].scalar; - print_audit_features(all, ec); + VW::details::print_audit_features(all, ec); ec.ft_offset += static_cast(step); } ec.ft_offset -= static_cast(step * count); @@ -662,10 +683,9 @@ inline void pred_per_update_feature(norm_data& nd, float x, float& fw) } } -bool global_print_features = false; template -float get_pred_per_update(gd& g, VW::example& ec) +float get_pred_per_update(VW::reductions::gd& g, VW::example& ec) { // We must traverse the features in _precisely_ the same order as during training. auto& ld = ec.l.simple; @@ -677,7 +697,7 @@ float get_pred_per_update(gd& g, VW::example& ec) if (grad_squared == 0 && !stateless) { return 1.; } norm_data nd = {grad_squared, 0., 0., {g.neg_power_t, g.neg_norm_power}, {0}, &g.all->logger}; - foreach_feature>(all, ec, nd); if VW_STD17_CONSTEXPR (normalized != 0) { @@ -702,7 +722,7 @@ float get_pred_per_update(gd& g, VW::example& ec) template -float sensitivity(gd& g, VW::example& ec) +float sensitivity(VW::reductions::gd& g, VW::example& ec) { if VW_STD17_CONSTEXPR (adaptive || normalized) { @@ -717,7 +737,7 @@ float sensitivity(gd& g, VW::example& ec) VW_WARNING_STATE_POP template -float get_scale(gd& g, VW::example& /* ec */, float weight) +float get_scale(VW::reductions::gd& g, VW::example& /* ec */, float weight) { float update_scale = g.all->eta * weight; if (!adaptive) @@ -730,7 +750,7 @@ float get_scale(gd& g, VW::example& /* ec */, float weight) } template -float sensitivity(gd& g, base_learner& /* base */, VW::example& ec) +float sensitivity(VW::reductions::gd& g, base_learner& /* base */, VW::example& ec) { return get_scale(g, ec, 1.) * sensitivity(g, ec); @@ -738,7 +758,7 @@ float sensitivity(gd& g, base_learner& /* base */, VW::example& ec) template -float compute_update(gd& g, VW::example& ec) +float compute_update(VW::reductions::gd& g, VW::example& ec) { // invariant: not a test label, importance weight > 0 const auto& ld = ec.l.simple; @@ -778,7 +798,7 @@ float compute_update(gd& g, VW::example& ec) template -void update(gd& g, base_learner&, VW::example& ec) +void update(VW::reductions::gd& g, base_learner&, VW::example& ec) { // invariant: not a test label, importance weight > 0 float update; @@ -792,13 +812,13 @@ void update(gd& g, base_learner&, VW::example& ec) { // updating weights now to avoid numerical instability sync_weights(*g.all); } -} // namespace GD +} // NO_SANITIZE_UNDEFINED needed in learn functions because // base_learner& base might be a reference created from nullptr template -void NO_SANITIZE_UNDEFINED learn(gd& g, base_learner& base, VW::example& ec) +void NO_SANITIZE_UNDEFINED learn(VW::reductions::gd& g, base_learner& base, VW::example& ec) { // invariant: not a test label, importance weight > 0 assert(ec.l.simple.label != FLT_MAX); @@ -807,33 +827,6 @@ void NO_SANITIZE_UNDEFINED learn(gd& g, base_learner& base, VW::example& ec) update(g, base, ec); } -void sync_weights(VW::workspace& all) -{ - // todo, fix length dependence - if (all.sd->gravity == 0. && all.sd->contraction == 1.) - { // to avoid unnecessary weight synchronization - return; - } - - if (all.weights.sparse) - { - for (VW::weight& w : all.weights.sparse_weights) - { - w = trunc_weight(w, static_cast(all.sd->gravity)) * static_cast(all.sd->contraction); - } - } - else - { - for (VW::weight& w : all.weights.dense_weights) - { - w = trunc_weight(w, static_cast(all.sd->gravity)) * static_cast(all.sd->contraction); - } - } - - all.sd->gravity = 0.; - all.sd->contraction = 1.; -} - size_t write_index(VW::io_buf& model_file, std::stringstream& msg, bool text, uint32_t num_bits, uint64_t i) { size_t brw; @@ -935,15 +928,18 @@ void save_load_regressor(VW::workspace& all, VW::io_buf& model_file, bool read, } } } +} -void save_load_regressor(VW::workspace& all, VW::io_buf& model_file, bool read, bool text) +void VW::details::save_load_regressor_gd(VW::workspace& all, VW::io_buf& model_file, bool read, bool text) { - if (all.weights.sparse) { save_load_regressor(all, model_file, read, text, all.weights.sparse_weights); } - else { save_load_regressor(all, model_file, read, text, all.weights.dense_weights); } + if (all.weights.sparse) { ::save_load_regressor(all, model_file, read, text, all.weights.sparse_weights); } + else { ::save_load_regressor(all, model_file, read, text, all.weights.dense_weights); } } +namespace +{ template -void save_load_online_state_weights(VW::workspace& all, VW::io_buf& model_file, bool read, bool text, gd* g, +void save_load_online_state_weights(VW::workspace& all, VW::io_buf& model_file, bool read, bool text, VW::reductions::gd* g, std::stringstream& msg, uint32_t ftrl_size, T& weights) { uint64_t length = static_cast(1) << all.num_bits; @@ -1072,9 +1068,10 @@ void save_load_online_state_weights(VW::workspace& all, VW::io_buf& model_file, } } } +} -void save_load_online_state(VW::workspace& all, VW::io_buf& model_file, bool read, bool text, double& total_weight, - double& normalized_sum_norm_x, gd* g, uint32_t ftrl_size) +void VW::details::save_load_online_state_gd(VW::workspace& all, VW::io_buf& model_file, bool read, bool text, double& total_weight, + double& normalized_sum_norm_x, VW::reductions::gd* g, uint32_t ftrl_size) { std::stringstream msg; @@ -1219,7 +1216,8 @@ void save_load_online_state(VW::workspace& all, VW::io_buf& model_file, bool rea else { save_load_online_state_weights(all, model_file, read, text, g, msg, ftrl_size, all.weights.dense_weights); } } -void save_load(gd& g, VW::io_buf& model_file, bool read, bool text) +namespace { +void save_load(VW::reductions::gd& g, VW::io_buf& model_file, bool read, bool text) { VW::workspace& all = *g.all; if (read) @@ -1263,13 +1261,13 @@ void save_load(gd& g, VW::io_buf& model_file, bool read, bool text) "save_resume functionality is known to have inaccuracy in model files version less than '{}'", VW::version_definitions::VERSION_SAVE_RESUME_FIX.to_string()); } - save_load_online_state(all, model_file, read, text, g.per_model_states[0].total_weight, + VW::details::save_load_online_state_gd(all, model_file, read, text, g.per_model_states[0].total_weight, g.per_model_states[0].normalized_sum_norm_x, &g); } else { if (!all.weights.not_null()) { THROW("Model weights not initialized."); } - save_load_regressor(all, model_file, read, text); + VW::details::save_load_regressor_gd(all, model_file, read, text); } } if (!all.training) @@ -1281,7 +1279,7 @@ void save_load(gd& g, VW::io_buf& model_file, bool read, bool text) template -uint64_t set_learn(VW::workspace& all, gd& g) +uint64_t set_learn(VW::workspace& all, VW::reductions::gd& g) { all.normalized_idx = normalized; if (g.adax) @@ -1302,7 +1300,7 @@ uint64_t set_learn(VW::workspace& all, gd& g) template -uint64_t set_learn(VW::workspace& all, bool feature_mask_off, gd& g) +uint64_t set_learn(VW::workspace& all, bool feature_mask_off, VW::reductions::gd& g) { all.normalized_idx = normalized; if (feature_mask_off) @@ -1313,7 +1311,7 @@ uint64_t set_learn(VW::workspace& all, bool feature_mask_off, gd& g) } template -uint64_t set_learn(VW::workspace& all, bool feature_mask_off, gd& g) +uint64_t set_learn(VW::workspace& all, bool feature_mask_off, VW::reductions::gd& g) { if (g.sparse_l2 > 0.f) { @@ -1323,7 +1321,7 @@ uint64_t set_learn(VW::workspace& all, bool feature_mask_off, gd& g) } template -uint64_t set_learn(VW::workspace& all, bool feature_mask_off, gd& g) +uint64_t set_learn(VW::workspace& all, bool feature_mask_off, VW::reductions::gd& g) { if (all.invariant_updates) { @@ -1333,7 +1331,7 @@ uint64_t set_learn(VW::workspace& all, bool feature_mask_off, gd& g) } template -uint64_t set_learn(VW::workspace& all, bool feature_mask_off, gd& g) +uint64_t set_learn(VW::workspace& all, bool feature_mask_off, VW::reductions::gd& g) { // select the appropriate learn function based on adaptive, normalization, and feature mask if (all.weights.normalized) @@ -1344,7 +1342,7 @@ uint64_t set_learn(VW::workspace& all, bool feature_mask_off, gd& g) } template -uint64_t set_learn(VW::workspace& all, bool feature_mask_off, gd& g) +uint64_t set_learn(VW::workspace& all, bool feature_mask_off, VW::reductions::gd& g) { if (all.weights.adaptive) { return set_learn(all, feature_mask_off, g); } else { return set_learn(all, feature_mask_off, g); } @@ -1356,14 +1354,14 @@ uint64_t ceil_log_2(uint64_t v) else { return 1 + ceil_log_2(v >> 1); } } -} // namespace GD +} base_learner* VW::reductions::gd_setup(VW::setup_base_i& stack_builder) { options_i& options = *stack_builder.get_options(); VW::workspace& all = *stack_builder.get_all_pointer(); - auto g = VW::make_unique(); + auto g = VW::make_unique(); bool sgd = false; bool adaptive = false; @@ -1399,7 +1397,7 @@ base_learner* VW::reductions::gd_setup(VW::setup_base_i& stack_builder) if (options.was_supplied("l2_state")) { all.sd->contraction = local_contraction; } g->all = &all; - auto single_model_state = GD::per_model_state(); + auto single_model_state = details::per_model_state(); single_model_state.normalized_sum_norm_x = 0; single_model_state.total_weight = 0.; g->per_model_states.emplace_back(single_model_state); @@ -1473,34 +1471,34 @@ base_learner* VW::reductions::gd_setup(VW::setup_base_i& stack_builder) { if (all.audit || all.hash_inv) { - g->predict = GD::predict; - g->multipredict = GD::multipredict; + g->predict = ::predict; + g->multipredict = ::multipredict; } else { - g->predict = GD::predict; - g->multipredict = GD::multipredict; + g->predict = ::predict; + g->multipredict = ::multipredict; } } else if (all.audit || all.hash_inv) { - g->predict = GD::predict; - g->multipredict = GD::multipredict; + g->predict = ::predict; + g->multipredict = ::multipredict; } else { - g->predict = GD::predict; - g->multipredict = GD::multipredict; + g->predict = ::predict; + g->multipredict = ::multipredict; } uint64_t stride; - if (all.power_t == 0.5) { stride = GD::set_learn(all, feature_mask_off, *g.get()); } - else { stride = GD::set_learn(all, feature_mask_off, *g.get()); } + if (all.power_t == 0.5) { stride = ::set_learn(all, feature_mask_off, *g.get()); } + else { stride = ::set_learn(all, feature_mask_off, *g.get()); } - all.weights.stride_shift(static_cast(GD::ceil_log_2(stride - 1))); + all.weights.stride_shift(static_cast(::ceil_log_2(stride - 1))); auto* bare = g.get(); - learner* l = + learner* l = make_base_learner(std::move(g), g->learn, bare->predict, stack_builder.get_setupfn_name(gd_setup), VW::prediction_type_t::SCALAR, VW::label_type_t::SIMPLE) .set_learn_returns_prediction(true) @@ -1508,14 +1506,14 @@ base_learner* VW::reductions::gd_setup(VW::setup_base_i& stack_builder) .set_sensitivity(bare->sensitivity) .set_multipredict(bare->multipredict) .set_update(bare->update) - .set_save_load(GD::save_load) - .set_end_pass(GD::end_pass) - .set_merge_with_all(GD::merge) - .set_add_with_all(GD::add) - .set_subtract_with_all(GD::subtract) - .set_output_example_prediction(VW::details::output_example_prediction_simple_label) - .set_update_stats(VW::details::update_stats_simple_label) - .set_print_update(VW::details::print_update_simple_label) + .set_save_load(::save_load) + .set_end_pass(::end_pass) + .set_merge_with_all(::merge) + .set_add_with_all(::add) + .set_subtract_with_all(::subtract) + .set_output_example_prediction(VW::details::output_example_prediction_simple_label) + .set_update_stats(VW::details::update_stats_simple_label) + .set_print_update(VW::details::print_update_simple_label) .build(); return make_base(*l); } diff --git a/vowpalwabbit/core/src/reductions/gd_mf.cc b/vowpalwabbit/core/src/reductions/gd_mf.cc index 545afa402d6..63260c9e8a8 100644 --- a/vowpalwabbit/core/src/reductions/gd_mf.cc +++ b/vowpalwabbit/core/src/reductions/gd_mf.cc @@ -124,7 +124,7 @@ float mf_predict(gdmf& d, VW::example& ec, T& weights) float linear_prediction = 0.; // linear terms - for (VW::features& fs : ec) { GD::foreach_feature(weights, fs, linear_prediction); } + for (VW::features& fs : ec) { VW::foreach_feature(weights, fs, linear_prediction); } // store constant + linear prediction // note: constant is now automatically added @@ -144,13 +144,13 @@ float mf_predict(gdmf& d, VW::example& ec, T& weights) // l^k is from index+1 to index+d.rank // float x_dot_l = sd_offset_add(weights, ec.atomics[(int)(*i)[0]].begin(), ec.atomics[(int)(*i)[0]].end(), k); pred_offset x_dot_l = {0., k}; - GD::foreach_feature(weights, ec.feature_space[static_cast(i[0])], x_dot_l); + VW::foreach_feature(weights, ec.feature_space[static_cast(i[0])], x_dot_l); // x_r * r^k // r^k is from index+d.rank+1 to index+2*d.rank // float x_dot_r = sd_offset_add(weights, ec.atomics[(int)(*i)[1]].begin(), ec.atomics[(int)(*i)[1]].end(), // k+d.rank); pred_offset x_dot_r = {0., k + d.rank}; - GD::foreach_feature(weights, ec.feature_space[static_cast(i[1])], x_dot_r); + VW::foreach_feature(weights, ec.feature_space[static_cast(i[1])], x_dot_r); prediction += x_dot_l.p * x_dot_r.p; @@ -167,7 +167,7 @@ float mf_predict(gdmf& d, VW::example& ec, T& weights) all.set_minmax(all.sd, ec.l.simple.label); - ec.pred.scalar = GD::finalize_prediction(all.sd, all.logger, ec.partial_prediction); + ec.pred.scalar = VW::details::finalize_prediction(all.sd, all.logger, ec.partial_prediction); if (ec.l.simple.label != FLT_MAX) { diff --git a/vowpalwabbit/core/src/reductions/lda_core.cc b/vowpalwabbit/core/src/reductions/lda_core.cc index 30e20725c33..3c7b6b8d040 100644 --- a/vowpalwabbit/core/src/reductions/lda_core.cc +++ b/vowpalwabbit/core/src/reductions/lda_core.cc @@ -936,7 +936,7 @@ void learn_batch(lda& l) for (size_t d = 0; d < batch_size; d++) { float score = lda_loop(l, l.Elogtheta, &(l.v[d * l.all->lda]), l.examples[d], l.all->power_t); - if (l.all->audit) { GD::print_audit_features(*l.all, *l.examples[d]); } + if (l.all->audit) { VW::details::print_audit_features(*l.all, *l.examples[d]); } // If the doc is empty, give it loss of 0. if (l.doc_lengths[d] > 0) { diff --git a/vowpalwabbit/core/src/reductions/mf.cc b/vowpalwabbit/core/src/reductions/mf.cc index 568b8bd86d4..e06e1e77939 100644 --- a/vowpalwabbit/core/src/reductions/mf.cc +++ b/vowpalwabbit/core/src/reductions/mf.cc @@ -100,7 +100,7 @@ void predict(mf& data, single_learner& base, VW::example& ec) // finalize prediction ec.partial_prediction = prediction; - ec.pred.scalar = GD::finalize_prediction(data.all->sd, data.all->logger, ec.partial_prediction); + ec.pred.scalar = VW::details::finalize_prediction(data.all->sd, data.all->logger, ec.partial_prediction); } void learn(mf& data, single_learner& base, VW::example& ec) diff --git a/vowpalwabbit/core/src/reductions/mwt.cc b/vowpalwabbit/core/src/reductions/mwt.cc index 8eb7f73aa2b..969ad2639cd 100644 --- a/vowpalwabbit/core/src/reductions/mwt.cc +++ b/vowpalwabbit/core/src/reductions/mwt.cc @@ -98,7 +98,7 @@ void predict_or_learn(mwt& c, single_learner& base, VW::example& ec) // For each nonzero feature in observed namespaces, check it's value. for (unsigned char ns : ec.indices) { - if (c.namespaces[ns]) { GD::foreach_feature(c.all, ec.feature_space[ns], c); } + if (c.namespaces[ns]) { VW::foreach_feature(c.all, ec.feature_space[ns], c); } } for (uint64_t policy : c.policies) { diff --git a/vowpalwabbit/core/src/reductions/nn.cc b/vowpalwabbit/core/src/reductions/nn.cc index 0b05343489d..dd0ee5bbf51 100644 --- a/vowpalwabbit/core/src/reductions/nn.cc +++ b/vowpalwabbit/core/src/reductions/nn.cc @@ -317,7 +317,7 @@ void predict_or_learn_multi(nn& n, single_learner& base, VW::example& ec) else { base.predict(n.output_layer, n.k); } } - n.prediction = GD::finalize_prediction(n.all->sd, n.all->logger, n.output_layer.partial_prediction); + n.prediction = VW::details::finalize_prediction(n.all->sd, n.all->logger, n.output_layer.partial_prediction); if (should_output) { @@ -355,7 +355,7 @@ void predict_or_learn_multi(nn& n, single_learner& base, VW::example& ec) float nu = n.outputweight.pred.scalar; float gradhw = 0.5f * nu * gradient * sigmahprime; - ec.l.simple.label = GD::finalize_prediction(n.all->sd, n.all->logger, hidden_units[i].scalar - gradhw); + ec.l.simple.label = VW::details::finalize_prediction(n.all->sd, n.all->logger, hidden_units[i].scalar - gradhw); ec.pred.scalar = hidden_units[i].scalar; if (ec.l.simple.label != hidden_units[i].scalar) { base.update(ec, i); } } diff --git a/vowpalwabbit/core/src/reductions/oja_newton.cc b/vowpalwabbit/core/src/reductions/oja_newton.cc index d6a20c4f5fa..ecbbc334e7b 100644 --- a/vowpalwabbit/core/src/reductions/oja_newton.cc +++ b/vowpalwabbit/core/src/reductions/oja_newton.cc @@ -348,9 +348,9 @@ void make_pred(oja_n_update_data& data, float x, float& wref) void predict(OjaNewton& oja_newton_ptr, base_learner&, VW::example& ec) { oja_newton_ptr.data.prediction = 0; - GD::foreach_feature(*oja_newton_ptr.all, ec, oja_newton_ptr.data); + VW::foreach_feature(*oja_newton_ptr.all, ec, oja_newton_ptr.data); ec.partial_prediction = oja_newton_ptr.data.prediction; - ec.pred.scalar = GD::finalize_prediction(oja_newton_ptr.all->sd, oja_newton_ptr.all->logger, ec.partial_prediction); + ec.pred.scalar = VW::details::finalize_prediction(oja_newton_ptr.all->sd, oja_newton_ptr.all->logger, ec.partial_prediction); } void update_Z_and_wbar(oja_n_update_data& data, float x, float& wref) // NOLINT @@ -408,7 +408,7 @@ void NO_SANITIZE_UNDEFINED learn(OjaNewton& oja_newton_ptr, base_learner& base, if (oja_newton_ptr.normalize) { - GD::foreach_feature(*oja_newton_ptr.all, ec, data); + VW::foreach_feature(*oja_newton_ptr.all, ec, data); } VW::example* next_in_batch = nullptr; @@ -440,7 +440,7 @@ void NO_SANITIZE_UNDEFINED learn(OjaNewton& oja_newton_ptr, base_learner& base, data.norm2_x = 0; std::fill(data.Zx.begin(), data.Zx.end(), 0.f); - GD::foreach_feature(*oja_newton_ptr.all, ex, data); + VW::foreach_feature(*oja_newton_ptr.all, ex, data); oja_newton_ptr.compute_AZx(); oja_newton_ptr.update_eigenvalues(); @@ -448,7 +448,7 @@ void NO_SANITIZE_UNDEFINED learn(OjaNewton& oja_newton_ptr, base_learner& base, oja_newton_ptr.update_K(); - GD::foreach_feature(*oja_newton_ptr.all, ex, data); + VW::foreach_feature(*oja_newton_ptr.all, ex, data); } oja_newton_ptr.update_A(); @@ -459,7 +459,7 @@ void NO_SANITIZE_UNDEFINED learn(OjaNewton& oja_newton_ptr, base_learner& base, } std::fill(data.Zx.begin(), data.Zx.end(), 0.f); - GD::foreach_feature(*oja_newton_ptr.all, ec, data); + VW::foreach_feature(*oja_newton_ptr.all, ec, data); oja_newton_ptr.compute_AZx(); oja_newton_ptr.update_b(); @@ -485,8 +485,8 @@ void save_load(OjaNewton& oja_newton_ptr, VW::io_buf& model_file, bool read, boo double temp = 0.; double temp_normalized_sum_norm_x = 0.; - if (resume) { GD::save_load_online_state(all, model_file, read, text, temp, temp_normalized_sum_norm_x); } - else { GD::save_load_regressor(all, model_file, read, text); } + if (resume) { VW::details::save_load_online_state_gd(all, model_file, read, text, temp, temp_normalized_sum_norm_x); } + else { VW::details::save_load_regressor_gd(all, model_file, read, text); } } } } // namespace diff --git a/vowpalwabbit/core/src/reductions/print.cc b/vowpalwabbit/core/src/reductions/print.cc index 7877c8e8aa6..9f9a99a5bf2 100644 --- a/vowpalwabbit/core/src/reductions/print.cc +++ b/vowpalwabbit/core/src/reductions/print.cc @@ -49,7 +49,7 @@ void learn(print& p, VW::LEARNER::base_learner&, VW::example& ec) (*all.trace_message).write(ec.tag.begin(), ec.tag.size()); } (*all.trace_message) << "| "; - GD::foreach_feature(*(p.all), ec, *p.all); + VW::foreach_feature(*(p.all), ec, *p.all); (*all.trace_message) << std::endl; } } // namespace diff --git a/vowpalwabbit/core/src/reductions/search/search.cc b/vowpalwabbit/core/src/reductions/search/search.cc index b9186a7afaa..7fd2b402375 100644 --- a/vowpalwabbit/core/src/reductions/search/search.cc +++ b/vowpalwabbit/core/src/reductions/search/search.cc @@ -16,7 +16,7 @@ #include "vw/core/rand_state.h" #include "vw/core/reductions/active.h" #include "vw/core/reductions/csoaa.h" -#include "vw/core/reductions/gd.h" // for GD::foreach_feature +#include "vw/core/reductions/gd.h" // for VW::foreach_feature #include "vw/core/reductions/search/search_dep_parser.h" #include "vw/core/reductions/search/search_entityrelationtask.h" #include "vw/core/reductions/search/search_graph.h" @@ -651,7 +651,7 @@ void add_neighbor_features(search_private& priv, VW::multi_ex& ec_seq) else // this is actually a neighbor { VW::example& other = *ec_seq[n + offset]; - GD::foreach_feature(priv.all, other.feature_space[ns], priv, me.ft_offset); + VW::foreach_feature(priv.all, other.feature_space[ns], priv, me.ft_offset); } } @@ -819,7 +819,7 @@ void add_example_conditioning(search_private& priv, VW::example& ec, size_t cond // add the quadratic features if (n < priv.acset.max_quad_ngram_length) { - GD::foreach_feature(*priv.all, ec, priv); + VW::foreach_feature(*priv.all, ec, priv); } } } diff --git a/vowpalwabbit/core/src/reductions/search/search_graph.cc b/vowpalwabbit/core/src/reductions/search/search_graph.cc index 8a48ab80f3e..50ba95fad58 100644 --- a/vowpalwabbit/core/src/reductions/search/search_graph.cc +++ b/vowpalwabbit/core/src/reductions/search/search_graph.cc @@ -327,11 +327,11 @@ void add_edge_features(Search::search& sch, task_data& D, size_t n, VW::multi_ex if (pred_total <= 1.) // single edge { D.neighbor_predictions[0] = static_cast(last_pred); - GD::foreach_feature(sch.get_vw_pointer_unsafe(), edge, D); + VW::foreach_feature(sch.get_vw_pointer_unsafe(), edge, D); } else { // lots of edges - GD::foreach_feature(sch.get_vw_pointer_unsafe(), edge, D); + VW::foreach_feature(sch.get_vw_pointer_unsafe(), edge, D); } } ec[n]->indices.push_back(VW::details::NEIGHBOR_NAMESPACE); diff --git a/vowpalwabbit/core/src/reductions/stagewise_poly.cc b/vowpalwabbit/core/src/reductions/stagewise_poly.cc index be0043cde92..815bd72b501 100644 --- a/vowpalwabbit/core/src/reductions/stagewise_poly.cc +++ b/vowpalwabbit/core/src/reductions/stagewise_poly.cc @@ -469,7 +469,7 @@ void synthetic_create_rec(stagewise_poly& poly, float v, uint64_t findex) #ifdef DEBUG poly.max_depth = (poly.max_depth > poly.cur_depth) ? poly.max_depth : poly.cur_depth; #endif // DEBUG - GD::foreach_feature(*(poly.all), *(poly.original_ec), poly); + VW::foreach_feature(*(poly.all), *(poly.original_ec), poly); --poly.cur_depth; poly.synth_rec_f = parent_f; } @@ -490,7 +490,7 @@ void synthetic_create(stagewise_poly& poly, VW::example& ec, bool training) * parent, and recurse just on that feature (which arguably correctly interprets poly.cur_depth). * Problem with this is if there is a collision with the root... */ - GD::foreach_feature(*poly.all, *poly.original_ec, poly); + VW::foreach_feature(*poly.all, *poly.original_ec, poly); synthetic_decycle(poly); if (training) diff --git a/vowpalwabbit/core/src/reductions/svrg.cc b/vowpalwabbit/core/src/reductions/svrg.cc index f8805fdcaa9..01ba72d1bfb 100644 --- a/vowpalwabbit/core/src/reductions/svrg.cc +++ b/vowpalwabbit/core/src/reductions/svrg.cc @@ -43,7 +43,7 @@ class svrg svrg(VW::workspace* all) : all(all) {} }; -// Mimic GD::inline_predict but with offset for predicting with either +// Mimic VW::inline_predict but with offset for predicting with either // stable versus inner weights. template @@ -58,7 +58,7 @@ inline float inline_predict(VW::workspace& all, VW::example& ec) { const auto& simple_red_features = ec.ex_reduction_features.template get(); float acc = simple_red_features.initial; - GD::foreach_feature >(all, ec, acc); + VW::foreach_feature >(all, ec, acc); return acc; } @@ -66,13 +66,13 @@ inline float inline_predict(VW::workspace& all, VW::example& ec) float predict_stable(const svrg& s, VW::example& ec) { - return GD::finalize_prediction(s.all->sd, s.all->logger, inline_predict(*s.all, ec)); + return VW::details::finalize_prediction(s.all->sd, s.all->logger, inline_predict(*s.all, ec)); } void predict(svrg& s, base_learner&, VW::example& ec) { ec.partial_prediction = inline_predict(*s.all, ec); - ec.pred.scalar = GD::finalize_prediction(s.all->sd, s.all->logger, ec.partial_prediction); + ec.pred.scalar = VW::details::finalize_prediction(s.all->sd, s.all->logger, ec.partial_prediction); } float gradient_scalar(const svrg& s, const VW::example& ec, float pred) @@ -111,13 +111,13 @@ void update_inner(const svrg& s, VW::example& ec) u.g_scalar_stable = gradient_scalar(s, ec, predict_stable(s, ec)); u.eta = s.all->eta; u.norm = static_cast(s.stable_grad_count); - GD::foreach_feature(*s.all, ec, u); + VW::foreach_feature(*s.all, ec, u); } void update_stable(const svrg& s, VW::example& ec) { float g = gradient_scalar(s, ec, predict_stable(s, ec)); - GD::foreach_feature(*s.all, ec, g); + VW::foreach_feature(*s.all, ec, g); } void learn(svrg& s, base_learner& base, VW::example& ec) @@ -169,8 +169,8 @@ void save_load(svrg& s, VW::io_buf& model_file, bool read, bool text) double temp = 0.; double temp_normalized_sum_norm_x = 0.; - if (resume) { GD::save_load_online_state(*s.all, model_file, read, text, temp, temp_normalized_sum_norm_x); } - else { GD::save_load_regressor(*s.all, model_file, read, text); } + if (resume) { VW::details::save_load_online_state_gd(*s.all, model_file, read, text, temp, temp_normalized_sum_norm_x); } + else { VW::details::save_load_regressor_gd(*s.all, model_file, read, text); } } } } // namespace diff --git a/vowpalwabbit/core/src/vw.cc b/vowpalwabbit/core/src/vw.cc index c7f4ad5b6a7..61d3f044e0a 100644 --- a/vowpalwabbit/core/src/vw.cc +++ b/vowpalwabbit/core/src/vw.cc @@ -781,7 +781,7 @@ VW::feature* VW::get_features(VW::workspace& all, example* ec, size_t& feature_n features_and_source fs; fs.stride_shift = all.weights.stride_shift(); fs.mask = all.weights.mask() >> all.weights.stride_shift(); - GD::foreach_feature<::features_and_source, uint64_t, vec_store>(all, *ec, fs); + VW::foreach_feature<::features_and_source, uint64_t, vec_store>(all, *ec, fs); auto* features_array = new feature[fs.feature_map.size()]; std::memcpy(features_array, fs.feature_map.data(), fs.feature_map.size() * sizeof(feature)); diff --git a/vowpalwabbit/slim/include/vw/slim/vw_slim_predict.h b/vowpalwabbit/slim/include/vw/slim/vw_slim_predict.h index 2480be6294c..0d58099c214 100644 --- a/vowpalwabbit/slim/include/vw/slim/vw_slim_predict.h +++ b/vowpalwabbit/slim/include/vw/slim/vw_slim_predict.h @@ -269,13 +269,13 @@ class vw_predict // permutations is not supported by slim so we can just use combinations! _generate_interactions.update_interactions_if_new_namespace_seen< VW::details::generate_namespace_combinations_with_repetition, false>(_interactions, ex.indices); - score = GD::inline_predict(*_weights, false, _ignore_linear, _generate_interactions.generated_interactions, + score = VW::inline_predict(*_weights, false, _ignore_linear, _generate_interactions.generated_interactions, _unused_extent_interactions, /* permutations */ false, ex, _generate_interactions_object_cache); } else { - score = GD::inline_predict(*_weights, false, _ignore_linear, _interactions, _unused_extent_interactions, + score = VW::inline_predict(*_weights, false, _ignore_linear, _interactions, _unused_extent_interactions, /* permutations */ false, ex, _generate_interactions_object_cache); } return S_VW_PREDICT_OK; From 9f4b54871b6281b5979f5fabe98a9932736bc384 Mon Sep 17 00:00:00 2001 From: Jack Gerrits Date: Fri, 6 Jan 2023 15:24:16 -0500 Subject: [PATCH 2/4] formatting --- .../core/include/vw/core/gd_predict.h | 30 ++++++----- vowpalwabbit/core/src/reductions/ftrl.cc | 13 +++-- vowpalwabbit/core/src/reductions/gd.cc | 52 ++++++++++--------- vowpalwabbit/core/src/reductions/nn.cc | 3 +- .../core/src/reductions/oja_newton.cc | 8 ++- vowpalwabbit/core/src/reductions/svrg.cc | 5 +- 6 files changed, 64 insertions(+), 47 deletions(-) diff --git a/vowpalwabbit/core/include/vw/core/gd_predict.h b/vowpalwabbit/core/include/vw/core/gd_predict.h index b2117a3bd71..879d622ef28 100644 --- a/vowpalwabbit/core/include/vw/core/gd_predict.h +++ b/vowpalwabbit/core/include/vw/core/gd_predict.h @@ -13,16 +13,16 @@ namespace VW { - namespace details - { - template +namespace details +{ +template inline void dummy_func(DataT&, const VW::audit_strings*) { } // should never be called due to call_audit overload inline void vec_add(float& p, float fx, float fw) { p += fw * fx; } - } +} // namespace details // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_index) template void foreach_feature(WeightsT& /*weights*/, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.) @@ -105,8 +105,6 @@ inline void foreach_feature(WeightsT& weights, bool ignore_some_linear, extent_interactions, permutations, ec, dat, num_interacted_features_ignored, cache); } - - template inline float inline_predict(WeightsT& weights, bool ignore_some_linear, std::array& ignore_linear, @@ -130,7 +128,7 @@ inline float inline_predict(WeightsT& weights, bool ignore_some_linear, extent_interactions, permutations, ec, initial, num_interacted_features, cache); return initial; } -} +} // namespace VW // namespace GD // { @@ -146,7 +144,8 @@ inline float inline_predict(WeightsT& weights, bool ignore_some_linear, // // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) // template // VW_DEPRECATED("Moved to VW namespace") -// inline void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.) +// inline void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult +// = 1.) // { // VW::foreach_feature(weights, fs, dat, offset, mult); // } @@ -170,7 +169,8 @@ inline float inline_predict(WeightsT& weights, bool ignore_some_linear, // VW::details::generate_interactions_object_cache& cache) // default value removed to eliminate // // ambiguity in old complers // { -// VW::generate_interactions(interactions, extent_interactions, permutations, ec, +// VW::generate_interactions(interactions, extent_interactions, permutations, +// ec, // dat, weights, num_interacted_features, cache); // } @@ -184,7 +184,8 @@ inline float inline_predict(WeightsT& weights, bool ignore_some_linear, // const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, // DataT& dat, size_t& num_interacted_features, VW::details::generate_interactions_object_cache& cache) // { -// VW::foreach_feature(weights, ignore_some_linear, ignore_linear, interactions, +// VW::foreach_feature(weights, ignore_some_linear, ignore_linear, +// interactions, // extent_interactions, permutations, ec, dat, num_interacted_features, cache); // } @@ -196,7 +197,8 @@ inline float inline_predict(WeightsT& weights, bool ignore_some_linear, // const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, // DataT& dat, VW::details::generate_interactions_object_cache& cache) // { -// VW::foreach_feature(weights, ignore_some_linear, ignore_linear, interactions, +// VW::foreach_feature(weights, ignore_some_linear, ignore_linear, +// interactions, // extent_interactions, permutations, ec, dat, cache); // } @@ -208,7 +210,8 @@ inline float inline_predict(WeightsT& weights, bool ignore_some_linear, // const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, // VW::details::generate_interactions_object_cache& cache, float initial = 0.f) // { -// return VW::inline_predict(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, permutations, ec, +// return VW::inline_predict(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, +// permutations, ec, // cache, initial); // } @@ -220,7 +223,8 @@ inline float inline_predict(WeightsT& weights, bool ignore_some_linear, // const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, // size_t& num_interacted_features, VW::details::generate_interactions_object_cache& cache, float initial = 0.f) // { -// return VW::inline_predict(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, permutations, ec, +// return VW::inline_predict(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, +// permutations, ec, // num_interacted_features, cache, initial); // } // } // namespace GD \ No newline at end of file diff --git a/vowpalwabbit/core/src/reductions/ftrl.cc b/vowpalwabbit/core/src/reductions/ftrl.cc index 7b44a810058..e4c7b3cc40b 100644 --- a/vowpalwabbit/core/src/reductions/ftrl.cc +++ b/vowpalwabbit/core/src/reductions/ftrl.cc @@ -114,15 +114,15 @@ void multipredict(ftrl& b, base_learner&, VW::example& ec, size_t count, size_t { VW::details::multipredict_info mp = { count, step, pred, all.weights.sparse_weights, static_cast(all.sd->gravity)}; - VW::foreach_feature, uint64_t, VW::details::vec_add_multipredict>( - all, ec, mp, num_features_from_interactions); + VW::foreach_feature, uint64_t, + VW::details::vec_add_multipredict>(all, ec, mp, num_features_from_interactions); } else { VW::details::multipredict_info mp = { count, step, pred, all.weights.dense_weights, static_cast(all.sd->gravity)}; - VW::foreach_feature, uint64_t, VW::details::vec_add_multipredict>( - all, ec, mp, num_features_from_interactions); + VW::foreach_feature, uint64_t, + VW::details::vec_add_multipredict>(all, ec, mp, num_features_from_interactions); } ec.num_features_from_interactions = num_features_from_interactions; if (all.sd->contraction != 1.) @@ -131,7 +131,10 @@ void multipredict(ftrl& b, base_learner&, VW::example& ec, size_t count, size_t } if (finalize_predictions) { - for (size_t c = 0; c < count; c++) { pred[c].scalar = VW::details::finalize_prediction(all.sd, all.logger, pred[c].scalar); } + for (size_t c = 0; c < count; c++) + { + pred[c].scalar = VW::details::finalize_prediction(all.sd, all.logger, pred[c].scalar); + } } if (audit) { diff --git a/vowpalwabbit/core/src/reductions/gd.cc b/vowpalwabbit/core/src/reductions/gd.cc index ed7be22f68a..7fc131df30a 100644 --- a/vowpalwabbit/core/src/reductions/gd.cc +++ b/vowpalwabbit/core/src/reductions/gd.cc @@ -12,9 +12,8 @@ #include "vw/core/prediction_type.h" #include "vw/core/setup_base.h" -#include #include - +#include #if !defined(VW_NO_INLINE_SIMD) # if !defined(__SSE2__) && (defined(_M_AMD64) || defined(_M_X64)) @@ -97,7 +96,6 @@ void copy_weights(WeightsT& dest, const WeightsT& source, size_t length) for (size_t i = 0; i < full_weights_size; i++) { dest[i] = source[i]; } } - void sync_weights(VW::workspace& all) { // todo, fix length dependence @@ -204,7 +202,8 @@ void train(VW::reductions::gd& g, VW::example& ec, float update) { if VW_STD17_CONSTEXPR (normalized != 0) { update *= g.update_multiplier; } VW_DBG(ec) << "gd: train() spare=" << spare << std::endl; - VW::foreach_feature>(*g.all, ec, update); + VW::foreach_feature>( + *g.all, ec, update); } void end_pass(VW::reductions::gd& g) @@ -275,8 +274,8 @@ void merge(const std::vector& per_model_weighting, const std::vector(1) << ws_out.num_bits; // When adding, output the weights from the model delta (2nd arugment to addition) @@ -294,8 +293,8 @@ void add(const VW::workspace& /* ws1 */, const VW::reductions::gd& data1, const } } -void subtract(const VW::workspace& ws1, const VW::reductions::gd& data1, const VW::workspace& /* ws2 */, VW::reductions::gd& data2, - VW::workspace& ws_out, VW::reductions::gd& data_out) +void subtract(const VW::workspace& ws1, const VW::reductions::gd& data1, const VW::workspace& /* ws2 */, + VW::reductions::gd& data2, VW::workspace& ws_out, VW::reductions::gd& data_out) { const size_t length = static_cast(1) << ws_out.num_bits; // When subtracting, output the weights from the newer model (1st arugment to subtraction) @@ -403,7 +402,7 @@ void print_lda_features(VW::workspace& all, VW::example& ec) } std::cout << " total of " << count << " features." << std::endl; } -} +} // namespace void VW::details::print_features(VW::workspace& all, VW::example& ec) { @@ -465,7 +464,6 @@ float VW::details::finalize_prediction(VW::shared_data* sd, VW::io::logger& logg return ret; } - namespace { class trunc_data @@ -519,8 +517,8 @@ inline void vec_add_trunc_multipredict(VW::details::multipredict_info& mp, co } template -void multipredict(VW::reductions::gd& g, base_learner&, VW::example& ec, size_t count, size_t step, VW::polyprediction* pred, - bool finalize_predictions) +void multipredict(VW::reductions::gd& g, base_learner&, VW::example& ec, size_t count, size_t step, + VW::polyprediction* pred, bool finalize_predictions) { VW::workspace& all = *g.all; for (size_t c = 0; c < count; c++) @@ -541,8 +539,8 @@ void multipredict(VW::reductions::gd& g, base_learner&, VW::example& ec, size_t } else { - VW::foreach_feature, uint64_t, VW::details::vec_add_multipredict>( - all, ec, mp, num_features_from_interactions); + VW::foreach_feature, uint64_t, + VW::details::vec_add_multipredict>(all, ec, mp, num_features_from_interactions); } } else @@ -556,8 +554,8 @@ void multipredict(VW::reductions::gd& g, base_learner&, VW::example& ec, size_t } else { - VW::foreach_feature, uint64_t, VW::details::vec_add_multipredict>( - all, ec, mp, num_features_from_interactions); + VW::foreach_feature, uint64_t, + VW::details::vec_add_multipredict>(all, ec, mp, num_features_from_interactions); } } ec.num_features_from_interactions = num_features_from_interactions; @@ -568,7 +566,10 @@ void multipredict(VW::reductions::gd& g, base_learner&, VW::example& ec, size_t } if (finalize_predictions) { - for (size_t c = 0; c < count; c++) { pred[c].scalar = VW::details::finalize_prediction(all.sd, all.logger, pred[c].scalar); } + for (size_t c = 0; c < count; c++) + { + pred[c].scalar = VW::details::finalize_prediction(all.sd, all.logger, pred[c].scalar); + } } if (audit) { @@ -928,7 +929,7 @@ void save_load_regressor(VW::workspace& all, VW::io_buf& model_file, bool read, } } } -} +} // namespace void VW::details::save_load_regressor_gd(VW::workspace& all, VW::io_buf& model_file, bool read, bool text) { @@ -939,8 +940,8 @@ void VW::details::save_load_regressor_gd(VW::workspace& all, VW::io_buf& model_f namespace { template -void save_load_online_state_weights(VW::workspace& all, VW::io_buf& model_file, bool read, bool text, VW::reductions::gd* g, - std::stringstream& msg, uint32_t ftrl_size, T& weights) +void save_load_online_state_weights(VW::workspace& all, VW::io_buf& model_file, bool read, bool text, + VW::reductions::gd* g, std::stringstream& msg, uint32_t ftrl_size, T& weights) { uint64_t length = static_cast(1) << all.num_bits; @@ -1068,10 +1069,10 @@ void save_load_online_state_weights(VW::workspace& all, VW::io_buf& model_file, } } } -} +} // namespace -void VW::details::save_load_online_state_gd(VW::workspace& all, VW::io_buf& model_file, bool read, bool text, double& total_weight, - double& normalized_sum_norm_x, VW::reductions::gd* g, uint32_t ftrl_size) +void VW::details::save_load_online_state_gd(VW::workspace& all, VW::io_buf& model_file, bool read, bool text, + double& total_weight, double& normalized_sum_norm_x, VW::reductions::gd* g, uint32_t ftrl_size) { std::stringstream msg; @@ -1216,7 +1217,8 @@ void VW::details::save_load_online_state_gd(VW::workspace& all, VW::io_buf& mode else { save_load_online_state_weights(all, model_file, read, text, g, msg, ftrl_size, all.weights.dense_weights); } } -namespace { +namespace +{ void save_load(VW::reductions::gd& g, VW::io_buf& model_file, bool read, bool text) { VW::workspace& all = *g.all; @@ -1354,7 +1356,7 @@ uint64_t ceil_log_2(uint64_t v) else { return 1 + ceil_log_2(v >> 1); } } -} +} // namespace base_learner* VW::reductions::gd_setup(VW::setup_base_i& stack_builder) { diff --git a/vowpalwabbit/core/src/reductions/nn.cc b/vowpalwabbit/core/src/reductions/nn.cc index dd0ee5bbf51..b22c8bb235a 100644 --- a/vowpalwabbit/core/src/reductions/nn.cc +++ b/vowpalwabbit/core/src/reductions/nn.cc @@ -355,7 +355,8 @@ void predict_or_learn_multi(nn& n, single_learner& base, VW::example& ec) float nu = n.outputweight.pred.scalar; float gradhw = 0.5f * nu * gradient * sigmahprime; - ec.l.simple.label = VW::details::finalize_prediction(n.all->sd, n.all->logger, hidden_units[i].scalar - gradhw); + ec.l.simple.label = + VW::details::finalize_prediction(n.all->sd, n.all->logger, hidden_units[i].scalar - gradhw); ec.pred.scalar = hidden_units[i].scalar; if (ec.l.simple.label != hidden_units[i].scalar) { base.update(ec, i); } } diff --git a/vowpalwabbit/core/src/reductions/oja_newton.cc b/vowpalwabbit/core/src/reductions/oja_newton.cc index ecbbc334e7b..51ef91f7f37 100644 --- a/vowpalwabbit/core/src/reductions/oja_newton.cc +++ b/vowpalwabbit/core/src/reductions/oja_newton.cc @@ -350,7 +350,8 @@ void predict(OjaNewton& oja_newton_ptr, base_learner&, VW::example& ec) oja_newton_ptr.data.prediction = 0; VW::foreach_feature(*oja_newton_ptr.all, ec, oja_newton_ptr.data); ec.partial_prediction = oja_newton_ptr.data.prediction; - ec.pred.scalar = VW::details::finalize_prediction(oja_newton_ptr.all->sd, oja_newton_ptr.all->logger, ec.partial_prediction); + ec.pred.scalar = + VW::details::finalize_prediction(oja_newton_ptr.all->sd, oja_newton_ptr.all->logger, ec.partial_prediction); } void update_Z_and_wbar(oja_n_update_data& data, float x, float& wref) // NOLINT @@ -485,7 +486,10 @@ void save_load(OjaNewton& oja_newton_ptr, VW::io_buf& model_file, bool read, boo double temp = 0.; double temp_normalized_sum_norm_x = 0.; - if (resume) { VW::details::save_load_online_state_gd(all, model_file, read, text, temp, temp_normalized_sum_norm_x); } + if (resume) + { + VW::details::save_load_online_state_gd(all, model_file, read, text, temp, temp_normalized_sum_norm_x); + } else { VW::details::save_load_regressor_gd(all, model_file, read, text); } } } diff --git a/vowpalwabbit/core/src/reductions/svrg.cc b/vowpalwabbit/core/src/reductions/svrg.cc index 01ba72d1bfb..01ca6c61712 100644 --- a/vowpalwabbit/core/src/reductions/svrg.cc +++ b/vowpalwabbit/core/src/reductions/svrg.cc @@ -169,7 +169,10 @@ void save_load(svrg& s, VW::io_buf& model_file, bool read, bool text) double temp = 0.; double temp_normalized_sum_norm_x = 0.; - if (resume) { VW::details::save_load_online_state_gd(*s.all, model_file, read, text, temp, temp_normalized_sum_norm_x); } + if (resume) + { + VW::details::save_load_online_state_gd(*s.all, model_file, read, text, temp, temp_normalized_sum_norm_x); + } else { VW::details::save_load_regressor_gd(*s.all, model_file, read, text); } } } From 0b8f857d7378a15a40e760d361c06e4594bf7dc2 Mon Sep 17 00:00:00 2001 From: Jack Gerrits Date: Fri, 6 Jan 2023 15:28:11 -0500 Subject: [PATCH 3/4] uncomment compat definitions --- .../core/include/vw/core/gd_predict.h | 196 +++++++++--------- .../core/include/vw/core/reductions/gd.h | 138 ++++++------ 2 files changed, 167 insertions(+), 167 deletions(-) diff --git a/vowpalwabbit/core/include/vw/core/gd_predict.h b/vowpalwabbit/core/include/vw/core/gd_predict.h index 879d622ef28..93f7cfe609e 100644 --- a/vowpalwabbit/core/include/vw/core/gd_predict.h +++ b/vowpalwabbit/core/include/vw/core/gd_predict.h @@ -130,101 +130,101 @@ inline float inline_predict(WeightsT& weights, bool ignore_some_linear, } } // namespace VW -// namespace GD -// { - -// // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_index) -// template -// VW_DEPRECATED("Moved to VW namespace") -// void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.) -// { -// VW::foreach_feature(weights, fs, dat, offset, mult); -// } - -// // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) -// template -// VW_DEPRECATED("Moved to VW namespace") -// inline void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult -// = 1.) -// { -// VW::foreach_feature(weights, fs, dat, offset, mult); -// } - -// // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) -// template -// VW_DEPRECATED("Moved to VW namespace") -// inline void foreach_feature( -// const WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.) -// { -// VW::foreach_feature(weights, fs, dat, offset, mult); -// } - -// template // nullptr func can't be used as template param in old -// // compilers -// VW_DEPRECATED("Moved to VW namespace") -// inline void generate_interactions(const std::vector>& interactions, -// const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, -// DataT& dat, WeightsT& weights, size_t& num_interacted_features, -// VW::details::generate_interactions_object_cache& cache) // default value removed to eliminate -// // ambiguity in old complers -// { -// VW::generate_interactions(interactions, extent_interactions, permutations, -// ec, -// dat, weights, num_interacted_features, cache); -// } - -// // iterate through all namespaces and quadratic&cubic features, callback function FuncT(some_data_R, feature_value_x, -// // WeightOrIndexT) where WeightOrIndexT is EITHER float& feature_weight OR uint64_t feature_index -// template -// VW_DEPRECATED("Moved to VW namespace") -// inline void foreach_feature(WeightsT& weights, bool ignore_some_linear, -// std::array& ignore_linear, -// const std::vector>& interactions, -// const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, -// DataT& dat, size_t& num_interacted_features, VW::details::generate_interactions_object_cache& cache) -// { -// VW::foreach_feature(weights, ignore_some_linear, ignore_linear, -// interactions, -// extent_interactions, permutations, ec, dat, num_interacted_features, cache); -// } - -// template -// VW_DEPRECATED("Moved to VW namespace") -// inline void foreach_feature(WeightsT& weights, bool ignore_some_linear, -// std::array& ignore_linear, -// const std::vector>& interactions, -// const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, -// DataT& dat, VW::details::generate_interactions_object_cache& cache) -// { -// VW::foreach_feature(weights, ignore_some_linear, ignore_linear, -// interactions, -// extent_interactions, permutations, ec, dat, cache); -// } - -// template -// VW_DEPRECATED("Moved to VW namespace") -// inline float inline_predict(WeightsT& weights, bool ignore_some_linear, -// std::array& ignore_linear, -// const std::vector>& interactions, -// const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, -// VW::details::generate_interactions_object_cache& cache, float initial = 0.f) -// { -// return VW::inline_predict(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, -// permutations, ec, -// cache, initial); -// } - -// template -// VW_DEPRECATED("Moved to VW namespace") -// inline float inline_predict(WeightsT& weights, bool ignore_some_linear, -// std::array& ignore_linear, -// const std::vector>& interactions, -// const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, -// size_t& num_interacted_features, VW::details::generate_interactions_object_cache& cache, float initial = 0.f) -// { -// return VW::inline_predict(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, -// permutations, ec, -// num_interacted_features, cache, initial); -// } -// } // namespace GD \ No newline at end of file +namespace GD +{ + +// iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_index) +template +VW_DEPRECATED("Moved to VW namespace") +void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.) +{ + VW::foreach_feature(weights, fs, dat, offset, mult); +} + +// iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) +template +VW_DEPRECATED("Moved to VW namespace") +inline void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult += 1.) +{ + VW::foreach_feature(weights, fs, dat, offset, mult); +} + +// iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) +template +VW_DEPRECATED("Moved to VW namespace") +inline void foreach_feature( + const WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.) +{ + VW::foreach_feature(weights, fs, dat, offset, mult); +} + +template // nullptr func can't be used as template param in old + // compilers +VW_DEPRECATED("Moved to VW namespace") +inline void generate_interactions(const std::vector>& interactions, + const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, + DataT& dat, WeightsT& weights, size_t& num_interacted_features, + VW::details::generate_interactions_object_cache& cache) // default value removed to eliminate + // ambiguity in old complers +{ + VW::generate_interactions(interactions, extent_interactions, permutations, + ec, + dat, weights, num_interacted_features, cache); +} + +// iterate through all namespaces and quadratic&cubic features, callback function FuncT(some_data_R, feature_value_x, +// WeightOrIndexT) where WeightOrIndexT is EITHER float& feature_weight OR uint64_t feature_index +template +VW_DEPRECATED("Moved to VW namespace") +inline void foreach_feature(WeightsT& weights, bool ignore_some_linear, + std::array& ignore_linear, + const std::vector>& interactions, + const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, + DataT& dat, size_t& num_interacted_features, VW::details::generate_interactions_object_cache& cache) +{ + VW::foreach_feature(weights, ignore_some_linear, ignore_linear, + interactions, + extent_interactions, permutations, ec, dat, num_interacted_features, cache); +} + +template +VW_DEPRECATED("Moved to VW namespace") +inline void foreach_feature(WeightsT& weights, bool ignore_some_linear, + std::array& ignore_linear, + const std::vector>& interactions, + const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, + DataT& dat, VW::details::generate_interactions_object_cache& cache) +{ + VW::foreach_feature(weights, ignore_some_linear, ignore_linear, + interactions, + extent_interactions, permutations, ec, dat, cache); +} + +template +VW_DEPRECATED("Moved to VW namespace") +inline float inline_predict(WeightsT& weights, bool ignore_some_linear, + std::array& ignore_linear, + const std::vector>& interactions, + const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, + VW::details::generate_interactions_object_cache& cache, float initial = 0.f) +{ + return VW::inline_predict(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, + permutations, ec, + cache, initial); +} + +template +VW_DEPRECATED("Moved to VW namespace") +inline float inline_predict(WeightsT& weights, bool ignore_some_linear, + std::array& ignore_linear, + const std::vector>& interactions, + const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, + size_t& num_interacted_features, VW::details::generate_interactions_object_cache& cache, float initial = 0.f) +{ + return VW::inline_predict(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, + permutations, ec, + num_interacted_features, cache, initial); +} +} // namespace GD \ No newline at end of file diff --git a/vowpalwabbit/core/include/vw/core/reductions/gd.h b/vowpalwabbit/core/include/vw/core/reductions/gd.h index d2eae5e2f00..61ec296937a 100644 --- a/vowpalwabbit/core/include/vw/core/reductions/gd.h +++ b/vowpalwabbit/core/include/vw/core/reductions/gd.h @@ -247,72 +247,72 @@ inline void generate_interactions(VW::workspace& all, VW::example_predict& ec, R } // namespace INTERACTIONS -// namespace GD -// { - -// using gd = VW::reductions::gd; - -// // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) -// template -// VW_DEPRECATED("Moved to VW namespace") -// inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat) -// { -// VW::foreach_feature(all, ec, dat); -// } - -// // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) -// template -// VW_DEPRECATED("Moved to VW namespace") -// inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat, size_t& num_interacted_features) -// { -// VW::foreach_feature(all, ec, dat, num_interacted_features); -// } - -// // iterate through all namespaces and quadratic&cubic features, callback function T(some_data_R, feature_value_x, -// // feature_weight) -// template -// VW_DEPRECATED("Moved to VW namespace") -// inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat) -// { -// VW::foreach_feature(all, ec, dat); -// } - -// template -// VW_DEPRECATED("Moved to VW namespace") -// inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat) -// { -// VW::foreach_feature(all, ec, dat); -// } - -// template -// VW_DEPRECATED("Moved to VW namespace") -// inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat, size_t& num_interacted_features) -// { -// VW::foreach_feature(all, ec, dat, num_interacted_features); -// } - -// template -// VW_DEPRECATED("Moved to VW namespace") -// inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat, size_t& num_interacted_features) -// { -// VW::foreach_feature(all, ec, dat, num_interacted_features); -// } - -// VW_DEPRECATED("Moved to VW namespace") -// inline float inline_predict(VW::workspace& all, VW::example& ec) -// { -// return VW::inline_predict(all, ec); -// } - -// VW_DEPRECATED("Moved to VW namespace") -// inline float inline_predict(VW::workspace& all, VW::example& ec, size_t& num_generated_features) -// { -// return VW::inline_predict(all, ec, num_generated_features); -// } - -// VW_DEPRECATED("Moved to VW namespace") -// inline float trunc_weight(const float w, const float gravity) -// { -// return VW::trunc_weight(w, gravity); -// } -// } \ No newline at end of file +namespace GD +{ + +using gd = VW::reductions::gd; + +// iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) +template +VW_DEPRECATED("Moved to VW namespace") +inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat) +{ + VW::foreach_feature(all, ec, dat); +} + +// iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) +template +VW_DEPRECATED("Moved to VW namespace") +inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat, size_t& num_interacted_features) +{ + VW::foreach_feature(all, ec, dat, num_interacted_features); +} + +// iterate through all namespaces and quadratic&cubic features, callback function T(some_data_R, feature_value_x, +// feature_weight) +template +VW_DEPRECATED("Moved to VW namespace") +inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat) +{ + VW::foreach_feature(all, ec, dat); +} + +template +VW_DEPRECATED("Moved to VW namespace") +inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat) +{ + VW::foreach_feature(all, ec, dat); +} + +template +VW_DEPRECATED("Moved to VW namespace") +inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat, size_t& num_interacted_features) +{ + VW::foreach_feature(all, ec, dat, num_interacted_features); +} + +template +VW_DEPRECATED("Moved to VW namespace") +inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat, size_t& num_interacted_features) +{ + VW::foreach_feature(all, ec, dat, num_interacted_features); +} + +VW_DEPRECATED("Moved to VW namespace") +inline float inline_predict(VW::workspace& all, VW::example& ec) +{ + return VW::inline_predict(all, ec); +} + +VW_DEPRECATED("Moved to VW namespace") +inline float inline_predict(VW::workspace& all, VW::example& ec, size_t& num_generated_features) +{ + return VW::inline_predict(all, ec, num_generated_features); +} + +VW_DEPRECATED("Moved to VW namespace") +inline float trunc_weight(const float w, const float gravity) +{ + return VW::trunc_weight(w, gravity); +} +} \ No newline at end of file From e4c97f1c7733bd77db192c188eff6e6d4a9ec603 Mon Sep 17 00:00:00 2001 From: Jack Gerrits Date: Fri, 6 Jan 2023 16:37:15 -0500 Subject: [PATCH 4/4] format --- .../core/include/vw/core/gd_predict.h | 30 ++++++++----------- .../core/include/vw/core/reductions/gd.h | 12 ++------ 2 files changed, 15 insertions(+), 27 deletions(-) diff --git a/vowpalwabbit/core/include/vw/core/gd_predict.h b/vowpalwabbit/core/include/vw/core/gd_predict.h index 93f7cfe609e..868756250f8 100644 --- a/vowpalwabbit/core/include/vw/core/gd_predict.h +++ b/vowpalwabbit/core/include/vw/core/gd_predict.h @@ -144,8 +144,7 @@ void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& dat, uint // iterate through one namespace (or its part), callback function FuncT(some_data_R, feature_value_x, feature_weight) template VW_DEPRECATED("Moved to VW namespace") -inline void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult -= 1.) +inline void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.) { VW::foreach_feature(weights, fs, dat, offset, mult); } @@ -162,16 +161,15 @@ inline void foreach_feature( template // nullptr func can't be used as template param in old // compilers -VW_DEPRECATED("Moved to VW namespace") -inline void generate_interactions(const std::vector>& interactions, +VW_DEPRECATED("Moved to VW namespace") inline void generate_interactions( + const std::vector>& interactions, const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, DataT& dat, WeightsT& weights, size_t& num_interacted_features, VW::details::generate_interactions_object_cache& cache) // default value removed to eliminate // ambiguity in old complers { - VW::generate_interactions(interactions, extent_interactions, permutations, - ec, - dat, weights, num_interacted_features, cache); + VW::generate_interactions( + interactions, extent_interactions, permutations, ec, dat, weights, num_interacted_features, cache); } // iterate through all namespaces and quadratic&cubic features, callback function FuncT(some_data_R, feature_value_x, @@ -184,8 +182,7 @@ inline void foreach_feature(WeightsT& weights, bool ignore_some_linear, const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, DataT& dat, size_t& num_interacted_features, VW::details::generate_interactions_object_cache& cache) { - VW::foreach_feature(weights, ignore_some_linear, ignore_linear, - interactions, + VW::foreach_feature(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, permutations, ec, dat, num_interacted_features, cache); } @@ -197,9 +194,8 @@ inline void foreach_feature(WeightsT& weights, bool ignore_some_linear, const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, DataT& dat, VW::details::generate_interactions_object_cache& cache) { - VW::foreach_feature(weights, ignore_some_linear, ignore_linear, - interactions, - extent_interactions, permutations, ec, dat, cache); + VW::foreach_feature( + weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, permutations, ec, dat, cache); } template @@ -210,9 +206,8 @@ inline float inline_predict(WeightsT& weights, bool ignore_some_linear, const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, VW::details::generate_interactions_object_cache& cache, float initial = 0.f) { - return VW::inline_predict(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, - permutations, ec, - cache, initial); + return VW::inline_predict( + weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, permutations, ec, cache, initial); } template @@ -223,8 +218,7 @@ inline float inline_predict(WeightsT& weights, bool ignore_some_linear, const std::vector>& extent_interactions, bool permutations, VW::example_predict& ec, size_t& num_interacted_features, VW::details::generate_interactions_object_cache& cache, float initial = 0.f) { - return VW::inline_predict(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, - permutations, ec, - num_interacted_features, cache, initial); + return VW::inline_predict(weights, ignore_some_linear, ignore_linear, interactions, extent_interactions, permutations, + ec, num_interacted_features, cache, initial); } } // namespace GD \ No newline at end of file diff --git a/vowpalwabbit/core/include/vw/core/reductions/gd.h b/vowpalwabbit/core/include/vw/core/reductions/gd.h index 61ec296937a..4039d665096 100644 --- a/vowpalwabbit/core/include/vw/core/reductions/gd.h +++ b/vowpalwabbit/core/include/vw/core/reductions/gd.h @@ -299,10 +299,7 @@ inline void foreach_feature(VW::workspace& all, VW::example& ec, DataT& dat, siz } VW_DEPRECATED("Moved to VW namespace") -inline float inline_predict(VW::workspace& all, VW::example& ec) -{ - return VW::inline_predict(all, ec); -} +inline float inline_predict(VW::workspace& all, VW::example& ec) { return VW::inline_predict(all, ec); } VW_DEPRECATED("Moved to VW namespace") inline float inline_predict(VW::workspace& all, VW::example& ec, size_t& num_generated_features) @@ -311,8 +308,5 @@ inline float inline_predict(VW::workspace& all, VW::example& ec, size_t& num_gen } VW_DEPRECATED("Moved to VW namespace") -inline float trunc_weight(const float w, const float gravity) -{ - return VW::trunc_weight(w, gravity); -} -} \ No newline at end of file +inline float trunc_weight(const float w, const float gravity) { return VW::trunc_weight(w, gravity); } +} // namespace GD \ No newline at end of file