VowpalWabbit · bassmang · Dec 27, 2022 · Dec 20, 2022 · Dec 20, 2022 · Dec 20, 2022
diff --git a/test/pred-sets/ref/aml_spinoff.inv b/test/pred-sets/ref/aml_spinoff.inv
diff --git a/test/train-sets/automl_spin_off.txt b/test/train-sets/automl_spin_off.txt
diff --git a/test/train-sets/ref/automl_readable.txt b/test/train-sets/ref/automl_readable.txt
diff --git a/test/train-sets/ref/automl_readable_cubic.txt b/test/train-sets/ref/automl_readable_cubic.txt
diff --git a/test/train-sets/ref/cb_similar_aml_spinoff_save.stderr b/test/train-sets/ref/cb_similar_aml_spinoff_save.stderr
@@ -13,15 +13,15 @@ Input label = CB
 Output pred = ACTION_PROBS
 average  since         example        example        current        current  current
 loss     last          counter         weight          label        predict features
-0.333333 0.333333            1            1.0          0:1:1         0:0.33     8436
-0.175000 0.016667            2            2.0          1:1:1         2:0.97    18319
-0.333333 0.491667            4            4.0          1:1:1         0:0.97    23278
-0.293750 0.254167            8            8.0          0:1:1         2:0.97    13862
-0.273958 0.254167           16           16.0          0:1:1         2:0.97     2249
+-0.33333 -0.33333            1            1.0         0:-1:1         0:0.33     8436
+-0.17500 -0.01666            2            2.0         1:-1:1         0:0.97    18319
+-0.09583 -0.01666            4            4.0         1:-1:1         2:0.97    23278
+-0.17500 -0.25416            8            8.0         0:-1:1         1:0.97    13862
+-0.27395 -0.37291           16           16.0         0:-1:1         1:0.97     2249
 
 finished run
 number of examples = 18
 weighted example sum = 18.000000
 weighted label sum = 0.000000
-average loss = 0.298148
+average loss = -0.245370
 total feature number = 293853
diff --git a/test/train-sets/ref/cbadf_automl_readable.txt b/test/train-sets/ref/cbadf_automl_readable.txt
diff --git a/test/train-sets/ref/spin_off_aml_load.stderr b/test/train-sets/ref/spin_off_aml_load.stderr
@@ -12,15 +12,15 @@ Input label = CB
 Output pred = ACTION_PROBS
 average  since         example        example        current        current  current
 loss     last          counter         weight          label        predict features
-0.016667 0.016667            1            1.0          0:1:1         2:0.97     8436
-0.016667 0.016667            2            2.0          1:1:1         2:0.97    18319
-0.016667 0.016667            4            4.0          1:1:1         0:0.97    23278
-0.016667 0.016667            8            8.0          0:1:1         2:0.97    13862
-0.016667 0.016667           16           16.0          0:1:1         2:0.97     2249
+-0.96666 -0.96666            1            1.0         0:-1:1         0:0.97     8436
+-0.96666 -0.96666            2            2.0         1:-1:1         1:0.97    18319
+-0.49166 -0.01666            4            4.0         1:-1:1         2:0.97    23278
+-0.37291 -0.25416            8            8.0         0:-1:1         1:0.97    13862
+-0.43229 -0.49166           16           16.0         0:-1:1         0:0.97     2249
 
 finished run
 number of examples = 18
 weighted example sum = 18.000000
 weighted label sum = 0.000000
-average loss = 0.069444
+average loss = -0.438889
 total feature number = 293853
diff --git a/test/train-sets/ref/spin_off_aml_save.stderr b/test/train-sets/ref/spin_off_aml_save.stderr
@@ -12,15 +12,15 @@ Input label = CB
 Output pred = ACTION_PROBS
 average  since         example        example        current        current  current
 loss     last          counter         weight          label        predict features
-0.333333 0.333333            1            1.0          0:1:1         0:0.33     8436
-0.175000 0.016667            2            2.0          1:1:1         2:0.97    18319
-0.333333 0.491667            4            4.0          1:1:1         0:0.97    23278
-0.293750 0.254167            8            8.0          0:1:1         2:0.97    13862
-0.273958 0.254167           16           16.0          0:1:1         2:0.97     2249
+-0.33333 -0.33333            1            1.0         0:-1:1         0:0.33     8436
+-0.17500 -0.01666            2            2.0         1:-1:1         0:0.97    18319
+-0.09583 -0.01666            4            4.0         1:-1:1         2:0.97    23278
+-0.17500 -0.25416            8            8.0         0:-1:1         1:0.97    13862
+-0.27395 -0.37291           16           16.0         0:-1:1         1:0.97     2249
 
 finished run
 number of examples = 18
 weighted example sum = 18.000000
 weighted label sum = 0.000000
-average loss = 0.298148
+average loss = -0.245370
 total feature number = 293853
diff --git a/test/unit_test/automl_test.cc b/test/unit_test/automl_test.cc
diff --git a/test/unit_test/epsilon_decay_test.cc b/test/unit_test/epsilon_decay_test.cc
@@ -44,20 +44,26 @@ BOOST_AUTO_TEST_CASE(epsilon_decay_test_init_w_iterations)
 
 BOOST_AUTO_TEST_CASE(epsilon_decay_test_champ_change_w_iterations)
 {
-  const size_t num_iterations = 8000;
-  const std::vector<uint64_t> swap_after = {5000};
-  const float scale_reward = 0.2f;
+  const size_t num_iterations = 700;
   const size_t seed = 100;
-  const size_t deterministic_champ_switch = 7920;
+  const std::vector<uint64_t> swap_after = {500};
+  const size_t deterministic_champ_switch = 662;
   callback_map test_hooks;
 
   test_hooks.emplace(deterministic_champ_switch - 1,
       [&](cb_sim&, VW::workspace& all, VW::multi_ex&)
       {
         epsilon_decay_data* epsilon_decay = epsilon_decay_test::get_epsilon_decay_data(all);
-        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[0][0].update_count, 2183);
-        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][0].update_count, 2183);
-        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][1].update_count, 7919);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[0][0].update_count, 28);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][0].update_count, 28);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][0].update_count, 28);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][0].update_count, 28);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][1].update_count, 48);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][1].update_count, 48);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][1].update_count, 48);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][2].update_count, 53);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][2].update_count, 53);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][3].update_count, 661);
         return true;
       });
 
@@ -67,15 +73,20 @@ BOOST_AUTO_TEST_CASE(epsilon_decay_test_champ_change_w_iterations)
         epsilon_decay_data* epsilon_decay = epsilon_decay_test::get_epsilon_decay_data(all);
         BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[0][0].update_count, 0);
         BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][0].update_count, 0);
-        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][1].update_count, 2184);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][0].update_count, 0);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][0].update_count, 0);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][1].update_count, 29);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][1].update_count, 29);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][1].update_count, 29);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][2].update_count, 49);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][2].update_count, 49);
+        BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][3].update_count, 54);
         return true;
       });
 
   // we initialize the reduction pointing to position 0 as champ, that config is hard-coded to empty
   auto ctr = simulator::_test_helper_hook(
-      "--epsilon_decay --epsilon_decay_significance_level .9 --model_count 2 --cb_explore_adf --quiet  -q :: "
-      "--shift_model_bounds 10",
-      test_hooks, num_iterations, seed, swap_after, scale_reward);
+      "--epsilon_decay --model_count 4 --cb_explore_adf --quiet -q ::", test_hooks, num_iterations, seed, swap_after);
 
   BOOST_CHECK_GT(ctr.back(), 0.6f);
 }
@@ -274,7 +285,9 @@ BOOST_AUTO_TEST_CASE(epsilon_decay_test_score_bounds_unit)
 
   // Set lower_bound of model 2 to beat upper_bound of current champ and run score check
   uint64_t new_champ = 2;
-  for (auto i = 0; i < 10000; ++i) { ep_data.conf_seq_estimators[new_champ][new_champ].update(i, 5); };
+  for (auto i = 0; i < 100; ++i) { ep_data.conf_seq_estimators[new_champ][new_champ].update(i, 1); };
+  for (auto i = 0; i < 100; ++i) { ep_data.conf_seq_estimators[num_models - 1][new_champ].update(i, 0); };
+
   BOOST_CHECK_GT(ep_data.conf_seq_estimators[new_champ][new_champ].lower_bound(),
       ep_data.conf_seq_estimators[num_models - 1][new_champ].upper_bound());
   ep_data.check_estimator_bounds();
@@ -296,7 +309,7 @@ BOOST_AUTO_TEST_CASE(epsilon_decay_test_score_bounds_unit)
   BOOST_CHECK_EQUAL(ep_data.conf_seq_estimators[3][3].update_count, 2);
   BOOST_CHECK_EQUAL(ep_data.conf_seq_estimators[4][2].update_count, 3);
   BOOST_CHECK_EQUAL(ep_data.conf_seq_estimators[4][3].update_count, 4);
-  BOOST_CHECK_EQUAL(ep_data.conf_seq_estimators[4][4].update_count, 10005);
+  BOOST_CHECK_EQUAL(ep_data.conf_seq_estimators[4][4].update_count, 105);
   BOOST_CHECK_EQUAL(ep_data._weight_indices[0], 4);
   BOOST_CHECK_EQUAL(ep_data._weight_indices[1], 3);
   BOOST_CHECK_EQUAL(ep_data._weight_indices[2], 0);

diff --git a/vowpalwabbit/core/include/vw/core/confidence_sequence_robust.h b/vowpalwabbit/core/include/vw/core/confidence_sequence_robust.h
@@ -89,7 +89,7 @@ class confidence_sequence_robust
   double upper_bound() const;
 
   // Constant values -- do not require reset or save_load
-  const double alpha;
+  double alpha;
 
   uint64_t update_count;
   double last_w;

diff --git a/vowpalwabbit/core/include/vw/core/reductions/epsilon_decay.h b/vowpalwabbit/core/include/vw/core/reductions/epsilon_decay.h
@@ -3,7 +3,8 @@
 // license as described in the file LICENSE.
 #pragma once
 
-#include "vw/core/confidence_sequence.h"
+#include "vw/core/confidence_sequence_robust.h"
+#include "vw/core/io_buf.h"
 #include "vw/core/learner_fwd.h"
 #include "vw/core/vw_fwd.h"
 
@@ -36,7 +37,7 @@ class epsilon_decay_data
   void check_estimator_bounds();
   void check_horizon_bounds();
 
-  std::vector<std::vector<VW::estimators::confidence_sequence>> conf_seq_estimators;
+  std::vector<std::vector<VW::estimators::confidence_sequence_robust>> conf_seq_estimators;
   std::vector<uint64_t> _weight_indices;
   uint64_t _min_scope;
   double _epsilon_decay_significance_level;  // Confidence interval

diff --git a/vowpalwabbit/core/src/reductions/automl.cc b/vowpalwabbit/core/src/reductions/automl.cc
@@ -7,7 +7,7 @@
 #include "details/automl_impl.h"
 #include "vw/config/options.h"
 #include "vw/config/options_cli.h"
-#include "vw/core/confidence_sequence.h"
+#include "vw/core/confidence_sequence_robust.h"
 #include "vw/core/multi_model_utils.h"
 
 // TODO: delete this three includes
@@ -249,7 +249,7 @@ VW::LEARNER::base_learner* VW::reductions::automl_setup(VW::setup_base_i& stack_
   bool verbose_metrics = false;
   std::string interaction_type = "quadratic";
   std::string oracle_type = "one_diff";
-  float automl_significance_level = VW::details::CS_DEFAULT_ALPHA;
+  float automl_significance_level = VW::details::CS_ROBUST_DEFAULT_ALPHA;
   bool reversed_learning_order = false;
   bool fixed_significance_level = false;
   bool trace_logging = false;
@@ -301,7 +301,7 @@ VW::LEARNER::base_learner* VW::reductions::automl_setup(VW::setup_base_i& stack_
                .experimental())
       .add(make_option("automl_significance_level", automl_significance_level)
                .keep()
-               .default_value(VW::details::CS_DEFAULT_ALPHA)
+               .default_value(VW::details::CS_ROBUST_DEFAULT_ALPHA)
                .allow_override()
                .help("Set significance level for champion change")
                .experimental())
@@ -356,28 +356,28 @@ VW::LEARNER::base_learner* VW::reductions::automl_setup(VW::setup_base_i& stack_
   {
     if (oracle_type == "one_diff")
     {
-      return make_automl_with_impl<config_oracle<one_diff_impl>, VW::estimators::confidence_sequence>(stack_builder,
-          base_learner, max_live_configs, verbose_metrics, oracle_type, default_lease, all, priority_challengers,
-          interaction_type, priority_type, automl_significance_level, ccb_on, predict_only_model,
+      return make_automl_with_impl<config_oracle<one_diff_impl>, VW::estimators::confidence_sequence_robust>(
+          stack_builder, base_learner, max_live_configs, verbose_metrics, oracle_type, default_lease, all,
+          priority_challengers, interaction_type, priority_type, automl_significance_level, ccb_on, predict_only_model,
           reversed_learning_order, conf_type, trace_logging);
     }
     else if (oracle_type == "rand")
     {
-      return make_automl_with_impl<config_oracle<oracle_rand_impl>, VW::estimators::confidence_sequence>(stack_builder,
-          base_learner, max_live_configs, verbose_metrics, oracle_type, default_lease, all, priority_challengers,
-          interaction_type, priority_type, automl_significance_level, ccb_on, predict_only_model,
+      return make_automl_with_impl<config_oracle<oracle_rand_impl>, VW::estimators::confidence_sequence_robust>(
+          stack_builder, base_learner, max_live_configs, verbose_metrics, oracle_type, default_lease, all,
+          priority_challengers, interaction_type, priority_type, automl_significance_level, ccb_on, predict_only_model,
           reversed_learning_order, conf_type, trace_logging);
     }
     else if (oracle_type == "champdupe")
     {
-      return make_automl_with_impl<config_oracle<champdupe_impl>, VW::estimators::confidence_sequence>(stack_builder,
-          base_learner, max_live_configs, verbose_metrics, oracle_type, default_lease, all, priority_challengers,
-          interaction_type, priority_type, automl_significance_level, ccb_on, predict_only_model,
+      return make_automl_with_impl<config_oracle<champdupe_impl>, VW::estimators::confidence_sequence_robust>(
+          stack_builder, base_learner, max_live_configs, verbose_metrics, oracle_type, default_lease, all,
+          priority_challengers, interaction_type, priority_type, automl_significance_level, ccb_on, predict_only_model,
           reversed_learning_order, conf_type, trace_logging);
     }
     else if (oracle_type == "one_diff_inclusion")
     {
-      return make_automl_with_impl<config_oracle<one_diff_inclusion_impl>, VW::estimators::confidence_sequence>(
+      return make_automl_with_impl<config_oracle<one_diff_inclusion_impl>, VW::estimators::confidence_sequence_robust>(
           stack_builder, base_learner, max_live_configs, verbose_metrics, oracle_type, default_lease, all,
           priority_challengers, interaction_type, priority_type, automl_significance_level, ccb_on, predict_only_model,
           reversed_learning_order, conf_type, trace_logging);
@@ -386,9 +386,9 @@ VW::LEARNER::base_learner* VW::reductions::automl_setup(VW::setup_base_i& stack_
     {
       interaction_type = "both";
       conf_type = config_type::Interaction;
-      return make_automl_with_impl<config_oracle<qbase_cubic>, VW::estimators::confidence_sequence>(stack_builder,
-          base_learner, max_live_configs, verbose_metrics, oracle_type, default_lease, all, priority_challengers,
-          interaction_type, priority_type, automl_significance_level, ccb_on, predict_only_model,
+      return make_automl_with_impl<config_oracle<qbase_cubic>, VW::estimators::confidence_sequence_robust>(
+          stack_builder, base_learner, max_live_configs, verbose_metrics, oracle_type, default_lease, all,
+          priority_challengers, interaction_type, priority_type, automl_significance_level, ccb_on, predict_only_model,
           reversed_learning_order, conf_type, trace_logging);
     }
   }

diff --git a/vowpalwabbit/core/src/reductions/details/automl/automl_impl.cc b/vowpalwabbit/core/src/reductions/details/automl/automl_impl.cc
@@ -5,7 +5,7 @@
 #include "../automl_impl.h"
 
 #include "vw/common/vw_exception.h"
-#include "vw/core/confidence_sequence.h"
+#include "vw/core/confidence_sequence_robust.h"
 
 /*
 This reduction implements the ChaCha algorithm from page 5 of the following paper:
@@ -358,11 +358,12 @@ void interaction_config_manager<config_oracle_impl, estimator_impl>::process_exa
   }
 }
 
-template class interaction_config_manager<config_oracle<oracle_rand_impl>, VW::estimators::confidence_sequence>;
-template class interaction_config_manager<config_oracle<one_diff_impl>, VW::estimators::confidence_sequence>;
-template class interaction_config_manager<config_oracle<champdupe_impl>, VW::estimators::confidence_sequence>;
-template class interaction_config_manager<config_oracle<one_diff_inclusion_impl>, VW::estimators::confidence_sequence>;
-template class interaction_config_manager<config_oracle<qbase_cubic>, VW::estimators::confidence_sequence>;
+template class interaction_config_manager<config_oracle<oracle_rand_impl>, VW::estimators::confidence_sequence_robust>;
+template class interaction_config_manager<config_oracle<one_diff_impl>, VW::estimators::confidence_sequence_robust>;
+template class interaction_config_manager<config_oracle<champdupe_impl>, VW::estimators::confidence_sequence_robust>;
+template class interaction_config_manager<config_oracle<one_diff_inclusion_impl>,
+    VW::estimators::confidence_sequence_robust>;
+template class interaction_config_manager<config_oracle<qbase_cubic>, VW::estimators::confidence_sequence_robust>;
 
 template <typename CMType>
 void automl<CMType>::one_step(
@@ -434,12 +435,16 @@ void automl<CMType>::offset_learn(
   }
 }
 
-template class automl<interaction_config_manager<config_oracle<oracle_rand_impl>, VW::estimators::confidence_sequence>>;
-template class automl<interaction_config_manager<config_oracle<one_diff_impl>, VW::estimators::confidence_sequence>>;
-template class automl<interaction_config_manager<config_oracle<champdupe_impl>, VW::estimators::confidence_sequence>>;
 template class automl<
-    interaction_config_manager<config_oracle<one_diff_inclusion_impl>, VW::estimators::confidence_sequence>>;
-template class automl<interaction_config_manager<config_oracle<qbase_cubic>, VW::estimators::confidence_sequence>>;
+    interaction_config_manager<config_oracle<oracle_rand_impl>, VW::estimators::confidence_sequence_robust>>;
+template class automl<
+    interaction_config_manager<config_oracle<one_diff_impl>, VW::estimators::confidence_sequence_robust>>;
+template class automl<
+    interaction_config_manager<config_oracle<champdupe_impl>, VW::estimators::confidence_sequence_robust>>;
+template class automl<
+    interaction_config_manager<config_oracle<one_diff_inclusion_impl>, VW::estimators::confidence_sequence_robust>>;
+template class automl<
+    interaction_config_manager<config_oracle<qbase_cubic>, VW::estimators::confidence_sequence_robust>>;
 
 }  // namespace automl
 }  // namespace reductions