VowpalWabbit · bassmang · Dec 27, 2022 · Dec 20, 2022 · Dec 20, 2022 · Dec 20, 2022
diff --git a/test/pred-sets/ref/aml_spinoff.inv b/test/pred-sets/ref/aml_spinoff.inv
diff --git a/test/train-sets/automl_spin_off.txt b/test/train-sets/automl_spin_off.txt
diff --git a/test/train-sets/ref/automl_readable.txt b/test/train-sets/ref/automl_readable.txt
diff --git a/test/train-sets/ref/automl_readable_cubic.txt b/test/train-sets/ref/automl_readable_cubic.txt
diff --git a/test/train-sets/ref/cb_similar_aml_spinoff_save.stderr b/test/train-sets/ref/cb_similar_aml_spinoff_save.stderr
@@ -13,15 +13,15 @@ Input label = CB
 Output pred = ACTION_PROBS
 average  since         example        example        current        current  current
 loss     last          counter         weight          label        predict features
-0.333333 0.333333            1            1.0          0:1:1         0:0.33     8436
-0.175000 0.016667            2            2.0          1:1:1         2:0.97    18319
-0.333333 0.491667            4            4.0          1:1:1         0:0.97    23278
-0.293750 0.254167            8            8.0          0:1:1         2:0.97    13862
-0.273958 0.254167           16           16.0          0:1:1         2:0.97     2249
+-0.33333 -0.33333            1            1.0         0:-1:1         0:0.33     8436
+-0.17500 -0.01666            2            2.0         1:-1:1         0:0.97    18319
+-0.09583 -0.01666            4            4.0         1:-1:1         2:0.97    23278
+-0.17500 -0.25416            8            8.0         0:-1:1         1:0.97    13862
+-0.27395 -0.37291           16           16.0         0:-1:1         1:0.97     2249
 
 finished run
 number of examples = 18
 weighted example sum = 18.000000
 weighted label sum = 0.000000
-average loss = 0.298148
+average loss = -0.245370
 total feature number = 293853
diff --git a/test/train-sets/ref/cbadf_automl_readable.txt b/test/train-sets/ref/cbadf_automl_readable.txt
diff --git a/test/train-sets/ref/spin_off_aml_load.stderr b/test/train-sets/ref/spin_off_aml_load.stderr
@@ -12,15 +12,15 @@ Input label = CB
 Output pred = ACTION_PROBS
 average  since         example        example        current        current  current
 loss     last          counter         weight          label        predict features
-0.016667 0.016667            1            1.0          0:1:1         2:0.97     8436
-0.016667 0.016667            2            2.0          1:1:1         2:0.97    18319
-0.016667 0.016667            4            4.0          1:1:1         0:0.97    23278
-0.016667 0.016667            8            8.0          0:1:1         2:0.97    13862
-0.016667 0.016667           16           16.0          0:1:1         2:0.97     2249
+-0.96666 -0.96666            1            1.0         0:-1:1         0:0.97     8436
+-0.96666 -0.96666            2            2.0         1:-1:1         1:0.97    18319
+-0.49166 -0.01666            4            4.0         1:-1:1         2:0.97    23278
+-0.37291 -0.25416            8            8.0         0:-1:1         1:0.97    13862
+-0.43229 -0.49166           16           16.0         0:-1:1         0:0.97     2249
 
 finished run
 number of examples = 18
 weighted example sum = 18.000000
 weighted label sum = 0.000000
-average loss = 0.069444
+average loss = -0.438889
 total feature number = 293853
diff --git a/test/train-sets/ref/spin_off_aml_save.stderr b/test/train-sets/ref/spin_off_aml_save.stderr
@@ -12,15 +12,15 @@ Input label = CB
 Output pred = ACTION_PROBS
 average  since         example        example        current        current  current
 loss     last          counter         weight          label        predict features
-0.333333 0.333333            1            1.0          0:1:1         0:0.33     8436
-0.175000 0.016667            2            2.0          1:1:1         2:0.97    18319
-0.333333 0.491667            4            4.0          1:1:1         0:0.97    23278
-0.293750 0.254167            8            8.0          0:1:1         2:0.97    13862
-0.273958 0.254167           16           16.0          0:1:1         2:0.97     2249
+-0.33333 -0.33333            1            1.0         0:-1:1         0:0.33     8436
+-0.17500 -0.01666            2            2.0         1:-1:1         0:0.97    18319
+-0.09583 -0.01666            4            4.0         1:-1:1         2:0.97    23278
+-0.17500 -0.25416            8            8.0         0:-1:1         1:0.97    13862
+-0.27395 -0.37291           16           16.0         0:-1:1         1:0.97     2249
 
 finished run
 number of examples = 18
 weighted example sum = 18.000000
 weighted label sum = 0.000000
-average loss = 0.298148
+average loss = -0.245370
 total feature number = 293853
diff --git a/test/unit_test/automl_test.cc b/test/unit_test/automl_test.cc
@@ -7,7 +7,7 @@
 #include "reductions/details/automl_impl.h"
 #include "simulator.h"
 #include "test_common.h"
-#include "vw/core/confidence_sequence.h"
+#include "vw/core/confidence_sequence_robust.h"
 #include "vw/core/metric_sink.h"
 #include "vw/core/vw_fwd.h"
 
@@ -36,8 +36,9 @@ std::ostream& operator<<(std::ostream& o, std::vector<T> const& vec)
 namespace aml_test
 {
 template <typename T>
-void check_interactions_match_exclusions(VW::reductions::automl::automl<
-    interaction_config_manager<VW::reductions::automl::config_oracle<T>, VW::estimators::confidence_sequence>>* aml)
+void check_interactions_match_exclusions(
+    VW::reductions::automl::automl<interaction_config_manager<VW::reductions::automl::config_oracle<T>,
+        VW::estimators::confidence_sequence_robust>>* aml)
 {
   for (const auto& estimator : aml->cm->estimators)
   {
@@ -72,8 +73,9 @@ void check_interactions_match_exclusions(VW::reductions::automl::automl<
 }
 
 template <typename T>
-void check_config_states(VW::reductions::automl::automl<
-    interaction_config_manager<VW::reductions::automl::config_oracle<T>, VW::estimators::confidence_sequence>>* aml)
+void check_config_states(
+    VW::reductions::automl::automl<interaction_config_manager<VW::reductions::automl::config_oracle<T>,
+        VW::estimators::confidence_sequence_robust>>* aml)
 {
   // No configs in the index queue should be live
   auto index_queue = aml->cm->_config_oracle.index_queue;
@@ -94,7 +96,7 @@ void check_config_states(VW::reductions::automl::automl<
 
 template <typename T>
 VW::reductions::automl::automl<
-    interaction_config_manager<VW::reductions::automl::config_oracle<T>, VW::estimators::confidence_sequence>>*
+    interaction_config_manager<VW::reductions::automl::config_oracle<T>, VW::estimators::confidence_sequence_robust>>*
 get_automl_data(VW::workspace& all)
 {
   std::vector<std::string> e_r;
@@ -106,34 +108,36 @@ get_automl_data(VW::workspace& all)
 
   VW::LEARNER::multi_learner* automl_learner = as_multiline(all.l->get_learner_by_name_prefix("automl"));
 
-  return (VW::reductions::automl::automl<
-      interaction_config_manager<VW::reductions::automl::config_oracle<T>, VW::estimators::confidence_sequence>>*)
+  return (VW::reductions::automl::automl<interaction_config_manager<VW::reductions::automl::config_oracle<T>,
+          VW::estimators::confidence_sequence_robust>>*)
       automl_learner->get_internal_type_erased_data_pointer_test_use_only();
 }
 template VW::reductions::automl::automl<
     interaction_config_manager<VW::reductions::automl::config_oracle<VW::reductions::automl::oracle_rand_impl>,
-        VW::estimators::confidence_sequence>>*
+        VW::estimators::confidence_sequence_robust>>*
 get_automl_data(VW::workspace& all);
-template VW::reductions::automl::automl<interaction_config_manager<
-    VW::reductions::automl::config_oracle<VW::reductions::automl::one_diff_impl>, VW::estimators::confidence_sequence>>*
+template VW::reductions::automl::automl<
+    interaction_config_manager<VW::reductions::automl::config_oracle<VW::reductions::automl::one_diff_impl>,
+        VW::estimators::confidence_sequence_robust>>*
 get_automl_data(VW::workspace& all);
 template VW::reductions::automl::automl<
     interaction_config_manager<VW::reductions::automl::config_oracle<VW::reductions::automl::champdupe_impl>,
-        VW::estimators::confidence_sequence>>*
+        VW::estimators::confidence_sequence_robust>>*
 get_automl_data(VW::workspace& all);
 template VW::reductions::automl::automl<
     interaction_config_manager<VW::reductions::automl::config_oracle<VW::reductions::automl::one_diff_inclusion_impl>,
-        VW::estimators::confidence_sequence>>*
+        VW::estimators::confidence_sequence_robust>>*
 get_automl_data(VW::workspace& all);
 
 using aml_rand = VW::reductions::automl::automl<
     interaction_config_manager<VW::reductions::automl::config_oracle<VW::reductions::automl::oracle_rand_impl>,
-        VW::estimators::confidence_sequence>>;
-using aml_onediff = VW::reductions::automl::automl<interaction_config_manager<
-    VW::reductions::automl::config_oracle<VW::reductions::automl::one_diff_impl>, VW::estimators::confidence_sequence>>;
+        VW::estimators::confidence_sequence_robust>>;
+using aml_onediff = VW::reductions::automl::automl<
+    interaction_config_manager<VW::reductions::automl::config_oracle<VW::reductions::automl::one_diff_impl>,
+        VW::estimators::confidence_sequence_robust>>;
 using aml_onediff_inclusion = VW::reductions::automl::automl<
     interaction_config_manager<VW::reductions::automl::config_oracle<VW::reductions::automl::one_diff_inclusion_impl>,
-        VW::estimators::confidence_sequence>>;
+        VW::estimators::confidence_sequence_robust>>;
 }  // namespace aml_test
 
 // Need to add save_load functionality to multiple structs in automl reduction including
@@ -358,9 +362,9 @@ BOOST_AUTO_TEST_CASE(automl_clear_configs_w_iterations)
         BOOST_CHECK_EQUAL(aml->cm->current_champ, 0);
         BOOST_CHECK_EQUAL(aml->cm->_config_oracle.valid_config_size, 4);
         BOOST_CHECK_EQUAL(clear_champ_switch - 1, aml->cm->total_learn_count);
-        BOOST_CHECK_EQUAL(aml->cm->estimators[0].first.live_interactions.size(), 3);
-        BOOST_CHECK_EQUAL(aml->cm->estimators[1].first.live_interactions.size(), 2);
-        BOOST_CHECK_EQUAL(aml->cm->estimators[2].first.live_interactions.size(), 2);
+        BOOST_CHECK_EQUAL(aml->cm->estimators[0].first.live_interactions.size(), 2);
+        BOOST_CHECK_EQUAL(aml->cm->estimators[1].first.live_interactions.size(), 3);
+        BOOST_CHECK_EQUAL(aml->cm->estimators[2].first.live_interactions.size(), 1);
         BOOST_CHECK(aml->current_state == VW::reductions::automl::automl_state::Experimenting);
         return true;
       });
@@ -431,9 +435,9 @@ BOOST_AUTO_TEST_CASE(automl_clear_configs_one_diff_w_iterations)
       {
         aml_test::aml_onediff* aml = aml_test::get_automl_data<VW::reductions::automl::one_diff_impl>(all);
         BOOST_CHECK_EQUAL(aml->cm->estimators.size(), 3);
-        BOOST_CHECK_EQUAL(aml->cm->estimators[0].first.live_interactions.size(), 3);
-        BOOST_CHECK_EQUAL(aml->cm->estimators[1].first.live_interactions.size(), 2);
-        BOOST_CHECK_EQUAL(aml->cm->estimators[2].first.live_interactions.size(), 2);
+        BOOST_CHECK_EQUAL(aml->cm->estimators[0].first.live_interactions.size(), 2);
+        BOOST_CHECK_EQUAL(aml->cm->estimators[1].first.live_interactions.size(), 3);
+        BOOST_CHECK_EQUAL(aml->cm->estimators[2].first.live_interactions.size(), 1);
         return true;
       });
 
@@ -479,7 +483,8 @@ BOOST_AUTO_TEST_CASE(one_diff_impl_unittest_w_iterations)
         auto rand_state = all.get_random_state();
 
         std::map<VW::namespace_index, uint64_t> ns_counter;
-        std::vector<std::pair<aml_estimator<VW::estimators::confidence_sequence>, VW::estimators::confidence_sequence>>
+        std::vector<std::pair<aml_estimator<VW::estimators::confidence_sequence_robust>,
+            VW::estimators::confidence_sequence_robust>>
             estimators;
 
         config_oracle<one_diff_impl> oracle(aml->cm->default_lease, co.calc_priority, co._interaction_type,
@@ -495,7 +500,7 @@ BOOST_AUTO_TEST_CASE(one_diff_impl_unittest_w_iterations)
         BOOST_CHECK_EQUAL(estimators.size(), 0);
         BOOST_CHECK_EQUAL(prio_queue.size(), 0);
         interaction_config_manager<config_oracle<one_diff_impl>,
-            VW::estimators::confidence_sequence>::insert_starting_configuration(estimators, oracle,
+            VW::estimators::confidence_sequence_robust>::insert_starting_configuration(estimators, oracle,
             aml->cm->automl_significance_level);
         BOOST_CHECK_EQUAL(configs.size(), 1);
         BOOST_CHECK_EQUAL(estimators.size(), 1);
@@ -540,7 +545,7 @@ BOOST_AUTO_TEST_CASE(one_diff_impl_unittest_w_iterations)
         for (size_t i = 1; i < configs.size(); ++i)
         {
           interaction_config_manager<config_oracle<one_diff_impl>,
-              VW::estimators::confidence_sequence>::apply_config_at_slot(estimators, oracle.configs, i,
+              VW::estimators::confidence_sequence_robust>::apply_config_at_slot(estimators, oracle.configs, i,
               config_oracle<one_diff_impl>::choose(oracle.index_queue), aml->cm->automl_significance_level, 1);
           auto& temp_exclusions = oracle.configs[estimators[i].first.config_index];
           auto& temp_interactions = estimators[i].first.live_interactions;
@@ -551,8 +556,8 @@ BOOST_AUTO_TEST_CASE(one_diff_impl_unittest_w_iterations)
         BOOST_CHECK_EQUAL(estimators.size(), 4);
 
         // excl_2 is now champ
-        interaction_config_manager<config_oracle<one_diff_impl>, VW::estimators::confidence_sequence>::apply_new_champ(
-            oracle, 2, estimators, 0, ns_counter);
+        interaction_config_manager<config_oracle<one_diff_impl>,
+            VW::estimators::confidence_sequence_robust>::apply_new_champ(oracle, 2, estimators, 0, ns_counter);
 
         BOOST_CHECK_EQUAL_COLLECTIONS(
             configs[0].elements.begin(), configs[0].elements.end(), excl_2.begin(), excl_2.end());
@@ -576,7 +581,7 @@ BOOST_AUTO_TEST_CASE(one_diff_impl_unittest_w_iterations)
         for (size_t i = 2; i < 4; ++i)
         {
           interaction_config_manager<config_oracle<one_diff_impl>,
-              VW::estimators::confidence_sequence>::apply_config_at_slot(estimators, oracle.configs, i,
+              VW::estimators::confidence_sequence_robust>::apply_config_at_slot(estimators, oracle.configs, i,
               config_oracle<one_diff_impl>::choose(oracle.index_queue), aml->cm->automl_significance_level, 1);
           auto& temp_config = oracle.configs[estimators[i].first.config_index];
           auto& temp_interactions = estimators[i].first.live_interactions;
@@ -586,8 +591,8 @@ BOOST_AUTO_TEST_CASE(one_diff_impl_unittest_w_iterations)
         BOOST_CHECK_EQUAL(prio_queue.size(), 0);
 
         // excl_4 is now champ
-        interaction_config_manager<config_oracle<one_diff_impl>, VW::estimators::confidence_sequence>::apply_new_champ(
-            oracle, 3, estimators, 0, ns_counter);
+        interaction_config_manager<config_oracle<one_diff_impl>,
+            VW::estimators::confidence_sequence_robust>::apply_new_champ(oracle, 3, estimators, 0, ns_counter);
 
         BOOST_CHECK_EQUAL_COLLECTIONS(
             configs[0].elements.begin(), configs[0].elements.end(), excl_4.begin(), excl_4.end());
@@ -650,15 +655,15 @@ BOOST_AUTO_TEST_CASE(automl_insertion_champ_change_w_iterations)
         BOOST_CHECK_EQUAL(aml->cm->current_champ, 0);
         BOOST_CHECK_EQUAL(aml->cm->_config_oracle.valid_config_size, 4);
         BOOST_CHECK_EQUAL(clear_champ_switch - 1, aml->cm->total_learn_count);
-        BOOST_CHECK_EQUAL(aml->cm->estimators[0].first.live_interactions.size(), 0);
-        BOOST_CHECK_EQUAL(aml->cm->estimators[1].first.live_interactions.size(), 1);
-        BOOST_CHECK_EQUAL(aml->cm->estimators[2].first.live_interactions.size(), 1);
+        BOOST_CHECK_EQUAL(aml->cm->estimators[0].first.live_interactions.size(), 1);
+        BOOST_CHECK_EQUAL(aml->cm->estimators[1].first.live_interactions.size(), 0);
+        BOOST_CHECK_EQUAL(aml->cm->estimators[2].first.live_interactions.size(), 2);
         BOOST_CHECK_EQUAL(aml->cm->_config_oracle.configs.size(), 4);
-        BOOST_CHECK_EQUAL(aml->cm->_config_oracle.configs[0].elements.size(), 0);
-        BOOST_CHECK_EQUAL(aml->cm->_config_oracle.configs[1].elements.size(), 1);
-        BOOST_CHECK_EQUAL(aml->cm->_config_oracle.configs[2].elements.size(), 1);
-        BOOST_CHECK_EQUAL(aml->cm->_config_oracle.configs[3].elements.size(), 1);
-        BOOST_CHECK_EQUAL(aml->cm->total_champ_switches, 0);
+        BOOST_CHECK_EQUAL(aml->cm->_config_oracle.configs[0].elements.size(), 1);
+        BOOST_CHECK_EQUAL(aml->cm->_config_oracle.configs[1].elements.size(), 0);
+        BOOST_CHECK_EQUAL(aml->cm->_config_oracle.configs[2].elements.size(), 2);
+        BOOST_CHECK_EQUAL(aml->cm->_config_oracle.configs[3].elements.size(), 2);
+        BOOST_CHECK_EQUAL(aml->cm->total_champ_switches, 1);
         BOOST_CHECK(aml->current_state == VW::reductions::automl::automl_state::Experimenting);
         return true;
       });
@@ -670,7 +675,7 @@ BOOST_AUTO_TEST_CASE(automl_insertion_champ_change_w_iterations)
         aml_test::check_config_states(aml);
         BOOST_CHECK_EQUAL(aml->cm->current_champ, 0);
         BOOST_CHECK_EQUAL(clear_champ_switch, aml->cm->total_learn_count);
-        BOOST_CHECK_EQUAL(aml->cm->estimators.size(), 2);
+        BOOST_CHECK_EQUAL(aml->cm->estimators.size(), 3);
         BOOST_CHECK_EQUAL(aml->cm->_config_oracle.valid_config_size, 4);
         BOOST_CHECK_EQUAL(aml->cm->estimators[0].first.live_interactions.size(), 1);
         BOOST_CHECK_EQUAL(aml->cm->estimators[1].first.live_interactions.size(), 0);