VowpalWabbit · bassmang · Dec 27, 2022 · Dec 20, 2022 · Dec 20, 2022 · Dec 20, 2022
diff --git a/test/pred-sets/ref/aml_spinoff.inv b/test/pred-sets/ref/aml_spinoff.inv
diff --git a/test/pred-sets/ref/ccb_implicit_and_explicit_interactions.inv b/test/pred-sets/ref/ccb_implicit_and_explicit_interactions.inv
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:-1
 Max label:0
 bits:18

diff --git a/test/pred-sets/ref/ccb_implicit_explicit_ignore_interactions.inv b/test/pred-sets/ref/ccb_implicit_explicit_ignore_interactions.inv
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:-1
 Max label:0
 bits:18

diff --git a/test/pred-sets/ref/ccb_lots_of_interactions.inv b/test/pred-sets/ref/ccb_lots_of_interactions.inv
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:-1
 Max label:0
 bits:18

diff --git a/test/pred-sets/ref/ccb_quad.inv b/test/pred-sets/ref/ccb_quad.inv
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:-1
 Max label:0
 bits:18

diff --git a/test/pred-sets/ref/ccb_quad_save_resume.inv b/test/pred-sets/ref/ccb_quad_save_resume.inv
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:-1
 Max label:0
 bits:18

diff --git a/test/pred-sets/ref/plt_probabilities.predict b/test/pred-sets/ref/plt_probabilities.predict
@@ -1,10 +1,10 @@
-1:0.754722
-2:0.620111,1:0.798071
-2:0.607861,3:0.629449
-4:0.588903,3:0.63312
-4:0.605069,5:0.663611,2:0.624935,3:0.638721
+1:0.754663
+2:0.620058,1:0.798044
+2:0.607869,3:0.629444
+4:0.588921,3:0.633088
+4:0.605087,5:0.663598,2:0.624917,3:0.638696
 
-5:0.670136,7:0.558485
-8:0.856221
-9:0.748161
-1:0.871351,8:0.871311
+5:0.67011,7:0.558517
+8:0.856213
+9:0.74813
+1:0.87135,8:0.87128
diff --git a/test/pred-sets/ref/plt_top1_probabilities.predict b/test/pred-sets/ref/plt_top1_probabilities.predict
@@ -1,10 +1,10 @@
-1:0.754722
-1:0.798071
-3:0.629449
-3:0.63312
-5:0.663611
-6:0.443445
-5:0.670136
-8:0.856221
-9:0.748161
-1:0.871351
+1:0.754663
+1:0.798044
+3:0.629444
+3:0.633088
+5:0.663598
+6:0.443414
+5:0.67011
+8:0.856213
+9:0.74813
+1:0.87135
diff --git a/test/pred-sets/ref/slates_w_interactions.inv b/test/pred-sets/ref/slates_w_interactions.inv
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:0.8
 bits:18

diff --git a/test/pred-sets/ref/t288.readable b/test/pred-sets/ref/t288.readable
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:1
 bits:18

diff --git a/test/train-sets/automl_spin_off.txt b/test/train-sets/automl_spin_off.txt
diff --git a/test/train-sets/ref/automl_readable.txt b/test/train-sets/ref/automl_readable.txt
diff --git a/test/train-sets/ref/automl_readable_cubic.txt b/test/train-sets/ref/automl_readable_cubic.txt
diff --git a/test/train-sets/ref/automl_readable_qc.txt b/test/train-sets/ref/automl_readable_qc.txt
diff --git a/test/train-sets/ref/cb_similar_aml_spinoff_save.stderr b/test/train-sets/ref/cb_similar_aml_spinoff_save.stderr
@@ -13,15 +13,15 @@ Input label = CB
 Output pred = ACTION_PROBS
 average  since         example        example        current        current  current
 loss     last          counter         weight          label        predict features
-0.333333 0.333333            1            1.0          0:1:1         0:0.33     8436
-0.175000 0.016667            2            2.0          1:1:1         2:0.97    18319
-0.333333 0.491667            4            4.0          1:1:1         0:0.97    23278
-0.293750 0.254167            8            8.0          0:1:1         2:0.97    13862
-0.273958 0.254167           16           16.0          0:1:1         2:0.97     2249
+-0.33333 -0.33333            1            1.0         0:-1:1         0:0.33     8436
+-0.17500 -0.01666            2            2.0         1:-1:1         0:0.97    18319
+-0.09583 -0.01666            4            4.0         1:-1:1         2:0.97    23278
+-0.17500 -0.25416            8            8.0         0:-1:1         1:0.97    13862
+-0.27395 -0.37291           16           16.0         0:-1:1         1:0.97     2249
 
 finished run
 number of examples = 18
 weighted example sum = 18.000000
 weighted label sum = 0.000000
-average loss = 0.298148
+average loss = -0.245370
 total feature number = 293853
diff --git a/test/train-sets/ref/cbadf_automl_readable.txt b/test/train-sets/ref/cbadf_automl_readable.txt
diff --git a/test/train-sets/ref/cbzo_constant_invert_hash.txt b/test/train-sets/ref/cbzo_constant_invert_hash.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:-1.13836
 Max label:3.60884
 bits:18

diff --git a/test/train-sets/ref/cbzo_constant_readable_model.txt b/test/train-sets/ref/cbzo_constant_readable_model.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:-1.13836
 Max label:3.60884
 bits:18

diff --git a/test/train-sets/ref/cbzo_linear_invert_hash.txt b/test/train-sets/ref/cbzo_linear_invert_hash.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:-3.35097
 Max label:3.13689
 bits:18

diff --git a/test/train-sets/ref/cbzo_linear_readable_model.txt b/test/train-sets/ref/cbzo_linear_readable_model.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:-3.35097
 Max label:3.13689
 bits:18

diff --git a/test/train-sets/ref/coin.readable b/test/train-sets/ref/coin.readable
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:2
 bits:18

diff --git a/test/train-sets/ref/coin_model_overflow.invert.txt b/test/train-sets/ref/coin_model_overflow.invert.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:1
 bits:18

diff --git a/test/train-sets/ref/dupeindex_self_cubic.txt b/test/train-sets/ref/dupeindex_self_cubic.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:1
 bits:18

diff --git a/test/train-sets/ref/dupeindex_self_quadratic.txt b/test/train-sets/ref/dupeindex_self_quadratic.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:1
 bits:18

diff --git a/test/train-sets/ref/dupeindex_self_quartic.txt b/test/train-sets/ref/dupeindex_self_quartic.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:1
 bits:18

diff --git a/test/train-sets/ref/ftrl.readable b/test/train-sets/ref/ftrl.readable
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:2
 bits:18

diff --git a/test/train-sets/ref/help.stdout b/test/train-sets/ref/help.stdout
@@ -261,6 +261,8 @@ Weight Options:
                                             0.05, keep, experimental)
     --fixed_significance_level              Use fixed significance level as opposed to scaling by model count
                                             (bonferroni correction) (type: bool, keep, experimental)
+    --reward_as_cost                        Treat rewards as cost (do not negate sign) (type: bool, keep,
+                                            experimental)
 [Reduction] Baseline Options:
     --baseline                              Learn an additive baseline (from constant features) and a residual
                                             separately in regression (type: bool, keep, necessary)
@@ -584,6 +586,8 @@ Weight Options:
     --shift_model_bounds arg                Shift maximum update_count for model i from champ_update_count^(i
                                             / num_models) to champ_update_count^((i + shift) / (num_models
                                             + shift)) (type: uint, default: 0, keep, experimental)
+    --reward_as_cost                        Treat rewards as cost (do not negate sign) (type: bool, keep,
+                                            experimental)
 [Reduction] Error Correcting Tournament Options:
     --ect arg                               Error correcting tournament with <k> labels (type: uint, keep,
                                             necessary)

diff --git a/test/train-sets/ref/ignore_feature.interactions b/test/train-sets/ref/ignore_feature.interactions
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:-1
 Max label:0
 bits:18

diff --git a/test/train-sets/ref/ignore_feature_default_ns.interactions b/test/train-sets/ref/ignore_feature_default_ns.interactions
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:-1
 Max label:0
 bits:18
@@ -43,4 +43,4 @@ j^organicyes:205927:-0.0709894 2.14368 1
 j^roastlight:219055:0.0237209 0.68872 1
 weatherSunny:229470:0.0237209 0.68872 1
 j^originkenya:231314:-0.0904419 1.45496 1
-j^typecold:257035:0.0237209 0.68872 1
+j^typecold:257035:0.0237209 0.68872 1
diff --git a/test/train-sets/ref/ignore_multiple_features.interactions b/test/train-sets/ref/ignore_multiple_features.interactions
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:-1
 Max label:0
 bits:18
@@ -48,4 +48,4 @@ FromUrl^nameDave:189701:-0.0704397 0.24404 1
 FromUrl^weatherRainy:194859:0.0036332 1.20771 1
 j^organicyes:205927:-0.0738507 2.98816 1
 j^roastlight:219055:-0.0227636 1.75644 1
-j^originkenya:231314:-0.0979818 2.18135 1
+j^originkenya:231314:-0.0979818 2.18135 1
diff --git a/test/train-sets/ref/inv_hash_load_model.invert.txt b/test/train-sets/ref/inv_hash_load_model.invert.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:2
 bits:18

diff --git a/test/train-sets/ref/inv_hash_load_model.readable.txt b/test/train-sets/ref/inv_hash_load_model.readable.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:2
 bits:18

diff --git a/test/train-sets/ref/l1_l2_default_model.txt b/test/train-sets/ref/l1_l2_default_model.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:0
 bits:18

diff --git a/test/train-sets/ref/l1_l2_override_model.txt b/test/train-sets/ref/l1_l2_override_model.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:0
 bits:18

diff --git a/test/train-sets/ref/l1_override_l2_leave_model.txt b/test/train-sets/ref/l1_override_l2_leave_model.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:0
 bits:18

diff --git a/test/train-sets/ref/l1l2_migrate.txt b/test/train-sets/ref/l1l2_migrate.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:0
 bits:18

diff --git a/test/train-sets/ref/l1l2_migrate_override.txt b/test/train-sets/ref/l1l2_migrate_override.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:0
 bits:18

diff --git a/test/train-sets/ref/marginal_invert_hash_readable_model.txt b/test/train-sets/ref/marginal_invert_hash_readable_model.txt
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:1
 bits:18

diff --git a/test/train-sets/ref/pistol.readable b/test/train-sets/ref/pistol.readable
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:0
 Max label:2
 bits:18

diff --git a/test/train-sets/ref/plt_predict.stderr b/test/train-sets/ref/plt_predict.stderr
@@ -28,4 +28,4 @@ average loss = 0.000000
 total feature number = 20
 hamming loss = 0.200000
 micro-precision = 1.000000
-micro-recall = 0.894737
+micro-recall = 0.894737
diff --git a/test/train-sets/ref/plt_sgd_predict.stderr b/test/train-sets/ref/plt_sgd_predict.stderr
@@ -28,4 +28,4 @@ average loss = 0.000000
 total feature number = 20
 hamming loss = 1.700000
 micro-precision = 0.562500
-micro-recall = 0.473684
+micro-recall = 0.473684
diff --git a/test/train-sets/ref/plt_sgd_top1_predict.stderr b/test/train-sets/ref/plt_sgd_top1_predict.stderr
@@ -27,4 +27,4 @@ weighted label sum = 0.000000
 average loss = 0.000000
 total feature number = 20
 p@1 = 0.600000
-r@1 = 0.450000
+r@1 = 0.450000
diff --git a/test/train-sets/ref/plt_top1_predict.stderr b/test/train-sets/ref/plt_top1_predict.stderr
@@ -27,4 +27,4 @@ weighted label sum = 0.000000
 average loss = 0.000000
 total feature number = 20
 p@1 = 1.000000
-r@1 = 0.625000
+r@1 = 0.625000
diff --git a/test/train-sets/ref/spin_off_aml_load.stderr b/test/train-sets/ref/spin_off_aml_load.stderr
@@ -12,15 +12,15 @@ Input label = CB
 Output pred = ACTION_PROBS
 average  since         example        example        current        current  current
 loss     last          counter         weight          label        predict features
-0.016667 0.016667            1            1.0          0:1:1         2:0.97     8436
-0.016667 0.016667            2            2.0          1:1:1         2:0.97    18319
-0.016667 0.016667            4            4.0          1:1:1         0:0.97    23278
-0.016667 0.016667            8            8.0          0:1:1         2:0.97    13862
-0.016667 0.016667           16           16.0          0:1:1         2:0.97     2249
+-0.96666 -0.96666            1            1.0         0:-1:1         0:0.97     8436
+-0.96666 -0.96666            2            2.0         1:-1:1         1:0.97    18319
+-0.49166 -0.01666            4            4.0         1:-1:1         2:0.97    23278
+-0.37291 -0.25416            8            8.0         0:-1:1         1:0.97    13862
+-0.43229 -0.49166           16           16.0         0:-1:1         0:0.97     2249
 
 finished run
 number of examples = 18
 weighted example sum = 18.000000
 weighted label sum = 0.000000
-average loss = 0.069444
+average loss = -0.438889
 total feature number = 293853
diff --git a/test/train-sets/ref/spin_off_aml_save.stderr b/test/train-sets/ref/spin_off_aml_save.stderr
@@ -12,15 +12,15 @@ Input label = CB
 Output pred = ACTION_PROBS
 average  since         example        example        current        current  current
 loss     last          counter         weight          label        predict features
-0.333333 0.333333            1            1.0          0:1:1         0:0.33     8436
-0.175000 0.016667            2            2.0          1:1:1         2:0.97    18319
-0.333333 0.491667            4            4.0          1:1:1         0:0.97    23278
-0.293750 0.254167            8            8.0          0:1:1         2:0.97    13862
-0.273958 0.254167           16           16.0          0:1:1         2:0.97     2249
+-0.33333 -0.33333            1            1.0         0:-1:1         0:0.33     8436
+-0.17500 -0.01666            2            2.0         1:-1:1         0:0.97    18319
+-0.09583 -0.01666            4            4.0         1:-1:1         2:0.97    23278
+-0.17500 -0.25416            8            8.0         0:-1:1         1:0.97    13862
+-0.27395 -0.37291           16           16.0         0:-1:1         1:0.97     2249
 
 finished run
 number of examples = 18
 weighted example sum = 18.000000
 weighted label sum = 0.000000
-average loss = 0.298148
+average loss = -0.245370
 total feature number = 293853
diff --git a/test/train-sets/ref/w_out_slot_ns.interactions b/test/train-sets/ref/w_out_slot_ns.interactions
@@ -1,5 +1,5 @@
 Version 9.7.0
-Id
+Id 
 Min label:-1
 Max label:0
 bits:18

diff --git a/test/unit_test/CMakeLists.txt b/test/unit_test/CMakeLists.txt
@@ -3,9 +3,6 @@ if(VW_UNIT_TEST_WITH_VALGRIND_INTERNAL)
 endif()
 
 add_executable(vw-unit-test.out
-  epsilon_decay_test.cc
-  automl_test.cc
-  automl_weights_test.cc
   cats_user_provided_pdf.cc
   cb_large_actions_test.cc
   cb_las_one_pass_svd_test.cc
@@ -18,12 +15,9 @@ add_executable(vw-unit-test.out
   custom_reduction_test.cc
   distributionally_robust_test.cc
   main.cc
-  simulator.cc
-  simulator.h
   tag_utils_test.cc
   test_common.cc
   test_common.h
-  tutorial_test.cc
 )
 
 if (VW_BUILD_LAS_WITH_SIMD AND (UNIX AND NOT APPLE) AND (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64"))