From 8e4cb19a0383e535af36f5997630f41983aae6f3 Mon Sep 17 00:00:00 2001 From: Jack Gerrits Date: Tue, 27 Feb 2024 12:44:34 -0500 Subject: [PATCH] fix: fix #4669 by handling empty decision scores elements (#4670) * fix: fix #4669 by handling empty decision scores elements * simplify test * ensure empty predictions do not affect num_labeled as well as loss * Update conditional_contextual_bandit.cc * Bounds check for explicit inclusion * Formatting --------- Co-authored-by: Alexey Taymanov <41013086+ataymano@users.noreply.github.com> --- test/core.vwtest.json | 29 ++++++++++ test/train-sets/issue4669.dsjson | 1 + test/train-sets/ref/issue4669_test.stderr | 23 ++++++++ test/train-sets/ref/issue4669_test.stdout | 0 test/train-sets/ref/issue4669_test_pred.txt | 3 + test/train-sets/ref/issue4669_train.stderr | 22 ++++++++ test/train-sets/ref/issue4669_train.stdout | 0 vowpalwabbit/core/src/decision_scores.cc | 3 +- .../conditional_contextual_bandit.cc | 18 +++++- vowpalwabbit/core/tests/ccb_test.cc | 56 +++++++++++++++++++ 10 files changed, 151 insertions(+), 4 deletions(-) create mode 100644 test/train-sets/issue4669.dsjson create mode 100644 test/train-sets/ref/issue4669_test.stderr create mode 100644 test/train-sets/ref/issue4669_test.stdout create mode 100644 test/train-sets/ref/issue4669_test_pred.txt create mode 100644 test/train-sets/ref/issue4669_train.stderr create mode 100644 test/train-sets/ref/issue4669_train.stdout diff --git a/test/core.vwtest.json b/test/core.vwtest.json index ef6857518f3..f569d3d812d 100644 --- a/test/core.vwtest.json +++ b/test/core.vwtest.json @@ -6073,5 +6073,34 @@ "depends_on": [ 467 ] + }, + { + "id": 469, + "desc": "https://github.com/VowpalWabbit/vowpal_wabbit/issues/4669", + "vw_command": "--ccb_explore_adf --dsjson -d train-sets/issue4669.dsjson -f issue4669.model", + "diff_files": { + "stderr": "train-sets/ref/issue4669_train.stderr", + "stdout": "train-sets/ref/issue4669_train.stdout" + }, + "input_files": [ + "train-sets/issue4669.dsjson" + ] + }, + { + "id": 470, + "desc": "https://github.com/VowpalWabbit/vowpal_wabbit/issues/4669", + "vw_command": "--ccb_explore_adf --dsjson --all_slots_loss --epsilon 0 -t -i issue4669.model -t -d train-sets/issue4669.dsjson -p issue4669_test_pred.txt", + "diff_files": { + "stderr": "train-sets/ref/issue4669_test.stderr", + "stdout": "train-sets/ref/issue4669_test.stdout", + "issue4669_test_pred.txt": "train-sets/ref/issue4669_test_pred.txt" + }, + "input_files": [ + "train-sets/issue4669.dsjson", + "issue4669.model" + ], + "depends_on": [ + 469 + ] } ] \ No newline at end of file diff --git a/test/train-sets/issue4669.dsjson b/test/train-sets/issue4669.dsjson new file mode 100644 index 00000000000..bbd36e32773 --- /dev/null +++ b/test/train-sets/issue4669.dsjson @@ -0,0 +1 @@ +{"c":{"_multi":[{"f":"1"},{"f":"2"}],"_slots":[{"_inc":[0,1]},{"_inc":[1]}]},"_outcomes":[{"_label_cost":1.0,"_a":[0,1],"_p":[0.5,0.5]},{"_label_cost":0.0,"_a":[1],"_p":[1]}]} \ No newline at end of file diff --git a/test/train-sets/ref/issue4669_test.stderr b/test/train-sets/ref/issue4669_test.stderr new file mode 100644 index 00000000000..9b3fb9ce7cf --- /dev/null +++ b/test/train-sets/ref/issue4669_test.stderr @@ -0,0 +1,23 @@ +only testing +predictions = issue4669_test_pred.txt +using no cache +Reading datafile = train-sets/issue4669.dsjson +num sources = 1 +Num weight bits = 18 +learning rate = 0.5 +initial_t = 1 +power_t = 0.5 +cb_type = mtr +Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, cb_sample, shared_feature_merger, ccb_explore_adf +Input label = CCB +Output pred = DECISION_PROBS +average since example example current current current +loss last counter weight label predict features +0.000000 0.000000 1 1.0 0:1,1:0 1,None 9 + +finished run +number of examples = 1 +weighted example sum = 1.000000 +weighted label sum = 0.000000 +average loss = 0.000000 +total feature number = 9 diff --git a/test/train-sets/ref/issue4669_test.stdout b/test/train-sets/ref/issue4669_test.stdout new file mode 100644 index 00000000000..e69de29bb2d diff --git a/test/train-sets/ref/issue4669_test_pred.txt b/test/train-sets/ref/issue4669_test_pred.txt new file mode 100644 index 00000000000..ba6b9ca942b --- /dev/null +++ b/test/train-sets/ref/issue4669_test_pred.txt @@ -0,0 +1,3 @@ +1:1,0:0 + + diff --git a/test/train-sets/ref/issue4669_train.stderr b/test/train-sets/ref/issue4669_train.stderr new file mode 100644 index 00000000000..48505ae87ae --- /dev/null +++ b/test/train-sets/ref/issue4669_train.stderr @@ -0,0 +1,22 @@ +final_regressor = issue4669.model +using no cache +Reading datafile = train-sets/issue4669.dsjson +num sources = 1 +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +cb_type = mtr +Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, cb_sample, shared_feature_merger, ccb_explore_adf +Input label = CCB +Output pred = DECISION_PROBS +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 1 1.0 0:1,1:0 0,1 12 + +finished run +number of examples = 1 +weighted example sum = 1.000000 +weighted label sum = 0.000000 +average loss = 1.000000 +total feature number = 12 diff --git a/test/train-sets/ref/issue4669_train.stdout b/test/train-sets/ref/issue4669_train.stdout new file mode 100644 index 00000000000..e69de29bb2d diff --git a/vowpalwabbit/core/src/decision_scores.cc b/vowpalwabbit/core/src/decision_scores.cc index 4bc0810c7c9..02529c5b42c 100644 --- a/vowpalwabbit/core/src/decision_scores.cc +++ b/vowpalwabbit/core/src/decision_scores.cc @@ -26,7 +26,8 @@ void print_update(VW::workspace& all, const VW::multi_ex& slots, const VW::decis std::string delim; for (const auto& slot : decision_scores) { - pred_ss << delim << slot[0].action; + if (slot.empty()) { pred_ss << delim << "None"; } + else { pred_ss << delim << slot[0].action; } delim = ","; } all.sd->print_update(*all.output_runtime.trace_message, all.passes_config.holdout_set_off, diff --git a/vowpalwabbit/core/src/reductions/conditional_contextual_bandit.cc b/vowpalwabbit/core/src/reductions/conditional_contextual_bandit.cc index b332793ded8..9d15dd4a1ce 100644 --- a/vowpalwabbit/core/src/reductions/conditional_contextual_bandit.cc +++ b/vowpalwabbit/core/src/reductions/conditional_contextual_bandit.cc @@ -5,6 +5,7 @@ #include "vw/core/reductions/conditional_contextual_bandit.h" #include "vw/config/options.h" +#include "vw/core/cb.h" #include "vw/core/ccb_label.h" #include "vw/core/ccb_reduction_features.h" #include "vw/core/constant.h" @@ -213,8 +214,12 @@ void clear_pred_and_label(ccb_data& data) data.actions[data.action_with_label]->l.cb.costs.clear(); } -// true if there exists at least 1 action in the cb multi-example -bool has_action(VW::multi_ex& cb_ex) { return !cb_ex.empty(); } +// true if there exists at least 2 examples (since there can only be up to 1 +// shared example), or the 0th example is not shared. +bool has_action(VW::multi_ex& cb_ex) +{ + return cb_ex.size() > 1 || (!cb_ex.empty() && !VW::ec_is_example_header_cb(*cb_ex[0])); +} // This function intentionally does not handle increasing the num_features of the example because // the output_example function has special logic to ensure the number of features is correctly calculated. @@ -309,7 +314,11 @@ void build_cb_example(VW::multi_ex& cb_ex, VW::example* slot, const VW::ccb_labe // First time seeing this, initialize the vector with falses so we can start setting each included action. if (data.include_list.empty()) { data.include_list.assign(data.actions.size(), false); } - for (uint32_t included_action_id : explicit_includes) { data.include_list[included_action_id] = true; } + for (uint32_t included_action_id : explicit_includes) + { + // The action may be included but not actually exist in the list of possible actions. + if (included_action_id < data.actions.size()) { data.include_list[included_action_id] = true; } + } } // set the available actions in the cb multi-example @@ -545,6 +554,9 @@ void update_stats_ccb(const VW::workspace& /* all */, shared_data& sd, const ccb if (outcome != nullptr) { num_labeled++; + // It is possible for the prediction to be empty if there were no actions available at the time of taking the + // slot decision. In this case it does not contribute to loss. + if (preds[i].empty()) { continue; } if (i == 0 || data.all_slots_loss_report) { const float l = VW::get_cost_estimate(outcome->probabilities[VW::details::TOP_ACTION_INDEX], outcome->cost, diff --git a/vowpalwabbit/core/tests/ccb_test.cc b/vowpalwabbit/core/tests/ccb_test.cc index d9ba62525bc..86962f51df0 100644 --- a/vowpalwabbit/core/tests/ccb_test.cc +++ b/vowpalwabbit/core/tests/ccb_test.cc @@ -145,3 +145,59 @@ TEST(Ccb, InsertInteractionsImplTest) EXPECT_THAT(result, testing::ContainerEq(expected_after)); } + +TEST(Ccb, ExplicitIncludedActionsNonExistentAction) +{ + auto vw = VW::initialize(vwtest::make_args("--ccb_explore_adf", "--quiet")); + VW::multi_ex examples; + examples.push_back(VW::read_example(*vw, "ccb shared |")); + examples.push_back(VW::read_example(*vw, "ccb action |")); + examples.push_back(VW::read_example(*vw, "ccb slot 0:10:10 10 |")); + + vw->learn(examples); + + auto& decision_scores = examples[0]->pred.decision_scores; + EXPECT_EQ(decision_scores.size(), 1); + EXPECT_EQ(decision_scores[0].size(), 0); + vw->finish_example(examples); +} + +TEST(Ccb, NoAvailableActions) +{ + auto vw = VW::initialize(vwtest::make_args("--ccb_explore_adf", "--quiet", "--all_slots_loss")); + { + VW::multi_ex examples; + examples.push_back(VW::read_example(*vw, "ccb shared |")); + examples.push_back(VW::read_example(*vw, "ccb action | a")); + examples.push_back(VW::read_example(*vw, "ccb action | b")); + examples.push_back(VW::read_example(*vw, "ccb slot 0:-1:0.5 0,1 |")); + examples.push_back(VW::read_example(*vw, "ccb slot |")); + + vw->learn(examples); + + auto& decision_scores = examples[0]->pred.decision_scores; + EXPECT_EQ(decision_scores.size(), 2); + vw->finish_example(examples); + } + + { + VW::multi_ex examples; + examples.push_back(VW::read_example(*vw, "ccb shared |")); + examples.push_back(VW::read_example(*vw, "ccb action | a")); + examples.push_back(VW::read_example(*vw, "ccb action | b")); + examples.push_back(VW::read_example(*vw, "ccb slot 0:-1:0.5 0,1 |")); + // This time restrict slot 1 to only have action 0 available + examples.push_back(VW::read_example(*vw, "ccb slot 0:-1:0.5 0 |")); + + vw->predict(examples); + + auto& decision_scores = examples[0]->pred.decision_scores; + EXPECT_EQ(decision_scores.size(), 2); + EXPECT_EQ(decision_scores[0].size(), 2); + EXPECT_EQ(decision_scores[0][0].action, 0); + EXPECT_EQ(decision_scores[0][1].action, 1); + EXPECT_EQ(decision_scores[1].size(), 0); + + vw->finish_example(examples); + } +} \ No newline at end of file