fix: fix #4669 by handling empty decision scores elements (#4670)

* fix: fix #4669 by handling empty decision scores elements * simplify test * ensure empty predictions do not affect num_labeled as well as loss * Update conditional_contextual_bandit.cc * Bounds check for explicit inclusion * Formatting --------- Co-authored-by: Alexey Taymanov <[email protected]>
VowpalWabbit · Feb 27, 2024 · 8e4cb19 · 8e4cb19
1 parent 1675c6b
commit 8e4cb19
Show file tree

Hide file tree

Showing 10 changed files with 151 additions and 4 deletions.
diff --git a/test/core.vwtest.json b/test/core.vwtest.json
@@ -6073,5 +6073,34 @@
     "depends_on": [
       467
     ]
+  },
+  {
+    "id": 469,
+    "desc": "https://github.com/VowpalWabbit/vowpal_wabbit/issues/4669",
+    "vw_command": "--ccb_explore_adf --dsjson -d train-sets/issue4669.dsjson -f issue4669.model",
+    "diff_files": {
+      "stderr": "train-sets/ref/issue4669_train.stderr",
+      "stdout": "train-sets/ref/issue4669_train.stdout"
+    },
+    "input_files": [
+      "train-sets/issue4669.dsjson"
+    ]
+  },
+  {
+    "id": 470,
+    "desc": "https://github.com/VowpalWabbit/vowpal_wabbit/issues/4669",
+    "vw_command": "--ccb_explore_adf --dsjson --all_slots_loss --epsilon 0 -t -i issue4669.model -t -d train-sets/issue4669.dsjson -p issue4669_test_pred.txt",
+    "diff_files": {
+      "stderr": "train-sets/ref/issue4669_test.stderr",
+      "stdout": "train-sets/ref/issue4669_test.stdout",
+      "issue4669_test_pred.txt": "train-sets/ref/issue4669_test_pred.txt"
+    },
+    "input_files": [
+      "train-sets/issue4669.dsjson",
+      "issue4669.model"
+    ],
+    "depends_on": [
+      469
+    ]
   }
 ]
diff --git a/test/train-sets/issue4669.dsjson b/test/train-sets/issue4669.dsjson
@@ -0,0 +1 @@
+{"c":{"_multi":[{"f":"1"},{"f":"2"}],"_slots":[{"_inc":[0,1]},{"_inc":[1]}]},"_outcomes":[{"_label_cost":1.0,"_a":[0,1],"_p":[0.5,0.5]},{"_label_cost":0.0,"_a":[1],"_p":[1]}]}
diff --git a/test/train-sets/ref/issue4669_test.stderr b/test/train-sets/ref/issue4669_test.stderr
@@ -0,0 +1,23 @@
+only testing
+predictions = issue4669_test_pred.txt
+using no cache
+Reading datafile = train-sets/issue4669.dsjson
+num sources = 1
+Num weight bits = 18
+learning rate = 0.5
+initial_t = 1
+power_t = 0.5
+cb_type = mtr
+Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, cb_sample, shared_feature_merger, ccb_explore_adf
+Input label = CCB
+Output pred = DECISION_PROBS
+average  since         example        example        current        current  current
+loss     last          counter         weight          label        predict features
+0.000000 0.000000            1            1.0        0:1,1:0         1,None        9
+
+finished run
+number of examples = 1
+weighted example sum = 1.000000
+weighted label sum = 0.000000
+average loss = 0.000000
+total feature number = 9
diff --git a/test/train-sets/ref/issue4669_test.stdout b/test/train-sets/ref/issue4669_test.stdout
diff --git a/test/train-sets/ref/issue4669_test_pred.txt b/test/train-sets/ref/issue4669_test_pred.txt
@@ -0,0 +1,3 @@
+1:1,0:0
+
+
diff --git a/test/train-sets/ref/issue4669_train.stderr b/test/train-sets/ref/issue4669_train.stderr
@@ -0,0 +1,22 @@
+final_regressor = issue4669.model
+using no cache
+Reading datafile = train-sets/issue4669.dsjson
+num sources = 1
+Num weight bits = 18
+learning rate = 0.5
+initial_t = 0
+power_t = 0.5
+cb_type = mtr
+Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, cb_sample, shared_feature_merger, ccb_explore_adf
+Input label = CCB
+Output pred = DECISION_PROBS
+average  since         example        example        current        current  current
+loss     last          counter         weight          label        predict features
+1.000000 1.000000            1            1.0        0:1,1:0            0,1       12
+
+finished run
+number of examples = 1
+weighted example sum = 1.000000
+weighted label sum = 0.000000
+average loss = 1.000000
+total feature number = 12
diff --git a/test/train-sets/ref/issue4669_train.stdout b/test/train-sets/ref/issue4669_train.stdout
diff --git a/vowpalwabbit/core/src/decision_scores.cc b/vowpalwabbit/core/src/decision_scores.cc
@@ -26,7 +26,8 @@ void print_update(VW::workspace& all, const VW::multi_ex& slots, const VW::decis
   std::string delim;
   for (const auto& slot : decision_scores)
   {
-    pred_ss << delim << slot[0].action;
+    if (slot.empty()) { pred_ss << delim << "None"; }
+    else { pred_ss << delim << slot[0].action; }
     delim = ",";
   }
   all.sd->print_update(*all.output_runtime.trace_message, all.passes_config.holdout_set_off,

diff --git a/vowpalwabbit/core/src/reductions/conditional_contextual_bandit.cc b/vowpalwabbit/core/src/reductions/conditional_contextual_bandit.cc
@@ -5,6 +5,7 @@
 #include "vw/core/reductions/conditional_contextual_bandit.h"
 
 #include "vw/config/options.h"
+#include "vw/core/cb.h"
 #include "vw/core/ccb_label.h"
 #include "vw/core/ccb_reduction_features.h"
 #include "vw/core/constant.h"
@@ -213,8 +214,12 @@ void clear_pred_and_label(ccb_data& data)
   data.actions[data.action_with_label]->l.cb.costs.clear();
 }
 
-// true if there exists at least 1 action in the cb multi-example
-bool has_action(VW::multi_ex& cb_ex) { return !cb_ex.empty(); }
+// true if there exists at least 2 examples (since there can only be up to 1
+// shared example), or the 0th example is not shared.
+bool has_action(VW::multi_ex& cb_ex)
+{
+  return cb_ex.size() > 1 || (!cb_ex.empty() && !VW::ec_is_example_header_cb(*cb_ex[0]));
+}
 
 // This function intentionally does not handle increasing the num_features of the example because
 // the output_example function has special logic to ensure the number of features is correctly calculated.
@@ -309,7 +314,11 @@ void build_cb_example(VW::multi_ex& cb_ex, VW::example* slot, const VW::ccb_labe
     // First time seeing this, initialize the vector with falses so we can start setting each included action.
     if (data.include_list.empty()) { data.include_list.assign(data.actions.size(), false); }
 
-    for (uint32_t included_action_id : explicit_includes) { data.include_list[included_action_id] = true; }
+    for (uint32_t included_action_id : explicit_includes)
+    {
+      // The action may be included but not actually exist in the list of possible actions.
+      if (included_action_id < data.actions.size()) { data.include_list[included_action_id] = true; }
+    }
   }
 
   // set the available actions in the cb multi-example
@@ -545,6 +554,9 @@ void update_stats_ccb(const VW::workspace& /* all */, shared_data& sd, const ccb
       if (outcome != nullptr)
       {
         num_labeled++;
+        // It is possible for the prediction to be empty if there were no actions available at the time of taking the
+        // slot decision. In this case it does not contribute to loss.
+        if (preds[i].empty()) { continue; }
         if (i == 0 || data.all_slots_loss_report)
         {
           const float l = VW::get_cost_estimate(outcome->probabilities[VW::details::TOP_ACTION_INDEX], outcome->cost,

diff --git a/vowpalwabbit/core/tests/ccb_test.cc b/vowpalwabbit/core/tests/ccb_test.cc
@@ -145,3 +145,59 @@ TEST(Ccb, InsertInteractionsImplTest)
 
   EXPECT_THAT(result, testing::ContainerEq(expected_after));
 }
+
+TEST(Ccb, ExplicitIncludedActionsNonExistentAction)
+{
+  auto vw = VW::initialize(vwtest::make_args("--ccb_explore_adf", "--quiet"));
+  VW::multi_ex examples;
+  examples.push_back(VW::read_example(*vw, "ccb shared |"));
+  examples.push_back(VW::read_example(*vw, "ccb action |"));
+  examples.push_back(VW::read_example(*vw, "ccb slot 0:10:10 10 |"));
+
+  vw->learn(examples);
+
+  auto& decision_scores = examples[0]->pred.decision_scores;
+  EXPECT_EQ(decision_scores.size(), 1);
+  EXPECT_EQ(decision_scores[0].size(), 0);
+  vw->finish_example(examples);
+}
+
+TEST(Ccb, NoAvailableActions)
+{
+  auto vw = VW::initialize(vwtest::make_args("--ccb_explore_adf", "--quiet", "--all_slots_loss"));
+  {
+    VW::multi_ex examples;
+    examples.push_back(VW::read_example(*vw, "ccb shared |"));
+    examples.push_back(VW::read_example(*vw, "ccb action | a"));
+    examples.push_back(VW::read_example(*vw, "ccb action | b"));
+    examples.push_back(VW::read_example(*vw, "ccb slot 0:-1:0.5 0,1 |"));
+    examples.push_back(VW::read_example(*vw, "ccb slot |"));
+
+    vw->learn(examples);
+
+    auto& decision_scores = examples[0]->pred.decision_scores;
+    EXPECT_EQ(decision_scores.size(), 2);
+    vw->finish_example(examples);
+  }
+
+  {
+    VW::multi_ex examples;
+    examples.push_back(VW::read_example(*vw, "ccb shared |"));
+    examples.push_back(VW::read_example(*vw, "ccb action | a"));
+    examples.push_back(VW::read_example(*vw, "ccb action | b"));
+    examples.push_back(VW::read_example(*vw, "ccb slot 0:-1:0.5 0,1 |"));
+    // This time restrict slot 1 to only have action 0 available
+    examples.push_back(VW::read_example(*vw, "ccb slot 0:-1:0.5 0 |"));
+
+    vw->predict(examples);
+
+    auto& decision_scores = examples[0]->pred.decision_scores;
+    EXPECT_EQ(decision_scores.size(), 2);
+    EXPECT_EQ(decision_scores[0].size(), 2);
+    EXPECT_EQ(decision_scores[0][0].action, 0);
+    EXPECT_EQ(decision_scores[0][1].action, 1);
+    EXPECT_EQ(decision_scores[1].size(), 0);
+
+    vw->finish_example(examples);
+  }
+}