From 8e4cb19a0383e535af36f5997630f41983aae6f3 Mon Sep 17 00:00:00 2001
From: Jack Gerrits <jackgerrits@users.noreply.github.com>
Date: Tue, 27 Feb 2024 12:44:34 -0500
Subject: [PATCH] fix: fix #4669 by handling empty decision scores elements
 (#4670)

* fix: fix #4669 by handling empty decision scores elements

* simplify test

* ensure empty predictions do not affect num_labeled as well as loss

* Update conditional_contextual_bandit.cc

* Bounds check for explicit inclusion

* Formatting

---------

Co-authored-by: Alexey Taymanov <41013086+ataymano@users.noreply.github.com>
---
 test/core.vwtest.json                         | 29 ++++++++++
 test/train-sets/issue4669.dsjson              |  1 +
 test/train-sets/ref/issue4669_test.stderr     | 23 ++++++++
 test/train-sets/ref/issue4669_test.stdout     |  0
 test/train-sets/ref/issue4669_test_pred.txt   |  3 +
 test/train-sets/ref/issue4669_train.stderr    | 22 ++++++++
 test/train-sets/ref/issue4669_train.stdout    |  0
 vowpalwabbit/core/src/decision_scores.cc      |  3 +-
 .../conditional_contextual_bandit.cc          | 18 +++++-
 vowpalwabbit/core/tests/ccb_test.cc           | 56 +++++++++++++++++++
 10 files changed, 151 insertions(+), 4 deletions(-)
 create mode 100644 test/train-sets/issue4669.dsjson
 create mode 100644 test/train-sets/ref/issue4669_test.stderr
 create mode 100644 test/train-sets/ref/issue4669_test.stdout
 create mode 100644 test/train-sets/ref/issue4669_test_pred.txt
 create mode 100644 test/train-sets/ref/issue4669_train.stderr
 create mode 100644 test/train-sets/ref/issue4669_train.stdout

diff --git a/test/core.vwtest.json b/test/core.vwtest.json
index ef6857518f3..f569d3d812d 100644
--- a/test/core.vwtest.json
+++ b/test/core.vwtest.json
@@ -6073,5 +6073,34 @@
     "depends_on": [
       467
     ]
+  },
+  {
+    "id": 469,
+    "desc": "https://github.com/VowpalWabbit/vowpal_wabbit/issues/4669",
+    "vw_command": "--ccb_explore_adf --dsjson -d train-sets/issue4669.dsjson -f issue4669.model",
+    "diff_files": {
+      "stderr": "train-sets/ref/issue4669_train.stderr",
+      "stdout": "train-sets/ref/issue4669_train.stdout"
+    },
+    "input_files": [
+      "train-sets/issue4669.dsjson"
+    ]
+  },
+  {
+    "id": 470,
+    "desc": "https://github.com/VowpalWabbit/vowpal_wabbit/issues/4669",
+    "vw_command": "--ccb_explore_adf --dsjson --all_slots_loss --epsilon 0 -t -i issue4669.model -t -d train-sets/issue4669.dsjson -p issue4669_test_pred.txt",
+    "diff_files": {
+      "stderr": "train-sets/ref/issue4669_test.stderr",
+      "stdout": "train-sets/ref/issue4669_test.stdout",
+      "issue4669_test_pred.txt": "train-sets/ref/issue4669_test_pred.txt"
+    },
+    "input_files": [
+      "train-sets/issue4669.dsjson",
+      "issue4669.model"
+    ],
+    "depends_on": [
+      469
+    ]
   }
 ]
\ No newline at end of file
diff --git a/test/train-sets/issue4669.dsjson b/test/train-sets/issue4669.dsjson
new file mode 100644
index 00000000000..bbd36e32773
--- /dev/null
+++ b/test/train-sets/issue4669.dsjson
@@ -0,0 +1 @@
+{"c":{"_multi":[{"f":"1"},{"f":"2"}],"_slots":[{"_inc":[0,1]},{"_inc":[1]}]},"_outcomes":[{"_label_cost":1.0,"_a":[0,1],"_p":[0.5,0.5]},{"_label_cost":0.0,"_a":[1],"_p":[1]}]}
\ No newline at end of file
diff --git a/test/train-sets/ref/issue4669_test.stderr b/test/train-sets/ref/issue4669_test.stderr
new file mode 100644
index 00000000000..9b3fb9ce7cf
--- /dev/null
+++ b/test/train-sets/ref/issue4669_test.stderr
@@ -0,0 +1,23 @@
+only testing
+predictions = issue4669_test_pred.txt
+using no cache
+Reading datafile = train-sets/issue4669.dsjson
+num sources = 1
+Num weight bits = 18
+learning rate = 0.5
+initial_t = 1
+power_t = 0.5
+cb_type = mtr
+Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, cb_sample, shared_feature_merger, ccb_explore_adf
+Input label = CCB
+Output pred = DECISION_PROBS
+average  since         example        example        current        current  current
+loss     last          counter         weight          label        predict features
+0.000000 0.000000            1            1.0        0:1,1:0         1,None        9
+
+finished run
+number of examples = 1
+weighted example sum = 1.000000
+weighted label sum = 0.000000
+average loss = 0.000000
+total feature number = 9
diff --git a/test/train-sets/ref/issue4669_test.stdout b/test/train-sets/ref/issue4669_test.stdout
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/test/train-sets/ref/issue4669_test_pred.txt b/test/train-sets/ref/issue4669_test_pred.txt
new file mode 100644
index 00000000000..ba6b9ca942b
--- /dev/null
+++ b/test/train-sets/ref/issue4669_test_pred.txt
@@ -0,0 +1,3 @@
+1:1,0:0
+
+
diff --git a/test/train-sets/ref/issue4669_train.stderr b/test/train-sets/ref/issue4669_train.stderr
new file mode 100644
index 00000000000..48505ae87ae
--- /dev/null
+++ b/test/train-sets/ref/issue4669_train.stderr
@@ -0,0 +1,22 @@
+final_regressor = issue4669.model
+using no cache
+Reading datafile = train-sets/issue4669.dsjson
+num sources = 1
+Num weight bits = 18
+learning rate = 0.5
+initial_t = 0
+power_t = 0.5
+cb_type = mtr
+Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, cb_sample, shared_feature_merger, ccb_explore_adf
+Input label = CCB
+Output pred = DECISION_PROBS
+average  since         example        example        current        current  current
+loss     last          counter         weight          label        predict features
+1.000000 1.000000            1            1.0        0:1,1:0            0,1       12
+
+finished run
+number of examples = 1
+weighted example sum = 1.000000
+weighted label sum = 0.000000
+average loss = 1.000000
+total feature number = 12
diff --git a/test/train-sets/ref/issue4669_train.stdout b/test/train-sets/ref/issue4669_train.stdout
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/vowpalwabbit/core/src/decision_scores.cc b/vowpalwabbit/core/src/decision_scores.cc
index 4bc0810c7c9..02529c5b42c 100644
--- a/vowpalwabbit/core/src/decision_scores.cc
+++ b/vowpalwabbit/core/src/decision_scores.cc
@@ -26,7 +26,8 @@ void print_update(VW::workspace& all, const VW::multi_ex& slots, const VW::decis
   std::string delim;
   for (const auto& slot : decision_scores)
   {
-    pred_ss << delim << slot[0].action;
+    if (slot.empty()) { pred_ss << delim << "None"; }
+    else { pred_ss << delim << slot[0].action; }
     delim = ",";
   }
   all.sd->print_update(*all.output_runtime.trace_message, all.passes_config.holdout_set_off,
diff --git a/vowpalwabbit/core/src/reductions/conditional_contextual_bandit.cc b/vowpalwabbit/core/src/reductions/conditional_contextual_bandit.cc
index b332793ded8..9d15dd4a1ce 100644
--- a/vowpalwabbit/core/src/reductions/conditional_contextual_bandit.cc
+++ b/vowpalwabbit/core/src/reductions/conditional_contextual_bandit.cc
@@ -5,6 +5,7 @@
 #include "vw/core/reductions/conditional_contextual_bandit.h"
 
 #include "vw/config/options.h"
+#include "vw/core/cb.h"
 #include "vw/core/ccb_label.h"
 #include "vw/core/ccb_reduction_features.h"
 #include "vw/core/constant.h"
@@ -213,8 +214,12 @@ void clear_pred_and_label(ccb_data& data)
   data.actions[data.action_with_label]->l.cb.costs.clear();
 }
 
-// true if there exists at least 1 action in the cb multi-example
-bool has_action(VW::multi_ex& cb_ex) { return !cb_ex.empty(); }
+// true if there exists at least 2 examples (since there can only be up to 1
+// shared example), or the 0th example is not shared.
+bool has_action(VW::multi_ex& cb_ex)
+{
+  return cb_ex.size() > 1 || (!cb_ex.empty() && !VW::ec_is_example_header_cb(*cb_ex[0]));
+}
 
 // This function intentionally does not handle increasing the num_features of the example because
 // the output_example function has special logic to ensure the number of features is correctly calculated.
@@ -309,7 +314,11 @@ void build_cb_example(VW::multi_ex& cb_ex, VW::example* slot, const VW::ccb_labe
     // First time seeing this, initialize the vector with falses so we can start setting each included action.
     if (data.include_list.empty()) { data.include_list.assign(data.actions.size(), false); }
 
-    for (uint32_t included_action_id : explicit_includes) { data.include_list[included_action_id] = true; }
+    for (uint32_t included_action_id : explicit_includes)
+    {
+      // The action may be included but not actually exist in the list of possible actions.
+      if (included_action_id < data.actions.size()) { data.include_list[included_action_id] = true; }
+    }
   }
 
   // set the available actions in the cb multi-example
@@ -545,6 +554,9 @@ void update_stats_ccb(const VW::workspace& /* all */, shared_data& sd, const ccb
       if (outcome != nullptr)
       {
         num_labeled++;
+        // It is possible for the prediction to be empty if there were no actions available at the time of taking the
+        // slot decision. In this case it does not contribute to loss.
+        if (preds[i].empty()) { continue; }
         if (i == 0 || data.all_slots_loss_report)
         {
           const float l = VW::get_cost_estimate(outcome->probabilities[VW::details::TOP_ACTION_INDEX], outcome->cost,
diff --git a/vowpalwabbit/core/tests/ccb_test.cc b/vowpalwabbit/core/tests/ccb_test.cc
index d9ba62525bc..86962f51df0 100644
--- a/vowpalwabbit/core/tests/ccb_test.cc
+++ b/vowpalwabbit/core/tests/ccb_test.cc
@@ -145,3 +145,59 @@ TEST(Ccb, InsertInteractionsImplTest)
 
   EXPECT_THAT(result, testing::ContainerEq(expected_after));
 }
+
+TEST(Ccb, ExplicitIncludedActionsNonExistentAction)
+{
+  auto vw = VW::initialize(vwtest::make_args("--ccb_explore_adf", "--quiet"));
+  VW::multi_ex examples;
+  examples.push_back(VW::read_example(*vw, "ccb shared |"));
+  examples.push_back(VW::read_example(*vw, "ccb action |"));
+  examples.push_back(VW::read_example(*vw, "ccb slot 0:10:10 10 |"));
+
+  vw->learn(examples);
+
+  auto& decision_scores = examples[0]->pred.decision_scores;
+  EXPECT_EQ(decision_scores.size(), 1);
+  EXPECT_EQ(decision_scores[0].size(), 0);
+  vw->finish_example(examples);
+}
+
+TEST(Ccb, NoAvailableActions)
+{
+  auto vw = VW::initialize(vwtest::make_args("--ccb_explore_adf", "--quiet", "--all_slots_loss"));
+  {
+    VW::multi_ex examples;
+    examples.push_back(VW::read_example(*vw, "ccb shared |"));
+    examples.push_back(VW::read_example(*vw, "ccb action | a"));
+    examples.push_back(VW::read_example(*vw, "ccb action | b"));
+    examples.push_back(VW::read_example(*vw, "ccb slot 0:-1:0.5 0,1 |"));
+    examples.push_back(VW::read_example(*vw, "ccb slot |"));
+
+    vw->learn(examples);
+
+    auto& decision_scores = examples[0]->pred.decision_scores;
+    EXPECT_EQ(decision_scores.size(), 2);
+    vw->finish_example(examples);
+  }
+
+  {
+    VW::multi_ex examples;
+    examples.push_back(VW::read_example(*vw, "ccb shared |"));
+    examples.push_back(VW::read_example(*vw, "ccb action | a"));
+    examples.push_back(VW::read_example(*vw, "ccb action | b"));
+    examples.push_back(VW::read_example(*vw, "ccb slot 0:-1:0.5 0,1 |"));
+    // This time restrict slot 1 to only have action 0 available
+    examples.push_back(VW::read_example(*vw, "ccb slot 0:-1:0.5 0 |"));
+
+    vw->predict(examples);
+
+    auto& decision_scores = examples[0]->pred.decision_scores;
+    EXPECT_EQ(decision_scores.size(), 2);
+    EXPECT_EQ(decision_scores[0].size(), 2);
+    EXPECT_EQ(decision_scores[0][0].action, 0);
+    EXPECT_EQ(decision_scores[0][1].action, 1);
+    EXPECT_EQ(decision_scores[1].size(), 0);
+
+    vw->finish_example(examples);
+  }
+}
\ No newline at end of file