@@ -672,16 +672,22 @@ def __init__(
             )
             eviction_config = torch.classes.fbgemm.FeatureEvictConfig(
                 self.kv_zch_params.eviction_policy.eviction_trigger_mode,  # eviction trigger mode, 0: disabled, 1: iteration, 2: mem_util, 3: manual
-                self.kv_zch_params.eviction_policy.eviction_strategy,  # evict_trigger_strategy: 0: timestamp, 1: counter (feature score), 2: counter (feature score) + timestamp, 3: feature l2 norm
+                self.kv_zch_params.eviction_policy.eviction_strategy,  # evict_trigger_strategy: 0: timestamp, 1: counter, 2: counter + timestamp, 3: feature l2 norm, 4: timestamp threshold, 5: feature score
                 self.kv_zch_params.eviction_policy.eviction_step_intervals,  # trigger_step_interval if trigger mode is iteration
                 eviction_mem_threshold_gb,  # mem_util_threshold_in_GB if trigger mode is mem_util
                 self.kv_zch_params.eviction_policy.ttls_in_mins,  # ttls_in_mins for each table if eviction strategy is timestamp
-                self.kv_zch_params.eviction_policy.counter_thresholds,  # counter_thresholds for each table if eviction strategy is feature score
-                self.kv_zch_params.eviction_policy.counter_decay_rates,  # counter_decay_rates for each table if eviction strategy is feature score
+                self.kv_zch_params.eviction_policy.counter_thresholds,  # counter_thresholds for each table if eviction strategy is counter
+                self.kv_zch_params.eviction_policy.counter_decay_rates,  # counter_decay_rates for each table if eviction strategy is counter
+                self.kv_zch_params.eviction_policy.feature_score_counter_decay_rates,  # feature_score_counter_decay_rates for each table if eviction strategy is feature score
+                self.kv_zch_params.eviction_policy.max_training_id_num_per_table,  # max_training_id_num for each table
+                self.kv_zch_params.eviction_policy.target_eviction_percent_per_table,  # target_eviction_percent for each table
                 self.kv_zch_params.eviction_policy.l2_weight_thresholds,  # l2_weight_thresholds for each table if eviction strategy is feature l2 norm
                 table_dims.tolist() if table_dims is not None else None,
+                self.kv_zch_params.eviction_policy.threshold_calculation_bucket_stride,  # threshold_calculation_bucket_stride if eviction strategy is feature score
+                self.kv_zch_params.eviction_policy.threshold_calculation_bucket_num,  # threshold_calculation_bucket_num if eviction strategy is feature score
                 self.kv_zch_params.eviction_policy.interval_for_insufficient_eviction_s,
                 self.kv_zch_params.eviction_policy.interval_for_sufficient_eviction_s,
+                self.kv_zch_params.eviction_policy.interval_for_feature_statistics_decay_s,
             )
             self._ssd_db = torch.classes.fbgemm.DramKVEmbeddingCacheWrapper(
                 self.cache_row_dim,
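The constructor above now reads several additional fields off `self.kv_zch_params.eviction_policy` for the new feature-score strategy (strategy 5). As a rough sketch of how those fields fit together, here is an illustrative policy object for two tables; the attribute names are the ones read above, while the container class and every value are placeholders, not the library's actual config type:

```python
from types import SimpleNamespace

# Illustrative only: feature-score eviction (strategy 5) triggered every
# 1000 iterations (trigger mode 1) for two tables. All values are placeholders.
eviction_policy = SimpleNamespace(
    eviction_trigger_mode=1,
    eviction_strategy=5,
    eviction_step_intervals=1000,
    ttls_in_mins=None,
    counter_thresholds=None,
    counter_decay_rates=None,
    feature_score_counter_decay_rates=[0.99, 0.99],
    max_training_id_num_per_table=[10_000_000, 5_000_000],
    target_eviction_percent_per_table=[0.2, 0.2],
    l2_weight_thresholds=None,
    threshold_calculation_bucket_stride=0.2,
    threshold_calculation_bucket_num=1_000_000,
    interval_for_insufficient_eviction_s=600,
    interval_for_sufficient_eviction_s=1200,
    interval_for_feature_statistics_decay_s=3600,
)
```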
@@ -1013,6 +1019,9 @@ def __init__(
             self.stats_reporter.register_stats(
                 "eviction.feature_table.exec_duration_ms"
             )
+            self.stats_reporter.register_stats(
+                "eviction.feature_table.dry_run_exec_duration_ms"
+            )
             self.stats_reporter.register_stats(
                 "eviction.feature_table.exec_div_full_duration_rate"
             )
@@ -1600,6 +1609,7 @@ def prefetch(
         self,
         indices: Tensor,
         offsets: Tensor,
+        weights: Optional[Tensor] = None,  # TODO: need to update caller
         forward_stream: Optional[torch.cuda.Stream] = None,
         batch_size_per_feature_per_rank: Optional[List[List[int]]] = None,
     ) -> None:
@@ -1625,6 +1635,7 @@ def prefetch(
         self._prefetch(
             indices,
             offsets,
+            weights,
             vbe_metadata,
             forward_stream,
         )
@@ -1633,6 +1644,7 @@ def _prefetch( # noqa C901
         self,
         indices: Tensor,
         offsets: Tensor,
+        weights: Optional[Tensor] = None,
         vbe_metadata: Optional[invokers.lookup_args.VBEMetadata] = None,
         forward_stream: Optional[torch.cuda.Stream] = None,
     ) -> None:
@@ -1660,6 +1672,12 @@ def _prefetch( # noqa C901

         self.timestep += 1
         self.timesteps_prefetched.append(self.timestep)
+        if self.backend_type == BackendType.DRAM and weights is not None:
+            # The DRAM backend supports feature score eviction; if weights are available
+            # in the prefetch call, we set the feature score eviction metadata asynchronously.
+            cloned_linear_cache_indices = linear_cache_indices.clone()
+        else:
+            cloned_linear_cache_indices = None

         # Lookup and virtually insert indices into L1. After this operator,
         # we know:
@@ -1691,6 +1709,18 @@ def _prefetch( # noqa C901
                 lxu_cache_locking_counter=self.lxu_cache_locking_counter,
             )

+        # acc_weights is a 2D tensor, dim0: engagement counter, dim1: show counter.
+        # How do we get unique indices at this point? We only have inserted_indices, evicted_indices, and unique_indices_length.
+        acc_weights = (
+            torch.ops.fbgemm.jagged_acc_weights_and_counts_2d_tensor(
+                weights.view(torch.float32).view(-1, 2),
+                linear_index_inverse_indices,
+                unique_indices_length,
+            )
+            if weights is not None
+            else None
+        )
+
         # Compute cache locations (rows that are hit or missed but can be
         # inserted will have cache locations != -1)
         with record_function("## ssd_tbe_lxu_cache_lookup ##"):
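The fused `jagged_acc_weights_and_counts_2d_tensor` op itself is not shown in this diff; based on how it is called here, it appears to reduce the per-occurrence (engagement, show) pairs into one row per unique index using the inverse mapping from the dedup step. A plain-PyTorch approximation of that assumed behavior, for illustration only:

```python
import torch

def acc_weights_per_unique_index(
    weights_2d: torch.Tensor,       # [N, 2] float32, (engagement, show) per occurrence
    inverse_indices: torch.Tensor,  # [N] int64, position of each occurrence in the unique set
    num_unique: int,
) -> torch.Tensor:
    # Sum the (engagement, show) pair of every occurrence into the row of its
    # unique index; rows past the number of occurrences seen stay zero.
    out = torch.zeros(num_unique, 2, dtype=weights_2d.dtype)
    out.index_add_(0, inverse_indices, weights_2d)
    return out
```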
@@ -2015,6 +2045,16 @@ def _prefetch( # noqa C901
                 is_bwd=False,
             )

+        if self.backend_type == BackendType.DRAM and weights is not None:
+            # Write feature score metadata to DRAM
+            self.record_function_via_dummy_profile(
+                "## ssd_write_feature_score_metadata ##",
+                self.ssd_db.set_feature_score_metadata_cuda,
+                cloned_linear_cache_indices.cpu(),
+                torch.tensor([weights.shape[0]], device="cpu", dtype=torch.long),
+                weights.cpu().view(torch.float32).view(-1, 2),
+            )
+
         # Generate row addresses (pointing to either L1 or the current
         # iteration's scratch pad)
         with record_function("## ssd_generate_row_addrs ##"):
@@ -2157,6 +2197,7 @@ def forward(
         self,
         indices: Tensor,
         offsets: Tensor,
+        weights: Optional[Tensor] = None,
         per_sample_weights: Optional[Tensor] = None,
         feature_requires_grad: Optional[Tensor] = None,
         batch_size_per_feature_per_rank: Optional[List[List[int]]] = None,
@@ -2178,7 +2219,7 @@ def forward(
                 context=self.step,
                 stream=self.ssd_eviction_stream,
             ):
-                self._prefetch(indices, offsets, vbe_metadata)
+                self._prefetch(indices, offsets, weights, vbe_metadata)

         assert len(self.ssd_prefetch_data) > 0
@@ -3738,8 +3779,13 @@ def _report_eviction_stats(self) -> None:
         processed_counts = torch.zeros(T, dtype=torch.int64)
         full_duration_ms = torch.tensor(0, dtype=torch.int64)
         exec_duration_ms = torch.tensor(0, dtype=torch.int64)
+        dry_run_exec_duration_ms = torch.tensor(0, dtype=torch.int64)
         self.ssd_db.get_feature_evict_metric(
-            evicted_counts, processed_counts, full_duration_ms, exec_duration_ms
+            evicted_counts,
+            processed_counts,
+            full_duration_ms,
+            exec_duration_ms,
+            dry_run_exec_duration_ms,
         )

         stats_reporter.report_data_amount(
@@ -3791,6 +3837,12 @@ def _report_eviction_stats(self) -> None:
             duration_ms=exec_duration_ms.item(),
             time_unit="ms",
         )
+        stats_reporter.report_duration(
+            iteration_step=self.step,
+            event_name="eviction.feature_table.dry_run_exec_duration_ms",
+            duration_ms=dry_run_exec_duration_ms.item(),
+            time_unit="ms",
+        )
         if full_duration_ms.item() != 0:
             stats_reporter.report_data_amount(
                 iteration_step=self.step,