From 37ec9f89956b8ea2651b92d102dcf3938357f9b0 Mon Sep 17 00:00:00 2001
From: Brian Caswell <bcaswell@microsoft.com>
Date: Tue, 24 Feb 2026 22:32:41 +0000
Subject: [PATCH 1/2] fix: shorten guardrail benchmark result filenames for
 Windows long path support

Fixes #21941

The generated result filenames from _save_confusion_results contained
parentheses, dots, and full yaml filenames, producing paths that exceed
the Windows 260-char MAX_PATH limit. Rework the safe_label logic to
produce short {topic}_{method_abbrev} filenames (e.g. insults_cf.json)
while preserving the full label inside the JSON content.

Rename existing tracked result files to match the new naming convention.
---
 ...ied_insults.yaml).json => insults_cf.json} |  0
 ...l_advice.yaml).json => investment_cf.json} |  0
 .../guardrail_benchmarks/test_eval.py         | 21 ++++++++++++++++++-
 3 files changed, 20 insertions(+), 1 deletion(-)
 rename litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/{block_insults_-_contentfilter_(denied_insults.yaml).json => insults_cf.json} (100%)
 rename litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/{block_investment_-_contentfilter_(denied_financial_advice.yaml).json => investment_cf.json} (100%)

diff --git a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/block_insults_-_contentfilter_(denied_insults.yaml).json b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/insults_cf.json
similarity index 100%
rename from litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/block_insults_-_contentfilter_(denied_insults.yaml).json
rename to litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/insults_cf.json
diff --git a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/block_investment_-_contentfilter_(denied_financial_advice.yaml).json b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/investment_cf.json
similarity index 100%
rename from litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/block_investment_-_contentfilter_(denied_financial_advice.yaml).json
rename to litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/investment_cf.json
diff --git a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py
index 01e820163fd..9cce9dc1686 100644
--- a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py
@@ -18,6 +18,7 @@
 
 import json
 import os
+import re
 import time
 from datetime import datetime, timezone
 from typing import Any, Dict, List
@@ -105,7 +106,25 @@ def _print_confusion_report(label: str, metrics: dict, wrong: list) -> None:
 def _save_confusion_results(label: str, metrics: dict, wrong: list, rows: list) -> dict:
     """Save confusion matrix results to a JSON file and return the result dict."""
     os.makedirs(RESULTS_DIR, exist_ok=True)
-    safe_label = label.lower().replace(" ", "_").replace("—", "-")
+    # Build a short, filesystem-safe filename from the label; the full label
+    # is preserved inside the JSON.  Format: {topic}_cf.json for contentfilter,
+    # otherwise {topic}_{method}[_{qualifier}].json
+    parts = label.split("\u2014")
+    topic = parts[0].strip().lower().replace("block ", "").replace(" ", "_")
+    method_full = parts[1].strip() if len(parts) > 1 else ""
+    method_name = re.sub(r"\s*\(.*?\)", "", method_full).strip().lower()
+    qualifier_match = re.search(r"\(([^)]+)\)", method_full)
+    qualifier = qualifier_match.group(1) if qualifier_match else ""
+    qualifier = re.sub(r"\.[a-z]+$", "", qualifier)  # drop .yaml etc.
+    if method_name == "contentfilter":
+        safe_label = f"{topic}_cf"
+    elif qualifier:
+        safe_label = f"{topic}_{method_name}_{qualifier}"
+    else:
+        safe_label = f"{topic}_{method_name}"
+    safe_label = safe_label.replace(" ", "_")
+    safe_label = re.sub(r"[^a-z0-9_\-]", "", safe_label)
+    safe_label = re.sub(r"_+", "_", safe_label).strip("_")
     result = {
         "label": label,
         "timestamp": datetime.now(timezone.utc).isoformat(),

From bcf9acf5ea8cc7b026f2d3d3b272051fc9bc4ffc Mon Sep 17 00:00:00 2001
From: Brian Caswell <bcaswell@gmail.com>
Date: Tue, 24 Feb 2026 18:45:05 -0500
Subject: [PATCH 2/2] fix: keep dots when sanitizing guardrail benchmark
 result filenames in test_eval.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
---
 .../litellm_content_filter/guardrail_benchmarks/test_eval.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py
index 9cce9dc1686..ca66b4da652 100644
--- a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py
@@ -123,7 +123,7 @@ def _save_confusion_results(label: str, metrics: dict, wrong: list, rows: list)
     else:
         safe_label = f"{topic}_{method_name}"
     safe_label = safe_label.replace(" ", "_")
-    safe_label = re.sub(r"[^a-z0-9_\-]", "", safe_label)
+    safe_label = re.sub(r"[^a-z0-9_.\-]", "", safe_label)
     safe_label = re.sub(r"_+", "_", safe_label).strip("_")
     result = {
         "label": label,