From 37ec9f89956b8ea2651b92d102dcf3938357f9b0 Mon Sep 17 00:00:00 2001 From: Brian Caswell Date: Tue, 24 Feb 2026 22:32:41 +0000 Subject: [PATCH 1/2] fix: shorten guardrail benchmark result filenames for Windows long path support Fixes #21941 The generated result filenames from _save_confusion_results contained parentheses, dots, and full yaml filenames, producing paths that exceed the Windows 260-char MAX_PATH limit. Rework the safe_label logic to produce short {topic}_{method_abbrev} filenames (e.g. insults_cf.json) while preserving the full label inside the JSON content. Rename existing tracked result files to match the new naming convention. --- ...ied_insults.yaml).json => insults_cf.json} | 0 ...l_advice.yaml).json => investment_cf.json} | 0 .../guardrail_benchmarks/test_eval.py | 21 ++++++++++++++++++- 3 files changed, 20 insertions(+), 1 deletion(-) rename litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/{block_insults_-_contentfilter_(denied_insults.yaml).json => insults_cf.json} (100%) rename litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/{block_investment_-_contentfilter_(denied_financial_advice.yaml).json => investment_cf.json} (100%) diff --git a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/block_insults_-_contentfilter_(denied_insults.yaml).json b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/insults_cf.json similarity index 100% rename from litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/block_insults_-_contentfilter_(denied_insults.yaml).json rename to litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/insults_cf.json diff --git a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/block_investment_-_contentfilter_(denied_financial_advice.yaml).json b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/investment_cf.json similarity index 100% rename from litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/block_investment_-_contentfilter_(denied_financial_advice.yaml).json rename to litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/results/investment_cf.json diff --git a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py index 01e820163fd..9cce9dc1686 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py +++ b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py @@ -18,6 +18,7 @@ import json import os +import re import time from datetime import datetime, timezone from typing import Any, Dict, List @@ -105,7 +106,25 @@ def _print_confusion_report(label: str, metrics: dict, wrong: list) -> None: def _save_confusion_results(label: str, metrics: dict, wrong: list, rows: list) -> dict: """Save confusion matrix results to a JSON file and return the result dict.""" os.makedirs(RESULTS_DIR, exist_ok=True) - safe_label = label.lower().replace(" ", "_").replace("—", "-") + # Build a short, filesystem-safe filename from the label. + # Full label is preserved inside the JSON; filename just needs to be + # unique and recognisable. Format: {topic}_{method_abbrev}.json + parts = label.split("\u2014") + topic = parts[0].strip().lower().replace("block ", "").replace(" ", "_") + method_full = parts[1].strip() if len(parts) > 1 else "" + method_name = re.sub(r"\s*\(.*?\)", "", method_full).strip().lower() + qualifier_match = re.search(r"\(([^)]+)\)", method_full) + qualifier = qualifier_match.group(1) if qualifier_match else "" + qualifier = re.sub(r"\.[a-z]+$", "", qualifier) # drop .yaml etc. + if method_name == "contentfilter": + safe_label = f"{topic}_cf" + elif qualifier: + safe_label = f"{topic}_{method_name}_{qualifier}" + else: + safe_label = f"{topic}_{method_name}" + safe_label = safe_label.replace(" ", "_") + safe_label = re.sub(r"[^a-z0-9_\-]", "", safe_label) + safe_label = re.sub(r"_+", "_", safe_label).strip("_") result = { "label": label, "timestamp": datetime.now(timezone.utc).isoformat(), From bcf9acf5ea8cc7b026f2d3d3b272051fc9bc4ffc Mon Sep 17 00:00:00 2001 From: Brian Caswell Date: Tue, 24 Feb 2026 18:45:05 -0500 Subject: [PATCH 2/2] Update litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- .../litellm_content_filter/guardrail_benchmarks/test_eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py index 9cce9dc1686..ca66b4da652 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py +++ b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/guardrail_benchmarks/test_eval.py @@ -123,7 +123,7 @@ def _save_confusion_results(label: str, metrics: dict, wrong: list, rows: list) else: safe_label = f"{topic}_{method_name}" safe_label = safe_label.replace(" ", "_") - safe_label = re.sub(r"[^a-z0-9_\-]", "", safe_label) + safe_label = re.sub(r"[^a-z0-9_.\-]", "", safe_label) safe_label = re.sub(r"_+", "_", safe_label).strip("_") result = { "label": label,