Merged

24 commits
5dbcca8
Access groups UI
yuneng-jiang Feb 14, 2026
2f3dc48
new badge changes
yuneng-jiang Feb 14, 2026
ca2ce4a
adding tests
yuneng-jiang Feb 14, 2026
ab4b619
fix: add custom_body parameter to endpoint_func in create_pass_throug…
themavik Feb 14, 2026
72848f4
change to model name for backwards compat
yuneng-jiang Feb 14, 2026
7561c1a
addressing comments
yuneng-jiang Feb 14, 2026
ea2e5ff
allow editing of access group names
yuneng-jiang Feb 14, 2026
b48bec0
fix: populate identity fields in proxy admin JWT early-return path (#…
ishaan-jaff Feb 14, 2026
60954d2
Merge remote-tracking branch 'origin' into litellm_access_group_rename
yuneng-jiang Feb 14, 2026
38e2ecb
bump: version 0.4.36 → 0.4.37
yuneng-jiang Feb 14, 2026
41445ff
migration + build files
yuneng-jiang Feb 14, 2026
3ebafc4
Merge pull request #21165 from BerriAI/litellm_access_group_ui
yuneng-jiang Feb 14, 2026
1828e68
Merge pull request #21166 from BerriAI/litellm_access_group_rename
yuneng-jiang Feb 14, 2026
30b28da
Add pyroscope for observability (#21167)
AlexsanderHamir Feb 14, 2026
df54e1b
fix(model_info): Add missing tpm/rpm for Gemini models (#21175)
shin-bot-litellm Feb 14, 2026
3ec91d4
fix(ci): Fix ruff lint error - unused import in vertex_ai_ingestion (…
shin-bot-litellm Feb 14, 2026
e957030
fix(ci): Fix mypy type errors across 6 files (#21179)
shin-bot-litellm Feb 14, 2026
933955b
fix(ci): Fix E2E login button selector - use exact match (#21176)
shin-bot-litellm Feb 14, 2026
f915b15
fix(mypy): Fix type errors across multiple files (#21180)
shin-bot-litellm Feb 14, 2026
08b61f4
[Guardrails] Add guardrail pipeline support for conditional sequentia…
ishaan-jaff Feb 14, 2026
96802e1
Add pipeline flow builder UI for guardrail policies (#21188)
ishaan-jaff Feb 14, 2026
19ef616
fix(responses-bridge): extract list-format system content into instru…
jo-nike Feb 14, 2026
18c79c3
test: add tests for system message extraction in responses bridge
jo-nike Feb 14, 2026
759e9cb
fix: add warning log for unexpected system content types
jo-nike Feb 14, 2026
4 changes: 4 additions & 0 deletions docs/my-website/docs/proxy/config_settings.md
Original file line number Diff line number Diff line change
@@ -775,6 +775,10 @@ router_settings:
| LITELLM_METER_NAME | Name for OTEL Meter |
| LITELLM_OTEL_INTEGRATION_ENABLE_EVENTS | Optionally enable semantic logs for OTEL |
| LITELLM_OTEL_INTEGRATION_ENABLE_METRICS | Optionally enable semantic metrics for OTEL |
| LITELLM_ENABLE_PYROSCOPE | If true, enables Pyroscope CPU profiling. Profiles are sent to PYROSCOPE_SERVER_ADDRESS. Off by default. See [Pyroscope profiling](/proxy/pyroscope_profiling). |
| PYROSCOPE_APP_NAME | Application name reported to Pyroscope. Required when LITELLM_ENABLE_PYROSCOPE is true. No default. |
| PYROSCOPE_SERVER_ADDRESS | Pyroscope server URL to send profiles to. Required when LITELLM_ENABLE_PYROSCOPE is true. No default. |
| PYROSCOPE_SAMPLE_RATE | Optional. Sample rate for Pyroscope profiling (integer). No default; when unset, the pyroscope-io library default is used. |
| LITELLM_MASTER_KEY | Master key for proxy authentication |
| LITELLM_MODE | Operating mode for LiteLLM (e.g., production, development) |
| LITELLM_NON_ROOT | Flag to run LiteLLM in non-root mode for enhanced security in Docker containers |
43 changes: 43 additions & 0 deletions docs/my-website/docs/proxy/pyroscope_profiling.md
@@ -0,0 +1,43 @@
# Grafana Pyroscope CPU profiling

LiteLLM proxy can send continuous CPU profiles to [Grafana Pyroscope](https://grafana.com/docs/pyroscope/latest/) when enabled via environment variables. This is optional and off by default.

## Quick start

1. **Install the optional dependency** (required only when enabling Pyroscope):

```bash
pip install pyroscope-io
```

Or install the proxy extra:

```bash
pip install "litellm[proxy]"
```

2. **Set environment variables** before starting the proxy:

| Variable | Required | Description |
|----------|----------|-------------|
| `LITELLM_ENABLE_PYROSCOPE` | Yes (to enable) | Set to `true` to enable Pyroscope profiling. |
| `PYROSCOPE_APP_NAME` | Yes (when enabled) | Application name shown in the Pyroscope UI. |
| `PYROSCOPE_SERVER_ADDRESS` | Yes (when enabled) | Pyroscope server URL (e.g. `http://localhost:4040`). |
| `PYROSCOPE_SAMPLE_RATE` | No | Sample rate (integer). If unset, the pyroscope-io library default is used. |

3. **Start the proxy**; profiling will begin automatically when the proxy starts.

```bash
export LITELLM_ENABLE_PYROSCOPE=true
export PYROSCOPE_APP_NAME=litellm-proxy
export PYROSCOPE_SERVER_ADDRESS=http://localhost:4040
litellm --config config.yaml
```

4. **View profiles** in the Pyroscope (or Grafana) UI and select your `PYROSCOPE_APP_NAME`.

## Notes

- **Optional dependency**: `pyroscope-io` is an optional dependency. If it is not installed and `LITELLM_ENABLE_PYROSCOPE=true`, the proxy will log a warning and continue without profiling.
- **Platform support**: The `pyroscope-io` package uses a native extension and is not available on all platforms (e.g. Windows is excluded by the package).
- **Other settings**: See [Configuration settings](/proxy/config_settings) for all proxy environment variables.
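
The env-gated startup described above can be sketched as follows. This is a minimal illustration only, assuming the `pyroscope` module and `configure()` API exposed by the `pyroscope-io` package; the actual LiteLLM initialization code may differ:

```python
import logging
import os

logger = logging.getLogger(__name__)


def maybe_start_pyroscope() -> bool:
    """Start Pyroscope CPU profiling if enabled via environment variables.

    Returns True if profiling was started, False otherwise.
    """
    # Off by default: only proceed when explicitly enabled.
    if os.getenv("LITELLM_ENABLE_PYROSCOPE", "").lower() != "true":
        return False

    app_name = os.getenv("PYROSCOPE_APP_NAME")
    server_address = os.getenv("PYROSCOPE_SERVER_ADDRESS")
    if not app_name or not server_address:
        logger.warning(
            "Pyroscope enabled but PYROSCOPE_APP_NAME/PYROSCOPE_SERVER_ADDRESS not set"
        )
        return False

    try:
        import pyroscope  # optional dependency: pip install pyroscope-io
    except ImportError:
        logger.warning("pyroscope-io not installed; continuing without profiling")
        return False

    kwargs = {"application_name": app_name, "server_address": server_address}
    sample_rate = os.getenv("PYROSCOPE_SAMPLE_RATE")
    if sample_rate is not None:
        kwargs["sample_rate"] = int(sample_rate)  # library default used when unset
    pyroscope.configure(**kwargs)
    return True
```

Note how every failure mode degrades to "no profiling" with a warning rather than preventing proxy startup, matching the behavior described in the Notes section.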
3 changes: 2 additions & 1 deletion docs/my-website/sidebars.js
@@ -107,7 +107,8 @@ const sidebars = {
items: [
"proxy/alerting",
"proxy/pagerduty",
"proxy/prometheus"
"proxy/prometheus",
"proxy/pyroscope_profiling"
]
},
{
Binary file not shown.
Binary file not shown.

This file was deleted.

@@ -0,0 +1,3 @@
-- AlterTable
ALTER TABLE "LiteLLM_AccessGroupTable" DROP COLUMN "access_model_ids",
ADD COLUMN "access_model_names" TEXT[] DEFAULT ARRAY[]::TEXT[];
2 changes: 1 addition & 1 deletion litellm-proxy-extras/litellm_proxy_extras/schema.prisma
@@ -948,7 +948,7 @@ model LiteLLM_AccessGroupTable {
description String?

// Resource memberships - explicit arrays per type
access_model_ids String[] @default([])
access_model_names String[] @default([])
access_mcp_server_ids String[] @default([])
access_agent_ids String[] @default([])

4 changes: 2 additions & 2 deletions litellm-proxy-extras/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm-proxy-extras"
version = "0.4.36"
version = "0.4.37"
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
authors = ["BerriAI"]
readme = "README.md"
@@ -22,7 +22,7 @@ requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "0.4.36"
version = "0.4.37"
version_files = [
"pyproject.toml:version",
"../requirements.txt:litellm-proxy-extras==",
@@ -163,15 +163,23 @@ def convert_chat_completion_messages_to_responses_api(
instructions = f"{instructions} {content}"
else:
instructions = content
elif isinstance(content, list):
# Extract text from content blocks (e.g. [{"type": "text", "text": "..."}])
text_parts = []
for block in content:
if isinstance(block, dict) and block.get("type") == "text":
text_parts.append(block.get("text", ""))
elif isinstance(block, str):
text_parts.append(block)
extracted = " ".join(text_parts)
if instructions:
instructions = f"{instructions} {extracted}"
else:
instructions = extracted
Comment on lines +166 to +178 (Contributor):
Missing test coverage for new branch

This PR adds handling for list-format system content, but there are no corresponding unit tests. The existing test file (tests/test_litellm/completion_extras/litellm_responses_transformation/test_completion_extras_litellm_responses_transformation_transformation.py) has no tests for system message handling at all — neither for string content nor for this new list-content path.

Please add a test case that verifies:

  1. A system message with content: [{"type": "text", "text": "Hello"}, {"type": "text", "text": "World"}] produces instructions = "Hello World" and no system input items.
  2. A system message with list content combined with another string system message properly concatenates into instructions.

This would help prevent regressions and satisfy the PR template requirement of "Add at least 1 test in tests/litellm/".


Comment on lines +166 to +178 (Contributor):
Silently drops non-str/list system content

The old else branch handled any non-string content type by passing it through as an input item. The new code only handles str and list — any other content type (e.g., a dict or unexpected type) is now silently ignored. While this is unlikely to occur given the AllMessageValues typing, consider adding a final else clause with a warning log for defensive robustness:

Suggested change (append an else clause after the list-handling branch):

else:
verbose_logger.warning(
f"Unexpected system message content type: {type(content)}"
)


else:
input_items.append(
{
"type": "message",
"role": role,
"content": self._convert_content_to_responses_format(
content, role # type: ignore
),
}
)
verbose_logger.warning(
"Unexpected system message content type: %s. Skipping.",
type(content),
)
elif role == "tool":
# Convert tool message to function call output format
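The list-content extraction added in this diff can be exercised standalone. The sketch below mirrors the same block-joining behavior for illustration; `extract_text` is a hypothetical helper, not a function in the LiteLLM codebase:

```python
from typing import Union


def extract_text(content: Union[str, list]) -> str:
    """Join text from string or list-format message content.

    List blocks like {"type": "text", "text": "..."} and bare strings are
    collected; other block types are ignored.
    """
    if isinstance(content, str):
        return content
    text_parts = []
    for block in content:
        if isinstance(block, dict) and block.get("type") == "text":
            text_parts.append(block.get("text", ""))
        elif isinstance(block, str):
            text_parts.append(block)
    return " ".join(text_parts)
```

This matches the case the reviewer asked to be tested: `[{"type": "text", "text": "Hello"}, {"type": "text", "text": "World"}]` joins to `"Hello World"`.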
7 changes: 4 additions & 3 deletions litellm/llms/vertex_ai/gemini/transformation.py
@@ -533,11 +533,12 @@ def _pop_and_merge_extra_body(data: RequestBody, optional_params: dict) -> None:
"""Pop extra_body from optional_params and shallow-merge into data, deep-merging dict values."""
extra_body: Optional[dict] = optional_params.pop("extra_body", None)
if extra_body is not None:
data_dict: dict = data # type: ignore[assignment]
for k, v in extra_body.items():
if k in data and isinstance(data[k], dict) and isinstance(v, dict):
data[k].update(v)
if k in data_dict and isinstance(data_dict[k], dict) and isinstance(v, dict):
data_dict[k].update(v)
else:
data[k] = v
data_dict[k] = v


def _transform_request_body(
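The merge semantics in the diff above (shallow merge at the top level, one-level-deep merge when both sides hold a dict) can be illustrated in isolation. This is a sketch operating on plain dicts, not the actual LiteLLM function with its `RequestBody` typing:

```python
def pop_and_merge_extra_body(data: dict, optional_params: dict) -> None:
    """Pop 'extra_body' from optional_params and merge it into data.

    Top-level keys are shallow-merged; when both sides hold a dict for the
    same key, the dicts are merged one level deep via update().
    """
    extra_body = optional_params.pop("extra_body", None)
    if extra_body is None:
        return
    for k, v in extra_body.items():
        if k in data and isinstance(data[k], dict) and isinstance(v, dict):
            data[k].update(v)  # merge dict values instead of replacing them
        else:
            data[k] = v  # otherwise insert or overwrite
```

For example, merging `{"generationConfig": {"topK": 5}}` into a body that already has `generationConfig` keeps the existing keys and adds `topK`, rather than clobbering the whole sub-dict.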
2 changes: 1 addition & 1 deletion litellm/proxy/_experimental/mcp_server/server.py
@@ -2029,7 +2029,7 @@ async def handle_streamable_http_mcp(
# Inject masked debug headers when client sends x-litellm-mcp-debug: true
_debug_headers = MCPDebug.maybe_build_debug_headers(
raw_headers=raw_headers,
scope=scope,
scope=dict(scope),
mcp_servers=mcp_servers,
mcp_auth_header=mcp_auth_header,
mcp_server_auth_headers=mcp_server_auth_headers,
13 changes: 13 additions & 0 deletions litellm/proxy/auth/user_api_key_auth.py
@@ -585,7 +585,20 @@ async def _user_api_key_auth_builder( # noqa: PLR0915

if is_proxy_admin:
return UserAPIKeyAuth(
api_key=None,
user_role=LitellmUserRoles.PROXY_ADMIN,
user_id=user_id,
team_id=team_id,
team_alias=(
team_object.team_alias
if team_object is not None
else None
),
team_metadata=team_object.metadata
if team_object is not None
else None,
org_id=org_id,
end_user_id=end_user_id,
parent_otel_span=parent_otel_span,
)

69 changes: 69 additions & 0 deletions litellm/proxy/example_config_yaml/pipeline_test_guardrails.py
@@ -0,0 +1,69 @@
"""
Test guardrails for pipeline E2E testing.

- StrictFilter: blocks any message containing "bad" (case-insensitive)
- PermissiveFilter: always passes (simulates an advanced guardrail that is more lenient)
"""

from typing import Optional, Union

from fastapi import HTTPException

from litellm._logging import verbose_proxy_logger
from litellm.caching.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.utils import CallTypesLiteral


class StrictFilter(CustomGuardrail):
"""Blocks any message containing the word 'bad'."""

async def async_pre_call_hook(
self,
user_api_key_dict: UserAPIKeyAuth,
cache: DualCache,
data: dict,
call_type: CallTypesLiteral,
) -> Optional[Union[Exception, str, dict]]:
for msg in data.get("messages", []):
content = msg.get("content", "")
if isinstance(content, str) and "bad" in content.lower():
verbose_proxy_logger.info("StrictFilter: BLOCKED - found 'bad'")
raise HTTPException(
status_code=400,
detail="StrictFilter: content contains forbidden word 'bad'",
)
verbose_proxy_logger.info("StrictFilter: PASSED")
return data


class PermissiveFilter(CustomGuardrail):
"""Always passes - simulates a lenient advanced guardrail."""

async def async_pre_call_hook(
self,
user_api_key_dict: UserAPIKeyAuth,
cache: DualCache,
data: dict,
call_type: CallTypesLiteral,
) -> Optional[Union[Exception, str, dict]]:
verbose_proxy_logger.info("PermissiveFilter: PASSED (always passes)")
return data


class AlwaysBlockFilter(CustomGuardrail):
"""Always blocks - for testing full escalation->block path."""

async def async_pre_call_hook(
self,
user_api_key_dict: UserAPIKeyAuth,
cache: DualCache,
data: dict,
call_type: CallTypesLiteral,
) -> Optional[Union[Exception, str, dict]]:
verbose_proxy_logger.info("AlwaysBlockFilter: BLOCKED")
raise HTTPException(
status_code=400,
detail="AlwaysBlockFilter: all content blocked",
)
64 changes: 64 additions & 0 deletions litellm/proxy/example_config_yaml/test_pipeline_config.yaml
@@ -0,0 +1,64 @@
model_list:
- model_name: fake-openai-endpoint
litellm_params:
model: openai/gpt-3.5-turbo
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
- model_name: fake-blocked-endpoint
litellm_params:
model: openai/gpt-3.5-turbo
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/

guardrails:
- guardrail_name: "strict-filter"
litellm_params:
guardrail: pipeline_test_guardrails.StrictFilter
mode: "pre_call"
- guardrail_name: "permissive-filter"
litellm_params:
guardrail: pipeline_test_guardrails.PermissiveFilter
mode: "pre_call"
- guardrail_name: "always-block-filter"
litellm_params:
guardrail: pipeline_test_guardrails.AlwaysBlockFilter
mode: "pre_call"

policies:
# Pipeline: strict-filter fails -> escalate to permissive-filter
# If strict fails but permissive passes -> allow the request
content-safety-permissive:
description: "Multi-tier: strict filter with permissive fallback"
guardrails:
add: [strict-filter, permissive-filter]
pipeline:
mode: "pre_call"
steps:
- guardrail: strict-filter
on_fail: next # escalate to permissive
on_pass: allow # clean content proceeds
- guardrail: permissive-filter
on_fail: block # hard block
on_pass: allow # permissive says OK

# Pipeline: strict-filter fails -> escalate to always-block
# Both fail -> block
content-safety-strict:
description: "Multi-tier: strict filter with strict fallback (both block)"
guardrails:
add: [strict-filter, always-block-filter]
pipeline:
mode: "pre_call"
steps:
- guardrail: strict-filter
on_fail: next
on_pass: allow
- guardrail: always-block-filter
on_fail: block
on_pass: allow

policy_attachments:
- policy: content-safety-permissive
models: [fake-openai-endpoint]
- policy: content-safety-strict
models: [fake-blocked-endpoint]
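The `on_pass`/`on_fail` step semantics configured above can be sketched as a small evaluator. This is an illustration of the declared control flow under hypothetical check functions, not the LiteLLM pipeline implementation:

```python
from typing import Callable, Dict, List


def run_pipeline(steps: List[dict], checks: Dict[str, Callable[[str], bool]], text: str) -> str:
    """Walk pipeline steps; each step names a guardrail plus on_pass/on_fail actions.

    Actions: "allow" ends the pipeline allowing the request, "block" rejects it,
    and "next" escalates to the following step.
    """
    for step in steps:
        passed = checks[step["guardrail"]](text)
        action = step["on_pass"] if passed else step["on_fail"]
        if action == "allow":
            return "allow"
        if action == "block":
            return "block"
        # action == "next": fall through to the next step
    return "allow"  # ran off the end without a block


# Stand-ins for the three test guardrails defined in this PR.
checks = {
    "strict-filter": lambda t: "bad" not in t.lower(),
    "permissive-filter": lambda t: True,
    "always-block-filter": lambda t: False,
}

permissive_steps = [
    {"guardrail": "strict-filter", "on_fail": "next", "on_pass": "allow"},
    {"guardrail": "permissive-filter", "on_fail": "block", "on_pass": "allow"},
]
strict_steps = [
    {"guardrail": "strict-filter", "on_fail": "next", "on_pass": "allow"},
    {"guardrail": "always-block-filter", "on_fail": "block", "on_pass": "allow"},
]
```

Under these rules, content containing "bad" escalates past the strict filter and is then allowed by the permissive policy but blocked by the strict one, which is exactly the E2E behavior the two `policy_attachments` target.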
24 changes: 22 additions & 2 deletions litellm/proxy/litellm_pre_call_utils.py
@@ -1642,20 +1642,40 @@ def add_guardrails_from_policy_engine(
f"Policy engine: resolved guardrails: {resolved_guardrails}"
)

if not resolved_guardrails:
return
# Resolve pipelines from matching policies
pipelines = PolicyResolver.resolve_pipelines_for_context(context=context)

# Add resolved guardrails to request metadata
if metadata_variable_name not in data:
data[metadata_variable_name] = {}

# Track pipeline-managed guardrails to exclude from independent execution
pipeline_managed_guardrails: set = set()
if pipelines:
pipeline_managed_guardrails = PolicyResolver.get_pipeline_managed_guardrails(
pipelines
)
data[metadata_variable_name]["_guardrail_pipelines"] = pipelines
data[metadata_variable_name]["_pipeline_managed_guardrails"] = (
pipeline_managed_guardrails
)
verbose_proxy_logger.debug(
f"Policy engine: resolved {len(pipelines)} pipeline(s), "
f"managed guardrails: {pipeline_managed_guardrails}"
)

if not resolved_guardrails and not pipelines:
return

existing_guardrails = data[metadata_variable_name].get("guardrails", [])
if not isinstance(existing_guardrails, list):
existing_guardrails = []

# Combine existing guardrails with policy-resolved guardrails (no duplicates)
# Exclude pipeline-managed guardrails from the flat list
combined = set(existing_guardrails)
combined.update(resolved_guardrails)
combined -= pipeline_managed_guardrails
data[metadata_variable_name]["guardrails"] = list(combined)

verbose_proxy_logger.debug(
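The guardrail-set arithmetic in this diff (existing plus policy-resolved, with pipeline-managed guardrails excluded from the flat list) can be checked in isolation. A sketch of the same set operations, not the actual function:

```python
def combine_guardrails(existing, resolved, pipeline_managed) -> set:
    """Union existing and policy-resolved guardrails (dropping duplicates),
    then exclude guardrails already managed by a pipeline so they are not
    also executed independently."""
    combined = set(existing)
    combined.update(resolved)
    combined -= set(pipeline_managed)
    return combined
```

For example, a guardrail listed both on the request and in a pipeline runs only through the pipeline, never twice.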
21 changes: 15 additions & 6 deletions litellm/proxy/management_endpoints/access_group_endpoints.py
@@ -31,7 +31,7 @@ def _record_to_response(record) -> AccessGroupResponse:
access_group_id=record.access_group_id,
access_group_name=record.access_group_name,
description=record.description,
access_model_ids=record.access_model_ids,
access_model_names=record.access_model_names,
access_mcp_server_ids=record.access_mcp_server_ids,
access_agent_ids=record.access_agent_ids,
assigned_team_ids=record.assigned_team_ids,
@@ -69,7 +69,7 @@ async def create_access_group(
data={
"access_group_name": data.access_group_name,
"description": data.description,
"access_model_ids": data.access_model_ids or [],
"access_model_names": data.access_model_names or [],
"access_mcp_server_ids": data.access_mcp_server_ids or [],
"access_agent_ids": data.access_agent_ids or [],
"assigned_team_ids": data.assigned_team_ids or [],
@@ -153,10 +153,19 @@ async def update_access_group(
for field, value in data.model_dump(exclude_unset=True).items():
update_data[field] = value

record = await prisma_client.db.litellm_accessgrouptable.update(
where={"access_group_id": access_group_id},
data=update_data,
)
try:
record = await prisma_client.db.litellm_accessgrouptable.update(
where={"access_group_id": access_group_id},
data=update_data,
)
except Exception as e:
# Unique constraint violation (e.g. access_group_name already exists).
if "unique constraint" in str(e).lower() or "P2002" in str(e):
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail=f"Access group '{update_data.get('access_group_name', '')}' already exists",
)
raise
return _record_to_response(record)


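The conflict detection added to `update_access_group` can be isolated like this. A sketch of the same string-matching logic; `is_unique_violation` is a hypothetical helper, not part of the LiteLLM codebase:

```python
def is_unique_violation(exc: Exception) -> bool:
    """Detect a Prisma unique-constraint failure from the exception text.

    Prisma surfaces these as error code P2002 or a message containing
    'unique constraint'; either signal is mapped to an HTTP 409 Conflict
    in the endpoint above, and any other error is re-raised unchanged.
    """
    text = str(exc)
    return "unique constraint" in text.lower() or "P2002" in text
```

Matching on the exception text is a pragmatic choice here; inspecting a structured Prisma error code attribute, where available, would be stricter.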