diff --git a/analysis/scripts/upload_rollup.py b/analysis/scripts/upload_rollup.py
new file mode 100644
index 00000000..cb2e3209
--- /dev/null
+++ b/analysis/scripts/upload_rollup.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python3
+"""Upload rollup.json files to SRT endpoint as gzipped JSON grouped by benchmark/frontend/backend/mode.
+
+Process:
+1. Find all rollup.json files in the directory
+2. Load each rollup, skipping those without a nodes_summary
+3. Attach the job's sbatch_script.sh content when one is found
+4. Group by (benchmark_type, frontend_type, backend_type, mode)
+5. Write one JSON file per group and upload it gzip-compressed
+
+Usage:
+    python upload_rollup.py <directory> <user_login> [--study-id STUDY] [--endpoint URL]
+
+Example:
+    python upload_rollup.py ./outputs user@example.com
+    python upload_rollup.py ./outputs user@example.com --study-id my-study
+    python upload_rollup.py ./outputs user@example.com --endpoint http://localhost:8000
+"""
+
+import argparse
+from collections import defaultdict
+import gzip
+import json
+import sys
+from pathlib import Path
+
+import requests
+
+
+DEFAULT_WORKDIR = Path("/tmp/srt")
+
+
+def upload_json(
+    json_path: Path,
+    user_login: str,
+    session_id: str,
+    endpoint: str,
+    backend: str,
+    benchmarker: str,
+    frontend: str,
+    mode: str,
+) -> tuple[bool, str]:
+    """Gzip a JSON file and POST it to the endpoint as a multipart upload.
+
+    Args:
+        json_path: Path to the JSON file (read as bytes, gzipped in memory)
+        user_login: User login/email, sent as the "user_login" form field
+        session_id: Session ID, sent as the "session_id" form field
+        endpoint: URL the request is POSTed to
+        backend: Backend type form field
+        benchmarker: Benchmark type form field
+        frontend: Frontend type form field
+        mode: Mode form field (disaggregated or aggregated)
+
+    Returns:
+        Tuple of (success, message); OSError on read and requests.RequestException are reported here
+    """
+    try:
+        compressed_content = gzip.compress(json_path.read_bytes())
+    except OSError as e:
+        return False, f"Failed to read {json_path}: {e}"
+
+    # Use .json.gz extension to indicate gzipped JSON
+    filename = json_path.name + ".gz"
+    form_fields = {
+        "user_login": user_login,
+        "session_id": session_id,
+        "backend": backend,
+        "benchmarker": benchmarker,
+        "frontend": frontend,
+        "mode": mode,
+    }
+    try:
+        response = requests.post(
+            endpoint,
+            files={"file": (filename, compressed_content, "application/gzip")},
+            data=form_fields,
+            timeout=60,
+        )
+    except requests.RequestException as e:
+        return False, f"Request failed: {e}"
+
+    if response.ok:
+        return True, f"HTTP {response.status_code}"
+    return False, f"HTTP {response.status_code}: {response.text}"
+
+
+def find_rollup_files(directory: Path) -> list[Path]:
+    """Return every file named rollup.json found anywhere beneath *directory*."""
+    return [*directory.rglob("rollup.json")]
+
+
+def read_sbatch_script(rollup_path: Path) -> str | None:
+    """Load the sbatch script that sits next to a job's logs directory.
+
+    Layout assumed by the lookup:
+        <job_dir>/logs/rollup.json
+        <job_dir>/sbatch_script.sh
+
+    Args:
+        rollup_path: Path to the rollup.json file
+
+    Returns:
+        Text of sbatch_script.sh, or None when the file is missing
+        or cannot be read
+    """
+    # Two levels up from rollup.json is the job directory.
+    script_file = rollup_path.parent.parent / "sbatch_script.sh"
+
+    if not script_file.exists():
+        return None
+    try:
+        return script_file.read_text()
+    except Exception:
+        return None
+
+
+def main():
+    """CLI entry point: collect rollup.json files, group them, and upload one JSON file per group."""
+    parser = argparse.ArgumentParser(
+        description="Upload rollup.json files to SRT endpoint as gzipped JSON (grouped by benchmarker/frontend/backend/mode)"
+    )
+    parser.add_argument("directory", type=Path, help="Directory to search for rollup.json files")
+    parser.add_argument("user_login", help="User login/email for the upload")
+    parser.add_argument(
+        "--study-id",
+        help="Study ID (default: extracted from first job_name per group)",
+    )
+    parser.add_argument(
+        "--endpoint",
+        default="http://localhost:8000",
+        help="API endpoint (default: http://localhost:8000)",
+    )
+    parser.add_argument(
+        "--workdir",
+        type=Path,
+        help=f"Working directory for output files (default: {DEFAULT_WORKDIR})",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Generate the per-group JSON files but don't upload",
+    )
+    parser.add_argument(
+        "--keep-files",
+        action="store_true",
+        help="Keep generated JSON files after upload (default: delete)",
+    )
+
+    args = parser.parse_args()
+
+    if not args.directory.exists():
+        print(f"Error: Directory does not exist: {args.directory}", file=sys.stderr)
+        sys.exit(1)
+
+    # Setup workdir
+    workdir = args.workdir if args.workdir else DEFAULT_WORKDIR
+    workdir.mkdir(parents=True, exist_ok=True)
+    print(f"Working directory: {workdir}")
+
+    rollup_files = find_rollup_files(args.directory)
+
+    if not rollup_files:
+        print(f"No rollup.json files found in {args.directory}")
+        sys.exit(0)
+
+    print(f"Found {len(rollup_files)} rollup.json files")
+    print(f"User: {args.user_login}")
+    print(f"Endpoint: {args.endpoint}")
+    print(f"Study ID: {args.study_id}")
+    print("---")
+
+    failed_count = 0
+    groups: defaultdict[tuple[str, str, str, str], list[dict]] = defaultdict(list)
+
+    for rollup_path in sorted(rollup_files):
+        print(f"Processing: {rollup_path}")
+
+        try:
+            with open(rollup_path) as f:
+                data = json.load(f)
+        except Exception as e:
+            print(f"  ✗ Failed to read: {e}")
+            failed_count += 1
+            continue
+
+        # Skip if no nodes_summary (job likely failed/cancelled)
+        if not data.get("nodes_summary"):
+            print("  ⚠ Skipping: no nodes_summary (job may have failed)")
+            continue
+
+        mode = "aggregated" if data.get("is_aggregated") else "disaggregated"
+        try:
+            group = (data["benchmark_type"], data["frontend_type"], data["backend_type"], mode)
+        except KeyError as e:
+            print(f"  ⚠ Skipping: missing required field {e}")
+            continue
+        # Attach the job's sbatch script to the rollup when one exists
+        if sbatch_script := read_sbatch_script(rollup_path):
+            data["sbatch_script"] = sbatch_script
+        groups[group].append(data)
+
+    print("---")
+    print(f"Total rollups processed: {len(rollup_files)}")
+    print(f"Total groups: {len(groups)}")
+    for group, rows in groups.items():
+        print(f"  {group}: {len(rows)}")
+    print(f"Failed to read: {failed_count}")
+
+    if not groups:
+        print("No data to write")
+        return
+
+    success_count = 0
+    upload_failed_count = 0
+    generated_files: list[Path] = []
+
+    for group, rows in groups.items():
+        print(f"\n--- Group: {group} ---")
+        print(f"  Rows: {len(rows)}")
+        group_str = "_".join(group)
+        group_filename = f"rollup_{group_str}.json"
+        group_path = workdir / group_filename
+        with open(group_path, "w") as f:
+            json.dump(rows, f, indent=1)
+        generated_files.append(group_path)
+        print(f"  ✓ Created: {group_path}")
+
+        if args.dry_run:
+            print("  Dry run - skipping upload")
+            continue
+
+        # Determine study_id - use provided or first job_name in group
+        study_id = args.study_id or rows[0]["job_name"]
+
+        print(f"  Uploading with study_id: {study_id}")
+
+        success, message = upload_json(
+            group_path,
+            args.user_login,
+            study_id,
+            args.endpoint,
+            benchmarker=group[0],
+            frontend=group[1],
+            backend=group[2],
+            mode=group[3],
+        )
+
+        if success:
+            print(f"  ✓ Uploaded ({message})")
+            success_count += 1
+        else:
+            print(f"  ✗ Upload failed: {message}")
+            upload_failed_count += 1
+
+    # Cleanup unless --keep-files; only files written by this run are removed
+    if not args.keep_files and not args.dry_run:
+        removed = 0
+        for fp in generated_files:
+            try:
+                fp.unlink()
+                removed += 1
+            except OSError:
+                print(f"  ✗ Failed to delete: {fp}")
+        print(f"\nCleaned up {removed} generated files")
+
+    print("\n" + "=" * 50)
+    print(f"Total groups: {len(groups)}")
+    if not args.dry_run:
+        print(f"Successful uploads: {success_count}")
+        print(f"Failed uploads: {upload_failed_count}")
+
+    if upload_failed_count > 0:
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/analysis/srtlog/parsers/__init__.py b/analysis/srtlog/parsers/__init__.py
new file mode 100644
index 00000000..e61c1438
--- /dev/null
+++ b/analysis/srtlog/parsers/__init__.py
@@ -0,0 +1,247 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Parser protocols and registries for benchmark and node log parsing.
+
+This module provides extensible parsing infrastructure:
+- BenchmarkParser: Parses benchmark.out files based on benchmark type
+- NodeParser: Parses prefill/decode/agg logs based on backend type
+
+Usage:
+    from analysis.srtlog.parsers import get_benchmark_parser, get_node_parser
+
+    # Get parser by type
+    bench_parser = get_benchmark_parser("sa-bench")
+    results = bench_parser.parse(benchmark_out_path)
+
+    node_parser = get_node_parser("sglang")
+    nodes = node_parser.parse_logs(log_dir)
+"""
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Protocol
+
+from analysis.srtlog.models import NodeMetrics
+
+
+@dataclass
+class BenchmarkLaunchCommand:
+    """Launch command recovered from a benchmark log.
+
+    Source: logs/benchmark.out
+
+    Two fixed fields identify the command; every parsed flag lives in extra_args.
+    """
+
+    benchmark_type: str  # benchmark type name, e.g. "sa-bench"
+    raw_command: str  # launch command text
+
+    # Parsed arguments keyed by name; each instance gets its own dict
+    extra_args: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class NodeLaunchCommand:
+    """Launch command recovered from a node worker log.
+
+    Source: logs/{node}_{worker_type}_{worker_id}.out or .err
+
+    Three fixed fields identify the command; every parsed flag lives in extra_args.
+    """
+
+    backend_type: str  # backend type name, e.g. "sglang"
+    worker_type: str  # prefill, decode, agg
+    raw_command: str  # launch command text
+
+    # Parsed arguments keyed by name; each instance gets its own dict
+    extra_args: dict[str, Any] = field(default_factory=dict)
+
+
+class BenchmarkParserProtocol(Protocol):
+    """Protocol for benchmark output parsers.
+
+    Each benchmark type (sa-bench, mooncake-router, etc.) provides a parser class
+    matching this interface; typing.Protocol checks are structural, so no subclassing is needed.
+    """
+
+    @property
+    def benchmark_type(self) -> str:
+        """Return the benchmark type name this parser handles (e.g. "sa-bench")."""
+        ...
+
+    def parse(self, benchmark_out_path: Path) -> dict[str, Any]:
+        """Parse benchmark.out file and return results.
+
+        Args:
+            benchmark_out_path: Path to the benchmark.out file
+
+        Returns:
+            Dict with benchmark results; value shapes are parser-specific. Keys include:
+            - output_tps: Output tokens per second
+            - mean_ttft_ms: Mean time to first token (ms)
+            - mean_itl_ms: Mean inter-token latency (ms)
+            - etc.
+        """
+        ...
+
+    def parse_launch_command(self, log_content: str) -> BenchmarkLaunchCommand | None:
+        """Parse the benchmark launch command from log content.
+
+        Args:
+            log_content: Full text of the benchmark log file (not a path)
+
+        Returns:
+            BenchmarkLaunchCommand with parsed parameters, or None if not found
+        """
+        ...
+
+    def parse_result_json(self, json_path: Path) -> dict[str, Any]:
+        """Parse a single benchmark result JSON file.
+
+        Args:
+            json_path: Path to a result JSON file
+
+        Returns:
+            Dict with the metrics parsed from that file
+        """
+        ...
+
+
+class NodeParserProtocol(Protocol):
+    """Protocol for node log parsers.
+
+    Each backend type (sglang, trtllm, etc.) provides a parser class matching this
+    interface for prefill/decode/agg logs; typing.Protocol checks are structural.
+    """
+
+    @property
+    def backend_type(self) -> str:
+        """Return the backend type name this parser handles (e.g. "sglang")."""
+        ...
+
+    def parse_logs(self, log_dir: Path) -> list[NodeMetrics]:
+        """Parse all node logs in a directory.
+
+        Args:
+            log_dir: Directory containing prefill/decode/agg .out/.err files
+
+        Returns:
+            List of NodeMetrics objects, one per worker
+        """
+        ...
+
+    def parse_single_log(self, log_path: Path) -> NodeMetrics | None:
+        """Parse a single node log file.
+
+        Args:
+            log_path: Path to a prefill/decode/agg log file
+
+        Returns:
+            NodeMetrics object or None if parsing failed
+        """
+        ...
+
+    def parse_launch_command(self, log_content: str, worker_type: str = "unknown") -> NodeLaunchCommand | None:
+        """Parse the worker launch command from log content.
+
+        Args:
+            log_content: Full text of the worker log file (not a path)
+            worker_type: Type of worker (prefill, decode, agg); defaults to "unknown"
+
+        Returns:
+            NodeLaunchCommand with parsed parameters, or None if not found
+        """
+        ...
+
+
+# Benchmark type name -> parser class, populated by @register_benchmark_parser
+_benchmark_parsers: dict[str, type] = {}
+
+# Backend type name -> parser class, populated by @register_node_parser
+_node_parsers: dict[str, type] = {}
+
+
+def register_benchmark_parser(benchmark_type: str):
+    """Build a class decorator that files a parser class under *benchmark_type*.
+
+    Usage:
+        @register_benchmark_parser("sa-bench")
+        class SABenchParser:
+            ...
+    """
+
+    def _register(parser_cls):
+        _benchmark_parsers.update({benchmark_type: parser_cls})
+        return parser_cls
+
+    return _register
+
+
+def register_node_parser(backend_type: str):
+    """Build a class decorator that files a parser class under *backend_type*.
+
+    Usage:
+        @register_node_parser("sglang")
+        class SGLangNodeParser:
+            ...
+    """
+
+    def _register(parser_cls):
+        _node_parsers.update({backend_type: parser_cls})
+        return parser_cls
+
+    return _register
+
+
+def get_benchmark_parser(benchmark_type: str) -> BenchmarkParserProtocol:
+    """Instantiate the parser registered for a benchmark type.
+
+    Args:
+        benchmark_type: Type of benchmark (e.g., "sa-bench", "mooncake-router")
+
+    Returns:
+        A new instance of the registered parser class, built with no arguments
+
+    Raises:
+        ValueError: If no parser registered for the benchmark type
+    """
+    if benchmark_type in _benchmark_parsers:
+        return _benchmark_parsers[benchmark_type]()
+    known_types = ", ".join(_benchmark_parsers) or "none"
+    raise ValueError(f"No benchmark parser registered for '{benchmark_type}'. Available: {known_types}")
+
+
+def get_node_parser(backend_type: str) -> NodeParserProtocol:
+    """Instantiate the parser registered for a backend type.
+
+    Args:
+        backend_type: Type of backend (e.g., "sglang", "trtllm")
+
+    Returns:
+        A new instance of the registered parser class, built with no arguments
+
+    Raises:
+        ValueError: If no parser registered for the backend type
+    """
+    if backend_type in _node_parsers:
+        return _node_parsers[backend_type]()
+    known_types = ", ".join(_node_parsers) or "none"
+    raise ValueError(f"No node parser registered for '{backend_type}'. Available: {known_types}")
+
+
+def list_benchmark_parsers() -> list[str]:
+    """Return the benchmark type names that currently have a registered parser."""
+    return list(_benchmark_parsers)
+
+
+def list_node_parsers() -> list[str]:
+    """Return the backend type names that currently have a registered parser."""
+    return list(_node_parsers)
+
+
+# Import parsers to trigger registration
+from analysis.srtlog.parsers.benchmark import *  # noqa: E402, F401, F403
+from analysis.srtlog.parsers.nodes import *  # noqa: E402, F401, F403
+
diff --git a/analysis/srtlog/parsers/benchmark/__init__.py b/analysis/srtlog/parsers/benchmark/__init__.py
new file mode 100644
index 00000000..302bed0a
--- /dev/null
+++ b/analysis/srtlog/parsers/benchmark/__init__.py
@@ -0,0 +1,10 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Benchmark output parsers."""
+
+from analysis.srtlog.parsers.benchmark.mooncake_router import MooncakeRouterParser
+from analysis.srtlog.parsers.benchmark.sa_bench import SABenchParser
+
+__all__ = ["SABenchParser", "MooncakeRouterParser"]
+
diff --git a/analysis/srtlog/parsers/benchmark/mooncake_router.py b/analysis/srtlog/parsers/benchmark/mooncake_router.py
new file mode 100644
index 00000000..9f1a7e6c
--- /dev/null
+++ b/analysis/srtlog/parsers/benchmark/mooncake_router.py
@@ -0,0 +1,284 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Mooncake Router benchmark output parser."""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from analysis.srtlog.parsers import register_benchmark_parser
+
+if TYPE_CHECKING:
+    from analysis.srtlog.parsers import BenchmarkLaunchCommand
+
+logger = logging.getLogger(__name__)
+
+
+@register_benchmark_parser("mooncake-router")
+class MooncakeRouterParser:
+    """Parser for Mooncake Router benchmark output.
+
+    Parses benchmark.out files and AIPerf result JSON files from mooncake-router runs.
+    """
+
+    @property
+    def benchmark_type(self) -> str:
+        return "mooncake-router"
+
+    def parse(self, benchmark_out_path: Path) -> dict[str, Any]:
+        """Parse benchmark.out file for mooncake-router results.
+
+        Args:
+            benchmark_out_path: Path to benchmark.out file
+
+        Returns:
+            Dict with aggregated benchmark results; a metric stays None when no line matches
+        """
+        results: dict[str, Any] = {
+            "benchmark_type": self.benchmark_type,
+            "output_tps": None,
+            "request_throughput": None,
+            "mean_ttft_ms": None,
+            "mean_itl_ms": None,
+            "total_requests": None,
+        }
+
+        if not benchmark_out_path.exists():
+            logger.warning("benchmark.out not found: %s", benchmark_out_path)
+            return results
+
+        try:
+            content = benchmark_out_path.read_text()
+
+            # Parse mooncake-router output patterns, e.g. "Request throughput: 3.37 req/s"
+            # and "Output token throughput: 1150.92 tok/s". The number group only accepts a
+            # well-formed decimal, so "3.37." or "1.2.3" cannot make float() raise mid-scan.
+            number = r"(\d+(?:\.\d*)?|\.\d+)"
+            req_tpt_pattern = r"[Rr]equest\s+throughput[:\s]+" + number
+            out_tpt_pattern = r"[Oo]utput\s+(?:token\s+)?throughput[:\s]+" + number
+            ttft_pattern = r"[Tt]ime\s+to\s+first\s+token[:\s]+" + number
+            itl_pattern = r"[Ii]nter.?token\s+latency[:\s]+" + number
+
+            for line in content.split("\n"):
+                if req_tpt_match := re.search(req_tpt_pattern, line):
+                    results["request_throughput"] = float(req_tpt_match.group(1))
+                if out_tpt_match := re.search(out_tpt_pattern, line):
+                    results["output_tps"] = float(out_tpt_match.group(1))
+                if ttft_match := re.search(ttft_pattern, line):
+                    results["mean_ttft_ms"] = float(ttft_match.group(1))
+                if itl_match := re.search(itl_pattern, line):
+                    results["mean_itl_ms"] = float(itl_match.group(1))
+
+        except Exception as e:
+            logger.warning("Failed to parse benchmark.out: %s", e)
+
+        return results
+
+    def parse_result_json(self, json_path: Path) -> dict[str, Any]:
+        """Parse an AIPerf result JSON file.
+
+        Args:
+            json_path: Path to profile_export_aiperf.json
+
+        Returns:
+            Dict with benchmark metrics (empty if the file cannot be read or parsed)
+        """
+        result: dict[str, Any] = {}
+
+        try:
+            with open(json_path) as f:
+                data = json.load(f)
+
+            # AIPerf format has nested structure with unit and values
+            result = {
+                "concurrency": 0,  # Mooncake uses open-loop, no fixed concurrency
+                # Throughput metrics
+                "output_tps": self._get_metric(data, "output_token_throughput", "avg"),
+                "request_throughput": self._get_metric(data, "request_throughput", "avg"),
+                # Mean latencies (values are copied as reported; no unit conversion happens here)
+                "mean_ttft_ms": self._get_metric(data, "time_to_first_token", "avg"),
+                "mean_tpot_ms": self._get_metric(data, "inter_token_latency", "avg"),
+                "mean_itl_ms": self._get_metric(data, "inter_token_latency", "avg"),
+                "mean_e2el_ms": self._get_metric(data, "request_latency", "avg"),
+                # Median latencies
+                "median_ttft_ms": self._get_metric(data, "time_to_first_token", "p50"),
+                "median_tpot_ms": self._get_metric(data, "inter_token_latency", "p50"),
+                "median_itl_ms": self._get_metric(data, "inter_token_latency", "p50"),
+                "median_e2el_ms": self._get_metric(data, "request_latency", "p50"),
+                # P99 latencies
+                "p99_ttft_ms": self._get_metric(data, "time_to_first_token", "p99"),
+                "p99_tpot_ms": self._get_metric(data, "inter_token_latency", "p99"),
+                "p99_itl_ms": self._get_metric(data, "inter_token_latency", "p99"),
+                "p99_e2el_ms": self._get_metric(data, "request_latency", "p99"),
+                # Std dev latencies
+                "std_ttft_ms": self._get_metric(data, "time_to_first_token", "std"),
+                "std_itl_ms": self._get_metric(data, "inter_token_latency", "std"),
+                "std_e2el_ms": self._get_metric(data, "request_latency", "std"),
+                # Request count
+                "completed": self._get_metric(data, "request_count", "avg"),
+                "num_prompts": self._get_metric(data, "request_count", "avg"),
+            }
+
+            # Also extract per-user throughput if available (0.0 is a valid reading, so test for None)
+            tps_per_user = self._get_metric(data, "output_token_throughput_per_user", "avg")
+            if tps_per_user is not None:
+                result["output_tps_per_user"] = tps_per_user
+
+        except Exception as e:
+            logger.warning("Failed to parse %s: %s", json_path, e)
+
+        return result
+
+    def _get_metric(self, data: dict, metric_name: str, stat: str) -> float | None:
+        """Extract a metric value from AIPerf data structure.
+
+        Args:
+            data: AIPerf JSON data
+            metric_name: Name of the metric (e.g., "time_to_first_token")
+            stat: Statistic to extract (e.g., "avg", "p50", "p99")
+
+        Returns:
+            Metric value or None if not found
+        """
+        try:
+            metric_data = data.get(metric_name, {})
+            if isinstance(metric_data, dict):
+                value = metric_data.get(stat)
+                if value is not None:
+                    return float(value)
+        except (KeyError, TypeError, ValueError):
+            pass
+        return None
+
+    def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]:
+        """Parse AIPerf result files in a directory.
+
+        Args:
+            result_dir: Directory containing profile_export_aiperf.json
+
+        Returns:
+            List of result dicts, one per file that yielded an output_tps value
+        """
+        results = []
+
+        # Reuse the shared discovery helper; files without output_tps are dropped
+        for json_file in self.find_aiperf_results(result_dir):
+            result = self.parse_result_json(json_file)
+            if result.get("output_tps") is not None:
+                results.append(result)
+
+        return results
+
+    def find_aiperf_results(self, log_dir: Path) -> list[Path]:
+        """Find all AIPerf result files in a log directory.
+
+        Args:
+            log_dir: Root log directory
+
+        Returns:
+            List of paths to profile_export_aiperf.json files
+        """
+        return list(log_dir.rglob("profile_export_aiperf.json"))
+
+    def parse_launch_command(self, log_content: str) -> BenchmarkLaunchCommand | None:
+        """Parse the mooncake-router launch command from log content.
+
+        Looks for command lines like:
+            [CMD] aiperf profile --model ... --url ...
+            genai-perf profile --model ... --endpoint ...
+
+        Also parses header format:
+            Endpoint: http://localhost:8000
+            Model: Qwen/Qwen3-32B
+            Workload: conversation
+
+        Args:
+            log_content: Content of the benchmark log file
+
+        Returns:
+            BenchmarkLaunchCommand with parsed parameters, or None if not found
+        """
+        from analysis.srtlog.parsers import BenchmarkLaunchCommand
+
+        # First, try to find [CMD] tagged command (preferred - from our scripts)
+        cmd_match = re.search(r"\[CMD\]\s*(.+)$", log_content, re.MULTILINE)
+        raw_command = cmd_match.group(1).strip() if cmd_match else None
+
+        # Fallback: pattern to match genai-perf, aiperf or mooncake-router commands
+        # aiperf format: aiperf profile -m "Model" --url "http://..." --concurrency 10
+        if not raw_command:
+            command_patterns = [
+                r"(aiperf\s+profile\s+[^\n]+)",
+                r"(genai-perf\s+profile\s+[^\n]+)",
+                r"(python[3]?\s+.*genai_perf[^\n]+)",
+                r"(python[3]?\s+.*aiperf[^\n]+)",
+                r"(mooncake-router\s+[^\n]+)",
+            ]
+
+            for pattern in command_patterns:
+                match = re.search(pattern, log_content, re.IGNORECASE)
+                if match:
+                    raw_command = match.group(1).strip()
+                    break
+
+        # If no command found, fall back to a placeholder when the benchmark header is present
+        if not raw_command and "Mooncake Router Benchmark" in log_content:
+            raw_command = "mooncake-router-benchmark (from header)"
+
+        if not raw_command:
+            return None
+
+        extra_args: dict[str, Any] = {}
+
+        # Flags are searched in the command minus any "python -m <module>" launcher part,
+        # otherwise the interpreter's own -m would be mistaken for the short model flag.
+        args_text = re.sub(r"\bpython[\d.]*\s+-m\s+\S+", "", raw_command, flags=re.IGNORECASE)
+        # Parse aiperf/genai-perf arguments from command line
+        # Supports both --model and -m formats, quoted and unquoted values
+        arg_patterns = {
+            "model": r"(?:--model|(?<![\w-])-m)[=\s]+[\"']?([^\"'\s]+)[\"']?",
+            "base_url": r"--url[=\s]+[\"']?([^\"'\s]+)[\"']?",
+            "num_prompts": r"--(?:num-prompts|request-count|request)[=\s]+(\d+)",
+            "request_rate": r"--request-rate[=\s]+([^\s]+)",
+            "max_concurrency": r"--concurrency[=\s]+(\d+)",
+            "input_len": r"--(?:synthetic-input-tokens-mean|input-sequence-length|isl)[=\s]+(\d+)",
+            "output_len": r"--(?:output-tokens-mean|output-sequence-length|osl)[=\s]+(\d+)",
+        }
+
+        for field, pattern in arg_patterns.items():
+            match = re.search(pattern, args_text)
+            if match:
+                value: Any = match.group(1)
+                if field in ("num_prompts", "max_concurrency", "input_len", "output_len"):
+                    value = int(value)
+                elif field == "request_rate" and value != "inf":
+                    try:
+                        value = float(value)
+                    except ValueError:
+                        pass
+                extra_args[field] = value
+
+        # Also parse from header format (srtctl-style)
+        header_patterns = {
+            "model": r"^Model:\s*(.+)$",
+            "base_url": r"^Endpoint:\s*(.+)$",
+            "dataset": r"^Workload:\s*(.+)$",
+        }
+
+        for field, pattern in header_patterns.items():
+            if field not in extra_args:
+                match = re.search(pattern, log_content, re.MULTILINE)
+                if match:
+                    extra_args[field] = match.group(1).strip()
+
+        return BenchmarkLaunchCommand(
+            benchmark_type=self.benchmark_type,
+            raw_command=raw_command,
+            extra_args=extra_args,
+        )
+
diff --git a/analysis/srtlog/parsers/benchmark/sa_bench.py b/analysis/srtlog/parsers/benchmark/sa_bench.py
new file mode 100644
index 00000000..ecb87b33
--- /dev/null
+++ b/analysis/srtlog/parsers/benchmark/sa_bench.py
@@ -0,0 +1,277 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""SA-Bench benchmark output parser."""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from analysis.srtlog.parsers import register_benchmark_parser
+
+if TYPE_CHECKING:
+    from analysis.srtlog.parsers import BenchmarkLaunchCommand
+
+logger = logging.getLogger(__name__)
+
+
+@register_benchmark_parser("sa-bench")
+class SABenchParser:
+    """Parser for SA-Bench benchmark output.
+
+    Parses benchmark.out files and result JSON files from SA-Bench runs.
+    """
+
+    @property
+    def benchmark_type(self) -> str:
+        """Return the identifier of this benchmark parser, ``"sa-bench"``."""
+        return "sa-bench"
+
+    def parse(self, benchmark_out_path: Path) -> dict[str, Any]:
+        """Parse benchmark.out file for SA-Bench results.
+
+        Scans the file line by line for summary lines such as
+        ``Concurrency: 100, Throughput: 5000 tok/s, TTFT: 150ms, ITL: 20ms``.
+        A line contributes a data point only when it carries both a
+        concurrency and a throughput value; matching is case-insensitive.
+
+        Args:
+            benchmark_out_path: Path to benchmark.out file
+
+        Returns:
+            Dict with aggregated benchmark results. ``concurrencies``,
+            ``output_tps``, ``mean_ttft_ms`` and ``mean_itl_ms`` are parallel
+            lists with one entry per data point; a TTFT or ITL value missing
+            from a summary line is recorded as ``None`` so index ``i`` refers
+            to the same data point in all four lists. The other metric lists
+            are never filled by this method. A missing or unreadable file
+            yields the empty template.
+        """
+        results: dict[str, Any] = {
+            "benchmark_type": self.benchmark_type,
+            "concurrencies": [],
+            "output_tps": [],
+            "mean_ttft_ms": [],
+            "mean_itl_ms": [],
+            "mean_tpot_ms": [],
+            "p99_ttft_ms": [],
+            "p99_itl_ms": [],
+            "request_throughput": [],
+            "completed_requests": [],
+        }
+
+        if not benchmark_out_path.exists():
+            logger.warning("benchmark.out not found: %s", benchmark_out_path)
+            return results
+
+        # Compiled once instead of once per line; the "Output"/"Mean" prefixes are optional.
+        concurrency_re = re.compile(r"Concurrency[:\s]+(\d+)", re.IGNORECASE)
+        throughput_re = re.compile(r"(?:Output\s+)?[Tt]hroughput[:\s]+([\d.]+)", re.IGNORECASE)
+        ttft_re = re.compile(r"(?:Mean\s+)?TTFT[:\s]+([\d.]+)", re.IGNORECASE)
+        itl_re = re.compile(r"(?:Mean\s+)?ITL[:\s]+([\d.]+)", re.IGNORECASE)
+
+        try:
+            # Benchmark logs may contain stray non-UTF-8 bytes; replace them
+            # rather than lose every data point in the file.
+            content = benchmark_out_path.read_text(encoding="utf-8", errors="replace")
+        except OSError as e:
+            logger.warning("Failed to parse benchmark.out: %s", e)
+            return results
+
+        for line in content.split("\n"):
+            conc_match = concurrency_re.search(line)
+            tpt_match = throughput_re.search(line)
+            if not (conc_match and tpt_match):
+                continue
+
+            ttft_match = ttft_re.search(line)
+            itl_match = itl_re.search(line)
+
+            # Convert everything before appending anything, so a bad number
+            # can never leave the parallel lists with different lengths.
+            try:
+                throughput = float(tpt_match.group(1))
+                ttft = float(ttft_match.group(1)) if ttft_match else None
+                itl = float(itl_match.group(1)) if itl_match else None
+            except ValueError:
+                # [\d.]+ also accepts text like "1.2.3"; skip only this line.
+                logger.warning("Skipping unparsable summary line in %s: %r", benchmark_out_path, line)
+                continue
+
+            results["concurrencies"].append(int(conc_match.group(1)))
+            results["output_tps"].append(throughput)
+            results["mean_ttft_ms"].append(ttft)
+            results["mean_itl_ms"].append(itl)
+
+        return results
+
+    def parse_result_json(self, json_path: Path) -> dict[str, Any]:
+        """Load one SA-Bench result JSON file and keep the known metric fields.
+
+        Args:
+            json_path: Path to result JSON (e.g., result_c100.json)
+
+        Returns:
+            Dict keyed by the original JSON field names for this concurrency
+            level; a field absent from the file maps to None. An empty dict
+            is returned (and a warning logged) when the file cannot be read
+            or parsed.
+        """
+        # Field names are kept identical to the source JSON for compatibility
+        # with downstream processing in _build_rollup_summary.
+        wanted_fields = (
+            "max_concurrency",
+            # Throughput metrics
+            "output_throughput",
+            "total_token_throughput",
+            "request_throughput",
+            "request_goodput",
+            "request_rate",
+            # Mean latencies
+            "mean_ttft_ms",
+            "mean_tpot_ms",
+            "mean_itl_ms",
+            "mean_e2el_ms",
+            # Median latencies
+            "median_ttft_ms",
+            "median_tpot_ms",
+            "median_itl_ms",
+            "median_e2el_ms",
+            # P99 latencies
+            "p99_ttft_ms",
+            "p99_tpot_ms",
+            "p99_itl_ms",
+            "p99_e2el_ms",
+            # Std dev latencies
+            "std_ttft_ms",
+            "std_tpot_ms",
+            "std_itl_ms",
+            "std_e2el_ms",
+            # Token counts
+            "total_input_tokens",
+            "total_output_tokens",
+            # Metadata
+            "duration",
+            "completed",
+            "num_prompts",
+        )
+
+        try:
+            with open(json_path) as handle:
+                payload = json.load(handle)
+            return {name: payload.get(name) for name in wanted_fields}
+        except Exception as e:
+            logger.warning("Failed to parse %s: %s", json_path, e)
+            return {}
+
+    def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]:
+        """Parse every ``*.json`` file directly inside a benchmark result directory.
+
+        Args:
+            result_dir: Directory containing result_*.json files
+
+        Returns:
+            List of result dicts sorted by ascending ``max_concurrency``.
+            Files whose parsed result has no ``max_concurrency`` (including
+            files that failed to parse) are left out.
+        """
+        parsed = (self.parse_result_json(path) for path in result_dir.glob("*.json"))
+        usable = [entry for entry in parsed if entry.get("max_concurrency") is not None]
+
+        def concurrency_of(entry: dict[str, Any]) -> Any:
+            return entry.get("max_concurrency", 0) or 0
+
+        return sorted(usable, key=concurrency_of)
+
+    def parse_launch_command(self, log_content: str) -> BenchmarkLaunchCommand | None:
+        """Parse the SA-Bench launch command from log content.
+
+        The raw command is the first of the following found in the log:
+            [CMD] python -m sglang.bench_serving --model ... --base-url ...
+            python -m sglang.bench_serving --model ... --base-url ...
+            sa-bench ...  /  python ... bench_serving.py ...
+            SA-Bench Config: endpoint=http://localhost:8000; isl=8192; osl=1024; ...
+
+        Arguments are read from ``--flag value`` pairs in the raw command
+        first; fields still missing are then filled from ``key=value`` pairs
+        in the SA-Bench Config style, also searched in the raw command only.
+
+        Args:
+            log_content: Content of the benchmark log file
+
+        Returns:
+            BenchmarkLaunchCommand with parsed parameters, or None if not found
+        """
+        from analysis.srtlog.parsers import BenchmarkLaunchCommand
+
+        # Tried in order: the [CMD] tag written by our scripts is preferred,
+        # then bare command lines, then the SA-Bench Config header.
+        command_sources = (
+            (r"\[CMD\]\s*(.+)$", re.MULTILINE),
+            (r"(python[3]?\s+-m\s+sglang\.bench_serving\s+[^\n]+)", re.IGNORECASE),
+            (r"(sa-bench\s+[^\n]+)", re.IGNORECASE),
+            (r"(python[3]?\s+.*bench_serving\.py\s+[^\n]+)", re.IGNORECASE),
+            (r"(SA-Bench Config:[^\n]+)", 0),
+        )
+
+        raw_command = None
+        for source_pattern, flags in command_sources:
+            hit = re.search(source_pattern, log_content, flags)
+            if hit:
+                raw_command = hit.group(1).strip()
+                if raw_command:
+                    break
+
+        if not raw_command:
+            return None
+
+        integer_fields = ("num_prompts", "max_concurrency", "input_len", "output_len")
+
+        def coerce(field: str, text: str) -> Any:
+            """Turn captured text into int/float where the field calls for it."""
+            if field in integer_fields:
+                return int(text)
+            if field == "request_rate" and text != "inf":
+                # "inf" and anything non-numeric stay as the original string.
+                try:
+                    return float(text)
+                except ValueError:
+                    return text
+            return text
+
+        # Command-line flag form
+        cli_patterns = {
+            "model": r"--model[=\s]+([^\s]+)",
+            "base_url": r"--base-url[=\s]+([^\s]+)",
+            "num_prompts": r"--num-prompts?[=\s]+(\d+)",
+            "request_rate": r"--request-rate[=\s]+([^\s]+)",
+            "max_concurrency": r"--max-concurrency[=\s]+(\d+)",
+            "input_len": r"--(?:input-len|random-input-len)[=\s]+(\d+)",
+            "output_len": r"--(?:output-len|random-output-len)[=\s]+(\d+)",
+            "dataset": r"--dataset[=\s]+([^\s]+)",
+            "dataset_path": r"--dataset-path[=\s]+([^\s]+)",
+        }
+
+        # SA-Bench Config header form, e.g.
+        # SA-Bench Config: endpoint=http://localhost:8000; isl=8192; osl=1024; concurrencies=28; req_rate=inf; model=dsr1-fp8
+        header_patterns = {
+            "base_url": r"endpoint=([^;\s]+)",
+            "model": r"model=([^;\s]+)",
+            "input_len": r"isl=(\d+)",
+            "output_len": r"osl=(\d+)",
+            "max_concurrency": r"concurrencies=(\d+)",
+            "request_rate": r"req_rate=([^;\s]+)",
+        }
+
+        extra_args: dict[str, Any] = {}
+
+        for field, pattern in cli_patterns.items():
+            hit = re.search(pattern, raw_command)
+            if hit:
+                extra_args[field] = coerce(field, hit.group(1))
+
+        # Header values only fill gaps; a command-line flag always wins.
+        for field, pattern in header_patterns.items():
+            if field in extra_args:
+                continue
+            hit = re.search(pattern, raw_command)
+            if hit:
+                extra_args[field] = coerce(field, hit.group(1))
+
+        return BenchmarkLaunchCommand(
+            benchmark_type=self.benchmark_type,
+            raw_command=raw_command,
+            extra_args=extra_args,
+        )
+
diff --git a/analysis/srtlog/parsers/nodes/__init__.py b/analysis/srtlog/parsers/nodes/__init__.py
new file mode 100644
index 00000000..54c3d8dd
--- /dev/null
+++ b/analysis/srtlog/parsers/nodes/__init__.py
@@ -0,0 +1,10 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Node log parsers for different backends."""
+
+from analysis.srtlog.parsers.nodes.sglang import SGLangNodeParser
+from analysis.srtlog.parsers.nodes.trtllm import TRTLLMNodeParser
+
+__all__ = ["SGLangNodeParser", "TRTLLMNodeParser"]
+
diff --git a/analysis/srtlog/parsers/nodes/sglang.py b/analysis/srtlog/parsers/nodes/sglang.py
new file mode 100644
index 00000000..9fbc6e6a
--- /dev/null
+++ b/analysis/srtlog/parsers/nodes/sglang.py
@@ -0,0 +1,431 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""SGLang node log parser.
+
+Parses logs with format:
+    [2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m ... Decode batch, #running-req: 5, ...
+
+This parser handles SGLang structured logging format with ISO 8601 timestamps.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import re
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from analysis.srtlog.models import BatchMetrics, MemoryMetrics, NodeMetrics
+from analysis.srtlog.parsers import register_node_parser
+
+if TYPE_CHECKING:
+    from analysis.srtlog.parsers import NodeLaunchCommand
+
+logger = logging.getLogger(__name__)
+
+
+# ANSI escape code pattern for stripping colors
+ANSI_ESCAPE = re.compile(r"\x1b\[[0-9;]*m")
+
+
+@register_node_parser("sglang")
+class SGLangNodeParser:
+    """Parser for SGLang node logs.
+
+    Handles SGLang structured logging with ISO 8601 timestamps.
+    May contain ANSI color codes which are stripped during parsing.
+    """
+
+    @property
+    def backend_type(self) -> str:
+        """Return the identifier of the backend this parser handles, ``"sglang"``."""
+        return "sglang"
+
+    def parse_logs(self, log_dir: Path) -> list[NodeMetrics]:
+        """Parse every worker log file found directly inside a directory.
+
+        A file is considered a worker log when its name ends in ``.err`` or
+        ``.out`` and contains "prefill", "decode" or "agg".
+
+        Args:
+            log_dir: Directory containing *_prefill_*.out, *_decode_*.out, *_agg_*.out files
+
+        Returns:
+            List of NodeMetrics objects, one per file that parsed successfully.
+            Empty when the directory does not exist.
+        """
+        log_dir = Path(log_dir)
+        if not log_dir.exists():
+            logger.error("Log directory does not exist: %s", log_dir)
+            return []
+
+        worker_markers = ("prefill", "decode", "agg")
+        nodes = []
+
+        for entry in os.listdir(log_dir):
+            is_log_file = entry.endswith(".err") or entry.endswith(".out")
+            if is_log_file and any(marker in entry for marker in worker_markers):
+                parsed = self.parse_single_log(log_dir / entry)
+                if parsed:
+                    nodes.append(parsed)
+
+        logger.info("Parsed %d node log files from %s", len(nodes), log_dir)
+        return nodes
+
+    def parse_single_log(self, log_path: Path) -> NodeMetrics | None:
+        """Parse a single node log file.
+
+        Each line is stripped of ANSI colour codes and offered to the
+        prefill-batch, decode-batch and memory line parsers; every hit becomes
+        a BatchMetrics or MemoryMetrics record. Lines containing ``tp_size=``
+        are also searched for tp/dp/ep sizes, which are stored in ``config``
+        (a later matching line overwrites an earlier one).
+
+        Args:
+            log_path: Path to a prefill/decode/agg log file
+
+        Returns:
+            NodeMetrics object, or None if the filename does not follow the
+            ``<node>_<service>_<id>.err|.out`` convention or reading/parsing
+            raised. A file with no recognised metric lines still yields a
+            NodeMetrics with empty lists.
+        """
+        node_info = self._extract_node_info_from_filename(str(log_path))
+        if not node_info:
+            logger.warning(
+                "Could not extract node info from filename: %s. "
+                "Expected format: <node>_<service>_<id>.err or .out",
+                log_path,
+            )
+            return None
+
+        # Optional fields copied from the line parsers' dicts onto the records;
+        # a key the line did not provide is passed as None.
+        prefill_fields = (
+            "new_seq",
+            "new_token",
+            "cached_token",
+            "token_usage",
+            "running_req",
+            "queue_req",
+            "prealloc_req",
+            "inflight_req",
+            "input_throughput",
+        )
+        decode_fields = (
+            "running_req",
+            "queue_req",
+            "prealloc_req",
+            "transfer_req",
+            "token_usage",
+            "preallocated_usage",
+            "num_tokens",
+            "gen_throughput",
+        )
+        memory_fields = ("avail_mem_gb", "mem_usage_gb", "kv_cache_gb", "kv_tokens")
+
+        batches = []
+        memory_snapshots = []
+        config = {}
+
+        try:
+            # errors="replace": a stray undecodable byte in a worker log must
+            # not cost us every metric in the file.
+            with open(log_path, encoding="utf-8", errors="replace") as f:
+                for line in f:
+                    # Strip ANSI escape codes
+                    clean_line = ANSI_ESCAPE.sub("", line)
+
+                    # dp/tp/ep are fixed at 0 on every record: the line
+                    # parsers used here do not extract rank information.
+                    for parsed, fields in (
+                        (self._parse_prefill_batch_line(clean_line), prefill_fields),
+                        (self._parse_decode_batch_line(clean_line), decode_fields),
+                    ):
+                        if parsed:
+                            batches.append(
+                                BatchMetrics(
+                                    timestamp=parsed["timestamp"],
+                                    dp=0,
+                                    tp=0,
+                                    ep=0,
+                                    batch_type=parsed["type"],
+                                    **{name: parsed.get(name) for name in fields},
+                                )
+                            )
+
+                    mem_metrics = self._parse_memory_line(clean_line)
+                    if mem_metrics:
+                        memory_snapshots.append(
+                            MemoryMetrics(
+                                timestamp=mem_metrics["timestamp"],
+                                dp=0,
+                                tp=0,
+                                ep=0,
+                                metric_type=mem_metrics["type"],
+                                **{name: mem_metrics.get(name) for name in memory_fields},
+                            )
+                        )
+
+                    # Extract TP/DP/EP configuration from server_args
+                    if "tp_size=" in clean_line:
+                        for size_key in ("tp_size", "dp_size", "ep_size"):
+                            size_match = re.search(size_key + r"=(\d+)", clean_line)
+                            if size_match:
+                                config[size_key] = int(size_match.group(1))
+
+        except Exception as e:
+            # Best effort: one unreadable log should not abort the whole directory scan.
+            logger.error("Error parsing %s: %s", log_path, e)
+            return None
+
+        if not batches and not memory_snapshots:
+            logger.debug("Parsed %s but found no batch/memory metrics", log_path)
+
+        logger.debug("Parsed %s: %d batches, %d memory snapshots", log_path, len(batches), len(memory_snapshots))
+
+        return NodeMetrics(
+            node_info=node_info,
+            batches=batches,
+            memory_snapshots=memory_snapshots,
+            config=config,
+        )
+
+    def _parse_timestamp(self, line: str) -> str | None:
+        """Return the first ISO 8601 timestamp found in *line*, or None.
+
+        Only timestamps with fractional seconds are recognised; the trailing
+        ``Z`` is optional.
+
+        Example: 2025-12-30T15:52:38.206058Z
+        """
+        found = re.search(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z?)", line)
+        return found.group(1) if found else None
+
+    def _parse_prefill_batch_line(self, line: str) -> dict | None:
+        """Parse prefill batch log line for metrics.
+
+        Args:
+            line: One log line.
+
+        Returns:
+            Dict with ``timestamp``, ``type`` ("prefill") and one entry per
+            metric found on the line (int, or float when the text contains a
+            decimal point). None if the line does not contain "Prefill batch"
+            or has no timestamp. A metric whose text is not a valid number is
+            omitted rather than raising.
+        """
+        if "Prefill batch" not in line:
+            return None
+
+        timestamp = self._parse_timestamp(line)
+        if not timestamp:
+            return None
+
+        metrics = {"timestamp": timestamp, "type": "prefill"}
+
+        patterns = {
+            "new_seq": r"#new-seq:\s*(\d+)",
+            "new_token": r"#new-token:\s*(\d+)",
+            "cached_token": r"#cached-token:\s*(\d+)",
+            "token_usage": r"token usage:\s*([\d.]+)",
+            "running_req": r"#running-req:\s*(\d+)",
+            "queue_req": r"#queue-req:\s*(\d+)",
+            "prealloc_req": r"#prealloc-req:\s*(\d+)",
+            "inflight_req": r"#inflight-req:\s*(\d+)",
+            "input_throughput": r"input throughput \(token/s\):\s*([\d.]+)",
+        }
+
+        for key, pattern in patterns.items():
+            match = re.search(pattern, line)
+            if not match:
+                continue
+            value = match.group(1)
+            # [\d.]+ also swallows a sentence-ending period ("0.85."); trim it
+            # but keep the int/float choice based on the captured text.
+            number = value.rstrip(".")
+            try:
+                metrics[key] = float(number) if "." in value else int(number)
+            except ValueError:
+                # e.g. "1.2.3": drop this one field instead of letting the
+                # error propagate and discard the caller's whole log file.
+                continue
+
+        return metrics
+
+    def _parse_decode_batch_line(self, line: str) -> dict | None:
+        """Parse decode batch log line for metrics.
+
+        Args:
+            line: One log line.
+
+        Returns:
+            Dict with ``timestamp``, ``type`` ("decode") and one entry per
+            metric found on the line (int, or float when the text contains a
+            decimal point). None if the line does not contain "Decode batch"
+            or has no timestamp. A metric whose text is not a valid number is
+            omitted rather than raising.
+        """
+        if "Decode batch" not in line:
+            return None
+
+        timestamp = self._parse_timestamp(line)
+        if not timestamp:
+            return None
+
+        metrics = {"timestamp": timestamp, "type": "decode"}
+
+        patterns = {
+            "running_req": r"#running-req:\s*(\d+)",
+            "num_tokens": r"#token:\s*(\d+)",
+            "token_usage": r"token usage:\s*([\d.]+)",
+            "preallocated_usage": r"pre-allocated usage:\s*([\d.]+)",
+            "prealloc_req": r"#prealloc-req:\s*(\d+)",
+            "transfer_req": r"#transfer-req:\s*(\d+)",
+            "queue_req": r"#queue-req:\s*(\d+)",
+            "gen_throughput": r"gen throughput \(token/s\):\s*([\d.]+)",
+        }
+
+        for key, pattern in patterns.items():
+            match = re.search(pattern, line)
+            if not match:
+                continue
+            value = match.group(1)
+            # [\d.]+ also swallows a sentence-ending period ("99.5."); trim it
+            # but keep the int/float choice based on the captured text.
+            number = value.rstrip(".")
+            try:
+                metrics[key] = float(number) if "." in value else int(number)
+            except ValueError:
+                # e.g. "1.2.3": drop this one field instead of letting the
+                # error propagate and discard the caller's whole log file.
+                continue
+
+        return metrics
+
+    def _parse_memory_line(self, line: str) -> dict | None:
+        """Extract memory and KV-cache figures from one log line.
+
+        Returns:
+            Dict holding ``timestamp``, ``type`` and whichever of
+            ``avail_mem_gb``, ``mem_usage_gb``, ``kv_cache_gb`` and
+            ``kv_tokens`` appear on the line. ``type`` is "kv_cache" when a
+            KV size was found, otherwise "memory". None when the line has no
+            timestamp or none of the GB-valued figures.
+        """
+        stamp = self._parse_timestamp(line)
+        if not stamp:
+            return None
+
+        def capture(pattern):
+            hit = re.search(pattern, line)
+            return hit.group(1) if hit else None
+
+        record = {"timestamp": stamp}
+
+        # "avail mem=75.11 GB"
+        avail = capture(r"avail mem=([\d.]+)\s*GB")
+        if avail is not None:
+            record["avail_mem_gb"] = float(avail)
+            record["type"] = "memory"
+
+        # "mem usage=107.07 GB"
+        usage = capture(r"mem usage=([\d.]+)\s*GB")
+        if usage is not None:
+            record["mem_usage_gb"] = float(usage)
+            record["type"] = "memory"
+
+        # "KV size: 17.16 GB" -- takes precedence over "memory" as the type
+        kv_size = capture(r"KV size:\s*([\d.]+)\s*GB")
+        if kv_size is not None:
+            record["kv_cache_gb"] = float(kv_size)
+            record["type"] = "kv_cache"
+
+        # "#tokens: 524288" -- recorded, but does not by itself make the line a hit
+        token_count = capture(r"#tokens:\s*(\d+)")
+        if token_count is not None:
+            record["kv_tokens"] = int(token_count)
+
+        # "Capturing batches (bs=256 avail_mem=6.32 GB)" progress lines; only
+        # consulted when nothing above has already classified the line.
+        capture_avail = capture(r"avail_mem=([\d.]+)\s*GB")
+        if capture_avail is not None and "type" not in record:
+            record["avail_mem_gb"] = float(capture_avail)
+            record["type"] = "memory"
+
+        if "type" not in record:
+            return None
+        return record
+
+    def _extract_node_info_from_filename(self, filename: str) -> dict | None:
+        """Split a worker log filename into node name, worker type and worker id.
+
+        Only the basename of *filename* is examined.
+
+        Example: eos0219_prefill_w0.out
+        Returns: {'node': 'eos0219', 'worker_type': 'prefill', 'worker_id': 'w0'}
+
+        Returns None when the basename does not start with
+        ``<node>_<prefill|decode|agg|frontend>_<id>.err`` or ``.out``.
+        """
+        base_name = os.path.basename(filename)
+        parts = re.match(r"(.+)_(prefill|decode|agg|frontend)_([^.]+)\.(err|out)", base_name)
+        if not parts:
+            return None
+
+        node, worker_type, worker_id = parts.group(1, 2, 3)
+        return {
+            "node": node,
+            "worker_type": worker_type,
+            "worker_id": worker_id,
+        }
+
+    def parse_launch_command(self, log_content: str, worker_type: str = "unknown") -> NodeLaunchCommand | None:
+        """Parse the SGLang worker launch command from log content.
+
+        The raw command is taken from the first of these that is present in
+        the ANSI-stripped log: a ``[CMD]``-tagged line, a
+        ``sglang.launch_server`` / ``launch_server.py`` command line, or a
+        ``server_args=ServerArgs(...)`` dump (stored truncated to 200
+        characters). Arguments are read from ``--flag value`` pairs in the raw
+        command first; fields still missing are then looked up as
+        ``name=value`` pairs anywhere in the log.
+
+        Args:
+            log_content: Content of the worker log file
+            worker_type: Type of worker (prefill, decode, agg)
+
+        Returns:
+            NodeLaunchCommand with parsed parameters, or None if not found
+        """
+        from analysis.srtlog.parsers import NodeLaunchCommand
+
+        # Strip ANSI codes for cleaner parsing
+        clean_content = ANSI_ESCAPE.sub("", log_content)
+
+        raw_command = None
+
+        # First, try to find [CMD] tagged command (preferred - from our scripts)
+        cmd_match = re.search(r"\[CMD\]\s*(.+)$", clean_content, re.MULTILINE)
+        if cmd_match:
+            raw_command = cmd_match.group(1).strip()
+
+        # Fallback: pattern to match sglang launch commands
+        if not raw_command:
+            patterns = [
+                r"(python[3]?\s+-m\s+sglang\.launch_server\s+[^\n]+)",
+                r"(python[3]?\s+.*launch_server\.py\s+[^\n]+)",
+                r"(sglang\.launch_server\s+[^\n]+)",
+            ]
+
+            for pattern in patterns:
+                match = re.search(pattern, clean_content, re.IGNORECASE)
+                if match:
+                    raw_command = match.group(1).strip()
+                    break
+
+        # Also try to parse from ServerArgs() log line
+        if not raw_command:
+            server_args_match = re.search(r"server_args=ServerArgs\((.*?)\)", clean_content, re.DOTALL)
+            if server_args_match:
+                raw_command = f"ServerArgs({server_args_match.group(1)[:200]}...)"
+
+        if not raw_command:
+            return None
+
+        integer_fields = ("tp_size", "pp_size", "dp_size", "ep_size", "port", "max_num_seqs", "max_model_len")
+
+        def coerce(field: str, text: str) -> Any:
+            """Convert captured text to int/float for the numeric fields."""
+            if field in integer_fields:
+                return int(text)
+            if field == "gpu_memory_utilization":
+                return float(text)
+            return text
+
+        # Parse SGLang server arguments (from command line)
+        arg_patterns = {
+            "model_path": r"--model(?:-path)?[=\s]+([^\s]+)",
+            "served_model_name": r"--served-model-name[=\s]+([^\s]+)",
+            "tp_size": r"--tp-size[=\s]+(\d+)",
+            "pp_size": r"--pp-size[=\s]+(\d+)",
+            "dp_size": r"--dp-size[=\s]+(\d+)",
+            "ep_size": r"--ep-size[=\s]+(\d+)",
+            "host": r"--host[=\s]+([^\s]+)",
+            "port": r"--port[=\s]+(\d+)",
+            "max_num_seqs": r"--max-(?:num-seqs|running-requests)[=\s]+(\d+)",
+            "max_model_len": r"--(?:max-model-len|context-length)[=\s]+(\d+)",
+            "kv_cache_dtype": r"--kv-cache-dtype[=\s]+([^\s]+)",
+            "gpu_memory_utilization": r"--(?:mem-fraction-static|gpu-memory-utilization)[=\s]+([\d.]+)",
+            "disaggregation_mode": r"--disaggregation-mode[=\s]+([^\s]+)",
+            "nccl_init_addr": r"--(?:dist-init-addr|nccl-init-addr)[=\s]+([^\s]+)",
+        }
+
+        # Also parse from ServerArgs format. These are searched over the whole
+        # log, so each name is anchored with \b: without it "port=" would also
+        # match the tail of a longer name such as "nccl_port=" and "tp_size="
+        # the tail of "attn_tp_size=" ("_" is a word character, so \b rejects them).
+        server_args_patterns = {
+            "model_path": r"\bmodel_path=['\"]?([^'\"]+)['\"]?",
+            "served_model_name": r"\bserved_model_name=['\"]?([^'\"]+)['\"]?",
+            "tp_size": r"\btp_size=(\d+)",
+            "pp_size": r"\bpp_size=(\d+)",
+            "dp_size": r"\bdp_size=(\d+)",
+            "ep_size": r"\bep_size=(\d+)",
+            "host": r"\bhost=['\"]?([^'\"]+)['\"]?",
+            "port": r"\bport=(\d+)",
+            "max_num_seqs": r"\bmax_running_requests=(\d+)",
+            "max_model_len": r"\bcontext_length=(\d+)",
+            "disaggregation_mode": r"\bdisaggregation_mode=['\"]?([^'\"]+)['\"]?",
+        }
+
+        extra_args: dict[str, Any] = {}
+
+        for field, pattern in arg_patterns.items():
+            match = re.search(pattern, raw_command)
+            if match:
+                extra_args[field] = coerce(field, match.group(1))
+
+        # Try ServerArgs patterns for any missing fields; command-line values win.
+        for field, pattern in server_args_patterns.items():
+            if field in extra_args:
+                continue
+            match = re.search(pattern, clean_content)
+            if match:
+                extra_args[field] = coerce(field, match.group(1))
+
+        return NodeLaunchCommand(
+            backend_type="sglang",
+            worker_type=worker_type,
+            raw_command=raw_command,
+            extra_args=extra_args,
+        )
+
diff --git a/analysis/srtlog/parsers/nodes/trtllm.py b/analysis/srtlog/parsers/nodes/trtllm.py
new file mode 100644
index 00000000..7f5aa0e8
--- /dev/null
+++ b/analysis/srtlog/parsers/nodes/trtllm.py
@@ -0,0 +1,469 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""TRTLLM node log parser.
+
+Parses logs from TensorRT-LLM workers launched via dynamo.trtllm.
+Example log format:
+    [33mRank0 run python3 -m dynamo.trtllm --model-path /model --served-model-name dsr1-fp8 ...
+    Initializing the worker with config: Config(namespace=dynamo, component=prefill, ...)
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import re
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from analysis.srtlog.models import BatchMetrics, MemoryMetrics, NodeMetrics
+from analysis.srtlog.parsers import register_node_parser
+
+if TYPE_CHECKING:
+    from analysis.srtlog.parsers import NodeLaunchCommand
+
+logger = logging.getLogger(__name__)
+
+# ANSI escape code pattern for stripping colors
+ANSI_ESCAPE = re.compile(r"\x1b\[[0-9;]*m")
+
+
+@register_node_parser("trtllm")
+class TRTLLMNodeParser:
+    """Parser for TensorRT-LLM node logs.
+
+    Parses logs from TRTLLM workers, including:
+    - Launch command from dynamo.trtllm
+    - Worker configuration from Config() dump
+    - MPI rank and world size information
+    """
+
+    @property
+    def backend_type(self) -> str:  # Backend identifier reported by this parser.
+        return "trtllm"  # Same literal as the key given to @register_node_parser on this class.
+
+    def parse_logs(self, log_dir: Path) -> list[NodeMetrics]:
+        """Parse every TRTLLM worker log found directly inside a directory.
+
+        Args:
+            log_dir: Directory holding worker logs such as *_prefill_*.out or *_decode_*.err
+
+        Returns:
+            One NodeMetrics per successfully parsed file, ordered by file name;
+            an empty list when log_dir is missing or is not a directory
+        """
+        log_dir = Path(log_dir)
+        nodes: list[NodeMetrics] = []
+
+        # is_dir() also rejects a plain file, which os.listdir() would raise on.
+        if not log_dir.is_dir():
+            logger.error("Log directory does not exist: %s", log_dir)
+            return nodes
+
+        # os.listdir() returns names in arbitrary order; sort so the result is reproducible.
+        for file in sorted(os.listdir(log_dir)):
+            if not file.endswith((".err", ".out")):
+                continue
+            if not any(wt in file for wt in ("prefill", "decode", "agg")):
+                continue
+            node = self.parse_single_log(log_dir / file)
+            if node:
+                nodes.append(node)
+
+        logger.info("Parsed %d TRTLLM node log files from %s", len(nodes), log_dir)
+        return nodes
+
+    def parse_single_log(self, log_path: Path) -> NodeMetrics | None:
+        """Build a NodeMetrics object from one TRTLLM worker log.
+
+        The file name supplies the node identity. The ANSI-stripped file body is
+        searched for configuration values, then handed to _parse_iteration_logs
+        and _parse_memory_info for batch metrics and memory snapshots.
+
+        Configuration values are collected from, in order:
+            1. the "tllm_mpi_size:" line (stored as "mpi_world_size"),
+            2. the first "Config(...)" dump,
+            3. the "extra_engine_args=<file>.yaml" reference,
+            4. the "TensorRT-LLM engine args: {...}" line.
+        A key found in step 4 replaces the value recorded in step 2.
+
+        Args:
+            log_path: Path to a node log file
+
+        Returns:
+            NodeMetrics object, or None when the file name is not recognised or
+            an exception is raised while reading or parsing the file
+        """
+        node_info = self._extract_node_info_from_filename(str(log_path))
+        if not node_info:
+            logger.warning("Could not extract node info from filename: %s", log_path)
+            return None
+
+        # config key -> pattern searched inside the body of the Config(...) dump
+        config_dump_patterns = {
+            "tp_size": r"tensor_parallel_size=(\d+)",
+            "pp_size": r"pipeline_parallel_size=(\d+)",
+            "ep_size": r"expert_parallel_size=(\d+)",
+            "max_batch_size": r"max_batch_size=(\d+)",
+            "max_num_tokens": r"max_num_tokens=(\d+)",
+            "max_seq_len": r"max_seq_len=(\d+)",
+        }
+
+        # config key -> pattern searched inside the engine args dict text.
+        # Applied after the Config() table, so these values win for shared keys.
+        engine_arg_patterns = {
+            "tp_size": r"'tensor_parallel_size':\s*(\d+)",
+            "pp_size": r"'pipeline_parallel_size':\s*(\d+)",
+            "ep_size": r"'moe_expert_parallel_size':\s*(\d+)",
+            "max_batch_size": r"'max_batch_size':\s*(\d+)",
+            "max_num_tokens": r"'max_num_tokens':\s*(\d+)",
+            "max_seq_len": r"'max_seq_len':\s*(\d+)",
+        }
+
+        config = {}
+        try:
+            # Handle encoding issues gracefully
+            raw_text = log_path.read_text(errors="replace")
+            text = ANSI_ESCAPE.sub("", raw_text)
+
+            # Extract MPI configuration
+            found = re.search(r"tllm_mpi_size:\s*(\d+)", text)
+            if found:
+                config["mpi_world_size"] = int(found.group(1))
+
+            # Extract TP/PP and limits from the Config() dump. The lazy group ends at
+            # the first ")" and "." does not cross a newline.
+            found = re.search(r"Config\((.*?)\)", text)
+            if found:
+                dump = found.group(1)
+                for key, pattern in config_dump_patterns.items():
+                    hit = re.search(pattern, dump)
+                    if hit:
+                        config[key] = int(hit.group(1))
+
+            # Extract from separate trtllm_config YAML references
+            found = re.search(r"extra_engine_args=([^\s,]+\.yaml)", text)
+            if found:
+                config["extra_engine_args"] = found.group(1)
+
+            # Also extract from TensorRT-LLM engine args line which has actual parallelism.
+            # Only the text up to the first "}" after the opening brace is scanned.
+            found = re.search(r"TensorRT-LLM engine args:\s*\{([^}]+)", text)
+            if found:
+                engine_text = found.group(1)
+                for key, pattern in engine_arg_patterns.items():
+                    hit = re.search(pattern, engine_text)
+                    if hit:
+                        config[key] = int(hit.group(1))
+
+            # Parse iteration logs for batch metrics
+            # Format: iter = X, ... num_scheduled_requests: X, states = {'num_ctx_requests': X, 'num_ctx_tokens': X, 'num_generation_tokens': X}
+            worker_type = node_info.get("worker_type", "unknown")
+            batches = self._parse_iteration_logs(text, worker_type)
+
+            # Parse memory info
+            memory_snapshots = self._parse_memory_info(text)
+
+        except Exception as e:
+            # Boundary handler: any read/parse failure drops this file instead of aborting the caller.
+            logger.error("Error parsing %s: %s", log_path, e)
+            return None
+
+        logger.debug(
+            "Parsed %s: %d batches, %d memory snapshots, config=%s",
+            log_path,
+            len(batches),
+            len(memory_snapshots),
+            config,
+        )
+
+        return NodeMetrics(
+            node_info=node_info,
+            batches=batches,
+            memory_snapshots=memory_snapshots,
+            config=config,
+        )
+
+    def _parse_iteration_logs(self, content: str, worker_type: str) -> list[BatchMetrics]:
+        """Turn TRTLLM per-iteration log lines into BatchMetrics records.
+
+        Format:
+            [01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 5559, ..., num_scheduled_requests: 1,
+            states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 3}
+
+        Each matched line is classified as:
+            - "prefill" when num_ctx_tokens > 0,
+            - "decode" when num_ctx_tokens is 0 and num_generation_tokens > 0,
+            - the caller-supplied worker_type otherwise.
+        Counts missing from the states dict are treated as 0.
+
+        Throughput (tokens/s) is token count * 1000 / host_step_time and is only set
+        when the line carries a positive host_step_time: prefill batches get
+        input_throughput, decode batches get gen_throughput.
+
+        Args:
+            content: Log file content (ANSI stripped)
+            worker_type: Worker type (prefill, decode)
+
+        Returns:
+            List of BatchMetrics objects
+        """
+        # Groups: timestamp, iteration number, scheduled request count, states dict body
+        iter_re = re.compile(
+            r"\[(\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2})\].*"
+            r"iter\s*=\s*(\d+).*"
+            r"num_scheduled_requests:\s*(\d+).*"
+            r"states\s*=\s*\{([^}]+)\}"
+        )
+
+        def first_int(pattern: str, text: str) -> int:
+            """Return the first integer captured by pattern in text, or 0 without a match."""
+            found = re.search(pattern, text)
+            return int(found.group(1)) if found else 0
+
+        batches = []
+
+        for line in iter_re.finditer(content):
+            # All four groups are text; the iteration number is matched but not used.
+            timestamp, _iteration, scheduled, states = line.groups()
+            ctx_tokens = first_int(r"'num_ctx_tokens':\s*(\d+)", states)
+            gen_tokens = first_int(r"'num_generation_tokens':\s*(\d+)", states)
+
+            # Determine batch type based on content
+            if ctx_tokens > 0:
+                batch_type = "prefill"
+            elif gen_tokens > 0:
+                batch_type = "decode"
+            else:
+                batch_type = worker_type
+            is_prefill = batch_type == "prefill"
+            is_decode = batch_type == "decode"
+
+            # Parse step time if available
+            step = re.search(r"host_step_time\s*=\s*([\d.]+)ms", line.group(0))
+            step_time = float(step.group(1)) if step else None
+
+            # Compute throughput (tokens/s)
+            input_throughput = None
+            gen_throughput = None
+            if step_time and step_time > 0:
+                if is_prefill and ctx_tokens > 0:
+                    input_throughput = (ctx_tokens * 1000.0) / step_time
+                elif is_decode and gen_tokens > 0:
+                    gen_throughput = (gen_tokens * 1000.0) / step_time
+
+            # Token counts are reported only in the field that fits the batch type;
+            # dp/tp/ep are always recorded as 0 by this parser.
+            new_token = ctx_tokens if is_prefill else None
+            num_tokens = gen_tokens if is_decode else None
+            batches.append(
+                BatchMetrics(
+                    timestamp=timestamp,
+                    dp=0,
+                    tp=0,
+                    ep=0,
+                    batch_type=batch_type,
+                    running_req=int(scheduled),
+                    new_token=new_token,
+                    num_tokens=num_tokens,
+                    input_throughput=input_throughput,
+                    gen_throughput=gen_throughput,
+                )
+            )
+
+        return batches
+
+    def _parse_memory_info(self, content: str) -> list[MemoryMetrics]:
+        """Collect memory profiling and KV cache allocation records from a TRTLLM log.
+
+        Format:
+            Peak memory during memory usage profiling (torch + non-torch): 91.46 GiB,
+            available KV cache memory when calculating max tokens: 41.11 GiB,
+            fraction is set 0.85, kv size is 35136. device total memory 139.81 GiB
+
+        Two kinds of record are returned, all "memory" records before all "kv_cache" ones:
+            - "memory": one per profiling line; mem_usage_gb is the peak memory,
+              avail_mem_gb is device total minus peak, kv_cache_gb is the available KV memory.
+            - "kv_cache": one per "[MemUsageChange] Allocated ... GiB for max tokens" line,
+              with an empty timestamp because that pattern does not capture one.
+
+        Args:
+            content: Log file content (ANSI stripped)
+
+        Returns:
+            List of MemoryMetrics objects
+        """
+        # Groups: timestamp, peak GiB, available KV cache GiB, device total GiB
+        profile_re = re.compile(
+            r"\[(\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2})\].*"
+            r"Peak memory.*?:\s*([\d.]+)\s*GiB.*?"
+            r"available KV cache memory.*?:\s*([\d.]+)\s*GiB.*?"
+            r"device total memory\s*([\d.]+)\s*GiB"
+        )
+        # Groups: allocated GiB, token count given in parentheses
+        kv_alloc_re = re.compile(
+            r"\[MemUsageChange\] Allocated\s*([\d.]+)\s*GiB for max tokens.*?\((\d+)\)"
+        )
+
+        snapshots = []
+
+        for found in profile_re.finditer(content):
+            timestamp = found.group(1)
+            peak_gb, kv_avail_gb, device_total_gb = (float(g) for g in found.groups()[1:])
+            snapshots.append(
+                MemoryMetrics(
+                    timestamp=timestamp,
+                    dp=0,
+                    tp=0,
+                    ep=0,
+                    metric_type="memory",
+                    mem_usage_gb=peak_gb,
+                    avail_mem_gb=device_total_gb - peak_gb,
+                    kv_cache_gb=kv_avail_gb,
+                )
+            )
+
+        # Also parse KV cache allocation info
+        snapshots.extend(
+            MemoryMetrics(
+                timestamp="",
+                dp=0,
+                tp=0,
+                ep=0,
+                metric_type="kv_cache",
+                kv_cache_gb=float(found.group(1)),
+                kv_tokens=int(found.group(2)),
+            )
+            for found in kv_alloc_re.finditer(content)
+        )
+
+        return snapshots
+
+    def _extract_node_info_from_filename(self, filename: str) -> dict | None:
+        """Extract node name and worker info from a log file name.
+
+        Only the base name is examined, and it must match the naming scheme in
+        full: a name with text after .err/.out (e.g. "x_prefill_w0.out.bak") gives None.
+
+        Example: worker-0_prefill_w0.out
+        Returns: {'node': 'worker-0', 'worker_type': 'prefill', 'worker_id': 'w0'}
+        """
+        match = re.fullmatch(
+            r"(.+)_(prefill|decode|agg|frontend)_([^.]+)\.(err|out)",
+            os.path.basename(filename),
+        )
+        if match is None:
+            return None
+        node, worker_type, worker_id, _extension = match.groups()
+        return {"node": node, "worker_type": worker_type, "worker_id": worker_id}
+
+    def parse_launch_command(self, log_content: str, worker_type: str = "unknown") -> NodeLaunchCommand | None:
+        """Parse the TRTLLM worker launch command from log content.
+
+        Looks for command lines like:
+            [CMD] python3 -m dynamo.trtllm --model-path /model --served-model-name dsr1-fp8 --disaggregation-mode prefill
+            python3 -m dynamo.trtllm --model-path /model --served-model-name dsr1-fp8 --disaggregation-mode prefill
+
+        Each extra_args field is taken from the first source that provides it:
+        command-line flags, then the "TensorRT-LLM engine args" dict, then the
+        Config() dump.
+
+        Args:
+            log_content: Content of the worker log file
+            worker_type: Type of worker (prefill, decode, agg)
+
+        Returns:
+            NodeLaunchCommand with parsed parameters, or None if not found
+        """
+        from analysis.srtlog.parsers import NodeLaunchCommand
+
+        # Strip ANSI codes for cleaner parsing
+        clean_content = ANSI_ESCAPE.sub("", log_content)
+
+        raw_command = None
+
+        # First, try to find [CMD] tagged command (preferred - from our scripts)
+        cmd_match = re.search(r"\[CMD\]\s*(.+)$", clean_content, re.MULTILINE)
+        if cmd_match:
+            raw_command = cmd_match.group(1).strip()
+
+        # Fallback: pattern to match TRTLLM launch commands (dynamo.trtllm or tensorrt_llm.serve)
+        if not raw_command:
+            patterns = [
+                r"(?:Rank\d+\s+run\s+)?(python[3]?\s+-m\s+dynamo\.trtllm\s+[^\n]+)",
+                r"(?:Rank\d+\s+run\s+)?(python[3]?\s+-m\s+tensorrt_llm\.serve\s+[^\n]+)",
+                r"(trtllm-serve\s+[^\n]+)",
+                r"(mpirun\s+.*trtllm[^\n]+)",
+            ]
+            for pattern in patterns:
+                match = re.search(pattern, clean_content)
+                if match:
+                    raw_command = match.group(1).strip()
+                    # Remove trailing "in background" if present
+                    raw_command = re.sub(r"\s+in\s+background$", "", raw_command)
+                    break
+
+        if not raw_command:
+            return None
+
+        extra_args: dict[str, Any] = {}
+
+        # Parse dynamo.trtllm / tensorrt_llm server arguments from command line
+        arg_patterns = {
+            "model_path": r"--model-path[=\s]+([^\s]+)",
+            "served_model_name": r"--served-model-name[=\s]+([^\s]+)",
+            "disaggregation_mode": r"--disaggregation-mode[=\s]+([^\s]+)",
+            "host": r"--host[=\s]+([^\s]+)",
+            "port": r"--port[=\s]+(\d+)",
+        }
+        for field, pattern in arg_patterns.items():
+            match = re.search(pattern, raw_command)
+            if match:
+                value = match.group(1)
+                if field == "port":
+                    value = int(value)
+                extra_args[field] = value
+
+        # Also extract from TensorRT-LLM engine args if available (has actual parallelism values)
+        engine_args_match = re.search(r"TensorRT-LLM engine args:\s*\{([^}]+)", clean_content)
+        if engine_args_match:
+            engine_str = engine_args_match.group(1)
+            engine_patterns = {
+                "tp_size": r"'tensor_parallel_size':\s*(\d+)",
+                "pp_size": r"'pipeline_parallel_size':\s*(\d+)",
+                "ep_size": r"'moe_expert_parallel_size':\s*(\d+)",
+                "max_num_seqs": r"'max_batch_size':\s*(\d+)",
+                "max_model_len": r"'max_seq_len':\s*(\d+)",
+            }
+            for field, pattern in engine_patterns.items():
+                if field not in extra_args:
+                    match = re.search(pattern, engine_str)
+                    if match:
+                        extra_args[field] = int(match.group(1))
+
+        # Fallback to Config() dump for every field still missing. This is done per
+        # field, so a known tp_size does not stop pp/ep/limits from being filled in.
+        config_match = re.search(r"Config\((.*?)\)", clean_content)
+        if config_match:
+            config_str = config_match.group(1)
+            config_patterns = {
+                "tp_size": r"tensor_parallel_size=(\d+)",
+                "pp_size": r"pipeline_parallel_size=(\d+)",
+                "ep_size": r"expert_parallel_size=(\d+)",
+                "max_num_seqs": r"max_batch_size=(\d+)",
+                "max_model_len": r"max_seq_len=(\d+)",
+            }
+            for field, pattern in config_patterns.items():
+                if field not in extra_args:
+                    match = re.search(pattern, config_str)
+                    if match:
+                        extra_args[field] = int(match.group(1))
+
+        return NodeLaunchCommand(
+            backend_type=self.backend_type,
+            worker_type=worker_type,
+            raw_command=raw_command,
+            extra_args=extra_args,
+        )
+
diff --git a/analysis/srtlog/rollup_harness.py b/analysis/srtlog/rollup_harness.py
new file mode 100644
index 00000000..24b6e880
--- /dev/null
+++ b/analysis/srtlog/rollup_harness.py
@@ -0,0 +1,474 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Rollup harness for batch processing experiment logs.
+
+Recursively searches for sbatch_script.sh files and runs rollup on each job directory.
+
+Usage:
+    python -m analysis.srtlog.rollup_harness --log-dir /path/to/outputs
+    python -m analysis.srtlog.rollup_harness --log-dir /path/to/outputs --dry-run
+    python -m analysis.srtlog.rollup_harness --log-dir /path/to/outputs --output-dir /path/to/rollups
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import logging
+import sys
+from dataclasses import asdict
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger(__name__)
+
+
+def find_job_directories(log_dir: Path) -> list[Path]:
+    """Find all job directories by searching for sbatch_script.sh files.
+
+    Args:
+        log_dir: Root directory to search (recursively)
+
+    Returns:
+        Parent directories of every sbatch_script.sh found. Directories with an
+        all-decimal name come first in numeric order, then the rest by name.
+    """
+    def sort_key(job_dir: Path) -> tuple[int, int, str]:
+        # Ranked tuples keep int and str names apart; comparing them directly raises TypeError.
+        name = job_dir.name
+        return (0, int(name), name) if name.isdecimal() else (1, 0, name)
+
+    job_dirs = [script.parent for script in log_dir.rglob("sbatch_script.sh")]
+    return sorted(job_dirs, key=sort_key)
+
+
+def load_job_config(job_dir: Path) -> dict[str, Any] | None:
+    """Read and parse the config.yaml that sits directly inside a job directory.
+
+    Args:
+        job_dir: Job directory expected to contain config.yaml
+
+    Returns:
+        Parsed YAML content, or None if the file is missing or fails to load
+    """
+    yaml_file = job_dir / "config.yaml"
+    parsed = None
+    if yaml_file.exists():
+        try:
+            import yaml  # lazy import: an import failure is logged like any other load failure
+            with open(yaml_file) as stream:
+                parsed = yaml.safe_load(stream)
+        except Exception as exc:
+            logger.warning("Failed to load %s: %s", yaml_file, exc)
+            parsed = None
+    return parsed
+
+
+def get_log_dir(job_dir: Path) -> Path | None:
+    """Locate the "logs" entry inside a job directory.
+
+    Args:
+        job_dir: Job directory
+
+    Returns:
+        job_dir / "logs" when that path exists, otherwise None
+    """
+    candidate = job_dir / "logs"
+    # Only existence is checked here; the path is not required to be a directory.
+    found = candidate.exists()
+    return candidate if found else None
+
+
+def _add_launch_command_to_node(node_rollup: Any, node_parser: Any, logs_dir: Path) -> None:
+    """Parse and add launch command to a node rollup.
+
+    Candidate logs are tried in this order, stopping at the first one from which
+    a command is parsed: the node's own .out, the node's own .err, then every
+    "worker-*" log with the same worker type and id (.out before .err, each
+    group in sorted order). Nothing is changed when no candidate succeeds.
+
+    Args:
+        node_rollup: NodeRollup object to update
+        node_parser: Node parser with parse_launch_command method
+        logs_dir: Directory containing log files
+
+    Returns:
+        None. Any update to node_rollup is made by _try_parse_launch_command.
+    """
+    worker_type = node_rollup.worker_type
+    own_stem = f"{node_rollup.node_name}_{worker_type}_{node_rollup.worker_id}"  # worker-4_decode_w0
+    any_node_stem = f"worker-*_{worker_type}_{node_rollup.worker_id}"  # wildcard node
+    extensions = (".out", ".err")
+
+    # 1) Logs named after this exact node.
+    for ext in extensions:
+        own_log = logs_dir / f"{own_stem}{ext}"
+        if own_log.exists() and _try_parse_launch_command(node_rollup, node_parser, own_log, worker_type):
+            return
+
+    # 2) Fallback: same worker type/id on any worker-* node. glob() does not promise
+    # any ordering of its matches, so sort to make the chosen file reproducible.
+    for ext in extensions:
+        for log_file in sorted(logs_dir.glob(f"{any_node_stem}{ext}")):
+            if _try_parse_launch_command(node_rollup, node_parser, log_file, worker_type):
+                return
+
+
+def _try_parse_launch_command(node_rollup: Any, node_parser: Any, log_file: Path, worker_type: str) -> bool:
+    """Attach the launch command parsed from one log file to a node rollup.
+
+    Args:
+        node_rollup: NodeRollup object to update
+        node_parser: Node parser with parse_launch_command method
+        log_file: Log file to parse
+        worker_type: Worker type (prefill, decode, agg)
+
+    Returns:
+        True if command was found and added; False when the parser returns nothing
+        or any exception is raised (the exception is logged at debug level)
+    """
+    from srtctl.cli.mixins.rollup import LaunchCommandRollup
+
+    # Keys copied unchanged from the parser's extra_args; absent keys become None.
+    copied_fields = (
+        "model_path", "served_model_name", "disaggregation_mode", "tp_size", "pp_size",
+        "dp_size", "ep_size", "port", "max_num_seqs", "max_model_len",
+    )
+
+    # Best effort: every failure below is logged at debug level and reported as False.
+    try:
+        log_text = log_file.read_text(errors="replace")
+        parsed = node_parser.parse_launch_command(log_text, worker_type=worker_type)
+        if not parsed:
+            return False
+        extra = parsed.extra_args
+        node_rollup.launch_command = LaunchCommandRollup(
+            raw_command=parsed.raw_command,
+            command_type="worker",
+            # worker_type is the caller's value, not one read from the parsed command
+            worker_type=worker_type,
+            backend_type=parsed.backend_type,
+            **{name: extra.get(name) for name in copied_fields},
+        )
+        logger.debug("Parsed launch command for %s from %s", node_rollup.node_name, log_file.name)
+        return True
+    except Exception as exc:
+        logger.debug("Failed to parse launch command from %s: %s", log_file, exc)
+        return False
+
+
+def run_rollup_on_job(job_dir: Path, output_dir: Path | None = None) -> dict[str, Any] | None:
+    """Run rollup on a single job directory.
+
+    Args:
+        job_dir: Job directory containing config.yaml and logs/
+        output_dir: Optional output directory for rollup.json
+
+    Returns:
+        Rollup summary dict or None if failed
+    """
+    from analysis.srtlog.parsers import get_benchmark_parser, get_node_parser, list_benchmark_parsers, list_node_parsers
+    from srtctl.cli.mixins.rollup import (
+        EnvironmentConfig,
+        LaunchCommandRollup,
+        NodesSummary,
+        RollupResult,
+        RollupSummary,
+    )
+
+    job_id = job_dir.name
+    logs_dir = get_log_dir(job_dir)
+
+    if not logs_dir:
+        logger.warning("No logs directory found in %s", job_dir)
+        return None
+
+    config = load_job_config(job_dir)
+    if not config:
+        logger.warning("No config.yaml found in %s", job_dir)
+        return None
+
+    # Extract config values
+    backend_type = config.get("backend", {}).get("type", "unknown")
+    benchmark_type = config.get("benchmark", {}).get("type", "sa-bench")
+    model_name = config.get("model", {}).get("served_model_name", "unknown")
+    
+    resources = config.get("resources", {})
+    is_disaggregated = resources.get("prefill_nodes") is not None
+    
+    logger.info("Processing job %s: backend=%s, benchmark=%s", job_id, backend_type, benchmark_type)
+
+    # Parse benchmark results
+    results = []
+    try:
+        parser = get_benchmark_parser(benchmark_type)
+        
+        # Find result directories
+        for entry in logs_dir.iterdir():
+            if entry.is_dir() and "_isl_" in entry.name and "_osl_" in entry.name:
+                dir_results = parser.parse_result_directory(entry)
+                results.extend(dir_results)
+        
+        # Also check for AIPerf results
+        if hasattr(parser, "find_aiperf_results"):
+            for aiperf_path in parser.find_aiperf_results(logs_dir):
+                result = parser.parse_result_json(aiperf_path)
+                if result.get("output_tps") is not None or result.get("output_throughput") is not None:
+                    results.append(result)
+                    
+    except ValueError:
+        logger.warning("No benchmark parser for %s, available: %s", benchmark_type, list_benchmark_parsers())
+    except Exception as e:
+        logger.warning("Failed to parse benchmark results: %s", e)
+
+    if not results:
+        logger.warning("No benchmark results found in %s", logs_dir)
+
+    # Parse node metrics
+    nodes_summary = None
+    node_parser = None
+    try:
+        node_parser = get_node_parser(backend_type)
+        nodes = node_parser.parse_logs(logs_dir)
+        if nodes:
+            nodes_summary = NodesSummary.from_node_metrics_list(nodes)
+            logger.info("  Found %d nodes (%d prefill, %d decode)", 
+                       len(nodes_summary.nodes),
+                       nodes_summary.total_prefill_nodes,
+                       nodes_summary.total_decode_nodes)
+            
+            # Parse launch commands for each node
+            for node_rollup in nodes_summary.nodes:
+                _add_launch_command_to_node(node_rollup, node_parser, logs_dir)
+                
+    except ValueError:
+        logger.debug("No node parser for %s, available: %s", backend_type, list_node_parsers())
+    except Exception as e:
+        logger.warning("Failed to parse node metrics: %s", e)
+
+    # Parse benchmark launch command
+    benchmark_command = None
+    benchmark_out = logs_dir / "benchmark.out"
+    if benchmark_out.exists():
+        try:
+            parser = get_benchmark_parser(benchmark_type)
+            cmd = parser.parse_launch_command(benchmark_out.read_text(errors="replace"))
+            if cmd:
+                args = cmd.extra_args
+                benchmark_command = LaunchCommandRollup(
+                    raw_command=cmd.raw_command,
+                    command_type="benchmark",
+                    model_path=args.get("model"),
+                    benchmark_type=cmd.benchmark_type,
+                    base_url=args.get("base_url"),
+                    max_concurrency=args.get("max_concurrency"),
+                    input_len=args.get("input_len"),
+                    output_len=args.get("output_len"),
+                )
+        except Exception as e:
+            logger.debug("Failed to parse benchmark command: %s", e)
+
+    # Parse environment config
+    environment_config = None
+    try:
+        import yaml
+        env_config = EnvironmentConfig()
+        
+        backend_section = config.get("backend", {})
+        if "prefill_environment" in backend_section:
+            env_config.prefill_environment = backend_section["prefill_environment"]
+        if "decode_environment" in backend_section:
+            env_config.decode_environment = backend_section["decode_environment"]
+        if "aggregated_environment" in backend_section:
+            env_config.aggregated_environment = backend_section["aggregated_environment"]
+            
+        # Load TRTLLM config files
+        prefill_yaml = logs_dir / "trtllm_config_prefill.yaml"
+        decode_yaml = logs_dir / "trtllm_config_decode.yaml"
+        
+        if prefill_yaml.exists():
+            with open(prefill_yaml) as f:
+                env_config.prefill_engine_config = yaml.safe_load(f)
+        if decode_yaml.exists():
+            with open(decode_yaml) as f:
+                env_config.decode_engine_config = yaml.safe_load(f)
+                
+        if any([env_config.prefill_environment, env_config.decode_environment, 
+                env_config.prefill_engine_config, env_config.decode_engine_config]):
+            environment_config = env_config
+            
+    except Exception as e:
+        logger.debug("Failed to parse environment config: %s", e)
+
+    # Build rollup summary
+    benchmark_config = config.get("benchmark", {})
+    
+    # Compute total GPUs
+    if is_disaggregated:
+        prefill_gpus = resources.get("prefill_nodes", 0) * resources.get("gpus_per_node", 8)
+        decode_gpus = resources.get("decode_nodes", 0) * resources.get("gpus_per_node", 8)
+        total_gpus = prefill_gpus + decode_gpus
+    else:
+        total_gpus = resources.get("agg_nodes", 1) * resources.get("gpus_per_node", 8)
+        prefill_gpus = 0
+        decode_gpus = 0
+
+    summary = RollupSummary(
+        job_id=job_id,
+        job_name=config.get("name", "unknown"),
+        generated_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        model_path=config.get("model", {}).get("path", ""),
+        model_name=model_name,
+        precision=config.get("model", {}).get("precision", "unknown"),
+        gpu_type=resources.get("gpu_type", "unknown"),
+        gpus_per_node=resources.get("gpus_per_node", 8),
+        backend_type=backend_type,
+        frontend_type=config.get("frontend", {}).get("type", "unknown"),
+        is_disaggregated=is_disaggregated,
+        total_nodes=resources.get("prefill_nodes", 0) + resources.get("decode_nodes", 0) if is_disaggregated else resources.get("agg_nodes", 1),
+        total_gpus=total_gpus,
+        prefill_nodes=resources.get("prefill_nodes") if is_disaggregated else None,
+        decode_nodes=resources.get("decode_nodes") if is_disaggregated else None,
+        prefill_workers=resources.get("prefill_workers") if is_disaggregated else None,
+        decode_workers=resources.get("decode_workers") if is_disaggregated else None,
+        prefill_gpus=prefill_gpus if is_disaggregated else None,
+        decode_gpus=decode_gpus if is_disaggregated else None,
+        agg_nodes=resources.get("agg_nodes") if not is_disaggregated else None,
+        agg_workers=resources.get("agg_workers") if not is_disaggregated else None,
+        benchmark_type=benchmark_type,
+        isl=benchmark_config.get("isl"),
+        osl=benchmark_config.get("osl"),
+        concurrencies=benchmark_config.get("concurrencies", []),
+        nodes_summary=nodes_summary,
+        environment_config=environment_config,
+        benchmark_command=benchmark_command,
+        tags=config.get("tags", []),
+    )
+
+    # Convert results to RollupResult objects
+    for data in results:
+        result = RollupResult(
+            concurrency=data.get("max_concurrency", 0),
+            output_tps=data.get("output_throughput", 0) or data.get("output_tps", 0),
+            total_tps=data.get("total_token_throughput"),
+            request_throughput=data.get("request_throughput"),
+            mean_ttft_ms=data.get("mean_ttft_ms"),
+            mean_tpot_ms=data.get("mean_tpot_ms"),
+            mean_itl_ms=data.get("mean_itl_ms"),
+            mean_e2el_ms=data.get("mean_e2el_ms"),
+            median_ttft_ms=data.get("median_ttft_ms"),
+            median_tpot_ms=data.get("median_tpot_ms"),
+            median_itl_ms=data.get("median_itl_ms"),
+            median_e2el_ms=data.get("median_e2el_ms"),
+            p99_ttft_ms=data.get("p99_ttft_ms"),
+            p99_tpot_ms=data.get("p99_tpot_ms"),
+            p99_itl_ms=data.get("p99_itl_ms"),
+            p99_e2el_ms=data.get("p99_e2el_ms"),
+            total_input_tokens=data.get("total_input_tokens"),
+            total_output_tokens=data.get("total_output_tokens"),
+            duration=data.get("duration"),
+            completed=data.get("completed"),
+            num_prompts=data.get("num_prompts"),
+        )
+        summary.results.append(result)
+
+    # Compute summary stats
+    summary.compute_summary_stats()
+
+    # Write rollup
+    if output_dir:
+        rollup_path = output_dir / f"{job_id}_rollup.json"
+    else:
+        rollup_path = logs_dir / "rollup.json"
+
+    rollup_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(rollup_path, "w") as f:
+        json.dump(asdict(summary), f, indent=2, default=str)
+
+    logger.info("  Wrote rollup to %s", rollup_path)
+    logger.info("  Results: %d, Max TPS: %.1f", len(summary.results), summary.max_output_tps or 0)
+
+    return asdict(summary)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Rollup harness for batch processing experiment logs",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+    # Process all jobs in outputs directory
+    python -m analysis.srtlog.rollup_harness --log-dir /path/to/outputs
+
+    # Dry run - just list jobs that would be processed
+    python -m analysis.srtlog.rollup_harness --log-dir /path/to/outputs --dry-run
+
+    # Write rollups to a separate directory
+    python -m analysis.srtlog.rollup_harness --log-dir /path/to/outputs --output-dir /path/to/rollups
+
+    # Process only specific jobs
+    python -m analysis.srtlog.rollup_harness --log-dir /path/to/outputs --jobs 585 586 587
+        """,
+    )
+    parser.add_argument("--log-dir", required=True, type=Path, help="Root directory to search for jobs")
+    parser.add_argument("--output-dir", type=Path, help="Output directory for rollup files (default: in-place)")
+    parser.add_argument("--dry-run", action="store_true", help="List jobs without processing")
+    parser.add_argument("--jobs", nargs="+", help="Only process specific job IDs")
+    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
+
+    args = parser.parse_args()
+
+    if args.verbose:
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    if not args.log_dir.exists():
+        logger.error("Log directory does not exist: %s", args.log_dir)
+        sys.exit(1)
+
+    # Find all job directories
+    job_dirs = find_job_directories(args.log_dir)
+    logger.info("Found %d job directories in %s", len(job_dirs), args.log_dir)
+
+    # Filter by job IDs if specified
+    if args.jobs:
+        job_dirs = [d for d in job_dirs if d.name in args.jobs]
+        logger.info("Filtered to %d jobs: %s", len(job_dirs), [d.name for d in job_dirs])
+
+    if args.dry_run:
+        print("\nJob directories found:")
+        for job_dir in job_dirs:
+            config = load_job_config(job_dir)
+            if config:
+                backend = config.get("backend", {}).get("type", "?")
+                benchmark = config.get("benchmark", {}).get("type", "?")
+                print(f"  {job_dir.name}: backend={backend}, benchmark={benchmark}")
+            else:
+                print(f"  {job_dir.name}: (no config.yaml)")
+        return
+
+    # Process each job
+    successful = 0
+    failed = 0
+    
+    for job_dir in job_dirs:
+        try:
+            result = run_rollup_on_job(job_dir, args.output_dir)
+            if result:
+                successful += 1
+            else:
+                failed += 1
+        except Exception as e:
+            logger.error("Failed to process %s: %s", job_dir, e)
+            failed += 1
+
+    logger.info("Complete: %d successful, %d failed", successful, failed)
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/docs/architecture.md b/docs/architecture.md
index afefa85d..e7b31d9e 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -65,7 +65,7 @@ srtctl abstracts this complexity into a simple YAML interface while providing ex
                                 v
 +------------------------------------------------------------------+
 |                   ORCHESTRATION LAYER                             |
-|         SweepOrchestrator + Stage Mixins (Worker/Frontend/Bench)  |
+|   SweepOrchestrator + Stage Mixins (Worker/Frontend/Bench/Rollup) |
 +------------------------------------------------------------------+
                 |               |                |
                 v               v                v
@@ -208,7 +208,7 @@ The main orchestration class that runs inside the SLURM job:
 
 ```python
 @dataclass
-class SweepOrchestrator(WorkerStageMixin, FrontendStageMixin, BenchmarkStageMixin):
+class SweepOrchestrator(WorkerStageMixin, FrontendStageMixin, BenchmarkStageMixin, RollupStageMixin):
     config: SrtConfig
     runtime: RuntimeContext
 
@@ -396,6 +396,7 @@ src/srtctl/core/
 |   +-- WorkerStageMixin   (start_worker, start_all_workers)       |
 |   +-- FrontendStageMixin (start_nginx, start_frontend)           |
 |   +-- BenchmarkStageMixin (run_benchmark)                         |
+|   +-- RollupStageMixin   (run_rollup -> rollup.json)             |
 |                                                                   |
 | ProcessRegistry        | ManagedProcess     | Signal Handlers    |
 | - add_process()        | - name, popen      | - SIGTERM/SIGINT   |
@@ -811,6 +812,281 @@ class ManagedProcess:
 
 ---
 
+## Rollup Stage
+
+After benchmark completion, the **RollupStageMixin** consolidates all experiment data
+into a single `rollup.json` file for easy analysis and comparison.
+
+### Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────────────────────────┐
+│                              ROLLUP STAGE ARCHITECTURE                               │
+└─────────────────────────────────────────────────────────────────────────────────────┘
+
+                                    ┌─────────────────┐
+                                    │ SweepOrchestrator│
+                                    │   run_rollup()   │
+                                    └────────┬────────┘
+                                             │
+                    ┌────────────────────────┼────────────────────────┐
+                    │                        │                        │
+                    ▼                        ▼                        ▼
+     ┌──────────────────────┐  ┌──────────────────────┐  ┌──────────────────────┐
+     │_collect_benchmark_   │  │ _collect_node_       │  │_collect_environment_ │
+     │      results()       │  │     metrics()        │  │      config()        │
+     └──────────┬───────────┘  └──────────┬───────────┘  └──────────┬───────────┘
+                │                         │                         │
+                ▼                         ▼                         ▼
+┌───────────────────────────┐ ┌───────────────────────────┐ ┌───────────────────────┐
+│   BENCHMARK PARSERS       │ │     NODE PARSERS          │ │    CONFIG FILES       │
+│   analysis.srtlog.parsers │ │   analysis.srtlog.parsers │ │                       │
+│   .benchmark              │ │   .nodes                  │ │  • config.yaml        │
+├───────────────────────────┤ ├───────────────────────────┤ │  • trtllm_config_*.yaml│
+│ • SABenchParser           │ │ • SGLangNodeParser        │ └───────────────────────┘
+│ • MooncakeRouterParser    │ │ • TRTLLMNodeParser        │
+└───────────────────────────┘ └───────────────────────────┘
+                                             │
+                                             ▼
+                              ┌──────────────────────────────────────────┐
+                              │             RollupSummary                │
+                              ├──────────────────────────────────────────┤
+                              │ • job_id, model_path, backend_type       │
+                              │ • results: RollupResult[] (TPS, latency) │
+                              │ • nodes_summary: NodesSummary            │
+                              │ • environment_config: EnvironmentConfig  │
+                              │ • max_output_tps, min_mean_ttft_ms       │
+                              └──────────────────────┬───────────────────┘
+                                                     │
+                                                     ▼
+                                          ┌──────────────────┐
+                                          │   rollup.json    │
+                                          └──────────────────┘
+```
+
+### Key Dataclasses
+
+| Dataclass | Source File(s) | Purpose |
+|-----------|----------------|---------|
+| `RollupResult` | `logs/*_isl_*_osl_*/result.json` | Single benchmark result at one concurrency level (TPS, latencies) |
+| `NodeRollup` | `logs/{node}_{type}_{id}.out/err` | Per-node metrics (batches, throughput, memory, KV cache) |
+| `NodesSummary` | Aggregated from worker logs | Aggregated node statistics across all workers |
+| `EnvironmentConfig` | `config.yaml`, `trtllm_config_*.yaml` | Environment variables and engine config for prefill/decode/agg |
+| `LaunchCommandRollup` | Worker/benchmark log files | Parsed launch command parameters |
+| `RollupSummary` | **Output:** `logs/rollup.json` | Complete experiment summary combining all above |
+
+### Parser Dataclasses
+
+Parsers return lightweight dataclasses with essential fields and an `extra_args` dict for parsed values:
+
+| Dataclass | Source | Fields |
+|-----------|--------|--------|
+| `BenchmarkLaunchCommand` | `logs/benchmark.out` | `benchmark_type`, `raw_command`, `extra_args` |
+| `NodeLaunchCommand` | `logs/{node}_{type}_{id}.out/err` | `backend_type`, `worker_type`, `raw_command`, `extra_args` |
+
+The `extra_args` dict contains parsed parameters like `model`, `tp_size`, `max_concurrency`, etc.
+
+### Entity Relationship Diagram
+
+The following diagram shows how data flows from log files through parsers to the final rollup output:
+
+```
+                                    ┌─────────────────────────────────────────────────────────────┐
+                                    │                      LOG FILES                               │
+                                    └─────────────────────────────────────────────────────────────┘
+                                                              │
+                    ┌─────────────────────────────────────────┼─────────────────────────────────────────┐
+                    │                                         │                                         │
+                    ▼                                         ▼                                         ▼
+        ┌───────────────────┐                     ┌───────────────────┐                     ┌───────────────────┐
+        │  worker_*.out/err │                     │   benchmark.out   │                     │   config.yaml     │
+        └───────────────────┘                     └───────────────────┘                     └───────────────────┘
+                    │                                         │                                         │
+                    ▼                                         ▼                                         ▼
+    ┌───────────────────────────┐             ┌───────────────────────────┐             ┌───────────────────────────┐
+    │    NodeParserProtocol     │             │  BenchmarkParserProtocol  │             │      YAML Loader          │
+    │  (sglang.py / trtllm.py)  │             │  (sa_bench.py / etc.)     │             │                           │
+    └───────────────────────────┘             └───────────────────────────┘             └───────────────────────────┘
+                    │                                         │                                         │
+          ┌────────┴────────┐                       ┌────────┴────────┐                                 │
+          ▼                 ▼                       ▼                 ▼                                 ▼
+┌─────────────────┐  ┌─────────────────┐  ┌─────────────────┐  ┌─────────────────┐        ┌─────────────────────┐
+│  NodeMetrics    │  │NodeLaunchCommand│  │  dict (result)  │  │BenchmarkLaunch  │        │  EnvironmentConfig  │
+│  (models.py)    │  │ (__init__.py)   │  │                 │  │    Command      │        │  (rollup/models.py) │
+└─────────────────┘  └─────────────────┘  └─────────────────┘  └─────────────────┘        └─────────────────────┘
+        │                    │                     │                    │                          │
+        └────────────────────┼─────────────────────┼────────────────────┼──────────────────────────┘
+                             │                     │                    │
+                             ▼                     ▼                    ▼
+                    ┌────────────────────────────────────────────────────────────┐
+                    │                    RollupStageMixin                         │
+                    │              (rollup_stage.py / rollup_harness.py)          │
+                    └────────────────────────────────────────────────────────────┘
+                                                   │
+                                                   │ transforms
+                                                   ▼
+                                          ┌───────────────┐
+                                          │ RollupSummary │
+                                          └───────┬───────┘
+                                                  │
+                                                  ▼
+                                          ┌───────────────┐
+                                          │  rollup.json  │
+                                          └───────────────┘
+```
+
+### Detailed Entity Relationships
+
+```
+┌─────────────────────────────────────────────────────────────────────────────────────────┐
+│                              PARSER LAYER (analysis/srtlog/parsers/)                     │
+├─────────────────────────────────────────────────────────────────────────────────────────┤
+│                                                                                          │
+│  ┌────────────────────┐         ┌────────────────────┐         ┌────────────────────┐   │
+│  │  NodeLaunchCommand │         │ BenchmarkLaunch    │         │    NodeMetrics     │   │
+│  ├────────────────────┤         │      Command       │         ├────────────────────┤   │
+│  │ backend_type: str  │         ├────────────────────┤         │ node_info: dict    │   │
+│  │ worker_type: str   │         │ benchmark_type: str│         │ config: dict       │   │
+│  │ raw_command: str   │         │ raw_command: str   │         │ batches: list      │   │
+│  │ extra_args: dict   │         │ extra_args: dict   │         │ memory_snapshots   │   │
+│  └────────────────────┘         └────────────────────┘         └────────────────────┘   │
+│                                                                                          │
+└─────────────────────────────────────────────────────────────────────────────────────────┘
+                                              │
+                                              ▼
+┌─────────────────────────────────────────────────────────────────────────────────────────┐
+│                           ROLLUP LAYER (srtctl/cli/mixins/rollup/)                       │
+├─────────────────────────────────────────────────────────────────────────────────────────┤
+│                                                                                          │
+│  ┌─────────────────────────────────────────────────────────────────────────────────┐    │
+│  │                            LaunchCommandRollup                                   │    │
+│  ├─────────────────────────────────────────────────────────────────────────────────┤    │
+│  │  raw_command: str          │  command_type: str ("worker" | "benchmark")        │    │
+│  │  model_path: str | None    │  served_model_name: str | None                     │    │
+│  │  worker_type: str | None   │  backend_type: str | None                          │    │
+│  │  tp_size: int | None       │  pp_size: int | None                               │    │
+│  │  dp_size: int | None       │  ep_size: int | None                               │    │
+│  │  max_num_seqs: int | None  │  max_model_len: int | None                         │    │
+│  └─────────────────────────────────────────────────────────────────────────────────┘    │
+│                                           │ 1:1                                          │
+│                                           ▼                                              │
+│  ┌─────────────────────────────────────────────────────────────────────────────────┐    │
+│  │                                 NodeRollup                                       │    │
+│  ├─────────────────────────────────────────────────────────────────────────────────┤    │
+│  │  node_name: str              │  worker_type: str                                 │    │
+│  │  worker_id: str              │  tp_size, pp_size, dp_size, ep_size: int | None  │    │
+│  │  launch_command: LaunchCommandRollup | None                                      │    │
+│  │  avail_mem_gb: float | None  │  kv_cache_gb: float | None                       │    │
+│  │  total_batches: int          │  avg_gen_throughput: float | None                │    │
+│  └─────────────────────────────────────────────────────────────────────────────────┘    │
+│                                           │ 1:N                                          │
+│                                           ▼                                              │
+│  ┌─────────────────────────────────────────────────────────────────────────────────┐    │
+│  │                               NodesSummary                                       │    │
+│  ├─────────────────────────────────────────────────────────────────────────────────┤    │
+│  │  nodes: list[NodeRollup]                                                         │    │
+│  │  total_prefill_nodes: int    │  total_decode_nodes: int                         │    │
+│  │  total_agg_nodes: int        │  total_kv_cache_gb: float | None                 │    │
+│  └─────────────────────────────────────────────────────────────────────────────────┘    │
+│                                                                                          │
+│  ┌─────────────────────────────────────────────────────────────────────────────────┐    │
+│  │                               RollupResult                                       │    │
+│  ├─────────────────────────────────────────────────────────────────────────────────┤    │
+│  │  concurrency: int            │  output_tps: float     │  total_tps              │    │
+│  │  request_throughput          │  mean_ttft_ms: float   │  {mean,median,p99}_*_ms │    │
+│  │  total_input/output_tokens   │  duration, completed   │  num_prompts            │    │
+│  └─────────────────────────────────────────────────────────────────────────────────┘    │
+│                                                                                          │
+│  ┌─────────────────────────────────────────────────────────────────────────────────┐    │
+│  │                            EnvironmentConfig                                     │    │
+│  ├─────────────────────────────────────────────────────────────────────────────────┤    │
+│  │  prefill_environment: dict   │  decode_environment: dict                        │    │
+│  │  aggregated_environment: dict│  prefill_engine_config: dict                     │    │
+│  │  decode_engine_config: dict  │  aggregated_engine_config: dict                  │    │
+│  └─────────────────────────────────────────────────────────────────────────────────┘    │
+│                                                                                          │
+│                    ┌──────────────────────┬──────────────────────┐                      │
+│                    │                      │                      │                      │
+│                    ▼                      ▼                      ▼                      │
+│  ┌─────────────────────────────────────────────────────────────────────────────────┐    │
+│  │                              RollupSummary                                       │    │
+│  ├─────────────────────────────────────────────────────────────────────────────────┤    │
+│  │  job_id: str                 │  model_name: str         │  backend_type: str    │    │
+│  │  benchmark_type: str         │  is_disaggregated: bool  │  total_nodes: int     │    │
+│  │  max_output_tps: float       │  max_request_tps: float                          │    │
+│  │  results: list[RollupResult]                                                     │    │
+│  │  nodes_summary: NodesSummary | None                                              │    │
+│  │  benchmark_command: LaunchCommandRollup | None                                   │    │
+│  │  environment_config: EnvironmentConfig | None                                    │    │
+│  │  tags: list                  │  generated_at: str                                │    │
+│  └─────────────────────────────────────────────────────────────────────────────────┘    │
+│                                                                                          │
+└─────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+### Cardinality Summary
+
+| Parent | Child | Relationship |
+|--------|-------|--------------|
+| `RollupSummary` | `RollupResult` | 1:N (one per concurrency level) |
+| `RollupSummary` | `NodesSummary` | 1:1 |
+| `RollupSummary` | `LaunchCommandRollup` | 1:1 (benchmark command) |
+| `RollupSummary` | `EnvironmentConfig` | 1:1 |
+| `NodesSummary` | `NodeRollup` | 1:N (one per worker) |
+| `NodeRollup` | `LaunchCommandRollup` | 1:1 (worker command) |
+
+### Modular Parser System
+
+Parsers are registered via decorators and accessed through a registry:
+
+```python
+# Register a new parser
+@register_benchmark_parser("my-bench")
+class MyBenchParser:
+    def parse_result_json(self, path: Path) -> dict: ...
+    def parse_launch_command(self, log: str) -> BenchmarkLaunchCommand: ...
+
+# Use the parser
+from analysis.srtlog.parsers import get_benchmark_parser, get_node_parser
+
+bench_parser = get_benchmark_parser("sa-bench")
+node_parser = get_node_parser("trtllm")
+```
+
+### Output Format
+
+The `rollup.json` file contains:
+
+```json
+{
+  "job_id": "12345",
+  "model_path": "/model/llama-70b",
+  "backend_type": "trtllm",
+  
+  "results": [
+    {"concurrency": 16, "output_tps": 2500.0, "mean_ttft_ms": 45.2},
+    {"concurrency": 32, "output_tps": 4000.0, "mean_ttft_ms": 52.1}
+  ],
+  
+  "nodes_summary": {
+    "total_prefill_nodes": 1,
+    "total_decode_nodes": 7,
+    "avg_decode_gen_throughput": 533.1,
+    "nodes": [{"node_name": "worker-0", "total_batches": 5531, ...}]
+  },
+  
+  "environment_config": {
+    "prefill_environment": {"UCX_TLS": "...", "TRTLLM_ENABLE_PDL": "1"},
+    "prefill_engine_config": {"tensor_parallel_size": 8, "max_batch_size": 2}
+  },
+  
+  "max_output_tps": 4000.0,
+  "min_mean_ttft_ms": 45.2
+}
+```
+
+---
+
 ## Extension Points
 
 ### How to Add a New Backend
@@ -1052,6 +1328,7 @@ src/srtctl/
 |       |-- worker_stage.py      # Backend worker startup
 |       |-- frontend_stage.py    # Frontend/nginx startup
 |       |-- benchmark_stage.py   # Benchmark execution
+|       |-- rollup_stage.py      # Experiment data consolidation
 |
 |-- benchmarks/              # Benchmark runners
 |   |-- __init__.py          # Registry and exports
@@ -1072,6 +1349,21 @@ src/srtctl/
 |-- templates/               # Jinja2 templates
     |-- job_script_minimal.j2    # sbatch script template
     |-- nginx.conf.j2            # nginx load balancer config
+
+analysis/srtlog/              # Log analysis and parsing
+|-- __init__.py
+|-- models.py                 # NodeMetrics, BatchMetrics, MemoryMetrics
+|-- log_parser.py             # Legacy NodeAnalyzer
+|-- parsers/                  # Modular parser system
+    |-- __init__.py           # Parser registry and protocols
+    |-- benchmark/            # Benchmark result parsers
+    |   |-- __init__.py
+    |   |-- sa_bench.py       # SA-Bench result parser
+    |   |-- mooncake_router.py# Mooncake router parser
+    |-- nodes/                # Worker log parsers
+        |-- __init__.py
+        |-- sglang.py         # SGLang log parser (DP/TP tags)
+        |-- trtllm.py         # TRTLLM log parser (iteration logs)
 ```
 
 ---
@@ -1082,9 +1374,11 @@ srtctl is a well-architected orchestration framework with:
 
 - **Clean separation of concerns**: Config, runtime, backend, frontend, benchmark layers
 - **Strong typing**: Frozen dataclasses with marshmallow validation
-- **Extensibility**: Protocol-based backends/frontends, decorator-based benchmark registration
+- **Extensibility**: Protocol-based backends/frontends, decorator-based benchmark/parser registration
 - **Robust process management**: Registry, monitoring, graceful cleanup
 - **SLURM integration**: Proper container mounts, srun launching, nodelist parsing
+- **Experiment consolidation**: Rollup stage aggregates results, node metrics, and configs into rollup.json
+- **Modular parsing**: Pluggable parsers for different benchmark types and backend log formats
 - **Modern Python**: 3.10+ syntax, comprehensive type hints, clear module structure
 
 The codebase follows Python best practices and provides a solid foundation for orchestrating complex LLM inference workloads on SLURM clusters.
diff --git a/pyproject.toml b/pyproject.toml
index f2f6a6e4..21dda7ec 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,7 @@ dependencies = [
     "requests>=2.31.0",
     "rich>=13.0.0",
     "questionary>=2.0.0",
+    "pandas>=2.3.3",
 ]
 
 [project.scripts]
diff --git a/src/srtctl/README.md b/src/srtctl/README.md
index 8b0f7988..373b69bc 100644
--- a/src/srtctl/README.md
+++ b/src/srtctl/README.md
@@ -11,7 +11,12 @@ srtctl/
 ├── cli/
 │   ├── submit.py            # srtctl apply - job submission
 │   ├── do_sweep.py          # srtctl-sweep - main orchestrator
-│   └── setup_head.py        # Head node infrastructure (NATS/etcd)
+│   ├── setup_head.py        # Head node infrastructure (NATS/etcd)
+│   └── mixins/
+│       ├── worker_stage.py  # Worker startup mixin
+│       ├── frontend_stage.py# Frontend/NGINX startup mixin
+│       ├── benchmark_stage.py# Benchmark execution mixin
+│       └── rollup_stage.py  # Experiment data consolidation
 ├── core/
 │   ├── config.py            # Config loading and srtslurm.yaml resolution
 │   ├── runtime.py           # RuntimeContext - single source of truth
@@ -26,7 +31,8 @@ srtctl/
 │       └── get_node_ip.sh   # IP detection bash functions
 ├── backends/
 │   ├── base.py              # BackendProtocol interface
-│   └── sglang.py            # SGLang implementation
+│   ├── sglang.py            # SGLang implementation
+│   └── trtllm.py            # TensorRT-LLM implementation
 ├── benchmarks/
 │   ├── base.py              # BenchmarkRunner ABC
 │   ├── sa_bench.py          # Serving benchmark
@@ -130,17 +136,119 @@ resources:
 
 `CUDA_VISIBLE_DEVICES` is automatically set per worker (e.g., `0,1,2,3` and `4,5,6,7`).
 
+### Rollup Stage
+
+After benchmark completion, the rollup stage consolidates all experiment data into
+a single `rollup.json` file for easy analysis:
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                           ROLLUP STAGE PIPELINE                             │
+├─────────────────────────────────────────────────────────────────────────────┤
+│                                                                             │
+│  INPUT FILES                          OUTPUT                                │
+│  ───────────                          ──────                                │
+│  • benchmark results (*.json)    ──┐                                        │
+│  • worker logs (*.out/*.err)     ──┼──►  rollup.json                        │
+│  • config.yaml                   ──┤     • benchmark results                │
+│  • engine configs (*.yaml)       ──┘     • node metrics                     │
+│                                          • environment config               │
+│                                          • summary statistics               │
+└─────────────────────────────────────────────────────────────────────────────┘
+```
+
+**Data collected:**
+
+| Component | Source | Output |
+|-----------|--------|--------|
+| Benchmark Results | `sa-bench_*/results_*.json` | `RollupResult[]` with TPS, latencies |
+| Node Metrics | `worker-*.out` logs | `NodesSummary` with batches, memory, throughput |
+| Environment Config | `config.yaml`, `trtllm_config_*.yaml` | `EnvironmentConfig` with env vars, engine settings |
+| Launch Commands | `benchmark.out`, worker logs | Parsed command parameters |
+
+**Modular Parser System:**
+
+The rollup uses pluggable parsers from `analysis.srtlog.parsers`:
+
+```python
+# Benchmark parsers (parse result JSON files)
+from analysis.srtlog.parsers import get_benchmark_parser
+parser = get_benchmark_parser("sa-bench")  # or "mooncake-router"
+results = parser.parse_result_directory(log_dir)
+
+# Node parsers (parse worker log files)  
+from analysis.srtlog.parsers import get_node_parser
+parser = get_node_parser("trtllm")  # or "sglang"
+nodes = parser.parse_logs(log_dir)
+```
+
+**Example rollup.json structure:**
+
+```json
+{
+  "job_id": "12345",
+  "job_name": "disagg-benchmark",
+  "model_path": "/model/llama-70b",
+  "backend_type": "trtllm",
+  
+  "results": [
+    {"concurrency": 16, "output_tps": 2500.0, "mean_ttft_ms": 45.2, ...},
+    {"concurrency": 32, "output_tps": 4000.0, "mean_ttft_ms": 52.1, ...}
+  ],
+  
+  "nodes_summary": {
+    "total_prefill_nodes": 1,
+    "total_decode_nodes": 7,
+    "avg_decode_gen_throughput": 533.1,
+    "total_kv_cache_gb": 325.0,
+    "nodes": [
+      {
+        "node_name": "worker-0",
+        "worker_type": "prefill",
+        "total_batches": 1523,
+        "avg_input_throughput": 21565.5,
+        "mem_usage_gb": 91.46,
+        "kv_cache_gb": 41.19
+      }
+    ]
+  },
+  
+  "environment_config": {
+    "prefill_environment": {"UCX_TLS": "rc,dc,ud,...", "TRTLLM_ENABLE_PDL": "1"},
+    "decode_environment": {"UCX_TLS": "rc,dc,ud,..."},
+    "prefill_engine_config": {"tensor_parallel_size": 8, "max_batch_size": 2},
+    "decode_engine_config": {"tensor_parallel_size": 8, "max_batch_size": 32}
+  },
+  
+  "max_output_tps": 4000.0,
+  "min_mean_ttft_ms": 45.2
+}
+```
+
 ## Files Overview
 
-| File                 | Purpose                                  |
-| -------------------- | ---------------------------------------- |
-| `core/config.py`     | YAML loading, srtslurm.yaml resolution   |
-| `core/runtime.py`    | Computed paths/values (RuntimeContext)   |
-| `core/topology.py`   | Worker topology and GPU allocation       |
-| `core/processes.py`  | Process lifecycle management             |
-| `core/slurm.py`      | SLURM srun launching, node IP resolution |
-| `core/health.py`     | Health checks, worker readiness polling  |
-| `core/ip_utils/`     | Bash-based IP detection utilities        |
-| `cli/do_sweep.py`    | Main orchestrator (runs on head node)    |
-| `backends/sglang.py` | SGLang backend implementation            |
-| `benchmarks/base.py` | BenchmarkRunner ABC                      |
+| File | Purpose |
+| ---- | ------- |
+| `core/config.py` | YAML loading, srtslurm.yaml resolution |
+| `core/runtime.py` | Computed paths/values (RuntimeContext) |
+| `core/topology.py` | Worker topology and GPU allocation |
+| `core/processes.py` | Process lifecycle management |
+| `core/slurm.py` | SLURM srun launching, node IP resolution |
+| `core/health.py` | Health checks, worker readiness polling |
+| `core/ip_utils/` | Bash-based IP detection utilities |
+| `cli/do_sweep.py` | Main orchestrator (runs on head node) |
+| `cli/mixins/rollup_stage.py` | Experiment data consolidation to rollup.json |
+| `backends/sglang.py` | SGLang backend implementation |
+| `backends/trtllm.py` | TensorRT-LLM backend implementation |
+| `benchmarks/base.py` | BenchmarkRunner ABC |
+
+### Related Analysis Modules
+
+| File | Purpose |
+| ---- | ------- |
+| `analysis/srtlog/parsers/__init__.py` | Parser registry and protocols |
+| `analysis/srtlog/parsers/benchmark/sa_bench.py` | SA-Bench result parser |
+| `analysis/srtlog/parsers/benchmark/mooncake_router.py` | Mooncake router result parser |
+| `analysis/srtlog/parsers/nodes/sglang.py` | SGLang worker log parser |
+| `analysis/srtlog/parsers/nodes/trtllm.py` | TRTLLM worker log parser |
+| `analysis/srtlog/models.py` | Data models (NodeMetrics, BatchMetrics, etc.) |
diff --git a/src/srtctl/benchmarks/scripts/gpqa/bench.sh b/src/srtctl/benchmarks/scripts/gpqa/bench.sh
index 01670aa2..a0d563d0 100644
--- a/src/srtctl/benchmarks/scripts/gpqa/bench.sh
+++ b/src/srtctl/benchmarks/scripts/gpqa/bench.sh
@@ -26,14 +26,9 @@ export OPENAI_API_KEY="${OPENAI_API_KEY:-EMPTY}"
 
 echo "Running GPQA evaluation..."
 
-python3 -m sglang.test.run_eval \
-    --base-url "${ENDPOINT}" \
-    --model "${MODEL_NAME}" \
-    --eval-name gpqa \
-    --num-examples "${NUM_EXAMPLES}" \
-    --max-tokens "${MAX_TOKENS}" \
-    --repeat "${REPEAT}" \
-    --num-threads "${NUM_THREADS}"
+command="python3 -m sglang.test.run_eval --base-url ${ENDPOINT} --model ${MODEL_NAME} --eval-name gpqa --num-examples ${NUM_EXAMPLES} --max-tokens ${MAX_TOKENS} --repeat ${REPEAT} --num-threads ${NUM_THREADS}"
+echo "[CMD] $command"
+eval "$command"  # quoted: eval parses the logged string once, with no prior word splitting or globbing
 
 # Copy result file
 result_file=$(ls -t /tmp/gpqa_*.json 2>/dev/null | head -n1)
@@ -45,4 +40,3 @@ else
 fi
 
 echo "GPQA evaluation complete"
-
diff --git a/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh b/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh
index 7a1643b8..bad38618 100644
--- a/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh
+++ b/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh
@@ -28,25 +28,19 @@ export OPENAI_API_KEY="${OPENAI_API_KEY:-EMPTY}"
 echo "Running LongBench-v2 evaluation..."
 
 # Build command
-cmd="python3 -m sglang.test.run_eval \
-    --base-url ${ENDPOINT} \
-    --model ${MODEL_NAME} \
-    --eval-name longbench_v2 \
-    --max-tokens ${MAX_TOKENS} \
-    --max-context-length ${MAX_CONTEXT_LENGTH} \
-    --num-threads ${NUM_THREADS}"
+command="python3 -m sglang.test.run_eval --base-url ${ENDPOINT} --model ${MODEL_NAME} --eval-name longbench_v2 --max-tokens ${MAX_TOKENS} --max-context-length ${MAX_CONTEXT_LENGTH} --num-threads ${NUM_THREADS}"
 
 # Add optional arguments
 if [ -n "$NUM_EXAMPLES" ]; then
-    cmd="$cmd --num-examples ${NUM_EXAMPLES}"
+    command="$command --num-examples ${NUM_EXAMPLES}"
 fi
 
 if [ -n "$CATEGORIES" ]; then
-    cmd="$cmd --categories ${CATEGORIES}"
+    command="$command --categories ${CATEGORIES}"
 fi
 
-echo "Executing: $cmd"
-eval "$cmd"
+echo "[CMD] $command"
+eval "$command"  # quoted (as the replaced `eval "$cmd"` was): no word splitting or globbing before eval parses it
 
 # Copy result files
 result_file=$(ls -t /tmp/longbench_v2_*.json 2>/dev/null | head -n1)
@@ -64,4 +58,3 @@ if [ -f "$html_file" ]; then
 fi
 
 echo "LongBench-v2 evaluation complete"
-
diff --git a/src/srtctl/benchmarks/scripts/mmlu/bench.sh b/src/srtctl/benchmarks/scripts/mmlu/bench.sh
index aff149ce..adbecafd 100644
--- a/src/srtctl/benchmarks/scripts/mmlu/bench.sh
+++ b/src/srtctl/benchmarks/scripts/mmlu/bench.sh
@@ -26,14 +26,9 @@ export OPENAI_API_KEY="${OPENAI_API_KEY:-EMPTY}"
 
 echo "Running MMLU evaluation..."
 
-python3 -m sglang.test.run_eval \
-    --base-url "${ENDPOINT}" \
-    --model "${MODEL_NAME}" \
-    --eval-name mmlu \
-    --num-examples "${NUM_EXAMPLES}" \
-    --max-tokens "${MAX_TOKENS}" \
-    --repeat "${REPEAT}" \
-    --num-threads "${NUM_THREADS}"
+command="python3 -m sglang.test.run_eval --base-url ${ENDPOINT} --model ${MODEL_NAME} --eval-name mmlu --num-examples ${NUM_EXAMPLES} --max-tokens ${MAX_TOKENS} --repeat ${REPEAT} --num-threads ${NUM_THREADS}"
+echo "[CMD] $command"
+eval "$command"  # quoted: eval parses the logged string once, with no prior word splitting or globbing
 
 # Copy result file
 result_file=$(ls -t /tmp/mmlu_*.json 2>/dev/null | head -n1)
@@ -45,4 +40,3 @@ else
 fi
 
 echo "MMLU evaluation complete"
-
diff --git a/src/srtctl/benchmarks/scripts/mooncake-router/bench.sh b/src/srtctl/benchmarks/scripts/mooncake-router/bench.sh
index e84d711c..723630a6 100644
--- a/src/srtctl/benchmarks/scripts/mooncake-router/bench.sh
+++ b/src/srtctl/benchmarks/scripts/mooncake-router/bench.sh
@@ -57,13 +57,9 @@ fi
 
 # Run small benchmark for warmup
 echo "Running small benchmark for warmup..."
-aiperf profile \
-    -m "${MODEL_NAME}" \
-    --url "${ENDPOINT}" \
-    --streaming \
-    --ui simple \
-    --concurrency 10 \
-    --request-count 20
+command="aiperf profile -m ${MODEL_NAME} --url ${ENDPOINT} --streaming --ui simple --concurrency 10 --request-count 20"
+echo "[CMD] $command"
+eval "$command"  # quoted: eval parses the logged string once, with no prior word splitting or globbing
 echo "Small benchmark for warmup complete"
 
 # Setup artifact directory with model and timestamp
@@ -80,17 +76,9 @@ echo ""
 echo "$(date '+%Y-%m-%d %H:%M:%S') - Starting benchmark"
 
 # Run aiperf profile exactly as dynamo does
-aiperf profile \
-    -m "${MODEL_NAME}" \
-    --input-file "${INPUT_FILE}" \
-    --custom-dataset-type mooncake_trace \
-    --fixed-schedule \
-    --url "${ENDPOINT}" \
-    --streaming \
-    --random-seed 42 \
-    --ui simple \
-    --artifact-dir "${RUN_ARTIFACT_DIR}" \
-    --goodput "time_to_first_token:${TTFT_THRESHOLD} inter_token_latency:${ITL_THRESHOLD}"
+command="aiperf profile -m ${MODEL_NAME} --input-file ${INPUT_FILE} --custom-dataset-type mooncake_trace --fixed-schedule --url ${ENDPOINT} --streaming --random-seed 42 --ui simple --artifact-dir ${RUN_ARTIFACT_DIR} --goodput \"time_to_first_token:${TTFT_THRESHOLD} inter_token_latency:${ITL_THRESHOLD}\""
+echo "[CMD] $command"
+eval "$command"  # quoted: the embedded \"...\" around --goodput is parsed by eval itself, not pre-split by the shell
 
 BENCH_EXIT_CODE=$?
 
diff --git a/src/srtctl/benchmarks/scripts/profiling/profile.sh b/src/srtctl/benchmarks/scripts/profiling/profile.sh
index 6b426c34..b971b883 100644
--- a/src/srtctl/benchmarks/scripts/profiling/profile.sh
+++ b/src/srtctl/benchmarks/scripts/profiling/profile.sh
@@ -130,27 +130,19 @@ done
 if [[ "${PROFILING_MODE}" == "prefill" ]]; then
     echo ""
     echo "Generating profiling traffic..."
-    python3 -m sglang.bench_serving \
-        --backend sglang \
-        --model "${model_name}" \
-        --host "${head_node}" --port "${head_port}" \
-        --dataset-name random \
-        --max-concurrency "${PROFILE_CONCURRENCY}" \
-        --num-prompts 128 \
-        --random-input-len "${PROFILE_ISL}" \
-        --random-output-len "${PROFILE_OSL}" \
-        --random-range-ratio 1 \
-        --warmup-request 0
+    
+    command="python3 -m sglang.bench_serving --backend sglang --model ${model_name} --host ${head_node} --port ${head_port} --dataset-name random --max-concurrency ${PROFILE_CONCURRENCY} --num-prompts 128 --random-input-len ${PROFILE_ISL} --random-output-len ${PROFILE_OSL} --random-range-ratio 1 --warmup-request 0"
+    echo "[CMD] $command"
+    eval "$command"  # quoted: eval parses the logged string once, with no prior word splitting or globbing
 
     # Run lm-eval for additional profiling coverage
     echo ""
     echo "Running lm-eval..."
     pip install lm-eval tenacity > /dev/null 2>&1
-    python -m lm_eval \
-        --model local-completions \
-        --tasks gsm8k \
-        --model_args "base_url=http://${head_node}:${head_port}/v1/completions,model=${model_name},tokenized_requests=False,tokenizer_backend=None,num_concurrent=${PROFILE_CONCURRENCY},timeout=6000,max_retries=1" \
-        --limit 10
+    
+    command="python -m lm_eval --model local-completions --tasks gsm8k --model_args \"base_url=http://${head_node}:${head_port}/v1/completions,model=${model_name},tokenized_requests=False,tokenizer_backend=None,num_concurrent=${PROFILE_CONCURRENCY},timeout=6000,max_retries=1\" --limit 10"
+    echo "[CMD] $command"
+    eval "$command"  # quoted: the embedded \"...\" around --model_args is parsed by eval itself
 fi
 
 exit_code=$?
@@ -164,4 +156,3 @@ if [[ -n "${SGLANG_TORCH_PROFILER_DIR}" ]]; then
 fi
 
 exit ${exit_code}
-
diff --git a/src/srtctl/benchmarks/scripts/router/bench.sh b/src/srtctl/benchmarks/scripts/router/bench.sh
index d559b85d..d51ed881 100644
--- a/src/srtctl/benchmarks/scripts/router/bench.sh
+++ b/src/srtctl/benchmarks/scripts/router/bench.sh
@@ -40,13 +40,8 @@ echo "Running prefix ratio benchmark..."
 echo "Results will be saved to: $result_dir"
 
 # shellcheck disable=SC2086
-python prefix_ratio_benchmark.py \
-    --prefix-ratios $PREFIX_RATIOS \
-    --isl "$ISL" \
-    --osl "$OSL" \
-    --requests "$REQUESTS" \
-    --concurrency "$CONCURRENCY" \
-    --output-dir "$result_dir"
+command="python prefix_ratio_benchmark.py --prefix-ratios $PREFIX_RATIOS --isl $ISL --osl $OSL --requests $REQUESTS --concurrency $CONCURRENCY --output-dir $result_dir"
+echo "[CMD] $command"
+eval "$command"  # quoted; eval still splits the space-separated $PREFIX_RATIOS values when it parses the string
 
 echo "Router benchmark complete. Results in $result_dir"
-
diff --git a/src/srtctl/benchmarks/scripts/sa-bench/bench.sh b/src/srtctl/benchmarks/scripts/sa-bench/bench.sh
index 99dd4022..8462556b 100644
--- a/src/srtctl/benchmarks/scripts/sa-bench/bench.sh
+++ b/src/srtctl/benchmarks/scripts/sa-bench/bench.sh
@@ -50,20 +50,11 @@ for concurrency in "${CONCURRENCY_LIST[@]}"; do
     echo "Warming up with concurrency $concurrency"
     echo "$(date '+%Y-%m-%d %H:%M:%S')"
     num_prompts=$((concurrency * 2))
-    python3 -u "${WORK_DIR}/benchmark_serving.py" \
-        --model "${MODEL_NAME}" --tokenizer "${MODEL_PATH}" \
-        --host "$HOST" --port "$PORT" \
-        --backend "dynamo" --endpoint /v1/completions \
-        --disable-tqdm \
-        --dataset-name random \
-        --num-prompts "$num_prompts" \
-        --random-input-len "$ISL" \
-        --random-output-len "$OSL" \
-        --random-range-ratio 0.8 \
-        --ignore-eos \
-        --request-rate 250 \
-        --percentile-metrics ttft,tpot,itl,e2el \
-        --max-concurrency "$concurrency"
+    
+    command="python3 -u ${WORK_DIR}/benchmark_serving.py --model ${MODEL_NAME} --tokenizer ${MODEL_PATH} --host $HOST --port $PORT --backend dynamo --endpoint /v1/completions --disable-tqdm --dataset-name random --num-prompts $num_prompts --random-input-len $ISL --random-output-len $OSL --random-range-ratio 0.8 --ignore-eos --request-rate 250 --percentile-metrics ttft,tpot,itl,e2el --max-concurrency $concurrency"
+
+    echo "[CMD] $command"
+    eval "$command"  # quoted: eval parses the logged string once, with no prior word splitting or globbing
 done
 
 # Benchmark
@@ -83,22 +74,10 @@ for concurrency in "${CONCURRENCY_LIST[@]}"; do
     echo "Running benchmark with concurrency: $concurrency"
     echo "$(date '+%Y-%m-%d %H:%M:%S')"
     
-    python3 -u "${WORK_DIR}/benchmark_serving.py" \
-        --model "${MODEL_NAME}" --tokenizer "${MODEL_PATH}" \
-        --host "$HOST" --port "$PORT" \
-        --backend "dynamo" --endpoint /v1/completions \
-        --disable-tqdm \
-        --dataset-name random \
-        --num-prompts "$num_prompts" \
-        --random-input-len "$ISL" \
-        --random-output-len "$OSL" \
-        --random-range-ratio 0.8 \
-        --ignore-eos \
-        --request-rate "${REQ_RATE}" \
-        --percentile-metrics ttft,tpot,itl,e2el \
-        --max-concurrency "$concurrency" \
-        --use-chat-template \
-        --save-result --result-dir "$result_dir" --result-filename "$result_filename"
+    command="python3 -u ${WORK_DIR}/benchmark_serving.py --model ${MODEL_NAME} --tokenizer ${MODEL_PATH} --host $HOST --port $PORT --backend dynamo --endpoint /v1/completions --disable-tqdm --dataset-name random --num-prompts $num_prompts --random-input-len $ISL --random-output-len $OSL --random-range-ratio 0.8 --ignore-eos --request-rate ${REQ_RATE} --percentile-metrics ttft,tpot,itl,e2el --max-concurrency $concurrency --use-chat-template --save-result --result-dir $result_dir --result-filename $result_filename"
+
+    echo "[CMD] $command"
+    eval "$command"  # quoted: eval parses the logged string once, with no prior word splitting or globbing
     
     echo "$(date '+%Y-%m-%d %H:%M:%S')"
     echo "Completed benchmark with concurrency: $concurrency"
@@ -106,4 +85,3 @@ for concurrency in "${CONCURRENCY_LIST[@]}"; do
 done
 
 echo "SA-Bench complete. Results in $result_dir"
-
diff --git a/src/srtctl/cli/do_sweep.py b/src/srtctl/cli/do_sweep.py
index 9cb3e577..8dbe6624 100644
--- a/src/srtctl/cli/do_sweep.py
+++ b/src/srtctl/cli/do_sweep.py
@@ -21,7 +21,7 @@
 from dataclasses import dataclass
 from pathlib import Path
 
-from srtctl.cli.mixins import BenchmarkStageMixin, FrontendStageMixin, WorkerStageMixin
+from srtctl.cli.mixins import BenchmarkStageMixin, FrontendStageMixin, RollupStageMixin, WorkerStageMixin
 from srtctl.core.config import load_config
 from srtctl.core.health import wait_for_port
 from srtctl.core.processes import (
@@ -40,7 +40,7 @@
 
 
 @dataclass
-class SweepOrchestrator(WorkerStageMixin, FrontendStageMixin, BenchmarkStageMixin):
+class SweepOrchestrator(WorkerStageMixin, FrontendStageMixin, BenchmarkStageMixin, RollupStageMixin):
     """Main orchestrator for benchmark sweeps.
 
     Usage:
@@ -208,6 +208,11 @@ def run(self) -> int:
 
             exit_code = self.run_benchmark(registry, stop_event)
 
+            # Run rollup to consolidate experiment data
+            if exit_code == 0:
+                tags = self.config.tags if hasattr(self.config, "tags") else []
+                self.run_rollup(tags=tags)
+
         except Exception as e:
             logger.exception("Error during sweep: %s", e)
             exit_code = 1
diff --git a/src/srtctl/cli/mixins/__init__.py b/src/srtctl/cli/mixins/__init__.py
index c2149014..feb1bae9 100644
--- a/src/srtctl/cli/mixins/__init__.py
+++ b/src/srtctl/cli/mixins/__init__.py
@@ -8,14 +8,17 @@
 - WorkerStageMixin: Backend worker process startup
 - FrontendStageMixin: Frontend/nginx orchestration
 - BenchmarkStageMixin: Benchmark execution
+- RollupStageMixin: Experiment data consolidation
 """
 
 from srtctl.cli.mixins.benchmark_stage import BenchmarkStageMixin
 from srtctl.cli.mixins.frontend_stage import FrontendStageMixin
+from srtctl.cli.mixins.rollup_stage import RollupStageMixin
 from srtctl.cli.mixins.worker_stage import WorkerStageMixin
 
 __all__ = [
     "WorkerStageMixin",
     "FrontendStageMixin",
     "BenchmarkStageMixin",
+    "RollupStageMixin",
 ]
diff --git a/src/srtctl/cli/mixins/rollup/__init__.py b/src/srtctl/cli/mixins/rollup/__init__.py
new file mode 100644
index 00000000..9444b5b5
--- /dev/null
+++ b/src/srtctl/cli/mixins/rollup/__init__.py
@@ -0,0 +1,32 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Rollup dataclasses for experiment data consolidation.
+
+This module provides dataclasses for:
+- RollupResult: Single benchmark result at one concurrency level
+- RollupSummary: Complete experiment summary
+- NodeRollup: Single worker node metrics
+- NodesSummary: Summary of all worker nodes
+- EnvironmentConfig: Environment variables and engine config
+- LaunchCommandRollup: Parsed launch command information
+"""
+
+from srtctl.cli.mixins.rollup.models import (
+    EnvironmentConfig,
+    LaunchCommandRollup,
+    NodeRollup,
+    NodesSummary,
+    RollupResult,
+    RollupSummary,
+)
+
+__all__ = [
+    "RollupResult",
+    "RollupSummary",
+    "NodeRollup",
+    "NodesSummary",
+    "EnvironmentConfig",
+    "LaunchCommandRollup",
+]
diff --git a/src/srtctl/cli/mixins/rollup/models.py b/src/srtctl/cli/mixins/rollup/models.py
new file mode 100644
index 00000000..c83bc101
--- /dev/null
+++ b/src/srtctl/cli/mixins/rollup/models.py
@@ -0,0 +1,566 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Rollup dataclasses for experiment data consolidation.
+
+These models represent the structure of rollup.json output.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from analysis.srtlog.models import NodeMetrics
+
+
+@dataclass
+class LaunchCommandRollup:
+    """Parsed launch command information for a worker or benchmark.
+
+    Source (worker): logs/{node}_{worker_type}_{worker_id}.out or .err
+    Source (benchmark): logs/benchmark.out
+    """
+
+    raw_command: str  # presumably the unparsed command text as found in the log — confirm with the parser
+    command_type: str  # "worker" or "benchmark"
+
+    # Fields common to worker and benchmark commands (None by default)
+    model_path: str | None = None
+    served_model_name: str | None = None
+
+    # Worker-specific fields (None by default)
+    worker_type: str | None = None  # prefill, decode, agg
+    backend_type: str | None = None
+    disaggregation_mode: str | None = None
+    tp_size: int | None = None  # presumably tensor-parallel size — TODO confirm
+    pp_size: int | None = None  # presumably pipeline-parallel size — TODO confirm
+    dp_size: int | None = None  # presumably data-parallel size — TODO confirm
+    ep_size: int | None = None  # presumably expert-parallel size — TODO confirm
+    port: int | None = None
+    max_num_seqs: int | None = None
+    max_model_len: int | None = None
+
+    # Benchmark-specific fields (None by default)
+    benchmark_type: str | None = None
+    base_url: str | None = None
+    max_concurrency: int | None = None
+    num_prompts: int | None = None
+    input_len: int | None = None
+    output_len: int | None = None
+
+
+@dataclass
+class NodeRollup:
+    """Summary of metrics for a single worker node.
+
+    Source: logs/{node}_{worker_type}_{worker_id}.out and .err
+
+    Built from analysis.srtlog.models.NodeMetrics by from_node_metrics(), which aggregates per-batch values.
+    """
+
+    node_name: str
+    worker_type: str  # "prefill", "decode", or "agg" ("unknown" when node_info has no worker_type)
+    worker_id: str
+
+    # Configuration (copied from NodeMetrics.config; None when the key is absent)
+    tp_size: int | None = None
+    pp_size: int | None = None
+    dp_size: int | None = None
+    ep_size: int | None = None
+
+    # Launch command (parsed from log); from_node_metrics() does not populate it
+    launch_command: LaunchCommandRollup | None = None
+
+    # Memory metrics (from NodeMetrics.memory_snapshots)
+    avail_mem_gb: float | None = None
+    mem_usage_gb: float | None = None
+    kv_cache_gb: float | None = None
+    kv_tokens: int | None = None
+
+    # Batch statistics (aggregated from NodeMetrics.batches)
+    total_batches: int = 0
+    total_prefill_batches: int = 0
+    total_decode_batches: int = 0
+
+    # Prefill-specific stats (also used by agg workers)
+    total_new_tokens: int | None = None
+    total_cached_tokens: int | None = None
+    cache_hit_rate: float | None = None  # Percentage
+    avg_input_throughput: float | None = None  # tokens/s
+    max_input_throughput: float | None = None  # tokens/s
+
+    # Decode-specific stats (also used by agg workers)
+    avg_running_requests: float | None = None
+    max_running_requests: int | None = None
+    avg_gen_throughput: float | None = None  # tokens/s
+    max_gen_throughput: float | None = None  # tokens/s
+
+    # Queue stats (maxima over the batches that report them)
+    max_queue_requests: int | None = None
+    max_inflight_requests: int | None = None
+    max_transfer_requests: int | None = None
+
+    @property
+    def is_agg(self) -> bool:
+        """Return True when worker_type is exactly "agg"."""
+        return self.worker_type == "agg"
+
+    @classmethod
+    def from_node_metrics(cls, node: NodeMetrics) -> NodeRollup:
+        """Create NodeRollup from analysis.srtlog.models.NodeMetrics.
+
+        Args:
+            node: NodeMetrics object from NodeAnalyzer
+
+        Returns:
+            NodeRollup with aggregated statistics
+        """
+        worker_type = node.node_info.get("worker_type", "unknown")
+
+        rollup = cls(
+            node_name=node.node_info.get("node", "unknown"),
+            worker_type=worker_type,
+            worker_id=node.node_info.get("worker_id", ""),
+            tp_size=node.config.get("tp_size"),
+            pp_size=node.config.get("pp_size"),
+            dp_size=node.config.get("dp_size"),
+            ep_size=node.config.get("ep_size"),
+            total_batches=len(node.batches),
+        )
+
+        # Extract memory metrics by scanning every snapshot
+        if node.memory_snapshots:
+            # Snapshots may be partial: first non-None avail/usage wins, kv cache/tokens keep the maximum
+            for mem in node.memory_snapshots:
+                if mem.avail_mem_gb is not None and rollup.avail_mem_gb is None:
+                    rollup.avail_mem_gb = mem.avail_mem_gb
+                if mem.mem_usage_gb is not None and rollup.mem_usage_gb is None:
+                    rollup.mem_usage_gb = mem.mem_usage_gb
+                if mem.kv_cache_gb is not None:
+                    # Keep the largest kv_cache value seen (values are NOT summed across snapshots)
+                    if rollup.kv_cache_gb is None:
+                        rollup.kv_cache_gb = mem.kv_cache_gb
+                    else:
+                        rollup.kv_cache_gb = max(rollup.kv_cache_gb, mem.kv_cache_gb)
+                if mem.kv_tokens is not None:
+                    # Keep the largest kv_tokens value seen
+                    if rollup.kv_tokens is None:
+                        rollup.kv_tokens = mem.kv_tokens
+                    else:
+                        rollup.kv_tokens = max(rollup.kv_tokens, mem.kv_tokens)
+
+        # Aggregate batch metrics; the aggregation path depends on worker type and observed batch types
+        if node.batches:
+            # Check if we have mixed batch types (e.g., TRTLLM decode workers have both)
+            batch_types = {b.batch_type for b in node.batches}
+            has_mixed = "prefill" in batch_types and "decode" in batch_types
+
+            if worker_type == "agg" or has_mixed:
+                # Agg workers or workers with mixed batches are split by batch_type and aggregated per type
+                rollup._aggregate_agg_batches(node.batches)
+            elif node.is_prefill:
+                rollup._aggregate_prefill_batches(node.batches)
+            elif node.is_decode:
+                rollup._aggregate_decode_batches(node.batches)
+
+        return rollup
+
+    def _aggregate_prefill_batches(self, batches: list) -> None:
+        """Aggregate prefill metrics; every entry of *batches* is counted as a prefill batch."""
+        self.total_prefill_batches = len(batches)
+
+        new_tokens = []
+        cached_tokens = []
+        input_throughputs = []
+        queue_reqs = []
+        inflight_reqs = []
+
+        for batch in batches:
+            if batch.new_token is not None:
+                new_tokens.append(batch.new_token)
+            if batch.cached_token is not None:
+                cached_tokens.append(batch.cached_token)
+            if batch.input_throughput is not None:
+                input_throughputs.append(batch.input_throughput)
+            if batch.queue_req is not None:
+                queue_reqs.append(batch.queue_req)
+            if batch.inflight_req is not None:
+                inflight_reqs.append(batch.inflight_req)
+
+        if new_tokens:
+            self.total_new_tokens = sum(new_tokens)
+        if cached_tokens:
+            self.total_cached_tokens = sum(cached_tokens)
+
+        # Cache hit rate as a percentage of (new + cached) tokens; needs both totals
+        if self.total_new_tokens is not None and self.total_cached_tokens is not None:
+            total = self.total_new_tokens + self.total_cached_tokens
+            if total > 0:
+                self.cache_hit_rate = (self.total_cached_tokens / total) * 100
+
+        if input_throughputs:
+            self.avg_input_throughput = sum(input_throughputs) / len(input_throughputs)
+            self.max_input_throughput = max(input_throughputs)
+
+        if queue_reqs:
+            self.max_queue_requests = max(queue_reqs)
+        if inflight_reqs:
+            self.max_inflight_requests = max(inflight_reqs)
+
+    def _aggregate_decode_batches(self, batches: list) -> None:
+        """Aggregate decode metrics; every entry of *batches* is counted as a decode batch."""
+        self.total_decode_batches = len(batches)
+
+        running_reqs = []
+        gen_throughputs = []
+        queue_reqs = []
+        transfer_reqs = []
+
+        for batch in batches:
+            if batch.running_req is not None:
+                running_reqs.append(batch.running_req)
+            if batch.gen_throughput is not None:
+                gen_throughputs.append(batch.gen_throughput)
+            if batch.queue_req is not None:
+                queue_reqs.append(batch.queue_req)
+            if batch.transfer_req is not None:
+                transfer_reqs.append(batch.transfer_req)
+
+        if running_reqs:
+            self.avg_running_requests = sum(running_reqs) / len(running_reqs)
+            self.max_running_requests = max(running_reqs)
+
+        if gen_throughputs:
+            self.avg_gen_throughput = sum(gen_throughputs) / len(gen_throughputs)
+            self.max_gen_throughput = max(gen_throughputs)
+
+        if queue_reqs:
+            self.max_queue_requests = max(queue_reqs)
+        if transfer_reqs:
+            self.max_transfer_requests = max(transfer_reqs)
+
+    def _aggregate_agg_batches(self, batches: list) -> None:
+        """Aggregate metrics for agg or mixed-batch workers by splitting *batches* on batch_type."""
+        # Separate prefill and decode batches (any other batch_type is ignored by this method)
+        prefill_batches = [b for b in batches if b.batch_type == "prefill"]
+        decode_batches = [b for b in batches if b.batch_type == "decode"]
+
+        self.total_prefill_batches = len(prefill_batches)
+        self.total_decode_batches = len(decode_batches)
+
+        # Prefill metrics. NOTE(review): prefill queue_req is not collected here, unlike _aggregate_prefill_batches
+        if prefill_batches:
+            new_tokens = []
+            cached_tokens = []
+            input_throughputs = []
+            inflight_reqs = []
+
+            for batch in prefill_batches:
+                if batch.new_token is not None:
+                    new_tokens.append(batch.new_token)
+                if batch.cached_token is not None:
+                    cached_tokens.append(batch.cached_token)
+                if batch.input_throughput is not None:
+                    input_throughputs.append(batch.input_throughput)
+                if batch.inflight_req is not None:
+                    inflight_reqs.append(batch.inflight_req)
+
+            if new_tokens:
+                self.total_new_tokens = sum(new_tokens)
+            if cached_tokens:
+                self.total_cached_tokens = sum(cached_tokens)
+
+            # Cache hit rate as a percentage of (new + cached) tokens; needs both totals
+            if self.total_new_tokens is not None and self.total_cached_tokens is not None:
+                total = self.total_new_tokens + self.total_cached_tokens
+                if total > 0:
+                    self.cache_hit_rate = (self.total_cached_tokens / total) * 100
+
+            if input_throughputs:
+                self.avg_input_throughput = sum(input_throughputs) / len(input_throughputs)
+                self.max_input_throughput = max(input_throughputs)
+
+            if inflight_reqs:
+                self.max_inflight_requests = max(inflight_reqs)
+
+        # Decode metrics (max_queue_requests on this path comes from decode batches only)
+        if decode_batches:
+            running_reqs = []
+            gen_throughputs = []
+            queue_reqs = []
+            transfer_reqs = []
+
+            for batch in decode_batches:
+                if batch.running_req is not None:
+                    running_reqs.append(batch.running_req)
+                if batch.gen_throughput is not None:
+                    gen_throughputs.append(batch.gen_throughput)
+                if batch.queue_req is not None:
+                    queue_reqs.append(batch.queue_req)
+                if batch.transfer_req is not None:
+                    transfer_reqs.append(batch.transfer_req)
+
+            if running_reqs:
+                self.avg_running_requests = sum(running_reqs) / len(running_reqs)
+                self.max_running_requests = max(running_reqs)
+
+            if gen_throughputs:
+                self.avg_gen_throughput = sum(gen_throughputs) / len(gen_throughputs)
+                self.max_gen_throughput = max(gen_throughputs)
+
+            if queue_reqs:
+                self.max_queue_requests = max(queue_reqs)
+            if transfer_reqs:
+                self.max_transfer_requests = max(transfer_reqs)
+
+
+@dataclass
+class NodesSummary:
+    """Summary of all worker nodes in the experiment.
+
+    Source: Aggregated from logs/{node}_{worker_type}_{worker_id}.out and .err files
+    """
+
+    # Node counts by role (filled by from_node_metrics_list)
+    total_prefill_nodes: int = 0
+    total_decode_nodes: int = 0
+    total_agg_nodes: int = 0
+
+    # Aggregated prefill stats (from nodes whose worker_type is "prefill" or "agg")
+    total_prefill_tokens: int | None = None
+    total_cached_tokens: int | None = None
+    overall_cache_hit_rate: float | None = None  # Percentage
+    avg_prefill_input_throughput: float | None = None  # tokens/s per node
+    max_prefill_input_throughput: float | None = None  # tokens/s peak
+
+    # Aggregated decode stats (from nodes whose worker_type is "decode" or "agg")
+    avg_decode_gen_throughput: float | None = None  # tokens/s per node
+    max_decode_gen_throughput: float | None = None  # tokens/s peak
+
+    # Memory summary (sum of per-node kv_cache_gb over nodes that report one)
+    total_kv_cache_gb: float | None = None
+
+    # Per-node details
+    nodes: list[NodeRollup] = field(default_factory=list)
+
+    @classmethod
+    def from_node_metrics_list(cls, nodes: list[NodeMetrics]) -> NodesSummary:
+        """Create NodesSummary from a list of NodeMetrics.
+
+        Args:
+            nodes: List of NodeMetrics from NodeAnalyzer.parse_run_logs()
+
+        Returns:
+            NodesSummary with per-node rollups, role counts and aggregated statistics
+        """
+        summary = cls()
+
+        # Convert each NodeMetrics to NodeRollup and count it under its role
+        for node in nodes:
+            rollup = NodeRollup.from_node_metrics(node)
+            summary.nodes.append(rollup)
+
+            worker_type = node.node_info.get("worker_type", "unknown")
+            if worker_type == "agg":
+                summary.total_agg_nodes += 1
+            elif node.is_prefill:
+                summary.total_prefill_nodes += 1
+            elif node.is_decode:
+                summary.total_decode_nodes += 1
+
+        # Aggregate across all nodes
+        summary._compute_aggregate_stats()
+
+        return summary
+
+    def _compute_aggregate_stats(self) -> None:
+        """Compute aggregate statistics across self.nodes; a metric is "missing" only when it is None (0.0 counts)."""
+        # Prefill aggregation (includes both prefill and agg nodes)
+        prefill_nodes = [n for n in self.nodes if n.worker_type in ("prefill", "agg")]
+        if prefill_nodes:
+            total_new = sum(n.total_new_tokens or 0 for n in prefill_nodes)
+            total_cached = sum(n.total_cached_tokens or 0 for n in prefill_nodes)
+
+            if total_new > 0 or total_cached > 0:
+                self.total_prefill_tokens = total_new
+                self.total_cached_tokens = total_cached
+                total = total_new + total_cached
+                if total > 0:
+                    self.overall_cache_hit_rate = (total_cached / total) * 100
+
+            throughputs = [n.avg_input_throughput for n in prefill_nodes if n.avg_input_throughput is not None]
+            if throughputs:
+                self.avg_prefill_input_throughput = sum(throughputs) / len(throughputs)
+
+            max_throughputs = [n.max_input_throughput for n in prefill_nodes if n.max_input_throughput is not None]
+            if max_throughputs:
+                self.max_prefill_input_throughput = max(max_throughputs)
+
+        # Decode aggregation (includes both decode and agg nodes)
+        decode_nodes = [n for n in self.nodes if n.worker_type in ("decode", "agg")]
+        if decode_nodes:
+            throughputs = [n.avg_gen_throughput for n in decode_nodes if n.avg_gen_throughput is not None]
+            if throughputs:
+                self.avg_decode_gen_throughput = sum(throughputs) / len(throughputs)
+
+            max_throughputs = [n.max_gen_throughput for n in decode_nodes if n.max_gen_throughput is not None]
+            if max_throughputs:
+                self.max_decode_gen_throughput = max(max_throughputs)
+
+        # Memory aggregation over every node that reports a KV-cache size
+        kv_caches = [n.kv_cache_gb for n in self.nodes if n.kv_cache_gb is not None]
+        if kv_caches:
+            self.total_kv_cache_gb = sum(kv_caches)
+
+
+@dataclass
+class RollupResult:
+    """Consolidated benchmark result for a single concurrency level.
+
+    Source: logs/*_isl_*_osl_*/result.json or benchmark.out
+    """
+
+    concurrency: int
+    output_tps: float
+    total_tps: float | None = None
+    request_throughput: float | None = None
+    request_goodput: float | None = None
+    request_rate: float | str | None = None  # str is allowed — presumably for non-numeric rates such as "inf"; confirm
+
+    # Mean latencies (milliseconds, per the _ms suffix)
+    mean_ttft_ms: float | None = None
+    mean_tpot_ms: float | None = None
+    mean_itl_ms: float | None = None
+    mean_e2el_ms: float | None = None
+
+    # Median latencies (milliseconds, per the _ms suffix)
+    median_ttft_ms: float | None = None
+    median_tpot_ms: float | None = None
+    median_itl_ms: float | None = None
+    median_e2el_ms: float | None = None
+
+    # P99 latencies (milliseconds, per the _ms suffix)
+    p99_ttft_ms: float | None = None
+    p99_tpot_ms: float | None = None
+    p99_itl_ms: float | None = None
+    p99_e2el_ms: float | None = None
+
+    # Token counts
+    total_input_tokens: int | None = None
+    total_output_tokens: int | None = None
+
+    # Run metadata
+    duration: float | None = None  # unit not stated here — presumably seconds; TODO confirm
+    completed: int | None = None
+    num_prompts: int | None = None
+
+
+@dataclass
+class EnvironmentConfig:
+    """Environment variables and engine configuration for prefill/decode/agg workers.
+
+    Source: config.yaml (backend.{prefill,decode,aggregated}_environment)
+    Source: logs/trtllm_config_{prefill,decode,agg}.yaml (engine config)
+    """
+
+    # Environment variables from config.yaml, one mapping per worker role (each defaults to an empty dict)
+    prefill_environment: dict[str, str] = field(default_factory=dict)
+    decode_environment: dict[str, str] = field(default_factory=dict)
+    aggregated_environment: dict[str, str] = field(default_factory=dict)
+
+    # Engine config from YAML files (TRTLLM) or parsed from logs, one mapping per worker role
+    prefill_engine_config: dict[str, Any] = field(default_factory=dict)
+    decode_engine_config: dict[str, Any] = field(default_factory=dict)
+    aggregated_engine_config: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class RollupSummary:
+    """Top-level record describing one experiment; serialized as rollup.json.
+
+    Output: logs/rollup.json
+
+    Combines information taken from:
+    - config.yaml (experiment configuration)
+    - logs/benchmark.out (benchmark command and results)
+    - logs/*_isl_*_osl_*/result.json (benchmark results)
+    - logs/{node}_{worker_type}_{worker_id}.out/err (node metrics and commands)
+    - logs/trtllm_config_*.yaml (engine configuration)
+    """
+
+    # Identity of the experiment
+    job_id: str
+    job_name: str
+    generated_at: str
+
+    # Model / hardware / stack configuration
+    model_path: str
+    model_name: str
+    precision: str
+    gpu_type: str
+    gpus_per_node: int
+    backend_type: str
+    frontend_type: str
+
+    # Resource allocation (role-specific counts default to None)
+    is_disaggregated: bool
+    total_nodes: int
+    total_gpus: int
+    prefill_nodes: int | None = None
+    decode_nodes: int | None = None
+    prefill_workers: int | None = None
+    decode_workers: int | None = None
+    prefill_gpus: int | None = None
+    decode_gpus: int | None = None
+    agg_nodes: int | None = None
+    agg_workers: int | None = None
+
+    # Benchmark parameters
+    benchmark_type: str = ""
+    isl: int | None = None
+    osl: int | None = None
+    concurrencies: list[int] = field(default_factory=list)
+
+    # One entry per benchmark result
+    results: list[RollupResult] = field(default_factory=list)
+
+    # Best-case figures derived from `results` by compute_summary_stats()
+    max_output_tps: float | None = None
+    max_total_tps: float | None = None
+    min_mean_ttft_ms: float | None = None
+    min_mean_itl_ms: float | None = None
+
+    # Worker-node metrics
+    nodes_summary: NodesSummary | None = None
+
+    # Environment variables and engine settings
+    environment_config: EnvironmentConfig | None = None
+
+    # Parsed benchmark launch command
+    benchmark_command: LaunchCommandRollup | None = None
+
+    # Free-form experiment tags
+    tags: list[str] = field(default_factory=list)
+
+    def compute_summary_stats(self) -> None:
+        """Derive the best-case summary fields (max TPS, min mean latency) from ``results``.
+
+        A summary field is assigned only when at least one result carries a
+        non-None value for the underlying metric; otherwise it is left untouched.
+        """
+        # (summary attribute, per-result attribute, reducer)
+        reductions = (
+            ("max_output_tps", "output_tps", max),
+            ("max_total_tps", "total_tps", max),
+            ("min_mean_ttft_ms", "mean_ttft_ms", min),
+            ("min_mean_itl_ms", "mean_itl_ms", min),
+        )
+
+        for summary_attr, result_attr, reduce_fn in reductions:
+            present = [v for v in (getattr(r, result_attr) for r in self.results) if v is not None]
+            if present:
+                setattr(self, summary_attr, reduce_fn(present))
+
diff --git a/src/srtctl/cli/mixins/rollup_stage.py b/src/srtctl/cli/mixins/rollup_stage.py
new file mode 100644
index 00000000..325b960d
--- /dev/null
+++ b/src/srtctl/cli/mixins/rollup_stage.py
@@ -0,0 +1,612 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Rollup stage mixin for SweepOrchestrator.
+
+Aggregates experiment data from multiple benchmark runs into a single consolidated summary.
+Includes node-level metrics parsed from prefill/decode .out and .err files using analysis.srtlog.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from dataclasses import asdict
+from datetime import datetime
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from srtctl.cli.mixins.rollup import (
+    EnvironmentConfig,
+    LaunchCommandRollup,
+    NodesSummary,
+    RollupResult,
+    RollupSummary,
+)
+
+if TYPE_CHECKING:
+    from srtctl.core.runtime import RuntimeContext
+    from srtctl.core.schema import SrtConfig
+    from srtctl.core.topology import Endpoint
+
+logger = logging.getLogger(__name__)
+
+
+class RollupStageMixin:
+    """Mixin for rollup stage that consolidates experiment data.
+
+    Requires:
+        self.config: SrtConfig
+        self.runtime: RuntimeContext
+        self.endpoints: list[Endpoint]
+    """
+
+    # Type hints for mixin dependencies
+    config: SrtConfig
+    runtime: RuntimeContext
+
+    @property
+    def endpoints(self) -> list[Endpoint]:
+        """Endpoint allocation topology (declaration only: the body here is a bare ``...``)."""
+        ...
+
+    def run_rollup(self, tags: list[str] | None = None) -> Path | None:
+        """Consolidate benchmark results, node metrics and config into ``rollup.json``.
+
+        Args:
+            tags: Optional list of tags for the experiment
+
+        Returns:
+            Path to the generated rollup.json file, or None if there were no results or a step raised
+        """
+        logger.info("Running rollup stage")
+
+        try:
+            # Collect benchmark results
+            results = self._collect_benchmark_results()
+
+            if not results:
+                logger.warning("No benchmark results found to rollup")
+                return None
+
+            # Collect node metrics using analysis.srtlog
+            nodes_summary = self._collect_node_metrics()
+
+            # Collect benchmark launch command
+            benchmark_command = self._collect_benchmark_command()
+
+            # Collect environment and engine configuration
+            environment_config = self._collect_environment_config()
+
+            # Build rollup summary
+            summary = self._build_rollup_summary(results, tags, nodes_summary, benchmark_command, environment_config)
+
+            # Write rollup.json
+            rollup_path = self.runtime.log_dir / "rollup.json"
+            self._write_rollup(summary, rollup_path)
+
+            logger.info("Rollup complete: %s", rollup_path)
+            logger.info(
+                "Summary: %d results, max output TPS: %.2f, %d nodes",
+                len(summary.results),
+                summary.max_output_tps or 0,
+                len(nodes_summary.nodes) if nodes_summary else 0,
+            )
+
+            return rollup_path
+        # Best-effort stage: never propagate, but log the traceback so the failure can be diagnosed
+        except Exception as e:
+            logger.exception("Rollup failed: %s", e)
+            return None
+
+    def _collect_benchmark_results(self) -> list[dict[str, Any]]:
+        """Collect benchmark result dicts from the log directory, sorted by ``max_concurrency``.
+
+        Uses the config.benchmark.type parser first; otherwise loads raw JSON objects directly.
+
+        Returns:
+            List of parsed benchmark result dicts
+        """
+        results = []
+        benchmark_type = self.config.benchmark.type
+
+        try:
+            from analysis.srtlog.parsers import get_benchmark_parser, list_benchmark_parsers
+
+            # Get the appropriate parser
+            try:
+                parser = get_benchmark_parser(benchmark_type)
+                logger.debug("Using %s benchmark parser", benchmark_type)
+            except ValueError:
+                logger.warning(
+                    "No parser for benchmark type '%s', available: %s. Using fallback.",
+                    benchmark_type,
+                    list_benchmark_parsers(),
+                )
+                parser = None
+
+            # Try parser-specific result collection first
+            if parser is not None:
+                # For mooncake-router, look for AIPerf results
+                if hasattr(parser, "find_aiperf_results"):
+                    aiperf_files = parser.find_aiperf_results(self.runtime.log_dir)
+                    for aiperf_path in aiperf_files:
+                        result = parser.parse_result_json(aiperf_path)
+                        if result.get("output_tps") is not None:
+                            results.append(result)
+                            logger.debug("Loaded AIPerf result: %s", aiperf_path)
+
+                # For sa-bench style, look for result directories
+                if hasattr(parser, "parse_result_directory"):
+                    for entry in self.runtime.log_dir.iterdir():
+                        if not entry.is_dir():
+                            continue
+                        # Match patterns like sa-bench_isl_X_osl_Y
+                        if "_isl_" in entry.name and "_osl_" in entry.name:
+                            logger.debug("Found benchmark results directory: %s", entry.name)
+                            dir_results = parser.parse_result_directory(entry)
+                            results.extend(dir_results)
+
+        except ImportError:
+            logger.debug("analysis.srtlog.parsers not available, using fallback")
+
+        # Fallback: direct JSON parsing
+        if not results:
+            for entry in self.runtime.log_dir.iterdir():
+                if not entry.is_dir():
+                    continue
+
+                # Match patterns like sa-bench_isl_X_osl_Y, vllm_isl_X_osl_Y
+                if "_isl_" in entry.name and "_osl_" in entry.name:
+                    logger.debug("Found benchmark results directory: %s", entry.name)
+                    for json_file in entry.glob("*.json"):
+                        try:
+                            with open(json_file) as f:
+                                data = json.load(f)
+                            if isinstance(data, dict):
+                                results.append(data)
+                                logger.debug("Loaded result: %s", json_file.name)
+                            else:
+                                logger.warning("Skipping %s: top-level JSON value is not an object", json_file)
+                        except Exception as e:
+                            logger.warning("Failed to parse %s: %s", json_file, e)
+
+        # Sort by concurrency (every entry is a dict, so .get() is safe here)
+        results.sort(key=lambda x: x.get("max_concurrency", 0) or 0)
+
+        logger.info("Collected %d benchmark results", len(results))
+        return results
+
+    def _collect_node_metrics(self) -> NodesSummary | None:
+        """Collect node metrics by parsing the log files under ``self.runtime.log_dir``.
+
+        Tries each parser named by ``_get_parser_order(config.backend_type)`` in order and
+        stops at the first one whose parsed nodes contain batches or config.
+
+        Returns:
+            NodesSummary built from the parsed nodes, or None if no nodes were parsed or an error occurred
+        """
+        backend_type = self.config.backend_type
+        log_dir = self.runtime.log_dir
+
+        try:
+            from analysis.srtlog.parsers import get_node_parser
+
+            # Candidate parser names, most preferred first
+            parser_order = self._get_parser_order(backend_type)
+            logger.debug("Parser order for %s: %s", backend_type, parser_order)
+
+            nodes = []
+            used_parser = None
+            parser = None
+
+            for parser_type in parser_order:
+                try:
+                    parser = get_node_parser(parser_type)
+                    nodes = parser.parse_logs(log_dir)
+
+                    # Accept this parser only if at least one node has batches or a truthy config
+                    total_batches = sum(len(n.batches) for n in nodes)
+                    has_config = any(n.config for n in nodes)
+                    if total_batches > 0 or has_config:
+                        used_parser = parser_type
+                        logger.info("Using %s parser: found %d nodes with %d batches", parser_type, len(nodes), total_batches)
+                        break
+                    else:
+                        logger.debug("%s parser found no batches, trying next", parser_type)
+
+                except ValueError:
+                    logger.debug("Parser %s not available", parser_type)
+                    continue
+
+            if not nodes:
+                logger.warning("No node metrics found in %s with any parser", log_dir)
+                return None
+
+            # nodes may come from a parser that failed the check above (used_parser is then None)
+            summary = NodesSummary.from_node_metrics_list(nodes)
+
+            # Launch commands are added only if the most recently obtained parser can parse them
+            if parser is not None and hasattr(parser, "parse_launch_command"):
+                self._add_launch_commands_to_summary(summary, parser, log_dir)
+
+            if summary.total_agg_nodes > 0:
+                logger.info("Node summary (%s): %d agg nodes", used_parser, summary.total_agg_nodes)
+            else:
+                logger.info(
+                    "Node summary (%s): %d prefill, %d decode nodes",
+                    used_parser,
+                    summary.total_prefill_nodes,
+                    summary.total_decode_nodes,
+                )
+
+            return summary
+
+        except ImportError:
+            logger.warning("analysis.srtlog.parsers not available, skipping node metrics")
+            return None
+        except Exception as e:
+            logger.warning("Failed to collect node metrics: %s", e)
+            return None
+
+    def _add_launch_commands_to_summary(self, summary: NodesSummary, parser: Any, log_dir: Path) -> None:
+        """Attach the parsed worker launch command to each node in ``summary``.
+
+        Args:
+            summary: NodesSummary whose ``nodes`` entries are updated in place
+            parser: Node parser exposing ``parse_launch_command``
+            log_dir: Directory containing the per-worker log files
+        """
+        for node in summary.nodes:
+            stem = f"{node.node_name}_{node.worker_type}_{node.worker_id}"
+            # Either stream may hold the command; stop at the first one that parses
+            for suffix in (".out", ".err"):
+                candidate = log_dir / f"{stem}{suffix}"
+                if not candidate.exists():
+                    continue
+                try:
+                    text = candidate.read_text(errors="replace")
+                    parsed = parser.parse_launch_command(text, worker_type=node.worker_type)
+                    if not parsed:
+                        continue
+                    opts = parsed.extra_args
+                    node.launch_command = LaunchCommandRollup(
+                        raw_command=parsed.raw_command,
+                        command_type="worker",
+                        model_path=opts.get("model_path"),
+                        served_model_name=opts.get("served_model_name"),
+                        worker_type=node.worker_type,
+                        backend_type=parsed.backend_type,
+                        disaggregation_mode=opts.get("disaggregation_mode"),
+                        tp_size=opts.get("tp_size"),
+                        pp_size=opts.get("pp_size"),
+                        dp_size=opts.get("dp_size"),
+                        ep_size=opts.get("ep_size"),
+                        port=opts.get("port"),
+                        max_num_seqs=opts.get("max_num_seqs"),
+                        max_model_len=opts.get("max_model_len"),
+                    )
+                    logger.debug(
+                        "Parsed launch command for %s_%s_%s", node.node_name, node.worker_type, node.worker_id
+                    )
+                    break
+                except Exception as e:
+                    logger.debug("Failed to parse launch command from %s: %s", candidate, e)
+
+    def _collect_benchmark_command(self) -> LaunchCommandRollup | None:
+        """Recover the benchmark invocation recorded in ``benchmark.out``.
+
+        Returns:
+            LaunchCommandRollup describing the benchmark run, or None when it cannot be determined
+        """
+        bench_kind = self.config.benchmark.type
+        logs = self.runtime.log_dir
+
+        try:
+            from analysis.srtlog.parsers import get_benchmark_parser
+
+            bench_parser = get_benchmark_parser(bench_kind)
+
+            # benchmark.out in the log directory is the only file searched for the command
+            out_file = logs / "benchmark.out"
+            if not out_file.exists():
+                logger.debug("benchmark.out not found in %s", logs)
+                return None
+
+            parsed = bench_parser.parse_launch_command(out_file.read_text(errors="replace"))
+            if not parsed:
+                return None
+
+            opts = parsed.extra_args
+            return LaunchCommandRollup(
+                raw_command=parsed.raw_command,
+                command_type="benchmark",
+                model_path=opts.get("model"),
+                benchmark_type=parsed.benchmark_type,
+                base_url=opts.get("base_url"),
+                max_concurrency=opts.get("max_concurrency"),
+                num_prompts=opts.get("num_prompts"),
+                input_len=opts.get("input_len"),
+                output_len=opts.get("output_len"),
+            )
+
+        except ImportError:
+            logger.debug("analysis.srtlog.parsers not available")
+        except ValueError as e:
+            logger.debug("No benchmark parser for %s: %s", bench_kind, e)
+        except Exception as e:
+            logger.debug("Failed to parse benchmark command: %s", e)
+
+        return None
+
+    def _collect_environment_config(self) -> EnvironmentConfig | None:
+        """Collect per-role environment variables and engine config for the run.
+
+        Parses:
+        1. config.yaml "backend" section: *_environment plus trtllm_config / sglang_config
+        2. trtllm_config_{prefill,decode}.yaml in the log dir, used only if (1) left that role unset
+
+        Returns:
+            EnvironmentConfig, or None if PyYAML is missing or nothing was found
+        """
+        log_dir = self.runtime.log_dir
+
+        try:
+            import yaml
+        except ImportError:
+            logger.debug("PyYAML not available, skipping environment config collection")
+            return None
+
+        config = EnvironmentConfig()
+
+        # Try to find config.yaml in the job output directory
+        # It could be in log_dir, its parent, or its grandparent (first existing path wins)
+        config_paths = [
+            log_dir / "config.yaml",
+            log_dir.parent / "config.yaml",
+            log_dir.parent.parent / "config.yaml",
+        ]
+
+        config_yaml = None
+        for path in config_paths:
+            if path.exists():
+                config_yaml = path
+                break
+
+        if config_yaml:
+            try:
+                with open(config_yaml) as f:
+                    job_config = yaml.safe_load(f)
+
+                backend_section = job_config.get("backend", {})
+
+                # Extract environment variables (fields set before any error below are kept)
+                if "prefill_environment" in backend_section:
+                    config.prefill_environment = backend_section["prefill_environment"]
+                    logger.debug("Found prefill_environment with %d vars", len(config.prefill_environment))
+
+                if "decode_environment" in backend_section:
+                    config.decode_environment = backend_section["decode_environment"]
+                    logger.debug("Found decode_environment with %d vars", len(config.decode_environment))
+
+                if "aggregated_environment" in backend_section:
+                    config.aggregated_environment = backend_section["aggregated_environment"]
+                    logger.debug("Found aggregated_environment with %d vars", len(config.aggregated_environment))
+
+                # For TRTLLM, also extract inline engine config
+                if "trtllm_config" in backend_section:
+                    trtllm_config = backend_section["trtllm_config"]
+                    if "prefill" in trtllm_config:
+                        config.prefill_engine_config = trtllm_config["prefill"]
+                    if "decode" in trtllm_config:
+                        config.decode_engine_config = trtllm_config["decode"]
+                    if "aggregated" in trtllm_config:
+                        config.aggregated_engine_config = trtllm_config["aggregated"]
+
+                # For SGLang, extract sglang_config if present (overwrites trtllm_config values set above)
+                if "sglang_config" in backend_section:
+                    sglang_config = backend_section["sglang_config"]
+                    if "prefill" in sglang_config:
+                        config.prefill_engine_config = sglang_config["prefill"]
+                    if "decode" in sglang_config:
+                        config.decode_engine_config = sglang_config["decode"]
+                    if "aggregated" in sglang_config:
+                        config.aggregated_engine_config = sglang_config["aggregated"]
+
+            except Exception as e:
+                logger.debug("Failed to parse config.yaml: %s", e)
+
+        # Also look for separate YAML config files (only consulted when the inline config is falsy)
+        prefill_yaml = log_dir / "trtllm_config_prefill.yaml"
+        decode_yaml = log_dir / "trtllm_config_decode.yaml"
+
+        if prefill_yaml.exists() and not config.prefill_engine_config:
+            try:
+                with open(prefill_yaml) as f:
+                    config.prefill_engine_config = yaml.safe_load(f)
+                logger.debug("Loaded prefill engine config from %s", prefill_yaml)
+            except Exception as e:
+                logger.debug("Failed to parse %s: %s", prefill_yaml, e)
+
+        if decode_yaml.exists() and not config.decode_engine_config:
+            try:
+                with open(decode_yaml) as f:
+                    config.decode_engine_config = yaml.safe_load(f)
+                logger.debug("Loaded decode engine config from %s", decode_yaml)
+            except Exception as e:
+                logger.debug("Failed to parse %s: %s", decode_yaml, e)
+
+        # Return None if every collected field is still falsy
+        if not any([
+            config.prefill_environment,
+            config.decode_environment,
+            config.aggregated_environment,
+            config.prefill_engine_config,
+            config.decode_engine_config,
+            config.aggregated_engine_config,
+        ]):
+            logger.debug("No environment or engine config found")
+            return None
+
+        # Log how many environment variables were found per role
+        env_counts = []
+        if config.prefill_environment:
+            env_counts.append(f"{len(config.prefill_environment)} prefill")
+        if config.decode_environment:
+            env_counts.append(f"{len(config.decode_environment)} decode")
+        if config.aggregated_environment:
+            env_counts.append(f"{len(config.aggregated_environment)} agg")
+
+        if env_counts:
+            logger.info("Collected environment vars: %s", ", ".join(env_counts))
+
+        return config
+
+    def _get_parser_order(self, backend_type: str) -> list[str]:
+        """Return the node-parser names to attempt for ``backend_type``, most preferred first.
+
+        Args:
+            backend_type: Backend type from config (e.g., "sglang", "trtllm")
+
+        Returns:
+            Ordered parser names; a backend without an explicit entry maps to itself
+        """
+        known_orders = {"sglang": ["sglang"], "trtllm": ["trtllm"]}
+        try:
+            return known_orders[backend_type]
+        except KeyError:
+            # No dedicated ordering: fall back to a single parser named after the backend
+            return [backend_type]
+
+    def _build_rollup_summary(
+        self,
+        results: list[dict[str, Any]],
+        tags: list[str] | None = None,
+        nodes_summary: NodesSummary | None = None,
+        benchmark_command: LaunchCommandRollup | None = None,
+        environment_config: EnvironmentConfig | None = None,
+    ) -> RollupSummary:
+        """Build a RollupSummary from the run configuration and the collected results.
+
+        Args:
+            results: Parsed result dicts; throughput is read from ``output_throughput`` or ``output_tps``
+            tags: Optional tags for the experiment
+            nodes_summary: Optional node-level metrics summary
+            benchmark_command: Optional parsed benchmark launch command
+            environment_config: Optional environment and engine configuration
+
+        Returns:
+            RollupSummary with one RollupResult per input dict and summary statistics computed
+        """
+        r = self.config.resources
+        b = self.config.benchmark
+
+        # Determine topology
+        is_disaggregated = r.is_disaggregated
+
+        if is_disaggregated:
+            total_gpus = r.prefill_gpus + r.decode_gpus
+        else:
+            total_gpus = (r.agg_nodes or 1) * r.gpus_per_node
+
+        # Build summary
+        summary = RollupSummary(
+            # Identification
+            job_id=self.runtime.job_id,
+            job_name=self.config.name,
+            generated_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            # Model config
+            model_path=str(self.runtime.model_path),
+            model_name=self.config.served_model_name,
+            precision=self.config.model.precision,
+            gpu_type=r.gpu_type,
+            gpus_per_node=r.gpus_per_node,
+            backend_type=self.config.backend_type,
+            frontend_type=self.config.frontend.type,
+            # Resource allocation
+            is_disaggregated=is_disaggregated,
+            total_nodes=r.total_nodes,
+            total_gpus=total_gpus,
+            # Benchmark config
+            benchmark_type=b.type,
+            isl=b.isl,
+            osl=b.osl,
+            concurrencies=b.get_concurrency_list(),
+            # Node metrics
+            nodes_summary=nodes_summary,
+            # Environment and engine configuration
+            environment_config=environment_config,
+            # Launch commands
+            benchmark_command=benchmark_command,
+            # Tags
+            tags=tags or [],
+        )
+
+        # Add disaggregated-specific fields
+        if is_disaggregated:
+            summary.prefill_nodes = r.prefill_nodes
+            summary.decode_nodes = r.decode_nodes
+            summary.prefill_workers = r.num_prefill
+            summary.decode_workers = r.num_decode
+            summary.prefill_gpus = r.prefill_gpus
+            summary.decode_gpus = r.decode_gpus
+        else:
+            summary.agg_nodes = r.agg_nodes
+            summary.agg_workers = r.num_agg
+
+        # Convert to RollupResult; AIPerf results are collected by their output_tps key, so accept it too
+        for data in results:
+            result = RollupResult(
+                concurrency=data.get("max_concurrency", 0),
+                output_tps=data.get("output_throughput", data.get("output_tps", 0)),
+                total_tps=data.get("total_token_throughput"),
+                request_throughput=data.get("request_throughput"),
+                request_goodput=data.get("request_goodput"),
+                request_rate=data.get("request_rate"),
+                # Mean latencies
+                mean_ttft_ms=data.get("mean_ttft_ms"),
+                mean_tpot_ms=data.get("mean_tpot_ms"),
+                mean_itl_ms=data.get("mean_itl_ms"),
+                mean_e2el_ms=data.get("mean_e2el_ms"),
+                # Median latencies
+                median_ttft_ms=data.get("median_ttft_ms"),
+                median_tpot_ms=data.get("median_tpot_ms"),
+                median_itl_ms=data.get("median_itl_ms"),
+                median_e2el_ms=data.get("median_e2el_ms"),
+                # P99 latencies
+                p99_ttft_ms=data.get("p99_ttft_ms"),
+                p99_tpot_ms=data.get("p99_tpot_ms"),
+                p99_itl_ms=data.get("p99_itl_ms"),
+                p99_e2el_ms=data.get("p99_e2el_ms"),
+                # Token counts
+                total_input_tokens=data.get("total_input_tokens"),
+                total_output_tokens=data.get("total_output_tokens"),
+                # Metadata
+                duration=data.get("duration"),
+                completed=data.get("completed"),
+                num_prompts=data.get("num_prompts"),
+            )
+            summary.results.append(result)
+
+        # Compute summary statistics
+        summary.compute_summary_stats()
+
+        return summary
+
+    def _write_rollup(self, summary: RollupSummary, path: Path) -> None:
+        """Serialize ``summary`` as indented JSON at ``path``.
+
+        Args:
+            summary: RollupSummary to serialize
+            path: Destination file (opened in "w" mode, so existing content is replaced)
+        """
+        # asdict() recurses into nested dataclasses, so the whole tree becomes plain containers
+        payload = asdict(summary)
+
+        # default=str stringifies any value the json module cannot encode natively
+        with open(path, "w") as sink:
+            json.dump(payload, sink, indent=2, default=str)
+
+        logger.debug("Wrote rollup to %s", path)
+
diff --git a/tests/test_configs.py b/tests/test_configs.py
index 07bce17f..0097d24b 100644
--- a/tests/test_configs.py
+++ b/tests/test_configs.py
@@ -383,3 +383,320 @@ def test_setup_script_env_var_override(self, monkeypatch):
             config = replace(config, setup_script=setup_script_override)
 
         assert config.setup_script == "install-sglang-main.sh"
+
+
+class TestOutputDirectoryStructure:
+    """Tests for output directory structure created during job submission."""
+
+    def test_output_directory_created_with_job_id(self, tmp_path, monkeypatch):
+        """Test that outputs/{job_id}/ directory is created on successful submission.
+
+        subprocess.run is patched to report a successful sbatch submission of job 12345.
+        """
+        from unittest.mock import MagicMock, patch
+
+        from srtctl.cli.submit import submit_with_orchestrator
+        from srtctl.core.schema import (
+            ModelConfig,
+            ResourceConfig,
+            SrtConfig,
+        )
+
+        config = SrtConfig(
+            name="test-output-dir",
+            model=ModelConfig(path="/model", container="/container.sqsh", precision="fp8"),
+            resources=ResourceConfig(gpu_type="h100", gpus_per_node=8, agg_nodes=1),
+        )
+
+        # Create a temp config file
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text("name: test")
+
+        # Mock srtctl_root to use temp directory
+        monkeypatch.setattr(
+            "srtctl.cli.submit.get_srtslurm_setting",
+            lambda key, default=None: str(tmp_path) if key == "srtctl_root" else default,
+        )
+
+        # Mock sbatch to return a fake job ID
+        mock_result = MagicMock()
+        mock_result.stdout = "Submitted batch job 12345"
+        mock_result.returncode = 0
+
+        with patch("subprocess.run", return_value=mock_result):
+            submit_with_orchestrator(
+                config_path=config_file,
+                config=config,
+                dry_run=False,
+            )
+
+        # Verify directory structure
+        output_dir = tmp_path / "outputs" / "12345"
+        assert output_dir.exists(), "outputs/{job_id}/ directory should be created"
+        assert output_dir.is_dir(), "outputs/{job_id}/ should be a directory"
+
+    def test_config_yaml_copied_to_output_dir(self, tmp_path, monkeypatch):
+        """The submitted config file must be reproduced verbatim at outputs/{job_id}/config.yaml."""
+        from unittest.mock import MagicMock, patch
+
+        from srtctl.cli.submit import submit_with_orchestrator
+        from srtctl.core.schema import ModelConfig, ResourceConfig, SrtConfig
+
+        # Aggregated config with a single node (agg_nodes=1)
+        job_config = SrtConfig(
+            name="test-config-copy",
+            model=ModelConfig(path="/model", container="/container.sqsh", precision="fp8"),
+            resources=ResourceConfig(gpu_type="h100", gpus_per_node=8, agg_nodes=1),
+        )
+
+        # The source is named my_config.yaml; the copy is expected under the name config.yaml
+        original_text = "name: test-config-copy\nmodel:\n  path: /model"
+        source_file = tmp_path / "my_config.yaml"
+        source_file.write_text(original_text)
+
+        def fake_setting(key, default=None):
+            # Point only srtctl_root at the pytest temp dir; other keys get their default back
+            return str(tmp_path) if key == "srtctl_root" else default
+
+        monkeypatch.setattr("srtctl.cli.submit.get_srtslurm_setting", fake_setting)
+
+        # Stand-in for a successful sbatch call that printed job ID 99999
+        sbatch_reply = MagicMock()
+        sbatch_reply.stdout = "Submitted batch job 99999"
+        sbatch_reply.returncode = 0
+
+        with patch("subprocess.run", return_value=sbatch_reply):
+            submit_with_orchestrator(config_path=source_file, config=job_config, dry_run=False)
+
+        # The copy must exist and its text must equal the source text
+        job_dir = tmp_path / "outputs" / "99999"
+        copied = job_dir / "config.yaml"
+        assert copied.exists(), "config.yaml should be copied to output dir"
+        assert copied.read_text() == original_text, "config.yaml content should match original"
+
+    def test_sbatch_script_copied_to_output_dir(self, tmp_path, monkeypatch):
+        """A usable sbatch_script.sh must be present in outputs/{job_id}/ after submission."""
+        from unittest.mock import MagicMock, patch
+
+        from srtctl.cli.submit import submit_with_orchestrator
+        from srtctl.core.schema import ModelConfig, ResourceConfig, SrtConfig
+
+        # Aggregated config with a single node (agg_nodes=1)
+        job_config = SrtConfig(
+            name="test-sbatch-copy",
+            model=ModelConfig(path="/model", container="/container.sqsh", precision="fp8"),
+            resources=ResourceConfig(gpu_type="h100", gpus_per_node=8, agg_nodes=1),
+        )
+
+        source_file = tmp_path / "config.yaml"
+        source_file.write_text("name: test")
+
+        # Point only srtctl_root at the pytest temp dir; other keys get their default back
+        monkeypatch.setattr(
+            "srtctl.cli.submit.get_srtslurm_setting",
+            lambda key, default=None: str(tmp_path) if key == "srtctl_root" else default,
+        )
+
+        # Stand-in for a successful sbatch call that printed job ID 88888
+        sbatch_reply = MagicMock()
+        sbatch_reply.stdout = "Submitted batch job 88888"
+        sbatch_reply.returncode = 0
+
+        with patch("subprocess.run", return_value=sbatch_reply):
+            submit_with_orchestrator(config_path=source_file, config=job_config, dry_run=False)
+
+        # Verify sbatch_script.sh was copied
+        script_path = tmp_path / "outputs" / "88888" / "sbatch_script.sh"
+        assert script_path.exists(), "sbatch_script.sh should be copied to output dir"
+
+        # Verify it looks like an sbatch script: bash shebang plus #SBATCH directives
+        script_text = script_path.read_text()
+        assert "#!/bin/bash" in script_text, "sbatch script should have bash shebang"
+        assert "#SBATCH" in script_text, "sbatch script should have SBATCH directives"
+
+    def test_metadata_json_created_in_output_dir(self, tmp_path, monkeypatch):
+        """A {job_id}.json metadata file describing the job must appear in outputs/{job_id}/."""
+        import json
+        from unittest.mock import MagicMock, patch
+
+        from srtctl.cli.submit import submit_with_orchestrator
+        from srtctl.core.schema import ModelConfig, ResourceConfig, SrtConfig
+
+        # Prefill/decode resources, so the prefill_* and decode_* metadata fields are exercised
+        resources = ResourceConfig(
+            gpu_type="h100",
+            gpus_per_node=8,
+            prefill_nodes=1,
+            decode_nodes=2,
+            prefill_workers=1,
+            decode_workers=4,
+        )
+        job_config = SrtConfig(
+            name="test-metadata",
+            model=ModelConfig(path="/model", container="/container.sqsh", precision="fp8"),
+            resources=resources,
+        )
+
+        source_file = tmp_path / "config.yaml"
+        source_file.write_text("name: test")
+
+        def fake_setting(key, default=None):
+            return str(tmp_path) if key == "srtctl_root" else default
+
+        monkeypatch.setattr("srtctl.cli.submit.get_srtslurm_setting", fake_setting)
+
+        sbatch_reply = MagicMock()
+        sbatch_reply.stdout = "Submitted batch job 77777"
+        sbatch_reply.returncode = 0
+
+        with patch("subprocess.run", return_value=sbatch_reply):
+            submit_with_orchestrator(config_path=source_file, config=job_config, dry_run=False)
+
+        # Verify {job_id}.json was created
+        metadata_path = tmp_path / "outputs" / "77777" / "77777.json"
+        assert metadata_path.exists(), "{job_id}.json should be created in output dir"
+
+        # Verify metadata content: top-level fields, then the model and resources sections
+        metadata = json.loads(metadata_path.read_text())
+        assert metadata["version"] == "2.0"
+        assert metadata["orchestrator"] is True
+        assert metadata["job_id"] == "77777"
+        assert metadata["job_name"] == "test-metadata"
+        model_meta = metadata["model"]
+        assert model_meta["path"] == "/model"
+        assert model_meta["container"] == "/container.sqsh"
+        assert model_meta["precision"] == "fp8"
+        resource_meta = metadata["resources"]
+        assert resource_meta["gpu_type"] == "h100"
+        assert resource_meta["prefill_nodes"] == 1
+        assert resource_meta["decode_nodes"] == 2
+        assert resource_meta["prefill_workers"] == 1
+        assert resource_meta["decode_workers"] == 4
+
+    def test_tags_included_in_metadata(self, tmp_path, monkeypatch):
+        """Check that tags passed to submit end up in the metadata JSON."""
+        import json
+        from unittest.mock import MagicMock, patch
+
+        from srtctl.cli.submit import submit_with_orchestrator
+        from srtctl.core.schema import ModelConfig, ResourceConfig, SrtConfig
+
+        requested_tags = ["experiment", "baseline", "v2"]
+
+        # Single aggregated node (agg_nodes=1)
+        job_config = SrtConfig(
+            name="test-tags",
+            model=ModelConfig(path="/model", container="/container.sqsh", precision="fp8"),
+            resources=ResourceConfig(gpu_type="h100", gpus_per_node=8, agg_nodes=1),
+        )
+
+        yaml_path = tmp_path / "config.yaml"
+        yaml_path.write_text("name: test")
+
+        # Redirect srtctl_root to the pytest temp dir; other settings keep their default.
+        monkeypatch.setattr(
+            "srtctl.cli.submit.get_srtslurm_setting",
+            lambda key, default=None: str(tmp_path) if key == "srtctl_root" else default,
+        )
+
+        # Stand-in for the subprocess result; its stdout carries job id 66666.
+        sbatch_reply = MagicMock(stdout="Submitted batch job 66666", returncode=0)
+
+        with patch("subprocess.run", return_value=sbatch_reply):
+            submit_with_orchestrator(
+                config_path=yaml_path,
+                config=job_config,
+                dry_run=False,
+                tags=requested_tags,
+            )
+
+        # The stored tags must equal the submitted list, order included.
+        metadata_path = tmp_path / "outputs" / "66666" / "66666.json"
+        recorded = json.loads(metadata_path.read_text())
+        assert recorded["tags"] == ["experiment", "baseline", "v2"]
+
+    def test_complete_output_directory_structure(self, tmp_path, monkeypatch):
+        """Check that config, sbatch script and metadata all exist in outputs/{job_id}/."""
+        import json
+        from unittest.mock import MagicMock, patch
+
+        from srtctl.cli.submit import submit_with_orchestrator
+        from srtctl.core.schema import ModelConfig, ResourceConfig, SrtConfig
+
+        # Aggregated layout plus a setup script; both are asserted on in the metadata below.
+        agg_resources = ResourceConfig(
+            gpu_type="gb200",
+            gpus_per_node=4,
+            agg_nodes=2,
+            agg_workers=2,
+        )
+        job_config = SrtConfig(
+            name="test-complete-structure",
+            model=ModelConfig(path="/model", container="/container.sqsh", precision="fp4"),
+            resources=agg_resources,
+            setup_script="my-setup.sh",
+        )
+
+        yaml_path = tmp_path / "config.yaml"
+        yaml_path.write_text("name: test-complete-structure")
+
+        # Redirect srtctl_root to the pytest temp dir; other settings keep their default.
+        monkeypatch.setattr(
+            "srtctl.cli.submit.get_srtslurm_setting",
+            lambda key, default=None: str(tmp_path) if key == "srtctl_root" else default,
+        )
+
+        # Stand-in for the subprocess result; its stdout carries job id 55555.
+        sbatch_reply = MagicMock(stdout="Submitted batch job 55555", returncode=0)
+
+        with patch("subprocess.run", return_value=sbatch_reply):
+            submit_with_orchestrator(
+                config_path=yaml_path,
+                config=job_config,
+                dry_run=False,
+                tags=["production"],
+            )
+
+        job_dir = tmp_path / "outputs" / "55555"
+
+        # Each of these files must be present in the job directory
+        expected_names = ("config.yaml", "sbatch_script.sh", "55555.json")
+        for artifact_name in expected_names:
+            artifact = job_dir / artifact_name
+            assert artifact.exists(), f"{artifact.name} should exist in output dir"
+
+        # Metadata carries the setup script, the tags and the agg worker count
+        recorded = json.loads((job_dir / "55555.json").read_text())
+        assert recorded["setup_script"] == "my-setup.sh"
+        assert recorded["tags"] == ["production"]
+        assert recorded["resources"]["agg_workers"] == 2
+
+    def test_dry_run_does_not_create_output_dir(self, tmp_path, monkeypatch):
+        """Check that a dry-run submit leaves no outputs/ directory behind."""
+        from srtctl.cli.submit import submit_with_orchestrator
+        from srtctl.core.schema import ModelConfig, ResourceConfig, SrtConfig
+
+        # Single aggregated node (agg_nodes=1)
+        job_config = SrtConfig(
+            name="test-dry-run",
+            model=ModelConfig(path="/model", container="/container.sqsh", precision="fp8"),
+            resources=ResourceConfig(gpu_type="h100", gpus_per_node=8, agg_nodes=1),
+        )
+
+        yaml_path = tmp_path / "config.yaml"
+        yaml_path.write_text("name: test")
+
+        # Redirect srtctl_root to the pytest temp dir; other settings keep their default.
+        monkeypatch.setattr(
+            "srtctl.cli.submit.get_srtslurm_setting",
+            lambda key, default=None: str(tmp_path) if key == "srtctl_root" else default,
+        )
+
+        # Dry run: the call is expected to return without submitting anything.
+        # NOTE(review): subprocess.run is not patched in this test, so it relies on
+        # the dry-run path never invoking sbatch.
+        submit_with_orchestrator(config_path=yaml_path, config=job_config, dry_run=True)
+
+        # outputs/ must be absent from the redirected srtctl root
+        outputs_root = tmp_path / "outputs"
+        assert not outputs_root.exists(), "outputs/ should not be created in dry-run mode"
diff --git a/tests/test_rollup.py b/tests/test_rollup.py
new file mode 100644
index 00000000..de315cbf
--- /dev/null
+++ b/tests/test_rollup.py
@@ -0,0 +1,938 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Tests for the rollup stage mixin."""
+
+import json
+from pathlib import Path
+
+import pytest
+
+from srtctl.cli.mixins.rollup_stage import (
+    NodeRollup,
+    NodesSummary,
+    RollupResult,
+    RollupStageMixin,
+    RollupSummary,
+)
+
+
+class TestNodeRollup:
+    """Unit tests for building NodeRollup directly and via from_node_metrics."""
+
+    def test_minimal_node_rollup(self):
+        """Build a NodeRollup from its three identifying fields only."""
+        rollup = NodeRollup(
+            node_name="node-01",
+            worker_type="prefill",
+            worker_id="w0",
+        )
+        identity = (rollup.node_name, rollup.worker_type, rollup.worker_id)
+        assert identity == ("node-01", "prefill", "w0")
+        # Fields that were not passed: the batch counter is 0, tp_size is None
+        assert rollup.total_batches == 0
+        assert rollup.tp_size is None
+
+    def test_prefill_node_rollup(self):
+        """Build a prefill NodeRollup with parallelism, memory and batch metrics set."""
+        rollup = NodeRollup(
+            worker_type="prefill",
+            worker_id="w0",
+            node_name="node-01",
+            tp_size=8,
+            dp_size=1,
+            ep_size=1,
+            kv_cache_gb=17.16,
+            kv_tokens=524288,
+            avail_mem_gb=75.0,
+            mem_usage_gb=107.0,
+            total_batches=100,
+            total_new_tokens=50000,
+            total_cached_tokens=10000,
+            cache_hit_rate=16.67,
+            max_input_throughput=8000.0,
+            avg_input_throughput=5000.0,
+            max_inflight_requests=10,
+            max_queue_requests=5,
+        )
+        assert rollup.tp_size == 8
+        assert rollup.kv_cache_gb == 17.16
+        assert rollup.total_new_tokens == 50000
+        assert rollup.cache_hit_rate == 16.67
+
+    def test_decode_node_rollup(self):
+        """Build a decode NodeRollup with running-request and generation metrics set."""
+        rollup = NodeRollup(
+            worker_type="decode",
+            worker_id="w0",
+            node_name="node-02",
+            tp_size=8,
+            total_batches=500,
+            max_running_requests=100,
+            avg_running_requests=50.0,
+            max_gen_throughput=200.0,
+            avg_gen_throughput=150.0,
+            max_transfer_requests=5,
+            max_queue_requests=10,
+        )
+        assert rollup.worker_type == "decode"
+        assert rollup.avg_gen_throughput == 150.0
+        assert rollup.max_running_requests == 100
+
+    def test_agg_node_rollup(self):
+        """Build an agg NodeRollup that carries prefill and decode metrics together."""
+        rollup = NodeRollup(
+            worker_type="agg",
+            worker_id="w0",
+            node_name="node-03",
+            tp_size=8,
+            total_batches=600,
+            total_prefill_batches=100,
+            total_decode_batches=500,
+            # Prefill-side metrics
+            total_new_tokens=50000,
+            total_cached_tokens=10000,
+            cache_hit_rate=16.67,
+            max_input_throughput=8000.0,
+            avg_input_throughput=5000.0,
+            # Decode-side metrics
+            max_running_requests=100,
+            avg_running_requests=50.0,
+            max_gen_throughput=200.0,
+            avg_gen_throughput=150.0,
+        )
+        assert rollup.worker_type == "agg"
+        assert rollup.is_agg is True
+        assert rollup.total_prefill_batches == 100
+        assert rollup.total_decode_batches == 500
+        assert rollup.avg_input_throughput == 5000.0
+        assert rollup.avg_gen_throughput == 150.0
+
+    def test_from_node_metrics(self):
+        """Derive a NodeRollup from prefill NodeMetrics and check the aggregates."""
+        from analysis.srtlog.models import BatchMetrics, MemoryMetrics, NodeMetrics
+
+        # Two prefill batches plus one memory snapshot for a single worker
+        metrics = NodeMetrics(
+            node_info={"node": "test-node", "worker_type": "prefill", "worker_id": "w0"},
+            batches=[
+                BatchMetrics(
+                    batch_type="prefill",
+                    timestamp="2025-01-22 10:00:00",
+                    dp=0,
+                    tp=0,
+                    ep=0,
+                    new_token=1000,
+                    cached_token=200,
+                    input_throughput=5000.0,
+                    queue_req=2,
+                    inflight_req=5,
+                ),
+                BatchMetrics(
+                    batch_type="prefill",
+                    timestamp="2025-01-22 10:00:01",
+                    dp=0,
+                    tp=0,
+                    ep=0,
+                    new_token=1500,
+                    cached_token=300,
+                    input_throughput=6000.0,
+                    queue_req=3,
+                    inflight_req=8,
+                ),
+            ],
+            memory_snapshots=[
+                MemoryMetrics(
+                    metric_type="memory",
+                    timestamp="2025-01-22 10:00:00",
+                    dp=0,
+                    tp=0,
+                    ep=0,
+                    avail_mem_gb=75.0,
+                    mem_usage_gb=107.0,
+                    kv_cache_gb=17.16,
+                    kv_tokens=524288,
+                ),
+            ],
+            config={"tp_size": 8, "dp_size": 1, "ep_size": 1},
+        )
+
+        derived = NodeRollup.from_node_metrics(metrics)
+
+        assert derived.node_name == "test-node"
+        assert derived.worker_type == "prefill"
+        assert derived.tp_size == 8
+        assert derived.total_batches == 2
+        assert derived.total_new_tokens == 2500  # sum of 1000 and 1500
+        assert derived.total_cached_tokens == 500  # sum of 200 and 300
+        assert derived.avg_input_throughput == 5500.0  # mean of 5000 and 6000
+        assert derived.max_input_throughput == 6000.0
+        assert derived.max_queue_requests == 3
+        assert derived.max_inflight_requests == 8
+        assert derived.kv_cache_gb == 17.16
+
+        # Hit rate in percent: 500 cached / (2500 new + 500 cached) = 16.67
+        assert derived.cache_hit_rate == pytest.approx(16.67, rel=0.01)
+
+    def test_from_node_metrics_agg_worker(self):
+        """Derive a NodeRollup from agg-worker NodeMetrics with mixed batch types."""
+        from analysis.srtlog.models import BatchMetrics, MemoryMetrics, NodeMetrics
+
+        # One agg worker whose batch list mixes two prefill and two decode batches
+        metrics = NodeMetrics(
+            node_info={"node": "agg-node", "worker_type": "agg", "worker_id": "w0"},
+            batches=[
+                # The two prefill batches
+                BatchMetrics(
+                    batch_type="prefill",
+                    timestamp="2025-01-22 10:00:00",
+                    dp=0,
+                    tp=0,
+                    ep=0,
+                    new_token=1000,
+                    cached_token=200,
+                    input_throughput=5000.0,
+                    inflight_req=5,
+                ),
+                BatchMetrics(
+                    batch_type="prefill",
+                    timestamp="2025-01-22 10:00:01",
+                    dp=0,
+                    tp=0,
+                    ep=0,
+                    new_token=1500,
+                    cached_token=300,
+                    input_throughput=6000.0,
+                    inflight_req=8,
+                ),
+                # The two decode batches
+                BatchMetrics(
+                    batch_type="decode",
+                    timestamp="2025-01-22 10:00:02",
+                    dp=0,
+                    tp=0,
+                    ep=0,
+                    running_req=50,
+                    gen_throughput=150.0,
+                    queue_req=3,
+                ),
+                BatchMetrics(
+                    batch_type="decode",
+                    timestamp="2025-01-22 10:00:03",
+                    dp=0,
+                    tp=0,
+                    ep=0,
+                    running_req=60,
+                    gen_throughput=180.0,
+                    queue_req=5,
+                ),
+            ],
+            memory_snapshots=[
+                MemoryMetrics(
+                    metric_type="memory",
+                    timestamp="2025-01-22 10:00:00",
+                    dp=0,
+                    tp=0,
+                    ep=0,
+                    kv_cache_gb=20.0,
+                ),
+            ],
+            config={"tp_size": 8},
+        )
+
+        derived = NodeRollup.from_node_metrics(metrics)
+
+        assert derived.node_name == "agg-node"
+        assert derived.worker_type == "agg"
+        assert derived.is_agg is True
+        assert derived.total_batches == 4
+        assert derived.total_prefill_batches == 2
+        assert derived.total_decode_batches == 2
+
+        # Aggregates over the prefill batches
+        assert derived.total_new_tokens == 2500  # sum of 1000 and 1500
+        assert derived.total_cached_tokens == 500  # sum of 200 and 300
+        assert derived.avg_input_throughput == 5500.0  # mean of 5000 and 6000
+        assert derived.max_input_throughput == 6000.0
+        assert derived.max_inflight_requests == 8
+
+        # Aggregates over the decode batches
+        assert derived.avg_running_requests == 55.0  # mean of 50 and 60
+        assert derived.max_running_requests == 60
+        assert derived.avg_gen_throughput == 165.0  # mean of 150 and 180
+        assert derived.max_gen_throughput == 180.0
+        assert derived.max_queue_requests == 5
+
+
+class TestNodesSummary:
+    """Unit tests for NodesSummary and its from_node_metrics_list constructor."""
+
+    def test_empty_summary(self):
+        """A NodesSummary built with no arguments has zero counts and no nodes."""
+        overview = NodesSummary()
+        assert overview.total_prefill_nodes == 0
+        assert overview.total_decode_nodes == 0
+        assert overview.nodes == []
+
+    def test_from_node_metrics_list(self):
+        """Summarize one prefill node and two decode nodes."""
+        from analysis.srtlog.models import BatchMetrics, NodeMetrics
+
+        node_metrics = [
+            NodeMetrics(
+                node_info={"node": "node-01", "worker_type": "prefill", "worker_id": "w0"},
+                batches=[
+                    BatchMetrics(
+                        batch_type="prefill",
+                        timestamp="2025-01-22 10:00:00",
+                        dp=0,
+                        tp=0,
+                        ep=0,
+                        new_token=1000,
+                        cached_token=200,
+                        input_throughput=5000.0,
+                    ),
+                ],
+                config={"tp_size": 8},
+            ),
+            NodeMetrics(
+                node_info={"node": "node-02", "worker_type": "decode", "worker_id": "w0"},
+                batches=[
+                    BatchMetrics(
+                        batch_type="decode",
+                        timestamp="2025-01-22 10:00:00",
+                        dp=0,
+                        tp=0,
+                        ep=0,
+                        running_req=50,
+                        gen_throughput=150.0,
+                    ),
+                ],
+                config={"tp_size": 8},
+            ),
+            NodeMetrics(
+                node_info={"node": "node-03", "worker_type": "decode", "worker_id": "w0"},
+                batches=[
+                    BatchMetrics(
+                        batch_type="decode",
+                        timestamp="2025-01-22 10:00:00",
+                        dp=0,
+                        tp=0,
+                        ep=0,
+                        running_req=60,
+                        gen_throughput=180.0,
+                    ),
+                ],
+                config={"tp_size": 8},
+            ),
+        ]
+
+        overview = NodesSummary.from_node_metrics_list(node_metrics)
+
+        assert overview.total_prefill_nodes == 1
+        assert overview.total_decode_nodes == 2
+        assert len(overview.nodes) == 3
+        assert overview.total_prefill_tokens == 1000
+        assert overview.total_cached_tokens == 200
+        assert overview.avg_prefill_input_throughput == 5000.0
+        assert overview.avg_decode_gen_throughput == 165.0  # mean of 150 and 180
+        assert overview.max_decode_gen_throughput == 180.0
+
+    def test_from_node_metrics_list_with_agg(self):
+        """Summarize two agg workers, each with one prefill and one decode batch."""
+        from analysis.srtlog.models import BatchMetrics, NodeMetrics
+
+        node_metrics = [
+            # First agg worker
+            NodeMetrics(
+                node_info={"node": "agg-node-01", "worker_type": "agg", "worker_id": "w0"},
+                batches=[
+                    BatchMetrics(
+                        batch_type="prefill",
+                        timestamp="2025-01-22 10:00:00",
+                        dp=0,
+                        tp=0,
+                        ep=0,
+                        new_token=1000,
+                        cached_token=200,
+                        input_throughput=5000.0,
+                    ),
+                    BatchMetrics(
+                        batch_type="decode",
+                        timestamp="2025-01-22 10:00:01",
+                        dp=0,
+                        tp=0,
+                        ep=0,
+                        running_req=50,
+                        gen_throughput=150.0,
+                    ),
+                ],
+                config={"tp_size": 8},
+            ),
+            # Second agg worker
+            NodeMetrics(
+                node_info={"node": "agg-node-02", "worker_type": "agg", "worker_id": "w0"},
+                batches=[
+                    BatchMetrics(
+                        batch_type="prefill",
+                        timestamp="2025-01-22 10:00:00",
+                        dp=0,
+                        tp=0,
+                        ep=0,
+                        new_token=1500,
+                        cached_token=300,
+                        input_throughput=6000.0,
+                    ),
+                    BatchMetrics(
+                        batch_type="decode",
+                        timestamp="2025-01-22 10:00:01",
+                        dp=0,
+                        tp=0,
+                        ep=0,
+                        running_req=60,
+                        gen_throughput=180.0,
+                    ),
+                ],
+                config={"tp_size": 8},
+            ),
+        ]
+
+        overview = NodesSummary.from_node_metrics_list(node_metrics)
+
+        # Node counts: both workers are counted as agg, none as prefill or decode
+        assert overview.total_prefill_nodes == 0
+        assert overview.total_decode_nodes == 0
+        assert overview.total_agg_nodes == 2
+        assert len(overview.nodes) == 2
+
+        # The prefill/decode aggregates are expected to cover the agg workers
+        assert overview.total_prefill_tokens == 2500  # sum of 1000 and 1500
+        assert overview.total_cached_tokens == 500  # sum of 200 and 300
+        assert overview.avg_prefill_input_throughput == 5500.0  # mean of 5000 and 6000
+        assert overview.max_prefill_input_throughput == 6000.0
+        assert overview.avg_decode_gen_throughput == 165.0  # mean of 150 and 180
+        assert overview.max_decode_gen_throughput == 180.0
+
+
+class TestRollupResult:
+    """Unit tests for constructing the RollupResult dataclass."""
+
+    def test_minimal_result(self):
+        """Pass only concurrency and output_tps; the other metrics read None."""
+        row = RollupResult(concurrency=100, output_tps=5000.0)
+        assert row.concurrency == 100
+        assert row.output_tps == 5000.0
+        assert row.mean_ttft_ms is None
+        assert row.total_tps is None
+
+    def test_full_result(self):
+        """Pass throughput, latency, token-count and run-size fields together."""
+        row = RollupResult(
+            concurrency=100,
+            output_tps=5000.0,
+            total_tps=6000.0,
+            request_throughput=50.0,
+            mean_ttft_ms=150.0,
+            p99_ttft_ms=300.0,
+            mean_tpot_ms=20.0,
+            mean_itl_ms=18.0,
+            p99_itl_ms=25.0,
+            total_input_tokens=100000,
+            total_output_tokens=200000,
+            num_prompts=1000,
+            completed=1000,
+            duration=60.0,
+        )
+        assert row.concurrency == 100
+        assert row.output_tps == 5000.0
+        assert row.total_tps == 6000.0
+        assert row.mean_ttft_ms == 150.0
+        assert row.p99_ttft_ms == 300.0
+
+
+class TestRollupSummary:
+    """Unit tests for RollupSummary.compute_summary_stats."""
+
+    def test_compute_summary_stats_empty(self):
+        """With no results attached, the computed best-case stats stay None."""
+        rollup = RollupSummary(
+            job_id="12345",
+            job_name="test-job",
+            generated_at="2025-01-22 10:00:00",
+            model_name="test-model",
+            model_path="/models/test",
+            precision="fp8",
+            gpu_type="B200",
+            gpus_per_node=8,
+            total_nodes=4,
+            total_gpus=32,
+            backend_type="sglang",
+            frontend_type="sglang",
+            is_disaggregated=True,
+            benchmark_type="sa-bench",
+            isl=1024,
+            osl=1024,
+        )
+        rollup.compute_summary_stats()
+        assert rollup.max_output_tps is None
+        assert rollup.min_mean_ttft_ms is None
+
+    def test_compute_summary_stats_with_results(self):
+        """With three results, the stats pick the max throughput and min latencies."""
+        rollup = RollupSummary(
+            job_id="12345",
+            job_name="test-job",
+            generated_at="2025-01-22 10:00:00",
+            model_name="test-model",
+            model_path="/models/test",
+            precision="fp8",
+            gpu_type="B200",
+            gpus_per_node=8,
+            total_nodes=4,
+            total_gpus=32,
+            backend_type="sglang",
+            frontend_type="sglang",
+            is_disaggregated=True,
+            benchmark_type="sa-bench",
+            isl=1024,
+            osl=1024,
+            results=[
+                RollupResult(concurrency=50, output_tps=3000.0, mean_ttft_ms=100.0, mean_itl_ms=20.0),
+                RollupResult(concurrency=100, output_tps=5000.0, mean_ttft_ms=150.0, mean_itl_ms=25.0),
+                RollupResult(concurrency=200, output_tps=4500.0, mean_ttft_ms=250.0, mean_itl_ms=30.0),
+            ],
+        )
+        rollup.compute_summary_stats()
+
+        assert rollup.max_output_tps == 5000.0
+        assert rollup.min_mean_ttft_ms == 100.0
+        assert rollup.min_mean_itl_ms == 20.0
+
+
+class TestRollupStageMixin:
+    """Unit tests for the result-collection and JSON-writing helpers of RollupStageMixin."""
+
+    def test_collect_benchmark_results(self, tmp_path):
+        """Collect three result files from a benchmark directory under the log dir."""
+        # Benchmark output directory placed directly under the log dir
+        bench_dir = tmp_path / "sa-bench_isl_1024_osl_1024"
+        bench_dir.mkdir()
+
+        # Write one result JSON per concurrency level
+        for level in (50, 100, 200):
+            # Metrics are derived from the concurrency level so each file is distinct;
+            # request_rate is written as the string "c<level>".
+            payload = {
+                "max_concurrency": level,
+                "output_throughput": 1000.0 * level / 50,
+                "total_token_throughput": 1200.0 * level / 50,
+                "mean_ttft_ms": 100.0 + level,
+                "mean_itl_ms": 15.0 + level / 10,
+                "request_rate": f"c{level}",
+            }
+            serialized = json.dumps(payload)
+            result_path = bench_dir / f"result_c{level}.json"
+            result_path.write_text(serialized)
+
+        # Minimal stand-ins for the attributes the mixin reads off its host class
+        class StubBenchmarkConfig:
+            type = "sa-bench"
+
+        class StubConfig:
+            benchmark = StubBenchmarkConfig()
+
+        class StubOrchestrator(RollupStageMixin):
+            def __init__(self, log_dir):
+                self._log_dir = log_dir
+
+            @property
+            def config(self):
+                return StubConfig()
+
+            @property
+            def runtime(self):
+                class StubRuntime:
+                    log_dir = self._log_dir
+
+                return StubRuntime()
+
+            @property
+            def endpoints(self):
+                return []
+
+        host = StubOrchestrator(tmp_path)
+        collected = host._collect_benchmark_results()
+
+        assert len(collected) == 3
+        # Results should be sorted by concurrency,
+        # i.e. come back in ascending max_concurrency order
+        observed_levels = [entry["max_concurrency"] for entry in collected]
+        assert observed_levels == [50, 100, 200]
+
+    def test_collect_benchmark_results_empty(self, tmp_path):
+        """Collecting from a log dir without any benchmark output yields nothing."""
+
+        class StubBenchmarkConfig:
+            type = "sa-bench"
+
+        class StubConfig:
+            benchmark = StubBenchmarkConfig()
+
+        class StubOrchestrator(RollupStageMixin):
+            def __init__(self, log_dir):
+                self._log_dir = log_dir
+
+            @property
+            def config(self):
+                return StubConfig()
+
+            @property
+            def runtime(self):
+                class StubRuntime:
+                    log_dir = self._log_dir
+
+                return StubRuntime()
+
+            @property
+            def endpoints(self):
+                return []
+
+        host = StubOrchestrator(tmp_path)
+        collected = host._collect_benchmark_results()
+
+        assert len(collected) == 0
+
+    def test_write_rollup(self, tmp_path):
+        """Write a RollupSummary with _write_rollup and read the JSON back."""
+        rollup = RollupSummary(
+            job_id="12345",
+            job_name="test-job",
+            generated_at="2025-01-22 10:00:00",
+            model_name="test-model",
+            model_path="/models/test",
+            precision="fp8",
+            gpu_type="B200",
+            gpus_per_node=8,
+            total_nodes=4,
+            total_gpus=32,
+            backend_type="sglang",
+            frontend_type="sglang",
+            is_disaggregated=True,
+            benchmark_type="sa-bench",
+            isl=1024,
+            osl=1024,
+            prefill_nodes=1,
+            prefill_workers=1,
+            prefill_gpus=8,
+            decode_nodes=3,
+            decode_workers=3,
+            decode_gpus=24,
+            results=[
+                RollupResult(concurrency=100, output_tps=5000.0, mean_ttft_ms=150.0),
+            ],
+            tags=["test", "example"],
+        )
+        rollup.compute_summary_stats()
+
+        class StubOrchestrator(RollupStageMixin):
+            @property
+            def runtime(self):
+                return None
+
+            @property
+            def endpoints(self):
+                return []
+
+        host = StubOrchestrator()
+        target_path = tmp_path / "rollup.json"
+        host._write_rollup(rollup, target_path)
+
+        # The file must now exist on disk
+        assert target_path.exists()
+
+        # Parse it back and compare against the summary built above
+        raw_text = target_path.read_text()
+        written = json.loads(raw_text)
+
+        assert written["job_id"] == "12345"
+        assert written["job_name"] == "test-job"
+        assert written["model_name"] == "test-model"
+        assert written["is_disaggregated"] is True
+        assert written["total_gpus"] == 32
+        assert written["prefill_nodes"] == 1
+        assert written["decode_nodes"] == 3
+        assert len(written["results"]) == 1
+        assert written["results"][0]["concurrency"] == 100
+        assert written["results"][0]["output_tps"] == 5000.0
+        assert written["max_output_tps"] == 5000.0
+        assert written["tags"] == ["test", "example"]
+
+
+class TestRollupIntegration:
+    """Integration tests for rollup with full mock config."""
+
+    def test_full_rollup_workflow(self, tmp_path):  # tmp_path: pytest's per-test temporary directory fixture
+        """Test the complete rollup workflow with mocked config."""
+        from dataclasses import dataclass, field  # needed only for the mock classes defined below
+
+        # Create mock benchmark results: one result_c<N>.json per concurrency level
+        bench_dir = tmp_path / "sa-bench_isl_1024_osl_1024"  # same type/isl/osl as MockBenchmarkConfig below
+        bench_dir.mkdir()
+
+        for concurrency in [50, 100, 200]:  # same levels as MockBenchmarkConfig.concurrencies
+            result_file = bench_dir / f"result_c{concurrency}.json"
+            result_file.write_text(
+                json.dumps(
+                    {
+                        "max_concurrency": concurrency,
+                        "output_throughput": 1000.0 * concurrency / 50,  # 1000.0 / 2000.0 / 4000.0
+                        "total_token_throughput": 1200.0 * concurrency / 50,
+                        "mean_ttft_ms": 100.0 + concurrency,
+                        "mean_itl_ms": 15.0 + concurrency / 10,
+                        "p99_ttft_ms": 200.0 + concurrency * 2,
+                        "p99_itl_ms": 30.0 + concurrency / 5,
+                        "duration": 60.0,
+                        "completed": concurrency * 10,
+                        "num_prompts": concurrency * 10,
+                    }
+                )
+            )
+
+        # Create mock orchestrator with full config (plain dataclasses standing in for the real config objects)
+        @dataclass
+        class MockResourceConfig:
+            is_disaggregated: bool = True
+            prefill_gpus: int = 8
+            decode_gpus: int = 24
+            agg_nodes: int | None = None
+            gpus_per_node: int = 8
+            gpu_type: str = "B200"
+            total_nodes: int = 4
+            prefill_nodes: int = 1
+            decode_nodes: int = 3
+            num_prefill: int = 1
+            num_decode: int = 3
+            num_agg: int | None = None
+
+        @dataclass
+        class MockBenchmarkConfig:
+            type: str = "sa-bench"
+            isl: int = 1024
+            osl: int = 1024
+            concurrencies: str = "50x100x200"  # "x"-separated; parsed by get_concurrency_list()
+
+            def get_concurrency_list(self):
+                return [int(c) for c in self.concurrencies.split("x")]
+
+        @dataclass
+        class MockModelConfig:
+            precision: str = "fp8"
+
+        @dataclass
+        class MockFrontendConfig:
+            type: str = "sglang"
+
+        @dataclass
+        class MockConfig:
+            name: str = "test-job"
+            served_model_name: str = "deepseek-v3"
+            backend_type: str = "sglang"
+            resources: MockResourceConfig = field(default_factory=MockResourceConfig)
+            benchmark: MockBenchmarkConfig = field(default_factory=MockBenchmarkConfig)
+            model: MockModelConfig = field(default_factory=MockModelConfig)
+            frontend: MockFrontendConfig = field(default_factory=MockFrontendConfig)
+
+        @dataclass
+        class MockRuntime:
+            job_id: str = "12345"
+            log_dir: Path = field(default_factory=Path)  # Path() i.e. "."; the test passes tmp_path instead
+            model_path: Path = field(default_factory=lambda: Path("/models/deepseek-v3"))
+
+        class MockOrchestrator(RollupStageMixin):  # run_rollup() is inherited from the mixin
+            def __init__(self, config, runtime):
+                self._config = config
+                self._runtime = runtime
+
+            @property
+            def config(self):
+                return self._config
+
+            @property
+            def runtime(self):
+                return self._runtime
+
+            @property
+            def endpoints(self):
+                return []  # no endpoints in this scenario
+
+        config = MockConfig()
+        runtime = MockRuntime(log_dir=tmp_path)  # point log_dir at the directory prepared above
+        orchestrator = MockOrchestrator(config, runtime)
+
+        # Run rollup; the returned path is loaded as JSON below
+        rollup_path = orchestrator.run_rollup(tags=["integration-test"])
+
+        # Verify a rollup file was actually produced
+        assert rollup_path is not None
+        assert rollup_path.exists()
+
+        with open(rollup_path) as f:
+            data = json.load(f)
+
+        # Verify summary fields match the mock config/runtime values
+        assert data["job_id"] == "12345"
+        assert data["job_name"] == "test-job"
+        assert data["model_name"] == "deepseek-v3"
+        assert data["is_disaggregated"] is True
+        assert data["total_gpus"] == 32  # 8 + 24
+        assert data["benchmark_type"] == "sa-bench"
+        assert data["isl"] == 1024
+        assert data["osl"] == 1024
+        assert data["concurrencies"] == [50, 100, 200]
+
+        # Verify results: one entry per result file written above; max comes from the c200 file
+        assert len(data["results"]) == 3
+        assert data["max_output_tps"] == 4000.0  # 1000 * 200/50
+
+        # Verify tags passed to run_rollup() are recorded unchanged
+        assert data["tags"] == ["integration-test"]
+
+    def test_rollup_with_node_logs(self, tmp_path):
+        """Test rollup with actual node log files parsed by NodeAnalyzer."""
+        from dataclasses import dataclass, field
+
+        # Create mock benchmark results
+        bench_dir = tmp_path / "sa-bench_isl_1024_osl_1024"
+        bench_dir.mkdir()
+
+        result_file = bench_dir / "result_c100.json"
+        result_file.write_text(
+            json.dumps(
+                {
+                    "max_concurrency": 100,
+                    "output_throughput": 5000.0,
+                    "mean_ttft_ms": 150.0,
+                    "mean_itl_ms": 20.0,
+                }
+            )
+        )
+
+        # Create mock prefill log file (matches SGLang parser expected format)
+        prefill_log = tmp_path / "node-01_prefill_w0.err"
+        prefill_log.write_text(
+            """[2m2025-01-22T10:00:00.000000Z[0m [32m INFO[0m Prefill batch, #new-seq: 10, #new-token: 1024, #cached-token: 256, token usage: 0.50, #running-req: 5, #queue-req: 2, #prealloc-req: 0, #inflight-req: 3, input throughput (token/s): 5000.00,
+[2m2025-01-22T10:00:01.000000Z[0m [32m INFO[0m Load weight end. type=DeepseekV3ForCausalLM, dtype=torch.bfloat16, avail mem=75.11 GB, mem usage=107.07 GB.
+[2m2025-01-22T10:00:02.000000Z[0m [32m INFO[0m KV Cache is allocated. #tokens: 524288, KV size: 17.16 GB
+"""
+        )
+
+        # Create mock decode log file
+        decode_log = tmp_path / "node-02_decode_w0.err"
+        decode_log.write_text(
+            """[2m2025-01-22T10:00:00.000000Z[0m [32m INFO[0m Decode batch, #running-req: 50, #token: 5000, token usage: 0.50, pre-allocated usage: 0.10, #prealloc-req: 2, #transfer-req: 1, #queue-req: 3, gen throughput (token/s): 150.00,
+"""
+        )
+
+        # Mock config classes
+        @dataclass
+        class MockResourceConfig:
+            is_disaggregated: bool = True
+            prefill_gpus: int = 8
+            decode_gpus: int = 8
+            agg_nodes: int | None = None
+            gpus_per_node: int = 8
+            gpu_type: str = "B200"
+            total_nodes: int = 2
+            prefill_nodes: int = 1
+            decode_nodes: int = 1
+            num_prefill: int = 1
+            num_decode: int = 1
+            num_agg: int | None = None
+
+        @dataclass
+        class MockBenchmarkConfig:
+            type: str = "sa-bench"
+            isl: int = 1024
+            osl: int = 1024
+            concurrencies: str = "100"
+
+            def get_concurrency_list(self):
+                return [100]
+
+        @dataclass
+        class MockModelConfig:
+            precision: str = "fp8"
+
+        @dataclass
+        class MockFrontendConfig:
+            type: str = "sglang"
+
+        @dataclass
+        class MockConfig:
+            name: str = "test-job"
+            served_model_name: str = "deepseek-v3"
+            backend_type: str = "sglang"
+            resources: MockResourceConfig = field(default_factory=MockResourceConfig)
+            benchmark: MockBenchmarkConfig = field(default_factory=MockBenchmarkConfig)
+            model: MockModelConfig = field(default_factory=MockModelConfig)
+            frontend: MockFrontendConfig = field(default_factory=MockFrontendConfig)
+
+        @dataclass
+        class MockRuntime:
+            job_id: str = "12345"
+            log_dir: Path = field(default_factory=Path)
+            model_path: Path = field(default_factory=lambda: Path("/models/deepseek-v3"))
+
+        class MockOrchestrator(RollupStageMixin):
+            def __init__(self, config, runtime):
+                self._config = config
+                self._runtime = runtime
+
+            @property
+            def config(self):
+                return self._config
+
+            @property
+            def runtime(self):
+                return self._runtime
+
+            @property
+            def endpoints(self):
+                return []
+
+        config = MockConfig()
+        runtime = MockRuntime(log_dir=tmp_path)
+        orchestrator = MockOrchestrator(config, runtime)
+
+        # Run rollup
+        rollup_path = orchestrator.run_rollup(tags=["node-test"])
+
+        assert rollup_path is not None
+        assert rollup_path.exists()
+
+        with open(rollup_path) as f:
+            data = json.load(f)
+
+        # Verify node summary is present
+        assert data["nodes_summary"] is not None
+        nodes_summary = data["nodes_summary"]
+
+        assert nodes_summary["total_prefill_nodes"] == 1
+        assert nodes_summary["total_decode_nodes"] == 1
+        assert len(nodes_summary["nodes"]) == 2
+
+        # Find prefill and decode nodes
+        prefill_node = next((n for n in nodes_summary["nodes"] if n["worker_type"] == "prefill"), None)
+        decode_node = next((n for n in nodes_summary["nodes"] if n["worker_type"] == "decode"), None)
+
+        assert prefill_node is not None
+        assert prefill_node["node_name"] == "node-01"
+        assert prefill_node["total_new_tokens"] == 1024
+        assert prefill_node["total_cached_tokens"] == 256
+        assert prefill_node["kv_cache_gb"] == 17.16
+
+        assert decode_node is not None
+        assert decode_node["node_name"] == "node-02"
+        assert decode_node["max_running_requests"] == 50
+        assert decode_node["avg_gen_throughput"] == 150.0
+