From 43dd31a6a036ec90a53cb33c84b6bd1ebb168319 Mon Sep 17 00:00:00 2001
From: Kaunil Dhruv
Date: Mon, 26 Jan 2026 12:54:28 -0800
Subject: [PATCH 01/15] parsers for node and benchmark logs; trim AI slop

---
 analysis/dashboard/app.py                     |  14 +-
 analysis/dashboard/components.py              |   6 +-
 analysis/dashboard/config_tab.py              |   4 +-
 analysis/dashboard/node_metrics_tab.py        |   4 +-
 analysis/dashboard/rate_match_tab.py          |   6 +-
 analysis/srtlog/config_reader.py              |   7 +-
 analysis/srtlog/log_parser.py                 | 621 +++++++--------
 analysis/srtlog/models.py                     | 253 ++++--
 analysis/srtlog/parsers/__init__.py           | 230 ++++++
 analysis/srtlog/parsers/benchmark/__init__.py |   7 +
 .../parsers/benchmark/mooncake_router.py      | 306 ++++++++
 analysis/srtlog/parsers/benchmark/sa_bench.py | 318 ++++++++
 analysis/srtlog/parsers/nodes/__init__.py     |   9 +
 analysis/srtlog/parsers/nodes/sglang.py       | 441 +++++++++++
 analysis/srtlog/parsers/nodes/trtllm.py       | 473 +++++++++++
 analysis/srtlog/run_loader.py                 | 353 ++++++---
 pyproject.toml                                |   7 +
 src/srtctl/backends/base.py                   |   2 +-
 src/srtctl/backends/sglang.py                 |   7 +-
 src/srtctl/backends/trtllm.py                 |  19 +-
 src/srtctl/benchmarks/scripts/gpqa/bench.sh   |  11 +-
 .../benchmarks/scripts/longbenchv2/bench.sh   |  16 +-
 src/srtctl/benchmarks/scripts/mmlu/bench.sh   |  11 +-
 .../scripts/mooncake-router/bench.sh          |  27 +-
 .../benchmarks/scripts/profiling/profile.sh   |  26 +-
 src/srtctl/benchmarks/scripts/router/bench.sh |  10 +-
 .../benchmarks/scripts/sa-bench/bench.sh      |  36 +-
 src/srtctl/core/schema.py                     |   2 +-
 src/srtctl/frontends/dynamo.py                |   2 +-
 tests/fixtures_parsers.py                     | 361 +++++++++
 tests/test_parsers.py                         | 743 ++++++++++++++++++
 tests/test_runloader_parsers.py               | 332 ++++++++
 32 files changed, 4018 insertions(+), 646 deletions(-)
 create mode 100644 analysis/srtlog/parsers/__init__.py
 create mode 100644 analysis/srtlog/parsers/benchmark/__init__.py
 create mode 100644 analysis/srtlog/parsers/benchmark/mooncake_router.py
 create mode 100644 analysis/srtlog/parsers/benchmark/sa_bench.py
 create mode 100644 analysis/srtlog/parsers/nodes/__init__.py
 create mode 100644 analysis/srtlog/parsers/nodes/sglang.py
 create mode 100644 analysis/srtlog/parsers/nodes/trtllm.py
 create mode 100644 tests/fixtures_parsers.py
 create mode 100644 tests/test_parsers.py
 create mode 100644 tests/test_runloader_parsers.py

diff --git a/analysis/dashboard/app.py b/analysis/dashboard/app.py
index 87920a15..35e0722e 100644
--- a/analysis/dashboard/app.py
+++ b/analysis/dashboard/app.py
@@ -102,8 +102,8 @@ def render_sidebar(logs_dir, runs):
     with st.sidebar.expander("📊 ISL/OSL", expanded=False):
         isl_osl_pairs = set()
         for run in sorted_runs:
-            if run.profiler.isl and run.profiler.osl:
-                isl_osl_pairs.add(f"{run.profiler.isl}/{run.profiler.osl}")
+            if run.profiler_metadata.isl and run.profiler_metadata.osl:
+                isl_osl_pairs.add(f"{run.profiler_metadata.isl}/{run.profiler_metadata.osl}")
 
         if isl_osl_pairs:
             pair_options = sorted(isl_osl_pairs)
@@ -115,7 +115,7 @@
             )
 
             if selected_pairs:
-                sorted_runs = [r for r in sorted_runs if f"{r.profiler.isl}/{r.profiler.osl}" in selected_pairs]
+                sorted_runs = [r for r in sorted_runs if f"{r.profiler_metadata.isl}/{r.profiler_metadata.osl}" in selected_pairs]
         else:
             st.caption("No ISL/OSL information available")
 
@@ -176,8 +176,8 @@
 
     for run in sorted_runs:
         topology = run.metadata.topology_label
-        isl = run.profiler.isl
-        osl = run.profiler.osl
+        isl = run.profiler_metadata.isl
+        osl = run.profiler_metadata.osl
         gpu_type = run.metadata.gpu_type
         gpu_suffix = f" [{gpu_type}]" 
if gpu_type else "" # Include job ID to ensure unique labels @@ -284,7 +284,7 @@ def render_sidebar(logs_dir, runs): f"{run.job_id} | " f"{run.metadata.agg_workers}A | " f"{total_gpus} GPUs | " - f"{run.profiler.isl}/{run.profiler.osl}" + f"{run.profiler_metadata.isl}/{run.profiler_metadata.osl}" ) else: run_id = ( @@ -298,7 +298,7 @@ def render_sidebar(logs_dir, runs): f"{run.job_id} | " f"{run.metadata.prefill_workers}P{run.metadata.decode_workers}D | " f"{prefill_gpus}/{decode_gpus} | " - f"{run.profiler.isl}/{run.profiler.osl}" + f"{run.profiler_metadata.isl}/{run.profiler_metadata.osl}" ) if run.metadata.gpu_type: diff --git a/analysis/dashboard/components.py b/analysis/dashboard/components.py index 22bb320c..1b0979d0 100644 --- a/analysis/dashboard/components.py +++ b/analysis/dashboard/components.py @@ -101,7 +101,11 @@ def _node_to_dict(node) -> dict: Temporary converter for compatibility with existing visualization code. """ return { - "node_info": node.node_info, + "node_info": { + "node": node.node_name, + "worker_type": node.worker_type, + "worker_id": node.worker_id, + }, "prefill_batches": [_batch_to_dict(b) for b in node.batches], "memory_snapshots": [_memory_to_dict(m) for m in node.memory_snapshots], "config": node.config, diff --git a/analysis/dashboard/config_tab.py b/analysis/dashboard/config_tab.py index ec234de2..a8b9f59a 100644 --- a/analysis/dashboard/config_tab.py +++ b/analysis/dashboard/config_tab.py @@ -64,10 +64,10 @@ def render(filtered_runs: list): with col2: st.metric("GPU", config_data["summary"]["gpu_type"]) with col3: - st.metric("ISL/OSL", f"{run.profiler.isl}/{run.profiler.osl}") + st.metric("ISL/OSL", f"{run.profiler_metadata.isl}/{run.profiler_metadata.osl}") with col4: gpu_type_suffix = f" ({run.metadata.gpu_type})" if run.metadata.gpu_type else "" - st.metric("Profiler", f"{run.profiler.profiler_type}{gpu_type_suffix}") + st.metric("Profiler", f"{run.profiler_metadata.profiler_type}{gpu_type_suffix}") st.caption(f"Model: {config_data['summary']['model']}") st.divider() diff --git a/analysis/dashboard/node_metrics_tab.py b/analysis/dashboard/node_metrics_tab.py index 5c83ea9e..37d250be 100644 --- a/analysis/dashboard/node_metrics_tab.py +++ b/analysis/dashboard/node_metrics_tab.py @@ -72,8 +72,8 @@ def render(filtered_runs: list, logs_dir: str): "agg_workers": run.metadata.agg_workers, "gpus_per_node": run.metadata.gpus_per_node, "total_gpus": run.total_gpus, - "isl": run.profiler.isl, - "osl": run.profiler.osl, + "isl": run.profiler_metadata.isl, + "osl": run.profiler_metadata.osl, "gpu_type": run.metadata.gpu_type, } all_node_metrics.extend(node_metrics) diff --git a/analysis/dashboard/rate_match_tab.py b/analysis/dashboard/rate_match_tab.py index 36fa50cb..e86f48dd 100644 --- a/analysis/dashboard/rate_match_tab.py +++ b/analysis/dashboard/rate_match_tab.py @@ -84,11 +84,11 @@ def render(filtered_runs: list, logs_dir: str): decode_gpus = run.metadata.decode_nodes * run.metadata.gpus_per_node st.metric("GPU Split", f"{prefill_gpus} / {decode_gpus}") with col4: - st.metric("ISL/OSL", f"{run.profiler.isl}/{run.profiler.osl}") + st.metric("ISL/OSL", f"{run.profiler_metadata.isl}/{run.profiler_metadata.osl}") # Create rate match graph - isl = int(run.profiler.isl) if run.profiler.isl else None - osl = int(run.profiler.osl) if run.profiler.osl else None + isl = int(run.profiler_metadata.isl) if run.profiler_metadata.isl else None + osl = int(run.profiler_metadata.osl) if run.profiler_metadata.osl else None rate_fig = _create_rate_match_graph( 
prefill_nodes, decode_nodes, run.job_id, show_request_rate=show_request_rate, isl=isl, osl=osl ) diff --git a/analysis/srtlog/config_reader.py b/analysis/srtlog/config_reader.py index ce5eb328..98a4c089 100644 --- a/analysis/srtlog/config_reader.py +++ b/analysis/srtlog/config_reader.py @@ -10,7 +10,7 @@ import pandas as pd from .cache_manager import CacheManager -from .models import NodeConfig, ParsedCommandInfo +from .models import NodeConfig, TopologyInfo # Configure logging logger = logging.getLogger(__name__) @@ -285,7 +285,7 @@ def parse_command_line_to_dict(cmd_args: list[str]) -> dict[str, str]: return parsed -def parse_command_line_from_err(run_path: str) -> ParsedCommandInfo: +def parse_command_line_from_err(run_path: str) -> TopologyInfo: """Parse .err/.out files to find explicitly set flags and service topology. Uses parquet caching to avoid re-parsing on subsequent loads. @@ -298,10 +298,9 @@ def parse_command_line_from_err(run_path: str) -> ParsedCommandInfo: run_path: Path to the run directory containing .err/.out files Returns: - { + TopologyInfo with: 'explicit_flags': set of flag names that were explicitly set, 'services': {node_name: [service_types]} - } """ import os import re diff --git a/analysis/srtlog/log_parser.py b/analysis/srtlog/log_parser.py index 7de28ac2..ee2ca471 100644 --- a/analysis/srtlog/log_parser.py +++ b/analysis/srtlog/log_parser.py @@ -4,13 +4,17 @@ All parsing logic encapsulated in the NodeAnalyzer class. """ +import json import logging -import os -import re +import time +from pathlib import Path import pandas as pd +import yaml from .cache_manager import CacheManager +from .models import NodeInfo +from .parsers import get_node_parser # Configure logging logger = logging.getLogger(__name__) @@ -19,21 +23,21 @@ class NodeAnalyzer: """Service for analyzing node-level metrics from log files. - Parses .err/.out files to extract batch metrics, memory usage, and configuration. - All parsing logic is encapsulated as methods. + Uses the new parser infrastructure to parse node logs based on detected backend type. """ def parse_run_logs(self, run_path: str, return_dicts: bool = False) -> list: """Parse all node log files in a run directory. Uses parquet caching to avoid re-parsing on subsequent loads. + Automatically detects backend type and uses appropriate parser. Args: run_path: Path to the run directory containing .err/.out files - return_dicts: If True, return dicts directly (faster). If False, return NodeMetrics objects. + return_dicts: If True, return dicts directly (faster). If False, return NodeInfo objects. 
        Returns:
-            List of NodeMetrics objects or dicts, one per node
+            List of NodeInfo objects (or dicts), one per node
         """
         # Initialize cache manager
         cache_mgr = CacheManager(run_path)
@@ -50,164 +54,289 @@ def parse_run_logs(self, run_path: str, return_dicts: bool = False) -> list:
                 nodes = self._dataframe_to_dicts(cached_df)
                 logger.info(f"Loaded {len(nodes)} nodes from cache (as dicts)")
             else:
-                # Reconstruct NodeMetrics objects from DataFrame
-                nodes = self._deserialize_node_metrics(cached_df)
+                # Reconstruct NodeInfo objects from DataFrame
+                nodes = self._deserialize_node_metrics(cached_df, run_path=run_path)
                 logger.info(f"Loaded {len(nodes)} nodes from cache")
             return nodes
 
-        # Cache miss or invalid - parse from .err/.out files
-        nodes = []
-
-        if not os.path.exists(run_path):
-            logger.error(f"Run path does not exist: {run_path}")
-            return nodes
+        # Cache miss or invalid - parse using new parser infrastructure
+        backend_type = self._detect_backend_type(run_path)
+        if not backend_type:
+            logger.warning(f"Could not detect backend type for {run_path}")
+            return []
 
-        total_err_files = 0
-        parsed_successfully = 0
+        # Get appropriate parser (get_node_parser raises ValueError for unknown backends)
+        try:
+            parser = get_node_parser(backend_type)
+        except ValueError:
+            logger.warning(f"No parser registered for backend '{backend_type}'")
+            return []
 
-        for file in os.listdir(run_path):
-            if (file.endswith(".err") or file.endswith(".out")) and ("prefill" in file or "decode" in file):
-                total_err_files += 1
-                filepath = os.path.join(run_path, file)
-                node = self.parse_single_log(filepath)
-                if node:
-                    nodes.append(node)
-                    parsed_successfully += 1
+        # Use parser to parse logs directory
+        logs_dir = Path(run_path) / "logs"
+        if not logs_dir.exists():
+            # For backwards compatibility, try parsing files in run_path directly
+            logs_dir = Path(run_path)
 
-        logger.info(f"Parsed {parsed_successfully}/{total_err_files} prefill/decode log files from {run_path}")
+        logger.info(f"Using {backend_type} parser to parse logs in {logs_dir}")
+        node_infos = parser.parse_logs(logs_dir)
 
-        if total_err_files == 0:
-            logger.warning(f"No prefill/decode log files found in {run_path}")
+        # Populate additional config from config files if available
+        if node_infos:
+            self._populate_config_from_files(run_path, node_infos)
 
         # Save to cache if we have data
-        if nodes:
-            cache_df = self._serialize_node_metrics(nodes)
+        if node_infos:
+            # Extract metrics for caching
+            metrics_list = [ni.metrics for ni in node_infos]
+            cache_df = self._serialize_node_metrics(metrics_list)
             cache_mgr.save_to_cache("node_metrics", cache_df, source_patterns)
+            logger.info(f"Parsed and cached {len(node_infos)} nodes from {logs_dir}")
 
-        return nodes
+        if return_dicts:
+            return [self._node_info_to_dict(node) for node in node_infos]
+        return node_infos
+
+    def _detect_backend_type(self, run_path: str) -> str | None:
+        """Detect backend type from run metadata.
 
-    def parse_single_log(self, filepath: str):
-        """Parse a single node log file.
+        Looks for *.json files with container information in run_path
+        and its parent directory (for cases where run_path is logs/).
+        Also looks at log file content as fallback.
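+
+        For example (image name illustrative), a metadata JSON containing
+        {"container": "lmsysorg/sglang:latest"} is detected as "sglang" via substring match.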
Args: - filepath: Path to the .err/.out log file + run_path: Path to the run directory (or logs subdirectory) Returns: - NodeMetrics object or None if parsing failed + Backend type string (e.g., 'sglang', 'trtllm') or None """ - from .models import BatchMetrics, MemoryMetrics, NodeMetrics + run_path = Path(run_path) + + # Try current directory and parent directory + search_dirs = [run_path] + if run_path.name == "logs" and run_path.parent.exists(): + search_dirs.insert(0, run_path.parent) # Check parent first + + # Try JSON files first + for search_dir in search_dirs: + json_files = list(search_dir.glob("*.json")) + for json_file in json_files: + try: + with open(json_file) as f: + metadata = json.load(f) + # Try different possible locations for container info + container = metadata.get("container", "") + if not container: + container = metadata.get("model", {}).get("container", "") + + container_lower = container.lower() + if "sglang" in container_lower: + logger.debug(f"Detected sglang from {json_file}") + return "sglang" + if "trtllm" in container_lower or "dynamo" in container_lower: + logger.debug(f"Detected trtllm from {json_file}") + return "trtllm" + except Exception as e: + logger.debug(f"Could not read {json_file}: {e}") + continue + + # Try config.yaml as fallback + for search_dir in search_dirs: + yaml_path = search_dir / "config.yaml" + if yaml_path.exists(): + try: + with open(yaml_path) as f: + config = yaml.safe_load(f) + backend_type = config.get("backend", {}).get("type", "").lower() + if backend_type in ["sglang", "trtllm"]: + logger.debug(f"Detected {backend_type} from config.yaml") + return backend_type + except Exception as e: + logger.debug(f"Could not read {yaml_path}: {e}") + + # Last resort: look at log files + logs_dir = run_path if run_path.name == "logs" else run_path / "logs" + if logs_dir.exists(): + log_files = list(logs_dir.glob("*.out")) + list(logs_dir.glob("*.err")) + for log_file in log_files[:3]: # Check first few files + try: + with open(log_file) as f: + content = f.read(2000) # Read first 2KB + if "sglang.launch_server" in content or "sglang.srt" in content: + logger.debug(f"Detected sglang from log content in {log_file.name}") + return "sglang" + if "dynamo.trtllm" in content or "tensorrt_llm" in content: + logger.debug(f"Detected trtllm from log content in {log_file.name}") + return "trtllm" + except Exception as e: + logger.debug(f"Could not read {log_file}: {e}") - node_info = self._extract_node_info_from_filename(filepath) - if not node_info: - logger.warning( - f"Could not extract node info from filename: {filepath}. " - f"Expected format: __.err or .out" - ) - return None + return None + + def _populate_config_from_files(self, run_path: str, node_infos: list) -> None: + """Populate node configuration from config files. + + Reads both: + 1. Per-node *_config.json files (gpu_info, server_args) + 2. Global config.yaml file (environment variables by worker type) + + Merges with existing config that already has launch_command from log parsing. 
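+
+        Example per-node config filename (node name illustrative):
+            worker-3_prefill_w0_config.json -> node "worker-3", type "prefill", id "w0"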
+ + Args: + run_path: Path to the run directory (or logs subdirectory) + node_infos: List of NodeInfo objects to enhance with config file data + """ + import os + + run_path = Path(run_path) + + # If run_path is the logs directory, look in parent for config files + if run_path.name == "logs" and run_path.parent.exists(): + config_dir = run_path.parent + else: + config_dir = run_path + + # Parse global config.yaml for environment variables + yaml_env = self._parse_yaml_environment(config_dir) + + # Find all per-node config files + config_files = {} + for file in os.listdir(config_dir): + if file.endswith("_config.json"): + # Extract node identifier from filename (e.g., "worker-3_prefill_w0_config.json" -> "worker-3_prefill_w0") + node_id = file.replace("_config.json", "") + config_files[node_id] = config_dir / file + + # Build or enhance node_config for each NodeInfo + for node_info in node_infos: + metrics = node_info.metrics + node_name = metrics.node_name + worker_type = metrics.worker_type + worker_id = metrics.worker_id + + # Try to find matching config file + # Format: ___config.json + potential_keys = [ + f"{node_name}_{worker_type}_{worker_id}", # Exact match + f"{node_name}_{worker_type}", # Without worker_id + node_name, # Just node name + ] + + config_path = None + for key in potential_keys: + if key in config_files: + config_path = config_files[key] + break + + # Load config file if it exists and merge with existing config + if config_path and config_path.exists(): + try: + with open(config_path) as f: + file_config = json.load(f) + # Merge file config with existing config (which has launch_command) + if node_info.node_config: + # Keep launch_command from log parsing + launch_cmd = node_info.node_config.get("launch_command") + node_info.node_config.update(file_config) + if launch_cmd: + node_info.node_config["launch_command"] = launch_cmd + else: + node_info.node_config = file_config + logger.debug(f"Loaded config for {node_name} with {len(file_config.get('environment', {}))} env vars") + except Exception as e: + logger.warning(f"Could not load config from {config_path}: {e}") + # Keep existing minimal config with launch_command + else: + # No config file found + if not node_info.node_config: + node_info.node_config = {"environment": {}} + elif "environment" not in node_info.node_config: + node_info.node_config["environment"] = {} + logger.debug(f"No config file found for node {node_name}, using minimal config") + + # Merge environment variables from config.yaml + if yaml_env and worker_type in yaml_env: + if not node_info.node_config: + node_info.node_config = {} + if "environment" not in node_info.node_config: + node_info.node_config["environment"] = {} + + # Merge YAML env vars (they take precedence over JSON) + yaml_worker_env = yaml_env[worker_type] + node_info.node_config["environment"].update(yaml_worker_env) + logger.debug(f"Merged {len(yaml_worker_env)} env vars from config.yaml for {node_name} ({worker_type})") + + def _parse_yaml_environment(self, run_path: Path) -> dict[str, dict[str, str]]: + """Parse environment variables from config.yaml. 
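+
+        Expected config.yaml shape (keys as read below; env var names illustrative):
+            backend:
+              prefill_environment: {SGLANG_LOG_LEVEL: info}
+              decode_environment: {SGLANG_LOG_LEVEL: warning}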
+ + Args: + run_path: Path to the run directory - batches = [] - memory_snapshots = [] - config = {} + Returns: + Dict mapping worker_type to environment variables + Example: {"prefill": {"VAR1": "val1"}, "decode": {"VAR2": "val2"}} + """ + yaml_path = run_path / "config.yaml" + if not yaml_path.exists(): + logger.debug(f"No config.yaml found in {run_path}") + return {} try: - with open(filepath) as f: - for line in f: - # Parse prefill batch metrics - batch_metrics = self._parse_prefill_batch_line(line) - if batch_metrics: - batches.append( - BatchMetrics( - timestamp=batch_metrics["timestamp"], - dp=batch_metrics["dp"], - tp=batch_metrics["tp"], - ep=batch_metrics["ep"], - batch_type=batch_metrics["type"], - new_seq=batch_metrics.get("new_seq"), - new_token=batch_metrics.get("new_token"), - cached_token=batch_metrics.get("cached_token"), - token_usage=batch_metrics.get("token_usage"), - running_req=batch_metrics.get("running_req"), - queue_req=batch_metrics.get("queue_req"), - prealloc_req=batch_metrics.get("prealloc_req"), - inflight_req=batch_metrics.get("inflight_req"), - input_throughput=batch_metrics.get("input_throughput"), - ) - ) - - # Parse decode batch metrics - decode_metrics = self._parse_decode_batch_line(line) - if decode_metrics: - batches.append( - BatchMetrics( - timestamp=decode_metrics["timestamp"], - dp=decode_metrics["dp"], - tp=decode_metrics["tp"], - ep=decode_metrics["ep"], - batch_type=decode_metrics["type"], - running_req=decode_metrics.get("running_req"), - queue_req=decode_metrics.get("queue_req"), - prealloc_req=decode_metrics.get("prealloc_req"), - transfer_req=decode_metrics.get("transfer_req"), - token_usage=decode_metrics.get("token_usage"), - preallocated_usage=decode_metrics.get("preallocated_usage"), - num_tokens=decode_metrics.get("num_tokens"), - gen_throughput=decode_metrics.get("gen_throughput"), - ) - ) - - # Parse memory metrics - mem_metrics = self._parse_memory_line(line) - if mem_metrics: - memory_snapshots.append( - MemoryMetrics( - timestamp=mem_metrics["timestamp"], - dp=mem_metrics["dp"], - tp=mem_metrics["tp"], - ep=mem_metrics["ep"], - metric_type=mem_metrics["type"], - avail_mem_gb=mem_metrics.get("avail_mem_gb"), - mem_usage_gb=mem_metrics.get("mem_usage_gb"), - kv_cache_gb=mem_metrics.get("kv_cache_gb"), - kv_tokens=mem_metrics.get("kv_tokens"), - ) - ) - - # Extract TP/DP/EP configuration from command line - if "--tp-size" in line: - tp_match = re.search(r"--tp-size\s+(\d+)", line) - dp_match = re.search(r"--dp-size\s+(\d+)", line) - ep_match = re.search(r"--ep-size\s+(\d+)", line) - - if tp_match: - config["tp_size"] = int(tp_match.group(1)) - if dp_match: - config["dp_size"] = int(dp_match.group(1)) - if ep_match: - config["ep_size"] = int(ep_match.group(1)) + with open(yaml_path) as f: + config = yaml.safe_load(f) + + if not config or "backend" not in config: + logger.debug("config.yaml has no backend section") + return {} - except Exception as e: - logger.error(f"Error parsing {filepath}: {e}") - return None + backend = config["backend"] + env_vars = {} - # Validation: Log if we found no metrics - total_metrics = len(batches) + len(memory_snapshots) + # Extract prefill_environment + if "prefill_environment" in backend: + env_vars["prefill"] = backend["prefill_environment"] + logger.info(f"Loaded {len(env_vars['prefill'])} prefill env vars from config.yaml") - if total_metrics == 0: - logger.warning( - f"Parsed {filepath} but found no metrics. " - f"Expected to find lines with DP/TP/EP tags. " - f"Log format may have changed." 
- ) + # Extract decode_environment + if "decode_environment" in backend: + env_vars["decode"] = backend["decode_environment"] + logger.info(f"Loaded {len(env_vars['decode'])} decode env vars from config.yaml") + + # Extract agg_environment if present + if "agg_environment" in backend: + env_vars["agg"] = backend["agg_environment"] + logger.info(f"Loaded {len(env_vars['agg'])} agg env vars from config.yaml") + + return env_vars + + except Exception as e: + logger.warning(f"Could not parse config.yaml in {run_path}: {e}") + return {} - logger.debug(f"Parsed {filepath}: {len(batches)} batches, " f"{len(memory_snapshots)} memory snapshots") + def _node_info_to_dict(self, node_info: "NodeInfo") -> dict: + """Convert NodeInfo object to dict for compatibility. - return NodeMetrics( - node_info=node_info, - batches=batches, - memory_snapshots=memory_snapshots, - config=config, - ) + Args: + node_info: NodeInfo object + Returns: + Dict representation compatible with old structure + """ + metrics = node_info.metrics + return { + "node_info": { + "node": metrics.node_name, + "worker_type": metrics.worker_type, + "worker_id": metrics.worker_id, + }, + "prefill_batches": metrics.batches, # Keep as list of BatchMetrics objects + "memory_snapshots": metrics.memory_snapshots, # Keep as list of MemoryMetrics objects + "config": metrics.config, # Runtime config (TP/PP/EP, batch sizes) + "node_config": node_info.node_config, # Full config (environment, launch_command, gpu_info) + "launch_command": node_info.launch_command, # Property accessor for backward compatibility + "environment": node_info.environment, # Property accessor for backward compatibility + "run_id": metrics.run_id, + } + def get_prefill_nodes(self, nodes: list): """Filter for prefill nodes only. @@ -271,16 +400,16 @@ def _serialize_node_metrics(self, nodes: list) -> pd.DataFrame: rows = [] for node in nodes: - node_info = node.node_info + metadata = node.metadata config = node.config # Serialize batch metrics for batch in node.batches: row = { # Node identification - "node": node_info.get("node", ""), - "worker_type": node_info.get("worker_type", ""), - "worker_id": node_info.get("worker_id", ""), + "node": metadata.node_name, + "worker_type": metadata.worker_type, + "worker_id": metadata.worker_id, # Config "tp_size": config.get("tp_size"), "dp_size": config.get("dp_size"), @@ -313,9 +442,9 @@ def _serialize_node_metrics(self, nodes: list) -> pd.DataFrame: for mem in node.memory_snapshots: row = { # Node identification - "node": node_info.get("node", ""), - "worker_type": node_info.get("worker_type", ""), - "worker_id": node_info.get("worker_id", ""), + "node": metadata.node_name, + "worker_type": metadata.worker_type, + "worker_id": metadata.worker_id, # Config "tp_size": config.get("tp_size"), "dp_size": config.get("dp_size"), @@ -336,17 +465,18 @@ def _serialize_node_metrics(self, nodes: list) -> pd.DataFrame: return pd.DataFrame(rows) - def _deserialize_node_metrics(self, df: pd.DataFrame) -> list: - """Deserialize NodeMetrics objects from a cached DataFrame. + def _deserialize_node_metrics(self, df: pd.DataFrame, run_path: str = None) -> list: + """Deserialize NodeInfo objects from a cached DataFrame. 
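+
+        Groups cached rows by (node, worker_type, worker_id) and rebuilds one
+        NodeInfo per group; node_config is re-populated from config files afterwards.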
Args: df: DataFrame with cached node metrics + run_path: Path to the run directory (for loading config files) Returns: - List of NodeMetrics objects + List of NodeInfo objects """ import time - from .models import BatchMetrics, MemoryMetrics, NodeMetrics + from .models import BatchMetrics, MemoryMetrics, NodeInfo, NodeMetadata, NodeMetrics start_time = time.time() nodes = [] @@ -355,12 +485,6 @@ def _deserialize_node_metrics(self, df: pd.DataFrame) -> list: for (node_name, worker_type, worker_id), group_df in df.groupby( ["node", "worker_type", "worker_id"], dropna=False ): - node_info = { - "node": node_name, - "worker_type": worker_type, - "worker_id": worker_id, - } - # Extract config (same for all rows in this node) config = {} if not group_df.empty: @@ -427,185 +551,34 @@ def _deserialize_node_metrics(self, df: pd.DataFrame) -> list: ) memory_snapshots.append(mem) - # Create NodeMetrics object - node = NodeMetrics( - node_info=node_info, + # Create NodeMetadata + node_metadata = NodeMetadata( + node_name=node_name, + worker_type=worker_type, + worker_id=worker_id, + ) + + # Create NodeMetrics (NEW structure) + metrics = NodeMetrics( + metadata=node_metadata, batches=batches, memory_snapshots=memory_snapshots, config=config, ) - nodes.append(node) + + # Create NodeInfo with empty config (will be populated below) + node_info = NodeInfo(metrics=metrics, node_config={}) + nodes.append(node_info) elapsed = time.time() - start_time logger.info(f"Deserialized {len(nodes)} nodes in {elapsed:.2f}s") + + # Populate config from files (environment, launch_command) + if run_path and nodes: + self._populate_config_from_files(run_path, nodes) + return nodes - # Private helper methods - - def _parse_dp_tp_ep_tag(self, line: str) -> tuple[int | None, int | None, int | None, str | None]: - """Extract DP, TP, EP indices and timestamp from log line. - - Supports three formats: - - Full: [2025-11-04 05:31:43 DP0 TP0 EP0] - - Simple TP: [2025-11-04 07:05:55 TP0] (defaults DP=0, EP=0) - - Pipeline: [2025-12-08 14:34:44 PP0] (defaults DP=0, EP=0, TP=PP value) - - Args: - line: Log line to parse - - Returns: - (dp, tp, ep, timestamp) or (None, None, None, None) if pattern not found - """ - # Try full format first: DP0 TP0 EP0 - match = re.search(r"\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) DP(\d+) TP(\d+) EP(\d+)\]", line) - if match: - timestamp, dp, tp, ep = match.groups() - return int(dp), int(tp), int(ep), timestamp - - # Try simple format: TP0 only (1P4D style) - match = re.search(r"\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) TP(\d+)\]", line) - if match: - timestamp, tp = match.groups() - return 0, int(tp), 0, timestamp # Default DP=0, EP=0 - - # Try pipeline parallelism format: PP0 (prefill with PP) - match = re.search(r"\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) PP(\d+)\]", line) - if match: - timestamp, pp = match.groups() - return 0, int(pp), 0, timestamp # Map PP to TP slot, default DP=0, EP=0 - - return None, None, None, None - - def _parse_prefill_batch_line(self, line: str) -> dict | None: - """Parse prefill batch log line for metrics. 
- - Example line: - [2025-11-04 05:31:43 DP0 TP0 EP0] Prefill batch, #new-seq: 18, #new-token: 16384, - #cached-token: 0, token usage: 0.00, #running-req: 0, #queue-req: 0, - #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 0.00, - """ - dp, tp, ep, timestamp = self._parse_dp_tp_ep_tag(line) - if dp is None or "Prefill batch" not in line: - return None - - metrics = {"timestamp": timestamp, "dp": dp, "tp": tp, "ep": ep, "type": "prefill"} - - # Extract metrics using regex - patterns = { - "new_seq": r"#new-seq:\s*(\d+)", - "new_token": r"#new-token:\s*(\d+)", - "cached_token": r"#cached-token:\s*(\d+)", - "token_usage": r"token usage:\s*([\d.]+)", - "running_req": r"#running-req:\s*(\d+)", - "queue_req": r"#queue-req:\s*(\d+)", - "prealloc_req": r"#prealloc-req:\s*(\d+)", - "inflight_req": r"#inflight-req:\s*(\d+)", - "input_throughput": r"input throughput \(token/s\):\s*([\d.]+)", - } - - for key, pattern in patterns.items(): - match = re.search(pattern, line) - if match: - value = match.group(1) - metrics[key] = float(value) if "." in value else int(value) - - return metrics - - def _parse_decode_batch_line(self, line: str) -> dict | None: - """Parse decode batch log line for metrics. - - Example line: - [2025-11-04 05:32:32 DP31 TP31 EP31] Decode batch, #running-req: 7, #token: 7040, - token usage: 0.00, pre-allocated usage: 0.00, #prealloc-req: 0, #transfer-req: 0, - #retracted-req: 0, cuda graph: True, gen throughput (token/s): 6.73, #queue-req: 0, - """ - dp, tp, ep, timestamp = self._parse_dp_tp_ep_tag(line) - if dp is None or "Decode batch" not in line: - return None - - metrics = {"timestamp": timestamp, "dp": dp, "tp": tp, "ep": ep, "type": "decode"} - - # Extract metrics using regex - patterns = { - "running_req": r"#running-req:\s*(\d+)", - "num_tokens": r"#token:\s*(\d+)", - "token_usage": r"token usage:\s*([\d.]+)", - "preallocated_usage": r"pre-allocated usage:\s*([\d.]+)", - "prealloc_req": r"#prealloc-req:\s*(\d+)", - "transfer_req": r"#transfer-req:\s*(\d+)", - "queue_req": r"#queue-req:\s*(\d+)", - "gen_throughput": r"gen throughput \(token/s\):\s*([\d.]+)", - } - - for key, pattern in patterns.items(): - match = re.search(pattern, line) - if match: - value = match.group(1) - metrics[key] = float(value) if "." in value else int(value) - - return metrics - - def _parse_memory_line(self, line: str) -> dict | None: - """Parse memory-related log lines. - - Examples: - [2025-11-04 05:27:13 DP0 TP0 EP0] Load weight end. type=DeepseekV3ForCausalLM, - dtype=torch.bfloat16, avail mem=75.11 GB, mem usage=107.07 GB. - - [2025-11-04 05:27:13 DP0 TP0 EP0] KV Cache is allocated. 
#tokens: 524288, KV size: 17.16 GB - """ - dp, tp, ep, timestamp = self._parse_dp_tp_ep_tag(line) - if dp is None: - return None - - metrics = { - "timestamp": timestamp, - "dp": dp, - "tp": tp, - "ep": ep, - } - - # Parse available memory - avail_match = re.search(r"avail mem=([\d.]+)\s*GB", line) - if avail_match: - metrics["avail_mem_gb"] = float(avail_match.group(1)) - metrics["type"] = "memory" - - # Parse memory usage - usage_match = re.search(r"mem usage=([\d.]+)\s*GB", line) - if usage_match: - metrics["mem_usage_gb"] = float(usage_match.group(1)) - metrics["type"] = "memory" - - # Parse KV cache size - kv_match = re.search(r"KV size:\s*([\d.]+)\s*GB", line) - if kv_match: - metrics["kv_cache_gb"] = float(kv_match.group(1)) - metrics["type"] = "kv_cache" - - # Parse token count for KV cache - token_match = re.search(r"#tokens:\s*(\d+)", line) - if token_match: - metrics["kv_tokens"] = int(token_match.group(1)) - - return metrics if "type" in metrics else None - - def _extract_node_info_from_filename(self, filename: str) -> dict | None: - """Extract node name and worker info from filename. - - Example: watchtower-navy-cn01_prefill_w0.err or r02-p01-dgx-c11_prefill_w0.out - Returns: {'node': 'watchtower-navy-cn01', 'worker_type': 'prefill', 'worker_id': 'w0'} - """ - # Use greedy match for node name up to _(prefill|decode|frontend)_ - match = re.match(r"(.+)_(prefill|decode|frontend)_([^.]+)\.(err|out)", os.path.basename(filename)) - if match: - return { - "node": match.group(1), - "worker_type": match.group(2), - "worker_id": match.group(3), - } - return None - # Standalone helper function for visualizations def get_node_label(node_data: dict) -> str: diff --git a/analysis/srtlog/models.py b/analysis/srtlog/models.py index 26744184..ad7852d7 100644 --- a/analysis/srtlog/models.py +++ b/analysis/srtlog/models.py @@ -142,10 +142,46 @@ def formatted_date(self) -> str: return self.run_date +@dataclass +class ProfilerMetadata: + """Metadata about the benchmark/profiler configuration. + + This describes what the benchmark was configured to do, + not the actual results. + """ + + profiler_type: str + isl: str + osl: str + concurrencies: str = "" + req_rate: str = "" + + @classmethod + def from_json(cls, json_data: dict) -> "ProfilerMetadata": + """Create from {jobid}.json benchmark section. + + Args: + json_data: Parsed JSON from {jobid}.json file + + Returns: + ProfilerMetadata instance + """ + profiler_meta = json_data.get("benchmark", {}) + + return cls( + profiler_type=profiler_meta.get("type", "unknown"), + isl=str(profiler_meta.get("isl", "")), + osl=str(profiler_meta.get("osl", "")), + concurrencies=profiler_meta.get("concurrencies", ""), + req_rate=profiler_meta.get("req-rate", ""), + ) + + @dataclass class ProfilerResults: """Results from profiler benchmarks. + Contains only the actual metrics, not configuration metadata. Parses 32 out of 39 fields from benchmark JSON output. 
NOT PARSED (7 fields): @@ -154,12 +190,6 @@ class ProfilerResults: - tokenizer_id, best_of, burstiness: Metadata not critical for dashboards """ - profiler_type: str - isl: str - osl: str - concurrencies: str = "" - req_rate: str = "" - # Primary throughput metrics (per concurrency level) output_tps: list[float] = field(default_factory=list) total_tps: list[float] = field(default_factory=list) @@ -204,26 +234,6 @@ class ProfilerResults: completed: list[int] = field(default_factory=list) num_prompts: list[int] = field(default_factory=list) - @classmethod - def from_json(cls, json_data: dict) -> "ProfilerResults": - """Create from {jobid}.json profiler_metadata section. - - Args: - json_data: Parsed JSON from {jobid}.json file - - Returns: - ProfilerResults instance (benchmark data added later from result files) - """ - profiler_meta = json_data.get("benchmark", {}) - - return cls( - profiler_type=profiler_meta.get("type", "unknown"), - isl=str(profiler_meta.get("isl", "")), - osl=str(profiler_meta.get("osl", "")), - concurrencies=profiler_meta.get("concurrencies", ""), - req_rate=profiler_meta.get("req-rate", ""), - ) - def add_benchmark_results(self, results: dict) -> None: """Add actual benchmark results from profiler output files. @@ -275,11 +285,26 @@ def add_benchmark_results(self, results: dict) -> None: self.num_prompts = results.get("num_prompts", []) +@dataclass +class BenchmarkLaunchCommand: + """Parsed benchmark launch command information. + + Source: logs/benchmark.out + Only contains essential fields. All parsed arguments go into extra_args. + """ + + benchmark_type: str + raw_command: str + + # All parsed arguments as dict + extra_args: dict[str, Any] = field(default_factory=dict) + @dataclass class BenchmarkRun: """Complete benchmark run with metadata and profiler results.""" metadata: RunMetadata + profiler_metadata: ProfilerMetadata profiler: ProfilerResults is_complete: bool = True missing_concurrencies: list[int] = field(default_factory=list) @@ -311,10 +336,16 @@ def from_json_file(cls, run_path: str) -> "BenchmarkRun | None": json_data = json.load(f) metadata = RunMetadata.from_json(json_data, run_path) - profiler = ProfilerResults.from_json(json_data) + profiler_metadata = ProfilerMetadata.from_json(json_data) + profiler = ProfilerResults() tags = json_data.get("tags", []) - return cls(metadata=metadata, profiler=profiler, tags=tags) + return cls( + metadata=metadata, + profiler_metadata=profiler_metadata, + profiler=profiler, + tags=tags, + ) except Exception: return None @@ -335,14 +366,14 @@ def check_completeness(self) -> None: Updates is_complete and missing_concurrencies fields. """ # Parse expected concurrencies from metadata - if not self.profiler.concurrencies: + if not self.profiler_metadata.concurrencies: # No expected concurrencies specified, assume manual run self.is_complete = True self.missing_concurrencies = [] return expected = set() - for val in self.profiler.concurrencies.split("x"): + for val in self.profiler_metadata.concurrencies.split("x"): try: expected.add(int(val.strip())) except ValueError: @@ -406,35 +437,72 @@ class MemoryMetrics: kv_tokens: int | None = None +@dataclass +class NodeMetadata: + """Node identification and worker information. + + This is the equivalent of RunMetadata but for individual worker nodes. 
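+
+    Example (values illustrative): node_name="worker-3", worker_type="prefill",
+    worker_id="w0", as parsed from a log named worker-3_prefill_w0.out.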
+ """ + + node_name: str # Node identifier (e.g., "worker-3") + worker_type: str # Worker type: prefill, decode, agg + worker_id: str # Worker ID (e.g., "w0") + + @dataclass class NodeMetrics: - """Metrics from a single node (prefill or decode worker), parsed from log files.""" + """Metrics from a single node (prefill or decode worker), parsed from log files. + + This class contains ONLY metrics data. Configuration is in NodeConfig. + """ - node_info: dict # Has node name, worker type, worker_id + metadata: NodeMetadata batches: list[BatchMetrics] = field(default_factory=list) memory_snapshots: list[MemoryMetrics] = field(default_factory=list) - config: dict = field(default_factory=dict) # TP/DP/EP config + config: dict = field(default_factory=dict) # Runtime config: TP/PP/EP, batch sizes, etc. run_id: str = "" + # Convenience properties for backward compatibility @property def node_name(self) -> str: - """Get node name.""" - return self.node_info.get("node", "Unknown") + """Get node name from metadata.""" + return self.metadata.node_name @property def worker_type(self) -> str: - """Get worker type (prefill/decode/frontend).""" - return self.node_info.get("worker_type", "unknown") + """Get worker type from metadata.""" + return self.metadata.worker_type + + @property + def worker_id(self) -> str: + """Get worker ID from metadata.""" + return self.metadata.worker_id @property def is_prefill(self) -> bool: """Check if this is a prefill node.""" - return self.worker_type == "prefill" + return self.metadata.worker_type == "prefill" @property def is_decode(self) -> bool: """Check if this is a decode node.""" - return self.worker_type == "decode" + return self.metadata.worker_type == "decode" + + +@dataclass +class NodeLaunchCommand: + """Parsed node worker launch command information. + + Source: logs/{node}_{worker_type}_{worker_id}.out or .err + Only contains essential fields. All parsed arguments go into extra_args. + """ + + backend_type: str + worker_type: str # prefill, decode, agg + raw_command: str + + # All parsed arguments as dict + extra_args: dict[str, Any] = field(default_factory=dict) # Config-related TypedDicts (from config_reader.py) @@ -447,6 +515,91 @@ class GPUInfo(TypedDict, total=False): memory_total: str driver_version: str +class NodeConfig(TypedDict, total=False): + """Expected structure of a node config JSON file (*_config.json).""" + + filename: str + gpu_info: GPUInfo + config: dict[str, Any] # Contains 'server_args' and other fields + environment: dict[str, str] + launch_command: NodeLaunchCommand | None # Parsed launch command (added at runtime) + + +@dataclass +class NodeInfo: + """Complete information about a node, combining metrics and configuration. + + This is the top-level container for all node data. 
+ """ + + metrics: NodeMetrics # Performance metrics (batches, memory, throughput) + node_config: NodeConfig | None = None # Configuration (environment, launch_command, gpu_info) + + # Convenience properties that delegate to metrics + @property + def node_name(self) -> str: + """Get node name from metrics.""" + return self.metrics.node_name + + @property + def worker_type(self) -> str: + """Get worker type from metrics.""" + return self.metrics.worker_type + + @property + def worker_id(self) -> str: + """Get worker ID from metrics.""" + return self.metrics.worker_id + + @property + def is_prefill(self) -> bool: + """Check if this is a prefill node.""" + return self.metrics.is_prefill + + @property + def is_decode(self) -> bool: + """Check if this is a decode node.""" + return self.metrics.is_decode + + @property + def batches(self) -> list[BatchMetrics]: + """Get batches from metrics.""" + return self.metrics.batches + + @property + def memory_snapshots(self) -> list[MemoryMetrics]: + """Get memory snapshots from metrics.""" + return self.metrics.memory_snapshots + + @property + def config(self) -> dict: + """Get runtime config from metrics.""" + return self.metrics.config + + @property + def run_id(self) -> str: + """Get run_id from metrics.""" + return self.metrics.run_id + + @run_id.setter + def run_id(self, value: str): + """Set run_id on metrics.""" + self.metrics.run_id = value + + # Convenience properties that delegate to node_config + @property + def environment(self) -> dict[str, str]: + """Get environment variables from node_config.""" + if self.node_config: + return self.node_config.get("environment", {}) + return {} + + @property + def launch_command(self) -> NodeLaunchCommand | None: + """Get launch command from node_config.""" + if self.node_config: + return self.node_config.get("launch_command") + return None class ServerArgs(TypedDict, total=False): """Expected structure of server_args in node config. @@ -467,18 +620,14 @@ class ServerArgs(TypedDict, total=False): disaggregation_mode: str context_length: int - -class NodeConfig(TypedDict, total=False): - """Expected structure of a node config JSON file (*_config.json).""" - - filename: str - gpu_info: GPUInfo - config: dict[str, Any] # Contains 'server_args' and other fields - environment: dict[str, str] - - -class ParsedCommandInfo(TypedDict): - """Expected return structure from parse_command_line_from_err.""" +class TopologyInfo(TypedDict): + """Service topology and configuration information from log files. + + Returned by parse_command_line_from_err() which analyzes log files to discover: + - Which flags were explicitly set in launch commands + - Physical node to service type mapping + """ explicit_flags: set - services: dict[str, list[str]] + services: dict[str, list[str]] # {node_name: [service_types]} + diff --git a/analysis/srtlog/parsers/__init__.py b/analysis/srtlog/parsers/__init__.py new file mode 100644 index 00000000..f534a4eb --- /dev/null +++ b/analysis/srtlog/parsers/__init__.py @@ -0,0 +1,230 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Parser protocols and registries for benchmark and node log parsing. 
+This module provides extensible parsing infrastructure: +- BenchmarkParser: Parses benchmark.out files based on benchmark type +- NodeParser: Parses prefill/decode/agg logs based on backend type +Usage: + from analysis.srtlog.parsers import get_benchmark_parser, get_node_parser + # Get parser by type + bench_parser = get_benchmark_parser("sa-bench") + results = bench_parser.parse(benchmark_out_path) + node_parser = get_node_parser("sglang") + nodes = node_parser.parse_logs(log_dir) +""" + +from pathlib import Path +from typing import Any, Protocol + +from analysis.srtlog.models import BenchmarkLaunchCommand, NodeLaunchCommand, NodeMetrics + + +class BenchmarkParserProtocol(Protocol): + """Protocol for benchmark output parsers. + Each benchmark type (sa-bench, mooncake-router, etc.) should have + a parser that implements this protocol. + """ + + @property + def benchmark_type(self) -> str: + """Return the benchmark type this parser handles.""" + ... + + def parse(self, benchmark_out_path: Path) -> dict[str, Any]: + """Parse benchmark.out file and return results. + Args: + benchmark_out_path: Path to the benchmark.out file + Returns: + Dict with benchmark results including: + - output_tps: Output tokens per second + - mean_ttft_ms: Mean time to first token + - mean_itl_ms: Mean inter-token latency + - etc. + """ + ... + + def parse_launch_command(self, log_content: str) -> BenchmarkLaunchCommand | None: + """Parse the benchmark launch command from log content. + Args: + log_content: Content of the benchmark log file + Returns: + BenchmarkLaunchCommand with parsed parameters, or None if not found + """ + ... + + def parse_result_json(self, json_path: Path) -> dict[str, Any]: + """Parse a benchmark result JSON file. + Args: + json_path: Path to a result JSON file + Returns: + Dict with parsed benchmark metrics + """ + ... + + def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int | None = None) -> Path | None: + """Find the directory containing benchmark results within a run directory. + + This method encapsulates the logic for locating result files, which varies by benchmark type. + For example: + - sa-bench: looks for directories like "sa-bench_isl_8192_osl_1024" + - mooncake-router: looks in "logs/artifacts/" subdirectory + + Args: + run_path: Path to the run directory (contains logs/, metadata, etc.) + isl: Input sequence length (optional, used for pattern matching) + osl: Output sequence length (optional, used for pattern matching) + + Returns: + Path to directory containing result files, or None if not found + """ + ... + + def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: + """Parse all result files in a directory. + + Args: + result_dir: Directory containing benchmark result files + + Returns: + List of result dicts (one per concurrency level or benchmark run) + """ + ... + + +class NodeParserProtocol(Protocol): + """Protocol for node log parsers. + Each backend type (sglang, trtllm, etc.) should have a parser + that implements this protocol for parsing prefill/decode/agg logs. + """ + + @property + def backend_type(self) -> str: + """Return the backend type this parser handles.""" + ... + + def parse_logs(self, log_dir: Path) -> list[NodeMetrics]: + """Parse all node logs in a directory. + Args: + log_dir: Directory containing prefill/decode/agg .out/.err files + Returns: + List of NodeMetrics objects, one per worker + """ + ... + + def parse_single_log(self, log_path: Path) -> NodeMetrics | None: + """Parse a single node log file. 
+ Args: + log_path: Path to a prefill/decode/agg log file + Returns: + NodeMetrics object or None if parsing failed + """ + ... + + def parse_launch_command(self, log_content: str, worker_type: str = "unknown") -> NodeLaunchCommand | None: + """Parse the worker launch command from log content. + Args: + log_content: Content of the worker log file + worker_type: Type of worker (prefill, decode, agg) + Returns: + NodeLaunchCommand with parsed parameters, or None if not found + """ + ... + + +# Registry for benchmark parsers +_benchmark_parsers: dict[str, type] = {} + +# Registry for node parsers +_node_parsers: dict[str, type] = {} + + +def register_benchmark_parser(benchmark_type: str): + """Decorator to register a benchmark parser. + Usage: + @register_benchmark_parser("sa-bench") + class SABenchParser: + ... + """ + + def decorator(cls): + _benchmark_parsers[benchmark_type] = cls + return cls + + return decorator + + +def register_node_parser(backend_type: str): + """Decorator to register a node parser. + Usage: + @register_node_parser("sglang") + class SGLangNodeParser: + ... + """ + + def decorator(cls): + _node_parsers[backend_type] = cls + return cls + + return decorator + + +def get_benchmark_parser(benchmark_type: str) -> BenchmarkParserProtocol: + """Get a benchmark parser by type. + Args: + benchmark_type: Type of benchmark (e.g., "sa-bench", "mooncake-router") + Returns: + Instance of the appropriate benchmark parser + Raises: + ValueError: If no parser registered for the benchmark type + """ + if benchmark_type not in _benchmark_parsers: + available = ", ".join(_benchmark_parsers.keys()) or "none" + raise ValueError(f"No benchmark parser registered for '{benchmark_type}'. Available: {available}") + return _benchmark_parsers[benchmark_type]() + + +def get_node_parser(backend_type: str) -> NodeParserProtocol: + """Get a node parser by backend type. + Args: + backend_type: Type of backend (e.g., "sglang", "trtllm") + Returns: + Instance of the appropriate node parser + Raises: + ValueError: If no parser registered for the backend type + """ + if backend_type not in _node_parsers: + available = ", ".join(_node_parsers.keys()) or "none" + raise ValueError(f"No node parser registered for '{backend_type}'. 
Available: {available}") + return _node_parsers[backend_type]() + + +def list_benchmark_parsers() -> list[str]: + """List all registered benchmark parser types.""" + return list(_benchmark_parsers.keys()) + + +def list_node_parsers() -> list[str]: + """List all registered node parser types.""" + return list(_node_parsers.keys()) + + +# Import parsers to trigger registration +from analysis.srtlog.parsers.benchmark import * # noqa: E402, F401, F403 +from analysis.srtlog.parsers.nodes import * # noqa: E402, F401, F403 + +# Re-export models for convenience +__all__ = [ + "BenchmarkLaunchCommand", + "NodeLaunchCommand", + "NodeMetrics", + "BenchmarkParserProtocol", + "NodeParserProtocol", + "get_benchmark_parser", + "get_node_parser", + "list_benchmark_parsers", + "list_node_parsers", + "register_benchmark_parser", + "register_node_parser", +] \ No newline at end of file diff --git a/analysis/srtlog/parsers/benchmark/__init__.py b/analysis/srtlog/parsers/benchmark/__init__.py new file mode 100644 index 00000000..29010ae2 --- /dev/null +++ b/analysis/srtlog/parsers/benchmark/__init__.py @@ -0,0 +1,7 @@ + +"""Benchmark output parsers.""" + +from analysis.srtlog.parsers.benchmark.mooncake_router import MooncakeRouterParser +from analysis.srtlog.parsers.benchmark.sa_bench import SABenchParser + +__all__ = ["SABenchParser", "MooncakeRouterParser"] \ No newline at end of file diff --git a/analysis/srtlog/parsers/benchmark/mooncake_router.py b/analysis/srtlog/parsers/benchmark/mooncake_router.py new file mode 100644 index 00000000..52f0832e --- /dev/null +++ b/analysis/srtlog/parsers/benchmark/mooncake_router.py @@ -0,0 +1,306 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Mooncake Router benchmark output parser.""" + +from __future__ import annotations + +import json +import logging +import re +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from analysis.srtlog.parsers import register_benchmark_parser + +if TYPE_CHECKING: + from analysis.srtlog.parsers import BenchmarkLaunchCommand + +logger = logging.getLogger(__name__) + + +@register_benchmark_parser("mooncake-router") +class MooncakeRouterParser: + """Parser for Mooncake Router benchmark output. + Parses benchmark.out files and AIPerf result JSON files from mooncake-router runs. + """ + + @property + def benchmark_type(self) -> str: + return "mooncake-router" + + def parse(self, benchmark_out_path: Path) -> dict[str, Any]: + """Parse benchmark.out file for mooncake-router results. 
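+
+        Looks for summary lines such as (sample values, matching the patterns below):
+            Request throughput: 3.37 req/s
+            Output token throughput: 1150.92 tok/s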
+ Args: + benchmark_out_path: Path to benchmark.out file + Returns: + Dict with aggregated benchmark results + """ + results = { + "benchmark_type": self.benchmark_type, + "output_tps": None, + "request_throughput": None, + "mean_ttft_ms": None, + "mean_itl_ms": None, + "total_requests": None, + } + + if not benchmark_out_path.exists(): + logger.warning("benchmark.out not found: %s", benchmark_out_path) + return results + + try: + content = benchmark_out_path.read_text() + + # Parse mooncake-router output patterns + # Example: "Request throughput: 3.37 req/s" + # Example: "Output token throughput: 1150.92 tok/s" + req_tpt_pattern = r"[Rr]equest\s+throughput[:\s]+([\d.]+)" + out_tpt_pattern = r"[Oo]utput\s+(?:token\s+)?throughput[:\s]+([\d.]+)" + ttft_pattern = r"[Tt]ime\s+to\s+first\s+token[:\s]+([\d.]+)" + itl_pattern = r"[Ii]nter.?token\s+latency[:\s]+([\d.]+)" + + for line in content.split("\n"): + if req_tpt_match := re.search(req_tpt_pattern, line): + results["request_throughput"] = float(req_tpt_match.group(1)) + if out_tpt_match := re.search(out_tpt_pattern, line): + results["output_tps"] = float(out_tpt_match.group(1)) + if ttft_match := re.search(ttft_pattern, line): + results["mean_ttft_ms"] = float(ttft_match.group(1)) + if itl_match := re.search(itl_pattern, line): + results["mean_itl_ms"] = float(itl_match.group(1)) + + except Exception as e: + logger.warning("Failed to parse benchmark.out: %s", e) + + return results + + def parse_result_json(self, json_path: Path) -> dict[str, Any]: + """Parse an AIPerf result JSON file. + Args: + json_path: Path to profile_export_aiperf.json + Returns: + Dict with benchmark metrics + """ + result = {} + + try: + with open(json_path) as f: + data = json.load(f) + + # AIPerf format has nested structure with unit and values + result = { + "concurrency": 0, # Mooncake uses open-loop, no fixed concurrency + # Throughput metrics + "output_tps": self._get_metric(data, "output_token_throughput", "avg"), + "request_throughput": self._get_metric(data, "request_throughput", "avg"), + # Mean latencies (convert from ms) + "mean_ttft_ms": self._get_metric(data, "time_to_first_token", "avg"), + "mean_tpot_ms": self._get_metric(data, "inter_token_latency", "avg"), + "mean_itl_ms": self._get_metric(data, "inter_token_latency", "avg"), + "mean_e2el_ms": self._get_metric(data, "request_latency", "avg"), + # Median latencies + "median_ttft_ms": self._get_metric(data, "time_to_first_token", "p50"), + "median_tpot_ms": self._get_metric(data, "inter_token_latency", "p50"), + "median_itl_ms": self._get_metric(data, "inter_token_latency", "p50"), + "median_e2el_ms": self._get_metric(data, "request_latency", "p50"), + # P99 latencies + "p99_ttft_ms": self._get_metric(data, "time_to_first_token", "p99"), + "p99_tpot_ms": self._get_metric(data, "inter_token_latency", "p99"), + "p99_itl_ms": self._get_metric(data, "inter_token_latency", "p99"), + "p99_e2el_ms": self._get_metric(data, "request_latency", "p99"), + # Std dev latencies + "std_ttft_ms": self._get_metric(data, "time_to_first_token", "std"), + "std_itl_ms": self._get_metric(data, "inter_token_latency", "std"), + "std_e2el_ms": self._get_metric(data, "request_latency", "std"), + # Request count + "completed": self._get_metric(data, "request_count", "avg"), + "num_prompts": self._get_metric(data, "request_count", "avg"), + } + + # Also extract per-user throughput if available + tps_per_user = self._get_metric(data, "output_token_throughput_per_user", "avg") + if tps_per_user: + result["output_tps_per_user"] = 
tps_per_user
+        except Exception as e:
+            logger.warning("Failed to parse %s: %s", json_path, e)
+
+        return result
+
+    def _get_metric(self, data: dict, metric_name: str, stat: str) -> float | None:
+        """Extract a metric value from AIPerf data structure.
+        Args:
+            data: AIPerf JSON data
+            metric_name: Name of the metric (e.g., "time_to_first_token")
+            stat: Statistic to extract (e.g., "avg", "p50", "p99")
+        Returns:
+            Metric value or None if not found
+        """
+        try:
+            metric_data = data.get(metric_name, {})
+            if isinstance(metric_data, dict):
+                value = metric_data.get(stat)
+                if value is not None:
+                    return float(value)
+        except (KeyError, TypeError, ValueError):
+            pass
+        return None
+
+    def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]:
+        """Parse AIPerf result files in a directory.
+        Args:
+            result_dir: Directory containing profile_export_aiperf.json
+        Returns:
+            List of result dicts (usually just one for mooncake-router)
+        """
+        results = []
+
+        # Look for AIPerf JSON files
+        for json_file in result_dir.rglob("profile_export_aiperf.json"):
+            result = self.parse_result_json(json_file)
+            if result.get("output_tps") is not None:
+                results.append(result)
+
+        return results
+
+    def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int | None = None) -> Path | None:
+        """Find the directory containing mooncake-router/AIPerf results.
+
+        Mooncake-router results are typically in:
+        - logs/artifacts/*/profile_export_aiperf.json
+
+        Since results can be in nested subdirectories, we return the logs directory
+        and let parse_result_directory use rglob to find them.
+
+        Args:
+            run_path: Path to the run directory
+            isl: Input sequence length (not used for mooncake-router)
+            osl: Output sequence length (not used for mooncake-router)
+
+        Returns:
+            Path to logs directory where results can be found, or None
+        """
+        # Mooncake-router results are in logs/artifacts/ subdirectories
+        logs_dir = run_path / "logs"
+        if logs_dir.exists():
+            # Check whether any AIPerf result file exists (rglob searches recursively)
+            try:
+                for _ in logs_dir.rglob("profile_export_aiperf.json"):
+                    logger.info(f"Found mooncake-router results in: {logs_dir}")
+                    return logs_dir
+            except (OSError, PermissionError) as e:
+                logger.warning(f"Error accessing {logs_dir}: {e}")
+
+        # Also check run_path directly in case logs are at root
+        try:
+            for _ in run_path.rglob("profile_export_aiperf.json"):
+                logger.info(f"Found mooncake-router results in: {run_path}")
+                return run_path
+        except (OSError, PermissionError) as e:
+            logger.warning(f"Error accessing {run_path}: {e}")
+
+        return None
+
+    def find_aiperf_results(self, log_dir: Path) -> list[Path]:
+        """Find all AIPerf result files in a log directory.
+        Args:
+            log_dir: Root log directory
+        Returns:
+            List of paths to profile_export_aiperf.json files
+        """
+        return list(log_dir.rglob("profile_export_aiperf.json"))
+
+    def parse_launch_command(self, log_content: str) -> BenchmarkLaunchCommand | None:
+        """Parse the mooncake-router launch command from log content.
+        Looks for command lines like:
+        [CMD] aiperf profile --model ... --url ...
+        genai-perf profile --model ... --endpoint ...
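+        aiperf profile -m "Qwen/Qwen3-32B" --url "http://..." --concurrency 10 (illustrative;
+        the short -m/--url form is also matched by the patterns below)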
+ Also parses header format: + Endpoint: http://localhost:8000 + Model: Qwen/Qwen3-32B + Workload: conversation + Args: + log_content: Content of the benchmark log file + Returns: + BenchmarkLaunchCommand with parsed parameters, or None if not found + """ + from analysis.srtlog.parsers import BenchmarkLaunchCommand + + raw_command = None + + # First, try to find [CMD] tagged command (preferred - from our scripts) + cmd_match = re.search(r"\[CMD\]\s*(.+)$", log_content, re.MULTILINE) + if cmd_match: + raw_command = cmd_match.group(1).strip() + + # Fallback: pattern to match genai-perf, aiperf or mooncake-router commands + # aiperf format: aiperf profile -m "Model" --url "http://..." --concurrency 10 + if not raw_command: + command_patterns = [ + r"(aiperf\s+profile\s+[^\n]+)", + r"(genai-perf\s+profile\s+[^\n]+)", + r"(python[3]?\s+.*genai_perf[^\n]+)", + r"(python[3]?\s+.*aiperf[^\n]+)", + r"(mooncake-router\s+[^\n]+)", + ] + + for pattern in command_patterns: + match = re.search(pattern, log_content, re.IGNORECASE) + if match: + raw_command = match.group(1).strip() + break + + # If no command found, try to build from header format + if not raw_command: + if "Mooncake Router Benchmark" in log_content: + raw_command = "mooncake-router-benchmark (from header)" + + if not raw_command: + return None + + extra_args: dict[str, Any] = {} + + # Parse aiperf/genai-perf arguments from command line + # Supports both --model and -m formats, quoted and unquoted values + arg_patterns = { + "model": r"(?:--model|-m)[=\s]+[\"']?([^\"'\s]+)[\"']?", + "base_url": r"--url[=\s]+[\"']?([^\"'\s]+)[\"']?", + "num_prompts": r"--(?:num-prompts|request-count|request)[=\s]+(\d+)", + "request_rate": r"--request-rate[=\s]+([^\s]+)", + "max_concurrency": r"--concurrency[=\s]+(\d+)", + "input_len": r"--(?:synthetic-input-tokens-mean|input-sequence-length|isl)[=\s]+(\d+)", + "output_len": r"--(?:output-tokens-mean|output-sequence-length|osl)[=\s]+(\d+)", + } + + for field, pattern in arg_patterns.items(): + match = re.search(pattern, raw_command) + if match: + value: Any = match.group(1) + if field in ("num_prompts", "max_concurrency", "input_len", "output_len"): + value = int(value) + elif field == "request_rate" and value != "inf": + try: + value = float(value) + except ValueError: + pass + extra_args[field] = value + + # Also parse from header format (srtctl-style) + header_patterns = { + "model": r"^Model:\s*(.+)$", + "base_url": r"^Endpoint:\s*(.+)$", + "dataset": r"^Workload:\s*(.+)$", + } + + for field, pattern in header_patterns.items(): + if field not in extra_args: + match = re.search(pattern, log_content, re.MULTILINE) + if match: + extra_args[field] = match.group(1).strip() + + return BenchmarkLaunchCommand( + benchmark_type=self.benchmark_type, + raw_command=raw_command, + extra_args=extra_args, + ) \ No newline at end of file diff --git a/analysis/srtlog/parsers/benchmark/sa_bench.py b/analysis/srtlog/parsers/benchmark/sa_bench.py new file mode 100644 index 00000000..de0ba071 --- /dev/null +++ b/analysis/srtlog/parsers/benchmark/sa_bench.py @@ -0,0 +1,318 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +"""SA-Bench benchmark output parser.""" + +from __future__ import annotations + +import json +import logging +import re +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from analysis.srtlog.parsers import register_benchmark_parser + +if TYPE_CHECKING: + from analysis.srtlog.parsers import BenchmarkLaunchCommand + +logger = logging.getLogger(__name__) + + +@register_benchmark_parser("sa-bench") +class SABenchParser: + """Parser for SA-Bench benchmark output. + Parses benchmark.out files and result JSON files from SA-Bench runs. + """ + + @property + def benchmark_type(self) -> str: + return "sa-bench" + + def parse(self, benchmark_out_path: Path) -> dict[str, Any]: + """Parse benchmark.out file for SA-Bench results. + Args: + benchmark_out_path: Path to benchmark.out file + Returns: + Dict with aggregated benchmark results + """ + results = { + "benchmark_type": self.benchmark_type, + "concurrencies": [], + "output_tps": [], + "mean_ttft_ms": [], + "mean_itl_ms": [], + "mean_tpot_ms": [], + "p99_ttft_ms": [], + "p99_itl_ms": [], + "request_throughput": [], + "completed_requests": [], + } + + if not benchmark_out_path.exists(): + logger.warning("benchmark.out not found: %s", benchmark_out_path) + return results + + try: + content = benchmark_out_path.read_text() + + # Parse summary lines from benchmark output + # Example: "Concurrency: 100, Throughput: 5000 tok/s, TTFT: 150ms, ITL: 20ms" + concurrency_pattern = r"Concurrency[:\s]+(\d+)" + throughput_pattern = r"(?:Output\s+)?[Tt]hroughput[:\s]+([\d.]+)" + ttft_pattern = r"(?:Mean\s+)?TTFT[:\s]+([\d.]+)" + itl_pattern = r"(?:Mean\s+)?ITL[:\s]+([\d.]+)" + + # Try to extract from summary lines + for line in content.split("\n"): + if "concurrency" in line.lower() or "throughput" in line.lower(): + conc_match = re.search(concurrency_pattern, line, re.IGNORECASE) + tpt_match = re.search(throughput_pattern, line, re.IGNORECASE) + ttft_match = re.search(ttft_pattern, line, re.IGNORECASE) + itl_match = re.search(itl_pattern, line, re.IGNORECASE) + + if conc_match and tpt_match: + results["concurrencies"].append(int(conc_match.group(1))) + results["output_tps"].append(float(tpt_match.group(1))) + if ttft_match: + results["mean_ttft_ms"].append(float(ttft_match.group(1))) + if itl_match: + results["mean_itl_ms"].append(float(itl_match.group(1))) + + except Exception as e: + logger.warning("Failed to parse benchmark.out: %s", e) + + return results + + def parse_result_json(self, json_path: Path) -> dict[str, Any]: + """Parse a SA-Bench result JSON file. 
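+ Usage sketch (illustrative; the file name and its contents are hypothetical):
+ parser = SABenchParser()
+ metrics = parser.parse_result_json(Path("result_c100.json"))
+ # metrics["max_concurrency"] -> 100 for a file containing {"max_concurrency": 100, ...}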
+ Args: + json_path: Path to result JSON (e.g., result_c100.json) + Returns: + Dict with benchmark metrics for this concurrency level + """ + result = {} + + try: + with open(json_path) as f: + data = json.load(f) + + # Return with same field names as original JSON for compatibility + # with downstream processing in _build_rollup_summary + result = { + "max_concurrency": data.get("max_concurrency"), + # Throughput metrics (keep original field names) + "output_throughput": data.get("output_throughput"), + "total_token_throughput": data.get("total_token_throughput"), + "request_throughput": data.get("request_throughput"), + "request_goodput": data.get("request_goodput"), + "request_rate": data.get("request_rate"), + # Mean latencies + "mean_ttft_ms": data.get("mean_ttft_ms"), + "mean_tpot_ms": data.get("mean_tpot_ms"), + "mean_itl_ms": data.get("mean_itl_ms"), + "mean_e2el_ms": data.get("mean_e2el_ms"), + # Median latencies + "median_ttft_ms": data.get("median_ttft_ms"), + "median_tpot_ms": data.get("median_tpot_ms"), + "median_itl_ms": data.get("median_itl_ms"), + "median_e2el_ms": data.get("median_e2el_ms"), + # P99 latencies + "p99_ttft_ms": data.get("p99_ttft_ms"), + "p99_tpot_ms": data.get("p99_tpot_ms"), + "p99_itl_ms": data.get("p99_itl_ms"), + "p99_e2el_ms": data.get("p99_e2el_ms"), + # Std dev latencies + "std_ttft_ms": data.get("std_ttft_ms"), + "std_tpot_ms": data.get("std_tpot_ms"), + "std_itl_ms": data.get("std_itl_ms"), + "std_e2el_ms": data.get("std_e2el_ms"), + # Token counts + "total_input_tokens": data.get("total_input_tokens"), + "total_output_tokens": data.get("total_output_tokens"), + # Metadata + "duration": data.get("duration"), + "completed": data.get("completed"), + "num_prompts": data.get("num_prompts"), + } + + except Exception as e: + logger.warning("Failed to parse %s: %s", json_path, e) + + return result + + def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: + """Parse all result JSON files in a benchmark result directory. + Args: + result_dir: Directory containing result_*.json files + Returns: + List of result dicts sorted by concurrency + """ + results = [] + + for json_file in result_dir.glob("*.json"): + result = self.parse_result_json(json_file) + if result.get("max_concurrency") is not None: + results.append(result) + + # Sort by concurrency + results.sort(key=lambda x: x.get("max_concurrency", 0) or 0) + + return results + + def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int | None = None) -> Path | None: + """Find the directory containing SA-Bench results. 
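+ Usage sketch (illustrative; the run path and ISL/OSL values are hypothetical):
+ result_dir = SABenchParser().find_result_directory(Path("logs/run1"), isl=8192, osl=1024)
+ # e.g. Path("logs/run1/sa-bench_isl_8192_osl_1024") when result JSONs exist there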
+ + SA-Bench results are typically in directories named like: + - sa-bench_isl_8192_osl_1024 + - vllm_isl_8192_osl_1024 + + Args: + run_path: Path to the run directory + isl: Input sequence length + osl: Output sequence length + + Returns: + Path to results directory, or None if not found + """ + # Search paths: run_path and run_path/logs + search_paths = [run_path] + logs_dir = run_path / "logs" + if logs_dir.exists(): + search_paths.append(logs_dir) + + # Build prefix patterns + if isl is not None and osl is not None: + prefixes = [ + f"sa-bench_isl_{isl}_osl_{osl}", + f"vllm_isl_{isl}_osl_{osl}", + ] + else: + # Fallback: match any sa-bench or vllm directory + prefixes = ["sa-bench", "vllm"] + + # Search for matching directories + for search_path in search_paths: + if not search_path.exists(): + continue + try: + for entry in search_path.iterdir(): + if not entry.is_dir(): + continue + # Check if directory name starts with any of our prefixes + for prefix in prefixes: + if entry.name.startswith(prefix): + # Verify it contains result files + if list(entry.glob("*.json")): + logger.info(f"Found SA-Bench results in: {entry}") + return entry + except (OSError, PermissionError) as e: + logger.warning(f"Error accessing {search_path}: {e}") + continue + + return None + + + def parse_launch_command(self, log_content: str) -> BenchmarkLaunchCommand | None: + """Parse the SA-Bench launch command from log content. + Looks for command lines like: + [CMD] python -m sglang.bench_serving --model ... --base-url ... + python -m sglang.bench_serving --model ... --base-url ... + Also parses SA-Bench Config header format: + SA-Bench Config: endpoint=http://localhost:8000; isl=8192; osl=1024; ... + Args: + log_content: Content of the benchmark log file + Returns: + BenchmarkLaunchCommand with parsed parameters, or None if not found + """ + from analysis.srtlog.parsers import BenchmarkLaunchCommand + + raw_command = None + + # First, try to find [CMD] tagged command (preferred - from our scripts) + cmd_match = re.search(r"\[CMD\]\s*(.+)$", log_content, re.MULTILINE) + if cmd_match: + raw_command = cmd_match.group(1).strip() + + # Fallback: pattern to match sa-bench / sglang.bench_serving command + if not raw_command: + command_patterns = [ + r"(python[3]?\s+-m\s+sglang\.bench_serving\s+[^\n]+)", + r"(sa-bench\s+[^\n]+)", + r"(python[3]?\s+.*bench_serving\.py\s+[^\n]+)", + ] + + for pattern in command_patterns: + match = re.search(pattern, log_content, re.IGNORECASE) + if match: + raw_command = match.group(1).strip() + break + + # Also try SA-Bench Config header format + if not raw_command: + config_match = re.search(r"(SA-Bench Config:[^\n]+)", log_content) + if config_match: + raw_command = config_match.group(1).strip() + + if not raw_command: + return None + + extra_args: dict[str, Any] = {} + + # Parse common arguments from command line + arg_patterns = { + "model": r"--model[=\s]+([^\s]+)", + "base_url": r"--base-url[=\s]+([^\s]+)", + "num_prompts": r"--num-prompts?[=\s]+(\d+)", + "request_rate": r"--request-rate[=\s]+([^\s]+)", + "max_concurrency": r"--max-concurrency[=\s]+(\d+)", + "input_len": r"--(?:input-len|random-input-len)[=\s]+(\d+)", + "output_len": r"--(?:output-len|random-output-len)[=\s]+(\d+)", + "dataset": r"--dataset[=\s]+([^\s]+)", + "dataset_path": r"--dataset-path[=\s]+([^\s]+)", + } + + for field, pattern in arg_patterns.items(): + match = re.search(pattern, raw_command) + if match: + value: Any = match.group(1) + # Convert to appropriate type + if field in ("num_prompts", 
"max_concurrency", "input_len", "output_len"): + value = int(value) + elif field == "request_rate" and value != "inf": + try: + value = float(value) + except ValueError: + pass + extra_args[field] = value + + # Also parse from SA-Bench Config header format + # Format: SA-Bench Config: endpoint=http://localhost:8000; isl=8192; osl=1024; concurrencies=28; req_rate=inf; model=dsr1-fp8 + header_patterns = { + "base_url": r"endpoint=([^;\s]+)", + "model": r"model=([^;\s]+)", + "input_len": r"isl=(\d+)", + "output_len": r"osl=(\d+)", + "max_concurrency": r"concurrencies=(\d+)", + "request_rate": r"req_rate=([^;\s]+)", + } + + for field, pattern in header_patterns.items(): + if field not in extra_args: + match = re.search(pattern, raw_command) + if match: + value = match.group(1) + if field in ("input_len", "output_len", "max_concurrency"): + value = int(value) + elif field == "request_rate" and value != "inf": + try: + value = float(value) + except ValueError: + pass + extra_args[field] = value + + return BenchmarkLaunchCommand( + benchmark_type=self.benchmark_type, + raw_command=raw_command, + extra_args=extra_args, + ) diff --git a/analysis/srtlog/parsers/nodes/__init__.py b/analysis/srtlog/parsers/nodes/__init__.py new file mode 100644 index 00000000..40fc849e --- /dev/null +++ b/analysis/srtlog/parsers/nodes/__init__.py @@ -0,0 +1,9 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Node log parsers for different backends.""" + +from analysis.srtlog.parsers.nodes.sglang import SGLangNodeParser +from analysis.srtlog.parsers.nodes.trtllm import TRTLLMNodeParser + +__all__ = ["SGLangNodeParser", "TRTLLMNodeParser"] diff --git a/analysis/srtlog/parsers/nodes/sglang.py b/analysis/srtlog/parsers/nodes/sglang.py new file mode 100644 index 00000000..ea094f97 --- /dev/null +++ b/analysis/srtlog/parsers/nodes/sglang.py @@ -0,0 +1,441 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""SGLang node log parser. +Parses logs with format: + [2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m ... Decode batch, #running-req: 5, ... +This parser handles SGLang structured logging format with ISO 8601 timestamps. +""" + +from __future__ import annotations + +import logging +import os +import re +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from analysis.srtlog.models import BatchMetrics, MemoryMetrics, NodeInfo, NodeMetadata, NodeMetrics +from analysis.srtlog.parsers import register_node_parser + +if TYPE_CHECKING: + from analysis.srtlog.parsers import NodeLaunchCommand + +logger = logging.getLogger(__name__) + + +# ANSI escape code pattern for stripping colors +ANSI_ESCAPE = re.compile(r"\x1b\[[0-9;]*m") + + +@register_node_parser("sglang") +class SGLangNodeParser: + """Parser for SGLang node logs. + Handles SGLang structured logging with ISO 8601 timestamps. + May contain ANSI color codes which are stripped during parsing. + """ + + @property + def backend_type(self) -> str: + return "sglang" + + def parse_logs(self, log_dir: Path) -> list[NodeInfo]: + """Parse all prefill/decode/agg log files in a directory. 
+ Args:
+ log_dir: Directory containing *_prefill_*.out, *_decode_*.out, *_agg_*.out files
+ Returns:
+ List of NodeInfo objects
+ """
+ log_dir = Path(log_dir)
+ nodes = []
+
+ if not log_dir.exists():
+ logger.error("Log directory does not exist: %s", log_dir)
+ return nodes
+
+ # Find all worker log files
+ for file in os.listdir(log_dir):
+ if not (file.endswith(".err") or file.endswith(".out")):
+ continue
+ if not any(wt in file for wt in ("prefill", "decode", "agg")):
+ continue
+
+ filepath = log_dir / file
+ node = self.parse_single_log(filepath)
+ if node:
+ nodes.append(node)
+
+ logger.info("Parsed %d node log files from %s", len(nodes), log_dir)
+ return nodes
+
+ def parse_single_log(self, log_path: Path) -> NodeInfo | None:
+ """Parse a single node log file.
+ Args:
+ log_path: Path to a prefill/decode/agg log file
+ Returns:
+ NodeInfo object or None if parsing failed
+ """
+ node_info = self._extract_node_info_from_filename(str(log_path))
+ if not node_info:
+ logger.warning(
+ "Could not extract node info from filename: %s. "
+ "Expected format: <node>_<worker_type>_<worker_id>.err or .out",
+ log_path,
+ )
+ return None
+
+ batches = []
+ memory_snapshots = []
+ config = {}
+ launch_command = None
+ full_content = []
+
+ try:
+ with open(log_path) as f:
+ for line in f:
+ full_content.append(line)
+ # Strip ANSI escape codes
+ clean_line = ANSI_ESCAPE.sub("", line)
+
+ # Parse prefill batch metrics
+ batch_metrics = self._parse_prefill_batch_line(clean_line)
+ if batch_metrics:
+ batches.append(
+ BatchMetrics(
+ timestamp=batch_metrics["timestamp"],
+ dp=0, # Default since not in log
+ tp=0,
+ ep=0,
+ batch_type=batch_metrics["type"],
+ new_seq=batch_metrics.get("new_seq"),
+ new_token=batch_metrics.get("new_token"),
+ cached_token=batch_metrics.get("cached_token"),
+ token_usage=batch_metrics.get("token_usage"),
+ running_req=batch_metrics.get("running_req"),
+ queue_req=batch_metrics.get("queue_req"),
+ prealloc_req=batch_metrics.get("prealloc_req"),
+ inflight_req=batch_metrics.get("inflight_req"),
+ input_throughput=batch_metrics.get("input_throughput"),
+ )
+ )
+
+ # Parse decode batch metrics
+ decode_metrics = self._parse_decode_batch_line(clean_line)
+ if decode_metrics:
+ batches.append(
+ BatchMetrics(
+ timestamp=decode_metrics["timestamp"],
+ dp=0,
+ tp=0,
+ ep=0,
+ batch_type=decode_metrics["type"],
+ running_req=decode_metrics.get("running_req"),
+ queue_req=decode_metrics.get("queue_req"),
+ prealloc_req=decode_metrics.get("prealloc_req"),
+ transfer_req=decode_metrics.get("transfer_req"),
+ token_usage=decode_metrics.get("token_usage"),
+ preallocated_usage=decode_metrics.get("preallocated_usage"),
+ num_tokens=decode_metrics.get("num_tokens"),
+ gen_throughput=decode_metrics.get("gen_throughput"),
+ )
+ )
+
+ # Parse memory metrics
+ mem_metrics = self._parse_memory_line(clean_line)
+ if mem_metrics:
+ memory_snapshots.append(
+ MemoryMetrics(
+ timestamp=mem_metrics["timestamp"],
+ dp=0,
+ tp=0,
+ ep=0,
+ metric_type=mem_metrics["type"],
+ avail_mem_gb=mem_metrics.get("avail_mem_gb"),
+ mem_usage_gb=mem_metrics.get("mem_usage_gb"),
+ kv_cache_gb=mem_metrics.get("kv_cache_gb"),
+ kv_tokens=mem_metrics.get("kv_tokens"),
+ )
+ )
+
+ # Extract TP/DP/EP configuration from server_args
+ if "tp_size=" in clean_line:
+ tp_match = re.search(r"tp_size=(\d+)", clean_line)
+ dp_match = re.search(r"dp_size=(\d+)", clean_line)
+ ep_match = re.search(r"ep_size=(\d+)", clean_line)
+
+ if tp_match:
+ config["tp_size"] = int(tp_match.group(1))
+ if dp_match:
+ config["dp_size"] = 
int(dp_match.group(1)) + if ep_match: + config["ep_size"] = int(ep_match.group(1)) + + # Parse launch command from full content + launch_command = self.parse_launch_command("".join(full_content), node_info["worker_type"]) + + except Exception as e: + logger.error("Error parsing %s: %s", log_path, e) + return None + + total_metrics = len(batches) + len(memory_snapshots) + if total_metrics == 0: + logger.debug("Parsed %s but found no batch/memory metrics", log_path) + + logger.debug("Parsed %s: %d batches, %d memory snapshots", log_path, len(batches), len(memory_snapshots)) + + # Create NodeMetadata + node_metadata = NodeMetadata( + node_name=node_info["node"], + worker_type=node_info["worker_type"], + worker_id=node_info["worker_id"], + ) + + # Create NodeMetrics with metadata + metrics = NodeMetrics( + metadata=node_metadata, + batches=batches, + memory_snapshots=memory_snapshots, + config=config, + ) + + # Create NodeConfig with launch_command + node_config = {} + if launch_command: + node_config["launch_command"] = launch_command + node_config["environment"] = {} # Will be populated by NodeAnalyzer if config file exists + + # Return complete NodeInfo + return NodeInfo(metrics=metrics, node_config=node_config if node_config else None) + + def _parse_timestamp(self, line: str) -> str | None: + """Extract ISO 8601 timestamp from log line. + Example: 2025-12-30T15:52:38.206058Z + """ + match = re.search(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z?)", line) + if match: + return match.group(1) + return None + + def _parse_prefill_batch_line(self, line: str) -> dict | None: + """Parse prefill batch log line for metrics.""" + if "Prefill batch" not in line: + return None + + timestamp = self._parse_timestamp(line) + if not timestamp: + return None + + metrics = {"timestamp": timestamp, "type": "prefill"} + + patterns = { + "new_seq": r"#new-seq:\s*(\d+)", + "new_token": r"#new-token:\s*(\d+)", + "cached_token": r"#cached-token:\s*(\d+)", + "token_usage": r"token usage:\s*([\d.]+)", + "running_req": r"#running-req:\s*(\d+)", + "queue_req": r"#queue-req:\s*(\d+)", + "prealloc_req": r"#prealloc-req:\s*(\d+)", + "inflight_req": r"#inflight-req:\s*(\d+)", + "input_throughput": r"input throughput \(token/s\):\s*([\d.]+)", + } + + for key, pattern in patterns.items(): + match = re.search(pattern, line) + if match: + value = match.group(1) + metrics[key] = float(value) if "." in value else int(value) + + return metrics + + def _parse_decode_batch_line(self, line: str) -> dict | None: + """Parse decode batch log line for metrics.""" + if "Decode batch" not in line: + return None + + timestamp = self._parse_timestamp(line) + if not timestamp: + return None + + metrics = {"timestamp": timestamp, "type": "decode"} + + patterns = { + "running_req": r"#running-req:\s*(\d+)", + "num_tokens": r"#token:\s*(\d+)", + "token_usage": r"token usage:\s*([\d.]+)", + "preallocated_usage": r"pre-allocated usage:\s*([\d.]+)", + "prealloc_req": r"#prealloc-req:\s*(\d+)", + "transfer_req": r"#transfer-req:\s*(\d+)", + "queue_req": r"#queue-req:\s*(\d+)", + "gen_throughput": r"gen throughput \(token/s\):\s*([\d.]+)", + } + + for key, pattern in patterns.items(): + match = re.search(pattern, line) + if match: + value = match.group(1) + metrics[key] = float(value) if "." 
in value else int(value) + + return metrics + + def _parse_memory_line(self, line: str) -> dict | None: + """Parse memory-related log lines.""" + timestamp = self._parse_timestamp(line) + if not timestamp: + return None + + metrics = {"timestamp": timestamp} + + # Parse available memory from "avail mem=75.11 GB" + avail_match = re.search(r"avail mem=([\d.]+)\s*GB", line) + if avail_match: + metrics["avail_mem_gb"] = float(avail_match.group(1)) + metrics["type"] = "memory" + + # Parse memory usage from "mem usage=107.07 GB" + usage_match = re.search(r"mem usage=([\d.]+)\s*GB", line) + if usage_match: + metrics["mem_usage_gb"] = float(usage_match.group(1)) + metrics["type"] = "memory" + + # Parse KV cache size from "KV size: 17.16 GB" + kv_match = re.search(r"KV size:\s*([\d.]+)\s*GB", line) + if kv_match: + metrics["kv_cache_gb"] = float(kv_match.group(1)) + metrics["type"] = "kv_cache" + + # Parse token count from "#tokens: 524288" + token_match = re.search(r"#tokens:\s*(\d+)", line) + if token_match: + metrics["kv_tokens"] = int(token_match.group(1)) + + # Parse from "Capturing batches" progress lines + # Example: "Capturing batches (bs=256 avail_mem=6.32 GB)" + capture_match = re.search(r"avail_mem=([\d.]+)\s*GB", line) + if capture_match and "type" not in metrics: + metrics["avail_mem_gb"] = float(capture_match.group(1)) + metrics["type"] = "memory" + + return metrics if "type" in metrics else None + + def _extract_node_info_from_filename(self, filename: str) -> dict | None: + """Extract node name and worker info from filename. + Example: eos0219_prefill_w0.out + Returns: {'node': 'eos0219', 'worker_type': 'prefill', 'worker_id': 'w0'} + """ + match = re.match( + r"(.+)_(prefill|decode|agg|frontend)_([^.]+)\.(err|out)", + os.path.basename(filename), + ) + if match: + return { + "node": match.group(1), + "worker_type": match.group(2), + "worker_id": match.group(3), + } + return None + + def parse_launch_command(self, log_content: str, worker_type: str = "unknown") -> NodeLaunchCommand | None: + """Parse the SGLang worker launch command from log content. + Looks for command lines or ServerArgs in the log. 
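+ Usage sketch (illustrative; the command line is made up):
+ cmd = SGLangNodeParser().parse_launch_command(
+ "[CMD] python3 -m sglang.launch_server --model-path /models/x --tp-size 8",
+ worker_type="prefill",
+ )
+ # cmd.extra_args -> {"model_path": "/models/x", "tp_size": 8}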
+ Args: + log_content: Content of the worker log file + worker_type: Type of worker (prefill, decode, agg) + Returns: + NodeLaunchCommand with parsed parameters, or None if not found + """ + from analysis.srtlog.parsers import NodeLaunchCommand + + # Strip ANSI codes for cleaner parsing + clean_content = ANSI_ESCAPE.sub("", log_content) + + raw_command = None + + # First, try to find [CMD] tagged command (preferred - from our scripts) + cmd_match = re.search(r"\[CMD\]\s*(.+)$", clean_content, re.MULTILINE) + if cmd_match: + raw_command = cmd_match.group(1).strip() + + # Fallback: pattern to match sglang launch commands + if not raw_command: + patterns = [ + r"(python[3]?\s+-m\s+sglang\.launch_server\s+[^\n]+)", + r"(python[3]?\s+.*launch_server\.py\s+[^\n]+)", + r"(sglang\.launch_server\s+[^\n]+)", + ] + + for pattern in patterns: + match = re.search(pattern, clean_content, re.IGNORECASE) + if match: + raw_command = match.group(1).strip() + break + + # Also try to parse from ServerArgs() log line + if not raw_command: + server_args_match = re.search(r"server_args=ServerArgs\((.*?)\)", clean_content, re.DOTALL) + if server_args_match: + raw_command = f"ServerArgs({server_args_match.group(1)[:200]}...)" + + if not raw_command: + return None + + extra_args: dict[str, Any] = {} + + # Parse SGLang server arguments (from command line) + arg_patterns = { + "model_path": r"--model(?:-path)?[=\s]+([^\s]+)", + "served_model_name": r"--served-model-name[=\s]+([^\s]+)", + "tp_size": r"--tp-size[=\s]+(\d+)", + "pp_size": r"--pp-size[=\s]+(\d+)", + "dp_size": r"--dp-size[=\s]+(\d+)", + "ep_size": r"--ep-size[=\s]+(\d+)", + "host": r"--host[=\s]+([^\s]+)", + "port": r"--port[=\s]+(\d+)", + "max_num_seqs": r"--max-(?:num-seqs|running-requests)[=\s]+(\d+)", + "max_model_len": r"--(?:max-model-len|context-length)[=\s]+(\d+)", + "kv_cache_dtype": r"--kv-cache-dtype[=\s]+([^\s]+)", + "gpu_memory_utilization": r"--(?:mem-fraction-static|gpu-memory-utilization)[=\s]+([\d.]+)", + "disaggregation_mode": r"--disaggregation-mode[=\s]+([^\s]+)", + "nccl_init_addr": r"--(?:dist-init-addr|nccl-init-addr)[=\s]+([^\s]+)", + } + + # Also parse from ServerArgs format + server_args_patterns = { + "model_path": r"model_path=['\"]?([^'\"]+)['\"]?", + "served_model_name": r"served_model_name=['\"]?([^'\"]+)['\"]?", + "tp_size": r"tp_size=(\d+)", + "pp_size": r"pp_size=(\d+)", + "dp_size": r"dp_size=(\d+)", + "ep_size": r"ep_size=(\d+)", + "host": r"host=['\"]?([^'\"]+)['\"]?", + "port": r"port=(\d+)", + "max_num_seqs": r"max_running_requests=(\d+)", + "max_model_len": r"context_length=(\d+)", + "disaggregation_mode": r"disaggregation_mode=['\"]?([^'\"]+)['\"]?", + } + + for field, pattern in arg_patterns.items(): + match = re.search(pattern, raw_command) + if match: + value: Any = match.group(1) + if field in ("tp_size", "pp_size", "dp_size", "ep_size", "port", "max_num_seqs", "max_model_len"): + value = int(value) + elif field == "gpu_memory_utilization": + value = float(value) + extra_args[field] = value + + # Try ServerArgs patterns for any missing fields + for field, pattern in server_args_patterns.items(): + if field not in extra_args: + match = re.search(pattern, clean_content) + if match: + value = match.group(1) + if field in ("tp_size", "pp_size", "dp_size", "ep_size", "port", "max_num_seqs", "max_model_len"): + value = int(value) + extra_args[field] = value + + return NodeLaunchCommand( + backend_type="sglang", + worker_type=worker_type, + raw_command=raw_command, + extra_args=extra_args, + ) diff --git 
a/analysis/srtlog/parsers/nodes/trtllm.py b/analysis/srtlog/parsers/nodes/trtllm.py new file mode 100644 index 00000000..067d0b00 --- /dev/null +++ b/analysis/srtlog/parsers/nodes/trtllm.py @@ -0,0 +1,473 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""TRTLLM node log parser. +Parses logs from TensorRT-LLM workers launched via dynamo.trtllm. +Example log format: + [33mRank0 run python3 -m dynamo.trtllm --model-path /model --served-model-name dsr1-fp8 ... + Initializing the worker with config: Config(namespace=dynamo, component=prefill, ...) +""" + +from __future__ import annotations + +import logging +import os +import re +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from analysis.srtlog.models import BatchMetrics, MemoryMetrics, NodeInfo, NodeMetadata, NodeMetrics +from analysis.srtlog.parsers import register_node_parser + +if TYPE_CHECKING: + from analysis.srtlog.parsers import NodeLaunchCommand + +logger = logging.getLogger(__name__) + +# ANSI escape code pattern for stripping colors +ANSI_ESCAPE = re.compile(r"\x1b\[[0-9;]*m") + + +@register_node_parser("trtllm") +class TRTLLMNodeParser: + """Parser for TensorRT-LLM node logs. + Parses logs from TRTLLM workers, including: + - Launch command from dynamo.trtllm + - Worker configuration from Config() dump + - MPI rank and world size information + """ + + @property + def backend_type(self) -> str: + return "trtllm" + + def parse_logs(self, log_dir: Path) -> list[NodeInfo]: + """Parse all TRTLLM node logs in a directory. + Args: + log_dir: Directory containing *_prefill_*.out, *_decode_*.out files + Returns: + List of NodeInfo objects + """ + log_dir = Path(log_dir) + nodes = [] + + if not log_dir.exists(): + logger.error("Log directory does not exist: %s", log_dir) + return nodes + + # Find all worker log files + for file in os.listdir(log_dir): + if not (file.endswith(".err") or file.endswith(".out")): + continue + if not any(wt in file for wt in ("prefill", "decode", "agg")): + continue + + filepath = log_dir / file + node = self.parse_single_log(filepath) + if node: + nodes.append(node) + + logger.info("Parsed %d TRTLLM node log files from %s", len(nodes), log_dir) + return nodes + + def parse_single_log(self, log_path: Path) -> NodeInfo | None: + """Parse a single TRTLLM log file. 
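+ Usage sketch (illustrative; the file name is hypothetical):
+ node = TRTLLMNodeParser().parse_single_log(Path("worker-0_prefill_w0.out"))
+ if node is not None:
+ print(node.metrics.config.get("tp_size"), len(node.metrics.batches))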
+ Args: + log_path: Path to a node log file + Returns: + NodeInfo object or None if parsing failed + """ + node_info = self._extract_node_info_from_filename(str(log_path)) + if not node_info: + logger.warning("Could not extract node info from filename: %s", log_path) + return None + + batches = [] + memory_snapshots = [] + config = {} + launch_command = None + + try: + # Handle encoding issues gracefully + content = log_path.read_text(errors="replace") + clean_content = ANSI_ESCAPE.sub("", content) + + # Parse launch command + launch_command = self.parse_launch_command(clean_content, node_info["worker_type"]) + + # Extract MPI configuration + mpi_size_match = re.search(r"tllm_mpi_size:\s*(\d+)", clean_content) + if mpi_size_match: + config["mpi_world_size"] = int(mpi_size_match.group(1)) + + # Extract TP/PP from Config() dump + config_match = re.search(r"Config\((.*?)\)", clean_content) + if config_match: + config_str = config_match.group(1) + + tp_match = re.search(r"tensor_parallel_size=(\d+)", config_str) + if tp_match: + config["tp_size"] = int(tp_match.group(1)) + + pp_match = re.search(r"pipeline_parallel_size=(\d+)", config_str) + if pp_match: + config["pp_size"] = int(pp_match.group(1)) + + ep_match = re.search(r"expert_parallel_size=(\d+)", config_str) + if ep_match: + config["ep_size"] = int(ep_match.group(1)) + + max_batch_match = re.search(r"max_batch_size=(\d+)", config_str) + if max_batch_match: + config["max_batch_size"] = int(max_batch_match.group(1)) + + max_tokens_match = re.search(r"max_num_tokens=(\d+)", config_str) + if max_tokens_match: + config["max_num_tokens"] = int(max_tokens_match.group(1)) + + max_seq_match = re.search(r"max_seq_len=(\d+)", config_str) + if max_seq_match: + config["max_seq_len"] = int(max_seq_match.group(1)) + + # Extract from separate trtllm_config YAML references + yaml_match = re.search(r"extra_engine_args=([^\s,]+\.yaml)", clean_content) + if yaml_match: + config["extra_engine_args"] = yaml_match.group(1) + + # Also extract from TensorRT-LLM engine args line which has actual parallelism + engine_args_match = re.search(r"TensorRT-LLM engine args:\s*\{([^}]+)", clean_content) + if engine_args_match: + engine_str = engine_args_match.group(1) + + engine_tp_match = re.search(r"'tensor_parallel_size':\s*(\d+)", engine_str) + if engine_tp_match: + config["tp_size"] = int(engine_tp_match.group(1)) + + engine_pp_match = re.search(r"'pipeline_parallel_size':\s*(\d+)", engine_str) + if engine_pp_match: + config["pp_size"] = int(engine_pp_match.group(1)) + + engine_ep_match = re.search(r"'moe_expert_parallel_size':\s*(\d+)", engine_str) + if engine_ep_match: + config["ep_size"] = int(engine_ep_match.group(1)) + + engine_batch_match = re.search(r"'max_batch_size':\s*(\d+)", engine_str) + if engine_batch_match: + config["max_batch_size"] = int(engine_batch_match.group(1)) + + engine_tokens_match = re.search(r"'max_num_tokens':\s*(\d+)", engine_str) + if engine_tokens_match: + config["max_num_tokens"] = int(engine_tokens_match.group(1)) + + engine_seq_match = re.search(r"'max_seq_len':\s*(\d+)", engine_str) + if engine_seq_match: + config["max_seq_len"] = int(engine_seq_match.group(1)) + + # Parse iteration logs for batch metrics + # Format: iter = X, ... 
num_scheduled_requests: X, states = {'num_ctx_requests': X, 'num_ctx_tokens': X, 'num_generation_tokens': X} + batches = self._parse_iteration_logs(clean_content, node_info.get("worker_type", "unknown")) + + # Parse memory info + memory_snapshots = self._parse_memory_info(clean_content) + + except Exception as e: + logger.error("Error parsing %s: %s", log_path, e) + return None + + logger.debug("Parsed %s: %d batches, %d memory snapshots, config=%s", log_path, len(batches), len(memory_snapshots), config) + + # Create NodeMetadata + node_metadata = NodeMetadata( + node_name=node_info["node"], + worker_type=node_info["worker_type"], + worker_id=node_info["worker_id"], + ) + + # Create NodeMetrics with metadata + metrics = NodeMetrics( + metadata=node_metadata, + batches=batches, + memory_snapshots=memory_snapshots, + config=config, + ) + + # Create NodeConfig with launch_command + node_config = {} + if launch_command: + node_config["launch_command"] = launch_command + node_config["environment"] = {} # Will be populated by NodeAnalyzer if config file exists + + # Return complete NodeInfo + return NodeInfo(metrics=metrics, node_config=node_config if node_config else None) + + def _parse_iteration_logs(self, content: str, worker_type: str) -> list[BatchMetrics]: + """Parse TRTLLM iteration logs for batch metrics. + Format: + [01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 5559, ..., num_scheduled_requests: 1, + states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 3} + Args: + content: Log file content (ANSI stripped) + worker_type: Worker type (prefill, decode) + Returns: + List of BatchMetrics objects + """ + batches = [] + + # Pattern to match TRTLLM iteration logs + iter_pattern = re.compile( + r"\[(\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2})\].*" + r"iter\s*=\s*(\d+).*" + r"num_scheduled_requests:\s*(\d+).*" + r"states\s*=\s*\{([^}]+)\}" + ) + + for match in iter_pattern.finditer(content): + timestamp = match.group(1) + iteration = int(match.group(2)) + num_scheduled = int(match.group(3)) + states_str = match.group(4) + + # Parse states dict + ctx_requests = 0 + ctx_tokens = 0 + gen_tokens = 0 + + ctx_req_match = re.search(r"'num_ctx_requests':\s*(\d+)", states_str) + if ctx_req_match: + ctx_requests = int(ctx_req_match.group(1)) + + ctx_tok_match = re.search(r"'num_ctx_tokens':\s*(\d+)", states_str) + if ctx_tok_match: + ctx_tokens = int(ctx_tok_match.group(1)) + + gen_tok_match = re.search(r"'num_generation_tokens':\s*(\d+)", states_str) + if gen_tok_match: + gen_tokens = int(gen_tok_match.group(1)) + + # Determine batch type based on content + if ctx_tokens > 0: + batch_type = "prefill" + elif gen_tokens > 0: + batch_type = "decode" + else: + batch_type = worker_type + + # Parse step time if available + step_time = None + step_match = re.search(r"host_step_time\s*=\s*([\d.]+)ms", match.group(0)) + if step_match: + step_time = float(step_match.group(1)) + + # Compute throughput (tokens/s) + input_throughput = None + gen_throughput = None + if step_time and step_time > 0: + if batch_type == "prefill" and ctx_tokens > 0: + # Prefill throughput: context tokens / step time + input_throughput = (ctx_tokens * 1000.0) / step_time + elif batch_type == "decode" and gen_tokens > 0: + # Decode throughput: generation tokens / step time + gen_throughput = (gen_tokens * 1000.0) / step_time + + batches.append( + BatchMetrics( + timestamp=timestamp, + dp=0, + tp=0, + ep=0, + batch_type=batch_type, + running_req=num_scheduled, + new_token=ctx_tokens if batch_type == "prefill" else 
None, + num_tokens=gen_tokens if batch_type == "decode" else None, + input_throughput=input_throughput, + gen_throughput=gen_throughput, + ) + ) + + return batches + + def _parse_memory_info(self, content: str) -> list[MemoryMetrics]: + """Parse TRTLLM memory information. + Format: + Peak memory during memory usage profiling (torch + non-torch): 91.46 GiB, + available KV cache memory when calculating max tokens: 41.11 GiB, + fraction is set 0.85, kv size is 35136. device total memory 139.81 GiB + Args: + content: Log file content (ANSI stripped) + Returns: + List of MemoryMetrics objects + """ + memory_snapshots = [] + + # Pattern to match memory info + mem_pattern = re.compile( + r"\[(\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2})\].*" + r"Peak memory.*?:\s*([\d.]+)\s*GiB.*?" + r"available KV cache memory.*?:\s*([\d.]+)\s*GiB.*?" + r"device total memory\s*([\d.]+)\s*GiB" + ) + + for match in mem_pattern.finditer(content): + timestamp = match.group(1) + peak_mem = float(match.group(2)) + avail_kv = float(match.group(3)) + total_mem = float(match.group(4)) + + memory_snapshots.append( + MemoryMetrics( + timestamp=timestamp, + dp=0, + tp=0, + ep=0, + metric_type="memory", + mem_usage_gb=peak_mem, + avail_mem_gb=total_mem - peak_mem, + kv_cache_gb=avail_kv, + ) + ) + + # Also parse KV cache allocation info + kv_alloc_pattern = re.compile( + r"\[MemUsageChange\] Allocated\s*([\d.]+)\s*GiB for max tokens.*?\((\d+)\)" + ) + + for match in kv_alloc_pattern.finditer(content): + kv_gb = float(match.group(1)) + max_tokens = int(match.group(2)) + + memory_snapshots.append( + MemoryMetrics( + timestamp="", + dp=0, + tp=0, + ep=0, + metric_type="kv_cache", + kv_cache_gb=kv_gb, + kv_tokens=max_tokens, + ) + ) + + return memory_snapshots + + def _extract_node_info_from_filename(self, filename: str) -> dict | None: + """Extract node name and worker info from filename. + Example: worker-0_prefill_w0.out + Returns: {'node': 'worker-0', 'worker_type': 'prefill', 'worker_id': 'w0'} + """ + match = re.match( + r"(.+)_(prefill|decode|agg|frontend)_([^.]+)\.(err|out)", + os.path.basename(filename), + ) + if match: + return { + "node": match.group(1), + "worker_type": match.group(2), + "worker_id": match.group(3), + } + return None + + def parse_launch_command(self, log_content: str, worker_type: str = "unknown") -> NodeLaunchCommand | None: + """Parse the TRTLLM worker launch command from log content. 
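+ Usage sketch (illustrative; the log line is made up):
+ cmd = TRTLLMNodeParser().parse_launch_command(
+ "[CMD] python3 -m dynamo.trtllm --model-path /model --disaggregation-mode prefill",
+ worker_type="prefill",
+ )
+ # cmd.extra_args -> {"model_path": "/model", "disaggregation_mode": "prefill"}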
+ Looks for command lines like: + [CMD] python3 -m dynamo.trtllm --model-path /model --served-model-name dsr1-fp8 --disaggregation-mode prefill + python3 -m dynamo.trtllm --model-path /model --served-model-name dsr1-fp8 --disaggregation-mode prefill + Args: + log_content: Content of the worker log file + worker_type: Type of worker (prefill, decode, agg) + Returns: + NodeLaunchCommand with parsed parameters, or None if not found + """ + from analysis.srtlog.parsers import NodeLaunchCommand + + # Strip ANSI codes for cleaner parsing + clean_content = ANSI_ESCAPE.sub("", log_content) + + raw_command = None + + # First, try to find [CMD] tagged command (preferred - from our scripts) + cmd_match = re.search(r"\[CMD\]\s*(.+)$", clean_content, re.MULTILINE) + if cmd_match: + raw_command = cmd_match.group(1).strip() + + # Fallback: pattern to match TRTLLM launch commands (dynamo.trtllm or tensorrt_llm.serve) + if not raw_command: + patterns = [ + r"(?:Rank\d+\s+run\s+)?(python[3]?\s+-m\s+dynamo\.trtllm\s+[^\n]+)", + r"(?:Rank\d+\s+run\s+)?(python[3]?\s+-m\s+tensorrt_llm\.serve\s+[^\n]+)", + r"(trtllm-serve\s+[^\n]+)", + r"(mpirun\s+.*trtllm[^\n]+)", + ] + + for pattern in patterns: + match = re.search(pattern, clean_content) + if match: + raw_command = match.group(1).strip() + # Remove trailing "in background" if present + raw_command = re.sub(r"\s+in\s+background$", "", raw_command) + break + + if not raw_command: + return None + + extra_args: dict[str, Any] = {} + + # Parse dynamo.trtllm / tensorrt_llm server arguments from command line + arg_patterns = { + "model_path": r"--model-path[=\s]+([^\s]+)", + "served_model_name": r"--served-model-name[=\s]+([^\s]+)", + "disaggregation_mode": r"--disaggregation-mode[=\s]+([^\s]+)", + "host": r"--host[=\s]+([^\s]+)", + "port": r"--port[=\s]+(\d+)", + } + + for field, pattern in arg_patterns.items(): + match = re.search(pattern, raw_command) + if match: + value = match.group(1) + if field == "port": + value = int(value) + extra_args[field] = value + + # Also extract from TensorRT-LLM engine args if available (has actual parallelism values) + engine_args_match = re.search(r"TensorRT-LLM engine args:\s*\{([^}]+)", clean_content) + if engine_args_match: + engine_str = engine_args_match.group(1) + + engine_patterns = { + "tp_size": r"'tensor_parallel_size':\s*(\d+)", + "pp_size": r"'pipeline_parallel_size':\s*(\d+)", + "max_num_seqs": r"'max_batch_size':\s*(\d+)", + "max_model_len": r"'max_seq_len':\s*(\d+)", + } + + for field, pattern in engine_patterns.items(): + if field not in extra_args: + match = re.search(pattern, engine_str) + if match: + extra_args[field] = int(match.group(1)) + + # Fallback to Config() dump + if "tp_size" not in extra_args: + config_match = re.search(r"Config\((.*?)\)", clean_content) + if config_match: + config_str = config_match.group(1) + + config_patterns = { + "tp_size": r"tensor_parallel_size=(\d+)", + "pp_size": r"pipeline_parallel_size=(\d+)", + "max_num_seqs": r"max_batch_size=(\d+)", + "max_model_len": r"max_seq_len=(\d+)", + } + + for field, pattern in config_patterns.items(): + if field not in extra_args: + match = re.search(pattern, config_str) + if match: + extra_args[field] = int(match.group(1)) + + return NodeLaunchCommand( + backend_type=self.backend_type, + worker_type=worker_type, + raw_command=raw_command, + extra_args=extra_args, + ) \ No newline at end of file diff --git a/analysis/srtlog/run_loader.py b/analysis/srtlog/run_loader.py index f093b24a..102e488f 100644 --- a/analysis/srtlog/run_loader.py +++ 
b/analysis/srtlog/run_loader.py @@ -8,11 +8,14 @@ import logging import os import re +from pathlib import Path import pandas as pd from .cache_manager import CacheManager -from .models import BenchmarkRun +from .log_parser import NodeAnalyzer +from .models import BenchmarkRun, NodeMetrics +from .parsers import get_benchmark_parser, get_node_parser logger = logging.getLogger(__name__) @@ -62,7 +65,7 @@ def load_all_with_skipped(self) -> tuple[list[BenchmarkRun], list[tuple[str, str run = BenchmarkRun.from_json_file(path) if run is not None: # Skip profiling jobs (they don't have benchmark results) - if run.profiler.profiler_type == "torch-profiler": + if run.profiler_metadata.profiler_type == "torch-profiler": reason = "Profiling job (no benchmark results)" logger.debug(f"Skipping profiling job {run.job_id}") skipped.append((run.job_id, run_dir, reason)) @@ -214,20 +217,33 @@ def _load_benchmark_results(self, run: BenchmarkRun) -> None: run_path = run.metadata.path # Check both run_path and run_path/logs for benchmark results - search_paths = [run_path] - logs_subdir = os.path.join(run_path, "logs") - if os.path.exists(logs_subdir): - search_paths.append(logs_subdir) - # Initialize cache manager cache_mgr = CacheManager(run_path) - # Use profiler_type from metadata to construct directory name - profiler_type = run.profiler.profiler_type - pattern_strs = [f"{profiler_type}_isl_{run.profiler.isl}_osl_{run.profiler.osl}"] - - # Define source patterns for cache validation (check all possible patterns) - source_patterns = [f"{pattern}/*.json" for pattern in pattern_strs] + # Use profiler_type from metadata + profiler_type = run.profiler_metadata.profiler_type + + # Get the parser for this benchmark type + try: + parser = get_benchmark_parser(profiler_type) + except ValueError as e: + logger.warning(f"No parser available for {profiler_type}: {e}") + return + + # Let the parser find its result directory + result_dir = parser.find_result_directory( + Path(run_path), + isl=run.profiler_metadata.isl, + osl=run.profiler_metadata.osl + ) + + if not result_dir: + logger.warning(f"No results directory found for {profiler_type} in {run_path}") + return + + # Define source patterns for cache validation (relative to run_path) + result_dir_rel = result_dir.relative_to(Path(run_path)) if result_dir.is_relative_to(Path(run_path)) else result_dir.name + source_patterns = [f"{result_dir_rel}/*.json"] # Try to load from cache first if cache_mgr.is_cache_valid("benchmark_results", source_patterns): @@ -273,127 +289,88 @@ def _load_benchmark_results(self, run: BenchmarkRun) -> None: run.profiler.add_benchmark_results(results) return - # Cache miss or invalid - parse from JSON files - for pattern_str in pattern_strs: - profiler_pattern = re.compile(pattern_str) - for search_path in search_paths: - for entry in os.listdir(search_path): - if profiler_pattern.match(entry): - result_dir = os.path.join(search_path, entry) - if os.path.isdir(result_dir): - results = self._parse_profiler_results(result_dir) - run.profiler.add_benchmark_results(results) - - # Save to cache - if results["concurrencies"]: - # Convert to DataFrame for caching - cache ALL parsed fields - cache_data = { - "concurrency": results["concurrencies"], - "output_tps": results["output_tps"], - "mean_itl_ms": results["mean_itl_ms"], - "mean_ttft_ms": results["mean_ttft_ms"], - "request_rate": results["request_rate"], - } - - # Add all optional fields if they have data - optional_fields = { - "mean_tpot_ms": "mean_tpot_ms", - "total_tps": "total_tps", - 
"request_throughput": "request_throughput", - "request_goodput": "request_goodput", - "mean_e2el_ms": "mean_e2el_ms", - "median_ttft_ms": "median_ttft_ms", - "median_tpot_ms": "median_tpot_ms", - "median_itl_ms": "median_itl_ms", - "median_e2el_ms": "median_e2el_ms", - "p99_ttft_ms": "p99_ttft_ms", - "p99_tpot_ms": "p99_tpot_ms", - "p99_itl_ms": "p99_itl_ms", - "p99_e2el_ms": "p99_e2el_ms", - "std_ttft_ms": "std_ttft_ms", - "std_tpot_ms": "std_tpot_ms", - "std_itl_ms": "std_itl_ms", - "std_e2el_ms": "std_e2el_ms", - "total_input_tokens": "total_input_tokens", - "total_output_tokens": "total_output_tokens", - } - - for result_key, cache_key in optional_fields.items(): - if results.get(result_key): - cache_data[cache_key] = results[result_key] - - cache_df = pd.DataFrame(cache_data) - cache_mgr.save_to_cache("benchmark_results", cache_df, source_patterns) - - return # Found results, stop searching - - def _parse_profiler_results(self, result_dir: str) -> dict: - """Parse profiler result JSON files. + # Cache miss - parse results + results = self._parse_profiler_results(str(result_dir), profiler_type) + run.profiler.add_benchmark_results(results) + + # Save to cache + if results["concurrencies"]: + # Convert to DataFrame for caching - cache ALL parsed fields + cache_data = { + "concurrency": results["concurrencies"], + "output_tps": results["output_tps"], + "mean_itl_ms": results["mean_itl_ms"], + "mean_ttft_ms": results["mean_ttft_ms"], + "request_rate": results["request_rate"], + } + + # Add all optional fields if they have data + optional_fields = { + "mean_tpot_ms": "mean_tpot_ms", + "total_tps": "total_tps", + "request_throughput": "request_throughput", + "request_goodput": "request_goodput", + "mean_e2el_ms": "mean_e2el_ms", + "median_ttft_ms": "median_ttft_ms", + "median_tpot_ms": "median_tpot_ms", + "median_itl_ms": "median_itl_ms", + "median_e2el_ms": "median_e2el_ms", + "p99_ttft_ms": "p99_ttft_ms", + "p99_tpot_ms": "p99_tpot_ms", + "p99_itl_ms": "p99_itl_ms", + "p99_e2el_ms": "p99_e2el_ms", + "std_ttft_ms": "std_ttft_ms", + "std_tpot_ms": "std_tpot_ms", + "std_itl_ms": "std_itl_ms", + "std_e2el_ms": "std_e2el_ms", + "total_input_tokens": "total_input_tokens", + "total_output_tokens": "total_output_tokens", + } + + for result_key, cache_key in optional_fields.items(): + if results.get(result_key): + cache_data[cache_key] = results[result_key] + + cache_df = pd.DataFrame(cache_data) + cache_mgr.save_to_cache("benchmark_results", cache_df, source_patterns) + + def _parse_profiler_results(self, result_dir: str, profiler_type: str) -> dict: + """Parse profiler result JSON files using the parser infrastructure. Args: result_dir: Path to directory containing benchmark result JSON files + profiler_type: Type of profiler/benchmark (e.g., "sa-bench", "mooncake-router") Returns: Dict with concurrencies, output_tps, mean_itl_ms, etc. 
""" - result = [] + result_dir_path = Path(result_dir) - for file in os.listdir(result_dir): - if not file.endswith(".json"): - continue + try: + # Get the appropriate parser + parser = get_benchmark_parser(profiler_type) - filepath = os.path.join(result_dir, file) - try: - with open(filepath) as f: - content = json.load(f) + # Let the parser find and parse all result files in the directory + # The parser knows where to look (e.g., artifacts/ subdirectories) + results_list = parser.parse_result_directory(result_dir_path) - # Parse all available metrics from benchmark output - res = { - "max_concurrency": content.get("max_concurrency"), - # Throughput metrics - "output_throughput": content.get("output_throughput"), - "total_token_throughput": content.get("total_token_throughput"), - "request_throughput": content.get("request_throughput"), - "request_goodput": content.get("request_goodput"), - "request_rate": content.get("request_rate"), - # Mean latencies - "mean_ttft_ms": content.get("mean_ttft_ms"), - "mean_tpot_ms": content.get("mean_tpot_ms"), - "mean_itl_ms": content.get("mean_itl_ms"), - "mean_e2el_ms": content.get("mean_e2el_ms"), - # Median latencies - "median_ttft_ms": content.get("median_ttft_ms"), - "median_tpot_ms": content.get("median_tpot_ms"), - "median_itl_ms": content.get("median_itl_ms"), - "median_e2el_ms": content.get("median_e2el_ms"), - # P99 latencies - "p99_ttft_ms": content.get("p99_ttft_ms"), - "p99_tpot_ms": content.get("p99_tpot_ms"), - "p99_itl_ms": content.get("p99_itl_ms"), - "p99_e2el_ms": content.get("p99_e2el_ms"), - # Std dev latencies - "std_ttft_ms": content.get("std_ttft_ms"), - "std_tpot_ms": content.get("std_tpot_ms"), - "std_itl_ms": content.get("std_itl_ms"), - "std_e2el_ms": content.get("std_e2el_ms"), - # Token counts - "total_input_tokens": content.get("total_input_tokens"), - "total_output_tokens": content.get("total_output_tokens"), - # Metadata - "backend": content.get("backend"), - "model_id": content.get("model_id"), - "date": content.get("date"), - "duration": content.get("duration"), - "completed": content.get("completed"), - "num_prompts": content.get("num_prompts"), - } + # Convert results to the format expected by the rest of the code + return self._convert_parser_results_to_dict(results_list) - result.append(res) - except Exception as e: - logger.warning(f"Error parsing {filepath}: {e}") - continue + except ValueError as e: + # Parser not found - fall back to manual parsing + logger.warning(f"Parser not available for {profiler_type}, falling back to manual parsing: {e}") + return self._parse_profiler_results_manual(result_dir) - # Organize results - sort by concurrency + def _convert_parser_results_to_dict(self, results_list: list[dict]) -> dict: + """Convert parser results to the dict format expected by add_benchmark_results. 
+ + Args: + results_list: List of result dicts from parser (one per concurrency level) + + Returns: + Dict with lists of metrics across concurrency levels + """ out = { # Primary metrics "concurrencies": [], @@ -434,12 +411,14 @@ def _parse_profiler_results(self, result_dir: str) -> dict: "num_prompts": [], } - # Sort by concurrency and aggregate - for data in sorted(result, key=lambda x: x.get("max_concurrency", 0) or 0): - out["concurrencies"].append(data.get("max_concurrency")) - # Throughput - out["output_tps"].append(data.get("output_throughput")) - out["total_tps"].append(data.get("total_token_throughput")) + # results_list is already sorted by the parser + for data in results_list: + # Concurrency + concurrency = data.get("max_concurrency") or data.get("concurrency") or 0 + out["concurrencies"].append(concurrency) + # Throughput - normalize field names + out["output_tps"].append(data.get("output_throughput") or data.get("output_tps")) + out["total_tps"].append(data.get("total_token_throughput") or data.get("total_tps")) out["request_throughput"].append(data.get("request_throughput")) out["request_goodput"].append(data.get("request_goodput")) out["request_rate"].append(data.get("request_rate")) @@ -476,6 +455,78 @@ def _parse_profiler_results(self, result_dir: str) -> dict: return out + def _parse_profiler_results_manual(self, result_dir: str) -> dict: + """Fallback manual parser for benchmark result JSON files. + + This is kept for backward compatibility when parsers are not available. + + Args: + result_dir: Path to directory containing benchmark result JSON files + + Returns: + Dict with concurrencies, output_tps, mean_itl_ms, etc. + """ + result = [] + + for file in os.listdir(result_dir): + if not file.endswith(".json"): + continue + + filepath = os.path.join(result_dir, file) + try: + with open(filepath) as f: + content = json.load(f) + + # Parse all available metrics from benchmark output + res = { + "max_concurrency": content.get("max_concurrency"), + # Throughput metrics + "output_throughput": content.get("output_throughput"), + "total_token_throughput": content.get("total_token_throughput"), + "request_throughput": content.get("request_throughput"), + "request_goodput": content.get("request_goodput"), + "request_rate": content.get("request_rate"), + # Mean latencies + "mean_ttft_ms": content.get("mean_ttft_ms"), + "mean_tpot_ms": content.get("mean_tpot_ms"), + "mean_itl_ms": content.get("mean_itl_ms"), + "mean_e2el_ms": content.get("mean_e2el_ms"), + # Median latencies + "median_ttft_ms": content.get("median_ttft_ms"), + "median_tpot_ms": content.get("median_tpot_ms"), + "median_itl_ms": content.get("median_itl_ms"), + "median_e2el_ms": content.get("median_e2el_ms"), + # P99 latencies + "p99_ttft_ms": content.get("p99_ttft_ms"), + "p99_tpot_ms": content.get("p99_tpot_ms"), + "p99_itl_ms": content.get("p99_itl_ms"), + "p99_e2el_ms": content.get("p99_e2el_ms"), + # Std dev latencies + "std_ttft_ms": content.get("std_ttft_ms"), + "std_tpot_ms": content.get("std_tpot_ms"), + "std_itl_ms": content.get("std_itl_ms"), + "std_e2el_ms": content.get("std_e2el_ms"), + # Token counts + "total_input_tokens": content.get("total_input_tokens"), + "total_output_tokens": content.get("total_output_tokens"), + # Metadata + "backend": content.get("backend"), + "model_id": content.get("model_id"), + "date": content.get("date"), + "duration": content.get("duration"), + "completed": content.get("completed"), + "num_prompts": content.get("num_prompts"), + } + + result.append(res) + except 
Exception as e: + logger.warning(f"Error parsing {filepath}: {e}") + continue + + # Sort by concurrency and convert to dict format + results_list = sorted(result, key=lambda x: x.get("max_concurrency", 0) or 0) + return self._convert_parser_results_to_dict(results_list) + def get_run_count(self) -> int: """Get count of valid benchmark runs in logs directory. @@ -553,9 +604,9 @@ def to_dataframe(self, runs: list[BenchmarkRun] | None = None): row = { "Run ID": run_id, "Run Date": run.metadata.run_date, - "Profiler": run.profiler.profiler_type, - "ISL": run.profiler.isl, - "OSL": run.profiler.osl, + "Profiler": run.profiler_metadata.profiler_type, + "ISL": run.profiler_metadata.isl, + "OSL": run.profiler_metadata.osl, "Prefill Nodes": run.metadata.prefill_nodes, "Decode Nodes": run.metadata.decode_nodes, "Prefill Workers": run.metadata.prefill_workers, @@ -620,3 +671,43 @@ def update_tags(self, run_path: str, tags: list[str]) -> bool: except Exception as e: logger.error(f"Error updating tags for {json_path}: {e}") return False + + def load_node_metrics(self, run_path: str, backend_type: str = "sglang") -> list[NodeMetrics]: + """Load node metrics from worker log files using NodeAnalyzer. + + Args: + run_path: Path to the run directory + backend_type: Backend type (sglang or trtllm) - deprecated, auto-detected + + Returns: + List of NodeInfo objects, one per worker + """ + # Handle both relative and absolute paths + if not os.path.isabs(run_path): + run_path = os.path.join(self.logs_dir, run_path) + + # Use NodeAnalyzer which handles caching, backend detection, and config loading + analyzer = NodeAnalyzer() + return analyzer.parse_run_logs(run_path, return_dicts=False) + + def load_node_metrics_for_run(self, run: BenchmarkRun) -> list[NodeMetrics]: + """Load node metrics for a BenchmarkRun. + + Automatically detects backend type from the run's container image. 
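+ Detection sketch (container tags are hypothetical): an image name containing
+ "trtllm", "dynamo", or "tensorrt" selects the TRTLLM parser; an image
+ containing "sglang", or none of these markers, falls back to the SGLang parser.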
+ + Args: + run: BenchmarkRun object + + Returns: + List of NodeMetrics objects + """ + # Detect backend type from container + backend_type = "sglang" # Default + container = run.metadata.container.lower() + + if "trtllm" in container or "dynamo" in container or "tensorrt" in container: + backend_type = "trtllm" + elif "sglang" in container: + backend_type = "sglang" + + return self.load_node_metrics(run.metadata.path, backend_type) diff --git a/pyproject.toml b/pyproject.toml index f2f6a6e4..265ec6c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,13 @@ dev = [ "ty", # Astral's fast type checker (replaces mypy) ] +analysis = [ + "streamlit>=1.30.0", + "plotly>=5.18.0", + "pandas>=2.1.0", + "pyarrow>=14.0.0", # For parquet caching +] + # ============================================================================= # Ruff - Fast Python linter and formatter # ============================================================================= diff --git a/src/srtctl/backends/base.py b/src/srtctl/backends/base.py index 9a2dbb34..984cbd27 100644 --- a/src/srtctl/backends/base.py +++ b/src/srtctl/backends/base.py @@ -6,7 +6,7 @@ """ from collections.abc import Sequence -from dataclasses import dataclass, field +from dataclasses import dataclass from enum import Enum from typing import TYPE_CHECKING, Any, Optional, Protocol diff --git a/src/srtctl/backends/sglang.py b/src/srtctl/backends/sglang.py index c34f1e37..756ef3bb 100644 --- a/src/srtctl/backends/sglang.py +++ b/src/srtctl/backends/sglang.py @@ -139,11 +139,8 @@ def get_kv_events_config_for_mode(self, mode: WorkerMode) -> dict[str, str] | No # Per-mode config dict if isinstance(self.kv_events_config, dict): # Normalize mode key: use "aggregated" for aggregated mode - if mode == "agg": - mode_cfg = self.kv_events_config.get("aggregated") - else: - mode_cfg = self.kv_events_config.get(mode) - + mode_cfg = self.kv_events_config.get("aggregated") if mode == "agg" else self.kv_events_config.get(mode) + if mode_cfg is None: return None if mode_cfg is True: diff --git a/src/srtctl/backends/trtllm.py b/src/srtctl/backends/trtllm.py index 572fa351..2553860f 100644 --- a/src/srtctl/backends/trtllm.py +++ b/src/srtctl/backends/trtllm.py @@ -1,7 +1,8 @@ import builtins +from collections.abc import Sequence from dataclasses import field from pathlib import Path -from typing import Any, ClassVar, Literal, Sequence, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, ClassVar, Literal import yaml from marshmallow import Schema @@ -15,6 +16,7 @@ # Type alias for worker modes WorkerMode = Literal["prefill", "decode", "agg"] + @dataclass(frozen=True) class TRTLLMServerConfig: """SGLang server CLI configuration per mode (prefill/decode/aggregated). @@ -29,6 +31,7 @@ class TRTLLMServerConfig: Schema: ClassVar[type[Schema]] = Schema + @dataclass(frozen=True) class TRTLLMProtocol: """TRTLLM protocol - implements BackendProtocol. 
@@ -50,7 +53,7 @@ class TRTLLMProtocol: """ type: Literal["trtllm"] = "trtllm" - + prefill_environment: dict[str, str] = field(default_factory=dict) decode_environment: dict[str, str] = field(default_factory=dict) @@ -82,7 +85,7 @@ def get_config_for_mode(self, mode: WorkerMode) -> dict[str, Any]: elif mode == "decode": return dict(self.trtllm_config.decode or {}) elif mode == "agg": - raise ValueError(f"Aggregated mode is not supported for TRTLLM") + raise ValueError("Aggregated mode is not supported for TRTLLM") return {} def get_environment_for_mode(self, mode: WorkerMode) -> dict[str, str]: @@ -91,9 +94,9 @@ def get_environment_for_mode(self, mode: WorkerMode) -> dict[str, str]: elif mode == "decode": return dict(self.decode_environment) elif mode == "agg": - raise ValueError(f"Aggregated mode is not supported for TRTLLM") + raise ValueError("Aggregated mode is not supported for TRTLLM") return {} - + def allocate_endpoints( self, num_prefill: int, @@ -128,7 +131,7 @@ def endpoints_to_processes( from srtctl.core.topology import endpoints_to_processes return endpoints_to_processes(endpoints, base_sys_port=base_sys_port) - + def build_worker_command( self, process: "Process", @@ -168,7 +171,7 @@ def build_worker_command( "--extra-engine-args", str(container_config_path), "--request-plane", - "nats" + "nats", ] - return cmd \ No newline at end of file + return cmd diff --git a/src/srtctl/benchmarks/scripts/gpqa/bench.sh b/src/srtctl/benchmarks/scripts/gpqa/bench.sh index 01670aa2..064ad04a 100644 --- a/src/srtctl/benchmarks/scripts/gpqa/bench.sh +++ b/src/srtctl/benchmarks/scripts/gpqa/bench.sh @@ -26,14 +26,9 @@ export OPENAI_API_KEY="${OPENAI_API_KEY:-EMPTY}" echo "Running GPQA evaluation..." -python3 -m sglang.test.run_eval \ - --base-url "${ENDPOINT}" \ - --model "${MODEL_NAME}" \ - --eval-name gpqa \ - --num-examples "${NUM_EXAMPLES}" \ - --max-tokens "${MAX_TOKENS}" \ - --repeat "${REPEAT}" \ - --num-threads "${NUM_THREADS}" +command="python3 -m sglang.test.run_eval --base-url ${ENDPOINT} --model ${MODEL_NAME} --eval-name gpqa --num-examples ${NUM_EXAMPLES} --max-tokens ${MAX_TOKENS} --repeat ${REPEAT} --num-threads ${NUM_THREADS}" +echo "[CMD] $command" +eval "$command" # Copy result file result_file=$(ls -t /tmp/gpqa_*.json 2>/dev/null | head -n1) diff --git a/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh b/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh index 7a1643b8..0d4235ee 100644 --- a/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh +++ b/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh @@ -28,25 +28,19 @@ export OPENAI_API_KEY="${OPENAI_API_KEY:-EMPTY}" echo "Running LongBench-v2 evaluation..." 
# Build command -cmd="python3 -m sglang.test.run_eval \ - --base-url ${ENDPOINT} \ - --model ${MODEL_NAME} \ - --eval-name longbench_v2 \ - --max-tokens ${MAX_TOKENS} \ - --max-context-length ${MAX_CONTEXT_LENGTH} \ - --num-threads ${NUM_THREADS}" +command="python3 -m sglang.test.run_eval --base-url ${ENDPOINT} --model ${MODEL_NAME} --eval-name longbench_v2 --max-tokens ${MAX_TOKENS} --max-context-length ${MAX_CONTEXT_LENGTH} --num-threads ${NUM_THREADS}" # Add optional arguments if [ -n "$NUM_EXAMPLES" ]; then - cmd="$cmd --num-examples ${NUM_EXAMPLES}" + command="$command --num-examples ${NUM_EXAMPLES}" fi if [ -n "$CATEGORIES" ]; then - cmd="$cmd --categories ${CATEGORIES}" + command="$command --categories ${CATEGORIES}" fi -echo "Executing: $cmd" -eval "$cmd" +echo "[CMD] $command" +eval "$command" # Copy result files result_file=$(ls -t /tmp/longbench_v2_*.json 2>/dev/null | head -n1) diff --git a/src/srtctl/benchmarks/scripts/mmlu/bench.sh b/src/srtctl/benchmarks/scripts/mmlu/bench.sh index aff149ce..f1389d00 100644 --- a/src/srtctl/benchmarks/scripts/mmlu/bench.sh +++ b/src/srtctl/benchmarks/scripts/mmlu/bench.sh @@ -26,14 +26,9 @@ export OPENAI_API_KEY="${OPENAI_API_KEY:-EMPTY}" echo "Running MMLU evaluation..." -python3 -m sglang.test.run_eval \ - --base-url "${ENDPOINT}" \ - --model "${MODEL_NAME}" \ - --eval-name mmlu \ - --num-examples "${NUM_EXAMPLES}" \ - --max-tokens "${MAX_TOKENS}" \ - --repeat "${REPEAT}" \ - --num-threads "${NUM_THREADS}" +command="python3 -m sglang.test.run_eval --base-url ${ENDPOINT} --model ${MODEL_NAME} --eval-name mmlu --num-examples ${NUM_EXAMPLES} --max-tokens ${MAX_TOKENS} --repeat ${REPEAT} --num-threads ${NUM_THREADS}" +echo "[CMD] $command" +eval "$command" # Copy result file result_file=$(ls -t /tmp/mmlu_*.json 2>/dev/null | head -n1) diff --git a/src/srtctl/benchmarks/scripts/mooncake-router/bench.sh b/src/srtctl/benchmarks/scripts/mooncake-router/bench.sh index e84d711c..a21fc1a4 100644 --- a/src/srtctl/benchmarks/scripts/mooncake-router/bench.sh +++ b/src/srtctl/benchmarks/scripts/mooncake-router/bench.sh @@ -56,15 +56,9 @@ if [ ! -f "${INPUT_FILE}" ]; then fi # Run small benchmark for warmup -echo "Running small benchmark for warmup..." -aiperf profile \ - -m "${MODEL_NAME}" \ - --url "${ENDPOINT}" \ - --streaming \ - --ui simple \ - --concurrency 10 \ - --request-count 20 -echo "Small benchmark for warmup complete" +command="aiperf profile -m ${MODEL_NAME} --url ${ENDPOINT} --streaming --ui simple --concurrency 10 --request-count 20" +echo "[CMD-WARMUP] $command" +eval "$command" # Setup artifact directory with model and timestamp MODEL_BASE_NAME="${MODEL_NAME##*/}" @@ -80,18 +74,9 @@ echo "" echo "$(date '+%Y-%m-%d %H:%M:%S') - Starting benchmark" # Run aiperf profile exactly as dynamo does -aiperf profile \ - -m "${MODEL_NAME}" \ - --input-file "${INPUT_FILE}" \ - --custom-dataset-type mooncake_trace \ - --fixed-schedule \ - --url "${ENDPOINT}" \ - --streaming \ - --random-seed 42 \ - --ui simple \ - --artifact-dir "${RUN_ARTIFACT_DIR}" \ - --goodput "time_to_first_token:${TTFT_THRESHOLD} inter_token_latency:${ITL_THRESHOLD}" - +command="aiperf profile -m ${MODEL_NAME} --input-file ${INPUT_FILE} --custom-dataset-type mooncake_trace --fixed-schedule --url ${ENDPOINT} --streaming --random-seed 42 --ui simple --artifact-dir ${RUN_ARTIFACT_DIR} --goodput \"time_to_first_token:${TTFT_THRESHOLD} inter_token_latency:${ITL_THRESHOLD}\"" +echo "[CMD] $command" +eval "$command" BENCH_EXIT_CODE=$? 
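+# Note: "eval" exits with the status of the aiperf command it ran, so the
+# BENCH_EXIT_CODE captured above reflects the benchmark itself.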
echo "" diff --git a/src/srtctl/benchmarks/scripts/profiling/profile.sh b/src/srtctl/benchmarks/scripts/profiling/profile.sh index 6b426c34..a024a821 100644 --- a/src/srtctl/benchmarks/scripts/profiling/profile.sh +++ b/src/srtctl/benchmarks/scripts/profiling/profile.sh @@ -130,27 +130,15 @@ done if [[ "${PROFILING_MODE}" == "prefill" ]]; then echo "" echo "Generating profiling traffic..." - python3 -m sglang.bench_serving \ - --backend sglang \ - --model "${model_name}" \ - --host "${head_node}" --port "${head_port}" \ - --dataset-name random \ - --max-concurrency "${PROFILE_CONCURRENCY}" \ - --num-prompts 128 \ - --random-input-len "${PROFILE_ISL}" \ - --random-output-len "${PROFILE_OSL}" \ - --random-range-ratio 1 \ - --warmup-request 0 + + command="python3 -m sglang.bench_serving --backend sglang --model ${model_name} --host ${head_node} --port ${head_port} --dataset-name random --max-concurrency ${PROFILE_CONCURRENCY} --num-prompts 128 --random-input-len ${PROFILE_ISL} --random-output-len ${PROFILE_OSL} --random-range-ratio 1 --warmup-request 0" + echo "[CMD] $command" + eval "$command" # Run lm-eval for additional profiling coverage - echo "" - echo "Running lm-eval..." - pip install lm-eval tenacity > /dev/null 2>&1 - python -m lm_eval \ - --model local-completions \ - --tasks gsm8k \ - --model_args "base_url=http://${head_node}:${head_port}/v1/completions,model=${model_name},tokenized_requests=False,tokenizer_backend=None,num_concurrent=${PROFILE_CONCURRENCY},timeout=6000,max_retries=1" \ - --limit 10 + command="python -m lm_eval --model local-completions --tasks gsm8k --model_args \"base_url=http://${head_node}:${head_port}/v1/completions,model=${model_name},tokenized_requests=False,tokenizer_backend=None,num_concurrent=${PROFILE_CONCURRENCY},timeout=6000,max_retries=1\" --limit 10" + echo "[CMD-LM-EVAL] $command" + eval "$command" fi exit_code=$? diff --git a/src/srtctl/benchmarks/scripts/router/bench.sh b/src/srtctl/benchmarks/scripts/router/bench.sh index d559b85d..052ba7bb 100644 --- a/src/srtctl/benchmarks/scripts/router/bench.sh +++ b/src/srtctl/benchmarks/scripts/router/bench.sh @@ -40,13 +40,9 @@ echo "Running prefix ratio benchmark..." echo "Results will be saved to: $result_dir" # shellcheck disable=SC2086 -python prefix_ratio_benchmark.py \ - --prefix-ratios $PREFIX_RATIOS \ - --isl "$ISL" \ - --osl "$OSL" \ - --requests "$REQUESTS" \ - --concurrency "$CONCURRENCY" \ - --output-dir "$result_dir" +command="python prefix_ratio_benchmark.py --prefix-ratios $PREFIX_RATIOS --isl $ISL --osl $OSL --requests $REQUESTS --concurrency $CONCURRENCY --output-dir $result_dir" +echo "[CMD] $command" +eval "$command" echo "Router benchmark complete. 
Results in $result_dir"
diff --git a/src/srtctl/benchmarks/scripts/sa-bench/bench.sh b/src/srtctl/benchmarks/scripts/sa-bench/bench.sh
index 82043666..0f53d58b 100644
--- a/src/srtctl/benchmarks/scripts/sa-bench/bench.sh
+++ b/src/srtctl/benchmarks/scripts/sa-bench/bench.sh
@@ -52,20 +52,9 @@ mkdir -p "$result_dir"
 for concurrency in "${CONCURRENCY_LIST[@]}"; do
     num_warmup_prompts=$((concurrency * 2))
 
-    python3 -u "${WORK_DIR}/benchmark_serving.py" \
-        --model "${MODEL_NAME}" --tokenizer "${MODEL_PATH}" \
-        --host "$HOST" --port "$PORT" \
-        --backend "dynamo" --endpoint /v1/completions \
-        --disable-tqdm \
-        --dataset-name random \
-        --num-prompts "$num_warmup_prompts" \
-        --random-input-len "$ISL" \
-        --random-output-len "$OSL" \
-        --random-range-ratio 0.8 \
-        --ignore-eos \
-        --request-rate 250 \
-        --percentile-metrics ttft,tpot,itl,e2el \
-        --max-concurrency "$concurrency"
+    command="python3 -u ${WORK_DIR}/benchmark_serving.py --model ${MODEL_NAME} --tokenizer ${MODEL_PATH} --host $HOST --port $PORT --backend dynamo --endpoint /v1/completions --disable-tqdm --dataset-name random --num-prompts $num_warmup_prompts --random-input-len $ISL --random-output-len $OSL --random-range-ratio 0.8 --ignore-eos --request-rate 250 --percentile-metrics ttft,tpot,itl,e2el --max-concurrency $concurrency"
+    echo "[CMD-WARMUP] $command"
+    eval "$command"
 
     num_prompts=$((concurrency * 10))
 
@@ -79,22 +68,9 @@ for concurrency in "${CONCURRENCY_LIST[@]}"; do
     echo "Running benchmark with concurrency: $concurrency"
    echo "$(date '+%Y-%m-%d %H:%M:%S')"
 
-    python3 -u "${WORK_DIR}/benchmark_serving.py" \
-        --model "${MODEL_NAME}" --tokenizer "${MODEL_PATH}" \
-        --host "$HOST" --port "$PORT" \
-        --backend "dynamo" --endpoint /v1/completions \
-        --disable-tqdm \
-        --dataset-name random \
-        --num-prompts "$num_prompts" \
-        --random-input-len "$ISL" \
-        --random-output-len "$OSL" \
-        --random-range-ratio 0.8 \
-        --ignore-eos \
-        --request-rate "${REQ_RATE}" \
-        --percentile-metrics ttft,tpot,itl,e2el \
-        --max-concurrency "$concurrency" \
-        --use-chat-template \
-        --save-result --result-dir "$result_dir" --result-filename "$result_filename"
+    command="python3 -u ${WORK_DIR}/benchmark_serving.py --model ${MODEL_NAME} --tokenizer ${MODEL_PATH} --host $HOST --port $PORT --backend dynamo --endpoint /v1/completions --disable-tqdm --dataset-name random --num-prompts $num_prompts --random-input-len $ISL --random-output-len $OSL --random-range-ratio 0.8 --ignore-eos --request-rate ${REQ_RATE} --percentile-metrics ttft,tpot,itl,e2el --max-concurrency $concurrency --use-chat-template --save-result --result-dir $result_dir --result-filename $result_filename"
+    echo "[CMD] $command"
+    eval "$command"
 
     echo "$(date '+%Y-%m-%d %H:%M:%S')"
     echo "Completed benchmark with concurrency: $concurrency"
diff --git a/src/srtctl/core/schema.py b/src/srtctl/core/schema.py
index 09af2335..7af91efc 100644
--- a/src/srtctl/core/schema.py
+++ b/src/srtctl/core/schema.py
@@ -600,7 +600,7 @@ def get_install_commands(self) -> str:
             "cd dynamo && "
             f"{checkout_cmd + ' && ' if checkout_cmd else ''}"
             "cd lib/bindings/python/ && "
-            "export RUSTFLAGS=\"${RUSTFLAGS:-} -C target-cpu=native\" && "
+            'export RUSTFLAGS="${RUSTFLAGS:-} -C target-cpu=native" && '
             "maturin build -o /tmp && "
             "pip install /tmp/ai_dynamo_runtime*.whl && "
             "cd /sgl-workspace/dynamo/ && "
diff --git a/src/srtctl/frontends/dynamo.py b/src/srtctl/frontends/dynamo.py
index 5158ec0e..f8fbc6c2 100644
--- a/src/srtctl/frontends/dynamo.py
+++ b/src/srtctl/frontends/dynamo.py
@@ -100,7 +100,7 @@ def
start_frontends( bash_preamble=bash_preamble, # TODO(jthomson): I don't have the faintest clue of # why this is needed in later versions of Dynamo, but it is. - mpi="pmix", + mpi="pmix", ) processes.append( diff --git a/tests/fixtures_parsers.py b/tests/fixtures_parsers.py new file mode 100644 index 00000000..58938644 --- /dev/null +++ b/tests/fixtures_parsers.py @@ -0,0 +1,361 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Test fixtures and sample data for parser tests. + +Provides reusable test data, log samples, and utilities for testing parsers. +""" + +import json +from pathlib import Path +from typing import Any + + +class SampleSABenchData: + """Sample data for SA-Bench parser testing.""" + + @staticmethod + def benchmark_out_content() -> str: + """Sample benchmark.out content.""" + return """ +SA-Bench Config: endpoint=http://localhost:8000; isl=8192; osl=1024; concurrencies=50x100x200; req_rate=inf; model=Qwen/Qwen3-32B + +[CMD] python -m sglang.bench_serving --model Qwen/Qwen3-32B --base-url http://localhost:8000 --num-prompts 1000 --request-rate inf --max-concurrency 50 --random-input-len 8192 --random-output-len 1024 + +Starting benchmark run... +Concurrency: 50, Throughput: 2500.5 tok/s, TTFT: 150.5ms, ITL: 20.0ms +Concurrency: 100, Throughput: 5000.0 tok/s, TTFT: 180.0ms, ITL: 22.0ms +Concurrency: 200, Throughput: 9500.5 tok/s, TTFT: 250.0ms, ITL: 25.0ms +Benchmark complete. + """ + + @staticmethod + def result_json(concurrency: int = 100) -> dict[str, Any]: + """Sample result JSON data.""" + return { + "max_concurrency": concurrency, + "output_throughput": concurrency * 50.0, + "total_token_throughput": concurrency * 60.0, + "request_throughput": concurrency * 0.5, + "request_goodput": concurrency * 0.48, + "request_rate": float("inf"), + # Mean latencies + "mean_ttft_ms": 150.0 + concurrency * 0.5, + "mean_tpot_ms": 20.0 + concurrency * 0.1, + "mean_itl_ms": 18.0 + concurrency * 0.08, + "mean_e2el_ms": 2000.0 + concurrency * 5.0, + # Median latencies + "median_ttft_ms": 140.0 + concurrency * 0.45, + "median_tpot_ms": 19.0 + concurrency * 0.09, + "median_itl_ms": 17.0 + concurrency * 0.07, + "median_e2el_ms": 1900.0 + concurrency * 4.5, + # P99 latencies + "p99_ttft_ms": 250.0 + concurrency * 1.0, + "p99_tpot_ms": 40.0 + concurrency * 0.2, + "p99_itl_ms": 35.0 + concurrency * 0.15, + "p99_e2el_ms": 3000.0 + concurrency * 10.0, + # Std dev + "std_ttft_ms": 25.0, + "std_tpot_ms": 5.0, + "std_itl_ms": 3.0, + "std_e2el_ms": 200.0, + # Token counts + "total_input_tokens": concurrency * 8192, + "total_output_tokens": concurrency * 1024, + # Metadata + "duration": 120.5, + "completed": concurrency * 10, + "num_prompts": concurrency * 10, + } + + +class SampleMooncakeRouterData: + """Sample data for Mooncake Router parser testing.""" + + @staticmethod + def benchmark_out_content() -> str: + """Sample benchmark.out content.""" + return """ +Mooncake Router Benchmark +Endpoint: http://localhost:8000 +Model: Qwen/Qwen3-32B +Workload: conversation + +[CMD] aiperf profile -m "Qwen/Qwen3-32B" --url "http://localhost:8000" --concurrency 10 --synthetic-input-tokens-mean 8192 --output-tokens-mean 1024 + +Starting benchmark... 
+Request throughput: 3.37 req/s +Output token throughput: 1150.92 tok/s +Time to first token: 150.5 ms +Inter-token latency: 18.5 ms + """ + + @staticmethod + def aiperf_result_json() -> dict[str, Any]: + """Sample AIPerf result JSON data.""" + return { + "output_token_throughput": { + "avg": 1150.92, + "p50": 1100.0, + "p99": 1200.0, + "std": 50.0, + }, + "request_throughput": {"avg": 3.37, "p50": 3.3, "p99": 3.5, "std": 0.1}, + "time_to_first_token": { + "avg": 150.5, + "p50": 145.0, + "p99": 200.0, + "std": 25.0, + }, + "inter_token_latency": { + "avg": 18.5, + "p50": 18.0, + "p99": 25.0, + "std": 3.0, + }, + "request_latency": { + "avg": 2000.0, + "p50": 1900.0, + "p99": 2500.0, + "std": 200.0, + }, + "request_count": {"avg": 1000}, + "output_token_throughput_per_user": {"avg": 115.09}, + } + + +class SampleSGLangLogData: + """Sample data for SGLang node parser testing.""" + + @staticmethod + def prefill_log_content() -> str: + """Sample prefill worker log.""" + return """ +[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m [1msglang[0m Starting SGLang prefill worker +[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m [1msglang[0m server_args=ServerArgs(tp_size=8, dp_size=1, ep_size=1, model_path=/models/qwen3-32b, served_model_name=Qwen3-32B, host=10.0.0.1, port=30000, disaggregation_mode=prefill, context_length=131072, max_running_requests=1024, mem_fraction_static=0.85, kv_cache_dtype=fp8_e5m2) + +[CMD] python -m sglang.launch_server --model /models/qwen3-32b --served-model-name Qwen3-32B --tp-size 8 --dp-size 1 --ep-size 1 --host 10.0.0.1 --port 30000 --disaggregation-mode prefill --context-length 131072 --max-running-requests 1024 --mem-fraction-static 0.85 --kv-cache-dtype fp8_e5m2 + +[2m2025-12-30T15:52:40.123456Z[0m [32m INFO[0m [1msglang[0m Prefill batch, #new-seq: 8, #new-token: 65536, #cached-token: 0, token usage: 0.78, #running-req: 8, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 6500.5 +[2m2025-12-30T15:52:40.523456Z[0m [32m INFO[0m [1msglang[0m Prefill batch, #new-seq: 5, #new-token: 40960, #cached-token: 0, token usage: 0.85, #running-req: 13, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 5120.0 +[2m2025-12-30T15:52:41.123456Z[0m [32m INFO[0m [1msglang[0m Prefill batch, #new-seq: 10, #new-token: 81920, #cached-token: 16384, token usage: 0.90, #running-req: 23, #queue-req: 2, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 8192.0 +[2m2025-12-30T15:52:42.000000Z[0m [32m INFO[0m [1msglang[0m avail mem=75.11 GB, mem usage=107.07 GB +[2m2025-12-30T15:52:43.000000Z[0m [32m INFO[0m [1msglang[0m KV size: 32.50 GB, #tokens: 1048576 + """ + + @staticmethod + def decode_log_content() -> str: + """Sample decode worker log.""" + return """ +[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m [1msglang[0m Starting SGLang decode worker +[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m [1msglang[0m server_args=ServerArgs(tp_size=4, dp_size=1, ep_size=1, model_path=/models/qwen3-32b, disaggregation_mode=decode) + +[CMD] python -m sglang.launch_server --model /models/qwen3-32b --tp-size 4 --disaggregation-mode decode + +[2m2025-12-30T15:52:40.123456Z[0m [32m INFO[0m [1msglang[0m Decode batch, #running-req: 15, #token: 512, token usage: 0.65, pre-allocated usage: 0.10, #prealloc-req: 3, #transfer-req: 0, #queue-req: 0, gen throughput (token/s): 2048.0 +[2m2025-12-30T15:52:40.523456Z[0m [32m INFO[0m [1msglang[0m Decode batch, #running-req: 20, #token: 768, token usage: 0.72, pre-allocated usage: 0.15, #prealloc-req: 5, 
#transfer-req: 2, #queue-req: 0, gen throughput (token/s): 3072.0 +[2m2025-12-30T15:52:41.123456Z[0m [32m INFO[0m [1msglang[0m Decode batch, #running-req: 18, #token: 640, token usage: 0.70, pre-allocated usage: 0.12, #prealloc-req: 4, #transfer-req: 1, #queue-req: 0, gen throughput (token/s): 2560.0 +[2m2025-12-30T15:52:42.000000Z[0m [32m INFO[0m [1msglang[0m avail mem=85.00 GB, mem usage=97.00 GB +[2m2025-12-30T15:52:43.000000Z[0m [32m INFO[0m [1msglang[0m KV size: 48.00 GB, #tokens: 2097152 + """ + + +class SampleTRTLLMLogData: + """Sample data for TRTLLM node parser testing.""" + + @staticmethod + def prefill_log_content() -> str: + """Sample TRTLLM prefill worker log.""" + return """ +[33mRank0 run python3 -m dynamo.trtllm --model-path /models/qwen3-32b --served-model-name Qwen3-32B-fp8 --disaggregation-mode prefill --host 10.0.0.1 --port 30000[0m + +[CMD] python3 -m dynamo.trtllm --model-path /models/qwen3-32b --served-model-name Qwen3-32B-fp8 --disaggregation-mode prefill --host 10.0.0.1 --port 30000 + +Initializing the worker with config: Config(namespace=dynamo, component=prefill, tensor_parallel_size=8, pipeline_parallel_size=1, expert_parallel_size=1, max_batch_size=256, max_num_tokens=16384, max_seq_len=131072) + +TensorRT-LLM engine args: {'tensor_parallel_size': 8, 'pipeline_parallel_size': 1, 'moe_expert_parallel_size': 1, 'max_batch_size': 256, 'max_num_tokens': 16384, 'max_seq_len': 131072} + +[01/16/2026-06:20:15] [TRT-LLM] [I] Peak memory during memory usage profiling (torch + non-torch): 91.46 GiB, available KV cache memory when calculating max tokens: 41.11 GiB, fraction is set 0.85, kv size is 35136. device total memory 139.81 GiB + +[MemUsageChange] Allocated 41.11 GiB for max tokens (524288) + +[01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 5559, host_step_time = 62.5ms, num_scheduled_requests: 5, states = {'num_ctx_requests': 5, 'num_ctx_tokens': 40960, 'num_generation_tokens': 0} +[01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 5560, host_step_time = 80.0ms, num_scheduled_requests: 8, states = {'num_ctx_requests': 8, 'num_ctx_tokens': 65536, 'num_generation_tokens': 0} +[01/16/2026-06:20:18] [TRT-LLM] [RANK 0] [I] iter = 5561, host_step_time = 100.0ms, num_scheduled_requests: 10, states = {'num_ctx_requests': 10, 'num_ctx_tokens': 81920, 'num_generation_tokens': 0} + """ + + @staticmethod + def decode_log_content() -> str: + """Sample TRTLLM decode worker log.""" + return """ +[33mRank0 run python3 -m dynamo.trtllm --model-path /models/qwen3-32b --served-model-name Qwen3-32B-fp8 --disaggregation-mode decode --host 10.0.0.2 --port 30001[0m + +[CMD] python3 -m dynamo.trtllm --model-path /models/qwen3-32b --served-model-name Qwen3-32B-fp8 --disaggregation-mode decode --host 10.0.0.2 --port 30001 + +Initializing the worker with config: Config(tensor_parallel_size=4, pipeline_parallel_size=1, max_batch_size=512) + +TensorRT-LLM engine args: {'tensor_parallel_size': 4, 'pipeline_parallel_size': 1, 'max_batch_size': 512, 'max_seq_len': 131072} + +[01/16/2026-06:20:15] [TRT-LLM] [I] Peak memory during memory usage profiling (torch + non-torch): 75.50 GiB, available KV cache memory when calculating max tokens: 55.00 GiB, fraction is set 0.85, kv size is 45000. 
device total memory 139.81 GiB + +[01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 1000, host_step_time = 40.0ms, num_scheduled_requests: 20, states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 1024} +[01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 1001, host_step_time = 50.0ms, num_scheduled_requests: 25, states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 1280} +[01/16/2026-06:20:18] [TRT-LLM] [RANK 0] [I] iter = 1002, host_step_time = 45.0ms, num_scheduled_requests: 22, states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 1152} + """ + + +class ParserTestHarness: + """Test harness utilities for parser testing.""" + + @staticmethod + def create_sa_bench_run(temp_dir: Path, concurrencies: list[int] | None = None) -> Path: + """Create a complete SA-Bench run directory with result files. + + Args: + temp_dir: Temporary directory to create files in + concurrencies: List of concurrency levels to create (default: [50, 100, 200]) + + Returns: + Path to the run directory + """ + if concurrencies is None: + concurrencies = [50, 100, 200] + + run_dir = temp_dir / "sa_bench_run" + run_dir.mkdir(parents=True, exist_ok=True) + + # Create benchmark.out + benchmark_out = run_dir / "benchmark.out" + benchmark_out.write_text(SampleSABenchData.benchmark_out_content()) + + # Create result JSON files + for concurrency in concurrencies: + result_json = run_dir / f"result_c{concurrency}.json" + with open(result_json, "w") as f: + json.dump(SampleSABenchData.result_json(concurrency), f, indent=2) + + return run_dir + + @staticmethod + def create_mooncake_router_run(temp_dir: Path) -> Path: + """Create a Mooncake Router run directory with result file. + + Args: + temp_dir: Temporary directory to create files in + + Returns: + Path to the run directory + """ + run_dir = temp_dir / "mooncake_router_run" + run_dir.mkdir(parents=True, exist_ok=True) + + # Create benchmark.out + benchmark_out = run_dir / "benchmark.out" + benchmark_out.write_text(SampleMooncakeRouterData.benchmark_out_content()) + + # Create AIPerf result JSON + aiperf_json = run_dir / "profile_export_aiperf.json" + with open(aiperf_json, "w") as f: + json.dump(SampleMooncakeRouterData.aiperf_result_json(), f, indent=2) + + return run_dir + + @staticmethod + def create_sglang_node_logs( + temp_dir: Path, + num_prefill: int = 2, + num_decode: int = 4, + ) -> Path: + """Create SGLang node log directory with worker logs. + + Args: + temp_dir: Temporary directory to create files in + num_prefill: Number of prefill workers + num_decode: Number of decode workers + + Returns: + Path to the log directory + """ + log_dir = temp_dir + log_dir.mkdir(parents=True, exist_ok=True) + + # Create prefill worker logs + for i in range(num_prefill): + log_file = log_dir / f"node{i:02d}_prefill_w{i}.out" + log_file.write_text(SampleSGLangLogData.prefill_log_content()) + + # Create decode worker logs + for i in range(num_decode): + log_file = log_dir / f"node{i+10:02d}_decode_w{i}.out" + log_file.write_text(SampleSGLangLogData.decode_log_content()) + + return log_dir + + @staticmethod + def create_trtllm_node_logs( + temp_dir: Path, + num_prefill: int = 2, + num_decode: int = 4, + ) -> Path: + """Create TRTLLM node log directory with worker logs. 
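+
+        File names follow the worker-<n>_<type>_w<i>.out pattern that the
+        TRTLLM node parser's filename extraction expects.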
+
+        Args:
+            temp_dir: Temporary directory to create files in
+            num_prefill: Number of prefill workers
+            num_decode: Number of decode workers
+
+        Returns:
+            Path to the log directory
+        """
+        log_dir = temp_dir
+        log_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create prefill worker logs
+        for i in range(num_prefill):
+            log_file = log_dir / f"worker-{i}_prefill_w{i}.out"
+            log_file.write_text(SampleTRTLLMLogData.prefill_log_content())
+
+        # Create decode worker logs
+        for i in range(num_decode):
+            log_file = log_dir / f"worker-{i+10}_decode_w{i}.out"
+            log_file.write_text(SampleTRTLLMLogData.decode_log_content())
+
+        return log_dir
+
+    @staticmethod
+    def assert_valid_benchmark_results(results: dict, expected_fields: list[str] | None = None):
+        """Assert that benchmark results contain valid data.
+
+        Args:
+            results: Benchmark results dictionary
+            expected_fields: List of fields that must be present (optional)
+        """
+        if expected_fields is None:
+            expected_fields = [
+                "output_tps",
+                "mean_ttft_ms",
+                "mean_itl_ms",
+            ]
+
+        for field in expected_fields:
+            assert field in results, f"Missing expected field: {field}"
+            value = results[field]
+            # The field must be present and non-None; an empty list still counts as parsed
+            assert value is not None, f"Field {field} is None"
+
+    @staticmethod
+    def assert_valid_node_metrics(node_metrics, min_batches: int = 0):
+        """Assert that node metrics are valid.
+
+        Args:
+            node_metrics: NodeMetrics object
+            min_batches: Minimum number of batches expected
+        """
+        assert node_metrics is not None
+        assert node_metrics.node_name
+        assert node_metrics.worker_type
+        assert node_metrics.worker_id
+        assert len(node_metrics.batches) >= min_batches
+        assert isinstance(node_metrics.config, dict)
+
diff --git a/tests/test_parsers.py b/tests/test_parsers.py
new file mode 100644
index 00000000..0046583d
--- /dev/null
+++ b/tests/test_parsers.py
@@ -0,0 +1,743 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Tests for benchmark and node log parsers.
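+
+Run with: pytest tests/test_parsers.py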
+ +Tests the parsing infrastructure including: +- Parser registry (benchmark and node parsers) +- SA-Bench parser +- Mooncake Router parser +- SGLang node parser +- TRTLLM node parser +""" + +import json +import tempfile +from pathlib import Path + +import pytest + +from analysis.srtlog.parsers import ( + BenchmarkLaunchCommand, + NodeLaunchCommand, + get_benchmark_parser, + get_node_parser, + list_benchmark_parsers, + list_node_parsers, +) +from tests.fixtures_parsers import ( + ParserTestHarness, + SampleMooncakeRouterData, + SampleSABenchData, + SampleSGLangLogData, + SampleTRTLLMLogData, +) + + +class TestParserRegistry: + """Test the parser registration system.""" + + def test_list_benchmark_parsers(self): + """Test listing registered benchmark parsers.""" + parsers = list_benchmark_parsers() + assert "sa-bench" in parsers + assert "mooncake-router" in parsers + assert len(parsers) >= 2 + + def test_list_node_parsers(self): + """Test listing registered node parsers.""" + parsers = list_node_parsers() + assert "sglang" in parsers + assert "trtllm" in parsers + assert len(parsers) >= 2 + + def test_get_benchmark_parser_sa_bench(self): + """Test getting SA-Bench parser.""" + parser = get_benchmark_parser("sa-bench") + assert parser.benchmark_type == "sa-bench" + + def test_get_benchmark_parser_mooncake_router(self): + """Test getting Mooncake Router parser.""" + parser = get_benchmark_parser("mooncake-router") + assert parser.benchmark_type == "mooncake-router" + + def test_get_benchmark_parser_invalid(self): + """Test getting invalid benchmark parser.""" + with pytest.raises(ValueError, match="No benchmark parser registered"): + get_benchmark_parser("invalid-benchmark") + + def test_get_node_parser_sglang(self): + """Test getting SGLang parser.""" + parser = get_node_parser("sglang") + assert parser.backend_type == "sglang" + + def test_get_node_parser_trtllm(self): + """Test getting TRTLLM parser.""" + parser = get_node_parser("trtllm") + assert parser.backend_type == "trtllm" + + def test_get_node_parser_invalid(self): + """Test getting invalid node parser.""" + with pytest.raises(ValueError, match="No node parser registered"): + get_node_parser("invalid-backend") + + +class TestSABenchParser: + """Test SA-Bench benchmark parser.""" + + @pytest.fixture + def parser(self): + """Get SA-Bench parser instance.""" + return get_benchmark_parser("sa-bench") + + @pytest.fixture + def temp_dir(self): + """Create temporary directory for test files.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + def test_parser_type(self, parser): + """Test parser type property.""" + assert parser.benchmark_type == "sa-bench" + + def test_parse_result_json(self, parser, temp_dir): + """Test parsing SA-Bench result JSON file.""" + # Create sample result JSON + result_data = { + "max_concurrency": 100, + "output_throughput": 5000.5, + "total_token_throughput": 6000.0, + "request_throughput": 50.5, + "mean_ttft_ms": 150.5, + "mean_tpot_ms": 20.5, + "mean_itl_ms": 18.5, + "mean_e2el_ms": 2000.0, + "p99_ttft_ms": 250.0, + "p99_tpot_ms": 40.0, + "p99_itl_ms": 35.0, + "p99_e2el_ms": 3000.0, + "total_input_tokens": 100000, + "total_output_tokens": 50000, + "completed": 1000, + "duration": 120.5, + } + + json_path = temp_dir / "result_c100.json" + with open(json_path, "w") as f: + json.dump(result_data, f) + + # Parse the file + result = parser.parse_result_json(json_path) + + # Verify parsing + assert result["max_concurrency"] == 100 + assert result["output_throughput"] == 5000.5 + assert 
result["mean_ttft_ms"] == 150.5 + assert result["p99_ttft_ms"] == 250.0 + assert result["total_input_tokens"] == 100000 + assert result["completed"] == 1000 + + def test_parse_result_directory(self, parser, temp_dir): + """Test parsing multiple result JSON files.""" + # Create multiple result files + for concurrency in [50, 100, 200]: + result_data = { + "max_concurrency": concurrency, + "output_throughput": concurrency * 50.0, + "mean_ttft_ms": 150.0, + } + json_path = temp_dir / f"result_c{concurrency}.json" + with open(json_path, "w") as f: + json.dump(result_data, f) + + # Parse all files + results = parser.parse_result_directory(temp_dir) + + # Verify results are sorted by concurrency + assert len(results) == 3 + assert results[0]["max_concurrency"] == 50 + assert results[1]["max_concurrency"] == 100 + assert results[2]["max_concurrency"] == 200 + + def test_parse_launch_command_tagged(self, parser): + """Test parsing SA-Bench command with [CMD] tag.""" + log_content = """ +[CMD] python -m sglang.bench_serving --model Qwen/Qwen3-32B --base-url http://localhost:8000 --num-prompts 1000 --request-rate inf --max-concurrency 100 --input-len 8192 --output-len 1024 + """ + + cmd = parser.parse_launch_command(log_content) + + assert cmd is not None + assert cmd.benchmark_type == "sa-bench" + assert "python -m sglang.bench_serving" in cmd.raw_command + assert cmd.extra_args["model"] == "Qwen/Qwen3-32B" + assert cmd.extra_args["base_url"] == "http://localhost:8000" + assert cmd.extra_args["num_prompts"] == 1000 + assert cmd.extra_args["max_concurrency"] == 100 + assert cmd.extra_args["input_len"] == 8192 + assert cmd.extra_args["output_len"] == 1024 + + def test_parse_launch_command_header_format(self, parser): + """Test parsing SA-Bench config from header format.""" + log_content = """ +SA-Bench Config: endpoint=http://localhost:8000; isl=8192; osl=1024; concurrencies=28; req_rate=inf; model=dsr1-fp8 + """ + + cmd = parser.parse_launch_command(log_content) + + assert cmd is not None + assert cmd.benchmark_type == "sa-bench" + assert cmd.extra_args["base_url"] == "http://localhost:8000" + assert cmd.extra_args["input_len"] == 8192 + assert cmd.extra_args["output_len"] == 1024 + assert cmd.extra_args["max_concurrency"] == 28 + assert cmd.extra_args["request_rate"] == "inf" + assert cmd.extra_args["model"] == "dsr1-fp8" + + def test_parse_launch_command_not_found(self, parser): + """Test parsing when no command is found.""" + log_content = "Some random log content\nNo benchmark commands here\nJust regular logs" + cmd = parser.parse_launch_command(log_content) + assert cmd is None + + +class TestMooncakeRouterParser: + """Test Mooncake Router benchmark parser.""" + + @pytest.fixture + def parser(self): + """Get Mooncake Router parser instance.""" + return get_benchmark_parser("mooncake-router") + + @pytest.fixture + def temp_dir(self): + """Create temporary directory for test files.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + def test_parser_type(self, parser): + """Test parser type property.""" + assert parser.benchmark_type == "mooncake-router" + + def test_parse_result_json(self, parser, temp_dir): + """Test parsing AIPerf result JSON file.""" + # Create sample AIPerf JSON + result_data = { + "output_token_throughput": {"avg": 1150.92, "p50": 1100.0, "p99": 1200.0, "std": 50.0}, + "request_throughput": {"avg": 3.37, "p50": 3.3, "p99": 3.5, "std": 0.1}, + "time_to_first_token": {"avg": 150.5, "p50": 145.0, "p99": 200.0, "std": 25.0}, + "inter_token_latency": 
{"avg": 18.5, "p50": 18.0, "p99": 25.0, "std": 3.0}, + "request_latency": {"avg": 2000.0, "p50": 1900.0, "p99": 2500.0, "std": 200.0}, + "request_count": {"avg": 1000}, + } + + json_path = temp_dir / "profile_export_aiperf.json" + with open(json_path, "w") as f: + json.dump(result_data, f) + + # Parse the file + result = parser.parse_result_json(json_path) + + # Verify parsing + assert result["output_tps"] == 1150.92 + assert result["request_throughput"] == 3.37 + assert result["mean_ttft_ms"] == 150.5 + assert result["median_ttft_ms"] == 145.0 + assert result["p99_ttft_ms"] == 200.0 + assert result["mean_itl_ms"] == 18.5 + assert result["completed"] == 1000 + + def test_parse_launch_command_aiperf(self, parser): + """Test parsing AIPerf command.""" + log_content = """ +[CMD] aiperf profile -m "Qwen/Qwen3-32B" --url "http://localhost:8000" --concurrency 10 --request-count 1000 + """ + + cmd = parser.parse_launch_command(log_content) + + assert cmd is not None + assert cmd.benchmark_type == "mooncake-router" + assert "aiperf" in cmd.raw_command + assert cmd.extra_args["model"] == "Qwen/Qwen3-32B" + assert cmd.extra_args["base_url"] == "http://localhost:8000" + assert cmd.extra_args["max_concurrency"] == 10 + assert cmd.extra_args["num_prompts"] == 1000 + + def test_parse_launch_command_header(self, parser): + """Test parsing from header format.""" + log_content = """ +Mooncake Router Benchmark +Endpoint: http://localhost:8000 +Model: Qwen/Qwen3-32B +Workload: conversation + """ + + cmd = parser.parse_launch_command(log_content) + + assert cmd is not None + assert cmd.benchmark_type == "mooncake-router" + assert cmd.extra_args["base_url"] == "http://localhost:8000" + assert cmd.extra_args["model"] == "Qwen/Qwen3-32B" + assert cmd.extra_args["dataset"] == "conversation" + + +class TestSGLangNodeParser: + """Test SGLang node log parser.""" + + @pytest.fixture + def parser(self): + """Get SGLang parser instance.""" + return get_node_parser("sglang") + + @pytest.fixture + def temp_dir(self): + """Create temporary directory for test files.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + def test_parser_type(self, parser): + """Test parser type property.""" + assert parser.backend_type == "sglang" + + def test_parse_prefill_batch_line(self, parser): + """Test parsing prefill batch log line.""" + line = "[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m Prefill batch, #new-seq: 5, #new-token: 40960, #cached-token: 0, token usage: 0.78, #running-req: 5, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 5000.5" + + metrics = parser._parse_prefill_batch_line(line) + + assert metrics is not None + assert metrics["type"] == "prefill" + assert metrics["new_seq"] == 5 + assert metrics["new_token"] == 40960 + assert metrics["cached_token"] == 0 + assert metrics["token_usage"] == 0.78 + assert metrics["running_req"] == 5 + assert metrics["input_throughput"] == 5000.5 + + def test_parse_decode_batch_line(self, parser): + """Test parsing decode batch log line.""" + line = "[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m Decode batch, #running-req: 10, #token: 512, token usage: 0.85, pre-allocated usage: 0.10, #prealloc-req: 2, #transfer-req: 0, #queue-req: 0, gen throughput (token/s): 1500.5" + + metrics = parser._parse_decode_batch_line(line) + + assert metrics is not None + assert metrics["type"] == "decode" + assert metrics["running_req"] == 10 + assert metrics["num_tokens"] == 512 + assert metrics["token_usage"] == 0.85 + assert 
metrics["preallocated_usage"] == 0.10 + assert metrics["gen_throughput"] == 1500.5 + + def test_parse_memory_line(self, parser): + """Test parsing memory log line.""" + line = "[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m avail mem=75.11 GB, mem usage=107.07 GB, KV size: 17.16 GB, #tokens: 524288" + + metrics = parser._parse_memory_line(line) + + assert metrics is not None + # This line has KV size, so it should be marked as kv_cache type + assert metrics["type"] == "kv_cache" + assert metrics["avail_mem_gb"] == 75.11 + assert metrics["mem_usage_gb"] == 107.07 + assert metrics["kv_cache_gb"] == 17.16 + assert metrics["kv_tokens"] == 524288 + + def test_parse_memory_line_without_kv(self, parser): + """Test parsing memory log line without KV info.""" + line = "[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m avail mem=75.11 GB, mem usage=107.07 GB" + + metrics = parser._parse_memory_line(line) + + assert metrics is not None + assert metrics["type"] == "memory" + assert metrics["avail_mem_gb"] == 75.11 + assert metrics["mem_usage_gb"] == 107.07 + + def test_parse_single_log(self, parser, temp_dir): + """Test parsing a complete SGLang log file.""" + log_content = """ +[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m Starting SGLang server with server_args=ServerArgs(tp_size=8, dp_size=1, ep_size=1, model_path=/models/qwen3-32b) +[2m2025-12-30T15:52:40.206058Z[0m [32m INFO[0m Prefill batch, #new-seq: 5, #new-token: 40960, #cached-token: 0, token usage: 0.78, #running-req: 5, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 5000.5 +[2m2025-12-30T15:52:41.206058Z[0m [32m INFO[0m Decode batch, #running-req: 5, #token: 512, token usage: 0.85, gen throughput (token/s): 1500.5 +[2m2025-12-30T15:52:42.206058Z[0m [32m INFO[0m avail mem=75.11 GB, mem usage=107.07 GB + """ + + log_path = temp_dir / "eos0219_prefill_w0.out" + log_path.write_text(log_content) + + node = parser.parse_single_log(log_path) + + assert node is not None + assert node.node_name == "eos0219" + assert node.worker_type == "prefill" + assert node.worker_id == "w0" + assert len(node.batches) == 2 # 1 prefill + 1 decode + assert len(node.memory_snapshots) == 1 + assert node.config["tp_size"] == 8 + assert node.config["dp_size"] == 1 + assert node.config["ep_size"] == 1 + + def test_parse_launch_command(self, parser): + """Test parsing SGLang launch command.""" + log_content = """ +[CMD] python -m sglang.launch_server --model /models/qwen3-32b --tp-size 8 --dp-size 1 --host 10.0.0.1 --port 30000 --max-num-seqs 1024 --disaggregation-mode prefill + """ + + cmd = parser.parse_launch_command(log_content, "prefill") + + assert cmd is not None + assert cmd.backend_type == "sglang" + assert cmd.worker_type == "prefill" + assert cmd.extra_args["model_path"] == "/models/qwen3-32b" + assert cmd.extra_args["tp_size"] == 8 + assert cmd.extra_args["dp_size"] == 1 + assert cmd.extra_args["host"] == "10.0.0.1" + assert cmd.extra_args["port"] == 30000 + assert cmd.extra_args["max_num_seqs"] == 1024 + assert cmd.extra_args["disaggregation_mode"] == "prefill" + + def test_extract_node_info_from_filename(self, parser): + """Test extracting node info from filename.""" + result = parser._extract_node_info_from_filename("eos0219_prefill_w0.out") + + assert result is not None + assert result["node"] == "eos0219" + assert result["worker_type"] == "prefill" + assert result["worker_id"] == "w0" + + +class TestTRTLLMNodeParser: + """Test TRTLLM node log parser.""" + + @pytest.fixture + def parser(self): + """Get TRTLLM parser 
instance.""" + return get_node_parser("trtllm") + + @pytest.fixture + def temp_dir(self): + """Create temporary directory for test files.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + def test_parser_type(self, parser): + """Test parser type property.""" + assert parser.backend_type == "trtllm" + + def test_parse_iteration_logs(self, parser): + """Test parsing TRTLLM iteration logs.""" + log_content = """ +[01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 5559, host_step_time = 50.5ms, num_scheduled_requests: 3, states = {'num_ctx_requests': 2, 'num_ctx_tokens': 16384, 'num_generation_tokens': 0} +[01/16/2026-06:20:18] [TRT-LLM] [RANK 0] [I] iter = 5560, host_step_time = 20.0ms, num_scheduled_requests: 5, states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 512} + """ + + batches = parser._parse_iteration_logs(log_content, "prefill") + + assert len(batches) == 2 + + # First batch (prefill) + assert batches[0].batch_type == "prefill" + assert batches[0].running_req == 3 + assert batches[0].new_token == 16384 + assert batches[0].input_throughput is not None + assert batches[0].input_throughput > 300000 # 16384 * 1000 / 50.5 + + # Second batch (decode) + assert batches[1].batch_type == "decode" + assert batches[1].running_req == 5 + assert batches[1].num_tokens == 512 + assert batches[1].gen_throughput is not None + assert batches[1].gen_throughput > 25000 # 512 * 1000 / 20 + + def test_parse_memory_info(self, parser): + """Test parsing TRTLLM memory information.""" + log_content = """ +[01/16/2026-06:20:17] [TRT-LLM] [I] Peak memory during memory usage profiling (torch + non-torch): 91.46 GiB, available KV cache memory when calculating max tokens: 41.11 GiB, fraction is set 0.85, kv size is 35136. 
device total memory 139.81 GiB +[MemUsageChange] Allocated 41.11 GiB for max tokens (524288) + """ + + memory_snapshots = parser._parse_memory_info(log_content) + + assert len(memory_snapshots) == 2 + + # First snapshot (peak memory) + assert memory_snapshots[0].metric_type == "memory" + assert memory_snapshots[0].mem_usage_gb == 91.46 + assert memory_snapshots[0].kv_cache_gb == 41.11 + assert memory_snapshots[0].avail_mem_gb > 48 # 139.81 - 91.46 + + # Second snapshot (KV allocation) + assert memory_snapshots[1].metric_type == "kv_cache" + assert memory_snapshots[1].kv_cache_gb == 41.11 + assert memory_snapshots[1].kv_tokens == 524288 + + def test_parse_single_log(self, parser, temp_dir): + """Test parsing a complete TRTLLM log file.""" + log_content = """ +[33mRank0 run python3 -m dynamo.trtllm --model-path /model --served-model-name dsr1-fp8 --disaggregation-mode prefill[0m +Initializing the worker with config: Config(tensor_parallel_size=8, pipeline_parallel_size=1, expert_parallel_size=1, max_batch_size=256) +TensorRT-LLM engine args: {'tensor_parallel_size': 8, 'pipeline_parallel_size': 1, 'moe_expert_parallel_size': 1, 'max_batch_size': 256, 'max_seq_len': 131072} +[01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 5559, num_scheduled_requests: 3, states = {'num_ctx_requests': 2, 'num_ctx_tokens': 16384, 'num_generation_tokens': 0} + """ + + log_path = temp_dir / "worker-0_prefill_w0.out" + log_path.write_text(log_content) + + node = parser.parse_single_log(log_path) + + assert node is not None + assert node.node_name == "worker-0" + assert node.worker_type == "prefill" + assert node.worker_id == "w0" + assert len(node.batches) == 1 + assert node.config["tp_size"] == 8 + assert node.config["pp_size"] == 1 + assert node.config["ep_size"] == 1 + assert node.config["max_batch_size"] == 256 + assert node.config["max_seq_len"] == 131072 + + def test_parse_launch_command(self, parser): + """Test parsing TRTLLM launch command.""" + log_content = """ +[CMD] python3 -m dynamo.trtllm --model-path /models/qwen3-32b --served-model-name dsr1-fp8 --disaggregation-mode prefill --host 10.0.0.1 --port 30000 +TensorRT-LLM engine args: {'tensor_parallel_size': 8, 'pipeline_parallel_size': 1, 'max_batch_size': 256, 'max_seq_len': 131072} + """ + + cmd = parser.parse_launch_command(log_content, "prefill") + + assert cmd is not None + assert cmd.backend_type == "trtllm" + assert cmd.worker_type == "prefill" + assert cmd.extra_args["model_path"] == "/models/qwen3-32b" + assert cmd.extra_args["served_model_name"] == "dsr1-fp8" + assert cmd.extra_args["disaggregation_mode"] == "prefill" + assert cmd.extra_args["host"] == "10.0.0.1" + assert cmd.extra_args["port"] == 30000 + assert cmd.extra_args["tp_size"] == 8 + assert cmd.extra_args["pp_size"] == 1 + assert cmd.extra_args["max_num_seqs"] == 256 + assert cmd.extra_args["max_model_len"] == 131072 + + def test_extract_node_info_from_filename(self, parser): + """Test extracting node info from filename.""" + result = parser._extract_node_info_from_filename("worker-0_decode_w1.err") + + assert result is not None + assert result["node"] == "worker-0" + assert result["worker_type"] == "decode" + assert result["worker_id"] == "w1" + + +class TestBenchmarkLaunchCommand: + """Test BenchmarkLaunchCommand dataclass.""" + + def test_create_benchmark_launch_command(self): + """Test creating BenchmarkLaunchCommand.""" + cmd = BenchmarkLaunchCommand( + benchmark_type="sa-bench", + raw_command="python -m sglang.bench_serving --model test", + extra_args={"model": "test", 
"num_prompts": 1000}, + ) + + assert cmd.benchmark_type == "sa-bench" + assert "sglang.bench_serving" in cmd.raw_command + assert cmd.extra_args["model"] == "test" + assert cmd.extra_args["num_prompts"] == 1000 + + +class TestNodeLaunchCommand: + """Test NodeLaunchCommand dataclass.""" + + def test_create_node_launch_command(self): + """Test creating NodeLaunchCommand.""" + cmd = NodeLaunchCommand( + backend_type="sglang", + worker_type="prefill", + raw_command="python -m sglang.launch_server --model test", + extra_args={"model_path": "test", "tp_size": 8}, + ) + + assert cmd.backend_type == "sglang" + assert cmd.worker_type == "prefill" + assert "sglang.launch_server" in cmd.raw_command + assert cmd.extra_args["model_path"] == "test" + assert cmd.extra_args["tp_size"] == 8 + + +class TestParserIntegration: + """Integration tests for parser workflows.""" + + @pytest.fixture + def temp_dir(self): + """Create temporary directory for test files.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + def test_parse_complete_sa_bench_run(self, temp_dir): + """Test parsing a complete SA-Bench run with multiple concurrencies.""" + parser = get_benchmark_parser("sa-bench") + + # Use test harness to create run directory + run_dir = ParserTestHarness.create_sa_bench_run(temp_dir, concurrencies=[50, 100, 200]) + + # Parse all results + results = parser.parse_result_directory(run_dir) + + assert len(results) == 3 + # Verify it's sorted by concurrency + assert [r["max_concurrency"] for r in results] == [50, 100, 200] + # Verify throughput scales with concurrency + assert results[0]["output_throughput"] == 2500.0 + assert results[1]["output_throughput"] == 5000.0 + assert results[2]["output_throughput"] == 10000.0 + + # Verify using harness utility + for result in results: + ParserTestHarness.assert_valid_benchmark_results( + result, + expected_fields=[ + "output_throughput", + "mean_ttft_ms", + "mean_itl_ms", + "p99_ttft_ms", + ], + ) + + def test_parse_mooncake_router_run(self, temp_dir): + """Test parsing a complete Mooncake Router run.""" + parser = get_benchmark_parser("mooncake-router") + + # Use test harness to create run directory + run_dir = ParserTestHarness.create_mooncake_router_run(temp_dir) + + # Find and parse AIPerf results + aiperf_files = parser.find_aiperf_results(run_dir) + assert len(aiperf_files) == 1 + + result = parser.parse_result_json(aiperf_files[0]) + ParserTestHarness.assert_valid_benchmark_results( + result, + expected_fields=["output_tps", "request_throughput", "mean_ttft_ms"], + ) + + def test_parse_sglang_node_logs_multiple_workers(self, temp_dir): + """Test parsing multiple SGLang node log files.""" + parser = get_node_parser("sglang") + + # Use test harness to create log directory + log_dir = ParserTestHarness.create_sglang_node_logs(temp_dir, num_prefill=2, num_decode=4) + + # Parse all logs + nodes = parser.parse_logs(log_dir) + + assert len(nodes) == 6 # 2 prefill + 4 decode + worker_types = [node.worker_type for node in nodes] + assert worker_types.count("prefill") == 2 + assert worker_types.count("decode") == 4 + + # Verify each node + for node in nodes: + ParserTestHarness.assert_valid_node_metrics(node, min_batches=1) + + def test_parse_trtllm_node_logs_multiple_workers(self, temp_dir): + """Test parsing multiple TRTLLM node log files.""" + parser = get_node_parser("trtllm") + + # Use test harness to create log directory + log_dir = ParserTestHarness.create_trtllm_node_logs(temp_dir, num_prefill=2, num_decode=4) + + # Parse all logs + nodes 
= parser.parse_logs(log_dir) + + assert len(nodes) == 6 # 2 prefill + 4 decode + worker_types = [node.worker_type for node in nodes] + assert worker_types.count("prefill") == 2 + assert worker_types.count("decode") == 4 + + # Verify each node has config + for node in nodes: + ParserTestHarness.assert_valid_node_metrics(node, min_batches=1) + assert "tp_size" in node.config or "max_batch_size" in node.config + + +class TestParserWithFixtures: + """Tests using sample data fixtures.""" + + def test_sa_bench_sample_data(self): + """Test SA-Bench parser with sample data.""" + parser = get_benchmark_parser("sa-bench") + + # Parse launch command from sample + log_content = SampleSABenchData.benchmark_out_content() + cmd = parser.parse_launch_command(log_content) + + assert cmd is not None + assert cmd.benchmark_type == "sa-bench" + assert "model" in cmd.extra_args + assert "base_url" in cmd.extra_args + + def test_mooncake_router_sample_data(self): + """Test Mooncake Router parser with sample data.""" + parser = get_benchmark_parser("mooncake-router") + + # Parse launch command from sample + log_content = SampleMooncakeRouterData.benchmark_out_content() + cmd = parser.parse_launch_command(log_content) + + assert cmd is not None + assert cmd.benchmark_type == "mooncake-router" + + def test_sglang_prefill_sample_data(self): + """Test SGLang parser with prefill sample data.""" + parser = get_node_parser("sglang") + + # Parse launch command from sample + log_content = SampleSGLangLogData.prefill_log_content() + cmd = parser.parse_launch_command(log_content, "prefill") + + assert cmd is not None + assert cmd.backend_type == "sglang" + assert cmd.worker_type == "prefill" + assert "tp_size" in cmd.extra_args + assert cmd.extra_args["tp_size"] == 8 + + def test_sglang_decode_sample_data(self): + """Test SGLang parser with decode sample data.""" + parser = get_node_parser("sglang") + + # Parse launch command from sample + log_content = SampleSGLangLogData.decode_log_content() + cmd = parser.parse_launch_command(log_content, "decode") + + assert cmd is not None + assert cmd.backend_type == "sglang" + assert cmd.worker_type == "decode" + assert "tp_size" in cmd.extra_args + + def test_trtllm_prefill_sample_data(self): + """Test TRTLLM parser with prefill sample data.""" + parser = get_node_parser("trtllm") + + # Parse launch command from sample + log_content = SampleTRTLLMLogData.prefill_log_content() + cmd = parser.parse_launch_command(log_content, "prefill") + + assert cmd is not None + assert cmd.backend_type == "trtllm" + assert cmd.worker_type == "prefill" + assert "disaggregation_mode" in cmd.extra_args + assert cmd.extra_args["disaggregation_mode"] == "prefill" + + def test_trtllm_decode_sample_data(self): + """Test TRTLLM parser with decode sample data.""" + parser = get_node_parser("trtllm") + + # Parse launch command from sample + log_content = SampleTRTLLMLogData.decode_log_content() + cmd = parser.parse_launch_command(log_content, "decode") + + assert cmd is not None + assert cmd.backend_type == "trtllm" + assert cmd.worker_type == "decode" + diff --git a/tests/test_runloader_parsers.py b/tests/test_runloader_parsers.py new file mode 100644 index 00000000..68070dc6 --- /dev/null +++ b/tests/test_runloader_parsers.py @@ -0,0 +1,332 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Tests for RunLoader integration with parsers. + +Tests that the RunLoader correctly uses the parser infrastructure. 
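+
+Covers benchmark-result parsing via registered parsers, the manual-parser
+fallback, and node-metrics loading for both sglang and trtllm containers.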
+""" + +import json +import tempfile +from pathlib import Path + +import pytest + +from analysis.srtlog.models import BenchmarkRun +from analysis.srtlog.run_loader import RunLoader +from tests.fixtures_parsers import ParserTestHarness, SampleSABenchData + + +class TestRunLoaderWithParsers: + """Test RunLoader integration with parser infrastructure.""" + + @pytest.fixture + def temp_dir(self): + """Create temporary directory for test files.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + @pytest.fixture + def sample_run_metadata(self): + """Sample run metadata JSON.""" + return { + "job_id": "12345", + "job_name": "test_run", + "generated_at": "20250126_120000", + "model": { + "path": "/models/test", + "container": "sglang:latest", + }, + "resources": { + "prefill_nodes": 1, + "decode_nodes": 1, + "prefill_workers": 2, + "decode_workers": 4, + "agg_workers": 0, + "gpus_per_node": 8, + "gpu_type": "H100", + }, + "benchmark": { + "type": "sa-bench", + "isl": "8192", + "osl": "1024", + "concurrencies": "50x100x200", + "req-rate": "inf", + }, + "tags": ["test"], + } + + def test_parse_sa_bench_with_parser(self, temp_dir, sample_run_metadata): + """Test that RunLoader uses SA-Bench parser correctly.""" + # Create run directory + run_dir = temp_dir / "12345_2P_4D_20250126_120000" + run_dir.mkdir() + + # Create metadata JSON + metadata_path = run_dir / "12345.json" + with open(metadata_path, "w") as f: + json.dump(sample_run_metadata, f) + + # Create benchmark results using test harness + bench_dir = run_dir / "sa-bench_isl_8192_osl_1024" + bench_dir.mkdir() + + # Create result JSON files directly in bench_dir + for concurrency in [50, 100, 200]: + result_data = SampleSABenchData.result_json(concurrency) + result_path = bench_dir / f"result_c{concurrency}.json" + with open(result_path, "w") as f: + json.dump(result_data, f) + + # Load the run + loader = RunLoader(str(temp_dir)) + run = loader.load_single("12345_2P_4D_20250126_120000") + + # Verify run was loaded + assert run is not None + assert run.job_id == "12345" + + # Verify benchmark results were parsed + assert len(run.profiler.output_tps) == 3 + assert run.profiler.output_tps[0] == 2500.0 # 50 * 50 + assert run.profiler.output_tps[1] == 5000.0 # 100 * 50 + assert run.profiler.output_tps[2] == 10000.0 # 200 * 50 + + # Verify concurrencies + assert run.profiler.concurrency_values == [50, 100, 200] + + def test_load_all_runs_with_parsers(self, temp_dir, sample_run_metadata): + """Test loading multiple runs with parser infrastructure.""" + # Create multiple run directories + for job_id in [12345, 12346]: + run_dir = temp_dir / f"{job_id}_2P_4D_20250126_120000" + run_dir.mkdir() + + # Create metadata + metadata = sample_run_metadata.copy() + metadata["job_id"] = str(job_id) + metadata_path = run_dir / f"{job_id}.json" + with open(metadata_path, "w") as f: + json.dump(metadata, f) + + # Create benchmark results + bench_dir = run_dir / "sa-bench_isl_8192_osl_1024" + bench_dir.mkdir() + + for concurrency in [50, 100]: + result_data = SampleSABenchData.result_json(concurrency) + result_path = bench_dir / f"result_c{concurrency}.json" + with open(result_path, "w") as f: + json.dump(result_data, f) + + # Load all runs + loader = RunLoader(str(temp_dir)) + runs = loader.load_all() + + # Verify both runs were loaded + assert len(runs) == 2 + job_ids = {run.job_id for run in runs} + assert "12345" in job_ids + assert "12346" in job_ids + + # Verify each run has benchmark data + for run in runs: + assert 
len(run.profiler.output_tps) == 2 + + def test_parser_fallback_to_manual(self, temp_dir, sample_run_metadata): + """Test fallback to manual parsing when parser fails.""" + # Create run directory + run_dir = temp_dir / "12345_2P_4D_20250126_120000" + run_dir.mkdir() + + # Create metadata + metadata_path = run_dir / "12345.json" + with open(metadata_path, "w") as f: + json.dump(sample_run_metadata, f) + + # Create benchmark results with unknown benchmark type + bench_dir = run_dir / "unknown-bench_isl_8192_osl_1024" + bench_dir.mkdir() + + # Create result JSON file + result_data = SampleSABenchData.result_json(100) + result_path = bench_dir / "result_c100.json" + with open(result_path, "w") as f: + json.dump(result_data, f) + + # Load the run - should fall back to manual parsing + loader = RunLoader(str(temp_dir)) + run = loader.load_single("12345_2P_4D_20250126_120000") + + # Verify run was loaded with manual parser + assert run is not None + # Note: fallback won't find results in unknown-bench directory + # but it shouldn't crash + + def test_load_node_metrics_sglang(self, temp_dir, sample_run_metadata): + """Test loading node metrics for SGLang runs.""" + # Create run directory + run_dir = temp_dir / "12345_2P_4D_20250126_120000" + run_dir.mkdir() + + # Create metadata + metadata = sample_run_metadata.copy() + metadata["model"]["container"] = "sglang:latest" + metadata_path = run_dir / "12345.json" + with open(metadata_path, "w") as f: + json.dump(metadata, f) + + # Create logs subdirectory + logs_dir = run_dir / "logs" + logs_dir.mkdir() + + # Create SGLang node logs using test harness + ParserTestHarness.create_sglang_node_logs(logs_dir, num_prefill=2, num_decode=4) + + # Load node metrics + loader = RunLoader(str(temp_dir)) + nodes = loader.load_node_metrics(str(run_dir), backend_type="sglang") + + # Verify nodes were loaded + assert len(nodes) == 6 # 2 prefill + 4 decode + worker_types = [node.worker_type for node in nodes] + assert worker_types.count("prefill") == 2 + assert worker_types.count("decode") == 4 + + def test_load_node_metrics_trtllm(self, temp_dir, sample_run_metadata): + """Test loading node metrics for TRTLLM runs.""" + # Create run directory + run_dir = temp_dir / "12345_2P_4D_20250126_120000" + run_dir.mkdir() + + # Create metadata + metadata = sample_run_metadata.copy() + metadata["model"]["container"] = "trtllm:latest" + metadata_path = run_dir / "12345.json" + with open(metadata_path, "w") as f: + json.dump(metadata, f) + + # Create logs subdirectory + logs_dir = run_dir / "logs" + logs_dir.mkdir() + + # Create TRTLLM node logs using test harness + ParserTestHarness.create_trtllm_node_logs(logs_dir, num_prefill=2, num_decode=4) + + # Load node metrics + loader = RunLoader(str(temp_dir)) + nodes = loader.load_node_metrics(str(run_dir), backend_type="trtllm") + + # Verify nodes were loaded + assert len(nodes) == 6 # 2 prefill + 4 decode + worker_types = [node.worker_type for node in nodes] + assert worker_types.count("prefill") == 2 + assert worker_types.count("decode") == 4 + + def test_load_node_metrics_for_run(self, temp_dir, sample_run_metadata): + """Test loading node metrics with automatic backend detection.""" + # Create run directory + run_dir = temp_dir / "12345_2P_4D_20250126_120000" + run_dir.mkdir() + + # Create metadata with SGLang container + metadata = sample_run_metadata.copy() + metadata["model"]["container"] = "sglang:v0.2.0" + metadata_path = run_dir / "12345.json" + with open(metadata_path, "w") as f: + json.dump(metadata, f) + + # Create 
benchmark results + bench_dir = run_dir / "sa-bench_isl_8192_osl_1024" + bench_dir.mkdir() + result_data = SampleSABenchData.result_json(100) + result_path = bench_dir / "result_c100.json" + with open(result_path, "w") as f: + json.dump(result_data, f) + + # Create logs subdirectory with SGLang logs + logs_dir = run_dir / "logs" + logs_dir.mkdir() + ParserTestHarness.create_sglang_node_logs(logs_dir, num_prefill=1, num_decode=2) + + # Load the run + loader = RunLoader(str(temp_dir)) + run = loader.load_single("12345_2P_4D_20250126_120000") + + # Load node metrics with automatic detection + nodes = loader.load_node_metrics_for_run(run) + + # Verify nodes were loaded + assert len(nodes) == 3 # 1 prefill + 2 decode + + def test_convert_parser_results_to_dict(self, temp_dir): + """Test conversion of parser results to dict format.""" + loader = RunLoader(str(temp_dir)) + + # Sample parser results + parser_results = [ + { + "max_concurrency": 50, + "output_throughput": 2500.0, + "mean_ttft_ms": 175.0, + "mean_itl_ms": 20.0, + "p99_ttft_ms": 300.0, + }, + { + "max_concurrency": 100, + "output_throughput": 5000.0, + "mean_ttft_ms": 200.0, + "mean_itl_ms": 22.0, + "p99_ttft_ms": 350.0, + }, + ] + + # Convert to dict format + result_dict = loader._convert_parser_results_to_dict(parser_results) + + # Verify structure + assert result_dict["concurrencies"] == [50, 100] + assert result_dict["output_tps"] == [2500.0, 5000.0] + assert result_dict["mean_ttft_ms"] == [175.0, 200.0] + assert result_dict["mean_itl_ms"] == [20.0, 22.0] + assert result_dict["p99_ttft_ms"] == [300.0, 350.0] + + def test_mooncake_router_directory_detection(self, temp_dir, sample_run_metadata): + """Test that mooncake-router directories are detected correctly.""" + # Create run directory + run_dir = temp_dir / "12345_2P_4D_20250126_120000" + run_dir.mkdir() + + # Create metadata with mooncake-router benchmark type + metadata = sample_run_metadata.copy() + metadata["benchmark"]["type"] = "mooncake-router" + metadata_path = run_dir / "12345.json" + with open(metadata_path, "w") as f: + json.dump(metadata, f) + + # Create benchmark results directory + bench_dir = run_dir / "mooncake-router_isl_8192_osl_1024" + bench_dir.mkdir() + + # Create AIPerf result JSON + aiperf_data = { + "output_token_throughput": {"avg": 1150.92}, + "request_throughput": {"avg": 3.37}, + "time_to_first_token": {"avg": 150.5}, + "inter_token_latency": {"avg": 18.5}, + "request_count": {"avg": 1000}, + } + result_path = bench_dir / "profile_export_aiperf.json" + with open(result_path, "w") as f: + json.dump(aiperf_data, f) + + # Load the run + loader = RunLoader(str(temp_dir)) + run = loader.load_single("12345_2P_4D_20250126_120000") + + # Verify run was loaded + assert run is not None + # Verify mooncake-router results were parsed + assert len(run.profiler.output_tps) >= 1 + From 159aab8a4fb20d3f4ef4d7d851bbad55070e0ad2 Mon Sep 17 00:00:00 2001 From: Kaunil Dhruv Date: Mon, 26 Jan 2026 13:01:11 -0800 Subject: [PATCH 02/15] create architecture diagram --- analysis/docs/ARCHITECTURE_DATAFLOW.md | 522 +++++++++++++++++++++++++ 1 file changed, 522 insertions(+) create mode 100644 analysis/docs/ARCHITECTURE_DATAFLOW.md diff --git a/analysis/docs/ARCHITECTURE_DATAFLOW.md b/analysis/docs/ARCHITECTURE_DATAFLOW.md new file mode 100644 index 00000000..5662767a --- /dev/null +++ b/analysis/docs/ARCHITECTURE_DATAFLOW.md @@ -0,0 +1,522 @@ +# SRT-SLURM Log Analysis Architecture - Dataflow Diagram + +## Overview +This document describes the data flow through the log 
analysis system, from raw log files to structured data models. + +--- + +## 1. Entry Point: RunLoader + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ RunLoader │ +│ Entry point for loading and analyzing benchmark run data │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ├──► discover_runs() + ├──► load_single(job_id) + └──► load_node_metrics_for_run() + + ┌───────────────┴───────────────┐ + │ │ + ▼ ▼ + ┌─────────────────────┐ ┌────────────────────┐ + │ Metadata Discovery │ │ Results Parsing │ + └─────────────────────┘ └────────────────────┘ +``` + +--- + +## 2. Metadata Discovery Flow + +``` + ┌─────────────────────────────────┐ + │ Source Files (per run) │ + │ │ + │ 📁 {job_id}/metadata.json │ + │ 📁 {job_id}/config.yaml │ + │ 📁 {job_id}/*.json │ + └─────────────────────────────────┘ + │ + │ read by + ▼ + ┌─────────────────────────────────┐ + │ RunLoader._load_metadata() │ + └─────────────────────────────────┘ + │ + │ creates + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ RunMetadata │ +│ Fields: Source File: │ +│ • job_id 📁 metadata.json │ +│ • job_name 📁 metadata.json │ +│ • run_date 📁 metadata.json │ +│ • mode (monolithic/disaggregated) 📁 metadata.json │ +│ • prefill_nodes, decode_nodes 📁 metadata.json │ +│ • prefill_workers, decode_workers 📁 metadata.json │ +│ • model: ModelConfig 📁 metadata.json │ +│ - path, tensor_parallel, ... │ +└───────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 3. Profiler/Benchmark Results Flow + +``` + ┌─────────────────────────────────────────┐ + │ Profiler Type Detection │ + │ │ + │ 📁 logs/benchmark.out │ + │ - Search for "SA-Bench Config" │ + │ - Search for "aiperf" commands │ + └─────────────────────────────────────────┘ + │ + │ determines + ▼ + ┌─────────────────────────────────────────┐ + │ ProfilerMetadata │ + │ Fields: Source: │ + │ • profiler_type benchmark.out │ + │ • isl benchmark.out │ + │ • osl benchmark.out │ + │ • concurrencies benchmark.out │ + └─────────────────────────────────────────┘ + │ + │ used to find + ▼ + ┌────────────────────────────────────────────────────────────────┐ + │ BenchmarkParser.find_result_directory() │ + │ │ + │ SA-Bench: Mooncake-Router: │ + │ 📁 sa-bench_isl_*_osl_*/ 📁 logs/artifacts/*/ │ + │ result_*.json profile_export_aiperf.json │ + └────────────────────────────────────────────────────────────────┘ + │ + │ parse_result_directory() + ▼ +┌──────────────────────────────────────────────────────────────────────────┐ +│ ProfilerResults │ +│ Fields: Source Files: │ +│ • output_tps: list[float] 📁 result_*.json (SA-Bench) │ +│ • request_throughput: list[float] 📁 profile_export_aiperf.json │ +│ • concurrency_values: list[int] (Mooncake-Router) │ +│ • mean_ttft_ms: list[float] │ +│ • mean_itl_ms: list[float] One entry per concurrency level │ +│ • mean_e2el_ms: list[float] │ +│ • p99_ttft_ms, median_ttft_ms, ... Aggregated from all result files │ +│ • total_input_tokens: list[int] │ +│ • total_output_tokens: list[int] │ +└──────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 4. 
Benchmark Launch Command Flow + +``` + ┌─────────────────────────────────┐ + │ Source File │ + │ 📁 logs/benchmark.out │ + │ - Command line arguments │ + │ - SA-Bench Config: header │ + │ - aiperf profile commands │ + └─────────────────────────────────┘ + │ + │ parse_launch_command() + ▼ + ┌─────────────────────────────────┐ + │ BenchmarkParser │ + │ (SA-Bench or Mooncake) │ + └─────────────────────────────────┘ + │ + │ creates + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ BenchmarkLaunchCommand │ +│ Fields: Source: │ +│ • benchmark_type 📁 logs/benchmark.out │ +│ • raw_command 📁 logs/benchmark.out │ +│ • extra_args: dict 📁 logs/benchmark.out │ +│ - base_url, model, input_len, │ +│ output_len, max_concurrency, ... │ +└───────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 5. Node Metrics Flow + +``` + ┌─────────────────────────────────────────────────┐ + │ Source Files (per node/worker) │ + │ │ + │ 📁 logs/{node}_{worker_type}_{worker_id}.out │ + │ Examples: │ + │ - worker-3_decode_w0.out │ + │ - eos0219_prefill_w1.out │ + │ │ + │ Content: │ + │ • Batch metrics lines │ + │ • Memory snapshot lines │ + │ • TP/DP/EP configuration │ + │ • Launch command │ + └─────────────────────────────────────────────────┘ + │ + │ detect backend type + ▼ + ┌─────────────────────────────────┐ + │ NodeAnalyzer │ + │ _detect_backend_type() │ + │ • Checks config.yaml │ + │ • Checks log patterns │ + └─────────────────────────────────┘ + │ + │ get_node_parser() + ▼ + ┌────────────────────────────────────────────────────────┐ + │ NodeParser (SGLang or TRT-LLM) │ + │ │ + │ parse_single_log() - parses one worker's log file │ + └────────────────────────────────────────────────────────┘ + │ + │ creates + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ NodeMetadata │ +│ Fields: Source: │ +│ • node_name 📁 *_{type}_{id}.out (filename) │ +│ • worker_type (prefill/decode/agg) 📁 *_{type}_{id}.out (filename) │ +│ • worker_id (w0, w1, ...) 📁 *_{type}_{id}.out (filename) │ +└───────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ BatchMetrics │ +│ Fields: Source: │ +│ • timestamp 📁 *.out log lines │ +│ • dp, tp, ep 📁 *.out log lines │ +│ • batch_type (prefill/decode) 📁 *.out log lines │ +│ • new_seq, new_token, cached_token 📁 *.out log lines │ +│ • token_usage 📁 *.out log lines │ +│ • running_req, queue_req 📁 *.out log lines │ +│ • num_tokens 📁 *.out log lines │ +│ • input_throughput, gen_throughput 📁 *.out log lines │ +│ │ +│ Example log line (SGLang): │ +│ 2024-12-30 08:10:15 DP0.TP0.EP0 [BATCH] prefill #new-seq: 2 ... │ +│ │ +│ Example log line (TRT-LLM): │ +│ [TensorRT-LLM][INFO] [ITERATION] tokens=1024 new_tokens=128 ... 
│ +└───────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ MemoryMetrics │ +│ Fields: Source: │ +│ • timestamp 📁 *.out log lines │ +│ • dp, tp, ep 📁 *.out log lines │ +│ • avail_mem_gb 📁 *.out log lines │ +│ • mem_usage_gb 📁 *.out log lines │ +│ • kv_cache_gb 📁 *.out log lines │ +│ • kv_tokens 📁 *.out log lines │ +│ │ +│ Example log line (SGLang): │ +│ 2024-12-30 08:10:15 DP0.TP0.EP0 #running-req: 10, avail_mem=45.2GB │ +│ │ +│ Example log line (TRT-LLM): │ +│ [TensorRT-LLM][INFO] Memory Stats: free=48.5GB, kv_cache=12.3GB │ +└───────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ NodeMetrics │ +│ Fields: Source: │ +│ • metadata: NodeMetadata (see above) │ +│ • batches: list[BatchMetrics] 📁 *.out log lines │ +│ • memory_snapshots: list[MemoryMetrics] 📁 *.out log lines │ +│ • config: dict 📁 *.out log lines │ +│ - tp_size, dp_size, ep_size (parsed from DP0.TP2.EP1 tags) │ +│ • run_id (from metadata) │ +└───────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 6. Node Configuration Flow + +``` + ┌─────────────────────────────────────────────────┐ + │ Source Files (per node) │ + │ │ + │ 📁 logs/*_{type}_{id}.out - launch command │ + │ 📁 logs/*_config.json - node config │ + │ 📁 logs/config.yaml - environment vars │ + └─────────────────────────────────────────────────┘ + │ + │ parsed by + ▼ + ┌─────────────────────────────────┐ + │ NodeAnalyzer │ + │ _populate_config_from_files() │ + └─────────────────────────────────┘ + │ + │ creates + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ NodeLaunchCommand │ +│ Fields: Source: │ +│ • backend_type (sglang/trtllm) 📁 *_{type}_{id}.out │ +│ • worker_type (prefill/decode) 📁 *_{type}_{id}.out │ +│ • raw_command 📁 *_{type}_{id}.out │ +│ • extra_args: dict 📁 *_{type}_{id}.out │ +│ - model_path, served_model_name, │ +│ disaggregation_mode, tp_size, │ +│ pp_size, max_num_seqs, ... │ +│ │ +│ Example (TRT-LLM): │ +│ python3 -m dynamo.trtllm --model-path /model --disaggregation-mode │ +│ decode --extra-engine-args /logs/trtllm_config_decode.yaml │ +│ │ +│ Example (SGLang): │ +│ python -m sglang.launch_server --model-path /model --disagg-mode prefill │ +│ --tp-size 2 --dp-size 1 │ +└───────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ NodeConfig (TypedDict) │ +│ Fields: Source: │ +│ • launch_command: NodeLaunchCommand 📁 *_{type}_{id}.out │ +│ • environment: dict[str, str] 📁 config.yaml │ +│ - NCCL settings, CUDA settings, │ +│ model paths, etc. 
│ +│ • gpu_info: dict (optional) 📁 *_config.json │ +└───────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ NodeInfo │ +│ Top-level container combining metrics and configuration │ +│ │ +│ Fields: │ +│ • metrics: NodeMetrics (performance data) │ +│ • node_config: NodeConfig (configuration) │ +│ │ +│ Convenience properties delegate to nested fields: │ +│ • node_name → metrics.metadata.node_name │ +│ • worker_type → metrics.metadata.worker_type │ +│ • launch_command → node_config["launch_command"] │ +│ • environment → node_config["environment"] │ +│ • batches → metrics.batches │ +└───────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 7. Complete Data Model Hierarchy + +``` +BenchmarkRun (top-level container for entire run) +│ +├─ metadata: RunMetadata +│ └─ Source: 📁 metadata.json, config.yaml +│ +├─ profiler_metadata: ProfilerMetadata +│ └─ Source: 📁 logs/benchmark.out +│ +├─ profiler: ProfilerResults +│ └─ Source: 📁 sa-bench_isl_*_osl_*/result_*.json +│ 📁 logs/artifacts/*/profile_export_aiperf.json +│ +├─ benchmark_launch_command: BenchmarkLaunchCommand +│ └─ Source: 📁 logs/benchmark.out +│ +└─ nodes: list[NodeInfo] + └─ Each NodeInfo contains: + │ + ├─ metrics: NodeMetrics + │ ├─ metadata: NodeMetadata + │ │ └─ Source: 📁 logs/*_{type}_{id}.out (filename) + │ ├─ batches: list[BatchMetrics] + │ │ └─ Source: 📁 logs/*_{type}_{id}.out (log lines) + │ ├─ memory_snapshots: list[MemoryMetrics] + │ │ └─ Source: 📁 logs/*_{type}_{id}.out (log lines) + │ └─ config: dict + │ └─ Source: 📁 logs/*_{type}_{id}.out (DP/TP/EP tags) + │ + └─ node_config: NodeConfig + ├─ launch_command: NodeLaunchCommand + │ └─ Source: 📁 logs/*_{type}_{id}.out (command line) + ├─ environment: dict[str, str] + │ └─ Source: 📁 logs/config.yaml + └─ gpu_info: dict (optional) + └─ Source: 📁 logs/*_config.json +``` + +--- + +## 8. Parser Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ Parser Registry System │ +│ │ +│ Decorators: │ +│ • @register_benchmark_parser("sa-bench") │ +│ • @register_benchmark_parser("mooncake-router") │ +│ • @register_node_parser("sglang") │ +│ • @register_node_parser("trtllm") │ +│ │ +│ Lookup Functions: │ +│ • get_benchmark_parser(type) → BenchmarkParser │ +│ • get_node_parser(type) → NodeParser │ +└─────────────────────────────────────────────────────────────────────────┘ + │ + ┌───────────────┴────────────────┐ + │ │ + ▼ ▼ + ┌──────────────────────┐ ┌──────────────────────┐ + │ BenchmarkParsers │ │ NodeParsers │ + └──────────────────────┘ └──────────────────────┘ + │ │ + ┌───────────┴───────────┐ ┌──────────┴──────────┐ + ▼ ▼ ▼ ▼ +┌──────────────┐ ┌──────────────┐ ┌─────────┐ ┌──────────────┐ +│ SABench │ │ Mooncake │ │ SGLang │ │ TRT-LLM │ +│ Parser │ │ Parser │ │ Parser │ │ Parser │ +└──────────────┘ └──────────────┘ └─────────┘ └──────────────┘ + +Each parser implements: + Benchmark: + • find_result_directory() - locate result files + • parse_result_directory() - parse all results + • parse_result_json() - parse single result file + • parse_launch_command() - extract command + + Node: + • parse_logs() - parse directory of logs + • parse_single_log() - parse one worker log + • parse_launch_command() - extract command +``` + +--- + +## 9. 
Caching Layer + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ CacheManager │ +│ │ +│ Caches to 📁 {run_path}/cached_assets/ │ +│ │ +│ Cached Data: │ +│ • benchmark_results.parquet - ProfilerResults │ +│ • node_metrics.parquet - NodeMetrics (all workers) │ +│ • cache_metadata.json - timestamps, source patterns │ +│ │ +│ Cache Validation: │ +│ • Checks if source files have changed (mtime) │ +│ • Invalidates cache if patterns don't match │ +│ • Automatically rebuilds if invalid │ +└─────────────────────────────────────────────────────────────────────────┘ + +Flow with cache: + 1. RunLoader checks cache validity + 2. If valid → deserialize from .parquet + 3. If invalid → parse from source files → cache results + 4. Populate NodeConfig from files (not cached) +``` + +--- + +## 10. File Structure Summary + +``` +{run_directory}/ +├── metadata.json → RunMetadata +├── config.yaml → ProfilerMetadata.isl/osl +├── logs/ +│ ├── benchmark.out → BenchmarkLaunchCommand, ProfilerMetadata +│ ├── config.yaml → NodeConfig.environment +│ ├── {node}_{type}_{id}.out → NodeMetrics, NodeLaunchCommand +│ ├── {node}_config.json → NodeConfig.gpu_info +│ └── sa-bench_isl_*/ +│ └── result_*.json → ProfilerResults (SA-Bench) +│ └── artifacts/ +│ └── */ +│ └── profile_export_aiperf.json → ProfilerResults (Mooncake) +└── cached_assets/ + ├── benchmark_results.parquet + ├── node_metrics.parquet + └── cache_metadata.json +``` + +--- + +## 11. Key Design Principles + +1. **Parser Autonomy**: Each parser knows how to find and parse its own files + - `find_result_directory()` encapsulates file discovery logic + - RunLoader doesn't need benchmark-specific knowledge + +2. **Separation of Concerns**: + - **Metrics** (NodeMetrics): Performance data from log parsing + - **Configuration** (NodeConfig): Launch commands, environment, GPU info + - **Metadata** (NodeMetadata): Worker identification + +3. **Caching Strategy**: + - Cache expensive parsing operations (batch/memory metrics) + - Don't cache configuration (files are small, may change) + - Validate cache against source file timestamps + +4. **Extensibility**: + - New benchmark types: Implement BenchmarkParserProtocol + - New node backends: Implement NodeParserProtocol + - Register with decorator → automatically available + +5. **Data Flow Direction**: + ``` + Raw Files → Parsers → Data Models → Cache → Application + ↓ ↓ + (specific) (generic) + ``` + +--- + +## 12. 
Usage Example + +```python +from pathlib import Path +from analysis.srtlog.run_loader import RunLoader + +# Load a run +loader = RunLoader("/path/to/runs") +run = loader.load_single("553") + +# Access metadata (from metadata.json) +print(f"Job: {run.metadata.job_id}") +print(f"Model: {run.metadata.model.path}") + +# Access profiler results (from result_*.json or profile_export_aiperf.json) +print(f"Output TPS: {run.profiler.output_tps}") +print(f"Mean TTFT: {run.profiler.mean_ttft_ms}") + +# Access benchmark launch command (from logs/benchmark.out) +print(f"Benchmark: {run.benchmark_launch_command.benchmark_type}") +print(f"Arguments: {run.benchmark_launch_command.extra_args}") + +# Load node metrics (from logs/*_{type}_{id}.out) +nodes = loader.load_node_metrics_for_run(run) +for node in nodes: + # Metrics from log file parsing + print(f"Node: {node.node_name} ({node.worker_type})") + print(f" Batches: {len(node.batches)}") + print(f" Memory snapshots: {len(node.memory_snapshots)}") + + # Config from config files + print(f" Backend: {node.launch_command.backend_type}") + print(f" Environment vars: {len(node.environment)}") +``` + From f630cbaadaae7e8246dc60882c9f2ebb831ec63d Mon Sep 17 00:00:00 2001 From: Kaunil Dhruv Date: Mon, 26 Jan 2026 13:23:11 -0800 Subject: [PATCH 03/15] benchmark log out as fallback --- analysis/docs/ARCHITECTURE_DATAFLOW.md | 27 +++++++++------ analysis/srtlog/parsers/__init__.py | 22 ++++++++++-- .../parsers/benchmark/mooncake_router.py | 32 +++++++++++++++-- analysis/srtlog/parsers/benchmark/sa_bench.py | 34 ++++++++++++++++++- 4 files changed, 99 insertions(+), 16 deletions(-) diff --git a/analysis/docs/ARCHITECTURE_DATAFLOW.md b/analysis/docs/ARCHITECTURE_DATAFLOW.md index 5662767a..d51c42c7 100644 --- a/analysis/docs/ARCHITECTURE_DATAFLOW.md +++ b/analysis/docs/ARCHITECTURE_DATAFLOW.md @@ -91,23 +91,28 @@ This document describes the data flow through the log analysis system, from raw │ │ │ SA-Bench: Mooncake-Router: │ │ 📁 sa-bench_isl_*_osl_*/ 📁 logs/artifacts/*/ │ - │ result_*.json profile_export_aiperf.json │ + │ result_*.json (PRIMARY) profile_export_aiperf.json │ + │ benchmark.out (FALLBACK) (PRIMARY) │ + │ 📁 logs/benchmark.out │ + │ (FALLBACK) │ └────────────────────────────────────────────────────────────────┘ │ │ parse_result_directory() + │ ⚠️ JSON files are PRIMARY source of truth + │ .out files are FALLBACK only ▼ ┌──────────────────────────────────────────────────────────────────────────┐ │ ProfilerResults │ -│ Fields: Source Files: │ -│ • output_tps: list[float] 📁 result_*.json (SA-Bench) │ -│ • request_throughput: list[float] 📁 profile_export_aiperf.json │ -│ • concurrency_values: list[int] (Mooncake-Router) │ -│ • mean_ttft_ms: list[float] │ -│ • mean_itl_ms: list[float] One entry per concurrency level │ -│ • mean_e2el_ms: list[float] │ -│ • p99_ttft_ms, median_ttft_ms, ... Aggregated from all result files │ -│ • total_input_tokens: list[int] │ -│ • total_output_tokens: list[int] │ +│ Fields: Source Files (Priority Order): │ +│ • output_tps: list[float] 1️⃣ 📁 result_*.json (SA-Bench) │ +│ • request_throughput: list[float] 📁 profile_export_aiperf.json │ +│ • concurrency_values: list[int] (Mooncake-Router) │ +│ • mean_ttft_ms: list[float] 2️⃣ 📁 logs/benchmark.out (fallback)│ +│ • mean_itl_ms: list[float] │ +│ • mean_e2el_ms: list[float] One entry per concurrency level │ +│ • p99_ttft_ms, median_ttft_ms, ... 
│ +│ • total_input_tokens: list[int] JSON = Source of Truth ✨ │ +│ • total_output_tokens: list[int] .out = Fallback only ⚠️ │ └──────────────────────────────────────────────────────────────────────────┘ ``` diff --git a/analysis/srtlog/parsers/__init__.py b/analysis/srtlog/parsers/__init__.py index f534a4eb..e7f13be2 100644 --- a/analysis/srtlog/parsers/__init__.py +++ b/analysis/srtlog/parsers/__init__.py @@ -25,6 +25,9 @@ class BenchmarkParserProtocol(Protocol): """Protocol for benchmark output parsers. Each benchmark type (sa-bench, mooncake-router, etc.) should have a parser that implements this protocol. + + Design principle: JSON files are the primary source of truth. + The parse() method is a fallback for when JSON files are unavailable. """ @property @@ -33,7 +36,12 @@ def benchmark_type(self) -> str: ... def parse(self, benchmark_out_path: Path) -> dict[str, Any]: - """Parse benchmark.out file and return results. + """Parse benchmark.out file and return results (FALLBACK method). + + This is a fallback method used when JSON result files are not available. + Prefer using parse_result_directory() which prioritizes JSON files as + the source of truth. + Args: benchmark_out_path: Path to the benchmark.out file Returns: @@ -55,7 +63,11 @@ def parse_launch_command(self, log_content: str) -> BenchmarkLaunchCommand | Non ... def parse_result_json(self, json_path: Path) -> dict[str, Any]: - """Parse a benchmark result JSON file. + """Parse a benchmark result JSON file (PRIMARY source of truth). + + JSON files contain the complete, accurate benchmark results and should + be used as the primary data source whenever available. + Args: json_path: Path to a result JSON file Returns: @@ -84,6 +96,12 @@ def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: """Parse all result files in a directory. + This is the primary entry point for parsing benchmark results. + Implementation should: + 1. First attempt to parse JSON result files (primary source of truth) + 2. Fall back to parsing benchmark.out if no JSON files found + 3. Return list of results (one per concurrency level or benchmark run) + Args: result_dir: Directory containing benchmark result files diff --git a/analysis/srtlog/parsers/benchmark/mooncake_router.py b/analysis/srtlog/parsers/benchmark/mooncake_router.py index 52f0832e..02bd59c3 100644 --- a/analysis/srtlog/parsers/benchmark/mooncake_router.py +++ b/analysis/srtlog/parsers/benchmark/mooncake_router.py @@ -30,7 +30,11 @@ def benchmark_type(self) -> str: return "mooncake-router" def parse(self, benchmark_out_path: Path) -> dict[str, Any]: - """Parse benchmark.out file for mooncake-router results. + """Parse benchmark.out file for mooncake-router results (FALLBACK method). + + This is a fallback method used when JSON result files are not available. + Prefer using parse_result_directory() which prioritizes JSON files. + Args: benchmark_out_path: Path to benchmark.out file Returns: @@ -148,6 +152,10 @@ def _get_metric(self, data: dict, metric_name: str, stat: str) -> float | None: def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: """Parse AIPerf result files in a directory. + + Uses JSON files (profile_export_aiperf.json) as the primary source of truth. + Falls back to parsing benchmark.out only if no JSON results are found. 
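+
+        A minimal usage sketch (the directory path is illustrative; the
+        registry lookup is the supported entry point):
+
+            parser = get_benchmark_parser("mooncake-router")
+            results = parser.parse_result_directory(Path("logs/artifacts"))
+            # One dict per profile_export_aiperf.json found (recursive search),
+            # or a single fallback entry parsed from benchmark.out.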
+ Args: result_dir: Directory containing profile_export_aiperf.json Returns: @@ -155,11 +163,31 @@ def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: """ results = [] - # Look for AIPerf JSON files + # Primary: Look for AIPerf JSON files (source of truth) for json_file in result_dir.rglob("profile_export_aiperf.json"): result = self.parse_result_json(json_file) if result.get("output_tps") is not None: results.append(result) + logger.info(f"Loaded mooncake-router results from JSON: {json_file}") + + # Fallback: If no JSON results found, try parsing benchmark.out + if not results: + benchmark_out = result_dir / "benchmark.out" + if benchmark_out.exists(): + logger.info(f"No JSON results found in {result_dir}, falling back to benchmark.out parsing") + fallback_result = self.parse(benchmark_out) + if fallback_result.get("output_tps"): + # Convert to format expected by caller + results.append({ + "concurrency": 0, # Mooncake doesn't track concurrency + "output_tps": fallback_result.get("output_tps"), + "request_throughput": fallback_result.get("request_throughput"), + "mean_ttft_ms": fallback_result.get("mean_ttft_ms"), + "mean_itl_ms": fallback_result.get("mean_itl_ms"), + "total_requests": fallback_result.get("total_requests"), + }) + else: + logger.warning(f"No results found in {result_dir} (no profile_export_aiperf.json or benchmark.out)") return results diff --git a/analysis/srtlog/parsers/benchmark/sa_bench.py b/analysis/srtlog/parsers/benchmark/sa_bench.py index de0ba071..83161832 100644 --- a/analysis/srtlog/parsers/benchmark/sa_bench.py +++ b/analysis/srtlog/parsers/benchmark/sa_bench.py @@ -30,7 +30,11 @@ def benchmark_type(self) -> str: return "sa-bench" def parse(self, benchmark_out_path: Path) -> dict[str, Any]: - """Parse benchmark.out file for SA-Bench results. + """Parse benchmark.out file for SA-Bench results (FALLBACK method). + + This is a fallback method used when JSON result files are not available. + Prefer using parse_result_directory() which prioritizes JSON files. + Args: benchmark_out_path: Path to benchmark.out file Returns: @@ -143,6 +147,10 @@ def parse_result_json(self, json_path: Path) -> dict[str, Any]: def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: """Parse all result JSON files in a benchmark result directory. + + Uses JSON files as the primary source of truth. Falls back to parsing + benchmark.out only if no JSON results are found. 
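+
+        A minimal usage sketch (the directory name is illustrative):
+
+            parser = get_benchmark_parser("sa-bench")
+            results = parser.parse_result_directory(Path("sa-bench_isl_8192_osl_1024"))
+            # One dict per result_*.json, sorted by max_concurrency.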
+ Args: result_dir: Directory containing result_*.json files Returns: @@ -150,11 +158,35 @@ def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: """ results = [] + # Primary: Parse JSON result files (source of truth) for json_file in result_dir.glob("*.json"): result = self.parse_result_json(json_file) if result.get("max_concurrency") is not None: results.append(result) + # Fallback: If no JSON results found, try parsing benchmark.out + if not results: + benchmark_out = result_dir / "benchmark.out" + if benchmark_out.exists(): + logger.info(f"No JSON results found in {result_dir}, falling back to benchmark.out parsing") + # Parse benchmark.out and create a single result entry + fallback_result = self.parse(benchmark_out) + if fallback_result.get("output_tps"): + # Wrap in list format expected by caller + results.append({ + "max_concurrency": fallback_result.get("concurrencies", [0])[0] if fallback_result.get("concurrencies") else 0, + "output_tps": fallback_result.get("output_tps"), + "request_throughput": fallback_result.get("request_throughput"), + "mean_ttft_ms": fallback_result.get("mean_ttft_ms"), + "mean_itl_ms": fallback_result.get("mean_itl_ms"), + "mean_tpot_ms": fallback_result.get("mean_tpot_ms"), + "p99_ttft_ms": fallback_result.get("p99_ttft_ms"), + "p99_itl_ms": fallback_result.get("p99_itl_ms"), + "completed": fallback_result.get("completed_requests"), + }) + else: + logger.warning(f"No results found in {result_dir} (no JSON files or benchmark.out)") + # Sort by concurrency results.sort(key=lambda x: x.get("max_concurrency", 0) or 0) From 574658ca4095cf1f0eab0d16205dbe6cd00642e4 Mon Sep 17 00:00:00 2001 From: Kaunil Dhruv Date: Mon, 26 Jan 2026 13:24:34 -0800 Subject: [PATCH 04/15] benchmark log out as fallback --- analysis/docs/ARCHITECTURE_DATAFLOW.md | 106 ++++++++++++++++++++++--- 1 file changed, 93 insertions(+), 13 deletions(-) diff --git a/analysis/docs/ARCHITECTURE_DATAFLOW.md b/analysis/docs/ARCHITECTURE_DATAFLOW.md index d51c42c7..ec3420fb 100644 --- a/analysis/docs/ARCHITECTURE_DATAFLOW.md +++ b/analysis/docs/ARCHITECTURE_DATAFLOW.md @@ -408,7 +408,79 @@ Each parser implements: --- -## 9. Caching Layer +## 9. 
Parsing Strategy: JSON-First Approach + +### Design Principle: JSON as Source of Truth ✨ + +The parser infrastructure follows a **JSON-first** approach for benchmark results: + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ Benchmark Result Parsing Priority │ +│ │ +│ 1️⃣ PRIMARY: JSON Result Files (Source of Truth) │ +│ 📁 result_*.json (SA-Bench) │ +│ 📁 profile_export_aiperf.json (Mooncake-Router) │ +│ - Complete, structured data │ +│ - Machine-readable, validated format │ +│ - Contains all metrics with precision │ +│ │ +│ 2️⃣ FALLBACK: benchmark.out Parsing │ +│ 📁 logs/benchmark.out │ +│ - Used ONLY when JSON files are unavailable │ +│ - Regex-based extraction from human-readable logs │ +│ - May be incomplete or imprecise │ +│ - Logged as fallback in parser output │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +### Implementation + +All benchmark parsers implement this strategy in `parse_result_directory()`: + +```python +def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: + results = [] + + # 1️⃣ PRIMARY: Try JSON files first + for json_file in result_dir.glob("*.json"): # or rglob() for nested + result = self.parse_result_json(json_file) + if result.get("output_tps"): + results.append(result) + logger.info(f"Loaded from JSON: {json_file}") + + # 2️⃣ FALLBACK: If no JSON found, try benchmark.out + if not results: + benchmark_out = result_dir / "benchmark.out" + if benchmark_out.exists(): + logger.info("No JSON results found, falling back to .out parsing") + fallback_result = self.parse(benchmark_out) + if fallback_result.get("output_tps"): + results.append(fallback_result) + else: + logger.warning(f"No results found in {result_dir}") + + return results +``` + +### Rationale + +1. **Accuracy**: JSON files contain exact, validated data +2. **Completeness**: JSON includes all metrics, not just what's in logs +3. **Reliability**: Structured format vs regex parsing +4. **Performance**: JSON parsing is faster than regex on large logs +5. **Maintainability**: Less brittle than log format changes + +### When Fallback is Used + +The fallback to `.out` file parsing occurs when: +- JSON result files are missing (incomplete benchmark run) +- Results directory doesn't contain expected JSON files +- Legacy runs from before JSON export was implemented + +--- + +## 10. Caching Layer ``` ┌─────────────────────────────────────────────────────────────────────────┐ @@ -436,56 +508,64 @@ Flow with cache: --- -## 10. File Structure Summary +## 11. File Structure Summary ``` {run_directory}/ ├── metadata.json → RunMetadata ├── config.yaml → ProfilerMetadata.isl/osl ├── logs/ -│ ├── benchmark.out → BenchmarkLaunchCommand, ProfilerMetadata +│ ├── benchmark.out → BenchmarkLaunchCommand, ProfilerMetadata, (fallback metrics) │ ├── config.yaml → NodeConfig.environment │ ├── {node}_{type}_{id}.out → NodeMetrics, NodeLaunchCommand │ ├── {node}_config.json → NodeConfig.gpu_info │ └── sa-bench_isl_*/ -│ └── result_*.json → ProfilerResults (SA-Bench) +│ └── result_*.json → ProfilerResults (PRIMARY ✨) │ └── artifacts/ │ └── */ -│ └── profile_export_aiperf.json → ProfilerResults (Mooncake) +│ └── profile_export_aiperf.json → ProfilerResults (PRIMARY ✨) └── cached_assets/ ├── benchmark_results.parquet ├── node_metrics.parquet └── cache_metadata.json ``` +**Note**: JSON files are the primary source of truth for benchmark results. +The `.out` files serve as fallback for legacy/incomplete runs. + --- -## 11. 
Key Design Principles
+## 12. Key Design Principles
 
 1. **Parser Autonomy**: Each parser knows how to find and parse its own files
    - `find_result_directory()` encapsulates file discovery logic
    - RunLoader doesn't need benchmark-specific knowledge
 
-2. **Separation of Concerns**:
+2. **JSON-First Parsing** ✨: JSON files are the primary source of truth
+   - `parse_result_json()` for structured, accurate data
+   - `parse()` method is fallback for when JSON is unavailable
+   - Logged clearly when fallback is used
+
+3. **Separation of Concerns**:
    - **Metrics** (NodeMetrics): Performance data from log parsing
    - **Configuration** (NodeConfig): Launch commands, environment, GPU info
    - **Metadata** (NodeMetadata): Worker identification
 
-3. **Caching Strategy**:
+4. **Caching Strategy**:
    - Cache expensive parsing operations (batch/memory metrics)
    - Don't cache configuration (files are small, may change)
    - Validate cache against source file timestamps
 
-4. **Extensibility**:
+5. **Extensibility**:
    - New benchmark types: Implement BenchmarkParserProtocol
    - New node backends: Implement NodeParserProtocol
    - Register with decorator → automatically available
 
-5. **Data Flow Direction**:
+6. **Data Flow Direction**:
    ```
-   Raw Files → Parsers → Data Models → Cache → Application
-        ↓          ↓
-   (specific)  (generic)
+   JSON Files (Primary) ──┐
+                          ├──► Parsers ──► Data Models ──► Cache ──► Application
+   .out Files (Fallback) ─┘
    ```
 
 ---

From 466c3c9a3d269dc8c079357d7edde1b0c8df0f11 Mon Sep 17 00:00:00 2001
From: Kaunil Dhruv
Date: Mon, 26 Jan 2026 14:30:05 -0800
Subject: [PATCH 05/15] rabbit feedback

---
 analysis/srtlog/log_parser.py                 |    7 +-
 analysis/srtlog/run_loader.py                 |   36 +-
 rollup_mooncake_4405566.json                  | 1091 +++++++++++++++++
 rollup_sa_bench_553.json                      |  963 +++++++++++++++
 src/srtctl/benchmarks/scripts/gpqa/bench.sh   |   15 +-
 .../benchmarks/scripts/longbenchv2/bench.sh   |   20 +-
 src/srtctl/benchmarks/scripts/mmlu/bench.sh   |   15 +-
 .../benchmarks/scripts/profiling/profile.sh   |   33 +-
 .../benchmarks/scripts/sa-bench/bench.sh      |   52 +-
 tests/fixtures_parsers.py                     |    3 +-
 10 files changed, 2199 insertions(+), 36 deletions(-)
 create mode 100644 rollup_mooncake_4405566.json
 create mode 100644 rollup_sa_bench_553.json

diff --git a/analysis/srtlog/log_parser.py b/analysis/srtlog/log_parser.py
index ee2ca471..cf622278 100644
--- a/analysis/srtlog/log_parser.py
+++ b/analysis/srtlog/log_parser.py
@@ -66,9 +66,10 @@ def parse_run_logs(self, run_path: str, return_dicts: bool = False) -> list:
             return []
 
         # Get appropriate parser
-        parser = get_node_parser(backend_type)
-        if not parser:
-            logger.warning(f"No parser registered for backend '{backend_type}'")
+        try:
+            parser = get_node_parser(backend_type)
+        except ValueError as e:
+            logger.warning(f"No parser registered for backend '{backend_type}': {e}")
             return []
 
         # Use parser to parse logs directory
diff --git a/analysis/srtlog/run_loader.py b/analysis/srtlog/run_loader.py
index 102e488f..20797163 100644
--- a/analysis/srtlog/run_loader.py
+++ b/analysis/srtlog/run_loader.py
@@ -242,8 +242,9 @@ def _load_benchmark_results(self, run: BenchmarkRun) -> None:
             return
 
         # Define source patterns for cache validation (relative to run_path)
+        # Use recursive glob to catch nested result files (e.g., artifacts/*/profile_export_aiperf.json)
         result_dir_rel = result_dir.relative_to(Path(run_path)) if result_dir.is_relative_to(Path(run_path)) else result_dir.name
-        source_patterns = [f"{result_dir_rel}/*.json"]
+        source_patterns = [f"{result_dir_rel}/**/*.json"]
 
         # Try to load from cache first
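+        # Cache path: a valid cache deserializes benchmark_results.parquet from
+        # cached_assets/; otherwise we fall through to re-parse the result JSON
+        # files and rebuild the cache.
         if 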
cache_mgr.is_cache_valid("benchmark_results", source_patterns): @@ -413,12 +414,28 @@ def _convert_parser_results_to_dict(self, results_list: list[dict]) -> dict: # results_list is already sorted by the parser for data in results_list: - # Concurrency - concurrency = data.get("max_concurrency") or data.get("concurrency") or 0 + # Concurrency - explicit None checks to preserve 0 values + if data.get("max_concurrency") is not None: + concurrency = data.get("max_concurrency") + elif data.get("concurrency") is not None: + concurrency = data.get("concurrency") + else: + concurrency = 0 out["concurrencies"].append(concurrency) - # Throughput - normalize field names - out["output_tps"].append(data.get("output_throughput") or data.get("output_tps")) - out["total_tps"].append(data.get("total_token_throughput") or data.get("total_tps")) + + # Throughput - normalize field names with explicit None checks to preserve 0.0 + if "output_throughput" in data and data["output_throughput"] is not None: + output_tps = data["output_throughput"] + else: + output_tps = data.get("output_tps") + out["output_tps"].append(output_tps) + + if "total_token_throughput" in data and data["total_token_throughput"] is not None: + total_tps = data["total_token_throughput"] + else: + total_tps = data.get("total_tps") + out["total_tps"].append(total_tps) + out["request_throughput"].append(data.get("request_throughput")) out["request_goodput"].append(data.get("request_goodput")) out["request_rate"].append(data.get("request_rate")) @@ -680,7 +697,7 @@ def load_node_metrics(self, run_path: str, backend_type: str = "sglang") -> list backend_type: Backend type (sglang or trtllm) - deprecated, auto-detected Returns: - List of NodeInfo objects, one per worker + List of NodeMetrics objects, one per worker """ # Handle both relative and absolute paths if not os.path.isabs(run_path): @@ -688,7 +705,10 @@ def load_node_metrics(self, run_path: str, backend_type: str = "sglang") -> list # Use NodeAnalyzer which handles caching, backend detection, and config loading analyzer = NodeAnalyzer() - return analyzer.parse_run_logs(run_path, return_dicts=False) + node_infos = analyzer.parse_run_logs(run_path, return_dicts=False) + + # Extract only the metrics from each NodeInfo + return [node.metrics for node in node_infos] def load_node_metrics_for_run(self, run: BenchmarkRun) -> list[NodeMetrics]: """Load node metrics for a BenchmarkRun. 
diff --git a/rollup_mooncake_4405566.json b/rollup_mooncake_4405566.json new file mode 100644 index 00000000..5479d871 --- /dev/null +++ b/rollup_mooncake_4405566.json @@ -0,0 +1,1091 @@ +{ + "metadata": { + "job_id": "4405566", + "job_name": "disagg-kv-dynamo", + "run_date": "2025-12-30 07:45:37", + "mode": "disaggregated", + "container": "/lustre/fsw/coreai_tritoninference_triton3/idhanani/lmsysorg+sglang+v0.5.6.post2.sqsh", + "prefill_nodes": 2, + "decode_nodes": 0, + "prefill_workers": 6, + "decode_workers": 2, + "model_dir": "/lustre/fsw/coreai_tritoninference_triton3/idhanani/qwen32b", + "gpus_per_node": 8, + "gpu_type": "h100", + "partition": "" + }, + "profiler_metadata": { + "profiler_type": "mooncake-router", + "isl": "None", + "osl": "None", + "concurrencies": "" + }, + "profiler_results": { + "output_tps": [ + 1150.9244871905873 + ], + "request_throughput": [ + 3.372979548010381 + ], + "concurrency_values": [ + 0 + ], + "mean_ttft_ms": [ + 40618.77639945425 + ], + "mean_tpot_ms": [ + 23.95447833171341 + ], + "mean_itl_ms": [ + 23.95447833171341 + ], + "mean_e2el_ms": [ + 48790.48244293251 + ], + "p99_ttft_ms": [ + 90226.47291000008 + ], + "median_ttft_ms": [ + 43941.271566999996 + ], + "total_input_tokens": [ + null + ], + "total_output_tokens": [ + null + ], + "total_tps": [ + null + ], + "request_goodput": [ + null + ] + }, + "benchmark_command": null, + "node_metrics": [ + { + "metadata": { + "node_name": "eos0219", + "worker_type": "prefill", + "worker_id": "w0" + }, + "batches_sample": [ + { + "timestamp": "2025-12-30T15:52:33.006497Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 2, + "new_token": 1152, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:36.982941Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:39.574408Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:39.725257Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:39.753706Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + } + ], + "total_batches": 3455, + "memory_snapshots_sample": [ + { + "timestamp": "2025-12-30T15:49:50.419744Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:50.419758Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:51.513673Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:51.514008Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:51:35.770502Z", + "dp": 0, + 
"tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 46.94, + "mem_usage_gb": 30.64, + "kv_cache_gb": null + } + ], + "total_memory_snapshots": 8 + }, + { + "metadata": { + "node_name": "eos0219", + "worker_type": "prefill", + "worker_id": "w1" + }, + "batches_sample": [ + { + "timestamp": "2025-12-30T15:52:33.012601Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:36.927954Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:39.723525Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 2, + "new_token": 1152, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:53:22.456717Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 7360, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:53:25.404620Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 6848, + "cached_token": 512, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + } + ], + "total_batches": 3517, + "memory_snapshots_sample": [ + { + "timestamp": "2025-12-30T15:49:54.916394Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:54.916394Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:55.991768Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:55.992136Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:51:35.967407Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 46.94, + "mem_usage_gb": 30.64, + "kv_cache_gb": null + } + ], + "total_memory_snapshots": 8 + }, + { + "metadata": { + "node_name": "eos0219", + "worker_type": "prefill", + "worker_id": "w2" + }, + "batches_sample": [ + { + "timestamp": "2025-12-30T15:52:32.929926Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:36.762916Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:39.705654Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:53:22.527700Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 7296, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": 
"2025-12-30T15:53:25.340856Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 5824, + "cached_token": 512, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + } + ], + "total_batches": 3464, + "memory_snapshots_sample": [ + { + "timestamp": "2025-12-30T15:49:47.416555Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:47.416798Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:48.628013Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:48.628441Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:51:35.967487Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 46.94, + "mem_usage_gb": 30.64, + "kv_cache_gb": null + } + ], + "total_memory_snapshots": 8 + }, + { + "metadata": { + "node_name": "eos0219", + "worker_type": "prefill", + "worker_id": "w3" + }, + "batches_sample": [ + { + "timestamp": "2025-12-30T15:52:33.005841Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:39.733732Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:53:22.527290Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 6784, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:53:23.473058Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 8192, + "cached_token": 512, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:53:23.628404Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 8192, + "cached_token": 0, + "token_usage": 0.03, + "running_req": 0, + "queue_req": 0 + } + ], + "total_batches": 3532, + "memory_snapshots_sample": [ + { + "timestamp": "2025-12-30T15:49:58.600285Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:58.600316Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:59.729690Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:59.730974Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:51:35.824848Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 46.94, + "mem_usage_gb": 30.64, + "kv_cache_gb": null + } + ], + "total_memory_snapshots": 8 + }, + 
{ + "metadata": { + "node_name": "eos0222", + "worker_type": "decode", + "worker_id": "w0" + }, + "batches_sample": [ + { + "timestamp": "2025-12-30T15:52:38.206058Z", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": 0.01, + "running_req": 5, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:38.803501Z", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": 0.01, + "running_req": 5, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:39.402332Z", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": 0.01, + "running_req": 5, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:40.163757Z", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": 0.01, + "running_req": 5, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:40.761164Z", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": 0.01, + "running_req": 5, + "queue_req": 0 + } + ], + "total_batches": 3846, + "memory_snapshots_sample": [ + { + "timestamp": "2025-12-30T15:49:57.308943Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:57.308953Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:58.370263Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:58.370400Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:51:46.420882Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 46.94, + "mem_usage_gb": 30.64, + "kv_cache_gb": null + } + ], + "total_memory_snapshots": 12 + }, + { + "metadata": { + "node_name": "eos0222", + "worker_type": "decode", + "worker_id": "w1" + }, + "batches_sample": [ + { + "timestamp": "2025-12-30T15:52:37.722171Z", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": 0.01, + "running_req": 5, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:38.341097Z", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": 0.01, + "running_req": 5, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:38.931990Z", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": 0.01, + "running_req": 5, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:39.599897Z", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": 0.01, + "running_req": 5, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:40.187303Z", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 
null, + "cached_token": null, + "token_usage": 0.01, + "running_req": 5, + "queue_req": 0 + } + ], + "total_batches": 3864, + "memory_snapshots_sample": [ + { + "timestamp": "2025-12-30T15:49:52.655883Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:52.656957Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:53.708240Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:53.711032Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:51:46.134369Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 46.94, + "mem_usage_gb": 30.64, + "kv_cache_gb": null + } + ], + "total_memory_snapshots": 12 + }, + { + "metadata": { + "node_name": "eos0222", + "worker_type": "prefill", + "worker_id": "w4" + }, + "batches_sample": [ + { + "timestamp": "2025-12-30T15:52:32.923440Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:39.722934Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:53:22.456085Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 6784, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:53:23.846651Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 8192, + "cached_token": 512, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:53:24.000062Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 8192, + "cached_token": 0, + "token_usage": 0.03, + "running_req": 0, + "queue_req": 0 + } + ], + "total_batches": 3414, + "memory_snapshots_sample": [ + { + "timestamp": "2025-12-30T15:49:54.137445Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:54.139498Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:55.197554Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:55.197829Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:51:46.171554Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 46.94, + "mem_usage_gb": 30.64, + "kv_cache_gb": null + } + ], + "total_memory_snapshots": 8 + }, + { + "metadata": { + "node_name": "eos0222", + "worker_type": "prefill", + "worker_id": "w5" + }, + "batches_sample": [ 
+ { + "timestamp": "2025-12-30T15:52:33.012108Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:39.103041Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:52:39.719468Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 576, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:53:22.380331Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 2304, + "cached_token": 0, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + }, + { + "timestamp": "2025-12-30T15:53:22.779594Z", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": 1, + "new_token": 8192, + "cached_token": 512, + "token_usage": 0.0, + "running_req": 0, + "queue_req": 0 + } + ], + "total_batches": 3553, + "memory_snapshots_sample": [ + { + "timestamp": "2025-12-30T15:49:54.770786Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:54.770827Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": 1.01, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:55.885527Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:49:55.886845Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 77.58, + "mem_usage_gb": null, + "kv_cache_gb": null + }, + { + "timestamp": "2025-12-30T15:51:46.420708Z", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 46.94, + "mem_usage_gb": 30.64, + "kv_cache_gb": null + } + ], + "total_memory_snapshots": 8 + } + ] +} \ No newline at end of file diff --git a/rollup_sa_bench_553.json b/rollup_sa_bench_553.json new file mode 100644 index 00000000..ab1fe849 --- /dev/null +++ b/rollup_sa_bench_553.json @@ -0,0 +1,963 @@ +{ + "metadata": { + "job_id": "553", + "job_name": "ctx2_gen5_tep8_batch128_eplb0_mtp0", + "run_date": "2026-01-15 07:14:56", + "mode": "disaggregated", + "container": "/home/nlevin/containers/jwillthomson_dynamo-0.8.0_trtllm-1.2.0rc6-post1-router-patch-x86.sqsh", + "prefill_nodes": 2, + "decode_nodes": 5, + "prefill_workers": 2, + "decode_workers": 5, + "model_dir": "/models/dsr1-fp8/", + "gpus_per_node": 8, + "gpu_type": "h200", + "partition": "" + }, + "profiler_metadata": { + "profiler_type": "sa-bench", + "isl": "8192", + "osl": "1024", + "concurrencies": "" + }, + "profiler_results": { + "output_tps": [ + 1748.0954656979873 + ], + "request_throughput": [ + 1.9035306394778506 + ], + "concurrency_values": [ + 32 + ], + "mean_ttft_ms": [ + 895.2331849702091 + ], + "mean_tpot_ms": [ + 16.621064921028896 + ], + "mean_itl_ms": [ + 1573.2346725090367 + ], + "mean_e2el_ms": [ + 16142.089245179704 + ], + "p99_ttft_ms": [ + 5329.597819654006 + ], + "median_ttft_ms": [ + 487.5694230067893 + ], + "total_input_tokens": [ + 1885733 + ], + "total_output_tokens": [ + 235096 + ], + "total_tps": [ + 15769.777275754572 + ], + "request_goodput": [ + null 
+ ] + }, + "benchmark_command": null, + "node_metrics": [ + { + "metadata": { + "node_name": "worker-0", + "worker_type": "prefill", + "worker_id": "w0" + }, + "batches_sample": [ + { + "timestamp": "01/15/2026-07:52:14", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 16462, + "cached_token": null, + "token_usage": null, + "running_req": 2, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:52:44", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 2, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:05", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 6, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:29", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 7237, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:29", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 15157, + "cached_token": null, + "token_usage": null, + "running_req": 2, + "queue_req": null + } + ], + "total_batches": 145, + "memory_snapshots_sample": [ + { + "timestamp": "01/15/2026-07:52:44", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 42.64, + "mem_usage_gb": 97.17, + "kv_cache_gb": 36.71 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.54 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.54 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.54 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.54 + } + ], + "total_memory_snapshots": 17 + }, + { + "metadata": { + "node_name": "worker-10", + "worker_type": "decode", + "worker_id": "w1" + }, + "batches_sample": [ + { + "timestamp": "01/15/2026-07:51:56", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 128, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:51:56", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 128, + "cached_token": null, + "token_usage": null, + "running_req": 0, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:28", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 0, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:28", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:28", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + } + ], + 
"total_batches": 10483, + "memory_snapshots_sample": [ + { + "timestamp": "01/15/2026-07:51:56", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 49.769999999999996, + "mem_usage_gb": 90.04, + "kv_cache_gb": 42.31 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + } + ], + "total_memory_snapshots": 17 + }, + { + "metadata": { + "node_name": "worker-13", + "worker_type": "decode", + "worker_id": "w0" + }, + "batches_sample": [ + { + "timestamp": "01/15/2026-07:51:58", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 128, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:51:58", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 128, + "cached_token": null, + "token_usage": null, + "running_req": 0, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:29", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 0, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:29", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:29", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + } + ], + "total_batches": 10169, + "memory_snapshots_sample": [ + { + "timestamp": "01/15/2026-07:51:59", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 49.769999999999996, + "mem_usage_gb": 90.04, + "kv_cache_gb": 42.31 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + } + ], + "total_memory_snapshots": 17 + }, + { + "metadata": { + "node_name": "worker-3", + "worker_type": "prefill", + "worker_id": "w1" + }, + "batches_sample": [ + { + "timestamp": "01/15/2026-07:52:02", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 16462, + "cached_token": null, + "token_usage": null, + "running_req": 2, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:52:31", + "batch_type": 
"prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 2, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:17", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 7237, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:28", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 7016, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:29", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 14902, + "cached_token": null, + "token_usage": null, + "running_req": 2, + "queue_req": null + } + ], + "total_batches": 139, + "memory_snapshots_sample": [ + { + "timestamp": "01/15/2026-07:52:31", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 42.64, + "mem_usage_gb": 97.17, + "kv_cache_gb": 36.71 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.54 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.54 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.54 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.54 + } + ], + "total_memory_snapshots": 17 + }, + { + "metadata": { + "node_name": "worker-4", + "worker_type": "decode", + "worker_id": "w4" + }, + "batches_sample": [ + { + "timestamp": "01/15/2026-07:51:58", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 128, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:51:58", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 128, + "cached_token": null, + "token_usage": null, + "running_req": 0, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:29", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 0, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:29", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:29", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + } + ], + "total_batches": 10395, + "memory_snapshots_sample": [ + { + "timestamp": "01/15/2026-07:51:58", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 49.769999999999996, + "mem_usage_gb": 90.04, + "kv_cache_gb": 42.31 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + 
"metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + } + ], + "total_memory_snapshots": 17 + }, + { + "metadata": { + "node_name": "worker-6", + "worker_type": "decode", + "worker_id": "w2" + }, + "batches_sample": [ + { + "timestamp": "01/15/2026-07:51:55", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 128, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:51:55", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 128, + "cached_token": null, + "token_usage": null, + "running_req": 0, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:05", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 0, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:05", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:05", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + } + ], + "total_batches": 12129, + "memory_snapshots_sample": [ + { + "timestamp": "01/15/2026-07:51:55", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 49.769999999999996, + "mem_usage_gb": 90.04, + "kv_cache_gb": 42.31 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + } + ], + "total_memory_snapshots": 17 + }, + { + "metadata": { + "node_name": "worker-7", + "worker_type": "decode", + "worker_id": "w3" + }, + "batches_sample": [ + { + "timestamp": "01/15/2026-07:51:55", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 128, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:51:55", + "batch_type": "prefill", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": 128, + "cached_token": null, + "token_usage": null, + "running_req": 0, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:29", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 0, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:29", + "batch_type": 
"decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + }, + { + "timestamp": "01/15/2026-07:53:29", + "batch_type": "decode", + "dp": 0, + "tp": 0, + "ep": 0, + "new_seq": null, + "new_token": null, + "cached_token": null, + "token_usage": null, + "running_req": 1, + "queue_req": null + } + ], + "total_batches": 10415, + "memory_snapshots_sample": [ + { + "timestamp": "01/15/2026-07:51:55", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": 49.769999999999996, + "mem_usage_gb": 90.04, + "kv_cache_gb": 42.31 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + }, + { + "timestamp": "", + "dp": 0, + "tp": 0, + "ep": 0, + "metric_type": "memory", + "avail_mem_gb": null, + "mem_usage_gb": null, + "kv_cache_gb": 0.01 + } + ], + "total_memory_snapshots": 17 + } + ] +} \ No newline at end of file diff --git a/src/srtctl/benchmarks/scripts/gpqa/bench.sh b/src/srtctl/benchmarks/scripts/gpqa/bench.sh index 064ad04a..bf2c8695 100644 --- a/src/srtctl/benchmarks/scripts/gpqa/bench.sh +++ b/src/srtctl/benchmarks/scripts/gpqa/bench.sh @@ -26,9 +26,18 @@ export OPENAI_API_KEY="${OPENAI_API_KEY:-EMPTY}" echo "Running GPQA evaluation..." -command="python3 -m sglang.test.run_eval --base-url ${ENDPOINT} --model ${MODEL_NAME} --eval-name gpqa --num-examples ${NUM_EXAMPLES} --max-tokens ${MAX_TOKENS} --repeat ${REPEAT} --num-threads ${NUM_THREADS}" -echo "[CMD] $command" -eval "$command" +cmd=( + python3 -m sglang.test.run_eval + --base-url "$ENDPOINT" + --model "$MODEL_NAME" + --eval-name gpqa + --num-examples "$NUM_EXAMPLES" + --max-tokens "$MAX_TOKENS" + --repeat "$REPEAT" + --num-threads "$NUM_THREADS" +) +printf "[CMD] %s\n" "${cmd[*]}" +"${cmd[@]}" # Copy result file result_file=$(ls -t /tmp/gpqa_*.json 2>/dev/null | head -n1) diff --git a/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh b/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh index 0d4235ee..5cbc81c7 100644 --- a/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh +++ b/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh @@ -27,20 +27,28 @@ export OPENAI_API_KEY="${OPENAI_API_KEY:-EMPTY}" echo "Running LongBench-v2 evaluation..." 
-# Build command -command="python3 -m sglang.test.run_eval --base-url ${ENDPOINT} --model ${MODEL_NAME} --eval-name longbench_v2 --max-tokens ${MAX_TOKENS} --max-context-length ${MAX_CONTEXT_LENGTH} --num-threads ${NUM_THREADS}" +# Build command array +cmd=( + python3 -m sglang.test.run_eval + --base-url "$ENDPOINT" + --model "$MODEL_NAME" + --eval-name longbench_v2 + --max-tokens "$MAX_TOKENS" + --max-context-length "$MAX_CONTEXT_LENGTH" + --num-threads "$NUM_THREADS" +) # Add optional arguments if [ -n "$NUM_EXAMPLES" ]; then - command="$command --num-examples ${NUM_EXAMPLES}" + cmd+=(--num-examples "$NUM_EXAMPLES") fi if [ -n "$CATEGORIES" ]; then - command="$command --categories ${CATEGORIES}" + cmd+=(--categories "$CATEGORIES") fi -echo "[CMD] $command" -eval "$command" +printf "[CMD] %s\n" "${cmd[*]}" +"${cmd[@]}" # Copy result files result_file=$(ls -t /tmp/longbench_v2_*.json 2>/dev/null | head -n1) diff --git a/src/srtctl/benchmarks/scripts/mmlu/bench.sh b/src/srtctl/benchmarks/scripts/mmlu/bench.sh index f1389d00..2ccd9ba3 100644 --- a/src/srtctl/benchmarks/scripts/mmlu/bench.sh +++ b/src/srtctl/benchmarks/scripts/mmlu/bench.sh @@ -26,9 +26,18 @@ export OPENAI_API_KEY="${OPENAI_API_KEY:-EMPTY}" echo "Running MMLU evaluation..." -command="python3 -m sglang.test.run_eval --base-url ${ENDPOINT} --model ${MODEL_NAME} --eval-name mmlu --num-examples ${NUM_EXAMPLES} --max-tokens ${MAX_TOKENS} --repeat ${REPEAT} --num-threads ${NUM_THREADS}" -echo "[CMD] $command" -eval "$command" +cmd=( + python3 -m sglang.test.run_eval + --base-url "$ENDPOINT" + --model "$MODEL_NAME" + --eval-name mmlu + --num-examples "$NUM_EXAMPLES" + --max-tokens "$MAX_TOKENS" + --repeat "$REPEAT" + --num-threads "$NUM_THREADS" +) +printf "[CMD] %s\n" "${cmd[*]}" +"${cmd[@]}" # Copy result file result_file=$(ls -t /tmp/mmlu_*.json 2>/dev/null | head -n1) diff --git a/src/srtctl/benchmarks/scripts/profiling/profile.sh b/src/srtctl/benchmarks/scripts/profiling/profile.sh index a024a821..92ee4a0b 100644 --- a/src/srtctl/benchmarks/scripts/profiling/profile.sh +++ b/src/srtctl/benchmarks/scripts/profiling/profile.sh @@ -131,14 +131,35 @@ if [[ "${PROFILING_MODE}" == "prefill" ]]; then echo "" echo "Generating profiling traffic..." 
- command="python3 -m sglang.bench_serving --backend sglang --model ${model_name} --host ${head_node} --port ${head_port} --dataset-name random --max-concurrency ${PROFILE_CONCURRENCY} --num-prompts 128 --random-input-len ${PROFILE_ISL} --random-output-len ${PROFILE_OSL} --random-range-ratio 1 --warmup-request 0" - echo "[CMD] $command" - eval "$command" + cmd=( + python3 -m sglang.bench_serving + --backend sglang + --model "${model_name}" + --host "${head_node}" + --port "${head_port}" + --dataset-name random + --max-concurrency "${PROFILE_CONCURRENCY}" + --num-prompts 128 + --random-input-len "${PROFILE_ISL}" + --random-output-len "${PROFILE_OSL}" + --random-range-ratio 1 + --warmup-request 0 + ) + printf "[CMD] %s\n" "${cmd[*]}" + "${cmd[@]}" # Run lm-eval for additional profiling coverage - command="python -m lm_eval --model local-completions --tasks gsm8k --model_args \"base_url=http://${head_node}:${head_port}/v1/completions,model=${model_name},tokenized_requests=False,tokenizer_backend=None,num_concurrent=${PROFILE_CONCURRENCY},timeout=6000,max_retries=1\" --limit 10" - echo "[CMD-LM-EVAL] $command" - eval "$command" + # Note: model_args must be a single array element to prevent splitting + model_args="base_url=http://${head_node}:${head_port}/v1/completions,model=${model_name},tokenized_requests=False,tokenizer_backend=None,num_concurrent=${PROFILE_CONCURRENCY},timeout=6000,max_retries=1" + lm_cmd=( + python -m lm_eval + --model local-completions + --tasks gsm8k + --model_args "${model_args}" + --limit 10 + ) + printf "[CMD-LM-EVAL] %s\n" "${lm_cmd[*]}" + "${lm_cmd[@]}" fi exit_code=$? diff --git a/src/srtctl/benchmarks/scripts/sa-bench/bench.sh b/src/srtctl/benchmarks/scripts/sa-bench/bench.sh index 0f53d58b..64ec7fa0 100644 --- a/src/srtctl/benchmarks/scripts/sa-bench/bench.sh +++ b/src/srtctl/benchmarks/scripts/sa-bench/bench.sh @@ -52,9 +52,27 @@ mkdir -p "$result_dir" for concurrency in "${CONCURRENCY_LIST[@]}"; do num_warmup_prompts=$((concurrency * 2)) - command="python3 -u ${WORK_DIR}/benchmark_serving.py --model ${MODEL_NAME} --tokenizer ${MODEL_PATH} --host $HOST --port $PORT --backend dynamo --endpoint /v1/completions --disable-tqdm --dataset-name random --num-prompts $num_prompts --random-input-len $ISL --random-output-len $OSL --random-range-ratio 0.8 --ignore-eos --request-rate 250 --percentile-metrics ttft,tpot,itl,e2el --max-concurrency $concurrency" - echo "[CMD-WARMUP] $command" - eval "$command" + cmd=( + python3 -u "${WORK_DIR}/benchmark_serving.py" + --model "${MODEL_NAME}" + --tokenizer "${MODEL_PATH}" + --host "$HOST" + --port "$PORT" + --backend dynamo + --endpoint /v1/completions + --disable-tqdm + --dataset-name random + --num-prompts "$num_warmup_prompts" + --random-input-len "$ISL" + --random-output-len "$OSL" + --random-range-ratio 0.8 + --ignore-eos + --request-rate 250 + --percentile-metrics ttft,tpot,itl,e2el + --max-concurrency "$concurrency" + ) + echo "[CMD-WARMUP] ${cmd[*]}" + "${cmd[@]}" num_prompts=$((concurrency * 10)) @@ -68,9 +86,31 @@ for concurrency in "${CONCURRENCY_LIST[@]}"; do echo "Running benchmark with concurrency: $concurrency" echo "$(date '+%Y-%m-%d %H:%M:%S')" - command="python3 -u ${WORK_DIR}/benchmark_serving.py --model ${MODEL_NAME} --tokenizer ${MODEL_PATH} --host $HOST --port $PORT --backend dynamo --endpoint /v1/completions --disable-tqdm --dataset-name random --num-prompts $num_prompts --random-input-len $ISL --random-output-len $OSL --random-range-ratio 0.8 --ignore-eos --request-rate ${REQ_RATE} --percentile-metrics 
ttft,tpot,itl,e2el --max-concurrency $concurrency --use-chat-template --save-result --result-dir $result_dir --result-filename $result_filename" - echo "[CMD] $command" - eval "$command" + cmd=( + python3 -u "${WORK_DIR}/benchmark_serving.py" + --model "${MODEL_NAME}" + --tokenizer "${MODEL_PATH}" + --host "$HOST" + --port "$PORT" + --backend dynamo + --endpoint /v1/completions + --disable-tqdm + --dataset-name random + --num-prompts "$num_prompts" + --random-input-len "$ISL" + --random-output-len "$OSL" + --random-range-ratio 0.8 + --ignore-eos + --request-rate "${REQ_RATE}" + --percentile-metrics ttft,tpot,itl,e2el + --max-concurrency "$concurrency" + --use-chat-template + --save-result + --result-dir "$result_dir" + --result-filename "$result_filename" + ) + echo "[CMD] ${cmd[*]}" + "${cmd[@]}" echo "$(date '+%Y-%m-%d %H:%M:%S')" echo "Completed benchmark with concurrency: $concurrency" diff --git a/tests/fixtures_parsers.py b/tests/fixtures_parsers.py index 58938644..666872e1 100644 --- a/tests/fixtures_parsers.py +++ b/tests/fixtures_parsers.py @@ -342,7 +342,8 @@ def assert_valid_benchmark_results(results: dict, expected_fields: list[str] | N assert field in results, f"Missing expected field: {field}" value = results[field] # Check it's not None and if it's a list, check it's not empty - assert value is not None or value == [], f"Field {field} is None" + assert value is not None and (not isinstance(value, list) or len(value) > 0), \ + f"Field {field} is None or empty list" @staticmethod def assert_valid_node_metrics(node_metrics, min_batches: int = 0): From 05649d859209d7d368fcb11bb3ef739460715bc6 Mon Sep 17 00:00:00 2001 From: Kaunil Dhruv Date: Mon, 26 Jan 2026 14:36:57 -0800 Subject: [PATCH 06/15] deleted debug files --- rollup_mooncake_4405566.json | 1091 ---------------------------------- rollup_sa_bench_553.json | 963 ------------------------------ 2 files changed, 2054 deletions(-) delete mode 100644 rollup_mooncake_4405566.json delete mode 100644 rollup_sa_bench_553.json diff --git a/rollup_mooncake_4405566.json b/rollup_mooncake_4405566.json deleted file mode 100644 index 5479d871..00000000 --- a/rollup_mooncake_4405566.json +++ /dev/null @@ -1,1091 +0,0 @@ -{ - "metadata": { - "job_id": "4405566", - "job_name": "disagg-kv-dynamo", - "run_date": "2025-12-30 07:45:37", - "mode": "disaggregated", - "container": "/lustre/fsw/coreai_tritoninference_triton3/idhanani/lmsysorg+sglang+v0.5.6.post2.sqsh", - "prefill_nodes": 2, - "decode_nodes": 0, - "prefill_workers": 6, - "decode_workers": 2, - "model_dir": "/lustre/fsw/coreai_tritoninference_triton3/idhanani/qwen32b", - "gpus_per_node": 8, - "gpu_type": "h100", - "partition": "" - }, - "profiler_metadata": { - "profiler_type": "mooncake-router", - "isl": "None", - "osl": "None", - "concurrencies": "" - }, - "profiler_results": { - "output_tps": [ - 1150.9244871905873 - ], - "request_throughput": [ - 3.372979548010381 - ], - "concurrency_values": [ - 0 - ], - "mean_ttft_ms": [ - 40618.77639945425 - ], - "mean_tpot_ms": [ - 23.95447833171341 - ], - "mean_itl_ms": [ - 23.95447833171341 - ], - "mean_e2el_ms": [ - 48790.48244293251 - ], - "p99_ttft_ms": [ - 90226.47291000008 - ], - "median_ttft_ms": [ - 43941.271566999996 - ], - "total_input_tokens": [ - null - ], - "total_output_tokens": [ - null - ], - "total_tps": [ - null - ], - "request_goodput": [ - null - ] - }, - "benchmark_command": null, - "node_metrics": [ - { - "metadata": { - "node_name": "eos0219", - "worker_type": "prefill", - "worker_id": "w0" - }, - "batches_sample": [ 
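Aside on the fixtures_parsers.py hunk above: it tightens an assertion that previously let empty lists pass, because value == [] short-circuited the old or-condition to True. A small self-contained illustration of the two predicates (plain Python, no test framework assumed; the names old_ok and new_ok are ours):

    #!/usr/bin/env python3
    """Sketch: why the fixtures assertion above was tightened."""

    def old_ok(value):
        # The pre-patch condition: an empty list satisfies value == [].
        return value is not None or value == []

    def new_ok(value):
        # The post-patch condition: reject None and empty lists, keep
        # non-empty lists and non-list scalars.
        return value is not None and (not isinstance(value, list) or len(value) > 0)

    for value in (None, [], [1.0], 0, "x"):
        print(f"{value!r:>8}: old={old_ok(value)!s:<5} new={new_ok(value)}")

The printout shows the one divergence: [] passed the old check but fails the new one, while None, non-empty lists, and scalars behave identically under both.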
- { - "timestamp": "2025-12-30T15:52:33.006497Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 2, - "new_token": 1152, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:36.982941Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:39.574408Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:39.725257Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:39.753706Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - } - ], - "total_batches": 3455, - "memory_snapshots_sample": [ - { - "timestamp": "2025-12-30T15:49:50.419744Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:50.419758Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:51.513673Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:51.514008Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:51:35.770502Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 46.94, - "mem_usage_gb": 30.64, - "kv_cache_gb": null - } - ], - "total_memory_snapshots": 8 - }, - { - "metadata": { - "node_name": "eos0219", - "worker_type": "prefill", - "worker_id": "w1" - }, - "batches_sample": [ - { - "timestamp": "2025-12-30T15:52:33.012601Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:36.927954Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:39.723525Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 2, - "new_token": 1152, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:53:22.456717Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 7360, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:53:25.404620Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 6848, - "cached_token": 512, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - } - ], - "total_batches": 3517, - "memory_snapshots_sample": [ - { - "timestamp": 
"2025-12-30T15:49:54.916394Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:54.916394Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:55.991768Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:55.992136Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:51:35.967407Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 46.94, - "mem_usage_gb": 30.64, - "kv_cache_gb": null - } - ], - "total_memory_snapshots": 8 - }, - { - "metadata": { - "node_name": "eos0219", - "worker_type": "prefill", - "worker_id": "w2" - }, - "batches_sample": [ - { - "timestamp": "2025-12-30T15:52:32.929926Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:36.762916Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:39.705654Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:53:22.527700Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 7296, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:53:25.340856Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 5824, - "cached_token": 512, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - } - ], - "total_batches": 3464, - "memory_snapshots_sample": [ - { - "timestamp": "2025-12-30T15:49:47.416555Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:47.416798Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:48.628013Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:48.628441Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:51:35.967487Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 46.94, - "mem_usage_gb": 30.64, - "kv_cache_gb": null - } - ], - "total_memory_snapshots": 8 - }, - { - "metadata": { - "node_name": "eos0219", - "worker_type": "prefill", - "worker_id": "w3" - }, - "batches_sample": [ - { - "timestamp": "2025-12-30T15:52:33.005841Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - 
"token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:39.733732Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:53:22.527290Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 6784, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:53:23.473058Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 8192, - "cached_token": 512, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:53:23.628404Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 8192, - "cached_token": 0, - "token_usage": 0.03, - "running_req": 0, - "queue_req": 0 - } - ], - "total_batches": 3532, - "memory_snapshots_sample": [ - { - "timestamp": "2025-12-30T15:49:58.600285Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:58.600316Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:59.729690Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:59.730974Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:51:35.824848Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 46.94, - "mem_usage_gb": 30.64, - "kv_cache_gb": null - } - ], - "total_memory_snapshots": 8 - }, - { - "metadata": { - "node_name": "eos0222", - "worker_type": "decode", - "worker_id": "w0" - }, - "batches_sample": [ - { - "timestamp": "2025-12-30T15:52:38.206058Z", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": 0.01, - "running_req": 5, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:38.803501Z", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": 0.01, - "running_req": 5, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:39.402332Z", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": 0.01, - "running_req": 5, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:40.163757Z", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": 0.01, - "running_req": 5, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:40.761164Z", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": 0.01, - "running_req": 5, - "queue_req": 0 - } - ], - "total_batches": 3846, - "memory_snapshots_sample": [ - { - "timestamp": "2025-12-30T15:49:57.308943Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - 
"kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:57.308953Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:58.370263Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:58.370400Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:51:46.420882Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 46.94, - "mem_usage_gb": 30.64, - "kv_cache_gb": null - } - ], - "total_memory_snapshots": 12 - }, - { - "metadata": { - "node_name": "eos0222", - "worker_type": "decode", - "worker_id": "w1" - }, - "batches_sample": [ - { - "timestamp": "2025-12-30T15:52:37.722171Z", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": 0.01, - "running_req": 5, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:38.341097Z", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": 0.01, - "running_req": 5, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:38.931990Z", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": 0.01, - "running_req": 5, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:39.599897Z", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": 0.01, - "running_req": 5, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:40.187303Z", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": 0.01, - "running_req": 5, - "queue_req": 0 - } - ], - "total_batches": 3864, - "memory_snapshots_sample": [ - { - "timestamp": "2025-12-30T15:49:52.655883Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:52.656957Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:53.708240Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:53.711032Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:51:46.134369Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 46.94, - "mem_usage_gb": 30.64, - "kv_cache_gb": null - } - ], - "total_memory_snapshots": 12 - }, - { - "metadata": { - "node_name": "eos0222", - "worker_type": "prefill", - "worker_id": "w4" - }, - "batches_sample": [ - { - "timestamp": "2025-12-30T15:52:32.923440Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:39.722934Z", - 
"batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:53:22.456085Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 6784, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:53:23.846651Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 8192, - "cached_token": 512, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:53:24.000062Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 8192, - "cached_token": 0, - "token_usage": 0.03, - "running_req": 0, - "queue_req": 0 - } - ], - "total_batches": 3414, - "memory_snapshots_sample": [ - { - "timestamp": "2025-12-30T15:49:54.137445Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:54.139498Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:55.197554Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:55.197829Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:51:46.171554Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 46.94, - "mem_usage_gb": 30.64, - "kv_cache_gb": null - } - ], - "total_memory_snapshots": 8 - }, - { - "metadata": { - "node_name": "eos0222", - "worker_type": "prefill", - "worker_id": "w5" - }, - "batches_sample": [ - { - "timestamp": "2025-12-30T15:52:33.012108Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:39.103041Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:52:39.719468Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 576, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:53:22.380331Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 2304, - "cached_token": 0, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - }, - { - "timestamp": "2025-12-30T15:53:22.779594Z", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": 1, - "new_token": 8192, - "cached_token": 512, - "token_usage": 0.0, - "running_req": 0, - "queue_req": 0 - } - ], - "total_batches": 3553, - "memory_snapshots_sample": [ - { - "timestamp": "2025-12-30T15:49:54.770786Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:54.770827Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - 
"avail_mem_gb": null, - "mem_usage_gb": 1.01, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:55.885527Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:49:55.886845Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 77.58, - "mem_usage_gb": null, - "kv_cache_gb": null - }, - { - "timestamp": "2025-12-30T15:51:46.420708Z", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 46.94, - "mem_usage_gb": 30.64, - "kv_cache_gb": null - } - ], - "total_memory_snapshots": 8 - } - ] -} \ No newline at end of file diff --git a/rollup_sa_bench_553.json b/rollup_sa_bench_553.json deleted file mode 100644 index ab1fe849..00000000 --- a/rollup_sa_bench_553.json +++ /dev/null @@ -1,963 +0,0 @@ -{ - "metadata": { - "job_id": "553", - "job_name": "ctx2_gen5_tep8_batch128_eplb0_mtp0", - "run_date": "2026-01-15 07:14:56", - "mode": "disaggregated", - "container": "/home/nlevin/containers/jwillthomson_dynamo-0.8.0_trtllm-1.2.0rc6-post1-router-patch-x86.sqsh", - "prefill_nodes": 2, - "decode_nodes": 5, - "prefill_workers": 2, - "decode_workers": 5, - "model_dir": "/models/dsr1-fp8/", - "gpus_per_node": 8, - "gpu_type": "h200", - "partition": "" - }, - "profiler_metadata": { - "profiler_type": "sa-bench", - "isl": "8192", - "osl": "1024", - "concurrencies": "" - }, - "profiler_results": { - "output_tps": [ - 1748.0954656979873 - ], - "request_throughput": [ - 1.9035306394778506 - ], - "concurrency_values": [ - 32 - ], - "mean_ttft_ms": [ - 895.2331849702091 - ], - "mean_tpot_ms": [ - 16.621064921028896 - ], - "mean_itl_ms": [ - 1573.2346725090367 - ], - "mean_e2el_ms": [ - 16142.089245179704 - ], - "p99_ttft_ms": [ - 5329.597819654006 - ], - "median_ttft_ms": [ - 487.5694230067893 - ], - "total_input_tokens": [ - 1885733 - ], - "total_output_tokens": [ - 235096 - ], - "total_tps": [ - 15769.777275754572 - ], - "request_goodput": [ - null - ] - }, - "benchmark_command": null, - "node_metrics": [ - { - "metadata": { - "node_name": "worker-0", - "worker_type": "prefill", - "worker_id": "w0" - }, - "batches_sample": [ - { - "timestamp": "01/15/2026-07:52:14", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 16462, - "cached_token": null, - "token_usage": null, - "running_req": 2, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:52:44", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 2, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:05", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 6, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:29", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 7237, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:29", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 15157, - "cached_token": null, - "token_usage": null, - "running_req": 2, - "queue_req": null - } - ], - "total_batches": 145, - "memory_snapshots_sample": [ - { - "timestamp": "01/15/2026-07:52:44", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - 
"avail_mem_gb": 42.64, - "mem_usage_gb": 97.17, - "kv_cache_gb": 36.71 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.54 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.54 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.54 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.54 - } - ], - "total_memory_snapshots": 17 - }, - { - "metadata": { - "node_name": "worker-10", - "worker_type": "decode", - "worker_id": "w1" - }, - "batches_sample": [ - { - "timestamp": "01/15/2026-07:51:56", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 128, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:51:56", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 128, - "cached_token": null, - "token_usage": null, - "running_req": 0, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:28", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 0, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:28", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:28", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - } - ], - "total_batches": 10483, - "memory_snapshots_sample": [ - { - "timestamp": "01/15/2026-07:51:56", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 49.769999999999996, - "mem_usage_gb": 90.04, - "kv_cache_gb": 42.31 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - } - ], - "total_memory_snapshots": 17 - }, - { - "metadata": { - "node_name": "worker-13", - "worker_type": "decode", - "worker_id": "w0" - }, - "batches_sample": [ - { - "timestamp": "01/15/2026-07:51:58", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 128, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:51:58", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 128, - "cached_token": null, - "token_usage": null, - "running_req": 0, - "queue_req": null - }, - { - 
"timestamp": "01/15/2026-07:53:29", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 0, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:29", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:29", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - } - ], - "total_batches": 10169, - "memory_snapshots_sample": [ - { - "timestamp": "01/15/2026-07:51:59", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 49.769999999999996, - "mem_usage_gb": 90.04, - "kv_cache_gb": 42.31 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - } - ], - "total_memory_snapshots": 17 - }, - { - "metadata": { - "node_name": "worker-3", - "worker_type": "prefill", - "worker_id": "w1" - }, - "batches_sample": [ - { - "timestamp": "01/15/2026-07:52:02", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 16462, - "cached_token": null, - "token_usage": null, - "running_req": 2, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:52:31", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 2, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:17", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 7237, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:28", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 7016, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:29", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 14902, - "cached_token": null, - "token_usage": null, - "running_req": 2, - "queue_req": null - } - ], - "total_batches": 139, - "memory_snapshots_sample": [ - { - "timestamp": "01/15/2026-07:52:31", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 42.64, - "mem_usage_gb": 97.17, - "kv_cache_gb": 36.71 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.54 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.54 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - 
"avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.54 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.54 - } - ], - "total_memory_snapshots": 17 - }, - { - "metadata": { - "node_name": "worker-4", - "worker_type": "decode", - "worker_id": "w4" - }, - "batches_sample": [ - { - "timestamp": "01/15/2026-07:51:58", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 128, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:51:58", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 128, - "cached_token": null, - "token_usage": null, - "running_req": 0, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:29", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 0, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:29", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:29", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - } - ], - "total_batches": 10395, - "memory_snapshots_sample": [ - { - "timestamp": "01/15/2026-07:51:58", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 49.769999999999996, - "mem_usage_gb": 90.04, - "kv_cache_gb": 42.31 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - } - ], - "total_memory_snapshots": 17 - }, - { - "metadata": { - "node_name": "worker-6", - "worker_type": "decode", - "worker_id": "w2" - }, - "batches_sample": [ - { - "timestamp": "01/15/2026-07:51:55", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 128, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:51:55", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 128, - "cached_token": null, - "token_usage": null, - "running_req": 0, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:05", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 0, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:05", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": 
"01/15/2026-07:53:05", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - } - ], - "total_batches": 12129, - "memory_snapshots_sample": [ - { - "timestamp": "01/15/2026-07:51:55", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 49.769999999999996, - "mem_usage_gb": 90.04, - "kv_cache_gb": 42.31 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - } - ], - "total_memory_snapshots": 17 - }, - { - "metadata": { - "node_name": "worker-7", - "worker_type": "decode", - "worker_id": "w3" - }, - "batches_sample": [ - { - "timestamp": "01/15/2026-07:51:55", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 128, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:51:55", - "batch_type": "prefill", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": 128, - "cached_token": null, - "token_usage": null, - "running_req": 0, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:29", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 0, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:29", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - }, - { - "timestamp": "01/15/2026-07:53:29", - "batch_type": "decode", - "dp": 0, - "tp": 0, - "ep": 0, - "new_seq": null, - "new_token": null, - "cached_token": null, - "token_usage": null, - "running_req": 1, - "queue_req": null - } - ], - "total_batches": 10415, - "memory_snapshots_sample": [ - { - "timestamp": "01/15/2026-07:51:55", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": 49.769999999999996, - "mem_usage_gb": 90.04, - "kv_cache_gb": 42.31 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - }, - { - "timestamp": "", - "dp": 0, - "tp": 0, - "ep": 0, - "metric_type": "memory", - "avail_mem_gb": null, - "mem_usage_gb": null, - "kv_cache_gb": 0.01 - } - ], - "total_memory_snapshots": 17 - } - ] -} \ No newline at end of file From 71d13cbfca31ab0527f1ffdb9d58989e589253eb Mon Sep 17 00:00:00 2001 From: Kaunil Dhruv Date: Mon, 26 Jan 2026 14:40:50 -0800 Subject: [PATCH 07/15] router bench --- 
src/srtctl/benchmarks/scripts/router/bench.sh | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/srtctl/benchmarks/scripts/router/bench.sh b/src/srtctl/benchmarks/scripts/router/bench.sh index 052ba7bb..85376cd2 100644 --- a/src/srtctl/benchmarks/scripts/router/bench.sh +++ b/src/srtctl/benchmarks/scripts/router/bench.sh @@ -39,10 +39,18 @@ mkdir -p "$result_dir" echo "Running prefix ratio benchmark..." echo "Results will be saved to: $result_dir" -# shellcheck disable=SC2086 -command="python prefix_ratio_benchmark.py --prefix-ratios $PREFIX_RATIOS --isl $ISL --osl $OSL --requests $REQUESTS --concurrency $CONCURRENCY --output-dir $result_dir" -echo "[CMD] $command" -eval "$command" +cmd=( + python prefix_ratio_benchmark.py + --prefix-ratios "$PREFIX_RATIOS" + --isl "$ISL" + --osl "$OSL" + --requests "$REQUESTS" + --concurrency "$CONCURRENCY" + --output-dir "$result_dir" +) + +printf "[CMD] %s\n" "${cmd[*]}" +"${cmd[@]}" echo "Router benchmark complete. Results in $result_dir" From f06eee7d859dbfc55ad49c705d71d6d26999c568 Mon Sep 17 00:00:00 2001 From: Kaunil Dhruv Date: Mon, 26 Jan 2026 18:34:06 -0800 Subject: [PATCH 08/15] parser logic for timestamp --- analysis/dashboard/rate_match_tab.py | 50 +++++++++++++++++++++++-- analysis/srtlog/parsers/nodes/sglang.py | 24 ++++++++++++ analysis/srtlog/parsers/nodes/trtllm.py | 18 +++++++++ 3 files changed, 88 insertions(+), 4 deletions(-) diff --git a/analysis/dashboard/rate_match_tab.py b/analysis/dashboard/rate_match_tab.py index e86f48dd..a8cbb3a7 100644 --- a/analysis/dashboard/rate_match_tab.py +++ b/analysis/dashboard/rate_match_tab.py @@ -10,6 +10,48 @@ from analysis.dashboard.components import load_node_metrics +def _parse_timestamp(timestamp: str) -> datetime: + """Parse timestamp from multiple possible formats. + + Supports: + - ISO 8601: 2025-12-30T15:52:38.206058Z + - YYYY-MM-DD HH:MM:SS + - MM/DD/YYYY-HH:MM:SS (TRTLLM format) + + Args: + timestamp: Timestamp string in one of the supported formats + + Returns: + datetime object + + Raises: + ValueError: If timestamp format is not recognized + """ + # Try YYYY-MM-DD HH:MM:SS format first (most common) + try: + return datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S") + except ValueError: + pass + + # Try ISO 8601 format (SGLang) + try: + ts = timestamp.rstrip('Z') + if '.' in ts: + return datetime.fromisoformat(ts) + else: + return datetime.strptime(ts, "%Y-%m-%dT%H:%M:%S") + except ValueError: + pass + + # Try MM/DD/YYYY-HH:MM:SS format (TRTLLM) + try: + return datetime.strptime(timestamp, "%m/%d/%Y-%H:%M:%S") + except ValueError: + pass + + raise ValueError(f"Unable to parse timestamp: {timestamp}") + + def render(filtered_runs: list, logs_dir: str): """Render rate match analysis. 
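For reference, a minimal self-check of the fallback order _parse_timestamp implements above, assuming analysis.dashboard.rate_match_tab imports cleanly from the repo root; whichever format parses first wins:

    from datetime import datetime

    from analysis.dashboard.rate_match_tab import _parse_timestamp

    # Plain "YYYY-MM-DD HH:MM:SS" is tried first.
    assert _parse_timestamp("2025-12-30 15:52:38") == datetime(2025, 12, 30, 15, 52, 38)
    # SGLang ISO 8601: the trailing "Z" is stripped, then fromisoformat() handles microseconds.
    assert _parse_timestamp("2025-12-30T15:52:38.206058Z") == datetime(2025, 12, 30, 15, 52, 38, 206058)
    # TRTLLM "MM/DD/YYYY-HH:MM:SS" is the last format tried before ValueError.
    assert _parse_timestamp("01/23/2026-08:04:38") == datetime(2026, 1, 23, 8, 4, 38)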
@@ -139,8 +181,8 @@ def _create_rate_match_graph(prefill_nodes, decode_nodes, job_id="", show_reques avg_input_tps.append(avg / prefill_divisor) if timestamps: - first_time = datetime.strptime(timestamps[0], "%Y-%m-%d %H:%M:%S") - elapsed = [(datetime.strptime(ts, "%Y-%m-%d %H:%M:%S") - first_time).total_seconds() for ts in timestamps] + first_time = _parse_timestamp(timestamps[0]) + elapsed = [(_parse_timestamp(ts) - first_time).total_seconds() for ts in timestamps] unit = "req/s" if show_request_rate else "tok/s" rate_fig.add_trace( @@ -175,8 +217,8 @@ def _create_rate_match_graph(prefill_nodes, decode_nodes, job_id="", show_reques avg_gen_tps.append(avg / decode_divisor) if timestamps: - first_time = datetime.strptime(timestamps[0], "%Y-%m-%d %H:%M:%S") - elapsed = [(datetime.strptime(ts, "%Y-%m-%d %H:%M:%S") - first_time).total_seconds() for ts in timestamps] + first_time = _parse_timestamp(timestamps[0]) + elapsed = [(_parse_timestamp(ts) - first_time).total_seconds() for ts in timestamps] unit = "req/s" if show_request_rate else "tok/s" rate_fig.add_trace( diff --git a/analysis/srtlog/parsers/nodes/sglang.py b/analysis/srtlog/parsers/nodes/sglang.py index ea094f97..bd10adef 100644 --- a/analysis/srtlog/parsers/nodes/sglang.py +++ b/analysis/srtlog/parsers/nodes/sglang.py @@ -12,6 +12,7 @@ import logging import os import re +from datetime import datetime from pathlib import Path from typing import TYPE_CHECKING, Any @@ -33,11 +34,33 @@ class SGLangNodeParser: """Parser for SGLang node logs. Handles SGLang structured logging with ISO 8601 timestamps. May contain ANSI color codes which are stripped during parsing. + + Timestamp format: YYYY-MM-DDTHH:MM:SS.microsZ (e.g., 2025-12-30T15:52:38.206058Z) """ @property def backend_type(self) -> str: return "sglang" + + @staticmethod + def parse_timestamp(timestamp: str) -> datetime: + """Parse SGLang timestamp format to datetime object. + + Args: + timestamp: Timestamp string in ISO 8601 format (e.g., 2025-12-30T15:52:38.206058Z) + + Returns: + datetime object + + Raises: + ValueError: If timestamp format is invalid + """ + # Handle both with and without microseconds and timezone + timestamp = timestamp.rstrip('Z') + if '.' in timestamp: + return datetime.fromisoformat(timestamp) + else: + return datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S") def parse_logs(self, log_dir: Path) -> list[NodeInfo]: """Parse all prefill/decode/agg log files in a directory. @@ -210,6 +233,7 @@ def parse_single_log(self, log_path: Path) -> NodeInfo | None: def _parse_timestamp(self, line: str) -> str | None: """Extract ISO 8601 timestamp from log line. Example: 2025-12-30T15:52:38.206058Z + Returns the timestamp string as-is without conversion. 
""" match = re.search(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z?)", line) if match: diff --git a/analysis/srtlog/parsers/nodes/trtllm.py b/analysis/srtlog/parsers/nodes/trtllm.py index 067d0b00..c3fe3a62 100644 --- a/analysis/srtlog/parsers/nodes/trtllm.py +++ b/analysis/srtlog/parsers/nodes/trtllm.py @@ -13,6 +13,7 @@ import logging import os import re +from datetime import datetime from pathlib import Path from typing import TYPE_CHECKING, Any @@ -35,11 +36,28 @@ class TRTLLMNodeParser: - Launch command from dynamo.trtllm - Worker configuration from Config() dump - MPI rank and world size information + + Timestamp format: MM/DD/YYYY-HH:MM:SS (e.g., 01/23/2026-08:04:38) """ @property def backend_type(self) -> str: return "trtllm" + + @staticmethod + def parse_timestamp(timestamp: str) -> datetime: + """Parse TRTLLM timestamp format to datetime object. + + Args: + timestamp: Timestamp string in format MM/DD/YYYY-HH:MM:SS + + Returns: + datetime object + + Raises: + ValueError: If timestamp format is invalid + """ + return datetime.strptime(timestamp, "%m/%d/%Y-%H:%M:%S") def parse_logs(self, log_dir: Path) -> list[NodeInfo]: """Parse all TRTLLM node logs in a directory. From fe3b9aaca69a1b0a8d8f405ee586c9f04bdb933f Mon Sep 17 00:00:00 2001 From: Kaunil Dhruv Date: Mon, 26 Jan 2026 20:15:39 -0800 Subject: [PATCH 09/15] added beter fixtures --- analysis/srtlog/parsers/nodes/sglang.py | 103 +++++-- analysis/srtlog/parsers/nodes/trtllm.py | 101 +++++-- tests/fixtures_parsers.py | 28 +- tests/test_dashboard.py | 240 +++++++++++++++ tests/test_parsers.py | 382 +++++++++++++++++++++++- 5 files changed, 800 insertions(+), 54 deletions(-) create mode 100644 tests/test_dashboard.py diff --git a/analysis/srtlog/parsers/nodes/sglang.py b/analysis/srtlog/parsers/nodes/sglang.py index bd10adef..b7b7ee39 100644 --- a/analysis/srtlog/parsers/nodes/sglang.py +++ b/analysis/srtlog/parsers/nodes/sglang.py @@ -126,9 +126,9 @@ def parse_single_log(self, log_path: Path) -> NodeInfo | None: batches.append( BatchMetrics( timestamp=batch_metrics["timestamp"], - dp=0, # Default since not in log - tp=0, - ep=0, + dp=batch_metrics.get("dp", 0), + tp=batch_metrics.get("tp", 0), + ep=batch_metrics.get("ep", 0), batch_type=batch_metrics["type"], new_seq=batch_metrics.get("new_seq"), new_token=batch_metrics.get("new_token"), @@ -148,9 +148,9 @@ def parse_single_log(self, log_path: Path) -> NodeInfo | None: batches.append( BatchMetrics( timestamp=decode_metrics["timestamp"], - dp=0, - tp=0, - ep=0, + dp=decode_metrics.get("dp", 0), + tp=decode_metrics.get("tp", 0), + ep=decode_metrics.get("ep", 0), batch_type=decode_metrics["type"], running_req=decode_metrics.get("running_req"), queue_req=decode_metrics.get("queue_req"), @@ -169,9 +169,9 @@ def parse_single_log(self, log_path: Path) -> NodeInfo | None: memory_snapshots.append( MemoryMetrics( timestamp=mem_metrics["timestamp"], - dp=0, - tp=0, - ep=0, + dp=mem_metrics.get("dp", 0), + tp=mem_metrics.get("tp", 0), + ep=mem_metrics.get("ep", 0), metric_type=mem_metrics["type"], avail_mem_gb=mem_metrics.get("avail_mem_gb"), mem_usage_gb=mem_metrics.get("mem_usage_gb"), @@ -231,25 +231,77 @@ def parse_single_log(self, log_path: Path) -> NodeInfo | None: return NodeInfo(metrics=metrics, node_config=node_config if node_config else None) def _parse_timestamp(self, line: str) -> str | None: - """Extract ISO 8601 timestamp from log line. - Example: 2025-12-30T15:52:38.206058Z + """Extract timestamp from log line. 
+ + Supports two formats: + - Tagged format: [2025-11-04 05:31:43 DP0 TP0 EP0] + - ISO format: 2025-12-30T15:52:38.206058Z (fallback) + Returns the timestamp string as-is without conversion. """ + # Try tagged format first (YYYY-MM-DD HH:MM:SS) + match = re.search(r"\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})", line) + if match: + return match.group(1) + + # Fall back to ISO format match = re.search(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z?)", line) if match: return match.group(1) + return None + def _parse_parallelism_tags(self, line: str) -> tuple[int, int, int]: + """Extract DP, TP, EP indices from log line prefix. + + Supports three formats: + - Full: [2025-11-04 05:31:43 DP0 TP0 EP0] + - Simple TP: [2025-11-04 07:05:55 TP0] (defaults DP=0, EP=0) + - Pipeline: [2025-12-08 14:34:44 PP0] (defaults DP=0, EP=0, TP=PP value) + + Args: + line: Log line to parse + + Returns: + (dp, tp, ep) tuple with default values of 0 if not found + """ + # Try full format first: DP0 TP0 EP0 + match = re.search(r"DP(\d+)\s+TP(\d+)\s+EP(\d+)", line) + if match: + return int(match.group(1)), int(match.group(2)), int(match.group(3)) + + # Try simple format: TP0 only (1P4D style) + match = re.search(r"\[\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} TP(\d+)\]", line) + if match: + return 0, int(match.group(1)), 0 # Default DP=0, EP=0 + + # Try pipeline parallelism format: PP0 (prefill with PP) + match = re.search(r"\[\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} PP(\d+)\]", line) + if match: + return 0, int(match.group(1)), 0 # Map PP to TP slot, default DP=0, EP=0 + + # Default: no parallelism tags found + return 0, 0, 0 + def _parse_prefill_batch_line(self, line: str) -> dict | None: """Parse prefill batch log line for metrics.""" if "Prefill batch" not in line: return None + # Parse timestamp and parallelism tags separately timestamp = self._parse_timestamp(line) if not timestamp: return None - - metrics = {"timestamp": timestamp, "type": "prefill"} + + dp, tp, ep = self._parse_parallelism_tags(line) + + metrics = { + "timestamp": timestamp, + "type": "prefill", + "dp": dp, + "tp": tp, + "ep": ep, + } patterns = { "new_seq": r"#new-seq:\s*(\d+)", @@ -276,11 +328,20 @@ def _parse_decode_batch_line(self, line: str) -> dict | None: if "Decode batch" not in line: return None + # Parse timestamp and parallelism tags separately timestamp = self._parse_timestamp(line) if not timestamp: return None - - metrics = {"timestamp": timestamp, "type": "decode"} + + dp, tp, ep = self._parse_parallelism_tags(line) + + metrics = { + "timestamp": timestamp, + "type": "decode", + "dp": dp, + "tp": tp, + "ep": ep, + } patterns = { "running_req": r"#running-req:\s*(\d+)", @@ -303,11 +364,19 @@ def _parse_decode_batch_line(self, line: str) -> dict | None: def _parse_memory_line(self, line: str) -> dict | None: """Parse memory-related log lines.""" + # Parse timestamp and parallelism tags separately timestamp = self._parse_timestamp(line) if not timestamp: return None - - metrics = {"timestamp": timestamp} + + dp, tp, ep = self._parse_parallelism_tags(line) + + metrics = { + "timestamp": timestamp, + "dp": dp, + "tp": tp, + "ep": ep, + } # Parse available memory from "avail mem=75.11 GB" avail_match = re.search(r"avail mem=([\d.]+)\s*GB", line) diff --git a/analysis/srtlog/parsers/nodes/trtllm.py b/analysis/srtlog/parsers/nodes/trtllm.py index c3fe3a62..9287ac26 100644 --- a/analysis/srtlog/parsers/nodes/trtllm.py +++ b/analysis/srtlog/parsers/nodes/trtllm.py @@ -58,6 +58,51 @@ def parse_timestamp(timestamp: str) -> datetime: ValueError: If timestamp 
format is invalid """ return datetime.strptime(timestamp, "%m/%d/%Y-%H:%M:%S") + + def _extract_timestamp(self, line: str) -> str | None: + """Extract timestamp string from log line. + + Supports format: [MM/DD/YYYY-HH:MM:SS ...] + + Returns: + Timestamp string or None if not found + """ + match = re.search(r"\[(\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2})", line) + if match: + return match.group(1) + return None + + def _parse_parallelism_tags(self, line: str) -> tuple[int, int, int]: + """Extract DP, TP, EP indices from TRTLLM log line. + + Supports three formats: + - Full: [01/23/2026-08:04:38 DP1 TP2 EP3] + - Simple TP: [01/23/2026-08:04:38 TP0] (defaults DP=0, EP=0) + - Pipeline: [01/23/2026-08:04:38 PP3] (defaults DP=0, EP=0, TP=PP value) + + Args: + line: Log line to parse + + Returns: + (dp, tp, ep) tuple with default values of 0 if not found + """ + # Try full format first: DP0 TP0 EP0 + match = re.search(r"DP(\d+)\s+TP(\d+)\s+EP(\d+)", line) + if match: + return int(match.group(1)), int(match.group(2)), int(match.group(3)) + + # Try simple format: TP0 only (1P4D style) + match = re.search(r"\[\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2} TP(\d+)\]", line) + if match: + return 0, int(match.group(1)), 0 # Default DP=0, EP=0 + + # Try pipeline parallelism format: PP0 + match = re.search(r"\[\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2} PP(\d+)\]", line) + if match: + return 0, int(match.group(1)), 0 # Map PP to TP slot, default DP=0, EP=0 + + # Default: no parallelism tags found + return 0, 0, 0 def parse_logs(self, log_dir: Path) -> list[NodeInfo]: """Parse all TRTLLM node logs in a directory. @@ -233,17 +278,28 @@ def _parse_iteration_logs(self, content: str, worker_type: str) -> list[BatchMet # Pattern to match TRTLLM iteration logs iter_pattern = re.compile( - r"\[(\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2})\].*" r"iter\s*=\s*(\d+).*" r"num_scheduled_requests:\s*(\d+).*" r"states\s*=\s*\{([^}]+)\}" ) for match in iter_pattern.finditer(content): - timestamp = match.group(1) - iteration = int(match.group(2)) - num_scheduled = int(match.group(3)) - states_str = match.group(4) + # Extract timestamp and parallelism from the line + line_start = content.rfind('\n', 0, match.start()) + 1 + line_end = content.find('\n', match.end()) + if line_end == -1: + line_end = len(content) + full_line = content[line_start:line_end] + + timestamp = self._extract_timestamp(full_line) + if not timestamp: + continue + + dp, tp, ep = self._parse_parallelism_tags(full_line) + + iteration = int(match.group(1)) + num_scheduled = int(match.group(2)) + states_str = match.group(3) # Parse states dict ctx_requests = 0 @@ -290,9 +346,9 @@ def _parse_iteration_logs(self, content: str, worker_type: str) -> list[BatchMet batches.append( BatchMetrics( timestamp=timestamp, - dp=0, - tp=0, - ep=0, + dp=dp, + tp=tp, + ep=ep, batch_type=batch_type, running_req=num_scheduled, new_token=ctx_tokens if batch_type == "prefill" else None, @@ -319,24 +375,35 @@ def _parse_memory_info(self, content: str) -> list[MemoryMetrics]: # Pattern to match memory info mem_pattern = re.compile( - r"\[(\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2})\].*" r"Peak memory.*?:\s*([\d.]+)\s*GiB.*?" r"available KV cache memory.*?:\s*([\d.]+)\s*GiB.*?" 
r"device total memory\s*([\d.]+)\s*GiB" ) for match in mem_pattern.finditer(content): - timestamp = match.group(1) - peak_mem = float(match.group(2)) - avail_kv = float(match.group(3)) - total_mem = float(match.group(4)) + # Extract timestamp and parallelism from the line + line_start = content.rfind('\n', 0, match.start()) + 1 + line_end = content.find('\n', match.end()) + if line_end == -1: + line_end = len(content) + full_line = content[line_start:line_end] + + timestamp = self._extract_timestamp(full_line) + if not timestamp: + timestamp = "" # Some memory lines may not have timestamps + + dp, tp, ep = self._parse_parallelism_tags(full_line) + + peak_mem = float(match.group(1)) + avail_kv = float(match.group(2)) + total_mem = float(match.group(3)) memory_snapshots.append( MemoryMetrics( timestamp=timestamp, - dp=0, - tp=0, - ep=0, + dp=dp, + tp=tp, + ep=ep, metric_type="memory", mem_usage_gb=peak_mem, avail_mem_gb=total_mem - peak_mem, @@ -344,7 +411,7 @@ def _parse_memory_info(self, content: str) -> list[MemoryMetrics]: ) ) - # Also parse KV cache allocation info + # Also parse KV cache allocation info (no timestamp/DP/TP/EP for these) kv_alloc_pattern = re.compile( r"\[MemUsageChange\] Allocated\s*([\d.]+)\s*GiB for max tokens.*?\((\d+)\)" ) diff --git a/tests/fixtures_parsers.py b/tests/fixtures_parsers.py index 666872e1..be780d78 100644 --- a/tests/fixtures_parsers.py +++ b/tests/fixtures_parsers.py @@ -132,32 +132,32 @@ class SampleSGLangLogData: def prefill_log_content() -> str: """Sample prefill worker log.""" return """ -[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m [1msglang[0m Starting SGLang prefill worker -[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m [1msglang[0m server_args=ServerArgs(tp_size=8, dp_size=1, ep_size=1, model_path=/models/qwen3-32b, served_model_name=Qwen3-32B, host=10.0.0.1, port=30000, disaggregation_mode=prefill, context_length=131072, max_running_requests=1024, mem_fraction_static=0.85, kv_cache_dtype=fp8_e5m2) +[2025-12-30 15:52:38 DP0 TP0 EP0] INFO sglang Starting SGLang prefill worker +[2025-12-30 15:52:38 DP0 TP0 EP0] INFO sglang server_args=ServerArgs(tp_size=8, dp_size=1, ep_size=1, model_path=/models/qwen3-32b, served_model_name=Qwen3-32B, host=10.0.0.1, port=30000, disaggregation_mode=prefill, context_length=131072, max_running_requests=1024, mem_fraction_static=0.85, kv_cache_dtype=fp8_e5m2) [CMD] python -m sglang.launch_server --model /models/qwen3-32b --served-model-name Qwen3-32B --tp-size 8 --dp-size 1 --ep-size 1 --host 10.0.0.1 --port 30000 --disaggregation-mode prefill --context-length 131072 --max-running-requests 1024 --mem-fraction-static 0.85 --kv-cache-dtype fp8_e5m2 -[2m2025-12-30T15:52:40.123456Z[0m [32m INFO[0m [1msglang[0m Prefill batch, #new-seq: 8, #new-token: 65536, #cached-token: 0, token usage: 0.78, #running-req: 8, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 6500.5 -[2m2025-12-30T15:52:40.523456Z[0m [32m INFO[0m [1msglang[0m Prefill batch, #new-seq: 5, #new-token: 40960, #cached-token: 0, token usage: 0.85, #running-req: 13, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 5120.0 -[2m2025-12-30T15:52:41.123456Z[0m [32m INFO[0m [1msglang[0m Prefill batch, #new-seq: 10, #new-token: 81920, #cached-token: 16384, token usage: 0.90, #running-req: 23, #queue-req: 2, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 8192.0 -[2m2025-12-30T15:52:42.000000Z[0m [32m INFO[0m [1msglang[0m avail mem=75.11 GB, mem usage=107.07 GB 
-[2m2025-12-30T15:52:43.000000Z[0m [32m INFO[0m [1msglang[0m KV size: 32.50 GB, #tokens: 1048576 +[2025-12-30 15:52:40 DP0 TP0 EP0] INFO sglang Prefill batch, #new-seq: 8, #new-token: 65536, #cached-token: 0, token usage: 0.78, #running-req: 8, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 6500.5 +[2025-12-30 15:52:40 DP0 TP0 EP0] INFO sglang Prefill batch, #new-seq: 5, #new-token: 40960, #cached-token: 0, token usage: 0.85, #running-req: 13, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 5120.0 +[2025-12-30 15:52:41 DP0 TP0 EP0] INFO sglang Prefill batch, #new-seq: 10, #new-token: 81920, #cached-token: 16384, token usage: 0.90, #running-req: 23, #queue-req: 2, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 8192.0 +[2025-12-30 15:52:42 DP0 TP0 EP0] INFO sglang avail mem=75.11 GB, mem usage=107.07 GB +[2025-12-30 15:52:43 DP0 TP0 EP0] INFO sglang KV size: 32.50 GB, #tokens: 1048576 """ @staticmethod def decode_log_content() -> str: """Sample decode worker log.""" return """ -[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m [1msglang[0m Starting SGLang decode worker -[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m [1msglang[0m server_args=ServerArgs(tp_size=4, dp_size=1, ep_size=1, model_path=/models/qwen3-32b, disaggregation_mode=decode) +[2025-12-30 15:52:38 DP0 TP0 EP0] INFO sglang Starting SGLang decode worker +[2025-12-30 15:52:38 DP0 TP0 EP0] INFO sglang server_args=ServerArgs(tp_size=4, dp_size=1, ep_size=1, model_path=/models/qwen3-32b, disaggregation_mode=decode) [CMD] python -m sglang.launch_server --model /models/qwen3-32b --tp-size 4 --disaggregation-mode decode -[2m2025-12-30T15:52:40.123456Z[0m [32m INFO[0m [1msglang[0m Decode batch, #running-req: 15, #token: 512, token usage: 0.65, pre-allocated usage: 0.10, #prealloc-req: 3, #transfer-req: 0, #queue-req: 0, gen throughput (token/s): 2048.0 -[2m2025-12-30T15:52:40.523456Z[0m [32m INFO[0m [1msglang[0m Decode batch, #running-req: 20, #token: 768, token usage: 0.72, pre-allocated usage: 0.15, #prealloc-req: 5, #transfer-req: 2, #queue-req: 0, gen throughput (token/s): 3072.0 -[2m2025-12-30T15:52:41.123456Z[0m [32m INFO[0m [1msglang[0m Decode batch, #running-req: 18, #token: 640, token usage: 0.70, pre-allocated usage: 0.12, #prealloc-req: 4, #transfer-req: 1, #queue-req: 0, gen throughput (token/s): 2560.0 -[2m2025-12-30T15:52:42.000000Z[0m [32m INFO[0m [1msglang[0m avail mem=85.00 GB, mem usage=97.00 GB -[2m2025-12-30T15:52:43.000000Z[0m [32m INFO[0m [1msglang[0m KV size: 48.00 GB, #tokens: 2097152 +[2025-12-30 15:52:40 DP0 TP0 EP0] INFO sglang Decode batch, #running-req: 15, #token: 512, token usage: 0.65, pre-allocated usage: 0.10, #prealloc-req: 3, #transfer-req: 0, #queue-req: 0, gen throughput (token/s): 2048.0 +[2025-12-30 15:52:40 DP0 TP0 EP0] INFO sglang Decode batch, #running-req: 20, #token: 768, token usage: 0.72, pre-allocated usage: 0.15, #prealloc-req: 5, #transfer-req: 2, #queue-req: 0, gen throughput (token/s): 3072.0 +[2025-12-30 15:52:41 DP0 TP0 EP0] INFO sglang Decode batch, #running-req: 18, #token: 640, token usage: 0.70, pre-allocated usage: 0.12, #prealloc-req: 4, #transfer-req: 1, #queue-req: 0, gen throughput (token/s): 2560.0 +[2025-12-30 15:52:42 DP0 TP0 EP0] INFO sglang avail mem=85.00 GB, mem usage=97.00 GB +[2025-12-30 15:52:43 DP0 TP0 EP0] INFO sglang KV size: 48.00 GB, #tokens: 2097152 """ diff --git a/tests/test_dashboard.py b/tests/test_dashboard.py new file mode 100644 index 00000000..0d2f54a3 --- /dev/null +++ 
b/tests/test_dashboard.py @@ -0,0 +1,240 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Tests for dashboard components. + +Tests timestamp parsing and other dashboard functionality. +""" + +from datetime import datetime + +import pytest + + +class TestRateMatchTab: + """Tests for rate_match_tab module.""" + + def test_parse_timestamp_yyyy_mm_dd(self): + """Test parsing YYYY-MM-DD HH:MM:SS format.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + ts = "2025-12-30 15:52:38" + dt = _parse_timestamp(ts) + + assert isinstance(dt, datetime) + assert dt.year == 2025 + assert dt.month == 12 + assert dt.day == 30 + assert dt.hour == 15 + assert dt.minute == 52 + assert dt.second == 38 + + def test_parse_timestamp_iso8601_with_microseconds(self): + """Test parsing ISO 8601 format with microseconds and Z.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + ts = "2025-12-30T15:52:38.206058Z" + dt = _parse_timestamp(ts) + + assert isinstance(dt, datetime) + assert dt.year == 2025 + assert dt.month == 12 + assert dt.day == 30 + assert dt.hour == 15 + assert dt.minute == 52 + assert dt.second == 38 + + def test_parse_timestamp_iso8601_without_z(self): + """Test parsing ISO 8601 format without Z.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + ts = "2025-12-30T15:52:38.206058" + dt = _parse_timestamp(ts) + + assert isinstance(dt, datetime) + assert dt.year == 2025 + assert dt.month == 12 + assert dt.day == 30 + + def test_parse_timestamp_iso8601_without_microseconds(self): + """Test parsing ISO 8601 format without microseconds.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + ts = "2025-12-30T15:52:38" + dt = _parse_timestamp(ts) + + assert isinstance(dt, datetime) + assert dt.year == 2025 + assert dt.hour == 15 + + def test_parse_timestamp_trtllm_format(self): + """Test parsing TRTLLM MM/DD/YYYY-HH:MM:SS format.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + ts = "01/23/2026-08:04:38" + dt = _parse_timestamp(ts) + + assert isinstance(dt, datetime) + assert dt.year == 2026 + assert dt.month == 1 + assert dt.day == 23 + assert dt.hour == 8 + assert dt.minute == 4 + assert dt.second == 38 + + def test_parse_timestamp_trtllm_various_dates(self): + """Test parsing various TRTLLM timestamps.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + # End of year + ts1 = "12/31/2025-23:59:59" + dt1 = _parse_timestamp(ts1) + assert dt1.year == 2025 + assert dt1.month == 12 + assert dt1.day == 31 + assert dt1.hour == 23 + + # Start of year + ts2 = "01/01/2026-00:00:00" + dt2 = _parse_timestamp(ts2) + assert dt2.year == 2026 + assert dt2.month == 1 + assert dt2.day == 1 + assert dt2.hour == 0 + + def test_parse_timestamp_invalid(self): + """Test that invalid timestamps raise ValueError.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + with pytest.raises(ValueError): + _parse_timestamp("invalid-timestamp") + + with pytest.raises(ValueError): + _parse_timestamp("not a date at all") + + with pytest.raises(ValueError): + _parse_timestamp("2025-13-40 25:99:99") # Invalid values + + def test_parse_timestamp_format_fallback(self): + """Test that parser tries multiple formats in order.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + # Should parse successfully with any supported format + formats = [ + ("2025-12-30 15:52:38", 2025), # Standard + 
("2025-12-30T15:52:38.206058Z", 2025), # ISO 8601 with Z + ("01/23/2026-08:04:38", 2026), # TRTLLM + ] + + for ts, expected_year in formats: + dt = _parse_timestamp(ts) + assert dt.year == expected_year + + def test_parse_timestamp_time_delta(self): + """Test that timestamps can be used for time delta calculations.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + ts1 = "01/23/2026-08:04:38" + ts2 = "01/23/2026-08:04:40" + + dt1 = _parse_timestamp(ts1) + dt2 = _parse_timestamp(ts2) + + delta = dt2 - dt1 + assert delta.total_seconds() == 2.0 + + def test_parse_timestamp_mixed_formats(self): + """Test parsing a sequence of different timestamp formats.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + # Simulate what dashboard might see from different backends + timestamps = [ + "2025-12-30 15:52:38", # Standard (could be from old cache) + "2025-12-30T15:52:39.100000Z", # SGLang + "01/23/2026-08:04:40", # TRTLLM + ] + + dts = [_parse_timestamp(ts) for ts in timestamps] + + # All should parse successfully + assert len(dts) == 3 + assert all(isinstance(dt, datetime) for dt in dts) + + # Should be able to compute deltas (even if not chronological) + delta = dts[1] - dts[0] + assert delta.total_seconds() == 1.1 + + +class TestTimestampIntegration: + """Integration tests for timestamp handling across parsers and dashboard.""" + + def test_sglang_to_dashboard_pipeline(self): + """Test that SGLang timestamps work through the entire pipeline.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + from analysis.srtlog.parsers import get_node_parser + + parser = get_node_parser("sglang") + + # SGLang format timestamp + sglang_ts = "2025-12-30T15:52:38.206058Z" + + # Parser should be able to parse it + dt_parser = parser.parse_timestamp(sglang_ts) + + # Dashboard should be able to parse it + dt_dashboard = _parse_timestamp(sglang_ts) + + # Both should produce same datetime + assert dt_parser.year == dt_dashboard.year + assert dt_parser.month == dt_dashboard.month + assert dt_parser.day == dt_dashboard.day + assert dt_parser.hour == dt_dashboard.hour + assert dt_parser.minute == dt_dashboard.minute + assert dt_parser.second == dt_dashboard.second + + def test_trtllm_to_dashboard_pipeline(self): + """Test that TRTLLM timestamps work through the entire pipeline.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + from analysis.srtlog.parsers import get_node_parser + + parser = get_node_parser("trtllm") + + # TRTLLM format timestamp + trtllm_ts = "01/23/2026-08:04:38" + + # Parser should be able to parse it + dt_parser = parser.parse_timestamp(trtllm_ts) + + # Dashboard should be able to parse it + dt_dashboard = _parse_timestamp(trtllm_ts) + + # Both should produce same datetime + assert dt_parser.year == dt_dashboard.year + assert dt_parser.month == dt_dashboard.month + assert dt_parser.day == dt_dashboard.day + assert dt_parser.hour == dt_dashboard.hour + assert dt_parser.minute == dt_dashboard.minute + assert dt_parser.second == dt_dashboard.second + + def test_mixed_backend_timestamps_in_dashboard(self): + """Test that dashboard can handle timestamps from mixed backends.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + # Simulate dashboard receiving timestamps from different backends + mixed_timestamps = [ + "2025-12-30T15:52:38.206058Z", # SGLang + "01/23/2026-08:04:38", # TRTLLM + "2025-12-30 15:52:38", # Standard format + ] + + # All should parse without error + parsed = [] + for ts in 
mixed_timestamps: + dt = _parse_timestamp(ts) + parsed.append(dt) + assert isinstance(dt, datetime) + + # Should be able to compute time deltas + assert len(parsed) == 3 + diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 0046583d..bf4e123a 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -302,7 +302,7 @@ def test_parser_type(self, parser): def test_parse_prefill_batch_line(self, parser): """Test parsing prefill batch log line.""" - line = "[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m Prefill batch, #new-seq: 5, #new-token: 40960, #cached-token: 0, token usage: 0.78, #running-req: 5, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 5000.5" + line = "[2025-12-30 15:52:38 DP0 TP0 EP0] Prefill batch, #new-seq: 5, #new-token: 40960, #cached-token: 0, token usage: 0.78, #running-req: 5, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 5000.5" metrics = parser._parse_prefill_batch_line(line) @@ -317,7 +317,7 @@ def test_parse_prefill_batch_line(self, parser): def test_parse_decode_batch_line(self, parser): """Test parsing decode batch log line.""" - line = "[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m Decode batch, #running-req: 10, #token: 512, token usage: 0.85, pre-allocated usage: 0.10, #prealloc-req: 2, #transfer-req: 0, #queue-req: 0, gen throughput (token/s): 1500.5" + line = "[2025-12-30 15:52:38 DP0 TP0 EP0] Decode batch, #running-req: 10, #token: 512, token usage: 0.85, pre-allocated usage: 0.10, #prealloc-req: 2, #transfer-req: 0, #queue-req: 0, gen throughput (token/s): 1500.5" metrics = parser._parse_decode_batch_line(line) @@ -357,10 +357,10 @@ def test_parse_memory_line_without_kv(self, parser): def test_parse_single_log(self, parser, temp_dir): """Test parsing a complete SGLang log file.""" log_content = """ -[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m Starting SGLang server with server_args=ServerArgs(tp_size=8, dp_size=1, ep_size=1, model_path=/models/qwen3-32b) -[2m2025-12-30T15:52:40.206058Z[0m [32m INFO[0m Prefill batch, #new-seq: 5, #new-token: 40960, #cached-token: 0, token usage: 0.78, #running-req: 5, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 5000.5 -[2m2025-12-30T15:52:41.206058Z[0m [32m INFO[0m Decode batch, #running-req: 5, #token: 512, token usage: 0.85, gen throughput (token/s): 1500.5 -[2m2025-12-30T15:52:42.206058Z[0m [32m INFO[0m avail mem=75.11 GB, mem usage=107.07 GB +[2025-12-30 15:52:38 DP0 TP0 EP0] Starting SGLang server with server_args=ServerArgs(tp_size=8, dp_size=1, ep_size=1, model_path=/models/qwen3-32b) +[2025-12-30 15:52:40 DP0 TP0 EP0] Prefill batch, #new-seq: 5, #new-token: 40960, #cached-token: 0, token usage: 0.78, #running-req: 5, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 5000.5 +[2025-12-30 15:52:41 DP0 TP0 EP0] Decode batch, #running-req: 5, #token: 512, token usage: 0.85, gen throughput (token/s): 1500.5 +[2025-12-30 15:52:42 DP0 TP0 EP0] avail mem=75.11 GB, mem usage=107.07 GB """ log_path = temp_dir / "eos0219_prefill_w0.out" @@ -397,6 +397,226 @@ def test_parse_launch_command(self, parser): assert cmd.extra_args["max_num_seqs"] == 1024 assert cmd.extra_args["disaggregation_mode"] == "prefill" + def test_parse_timestamp(self, parser): + """Test parsing SGLang timestamp format.""" + from datetime import datetime + + # Test ISO 8601 format with microseconds and Z + ts1 = "2025-12-30T15:52:38.206058Z" + dt1 = parser.parse_timestamp(ts1) + assert isinstance(dt1, datetime) + 
assert dt1.year == 2025 + assert dt1.month == 12 + assert dt1.day == 30 + assert dt1.hour == 15 + assert dt1.minute == 52 + assert dt1.second == 38 + + # Test ISO 8601 format without Z + ts2 = "2025-12-30T15:52:38.206058" + dt2 = parser.parse_timestamp(ts2) + assert isinstance(dt2, datetime) + assert dt2.year == 2025 + + # Test ISO 8601 format without microseconds + ts3 = "2025-12-30T15:52:38" + dt3 = parser.parse_timestamp(ts3) + assert isinstance(dt3, datetime) + assert dt3.year == 2025 + assert dt3.hour == 15 + + def test_parse_timestamp_invalid(self, parser): + """Test parsing invalid timestamp raises ValueError.""" + import pytest + + with pytest.raises(ValueError): + parser.parse_timestamp("invalid-timestamp") + + with pytest.raises(ValueError): + parser.parse_timestamp("2025-13-40T25:99:99") # Invalid date/time + + def test_parse_dp_tp_ep_tag_full_format(self, parser): + """Test parsing full DP/TP/EP tag format.""" + line = "[2025-11-04 05:31:43 DP0 TP2 EP1] Prefill batch, #new-seq: 5" + + timestamp = parser._parse_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "2025-11-04 05:31:43" + assert dp == 0 + assert tp == 2 + assert ep == 1 + + def test_parse_dp_tp_ep_tag_simple_tp(self, parser): + """Test parsing simple TP-only format (1P4D style).""" + line = "[2025-11-04 07:05:55 TP0] Decode batch, #running-req: 10" + + timestamp = parser._parse_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "2025-11-04 07:05:55" + assert dp == 0 # Default + assert tp == 0 + assert ep == 0 # Default + + def test_parse_dp_tp_ep_tag_pipeline(self, parser): + """Test parsing pipeline parallelism format.""" + line = "[2025-12-08 14:34:44 PP3] Prefill batch, #new-seq: 8" + + timestamp = parser._parse_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "2025-12-08 14:34:44" + assert dp == 0 # Default + assert tp == 3 # PP mapped to TP + assert ep == 0 # Default + + def test_parse_dp_tp_ep_tag_no_tags(self, parser): + """Test parsing line without parallelism tags.""" + line = "[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m Prefill batch" + + timestamp = parser._parse_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "2025-12-30T15:52:38.206058Z" # ISO format fallback + assert dp == 0 + assert tp == 0 + assert ep == 0 + + def test_parse_parallelism_wrapper(self, parser): + """Test _parse_parallelism_tags method.""" + # With full tags + line_with_tags = "[2025-11-04 05:31:43 DP1 TP2 EP3] Prefill batch" + dp, tp, ep = parser._parse_parallelism_tags(line_with_tags) + assert dp == 1 + assert tp == 2 + assert ep == 3 + + # Without tags - should default to 0 + line_without_tags = "Some log line without tags" + dp, tp, ep = parser._parse_parallelism_tags(line_without_tags) + assert dp == 0 + assert tp == 0 + assert ep == 0 + + def test_parse_prefill_batch_with_dp_tp_ep(self, parser): + """Test that prefill batch parsing extracts DP/TP/EP values.""" + line = "[2025-11-04 05:31:43 DP1 TP2 EP3] Prefill batch, #new-seq: 5, #new-token: 40960, #running-req: 5, input throughput (token/s): 5000.5" + + metrics = parser._parse_prefill_batch_line(line) + + assert metrics is not None + assert metrics["dp"] == 1 + assert metrics["tp"] == 2 + assert metrics["ep"] == 3 + assert metrics["timestamp"] == "2025-11-04 05:31:43" + assert metrics["type"] == "prefill" + assert metrics["new_seq"] == 5 + assert metrics["new_token"] == 40960 + + def 
test_parse_decode_batch_with_dp_tp_ep(self, parser): + """Test that decode batch parsing extracts DP/TP/EP values.""" + line = "[2025-11-04 05:31:45 DP0 TP1 EP0] Decode batch, #running-req: 10, #token: 512, gen throughput (token/s): 1500.5" + + metrics = parser._parse_decode_batch_line(line) + + assert metrics is not None + assert metrics["dp"] == 0 + assert metrics["tp"] == 1 + assert metrics["ep"] == 0 + assert metrics["timestamp"] == "2025-11-04 05:31:45" + assert metrics["type"] == "decode" + assert metrics["running_req"] == 10 + + def test_parse_memory_with_dp_tp_ep(self, parser): + """Test that memory line parsing extracts DP/TP/EP values.""" + line = "[2025-11-04 05:31:50 DP0 TP2 EP1] avail mem=75.11 GB, mem usage=107.07 GB, KV size: 17.16 GB" + + metrics = parser._parse_memory_line(line) + + assert metrics is not None + assert metrics["dp"] == 0 + assert metrics["tp"] == 2 + assert metrics["ep"] == 1 + assert metrics["timestamp"] == "2025-11-04 05:31:50" + assert metrics["type"] == "kv_cache" + assert metrics["kv_cache_gb"] == 17.16 + + def test_parse_batch_fallback_to_iso_timestamp(self, parser): + """Test that parser supports ISO timestamp fallback.""" + # Prefill batch with ISO timestamp (old format) - should parse with default parallelism + line = "[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m Prefill batch, #new-seq: 5, #new-token: 40960" + + metrics = parser._parse_prefill_batch_line(line) + + # Should parse successfully with ISO timestamp and default parallelism tags + assert metrics is not None + assert metrics["timestamp"] == "2025-12-30T15:52:38.206058Z" + assert metrics["dp"] == 0 + assert metrics["tp"] == 0 + assert metrics["ep"] == 0 + + def test_parse_batch_with_simple_tp_format(self, parser): + """Test parsing batch with simple TP format (1P4D disaggregated style).""" + line = "[2025-11-04 07:05:55 TP0] Prefill batch, #new-seq: 3, #new-token: 24576, input throughput (token/s): 3000.0" + + metrics = parser._parse_prefill_batch_line(line) + + assert metrics is not None + assert metrics["dp"] == 0 + assert metrics["tp"] == 0 + assert metrics["ep"] == 0 + assert metrics["new_token"] == 24576 + + def test_parse_batch_with_pipeline_format(self, parser): + """Test parsing batch with pipeline parallelism format.""" + line = "[2025-12-08 14:34:44 PP2] Prefill batch, #new-seq: 4, #new-token: 32768" + + metrics = parser._parse_prefill_batch_line(line) + + assert metrics is not None + assert metrics["dp"] == 0 + assert metrics["tp"] == 2 # PP mapped to TP + assert metrics["ep"] == 0 + assert metrics["new_token"] == 32768 + + def test_parse_single_log_with_parallelism_tags(self, parser, temp_dir): + """Test parsing complete log file with parallelism tags.""" + log_content = """ +[2025-11-04 05:31:43 DP0 TP0 EP0] Starting SGLang server with server_args=ServerArgs(tp_size=8, dp_size=1, ep_size=1, model_path=/models/qwen3-32b) +[2025-11-04 05:31:45 DP0 TP0 EP0] Prefill batch, #new-seq: 5, #new-token: 40960, #cached-token: 0, token usage: 0.78, #running-req: 5, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 5000.5 +[2025-11-04 05:31:46 DP0 TP0 EP0] Decode batch, #running-req: 5, #token: 512, token usage: 0.85, gen throughput (token/s): 1500.5 +[2025-11-04 05:31:47 DP0 TP0 EP0] avail mem=75.11 GB, mem usage=107.07 GB + """ + + log_path = temp_dir / "test_node_prefill_w0.out" + log_path.write_text(log_content) + + node = parser.parse_single_log(log_path) + + assert node is not None + assert node.node_name == "test_node" + assert node.worker_type == 
"prefill" + assert node.worker_id == "w0" + + # Check that batches have correct DP/TP/EP values + assert len(node.batches) == 2 + for batch in node.batches: + assert batch.dp == 0 + assert batch.tp == 0 + assert batch.ep == 0 + + # Check memory snapshots have correct DP/TP/EP values + assert len(node.memory_snapshots) == 1 + assert node.memory_snapshots[0].dp == 0 + assert node.memory_snapshots[0].tp == 0 + assert node.memory_snapshots[0].ep == 0 + + # Verify config extraction still works + assert node.config["tp_size"] == 8 + assert node.config["dp_size"] == 1 + assert node.config["ep_size"] == 1 + def test_extract_node_info_from_filename(self, parser): """Test extracting node info from filename.""" result = parser._extract_node_info_from_filename("eos0219_prefill_w0.out") @@ -528,6 +748,156 @@ def test_extract_node_info_from_filename(self, parser): assert result["worker_type"] == "decode" assert result["worker_id"] == "w1" + def test_parse_timestamp(self, parser): + """Test parsing TRTLLM timestamp format.""" + from datetime import datetime + + # Test MM/DD/YYYY-HH:MM:SS format + ts1 = "01/23/2026-08:04:38" + dt1 = parser.parse_timestamp(ts1) + assert isinstance(dt1, datetime) + assert dt1.year == 2026 + assert dt1.month == 1 + assert dt1.day == 23 + assert dt1.hour == 8 + assert dt1.minute == 4 + assert dt1.second == 38 + + # Test another timestamp + ts2 = "12/31/2025-23:59:59" + dt2 = parser.parse_timestamp(ts2) + assert isinstance(dt2, datetime) + assert dt2.year == 2025 + assert dt2.month == 12 + assert dt2.day == 31 + assert dt2.hour == 23 + assert dt2.minute == 59 + assert dt2.second == 59 + + # Test with leading zeros + ts3 = "01/01/2026-00:00:00" + dt3 = parser.parse_timestamp(ts3) + assert isinstance(dt3, datetime) + assert dt3.year == 2026 + assert dt3.month == 1 + assert dt3.day == 1 + assert dt3.hour == 0 + assert dt3.minute == 0 + assert dt3.second == 0 + + def test_parse_timestamp_invalid(self, parser): + """Test parsing invalid timestamp raises ValueError.""" + import pytest + + with pytest.raises(ValueError): + parser.parse_timestamp("invalid-timestamp") + + with pytest.raises(ValueError): + parser.parse_timestamp("13/40/2026-25:99:99") # Invalid date/time + + with pytest.raises(ValueError): + parser.parse_timestamp("2025-12-30 15:52:38") # Wrong format + + def test_timestamp_preserved_in_batches(self, parser): + """Test that timestamps are preserved in their original format.""" + log_content = """ +[01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 5559, host_step_time = 50.5ms, num_scheduled_requests: 3, states = {'num_ctx_requests': 2, 'num_ctx_tokens': 16384, 'num_generation_tokens': 0} + """ + + batches = parser._parse_iteration_logs(log_content, "prefill") + + assert len(batches) == 1 + # Timestamp should be preserved in original format + assert batches[0].timestamp == "01/16/2026-06:20:17" + + def test_timestamp_preserved_in_memory(self, parser): + """Test that timestamps are preserved in memory snapshots.""" + log_content = """ +[01/16/2026-06:20:17] [TRT-LLM] [I] Peak memory during memory usage profiling (torch + non-torch): 91.46 GiB, available KV cache memory when calculating max tokens: 41.11 GiB, fraction is set 0.85, kv size is 35136. 
device total memory 139.81 GiB + """ + + memory_snapshots = parser._parse_memory_info(log_content) + + assert len(memory_snapshots) == 1 + # Timestamp should be preserved in original format + assert memory_snapshots[0].timestamp == "01/16/2026-06:20:17" + + def test_parse_dp_tp_ep_tag_full_format(self, parser): + """Test parsing full DP/TP/EP tag format in TRTLLM logs.""" + line = "[01/23/2026-08:04:38 DP1 TP2 EP3] [TRT-LLM] iter = 100, num_scheduled_requests: 5, states = {'num_ctx_requests': 2, 'num_ctx_tokens': 1024, 'num_generation_tokens': 0}" + + timestamp = parser._extract_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "01/23/2026-08:04:38" + assert dp == 1 + assert tp == 2 + assert ep == 3 + + def test_parse_dp_tp_ep_tag_simple_tp(self, parser): + """Test parsing simple TP-only format (1P4D style) in TRTLLM logs.""" + line = "[01/23/2026-08:04:38 TP0] [TRT-LLM] iter = 100, num_scheduled_requests: 10, states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 512}" + + timestamp = parser._extract_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "01/23/2026-08:04:38" + assert dp == 0 # Default + assert tp == 0 + assert ep == 0 # Default + + def test_parse_dp_tp_ep_tag_pipeline(self, parser): + """Test parsing pipeline parallelism format in TRTLLM logs.""" + line = "[01/23/2026-08:04:38 PP3] [TRT-LLM] iter = 100, num_scheduled_requests: 8, states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 256}" + + timestamp = parser._extract_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "01/23/2026-08:04:38" + assert dp == 0 # Default + assert tp == 3 # PP mapped to TP + assert ep == 0 # Default + + def test_parse_dp_tp_ep_tag_no_tags(self, parser): + """Test parsing line without parallelism tags in TRTLLM logs.""" + line = "[01/23/2026-08:04:38] [TRT-LLM] iter = 100, num_scheduled_requests: 5" + + timestamp = parser._extract_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "01/23/2026-08:04:38" + assert dp == 0 + assert tp == 0 + assert ep == 0 + + def test_parse_iteration_with_dp_tp_ep(self, parser): + """Test that iteration parsing extracts DP/TP/EP values.""" + log_content = """ +[01/23/2026-08:04:38 DP0 TP1 EP0] [TRT-LLM] [RANK 0] [I] iter = 100, num_scheduled_requests: 5, states = {'num_ctx_requests': 2, 'num_ctx_tokens': 1024, 'num_generation_tokens': 0}, host_step_time = 50.0ms + """ + + batches = parser._parse_iteration_logs(log_content, "prefill") + + assert len(batches) == 1 + assert batches[0].dp == 0 + assert batches[0].tp == 1 + assert batches[0].ep == 0 + assert batches[0].timestamp == "01/23/2026-08:04:38" + + def test_parse_memory_with_dp_tp_ep(self, parser): + """Test that memory parsing extracts DP/TP/EP values.""" + log_content = """ +[01/23/2026-08:04:38 DP0 TP2 EP1] [TRT-LLM] [RANK 0] [I] Peak memory during memory usage profiling (torch + non-torch): 91.46 GiB, available KV cache memory when calculating max tokens: 41.11 GiB, fraction is set 0.85, kv size is 35136. 
device total memory 139.81 GiB
+    """
+
+        memory_snapshots = parser._parse_memory_info(log_content)
+
+        assert len(memory_snapshots) >= 1
+        assert memory_snapshots[0].dp == 0
+        assert memory_snapshots[0].tp == 2
+        assert memory_snapshots[0].ep == 1
+        assert memory_snapshots[0].timestamp == "01/23/2026-08:04:38"
+
 
 class TestBenchmarkLaunchCommand:
     """Test BenchmarkLaunchCommand dataclass."""

From 689dcd7114031b232521eeee4c4299d0ef1995e4 Mon Sep 17 00:00:00 2001
From: Kaunil Dhruv
Date: Mon, 26 Jan 2026 20:29:17 -0800
Subject: [PATCH 10/15] run pre-commit formatting

---
 analysis/dashboard/app.py                     |  4 +-
 analysis/dashboard/node_metrics_tab.py        | 16 ++--
 analysis/dashboard/rate_match_tab.py          | 18 ++--
 analysis/srtlog/cluster_config.py             |  3 +-
 analysis/srtlog/log_parser.py                 | 27 +++----
 analysis/srtlog/models.py                     | 19 +++--
 analysis/srtlog/parsers/__init__.py           | 26 +++----
 analysis/srtlog/parsers/benchmark/__init__.py |  3 +-
 .../parsers/benchmark/mooncake_router.py      | 40 +++++-----
 analysis/srtlog/parsers/benchmark/sa_bench.py | 47 ++++++------
 analysis/srtlog/parsers/nodes/sglang.py       | 43 ++++++-----
 analysis/srtlog/parsers/nodes/trtllm.py       | 62 ++++++++--------
 analysis/srtlog/run_loader.py                 | 23 +++---
 pyproject.toml                                |  1 +
 tests/fixtures_parsers.py                     |  6 +-
 tests/test_benchmarks.py                      |  5 +-
 tests/test_dashboard.py                       | 35 +++++----
 tests/test_e2e.py                             | 52 ++++++-------
 tests/test_frontend_topology.py               |  2 -
 tests/test_frontends.py                       | 36 +++++----
 tests/test_health.py                          | 10 +--
 tests/test_parsers.py                         | 73 +++++++++----------
 tests/test_process_registry.py                |  2 -
 tests/test_profiling.py                       |  1 -
 tests/test_runloader_parsers.py               |  2 -
 tests/test_sweep.py                           |  1 -
 26 files changed, 275 insertions(+), 282 deletions(-)

diff --git a/analysis/dashboard/app.py b/analysis/dashboard/app.py
index 35e0722e..83bbf819 100644
--- a/analysis/dashboard/app.py
+++ b/analysis/dashboard/app.py
@@ -115,7 +115,9 @@ def render_sidebar(logs_dir, runs):
         )
 
         if selected_pairs:
-            sorted_runs = [r for r in sorted_runs if f"{r.profiler_metadata.isl}/{r.profiler_metadata.osl}" in selected_pairs]
+            sorted_runs = [
+                r for r in sorted_runs if f"{r.profiler_metadata.isl}/{r.profiler_metadata.osl}" in selected_pairs
+            ]
     else:
         st.caption("No ISL/OSL information available")
 
diff --git a/analysis/dashboard/node_metrics_tab.py b/analysis/dashboard/node_metrics_tab.py
index 37d250be..24ab0665 100644
--- a/analysis/dashboard/node_metrics_tab.py
+++ b/analysis/dashboard/node_metrics_tab.py
@@ -7,16 +7,16 @@
 import streamlit as st
 
 from analysis.dashboard.components import (
-    load_node_metrics,
-    create_node_throughput_graph,
-    create_kv_cache_utilization_graph,
-    create_queue_depth_graph,
-    create_node_inflight_requests_graph,
-    create_decode_running_requests_graph,
+    create_decode_disagg_stacked_graph,
     create_decode_gen_throughput_graph,
-    create_decode_transfer_req_graph,
     create_decode_prealloc_req_graph,
-    create_decode_disagg_stacked_graph,
+    create_decode_running_requests_graph,
+    create_decode_transfer_req_graph,
+    create_kv_cache_utilization_graph,
+    create_node_inflight_requests_graph,
+    create_node_throughput_graph,
+    create_queue_depth_graph,
+    load_node_metrics,
 )
 
 from analysis.srtlog.visualizations import aggregate_all_nodes, group_nodes_by_dp
diff --git a/analysis/dashboard/rate_match_tab.py b/analysis/dashboard/rate_match_tab.py
index a8cbb3a7..5826faf2 100644
--- a/analysis/dashboard/rate_match_tab.py
+++ b/analysis/dashboard/rate_match_tab.py
@@ -12,18 +12,18 @@
 def _parse_timestamp(timestamp: str) -> datetime:
     """Parse timestamp from multiple possible formats. 
- + Supports: - ISO 8601: 2025-12-30T15:52:38.206058Z - YYYY-MM-DD HH:MM:SS - MM/DD/YYYY-HH:MM:SS (TRTLLM format) - + Args: timestamp: Timestamp string in one of the supported formats - + Returns: datetime object - + Raises: ValueError: If timestamp format is not recognized """ @@ -32,23 +32,23 @@ def _parse_timestamp(timestamp: str) -> datetime: return datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S") except ValueError: pass - + # Try ISO 8601 format (SGLang) try: - ts = timestamp.rstrip('Z') - if '.' in ts: + ts = timestamp.rstrip("Z") + if "." in ts: return datetime.fromisoformat(ts) else: return datetime.strptime(ts, "%Y-%m-%dT%H:%M:%S") except ValueError: pass - + # Try MM/DD/YYYY-HH:MM:SS format (TRTLLM) try: return datetime.strptime(timestamp, "%m/%d/%Y-%H:%M:%S") except ValueError: pass - + raise ValueError(f"Unable to parse timestamp: {timestamp}") diff --git a/analysis/srtlog/cluster_config.py b/analysis/srtlog/cluster_config.py index 383f4cad..2a8621c9 100644 --- a/analysis/srtlog/cluster_config.py +++ b/analysis/srtlog/cluster_config.py @@ -3,9 +3,10 @@ """ import logging -import tomllib from pathlib import Path +import tomllib + logger = logging.getLogger(__name__) diff --git a/analysis/srtlog/log_parser.py b/analysis/srtlog/log_parser.py index cf622278..71adf4f5 100644 --- a/analysis/srtlog/log_parser.py +++ b/analysis/srtlog/log_parser.py @@ -111,12 +111,12 @@ def _detect_backend_type(self, run_path: str) -> str | None: Backend type string (e.g., 'sglang', 'trtllm') or None """ run_path = Path(run_path) - + # Try current directory and parent directory search_dirs = [run_path] if run_path.name == "logs" and run_path.parent.exists(): search_dirs.insert(0, run_path.parent) # Check parent first - + # Try JSON files first for search_dir in search_dirs: json_files = list(search_dir.glob("*.json")) @@ -128,7 +128,7 @@ def _detect_backend_type(self, run_path: str) -> str | None: container = metadata.get("container", "") if not container: container = metadata.get("model", {}).get("container", "") - + container_lower = container.lower() if "sglang" in container_lower: logger.debug(f"Detected sglang from {json_file}") @@ -189,7 +189,7 @@ def _populate_config_from_files(self, run_path: str, node_infos: list) -> None: import os run_path = Path(run_path) - + # If run_path is the logs directory, look in parent for config files if run_path.name == "logs" and run_path.parent.exists(): config_dir = run_path.parent @@ -242,7 +242,9 @@ def _populate_config_from_files(self, run_path: str, node_infos: list) -> None: node_info.node_config["launch_command"] = launch_cmd else: node_info.node_config = file_config - logger.debug(f"Loaded config for {node_name} with {len(file_config.get('environment', {}))} env vars") + logger.debug( + f"Loaded config for {node_name} with {len(file_config.get('environment', {}))} env vars" + ) except Exception as e: logger.warning(f"Could not load config from {config_path}: {e}") # Keep existing minimal config with launch_command @@ -260,7 +262,7 @@ def _populate_config_from_files(self, run_path: str, node_infos: list) -> None: node_info.node_config = {} if "environment" not in node_info.node_config: node_info.node_config["environment"] = {} - + # Merge YAML env vars (they take precedence over JSON) yaml_worker_env = yaml_env[worker_type] node_info.node_config["environment"].update(yaml_worker_env) @@ -284,7 +286,7 @@ def _parse_yaml_environment(self, run_path: Path) -> dict[str, dict[str, str]]: try: with open(yaml_path) as f: config = yaml.safe_load(f) - + if not 
config or "backend" not in config: logger.debug("config.yaml has no backend section") return {} @@ -337,7 +339,7 @@ def _node_info_to_dict(self, node_info: "NodeInfo") -> dict: "environment": node_info.environment, # Property accessor for backward compatibility "run_id": metrics.run_id, } - + def get_prefill_nodes(self, nodes: list): """Filter for prefill nodes only. @@ -476,7 +478,6 @@ def _deserialize_node_metrics(self, df: pd.DataFrame, run_path: str = None) -> l Returns: List of NodeInfo objects """ - import time from .models import BatchMetrics, MemoryMetrics, NodeInfo, NodeMetadata, NodeMetrics start_time = time.time() @@ -558,7 +559,7 @@ def _deserialize_node_metrics(self, df: pd.DataFrame, run_path: str = None) -> l worker_type=worker_type, worker_id=worker_id, ) - + # Create NodeMetrics (NEW structure) metrics = NodeMetrics( metadata=node_metadata, @@ -566,18 +567,18 @@ def _deserialize_node_metrics(self, df: pd.DataFrame, run_path: str = None) -> l memory_snapshots=memory_snapshots, config=config, ) - + # Create NodeInfo with empty config (will be populated below) node_info = NodeInfo(metrics=metrics, node_config={}) nodes.append(node_info) elapsed = time.time() - start_time logger.info(f"Deserialized {len(nodes)} nodes in {elapsed:.2f}s") - + # Populate config from files (environment, launch_command) if run_path and nodes: self._populate_config_from_files(run_path, nodes) - + return nodes diff --git a/analysis/srtlog/models.py b/analysis/srtlog/models.py index ad7852d7..e5bf4662 100644 --- a/analysis/srtlog/models.py +++ b/analysis/srtlog/models.py @@ -145,7 +145,7 @@ def formatted_date(self) -> str: @dataclass class ProfilerMetadata: """Metadata about the benchmark/profiler configuration. - + This describes what the benchmark was configured to do, not the actual results. """ @@ -288,7 +288,7 @@ def add_benchmark_results(self, results: dict) -> None: @dataclass class BenchmarkLaunchCommand: """Parsed benchmark launch command information. - + Source: logs/benchmark.out Only contains essential fields. All parsed arguments go into extra_args. """ @@ -299,6 +299,7 @@ class BenchmarkLaunchCommand: # All parsed arguments as dict extra_args: dict[str, Any] = field(default_factory=dict) + @dataclass class BenchmarkRun: """Complete benchmark run with metadata and profiler results.""" @@ -440,7 +441,7 @@ class MemoryMetrics: @dataclass class NodeMetadata: """Node identification and worker information. - + This is the equivalent of RunMetadata but for individual worker nodes. """ @@ -452,7 +453,7 @@ class NodeMetadata: @dataclass class NodeMetrics: """Metrics from a single node (prefill or decode worker), parsed from log files. - + This class contains ONLY metrics data. Configuration is in NodeConfig. """ @@ -492,7 +493,7 @@ def is_decode(self) -> bool: @dataclass class NodeLaunchCommand: """Parsed node worker launch command information. - + Source: logs/{node}_{worker_type}_{worker_id}.out or .err Only contains essential fields. All parsed arguments go into extra_args. """ @@ -515,6 +516,7 @@ class GPUInfo(TypedDict, total=False): memory_total: str driver_version: str + class NodeConfig(TypedDict, total=False): """Expected structure of a node config JSON file (*_config.json).""" @@ -528,7 +530,7 @@ class NodeConfig(TypedDict, total=False): @dataclass class NodeInfo: """Complete information about a node, combining metrics and configuration. - + This is the top-level container for all node data. 
""" @@ -601,6 +603,7 @@ def launch_command(self) -> NodeLaunchCommand | None: return self.node_config.get("launch_command") return None + class ServerArgs(TypedDict, total=False): """Expected structure of server_args in node config. @@ -620,9 +623,10 @@ class ServerArgs(TypedDict, total=False): disaggregation_mode: str context_length: int + class TopologyInfo(TypedDict): """Service topology and configuration information from log files. - + Returned by parse_command_line_from_err() which analyzes log files to discover: - Which flags were explicitly set in launch commands - Physical node to service type mapping @@ -630,4 +634,3 @@ class TopologyInfo(TypedDict): explicit_flags: set services: dict[str, list[str]] # {node_name: [service_types]} - diff --git a/analysis/srtlog/parsers/__init__.py b/analysis/srtlog/parsers/__init__.py index e7f13be2..ec8f0243 100644 --- a/analysis/srtlog/parsers/__init__.py +++ b/analysis/srtlog/parsers/__init__.py @@ -25,7 +25,7 @@ class BenchmarkParserProtocol(Protocol): """Protocol for benchmark output parsers. Each benchmark type (sa-bench, mooncake-router, etc.) should have a parser that implements this protocol. - + Design principle: JSON files are the primary source of truth. The parse() method is a fallback for when JSON files are unavailable. """ @@ -37,11 +37,11 @@ def benchmark_type(self) -> str: def parse(self, benchmark_out_path: Path) -> dict[str, Any]: """Parse benchmark.out file and return results (FALLBACK method). - + This is a fallback method used when JSON result files are not available. Prefer using parse_result_directory() which prioritizes JSON files as the source of truth. - + Args: benchmark_out_path: Path to the benchmark.out file Returns: @@ -64,10 +64,10 @@ def parse_launch_command(self, log_content: str) -> BenchmarkLaunchCommand | Non def parse_result_json(self, json_path: Path) -> dict[str, Any]: """Parse a benchmark result JSON file (PRIMARY source of truth). - + JSON files contain the complete, accurate benchmark results and should be used as the primary data source whenever available. - + Args: json_path: Path to a result JSON file Returns: @@ -77,34 +77,34 @@ def parse_result_json(self, json_path: Path) -> dict[str, Any]: def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int | None = None) -> Path | None: """Find the directory containing benchmark results within a run directory. - + This method encapsulates the logic for locating result files, which varies by benchmark type. For example: - sa-bench: looks for directories like "sa-bench_isl_8192_osl_1024" - mooncake-router: looks in "logs/artifacts/" subdirectory - + Args: run_path: Path to the run directory (contains logs/, metadata, etc.) isl: Input sequence length (optional, used for pattern matching) osl: Output sequence length (optional, used for pattern matching) - + Returns: Path to directory containing result files, or None if not found """ ... - + def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: """Parse all result files in a directory. - + This is the primary entry point for parsing benchmark results. Implementation should: 1. First attempt to parse JSON result files (primary source of truth) 2. Fall back to parsing benchmark.out if no JSON files found 3. 
Return list of results (one per concurrency level or benchmark run) - + Args: result_dir: Directory containing benchmark result files - + Returns: List of result dicts (one per concurrency level or benchmark run) """ @@ -245,4 +245,4 @@ def list_node_parsers() -> list[str]: "list_node_parsers", "register_benchmark_parser", "register_node_parser", -] \ No newline at end of file +] diff --git a/analysis/srtlog/parsers/benchmark/__init__.py b/analysis/srtlog/parsers/benchmark/__init__.py index 29010ae2..51efb4a6 100644 --- a/analysis/srtlog/parsers/benchmark/__init__.py +++ b/analysis/srtlog/parsers/benchmark/__init__.py @@ -1,7 +1,6 @@ - """Benchmark output parsers.""" from analysis.srtlog.parsers.benchmark.mooncake_router import MooncakeRouterParser from analysis.srtlog.parsers.benchmark.sa_bench import SABenchParser -__all__ = ["SABenchParser", "MooncakeRouterParser"] \ No newline at end of file +__all__ = ["SABenchParser", "MooncakeRouterParser"] diff --git a/analysis/srtlog/parsers/benchmark/mooncake_router.py b/analysis/srtlog/parsers/benchmark/mooncake_router.py index 02bd59c3..1d2fbdb6 100644 --- a/analysis/srtlog/parsers/benchmark/mooncake_router.py +++ b/analysis/srtlog/parsers/benchmark/mooncake_router.py @@ -31,10 +31,10 @@ def benchmark_type(self) -> str: def parse(self, benchmark_out_path: Path) -> dict[str, Any]: """Parse benchmark.out file for mooncake-router results (FALLBACK method). - + This is a fallback method used when JSON result files are not available. Prefer using parse_result_directory() which prioritizes JSON files. - + Args: benchmark_out_path: Path to benchmark.out file Returns: @@ -152,10 +152,10 @@ def _get_metric(self, data: dict, metric_name: str, stat: str) -> float | None: def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: """Parse AIPerf result files in a directory. - + Uses JSON files (profile_export_aiperf.json) as the primary source of truth. Falls back to parsing benchmark.out only if no JSON results are found. - + Args: result_dir: Directory containing profile_export_aiperf.json Returns: @@ -178,14 +178,16 @@ def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: fallback_result = self.parse(benchmark_out) if fallback_result.get("output_tps"): # Convert to format expected by caller - results.append({ - "concurrency": 0, # Mooncake doesn't track concurrency - "output_tps": fallback_result.get("output_tps"), - "request_throughput": fallback_result.get("request_throughput"), - "mean_ttft_ms": fallback_result.get("mean_ttft_ms"), - "mean_itl_ms": fallback_result.get("mean_itl_ms"), - "total_requests": fallback_result.get("total_requests"), - }) + results.append( + { + "concurrency": 0, # Mooncake doesn't track concurrency + "output_tps": fallback_result.get("output_tps"), + "request_throughput": fallback_result.get("request_throughput"), + "mean_ttft_ms": fallback_result.get("mean_ttft_ms"), + "mean_itl_ms": fallback_result.get("mean_itl_ms"), + "total_requests": fallback_result.get("total_requests"), + } + ) else: logger.warning(f"No results found in {result_dir} (no profile_export_aiperf.json or benchmark.out)") @@ -193,18 +195,18 @@ def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int | None = None) -> Path | None: """Find the directory containing mooncake-router/AIPerf results. 
- + Mooncake-router results are typically in: - logs/artifacts/*/profile_export_aiperf.json - + Since results can be in nested subdirectories, we return the logs directory and let parse_result_directory use rglob to find them. - + Args: run_path: Path to the run directory isl: Input sequence length (not used for mooncake-router) osl: Output sequence length (not used for mooncake-router) - + Returns: Path to logs directory where results can be found, or None """ @@ -219,7 +221,7 @@ def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int return logs_dir except (OSError, PermissionError) as e: logger.warning(f"Error accessing {logs_dir}: {e}") - + # Also check run_path directly in case logs are at root try: for item in run_path.rglob("profile_export_aiperf.json"): @@ -227,7 +229,7 @@ def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int return run_path except (OSError, PermissionError) as e: logger.warning(f"Error accessing {run_path}: {e}") - + return None def find_aiperf_results(self, log_dir: Path) -> list[Path]: @@ -331,4 +333,4 @@ def parse_launch_command(self, log_content: str) -> BenchmarkLaunchCommand | Non benchmark_type=self.benchmark_type, raw_command=raw_command, extra_args=extra_args, - ) \ No newline at end of file + ) diff --git a/analysis/srtlog/parsers/benchmark/sa_bench.py b/analysis/srtlog/parsers/benchmark/sa_bench.py index 83161832..e8a6f8ab 100644 --- a/analysis/srtlog/parsers/benchmark/sa_bench.py +++ b/analysis/srtlog/parsers/benchmark/sa_bench.py @@ -31,10 +31,10 @@ def benchmark_type(self) -> str: def parse(self, benchmark_out_path: Path) -> dict[str, Any]: """Parse benchmark.out file for SA-Bench results (FALLBACK method). - + This is a fallback method used when JSON result files are not available. Prefer using parse_result_directory() which prioritizes JSON files. - + Args: benchmark_out_path: Path to benchmark.out file Returns: @@ -147,10 +147,10 @@ def parse_result_json(self, json_path: Path) -> dict[str, Any]: def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: """Parse all result JSON files in a benchmark result directory. - + Uses JSON files as the primary source of truth. Falls back to parsing benchmark.out only if no JSON results are found. 
- + Args: result_dir: Directory containing result_*.json files Returns: @@ -173,17 +173,21 @@ def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: fallback_result = self.parse(benchmark_out) if fallback_result.get("output_tps"): # Wrap in list format expected by caller - results.append({ - "max_concurrency": fallback_result.get("concurrencies", [0])[0] if fallback_result.get("concurrencies") else 0, - "output_tps": fallback_result.get("output_tps"), - "request_throughput": fallback_result.get("request_throughput"), - "mean_ttft_ms": fallback_result.get("mean_ttft_ms"), - "mean_itl_ms": fallback_result.get("mean_itl_ms"), - "mean_tpot_ms": fallback_result.get("mean_tpot_ms"), - "p99_ttft_ms": fallback_result.get("p99_ttft_ms"), - "p99_itl_ms": fallback_result.get("p99_itl_ms"), - "completed": fallback_result.get("completed_requests"), - }) + results.append( + { + "max_concurrency": fallback_result.get("concurrencies", [0])[0] + if fallback_result.get("concurrencies") + else 0, + "output_tps": fallback_result.get("output_tps"), + "request_throughput": fallback_result.get("request_throughput"), + "mean_ttft_ms": fallback_result.get("mean_ttft_ms"), + "mean_itl_ms": fallback_result.get("mean_itl_ms"), + "mean_tpot_ms": fallback_result.get("mean_tpot_ms"), + "p99_ttft_ms": fallback_result.get("p99_ttft_ms"), + "p99_itl_ms": fallback_result.get("p99_itl_ms"), + "completed": fallback_result.get("completed_requests"), + } + ) else: logger.warning(f"No results found in {result_dir} (no JSON files or benchmark.out)") @@ -194,16 +198,16 @@ def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int | None = None) -> Path | None: """Find the directory containing SA-Bench results. - + SA-Bench results are typically in directories named like: - sa-bench_isl_8192_osl_1024 - vllm_isl_8192_osl_1024 - + Args: run_path: Path to the run directory isl: Input sequence length osl: Output sequence length - + Returns: Path to results directory, or None if not found """ @@ -212,7 +216,7 @@ def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int logs_dir = run_path / "logs" if logs_dir.exists(): search_paths.append(logs_dir) - + # Build prefix patterns if isl is not None and osl is not None: prefixes = [ @@ -222,7 +226,7 @@ def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int else: # Fallback: match any sa-bench or vllm directory prefixes = ["sa-bench", "vllm"] - + # Search for matching directories for search_path in search_paths: if not search_path.exists(): @@ -241,9 +245,8 @@ def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int except (OSError, PermissionError) as e: logger.warning(f"Error accessing {search_path}: {e}") continue - - return None + return None def parse_launch_command(self, log_content: str) -> BenchmarkLaunchCommand | None: """Parse the SA-Bench launch command from log content. diff --git a/analysis/srtlog/parsers/nodes/sglang.py b/analysis/srtlog/parsers/nodes/sglang.py index b7b7ee39..03a4ee9e 100644 --- a/analysis/srtlog/parsers/nodes/sglang.py +++ b/analysis/srtlog/parsers/nodes/sglang.py @@ -34,30 +34,30 @@ class SGLangNodeParser: """Parser for SGLang node logs. Handles SGLang structured logging with ISO 8601 timestamps. May contain ANSI color codes which are stripped during parsing. 
-    
+
     Timestamp format: YYYY-MM-DDTHH:MM:SS.microsZ (e.g., 2025-12-30T15:52:38.206058Z)
     """
 
     @property
     def backend_type(self) -> str:
         return "sglang"
-    
+
     @staticmethod
     def parse_timestamp(timestamp: str) -> datetime:
         """Parse SGLang timestamp format to datetime object.
-        
+
         Args:
             timestamp: Timestamp string in ISO 8601 format (e.g., 2025-12-30T15:52:38.206058Z)
-        
+
         Returns:
             datetime object
-        
+
         Raises:
             ValueError: If timestamp format is invalid
         """
         # Handle both with and without microseconds and timezone
-        timestamp = timestamp.rstrip('Z')
-        if '.' in timestamp:
+        timestamp = timestamp.rstrip("Z")
+        if "." in timestamp:
             return datetime.fromisoformat(timestamp)
         else:
             return datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S")
@@ -101,8 +101,7 @@ def parse_single_log(self, log_path: Path) -> NodeInfo | None:
         node_info = self._extract_node_info_from_filename(str(log_path))
         if not node_info:
             logger.warning(
-                "Could not extract node info from filename: %s. "
-                "Expected format: <node>_<worker_type>_<worker_id>.err or .out",
+                "Could not extract node info from filename: %s. " "Expected format: <node>_<worker_type>_<worker_id>.err or .out",
                 log_path,
             )
             return None
@@ -212,7 +211,7 @@ def parse_single_log(self, log_path: Path) -> NodeInfo | None:
             worker_type=node_info["worker_type"],
             worker_id=node_info["worker_id"],
         )
-        
+
         # Create NodeMetrics with metadata
         metrics = NodeMetrics(
             metadata=node_metadata,
@@ -220,35 +219,35 @@ def parse_single_log(self, log_path: Path) -> NodeInfo | None:
             memory_snapshots=memory_snapshots,
             config=config,
         )
-        
+
         # Create NodeConfig with launch_command
         node_config = {}
         if launch_command:
             node_config["launch_command"] = launch_command
             node_config["environment"] = {}  # Will be populated by NodeAnalyzer if config file exists
-        
+
         # Return complete NodeInfo
         return NodeInfo(metrics=metrics, node_config=node_config if node_config else None)
 
     def _parse_timestamp(self, line: str) -> str | None:
         """Extract timestamp from log line.
-        
+
         Supports two formats:
         - Tagged format: [2025-11-04 05:31:43 DP0 TP0 EP0]
         - ISO format: 2025-12-30T15:52:38.206058Z (fallback)
-        
+
         Returns the timestamp string as-is without conversion. 
""" # Try tagged format first (YYYY-MM-DD HH:MM:SS) match = re.search(r"\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})", line) if match: return match.group(1) - + # Fall back to ISO format match = re.search(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z?)", line) if match: return match.group(1) - + return None def _parse_parallelism_tags(self, line: str) -> tuple[int, int, int]: @@ -292,9 +291,9 @@ def _parse_prefill_batch_line(self, line: str) -> dict | None: timestamp = self._parse_timestamp(line) if not timestamp: return None - + dp, tp, ep = self._parse_parallelism_tags(line) - + metrics = { "timestamp": timestamp, "type": "prefill", @@ -332,9 +331,9 @@ def _parse_decode_batch_line(self, line: str) -> dict | None: timestamp = self._parse_timestamp(line) if not timestamp: return None - + dp, tp, ep = self._parse_parallelism_tags(line) - + metrics = { "timestamp": timestamp, "type": "decode", @@ -368,9 +367,9 @@ def _parse_memory_line(self, line: str) -> dict | None: timestamp = self._parse_timestamp(line) if not timestamp: return None - + dp, tp, ep = self._parse_parallelism_tags(line) - + metrics = { "timestamp": timestamp, "dp": dp, diff --git a/analysis/srtlog/parsers/nodes/trtllm.py b/analysis/srtlog/parsers/nodes/trtllm.py index 9287ac26..88e31b96 100644 --- a/analysis/srtlog/parsers/nodes/trtllm.py +++ b/analysis/srtlog/parsers/nodes/trtllm.py @@ -36,34 +36,34 @@ class TRTLLMNodeParser: - Launch command from dynamo.trtllm - Worker configuration from Config() dump - MPI rank and world size information - + Timestamp format: MM/DD/YYYY-HH:MM:SS (e.g., 01/23/2026-08:04:38) """ @property def backend_type(self) -> str: return "trtllm" - + @staticmethod def parse_timestamp(timestamp: str) -> datetime: """Parse TRTLLM timestamp format to datetime object. - + Args: timestamp: Timestamp string in format MM/DD/YYYY-HH:MM:SS - + Returns: datetime object - + Raises: ValueError: If timestamp format is invalid """ return datetime.strptime(timestamp, "%m/%d/%Y-%H:%M:%S") - + def _extract_timestamp(self, line: str) -> str | None: """Extract timestamp string from log line. - + Supports format: [MM/DD/YYYY-HH:MM:SS ...] - + Returns: Timestamp string or None if not found """ @@ -71,7 +71,7 @@ def _extract_timestamp(self, line: str) -> str | None: if match: return match.group(1) return None - + def _parse_parallelism_tags(self, line: str) -> tuple[int, int, int]: """Extract DP, TP, EP indices from TRTLLM log line. 
@@ -237,7 +237,13 @@ def parse_single_log(self, log_path: Path) -> NodeInfo | None: logger.error("Error parsing %s: %s", log_path, e) return None - logger.debug("Parsed %s: %d batches, %d memory snapshots, config=%s", log_path, len(batches), len(memory_snapshots), config) + logger.debug( + "Parsed %s: %d batches, %d memory snapshots, config=%s", + log_path, + len(batches), + len(memory_snapshots), + config, + ) # Create NodeMetadata node_metadata = NodeMetadata( @@ -245,7 +251,7 @@ def parse_single_log(self, log_path: Path) -> NodeInfo | None: worker_type=node_info["worker_type"], worker_id=node_info["worker_id"], ) - + # Create NodeMetrics with metadata metrics = NodeMetrics( metadata=node_metadata, @@ -253,13 +259,13 @@ def parse_single_log(self, log_path: Path) -> NodeInfo | None: memory_snapshots=memory_snapshots, config=config, ) - + # Create NodeConfig with launch_command node_config = {} if launch_command: node_config["launch_command"] = launch_command node_config["environment"] = {} # Will be populated by NodeAnalyzer if config file exists - + # Return complete NodeInfo return NodeInfo(metrics=metrics, node_config=node_config if node_config else None) @@ -278,25 +284,23 @@ def _parse_iteration_logs(self, content: str, worker_type: str) -> list[BatchMet # Pattern to match TRTLLM iteration logs iter_pattern = re.compile( - r"iter\s*=\s*(\d+).*" - r"num_scheduled_requests:\s*(\d+).*" - r"states\s*=\s*\{([^}]+)\}" + r"iter\s*=\s*(\d+).*" r"num_scheduled_requests:\s*(\d+).*" r"states\s*=\s*\{([^}]+)\}" ) for match in iter_pattern.finditer(content): # Extract timestamp and parallelism from the line - line_start = content.rfind('\n', 0, match.start()) + 1 - line_end = content.find('\n', match.end()) + line_start = content.rfind("\n", 0, match.start()) + 1 + line_end = content.find("\n", match.end()) if line_end == -1: line_end = len(content) full_line = content[line_start:line_end] - + timestamp = self._extract_timestamp(full_line) if not timestamp: continue - + dp, tp, ep = self._parse_parallelism_tags(full_line) - + iteration = int(match.group(1)) num_scheduled = int(match.group(2)) states_str = match.group(3) @@ -382,18 +386,18 @@ def _parse_memory_info(self, content: str) -> list[MemoryMetrics]: for match in mem_pattern.finditer(content): # Extract timestamp and parallelism from the line - line_start = content.rfind('\n', 0, match.start()) + 1 - line_end = content.find('\n', match.end()) + line_start = content.rfind("\n", 0, match.start()) + 1 + line_end = content.find("\n", match.end()) if line_end == -1: line_end = len(content) full_line = content[line_start:line_end] - + timestamp = self._extract_timestamp(full_line) if not timestamp: timestamp = "" # Some memory lines may not have timestamps - + dp, tp, ep = self._parse_parallelism_tags(full_line) - + peak_mem = float(match.group(1)) avail_kv = float(match.group(2)) total_mem = float(match.group(3)) @@ -412,9 +416,7 @@ def _parse_memory_info(self, content: str) -> list[MemoryMetrics]: ) # Also parse KV cache allocation info (no timestamp/DP/TP/EP for these) - kv_alloc_pattern = re.compile( - r"\[MemUsageChange\] Allocated\s*([\d.]+)\s*GiB for max tokens.*?\((\d+)\)" - ) + kv_alloc_pattern = re.compile(r"\[MemUsageChange\] Allocated\s*([\d.]+)\s*GiB for max tokens.*?\((\d+)\)") for match in kv_alloc_pattern.finditer(content): kv_gb = float(match.group(1)) @@ -555,4 +557,4 @@ def parse_launch_command(self, log_content: str, worker_type: str = "unknown") - worker_type=worker_type, raw_command=raw_command, extra_args=extra_args, - ) \ 
No newline at end of file + ) diff --git a/analysis/srtlog/run_loader.py b/analysis/srtlog/run_loader.py index 20797163..b97c91be 100644 --- a/analysis/srtlog/run_loader.py +++ b/analysis/srtlog/run_loader.py @@ -7,7 +7,6 @@ import json import logging import os -import re from pathlib import Path import pandas as pd @@ -15,7 +14,7 @@ from .cache_manager import CacheManager from .log_parser import NodeAnalyzer from .models import BenchmarkRun, NodeMetrics -from .parsers import get_benchmark_parser, get_node_parser +from .parsers import get_benchmark_parser logger = logging.getLogger(__name__) @@ -222,7 +221,7 @@ def _load_benchmark_results(self, run: BenchmarkRun) -> None: # Use profiler_type from metadata profiler_type = run.profiler_metadata.profiler_type - + # Get the parser for this benchmark type try: parser = get_benchmark_parser(profiler_type) @@ -232,18 +231,18 @@ def _load_benchmark_results(self, run: BenchmarkRun) -> None: # Let the parser find its result directory result_dir = parser.find_result_directory( - Path(run_path), - isl=run.profiler_metadata.isl, - osl=run.profiler_metadata.osl + Path(run_path), isl=run.profiler_metadata.isl, osl=run.profiler_metadata.osl ) - + if not result_dir: logger.warning(f"No results directory found for {profiler_type} in {run_path}") return # Define source patterns for cache validation (relative to run_path) # Use recursive glob to catch nested result files (e.g., artifacts/*/profile_export_aiperf.json) - result_dir_rel = result_dir.relative_to(Path(run_path)) if result_dir.is_relative_to(Path(run_path)) else result_dir.name + result_dir_rel = ( + result_dir.relative_to(Path(run_path)) if result_dir.is_relative_to(Path(run_path)) else result_dir.name + ) source_patterns = [f"{result_dir_rel}/**/*.json"] # Try to load from cache first @@ -422,20 +421,20 @@ def _convert_parser_results_to_dict(self, results_list: list[dict]) -> dict: else: concurrency = 0 out["concurrencies"].append(concurrency) - + # Throughput - normalize field names with explicit None checks to preserve 0.0 if "output_throughput" in data and data["output_throughput"] is not None: output_tps = data["output_throughput"] else: output_tps = data.get("output_tps") out["output_tps"].append(output_tps) - + if "total_token_throughput" in data and data["total_token_throughput"] is not None: total_tps = data["total_token_throughput"] else: total_tps = data.get("total_tps") out["total_tps"].append(total_tps) - + out["request_throughput"].append(data.get("request_throughput")) out["request_goodput"].append(data.get("request_goodput")) out["request_rate"].append(data.get("request_rate")) @@ -706,7 +705,7 @@ def load_node_metrics(self, run_path: str, backend_type: str = "sglang") -> list # Use NodeAnalyzer which handles caching, backend detection, and config loading analyzer = NodeAnalyzer() node_infos = analyzer.parse_run_logs(run_path, return_dicts=False) - + # Extract only the metrics from each NodeInfo return [node.metrics for node in node_infos] diff --git a/pyproject.toml b/pyproject.toml index 265ec6c1..3e623f72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ "requests>=2.31.0", "rich>=13.0.0", "questionary>=2.0.0", + "pre-commit>=4.5.1", ] [project.scripts] diff --git a/tests/fixtures_parsers.py b/tests/fixtures_parsers.py index be780d78..e84c16ac 100644 --- a/tests/fixtures_parsers.py +++ b/tests/fixtures_parsers.py @@ -342,8 +342,9 @@ def assert_valid_benchmark_results(results: dict, expected_fields: list[str] | N assert field in results, 
f"Missing expected field: {field}" value = results[field] # Check it's not None and if it's a list, check it's not empty - assert value is not None and (not isinstance(value, list) or len(value) > 0), \ - f"Field {field} is None or empty list" + assert value is not None and ( + not isinstance(value, list) or len(value) > 0 + ), f"Field {field} is None or empty list" @staticmethod def assert_valid_node_metrics(node_metrics, min_batches: int = 0): @@ -359,4 +360,3 @@ def assert_valid_node_metrics(node_metrics, min_batches: int = 0): assert node_metrics.worker_id assert len(node_metrics.batches) >= min_batches assert isinstance(node_metrics.config, dict) - diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py index aa95cc28..ac9644f7 100644 --- a/tests/test_benchmarks.py +++ b/tests/test_benchmarks.py @@ -72,9 +72,7 @@ def test_validate_config_valid(self): name="test", model=ModelConfig(path="/model", container="/image", precision="fp4"), resources=ResourceConfig(gpu_type="h100"), - benchmark=BenchmarkConfig( - type="sa-bench", isl=1024, osl=1024, concurrencies="4x8" - ), + benchmark=BenchmarkConfig(type="sa-bench", isl=1024, osl=1024, concurrencies="4x8"), ) errors = runner.validate_config(config) assert errors == [] @@ -96,4 +94,3 @@ def test_mmlu_script_exists(self): """MMLU script exists.""" script = SCRIPTS_DIR / "mmlu" / "bench.sh" assert script.exists() - diff --git a/tests/test_dashboard.py b/tests/test_dashboard.py index 0d2f54a3..d50a236d 100644 --- a/tests/test_dashboard.py +++ b/tests/test_dashboard.py @@ -21,7 +21,7 @@ def test_parse_timestamp_yyyy_mm_dd(self): ts = "2025-12-30 15:52:38" dt = _parse_timestamp(ts) - + assert isinstance(dt, datetime) assert dt.year == 2025 assert dt.month == 12 @@ -36,7 +36,7 @@ def test_parse_timestamp_iso8601_with_microseconds(self): ts = "2025-12-30T15:52:38.206058Z" dt = _parse_timestamp(ts) - + assert isinstance(dt, datetime) assert dt.year == 2025 assert dt.month == 12 @@ -51,7 +51,7 @@ def test_parse_timestamp_iso8601_without_z(self): ts = "2025-12-30T15:52:38.206058" dt = _parse_timestamp(ts) - + assert isinstance(dt, datetime) assert dt.year == 2025 assert dt.month == 12 @@ -63,7 +63,7 @@ def test_parse_timestamp_iso8601_without_microseconds(self): ts = "2025-12-30T15:52:38" dt = _parse_timestamp(ts) - + assert isinstance(dt, datetime) assert dt.year == 2025 assert dt.hour == 15 @@ -74,7 +74,7 @@ def test_parse_timestamp_trtllm_format(self): ts = "01/23/2026-08:04:38" dt = _parse_timestamp(ts) - + assert isinstance(dt, datetime) assert dt.year == 2026 assert dt.month == 1 @@ -137,10 +137,10 @@ def test_parse_timestamp_time_delta(self): ts1 = "01/23/2026-08:04:38" ts2 = "01/23/2026-08:04:40" - + dt1 = _parse_timestamp(ts1) dt2 = _parse_timestamp(ts2) - + delta = dt2 - dt1 assert delta.total_seconds() == 2.0 @@ -156,11 +156,11 @@ def test_parse_timestamp_mixed_formats(self): ] dts = [_parse_timestamp(ts) for ts in timestamps] - + # All should parse successfully assert len(dts) == 3 assert all(isinstance(dt, datetime) for dt in dts) - + # Should be able to compute deltas (even if not chronological) delta = dts[1] - dts[0] assert delta.total_seconds() == 1.1 @@ -175,16 +175,16 @@ def test_sglang_to_dashboard_pipeline(self): from analysis.srtlog.parsers import get_node_parser parser = get_node_parser("sglang") - + # SGLang format timestamp sglang_ts = "2025-12-30T15:52:38.206058Z" - + # Parser should be able to parse it dt_parser = parser.parse_timestamp(sglang_ts) - + # Dashboard should be able to parse it dt_dashboard = 
_parse_timestamp(sglang_ts) - + # Both should produce same datetime assert dt_parser.year == dt_dashboard.year assert dt_parser.month == dt_dashboard.month @@ -199,16 +199,16 @@ def test_trtllm_to_dashboard_pipeline(self): from analysis.srtlog.parsers import get_node_parser parser = get_node_parser("trtllm") - + # TRTLLM format timestamp trtllm_ts = "01/23/2026-08:04:38" - + # Parser should be able to parse it dt_parser = parser.parse_timestamp(trtllm_ts) - + # Dashboard should be able to parse it dt_dashboard = _parse_timestamp(trtllm_ts) - + # Both should produce same datetime assert dt_parser.year == dt_dashboard.year assert dt_parser.month == dt_dashboard.month @@ -237,4 +237,3 @@ def test_mixed_backend_timestamps_in_dashboard(self): # Should be able to compute time deltas assert len(parsed) == 3 - diff --git a/tests/test_e2e.py b/tests/test_e2e.py index b3a702e6..dad2233d 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -124,9 +124,9 @@ def test_fits_in_rack(self, recipe_path): config = load_config(str(recipe_path)) r = config.resources total_nodes_needed = (r.prefill_nodes or 0) + (r.decode_nodes or 0) + (r.agg_nodes or 0) - assert total_nodes_needed <= self.RACK.NUM_NODES, ( - f"{recipe_path.name}: needs {total_nodes_needed} nodes, rack has {self.RACK.NUM_NODES}" - ) + assert ( + total_nodes_needed <= self.RACK.NUM_NODES + ), f"{recipe_path.name}: needs {total_nodes_needed} nodes, rack has {self.RACK.NUM_NODES}" @pytest.mark.parametrize("recipe_path", RECIPES, ids=lambda p: p.name) def test_endpoint_allocation(self, recipe_path): @@ -154,14 +154,14 @@ def test_endpoint_allocation(self, recipe_path): assert len(decode_eps) == r.num_decode for ep in prefill_eps: - assert ep.total_gpus == r.gpus_per_prefill, ( - f"prefill endpoint {ep.index} has {ep.total_gpus} GPUs, expected {r.gpus_per_prefill}" - ) + assert ( + ep.total_gpus == r.gpus_per_prefill + ), f"prefill endpoint {ep.index} has {ep.total_gpus} GPUs, expected {r.gpus_per_prefill}" for ep in decode_eps: - assert ep.total_gpus == r.gpus_per_decode, ( - f"decode endpoint {ep.index} has {ep.total_gpus} GPUs, expected {r.gpus_per_decode}" - ) + assert ( + ep.total_gpus == r.gpus_per_decode + ), f"decode endpoint {ep.index} has {ep.total_gpus} GPUs, expected {r.gpus_per_decode}" class TestH100Cluster: @@ -234,9 +234,9 @@ def test_multi_node_tp(self, recipe_path): ) for ep in [e for e in endpoints if e.mode == "prefill"]: - assert ep.num_nodes == expected_nodes, ( - f"prefill endpoint should span {expected_nodes} nodes, got {ep.num_nodes}" - ) + assert ( + ep.num_nodes == expected_nodes + ), f"prefill endpoint should span {expected_nodes} nodes, got {ep.num_nodes}" class TestCIConfigs: @@ -375,9 +375,9 @@ def test_disagg_kv_router_shared_node_allocation(self): for ep in decode_eps: node1_decode_gpus.update(ep.gpu_indices) - assert node1_prefill_gpus.isdisjoint(node1_decode_gpus), ( - f"GPU overlap on node1! prefill uses {node1_prefill_gpus}, decode uses {node1_decode_gpus}" - ) + assert node1_prefill_gpus.isdisjoint( + node1_decode_gpus + ), f"GPU overlap on node1! prefill uses {node1_prefill_gpus}, decode uses {node1_decode_gpus}" def test_disagg_kv_router_cuda_visible_devices(self): """Processes on shared node have non-overlapping CUDA_VISIBLE_DEVICES.""" @@ -414,15 +414,20 @@ def test_disagg_kv_router_cuda_visible_devices(self): all_gpus_on_node1 = set() for proc in node1_processes: for gpu in proc.gpu_indices: - assert gpu not in all_gpus_on_node1, ( - f"GPU {gpu} assigned to multiple processes on {nodes[1]}!" 
- ) + assert gpu not in all_gpus_on_node1, f"GPU {gpu} assigned to multiple processes on {nodes[1]}!" all_gpus_on_node1.add(gpu) # All 8 GPUs on node1 should be used - assert all_gpus_on_node1 == {0, 1, 2, 3, 4, 5, 6, 7}, ( - f"Expected all 8 GPUs used on node1, got {all_gpus_on_node1}" - ) + assert all_gpus_on_node1 == { + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + }, f"Expected all 8 GPUs used on node1, got {all_gpus_on_node1}" # Verify CUDA_VISIBLE_DEVICES strings are correct for proc in node1_processes: @@ -442,10 +447,7 @@ def test_disagg_kv_router_total_allocation_fits(self): config = load_config(str(recipe_path)) r = config.resources - total_gpus_needed = ( - r.num_prefill * r.gpus_per_prefill - + r.num_decode * r.gpus_per_decode - ) + total_gpus_needed = r.num_prefill * r.gpus_per_prefill + r.num_decode * r.gpus_per_decode total_gpus_available = r.total_nodes * r.gpus_per_node assert total_gpus_needed <= total_gpus_available, ( diff --git a/tests/test_frontend_topology.py b/tests/test_frontend_topology.py index 60251085..05c7d4fe 100644 --- a/tests/test_frontend_topology.py +++ b/tests/test_frontend_topology.py @@ -6,8 +6,6 @@ from pathlib import Path from unittest.mock import MagicMock, patch -import pytest - from srtctl.cli.do_sweep import SweepOrchestrator from srtctl.cli.mixins.frontend_stage import FrontendTopology from srtctl.core.runtime import Nodes, RuntimeContext diff --git a/tests/test_frontends.py b/tests/test_frontends.py index f3f94771..f11db00d 100644 --- a/tests/test_frontends.py +++ b/tests/test_frontends.py @@ -10,7 +10,6 @@ from srtctl.frontends import DynamoFrontend, SGLangFrontend, get_frontend - # ============================================================================ # get_frontend() Tests # ============================================================================ @@ -130,13 +129,15 @@ def test_mixed_args(self): """Mixed arg types are handled correctly.""" frontend = SGLangFrontend() - result = frontend.get_frontend_args_list({ - "policy": "round_robin", - "verbose": True, - "timeout": 60, - "disabled": False, - "optional": None, - }) + result = frontend.get_frontend_args_list( + { + "policy": "round_robin", + "verbose": True, + "timeout": 60, + "disabled": False, + "optional": None, + } + ) # Check all expected args are present assert "--policy" in result @@ -152,10 +153,12 @@ def test_dynamo_frontend_args_list(self): """DynamoFrontend has same args list behavior.""" frontend = DynamoFrontend() - result = frontend.get_frontend_args_list({ - "router-mode": "kv", - "router-reset-states": True, - }) + result = frontend.get_frontend_args_list( + { + "router-mode": "kv", + "router-reset-states": True, + } + ) assert "--router-mode" in result assert "kv" in result @@ -392,9 +395,7 @@ def test_sglang_env_passed_to_process(self, mock_get_ip, mock_srun): frontend = SGLangFrontend() topology = MockTopology(frontend_nodes=["node0"]) config = MockConfig( - frontend=MockFrontendConfig( - env={"MY_VAR": "my_value", "ANOTHER": "123"} - ), + frontend=MockFrontendConfig(env={"MY_VAR": "my_value", "ANOTHER": "123"}), resources=MockResourceConfig(num_agg=1), ) @@ -465,9 +466,7 @@ def test_sglang_frontend_args_in_command(self, mock_get_ip, mock_srun): frontend = SGLangFrontend() topology = MockTopology(frontend_nodes=["node0"]) config = MockConfig( - frontend=MockFrontendConfig( - args={"policy": "cache_aware", "verbose": True} - ), + frontend=MockFrontendConfig(args={"policy": "cache_aware", "verbose": True}), resources=MockResourceConfig(num_agg=1), ) @@ -492,4 +491,3 @@ 
def test_sglang_frontend_args_in_command(self, mock_get_ip, mock_srun): assert "--policy" in cmd assert "cache_aware" in cmd assert "--verbose" in cmd - diff --git a/tests/test_health.py b/tests/test_health.py index 034b7180..b894104e 100644 --- a/tests/test_health.py +++ b/tests/test_health.py @@ -3,15 +3,12 @@ """Tests for health check parsing (Dynamo and SGLang router).""" -import pytest - from srtctl.core.health import ( WorkerHealthResult, check_dynamo_health, check_sglang_router_health, ) - # ============================================================================ # Dynamo Health Check Tests # ============================================================================ @@ -103,7 +100,7 @@ def test_ignores_non_generate_endpoints(self): class TestDynamoHealthAggregated: """Test Dynamo /health parsing for aggregated mode (backend workers). - + In aggregated mode, workers report as "backend" and count as decode. Caller should pass expected_prefill=0, expected_decode=num_agg. """ @@ -391,9 +388,7 @@ def test_empty_response(self): def test_missing_count_fields_defaults_to_zero(self): """Missing count fields default to 0.""" - response = { - "stats": {} - } + response = {"stats": {}} result = check_sglang_router_health(response, expected_prefill=1, expected_decode=1) @@ -432,4 +427,3 @@ def test_with_counts(self): assert result.prefill_ready == 2 assert result.decode_ready == 4 - diff --git a/tests/test_parsers.py b/tests/test_parsers.py index bf4e123a..135bd48c 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -438,10 +438,10 @@ def test_parse_timestamp_invalid(self, parser): def test_parse_dp_tp_ep_tag_full_format(self, parser): """Test parsing full DP/TP/EP tag format.""" line = "[2025-11-04 05:31:43 DP0 TP2 EP1] Prefill batch, #new-seq: 5" - + timestamp = parser._parse_timestamp(line) dp, tp, ep = parser._parse_parallelism_tags(line) - + assert timestamp == "2025-11-04 05:31:43" assert dp == 0 assert tp == 2 @@ -450,10 +450,10 @@ def test_parse_dp_tp_ep_tag_full_format(self, parser): def test_parse_dp_tp_ep_tag_simple_tp(self, parser): """Test parsing simple TP-only format (1P4D style).""" line = "[2025-11-04 07:05:55 TP0] Decode batch, #running-req: 10" - + timestamp = parser._parse_timestamp(line) dp, tp, ep = parser._parse_parallelism_tags(line) - + assert timestamp == "2025-11-04 07:05:55" assert dp == 0 # Default assert tp == 0 @@ -462,10 +462,10 @@ def test_parse_dp_tp_ep_tag_simple_tp(self, parser): def test_parse_dp_tp_ep_tag_pipeline(self, parser): """Test parsing pipeline parallelism format.""" line = "[2025-12-08 14:34:44 PP3] Prefill batch, #new-seq: 8" - + timestamp = parser._parse_timestamp(line) dp, tp, ep = parser._parse_parallelism_tags(line) - + assert timestamp == "2025-12-08 14:34:44" assert dp == 0 # Default assert tp == 3 # PP mapped to TP @@ -474,10 +474,10 @@ def test_parse_dp_tp_ep_tag_pipeline(self, parser): def test_parse_dp_tp_ep_tag_no_tags(self, parser): """Test parsing line without parallelism tags.""" line = "[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m Prefill batch" - + timestamp = parser._parse_timestamp(line) dp, tp, ep = parser._parse_parallelism_tags(line) - + assert timestamp == "2025-12-30T15:52:38.206058Z" # ISO format fallback assert dp == 0 assert tp == 0 @@ -491,7 +491,7 @@ def test_parse_parallelism_wrapper(self, parser): assert dp == 1 assert tp == 2 assert ep == 3 - + # Without tags - should default to 0 line_without_tags = "Some log line without tags" dp, tp, ep = parser._parse_parallelism_tags(line_without_tags) @@ 
-502,9 +502,9 @@ def test_parse_parallelism_wrapper(self, parser):
     def test_parse_prefill_batch_with_dp_tp_ep(self, parser):
         """Test that prefill batch parsing extracts DP/TP/EP values."""
         line = "[2025-11-04 05:31:43 DP1 TP2 EP3] Prefill batch, #new-seq: 5, #new-token: 40960, #running-req: 5, input throughput (token/s): 5000.5"
-        
+
         metrics = parser._parse_prefill_batch_line(line)
-        
+
         assert metrics is not None
         assert metrics["dp"] == 1
         assert metrics["tp"] == 2
@@ -517,9 +517,9 @@ def test_parse_prefill_batch_with_dp_tp_ep(self, parser):
     def test_parse_decode_batch_with_dp_tp_ep(self, parser):
         """Test that decode batch parsing extracts DP/TP/EP values."""
         line = "[2025-11-04 05:31:45 DP0 TP1 EP0] Decode batch, #running-req: 10, #token: 512, gen throughput (token/s): 1500.5"
-        
+
         metrics = parser._parse_decode_batch_line(line)
-        
+
         assert metrics is not None
         assert metrics["dp"] == 0
         assert metrics["tp"] == 1
@@ -531,9 +531,9 @@ def test_parse_decode_batch_with_dp_tp_ep(self, parser):
     def test_parse_memory_with_dp_tp_ep(self, parser):
         """Test that memory line parsing extracts DP/TP/EP values."""
         line = "[2025-11-04 05:31:50 DP0 TP2 EP1] avail mem=75.11 GB, mem usage=107.07 GB, KV size: 17.16 GB"
-        
+
         metrics = parser._parse_memory_line(line)
-        
+
         assert metrics is not None
         assert metrics["dp"] == 0
         assert metrics["tp"] == 2
@@ -546,9 +546,9 @@ def test_parse_batch_fallback_to_iso_timestamp(self, parser):
         """Test that parser supports ISO timestamp fallback."""
         # Prefill batch with ISO timestamp (old format) - should parse with default parallelism
         line = "[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m Prefill batch, #new-seq: 5, #new-token: 40960"
-        
+
         metrics = parser._parse_prefill_batch_line(line)
-        
+
         # Should parse successfully with ISO timestamp and default parallelism tags
         assert metrics is not None
         assert metrics["timestamp"] == "2025-12-30T15:52:38.206058Z"
@@ -559,9 +559,9 @@ def test_parse_batch_with_simple_tp_format(self, parser):
         """Test parsing batch with simple TP format (1P4D disaggregated style)."""
         line = "[2025-11-04 07:05:55 TP0] Prefill batch, #new-seq: 3, #new-token: 24576, input throughput (token/s): 3000.0"
-        
+
         metrics = parser._parse_prefill_batch_line(line)
-        
+
         assert metrics is not None
         assert metrics["dp"] == 0
         assert metrics["tp"] == 0
 
@@ -571,9 +571,9 @@ def test_parse_batch_with_pipeline_format(self, parser):
         """Test parsing batch with pipeline parallelism format."""
         line = "[2025-12-08 14:34:44 PP2] Prefill batch, #new-seq: 4, #new-token: 32768"
-        
+
         metrics = parser._parse_prefill_batch_line(line)
-        
+
         assert metrics is not None
         assert metrics["dp"] == 0
         assert metrics["tp"] == 2  # PP mapped to TP
 
@@ -598,20 +598,20 @@ def test_parse_single_log_with_parallelism_tags(self, parser, temp_dir):
         assert node.node_name == "test_node"
         assert node.worker_type == "prefill"
         assert node.worker_id == "w0"
-        
+
         # Check that batches have correct DP/TP/EP values
         assert len(node.batches) == 2
         for batch in node.batches:
             assert batch.dp == 0
             assert batch.tp == 0
             assert batch.ep == 0
-        
+
         # Check memory snapshots have correct DP/TP/EP values
         assert len(node.memory_snapshots) == 1
         assert node.memory_snapshots[0].dp == 0
         assert node.memory_snapshots[0].tp == 0
         assert node.memory_snapshots[0].ep == 0
-        
+
         # Verify config extraction still works
         assert node.config["tp_size"] == 8
         assert node.config["dp_size"] == 1
@@ -825,10 +825,10 @@ def test_timestamp_preserved_in_memory(self, parser):
     def test_parse_dp_tp_ep_tag_full_format(self, parser):
         """Test parsing full DP/TP/EP tag format in TRTLLM logs."""
         line = "[01/23/2026-08:04:38 DP1 TP2 EP3] [TRT-LLM] iter = 100, num_scheduled_requests: 5, states = {'num_ctx_requests': 2, 'num_ctx_tokens': 1024, 'num_generation_tokens': 0}"
-        
+
         timestamp = parser._extract_timestamp(line)
         dp, tp, ep = parser._parse_parallelism_tags(line)
-        
+
         assert timestamp == "01/23/2026-08:04:38"
         assert dp == 1
         assert tp == 2
@@ -837,10 +837,10 @@ def test_parse_dp_tp_ep_tag_full_format(self, parser):
     def test_parse_dp_tp_ep_tag_simple_tp(self, parser):
         """Test parsing simple TP-only format (1P4D style) in TRTLLM logs."""
         line = "[01/23/2026-08:04:38 TP0] [TRT-LLM] iter = 100, num_scheduled_requests: 10, states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 512}"
-        
+
         timestamp = parser._extract_timestamp(line)
         dp, tp, ep = parser._parse_parallelism_tags(line)
-        
+
         assert timestamp == "01/23/2026-08:04:38"
         assert dp == 0  # Default
         assert tp == 0
@@ -849,10 +849,10 @@ def test_parse_dp_tp_ep_tag_simple_tp(self, parser):
     def test_parse_dp_tp_ep_tag_pipeline(self, parser):
         """Test parsing pipeline parallelism format in TRTLLM logs."""
         line = "[01/23/2026-08:04:38 PP3] [TRT-LLM] iter = 100, num_scheduled_requests: 8, states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 256}"
-        
+
         timestamp = parser._extract_timestamp(line)
         dp, tp, ep = parser._parse_parallelism_tags(line)
-        
+
         assert timestamp == "01/23/2026-08:04:38"
         assert dp == 0  # Default
         assert tp == 3  # PP mapped to TP
@@ -861,10 +861,10 @@ def test_parse_dp_tp_ep_tag_pipeline(self, parser):
     def test_parse_dp_tp_ep_tag_no_tags(self, parser):
         """Test parsing line without parallelism tags in TRTLLM logs."""
         line = "[01/23/2026-08:04:38] [TRT-LLM] iter = 100, num_scheduled_requests: 5"
-        
+
         timestamp = parser._extract_timestamp(line)
         dp, tp, ep = parser._parse_parallelism_tags(line)
-        
+
         assert timestamp == "01/23/2026-08:04:38"
         assert dp == 0
         assert tp == 0
@@ -875,9 +875,9 @@ def test_parse_iteration_with_dp_tp_ep(self, parser):
         log_content = """
 [01/23/2026-08:04:38 DP0 TP1 EP0] [TRT-LLM] [RANK 0] [I] iter = 100, num_scheduled_requests: 5, states = {'num_ctx_requests': 2, 'num_ctx_tokens': 1024, 'num_generation_tokens': 0}, host_step_time = 50.0ms
 """
-        
+
         batches = parser._parse_iteration_logs(log_content, "prefill")
-        
+
         assert len(batches) == 1
         assert batches[0].dp == 0
         assert batches[0].tp == 1
@@ -889,9 +889,9 @@ def test_parse_memory_with_dp_tp_ep(self, parser):
         log_content = """
 [01/23/2026-08:04:38 DP0 TP2 EP1] [TRT-LLM] [RANK 0] [I] Peak memory during memory usage profiling (torch + non-torch): 91.46 GiB, available KV cache memory when calculating max tokens: 41.11 GiB, fraction is set 0.85, kv size is 35136. device total memory 139.81 GiB
 """
-        
+
         memory_snapshots = parser._parse_memory_info(log_content)
-        
+
         assert len(memory_snapshots) >= 1
         assert memory_snapshots[0].dp == 0
         assert memory_snapshots[0].tp == 2
@@ -1110,4 +1110,3 @@ def test_trtllm_decode_sample_data(self):
         assert cmd is not None
         assert cmd.backend_type == "trtllm"
         assert cmd.worker_type == "decode"
-
diff --git a/tests/test_process_registry.py b/tests/test_process_registry.py
index 0e556ec3..45e3e793 100644
--- a/tests/test_process_registry.py
+++ b/tests/test_process_registry.py
@@ -7,8 +7,6 @@
 from subprocess import Popen
 from unittest.mock import MagicMock
 
-import pytest
-
 from srtctl.core.processes import ManagedProcess, ProcessRegistry
 
 
diff --git a/tests/test_profiling.py b/tests/test_profiling.py
index 3002ccb0..fe1d2f60 100644
--- a/tests/test_profiling.py
+++ b/tests/test_profiling.py
@@ -387,4 +387,3 @@ def test_profiling_script_exists(self):
         """Profiling script exists."""
         script = SCRIPTS_DIR / "profiling" / "profile.sh"
         assert script.exists()
-
diff --git a/tests/test_runloader_parsers.py b/tests/test_runloader_parsers.py
index 68070dc6..4b7b4fff 100644
--- a/tests/test_runloader_parsers.py
+++ b/tests/test_runloader_parsers.py
@@ -13,7 +13,6 @@
 import pytest
 
-from analysis.srtlog.models import BenchmarkRun
 from analysis.srtlog.run_loader import RunLoader
 from tests.fixtures_parsers import ParserTestHarness, SampleSABenchData
 
 
@@ -329,4 +328,3 @@ def test_mooncake_router_directory_detection(self, temp_dir, sample_run_metadata
         assert run is not None
         # Verify mooncake-router results were parsed
         assert len(run.profiler.output_tps) >= 1
-
diff --git a/tests/test_sweep.py b/tests/test_sweep.py
index 35dfa71d..ce1168df 100644
--- a/tests/test_sweep.py
+++ b/tests/test_sweep.py
@@ -287,4 +287,3 @@ def test_placeholder_substitution_in_generated_config(self):
         assert prefill1["mem-fraction-static"] == "0.85"
         assert prefill2["mem-fraction-static"] == "0.9"
 
-

From 5e73496153c822f2dc0593b3027ad41cdb9ebc8a Mon Sep 17 00:00:00 2001
From: Kaunil Dhruv
Date: Mon, 26 Jan 2026 21:17:49 -0800
Subject: [PATCH 11/15] pre-commit

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 3e623f72..eb8f6ce2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,7 @@ dependencies = [
     "rich>=13.0.0",
     "questionary>=2.0.0",
     "pre-commit>=4.5.1",
+    "pandas>=2.1.0",
 ]
 
 [project.scripts]

From 958f913cf0bcdf9f0847835b5676e5f2eb24e4f6 Mon Sep 17 00:00:00 2001
From: Kaunil Dhruv
Date: Mon, 26 Jan 2026 21:19:03 -0800
Subject: [PATCH 12/15] pre-commit

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index eb8f6ce2..a9a06858 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ dependencies = [
     "questionary>=2.0.0",
     "pre-commit>=4.5.1",
     "pandas>=2.1.0",
+    "pyarrow>=23.0.0",
 ]
 
 [project.scripts]

From 36d10176d955bd74b1c25b24864b1b94cb86508e Mon Sep 17 00:00:00 2001
From: Kaunil Dhruv
Date: Mon, 26 Jan 2026 21:21:30 -0800
Subject: [PATCH 13/15] pre-commit

---
 pyproject.toml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index a9a06858..8e96121e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ dependencies = [
     "pre-commit>=4.5.1",
     "pandas>=2.1.0",
     "pyarrow>=23.0.0",
+    "plotly>=6.5.2",
 ]
 
 [project.scripts]
@@ -37,8 +38,14 @@ packages = ["src/srtctl"]
 dev = [
     "pytest>=8.0.0",
     "pytest-cov>=4.0",
+    "pytest-typeguard>=4.0.0",
     "ruff>=0.8.0",
     "ty",  # Astral's fast type checker (replaces mypy)
+    # Analysis dependencies needed for tests
+    "streamlit>=1.30.0",
+    "plotly>=5.18.0",
+    "pandas>=2.1.0",
+    "pyarrow>=14.0.0",  # For parquet caching
 ]
 
 analysis = [

From ee4d607bd80038af23180f38d1eb048b614d1d8d Mon Sep 17 00:00:00 2001
From: Kaunil Dhruv
Date: Mon, 26 Jan 2026 21:24:15 -0800
Subject: [PATCH 14/15] pre-commit

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8e96121e..257463f0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,7 +38,7 @@
 dev = [
     "pytest>=8.0.0",
     "pytest-cov>=4.0",
-    "pytest-typeguard>=4.0.0",
+    "typeguard>=4.0.0",  # Includes pytest integration
     "ruff>=0.8.0",
     "ty",  # Astral's fast type checker (replaces mypy)
     # Analysis dependencies needed for tests

From 99e2225326bd145f7b20ef91764d81f68e7b55bc Mon Sep 17 00:00:00 2001
From: Kaunil Dhruv
Date: Mon, 26 Jan 2026 21:35:41 -0800
Subject: [PATCH 15/15] ruff check

---
 analysis/srtlog/parsers/nodes/trtllm.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/analysis/srtlog/parsers/nodes/trtllm.py b/analysis/srtlog/parsers/nodes/trtllm.py
index 88e31b96..0cfc9dc5 100644
--- a/analysis/srtlog/parsers/nodes/trtllm.py
+++ b/analysis/srtlog/parsers/nodes/trtllm.py
@@ -301,18 +301,18 @@ def _parse_iteration_logs(self, content: str, worker_type: str) -> list[BatchMet
 
             dp, tp, ep = self._parse_parallelism_tags(full_line)
 
-            iteration = int(match.group(1))
+            # iteration = int(match.group(1))  # Not used currently
             num_scheduled = int(match.group(2))
             states_str = match.group(3)
 
             # Parse states dict
-            ctx_requests = 0
             ctx_tokens = 0
             gen_tokens = 0
 
-            ctx_req_match = re.search(r"'num_ctx_requests':\s*(\d+)", states_str)
-            if ctx_req_match:
-                ctx_requests = int(ctx_req_match.group(1))
+            # Skip ctx_requests as it's not used
+            # ctx_req_match = re.search(r"'num_ctx_requests':\s*(\d+)", states_str)
+            # if ctx_req_match:
+            #     ctx_requests = int(ctx_req_match.group(1))
 
             ctx_tok_match = re.search(r"'num_ctx_tokens':\s*(\d+)", states_str)
             if ctx_tok_match: