diff --git a/analysis/dashboard/app.py b/analysis/dashboard/app.py index 87920a15..83bbf819 100644 --- a/analysis/dashboard/app.py +++ b/analysis/dashboard/app.py @@ -102,8 +102,8 @@ def render_sidebar(logs_dir, runs): with st.sidebar.expander("📊 ISL/OSL", expanded=False): isl_osl_pairs = set() for run in sorted_runs: - if run.profiler.isl and run.profiler.osl: - isl_osl_pairs.add(f"{run.profiler.isl}/{run.profiler.osl}") + if run.profiler_metadata.isl and run.profiler_metadata.osl: + isl_osl_pairs.add(f"{run.profiler_metadata.isl}/{run.profiler_metadata.osl}") if isl_osl_pairs: pair_options = sorted(isl_osl_pairs) @@ -115,7 +115,9 @@ def render_sidebar(logs_dir, runs): ) if selected_pairs: - sorted_runs = [r for r in sorted_runs if f"{r.profiler.isl}/{r.profiler.osl}" in selected_pairs] + sorted_runs = [ + r for r in sorted_runs if f"{r.profiler_metadata.isl}/{r.profiler_metadata.osl}" in selected_pairs + ] else: st.caption("No ISL/OSL information available") @@ -176,8 +178,8 @@ def render_sidebar(logs_dir, runs): for run in sorted_runs: topology = run.metadata.topology_label - isl = run.profiler.isl - osl = run.profiler.osl + isl = run.profiler_metadata.isl + osl = run.profiler_metadata.osl gpu_type = run.metadata.gpu_type gpu_suffix = f" [{gpu_type}]" if gpu_type else "" # Include job ID to ensure unique labels @@ -284,7 +286,7 @@ def render_sidebar(logs_dir, runs): f"{run.job_id} | " f"{run.metadata.agg_workers}A | " f"{total_gpus} GPUs | " - f"{run.profiler.isl}/{run.profiler.osl}" + f"{run.profiler_metadata.isl}/{run.profiler_metadata.osl}" ) else: run_id = ( @@ -298,7 +300,7 @@ def render_sidebar(logs_dir, runs): f"{run.job_id} | " f"{run.metadata.prefill_workers}P{run.metadata.decode_workers}D | " f"{prefill_gpus}/{decode_gpus} | " - f"{run.profiler.isl}/{run.profiler.osl}" + f"{run.profiler_metadata.isl}/{run.profiler_metadata.osl}" ) if run.metadata.gpu_type: diff --git a/analysis/dashboard/components.py b/analysis/dashboard/components.py index 22bb320c..1b0979d0 100644 --- a/analysis/dashboard/components.py +++ b/analysis/dashboard/components.py @@ -101,7 +101,11 @@ def _node_to_dict(node) -> dict: Temporary converter for compatibility with existing visualization code. 
""" return { - "node_info": node.node_info, + "node_info": { + "node": node.node_name, + "worker_type": node.worker_type, + "worker_id": node.worker_id, + }, "prefill_batches": [_batch_to_dict(b) for b in node.batches], "memory_snapshots": [_memory_to_dict(m) for m in node.memory_snapshots], "config": node.config, diff --git a/analysis/dashboard/config_tab.py b/analysis/dashboard/config_tab.py index ec234de2..a8b9f59a 100644 --- a/analysis/dashboard/config_tab.py +++ b/analysis/dashboard/config_tab.py @@ -64,10 +64,10 @@ def render(filtered_runs: list): with col2: st.metric("GPU", config_data["summary"]["gpu_type"]) with col3: - st.metric("ISL/OSL", f"{run.profiler.isl}/{run.profiler.osl}") + st.metric("ISL/OSL", f"{run.profiler_metadata.isl}/{run.profiler_metadata.osl}") with col4: gpu_type_suffix = f" ({run.metadata.gpu_type})" if run.metadata.gpu_type else "" - st.metric("Profiler", f"{run.profiler.profiler_type}{gpu_type_suffix}") + st.metric("Profiler", f"{run.profiler_metadata.profiler_type}{gpu_type_suffix}") st.caption(f"Model: {config_data['summary']['model']}") st.divider() diff --git a/analysis/dashboard/node_metrics_tab.py b/analysis/dashboard/node_metrics_tab.py index 5c83ea9e..24ab0665 100644 --- a/analysis/dashboard/node_metrics_tab.py +++ b/analysis/dashboard/node_metrics_tab.py @@ -7,16 +7,16 @@ import streamlit as st from analysis.dashboard.components import ( - load_node_metrics, - create_node_throughput_graph, - create_kv_cache_utilization_graph, - create_queue_depth_graph, - create_node_inflight_requests_graph, - create_decode_running_requests_graph, + create_decode_disagg_stacked_graph, create_decode_gen_throughput_graph, - create_decode_transfer_req_graph, create_decode_prealloc_req_graph, - create_decode_disagg_stacked_graph, + create_decode_running_requests_graph, + create_decode_transfer_req_graph, + create_kv_cache_utilization_graph, + create_node_inflight_requests_graph, + create_node_throughput_graph, + create_queue_depth_graph, + load_node_metrics, ) from analysis.srtlog.visualizations import aggregate_all_nodes, group_nodes_by_dp @@ -72,8 +72,8 @@ def render(filtered_runs: list, logs_dir: str): "agg_workers": run.metadata.agg_workers, "gpus_per_node": run.metadata.gpus_per_node, "total_gpus": run.total_gpus, - "isl": run.profiler.isl, - "osl": run.profiler.osl, + "isl": run.profiler_metadata.isl, + "osl": run.profiler_metadata.osl, "gpu_type": run.metadata.gpu_type, } all_node_metrics.extend(node_metrics) diff --git a/analysis/dashboard/rate_match_tab.py b/analysis/dashboard/rate_match_tab.py index 36fa50cb..5826faf2 100644 --- a/analysis/dashboard/rate_match_tab.py +++ b/analysis/dashboard/rate_match_tab.py @@ -10,6 +10,48 @@ from analysis.dashboard.components import load_node_metrics +def _parse_timestamp(timestamp: str) -> datetime: + """Parse timestamp from multiple possible formats. + + Supports: + - ISO 8601: 2025-12-30T15:52:38.206058Z + - YYYY-MM-DD HH:MM:SS + - MM/DD/YYYY-HH:MM:SS (TRTLLM format) + + Args: + timestamp: Timestamp string in one of the supported formats + + Returns: + datetime object + + Raises: + ValueError: If timestamp format is not recognized + """ + # Try YYYY-MM-DD HH:MM:SS format first (most common) + try: + return datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S") + except ValueError: + pass + + # Try ISO 8601 format (SGLang) + try: + ts = timestamp.rstrip("Z") + if "." 
in ts: + return datetime.fromisoformat(ts) + else: + return datetime.strptime(ts, "%Y-%m-%dT%H:%M:%S") + except ValueError: + pass + + # Try MM/DD/YYYY-HH:MM:SS format (TRTLLM) + try: + return datetime.strptime(timestamp, "%m/%d/%Y-%H:%M:%S") + except ValueError: + pass + + raise ValueError(f"Unable to parse timestamp: {timestamp}") + + def render(filtered_runs: list, logs_dir: str): """Render rate match analysis. @@ -84,11 +126,11 @@ def render(filtered_runs: list, logs_dir: str): decode_gpus = run.metadata.decode_nodes * run.metadata.gpus_per_node st.metric("GPU Split", f"{prefill_gpus} / {decode_gpus}") with col4: - st.metric("ISL/OSL", f"{run.profiler.isl}/{run.profiler.osl}") + st.metric("ISL/OSL", f"{run.profiler_metadata.isl}/{run.profiler_metadata.osl}") # Create rate match graph - isl = int(run.profiler.isl) if run.profiler.isl else None - osl = int(run.profiler.osl) if run.profiler.osl else None + isl = int(run.profiler_metadata.isl) if run.profiler_metadata.isl else None + osl = int(run.profiler_metadata.osl) if run.profiler_metadata.osl else None rate_fig = _create_rate_match_graph( prefill_nodes, decode_nodes, run.job_id, show_request_rate=show_request_rate, isl=isl, osl=osl ) @@ -139,8 +181,8 @@ def _create_rate_match_graph(prefill_nodes, decode_nodes, job_id="", show_reques avg_input_tps.append(avg / prefill_divisor) if timestamps: - first_time = datetime.strptime(timestamps[0], "%Y-%m-%d %H:%M:%S") - elapsed = [(datetime.strptime(ts, "%Y-%m-%d %H:%M:%S") - first_time).total_seconds() for ts in timestamps] + first_time = _parse_timestamp(timestamps[0]) + elapsed = [(_parse_timestamp(ts) - first_time).total_seconds() for ts in timestamps] unit = "req/s" if show_request_rate else "tok/s" rate_fig.add_trace( @@ -175,8 +217,8 @@ def _create_rate_match_graph(prefill_nodes, decode_nodes, job_id="", show_reques avg_gen_tps.append(avg / decode_divisor) if timestamps: - first_time = datetime.strptime(timestamps[0], "%Y-%m-%d %H:%M:%S") - elapsed = [(datetime.strptime(ts, "%Y-%m-%d %H:%M:%S") - first_time).total_seconds() for ts in timestamps] + first_time = _parse_timestamp(timestamps[0]) + elapsed = [(_parse_timestamp(ts) - first_time).total_seconds() for ts in timestamps] unit = "req/s" if show_request_rate else "tok/s" rate_fig.add_trace( diff --git a/analysis/docs/ARCHITECTURE_DATAFLOW.md b/analysis/docs/ARCHITECTURE_DATAFLOW.md new file mode 100644 index 00000000..ec3420fb --- /dev/null +++ b/analysis/docs/ARCHITECTURE_DATAFLOW.md @@ -0,0 +1,607 @@ +# SRT-SLURM Log Analysis Architecture - Dataflow Diagram + +## Overview +This document describes the data flow through the log analysis system, from raw log files to structured data models. + +--- + +## 1. Entry Point: RunLoader + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ RunLoader │ +│ Entry point for loading and analyzing benchmark run data │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ├──► discover_runs() + ├──► load_single(job_id) + └──► load_node_metrics_for_run() + + ┌───────────────┴───────────────┐ + │ │ + ▼ ▼ + ┌─────────────────────┐ ┌────────────────────┐ + │ Metadata Discovery │ │ Results Parsing │ + └─────────────────────┘ └────────────────────┘ +``` + +--- + +## 2. 
Metadata Discovery Flow + +``` + ┌─────────────────────────────────┐ + │ Source Files (per run) │ + │ │ + │ 📁 {job_id}/metadata.json │ + │ 📁 {job_id}/config.yaml │ + │ 📁 {job_id}/*.json │ + └─────────────────────────────────┘ + │ + │ read by + ▼ + ┌─────────────────────────────────┐ + │ RunLoader._load_metadata() │ + └─────────────────────────────────┘ + │ + │ creates + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ RunMetadata │ +│ Fields: Source File: │ +│ • job_id 📁 metadata.json │ +│ • job_name 📁 metadata.json │ +│ • run_date 📁 metadata.json │ +│ • mode (monolithic/disaggregated) 📁 metadata.json │ +│ • prefill_nodes, decode_nodes 📁 metadata.json │ +│ • prefill_workers, decode_workers 📁 metadata.json │ +│ • model: ModelConfig 📁 metadata.json │ +│ - path, tensor_parallel, ... │ +└───────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 3. Profiler/Benchmark Results Flow + +``` + ┌─────────────────────────────────────────┐ + │ Profiler Type Detection │ + │ │ + │ 📁 logs/benchmark.out │ + │ - Search for "SA-Bench Config" │ + │ - Search for "aiperf" commands │ + └─────────────────────────────────────────┘ + │ + │ determines + ▼ + ┌─────────────────────────────────────────┐ + │ ProfilerMetadata │ + │ Fields: Source: │ + │ • profiler_type benchmark.out │ + │ • isl benchmark.out │ + │ • osl benchmark.out │ + │ • concurrencies benchmark.out │ + └─────────────────────────────────────────┘ + │ + │ used to find + ▼ + ┌────────────────────────────────────────────────────────────────┐ + │ BenchmarkParser.find_result_directory() │ + │ │ + │ SA-Bench: Mooncake-Router: │ + │ 📁 sa-bench_isl_*_osl_*/ 📁 logs/artifacts/*/ │ + │ result_*.json (PRIMARY) profile_export_aiperf.json │ + │ benchmark.out (FALLBACK) (PRIMARY) │ + │ 📁 logs/benchmark.out │ + │ (FALLBACK) │ + └────────────────────────────────────────────────────────────────┘ + │ + │ parse_result_directory() + │ ⚠️ JSON files are PRIMARY source of truth + │ .out files are FALLBACK only + ▼ +┌──────────────────────────────────────────────────────────────────────────┐ +│ ProfilerResults │ +│ Fields: Source Files (Priority Order): │ +│ • output_tps: list[float] 1️⃣ 📁 result_*.json (SA-Bench) │ +│ • request_throughput: list[float] 📁 profile_export_aiperf.json │ +│ • concurrency_values: list[int] (Mooncake-Router) │ +│ • mean_ttft_ms: list[float] 2️⃣ 📁 logs/benchmark.out (fallback)│ +│ • mean_itl_ms: list[float] │ +│ • mean_e2el_ms: list[float] One entry per concurrency level │ +│ • p99_ttft_ms, median_ttft_ms, ... │ +│ • total_input_tokens: list[int] JSON = Source of Truth ✨ │ +│ • total_output_tokens: list[int] .out = Fallback only ⚠️ │ +└──────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 4. Benchmark Launch Command Flow + +``` + ┌─────────────────────────────────┐ + │ Source File │ + │ 📁 logs/benchmark.out │ + │ - Command line arguments │ + │ - SA-Bench Config: header │ + │ - aiperf profile commands │ + └─────────────────────────────────┘ + │ + │ parse_launch_command() + ▼ + ┌─────────────────────────────────┐ + │ BenchmarkParser │ + │ (SA-Bench or Mooncake) │ + └─────────────────────────────────┘ + │ + │ creates + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ BenchmarkLaunchCommand │ +│ Fields: Source: │ +│ • benchmark_type 📁 logs/benchmark.out │ +│ • raw_command 📁 logs/benchmark.out │ +│ • extra_args: dict 📁 logs/benchmark.out │ +│ - base_url, model, input_len, │ +│ output_len, max_concurrency, ... 
│ +└───────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 5. Node Metrics Flow + +``` + ┌─────────────────────────────────────────────────┐ + │ Source Files (per node/worker) │ + │ │ + │ 📁 logs/{node}_{worker_type}_{worker_id}.out │ + │ Examples: │ + │ - worker-3_decode_w0.out │ + │ - eos0219_prefill_w1.out │ + │ │ + │ Content: │ + │ • Batch metrics lines │ + │ • Memory snapshot lines │ + │ • TP/DP/EP configuration │ + │ • Launch command │ + └─────────────────────────────────────────────────┘ + │ + │ detect backend type + ▼ + ┌─────────────────────────────────┐ + │ NodeAnalyzer │ + │ _detect_backend_type() │ + │ • Checks config.yaml │ + │ • Checks log patterns │ + └─────────────────────────────────┘ + │ + │ get_node_parser() + ▼ + ┌────────────────────────────────────────────────────────┐ + │ NodeParser (SGLang or TRT-LLM) │ + │ │ + │ parse_single_log() - parses one worker's log file │ + └────────────────────────────────────────────────────────┘ + │ + │ creates + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ NodeMetadata │ +│ Fields: Source: │ +│ • node_name 📁 *_{type}_{id}.out (filename) │ +│ • worker_type (prefill/decode/agg) 📁 *_{type}_{id}.out (filename) │ +│ • worker_id (w0, w1, ...) 📁 *_{type}_{id}.out (filename) │ +└───────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ BatchMetrics │ +│ Fields: Source: │ +│ • timestamp 📁 *.out log lines │ +│ • dp, tp, ep 📁 *.out log lines │ +│ • batch_type (prefill/decode) 📁 *.out log lines │ +│ • new_seq, new_token, cached_token 📁 *.out log lines │ +│ • token_usage 📁 *.out log lines │ +│ • running_req, queue_req 📁 *.out log lines │ +│ • num_tokens 📁 *.out log lines │ +│ • input_throughput, gen_throughput 📁 *.out log lines │ +│ │ +│ Example log line (SGLang): │ +│ 2024-12-30 08:10:15 DP0.TP0.EP0 [BATCH] prefill #new-seq: 2 ... │ +│ │ +│ Example log line (TRT-LLM): │ +│ [TensorRT-LLM][INFO] [ITERATION] tokens=1024 new_tokens=128 ... │ +└───────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ MemoryMetrics │ +│ Fields: Source: │ +│ • timestamp 📁 *.out log lines │ +│ • dp, tp, ep 📁 *.out log lines │ +│ • avail_mem_gb 📁 *.out log lines │ +│ • mem_usage_gb 📁 *.out log lines │ +│ • kv_cache_gb 📁 *.out log lines │ +│ • kv_tokens 📁 *.out log lines │ +│ │ +│ Example log line (SGLang): │ +│ 2024-12-30 08:10:15 DP0.TP0.EP0 #running-req: 10, avail_mem=45.2GB │ +│ │ +│ Example log line (TRT-LLM): │ +│ [TensorRT-LLM][INFO] Memory Stats: free=48.5GB, kv_cache=12.3GB │ +└───────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ NodeMetrics │ +│ Fields: Source: │ +│ • metadata: NodeMetadata (see above) │ +│ • batches: list[BatchMetrics] 📁 *.out log lines │ +│ • memory_snapshots: list[MemoryMetrics] 📁 *.out log lines │ +│ • config: dict 📁 *.out log lines │ +│ - tp_size, dp_size, ep_size (parsed from DP0.TP2.EP1 tags) │ +│ • run_id (from metadata) │ +└───────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 6. 
Node Configuration Flow + +``` + ┌─────────────────────────────────────────────────┐ + │ Source Files (per node) │ + │ │ + │ 📁 logs/*_{type}_{id}.out - launch command │ + │ 📁 logs/*_config.json - node config │ + │ 📁 logs/config.yaml - environment vars │ + └─────────────────────────────────────────────────┘ + │ + │ parsed by + ▼ + ┌─────────────────────────────────┐ + │ NodeAnalyzer │ + │ _populate_config_from_files() │ + └─────────────────────────────────┘ + │ + │ creates + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ NodeLaunchCommand │ +│ Fields: Source: │ +│ • backend_type (sglang/trtllm) 📁 *_{type}_{id}.out │ +│ • worker_type (prefill/decode) 📁 *_{type}_{id}.out │ +│ • raw_command 📁 *_{type}_{id}.out │ +│ • extra_args: dict 📁 *_{type}_{id}.out │ +│ - model_path, served_model_name, │ +│ disaggregation_mode, tp_size, │ +│ pp_size, max_num_seqs, ... │ +│ │ +│ Example (TRT-LLM): │ +│ python3 -m dynamo.trtllm --model-path /model --disaggregation-mode │ +│ decode --extra-engine-args /logs/trtllm_config_decode.yaml │ +│ │ +│ Example (SGLang): │ +│ python -m sglang.launch_server --model-path /model --disagg-mode prefill │ +│ --tp-size 2 --dp-size 1 │ +└───────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ NodeConfig (TypedDict) │ +│ Fields: Source: │ +│ • launch_command: NodeLaunchCommand 📁 *_{type}_{id}.out │ +│ • environment: dict[str, str] 📁 config.yaml │ +│ - NCCL settings, CUDA settings, │ +│ model paths, etc. │ +│ • gpu_info: dict (optional) 📁 *_config.json │ +└───────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌───────────────────────────────────────────────────────────────────────────┐ +│ NodeInfo │ +│ Top-level container combining metrics and configuration │ +│ │ +│ Fields: │ +│ • metrics: NodeMetrics (performance data) │ +│ • node_config: NodeConfig (configuration) │ +│ │ +│ Convenience properties delegate to nested fields: │ +│ • node_name → metrics.metadata.node_name │ +│ • worker_type → metrics.metadata.worker_type │ +│ • launch_command → node_config["launch_command"] │ +│ • environment → node_config["environment"] │ +│ • batches → metrics.batches │ +└───────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 7. 
Complete Data Model Hierarchy + +``` +BenchmarkRun (top-level container for entire run) +│ +├─ metadata: RunMetadata +│ └─ Source: 📁 metadata.json, config.yaml +│ +├─ profiler_metadata: ProfilerMetadata +│ └─ Source: 📁 logs/benchmark.out +│ +├─ profiler: ProfilerResults +│ └─ Source: 📁 sa-bench_isl_*_osl_*/result_*.json +│ 📁 logs/artifacts/*/profile_export_aiperf.json +│ +├─ benchmark_launch_command: BenchmarkLaunchCommand +│ └─ Source: 📁 logs/benchmark.out +│ +└─ nodes: list[NodeInfo] + └─ Each NodeInfo contains: + │ + ├─ metrics: NodeMetrics + │ ├─ metadata: NodeMetadata + │ │ └─ Source: 📁 logs/*_{type}_{id}.out (filename) + │ ├─ batches: list[BatchMetrics] + │ │ └─ Source: 📁 logs/*_{type}_{id}.out (log lines) + │ ├─ memory_snapshots: list[MemoryMetrics] + │ │ └─ Source: 📁 logs/*_{type}_{id}.out (log lines) + │ └─ config: dict + │ └─ Source: 📁 logs/*_{type}_{id}.out (DP/TP/EP tags) + │ + └─ node_config: NodeConfig + ├─ launch_command: NodeLaunchCommand + │ └─ Source: 📁 logs/*_{type}_{id}.out (command line) + ├─ environment: dict[str, str] + │ └─ Source: 📁 logs/config.yaml + └─ gpu_info: dict (optional) + └─ Source: 📁 logs/*_config.json +``` + +--- + +## 8. Parser Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ Parser Registry System │ +│ │ +│ Decorators: │ +│ • @register_benchmark_parser("sa-bench") │ +│ • @register_benchmark_parser("mooncake-router") │ +│ • @register_node_parser("sglang") │ +│ • @register_node_parser("trtllm") │ +│ │ +│ Lookup Functions: │ +│ • get_benchmark_parser(type) → BenchmarkParser │ +│ • get_node_parser(type) → NodeParser │ +└─────────────────────────────────────────────────────────────────────────┘ + │ + ┌───────────────┴────────────────┐ + │ │ + ▼ ▼ + ┌──────────────────────┐ ┌──────────────────────┐ + │ BenchmarkParsers │ │ NodeParsers │ + └──────────────────────┘ └──────────────────────┘ + │ │ + ┌───────────┴───────────┐ ┌──────────┴──────────┐ + ▼ ▼ ▼ ▼ +┌──────────────┐ ┌──────────────┐ ┌─────────┐ ┌──────────────┐ +│ SABench │ │ Mooncake │ │ SGLang │ │ TRT-LLM │ +│ Parser │ │ Parser │ │ Parser │ │ Parser │ +└──────────────┘ └──────────────┘ └─────────┘ └──────────────┘ + +Each parser implements: + Benchmark: + • find_result_directory() - locate result files + • parse_result_directory() - parse all results + • parse_result_json() - parse single result file + • parse_launch_command() - extract command + + Node: + • parse_logs() - parse directory of logs + • parse_single_log() - parse one worker log + • parse_launch_command() - extract command +``` + +--- + +## 9. 
Parsing Strategy: JSON-First Approach + +### Design Principle: JSON as Source of Truth ✨ + +The parser infrastructure follows a **JSON-first** approach for benchmark results: + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ Benchmark Result Parsing Priority │ +│ │ +│ 1️⃣ PRIMARY: JSON Result Files (Source of Truth) │ +│ 📁 result_*.json (SA-Bench) │ +│ 📁 profile_export_aiperf.json (Mooncake-Router) │ +│ - Complete, structured data │ +│ - Machine-readable, validated format │ +│ - Contains all metrics with precision │ +│ │ +│ 2️⃣ FALLBACK: benchmark.out Parsing │ +│ 📁 logs/benchmark.out │ +│ - Used ONLY when JSON files are unavailable │ +│ - Regex-based extraction from human-readable logs │ +│ - May be incomplete or imprecise │ +│ - Logged as fallback in parser output │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +### Implementation + +All benchmark parsers implement this strategy in `parse_result_directory()`: + +```python +def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: + results = [] + + # 1️⃣ PRIMARY: Try JSON files first + for json_file in result_dir.glob("*.json"): # or rglob() for nested + result = self.parse_result_json(json_file) + if result.get("output_tps"): + results.append(result) + logger.info(f"Loaded from JSON: {json_file}") + + # 2️⃣ FALLBACK: If no JSON found, try benchmark.out + if not results: + benchmark_out = result_dir / "benchmark.out" + if benchmark_out.exists(): + logger.info("No JSON results found, falling back to .out parsing") + fallback_result = self.parse(benchmark_out) + if fallback_result.get("output_tps"): + results.append(fallback_result) + else: + logger.warning(f"No results found in {result_dir}") + + return results +``` + +### Rationale + +1. **Accuracy**: JSON files contain exact, validated data +2. **Completeness**: JSON includes all metrics, not just what's in logs +3. **Reliability**: Structured format vs regex parsing +4. **Performance**: JSON parsing is faster than regex on large logs +5. **Maintainability**: Less brittle than log format changes + +### When Fallback is Used + +The fallback to `.out` file parsing occurs when: +- JSON result files are missing (incomplete benchmark run) +- Results directory doesn't contain expected JSON files +- Legacy runs from before JSON export was implemented + +--- + +## 10. Caching Layer + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ CacheManager │ +│ │ +│ Caches to 📁 {run_path}/cached_assets/ │ +│ │ +│ Cached Data: │ +│ • benchmark_results.parquet - ProfilerResults │ +│ • node_metrics.parquet - NodeMetrics (all workers) │ +│ • cache_metadata.json - timestamps, source patterns │ +│ │ +│ Cache Validation: │ +│ • Checks if source files have changed (mtime) │ +│ • Invalidates cache if patterns don't match │ +│ • Automatically rebuilds if invalid │ +└─────────────────────────────────────────────────────────────────────────┘ + +Flow with cache: + 1. RunLoader checks cache validity + 2. If valid → deserialize from .parquet + 3. If invalid → parse from source files → cache results + 4. Populate NodeConfig from files (not cached) +``` + +--- + +## 11. 
File Structure Summary
+
+```
+{run_directory}/
+├── metadata.json                  → RunMetadata
+├── config.yaml                    → ProfilerMetadata.isl/osl
+├── logs/
+│   ├── benchmark.out              → BenchmarkLaunchCommand, ProfilerMetadata, (fallback metrics)
+│   ├── config.yaml                → NodeConfig.environment
+│   ├── {node}_{type}_{id}.out     → NodeMetrics, NodeLaunchCommand
+│   ├── {node}_config.json         → NodeConfig.gpu_info
+│   ├── sa-bench_isl_*/
+│   │   └── result_*.json          → ProfilerResults (PRIMARY ✨)
+│   └── artifacts/
+│       └── */
+│           └── profile_export_aiperf.json → ProfilerResults (PRIMARY ✨)
+└── cached_assets/
+    ├── benchmark_results.parquet
+    ├── node_metrics.parquet
+    └── cache_metadata.json
+```
+
+**Note**: JSON files are the primary source of truth for benchmark results.
+The `.out` files serve as fallback for legacy/incomplete runs.
+
+---
+
+## 12. Key Design Principles
+
+1. **Parser Autonomy**: Each parser knows how to find and parse its own files
+   - `find_result_directory()` encapsulates file discovery logic
+   - RunLoader doesn't need benchmark-specific knowledge
+
+2. **JSON-First Parsing** ✨: JSON files are the primary source of truth
+   - `parse_result_json()` for structured, accurate data
+   - `parse()` method is the fallback when JSON is unavailable
+   - Logged clearly when the fallback is used
+
+3. **Separation of Concerns**:
+   - **Metrics** (NodeMetrics): Performance data from log parsing
+   - **Configuration** (NodeConfig): Launch commands, environment, GPU info
+   - **Metadata** (NodeMetadata): Worker identification
+
+4. **Caching Strategy**:
+   - Cache expensive parsing operations (batch/memory metrics)
+   - Don't cache configuration (files are small, may change)
+   - Validate cache against source file timestamps
+
+5. **Extensibility**:
+   - New benchmark types: Implement BenchmarkParserProtocol
+   - New node backends: Implement NodeParserProtocol
+   - Register with decorator → automatically available (see the sketch below)
+
+6. **Data Flow Direction**:
+   ```
+   JSON Files (Primary) ──┐
+                          ├──► Parsers ──► Data Models ──► Cache ──► Application
+   .out Files (Fallback) ─┘
+   ```
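+
+A minimal sketch of the decorator-based registry behind principle 5 — illustrative only; the real registry lives in `analysis/srtlog/parsers/` and its internals may differ:
+
+```python
+# Hypothetical sketch: a name → class registry populated by a decorator.
+_BENCHMARK_PARSERS: dict[str, type] = {}
+
+
+def register_benchmark_parser(name: str):
+    """Class decorator that registers a parser under a benchmark type name."""
+
+    def decorator(cls: type) -> type:
+        _BENCHMARK_PARSERS[name] = cls
+        return cls
+
+    return decorator
+
+
+def get_benchmark_parser(name: str):
+    """Instantiate the registered parser; raise ValueError for unknown types."""
+    try:
+        return _BENCHMARK_PARSERS[name]()
+    except KeyError:
+        raise ValueError(f"No benchmark parser registered for '{name}'") from None
+
+
+@register_benchmark_parser("sa-bench")
+class SABenchParser:
+    def parse_result_directory(self, result_dir):
+        ...  # JSON-first strategy from section 9
+```
+
+Registering a class is all it takes to make it discoverable; callers such as `NodeAnalyzer` only ever go through the lookup function, which is why the lookup raises `ValueError` for unknown backends.
+
+---
+
+## 13. 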
Usage Example + +```python +from pathlib import Path +from analysis.srtlog.run_loader import RunLoader + +# Load a run +loader = RunLoader("/path/to/runs") +run = loader.load_single("553") + +# Access metadata (from metadata.json) +print(f"Job: {run.metadata.job_id}") +print(f"Model: {run.metadata.model.path}") + +# Access profiler results (from result_*.json or profile_export_aiperf.json) +print(f"Output TPS: {run.profiler.output_tps}") +print(f"Mean TTFT: {run.profiler.mean_ttft_ms}") + +# Access benchmark launch command (from logs/benchmark.out) +print(f"Benchmark: {run.benchmark_launch_command.benchmark_type}") +print(f"Arguments: {run.benchmark_launch_command.extra_args}") + +# Load node metrics (from logs/*_{type}_{id}.out) +nodes = loader.load_node_metrics_for_run(run) +for node in nodes: + # Metrics from log file parsing + print(f"Node: {node.node_name} ({node.worker_type})") + print(f" Batches: {len(node.batches)}") + print(f" Memory snapshots: {len(node.memory_snapshots)}") + + # Config from config files + print(f" Backend: {node.launch_command.backend_type}") + print(f" Environment vars: {len(node.environment)}") +``` + diff --git a/analysis/srtlog/cluster_config.py b/analysis/srtlog/cluster_config.py index 383f4cad..2a8621c9 100644 --- a/analysis/srtlog/cluster_config.py +++ b/analysis/srtlog/cluster_config.py @@ -3,9 +3,10 @@ """ import logging -import tomllib from pathlib import Path +import tomllib + logger = logging.getLogger(__name__) diff --git a/analysis/srtlog/config_reader.py b/analysis/srtlog/config_reader.py index ce5eb328..98a4c089 100644 --- a/analysis/srtlog/config_reader.py +++ b/analysis/srtlog/config_reader.py @@ -10,7 +10,7 @@ import pandas as pd from .cache_manager import CacheManager -from .models import NodeConfig, ParsedCommandInfo +from .models import NodeConfig, TopologyInfo # Configure logging logger = logging.getLogger(__name__) @@ -285,7 +285,7 @@ def parse_command_line_to_dict(cmd_args: list[str]) -> dict[str, str]: return parsed -def parse_command_line_from_err(run_path: str) -> ParsedCommandInfo: +def parse_command_line_from_err(run_path: str) -> TopologyInfo: """Parse .err/.out files to find explicitly set flags and service topology. Uses parquet caching to avoid re-parsing on subsequent loads. @@ -298,10 +298,9 @@ def parse_command_line_from_err(run_path: str) -> ParsedCommandInfo: run_path: Path to the run directory containing .err/.out files Returns: - { + TopologyInfo with: 'explicit_flags': set of flag names that were explicitly set, 'services': {node_name: [service_types]} - } """ import os import re diff --git a/analysis/srtlog/log_parser.py b/analysis/srtlog/log_parser.py index 7de28ac2..71adf4f5 100644 --- a/analysis/srtlog/log_parser.py +++ b/analysis/srtlog/log_parser.py @@ -4,13 +4,17 @@ All parsing logic encapsulated in the NodeAnalyzer class. """ +import json import logging -import os -import re +import time +from pathlib import Path import pandas as pd +import yaml from .cache_manager import CacheManager +from .models import NodeInfo +from .parsers import get_node_parser # Configure logging logger = logging.getLogger(__name__) @@ -19,21 +23,21 @@ class NodeAnalyzer: """Service for analyzing node-level metrics from log files. - Parses .err/.out files to extract batch metrics, memory usage, and configuration. - All parsing logic is encapsulated as methods. + Uses the new parser infrastructure to parse node logs based on detected backend type. 
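+
+    Typical flow: _detect_backend_type() → get_node_parser(backend) → parser.parse_logs(logs_dir),
+    which returns NodeInfo objects pairing metrics with configuration.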
""" def parse_run_logs(self, run_path: str, return_dicts: bool = False) -> list: """Parse all node log files in a run directory. Uses parquet caching to avoid re-parsing on subsequent loads. + Automatically detects backend type and uses appropriate parser. Args: run_path: Path to the run directory containing .err/.out files - return_dicts: If True, return dicts directly (faster). If False, return NodeMetrics objects. + return_dicts: If True, return dicts directly (faster). If False, return NodeInfo objects. Returns: - List of NodeMetrics objects or dicts, one per node + List of NodeInfo objects (or dicts), one per node """ # Initialize cache manager cache_mgr = CacheManager(run_path) @@ -50,163 +54,291 @@ def parse_run_logs(self, run_path: str, return_dicts: bool = False) -> list: nodes = self._dataframe_to_dicts(cached_df) logger.info(f"Loaded {len(nodes)} nodes from cache (as dicts)") else: - # Reconstruct NodeMetrics objects from DataFrame - nodes = self._deserialize_node_metrics(cached_df) + # Reconstruct NodeInfo objects from DataFrame + nodes = self._deserialize_node_metrics(cached_df, run_path=run_path) logger.info(f"Loaded {len(nodes)} nodes from cache") return nodes - # Cache miss or invalid - parse from .err/.out files - nodes = [] - - if not os.path.exists(run_path): - logger.error(f"Run path does not exist: {run_path}") - return nodes + # Cache miss or invalid - parse using new parser infrastructure + backend_type = self._detect_backend_type(run_path) + if not backend_type: + logger.warning(f"Could not detect backend type for {run_path}") + return [] - total_err_files = 0 - parsed_successfully = 0 + # Get appropriate parser + try: + parser = get_node_parser(backend_type) + except ValueError as e: + logger.warning(f"No parser registered for backend '{backend_type}': {e}") + return [] - for file in os.listdir(run_path): - if (file.endswith(".err") or file.endswith(".out")) and ("prefill" in file or "decode" in file): - total_err_files += 1 - filepath = os.path.join(run_path, file) - node = self.parse_single_log(filepath) - if node: - nodes.append(node) - parsed_successfully += 1 + # Use parser to parse logs directory + logs_dir = Path(run_path) / "logs" + if not logs_dir.exists(): + # For backwards compatibility, try parsing files in run_path directly + logs_dir = Path(run_path) - logger.info(f"Parsed {parsed_successfully}/{total_err_files} prefill/decode log files from {run_path}") + logger.info(f"Using {backend_type} parser to parse logs in {logs_dir}") + node_infos = parser.parse_logs(logs_dir) - if total_err_files == 0: - logger.warning(f"No prefill/decode log files found in {run_path}") + # Populate additional config from config files if available + if node_infos: + self._populate_config_from_files(run_path, node_infos) # Save to cache if we have data - if nodes: - cache_df = self._serialize_node_metrics(nodes) + if node_infos: + # Extract metrics for caching + metrics_list = [ni.metrics for ni in node_infos] + cache_df = self._serialize_node_metrics(metrics_list) cache_mgr.save_to_cache("node_metrics", cache_df, source_patterns) + logger.info(f"Parsed and cached {len(node_infos)} nodes from {logs_dir}") - return nodes + if return_dicts: + return [self._node_info_to_dict(node) for node in node_infos] + return node_infos + + def _detect_backend_type(self, run_path: str) -> str | None: + """Detect backend type from run metadata. - def parse_single_log(self, filepath: str): - """Parse a single node log file. 
+ Looks for *.json files with container information in run_path + and its parent directory (for cases where run_path is logs/). + Also looks at log file content as fallback. Args: - filepath: Path to the .err/.out log file + run_path: Path to the run directory (or logs subdirectory) Returns: - NodeMetrics object or None if parsing failed + Backend type string (e.g., 'sglang', 'trtllm') or None """ - from .models import BatchMetrics, MemoryMetrics, NodeMetrics + run_path = Path(run_path) + + # Try current directory and parent directory + search_dirs = [run_path] + if run_path.name == "logs" and run_path.parent.exists(): + search_dirs.insert(0, run_path.parent) # Check parent first + + # Try JSON files first + for search_dir in search_dirs: + json_files = list(search_dir.glob("*.json")) + for json_file in json_files: + try: + with open(json_file) as f: + metadata = json.load(f) + # Try different possible locations for container info + container = metadata.get("container", "") + if not container: + container = metadata.get("model", {}).get("container", "") + + container_lower = container.lower() + if "sglang" in container_lower: + logger.debug(f"Detected sglang from {json_file}") + return "sglang" + if "trtllm" in container_lower or "dynamo" in container_lower: + logger.debug(f"Detected trtllm from {json_file}") + return "trtllm" + except Exception as e: + logger.debug(f"Could not read {json_file}: {e}") + continue + + # Try config.yaml as fallback + for search_dir in search_dirs: + yaml_path = search_dir / "config.yaml" + if yaml_path.exists(): + try: + with open(yaml_path) as f: + config = yaml.safe_load(f) + backend_type = config.get("backend", {}).get("type", "").lower() + if backend_type in ["sglang", "trtllm"]: + logger.debug(f"Detected {backend_type} from config.yaml") + return backend_type + except Exception as e: + logger.debug(f"Could not read {yaml_path}: {e}") + + # Last resort: look at log files + logs_dir = run_path if run_path.name == "logs" else run_path / "logs" + if logs_dir.exists(): + log_files = list(logs_dir.glob("*.out")) + list(logs_dir.glob("*.err")) + for log_file in log_files[:3]: # Check first few files + try: + with open(log_file) as f: + content = f.read(2000) # Read first 2KB + if "sglang.launch_server" in content or "sglang.srt" in content: + logger.debug(f"Detected sglang from log content in {log_file.name}") + return "sglang" + if "dynamo.trtllm" in content or "tensorrt_llm" in content: + logger.debug(f"Detected trtllm from log content in {log_file.name}") + return "trtllm" + except Exception as e: + logger.debug(f"Could not read {log_file}: {e}") - node_info = self._extract_node_info_from_filename(filepath) - if not node_info: - logger.warning( - f"Could not extract node info from filename: {filepath}. " - f"Expected format: __.err or .out" - ) - return None + return None - batches = [] - memory_snapshots = [] - config = {} + def _populate_config_from_files(self, run_path: str, node_infos: list) -> None: + """Populate node configuration from config files. 
- try: - with open(filepath) as f: - for line in f: - # Parse prefill batch metrics - batch_metrics = self._parse_prefill_batch_line(line) - if batch_metrics: - batches.append( - BatchMetrics( - timestamp=batch_metrics["timestamp"], - dp=batch_metrics["dp"], - tp=batch_metrics["tp"], - ep=batch_metrics["ep"], - batch_type=batch_metrics["type"], - new_seq=batch_metrics.get("new_seq"), - new_token=batch_metrics.get("new_token"), - cached_token=batch_metrics.get("cached_token"), - token_usage=batch_metrics.get("token_usage"), - running_req=batch_metrics.get("running_req"), - queue_req=batch_metrics.get("queue_req"), - prealloc_req=batch_metrics.get("prealloc_req"), - inflight_req=batch_metrics.get("inflight_req"), - input_throughput=batch_metrics.get("input_throughput"), - ) - ) + Reads both: + 1. Per-node *_config.json files (gpu_info, server_args) + 2. Global config.yaml file (environment variables by worker type) - # Parse decode batch metrics - decode_metrics = self._parse_decode_batch_line(line) - if decode_metrics: - batches.append( - BatchMetrics( - timestamp=decode_metrics["timestamp"], - dp=decode_metrics["dp"], - tp=decode_metrics["tp"], - ep=decode_metrics["ep"], - batch_type=decode_metrics["type"], - running_req=decode_metrics.get("running_req"), - queue_req=decode_metrics.get("queue_req"), - prealloc_req=decode_metrics.get("prealloc_req"), - transfer_req=decode_metrics.get("transfer_req"), - token_usage=decode_metrics.get("token_usage"), - preallocated_usage=decode_metrics.get("preallocated_usage"), - num_tokens=decode_metrics.get("num_tokens"), - gen_throughput=decode_metrics.get("gen_throughput"), - ) - ) + Merges with existing config that already has launch_command from log parsing. + + Args: + run_path: Path to the run directory (or logs subdirectory) + node_infos: List of NodeInfo objects to enhance with config file data + """ + import os + + run_path = Path(run_path) - # Parse memory metrics - mem_metrics = self._parse_memory_line(line) - if mem_metrics: - memory_snapshots.append( - MemoryMetrics( - timestamp=mem_metrics["timestamp"], - dp=mem_metrics["dp"], - tp=mem_metrics["tp"], - ep=mem_metrics["ep"], - metric_type=mem_metrics["type"], - avail_mem_gb=mem_metrics.get("avail_mem_gb"), - mem_usage_gb=mem_metrics.get("mem_usage_gb"), - kv_cache_gb=mem_metrics.get("kv_cache_gb"), - kv_tokens=mem_metrics.get("kv_tokens"), - ) + # If run_path is the logs directory, look in parent for config files + if run_path.name == "logs" and run_path.parent.exists(): + config_dir = run_path.parent + else: + config_dir = run_path + + # Parse global config.yaml for environment variables + yaml_env = self._parse_yaml_environment(config_dir) + + # Find all per-node config files + config_files = {} + for file in os.listdir(config_dir): + if file.endswith("_config.json"): + # Extract node identifier from filename (e.g., "worker-3_prefill_w0_config.json" -> "worker-3_prefill_w0") + node_id = file.replace("_config.json", "") + config_files[node_id] = config_dir / file + + # Build or enhance node_config for each NodeInfo + for node_info in node_infos: + metrics = node_info.metrics + node_name = metrics.node_name + worker_type = metrics.worker_type + worker_id = metrics.worker_id + + # Try to find matching config file + # Format: ___config.json + potential_keys = [ + f"{node_name}_{worker_type}_{worker_id}", # Exact match + f"{node_name}_{worker_type}", # Without worker_id + node_name, # Just node name + ] + + config_path = None + for key in potential_keys: + if key in config_files: + config_path = 
config_files[key] + break + + # Load config file if it exists and merge with existing config + if config_path and config_path.exists(): + try: + with open(config_path) as f: + file_config = json.load(f) + # Merge file config with existing config (which has launch_command) + if node_info.node_config: + # Keep launch_command from log parsing + launch_cmd = node_info.node_config.get("launch_command") + node_info.node_config.update(file_config) + if launch_cmd: + node_info.node_config["launch_command"] = launch_cmd + else: + node_info.node_config = file_config + logger.debug( + f"Loaded config for {node_name} with {len(file_config.get('environment', {}))} env vars" ) + except Exception as e: + logger.warning(f"Could not load config from {config_path}: {e}") + # Keep existing minimal config with launch_command + else: + # No config file found + if not node_info.node_config: + node_info.node_config = {"environment": {}} + elif "environment" not in node_info.node_config: + node_info.node_config["environment"] = {} + logger.debug(f"No config file found for node {node_name}, using minimal config") + + # Merge environment variables from config.yaml + if yaml_env and worker_type in yaml_env: + if not node_info.node_config: + node_info.node_config = {} + if "environment" not in node_info.node_config: + node_info.node_config["environment"] = {} + + # Merge YAML env vars (they take precedence over JSON) + yaml_worker_env = yaml_env[worker_type] + node_info.node_config["environment"].update(yaml_worker_env) + logger.debug(f"Merged {len(yaml_worker_env)} env vars from config.yaml for {node_name} ({worker_type})") + + def _parse_yaml_environment(self, run_path: Path) -> dict[str, dict[str, str]]: + """Parse environment variables from config.yaml. - # Extract TP/DP/EP configuration from command line - if "--tp-size" in line: - tp_match = re.search(r"--tp-size\s+(\d+)", line) - dp_match = re.search(r"--dp-size\s+(\d+)", line) - ep_match = re.search(r"--ep-size\s+(\d+)", line) + Args: + run_path: Path to the run directory - if tp_match: - config["tp_size"] = int(tp_match.group(1)) - if dp_match: - config["dp_size"] = int(dp_match.group(1)) - if ep_match: - config["ep_size"] = int(ep_match.group(1)) + Returns: + Dict mapping worker_type to environment variables + Example: {"prefill": {"VAR1": "val1"}, "decode": {"VAR2": "val2"}} + """ + yaml_path = run_path / "config.yaml" + if not yaml_path.exists(): + logger.debug(f"No config.yaml found in {run_path}") + return {} - except Exception as e: - logger.error(f"Error parsing {filepath}: {e}") - return None + try: + with open(yaml_path) as f: + config = yaml.safe_load(f) - # Validation: Log if we found no metrics - total_metrics = len(batches) + len(memory_snapshots) + if not config or "backend" not in config: + logger.debug("config.yaml has no backend section") + return {} - if total_metrics == 0: - logger.warning( - f"Parsed {filepath} but found no metrics. " - f"Expected to find lines with DP/TP/EP tags. " - f"Log format may have changed." 
- ) + backend = config["backend"] + env_vars = {} + + # Extract prefill_environment + if "prefill_environment" in backend: + env_vars["prefill"] = backend["prefill_environment"] + logger.info(f"Loaded {len(env_vars['prefill'])} prefill env vars from config.yaml") - logger.debug(f"Parsed {filepath}: {len(batches)} batches, " f"{len(memory_snapshots)} memory snapshots") + # Extract decode_environment + if "decode_environment" in backend: + env_vars["decode"] = backend["decode_environment"] + logger.info(f"Loaded {len(env_vars['decode'])} decode env vars from config.yaml") + + # Extract agg_environment if present + if "agg_environment" in backend: + env_vars["agg"] = backend["agg_environment"] + logger.info(f"Loaded {len(env_vars['agg'])} agg env vars from config.yaml") + + return env_vars + + except Exception as e: + logger.warning(f"Could not parse config.yaml in {run_path}: {e}") + return {} - return NodeMetrics( - node_info=node_info, - batches=batches, - memory_snapshots=memory_snapshots, - config=config, - ) + def _node_info_to_dict(self, node_info: "NodeInfo") -> dict: + """Convert NodeInfo object to dict for compatibility. + + Args: + node_info: NodeInfo object + + Returns: + Dict representation compatible with old structure + """ + metrics = node_info.metrics + return { + "node_info": { + "node": metrics.node_name, + "worker_type": metrics.worker_type, + "worker_id": metrics.worker_id, + }, + "prefill_batches": metrics.batches, # Keep as list of BatchMetrics objects + "memory_snapshots": metrics.memory_snapshots, # Keep as list of MemoryMetrics objects + "config": metrics.config, # Runtime config (TP/PP/EP, batch sizes) + "node_config": node_info.node_config, # Full config (environment, launch_command, gpu_info) + "launch_command": node_info.launch_command, # Property accessor for backward compatibility + "environment": node_info.environment, # Property accessor for backward compatibility + "run_id": metrics.run_id, + } def get_prefill_nodes(self, nodes: list): """Filter for prefill nodes only. @@ -271,16 +403,16 @@ def _serialize_node_metrics(self, nodes: list) -> pd.DataFrame: rows = [] for node in nodes: - node_info = node.node_info + metadata = node.metadata config = node.config # Serialize batch metrics for batch in node.batches: row = { # Node identification - "node": node_info.get("node", ""), - "worker_type": node_info.get("worker_type", ""), - "worker_id": node_info.get("worker_id", ""), + "node": metadata.node_name, + "worker_type": metadata.worker_type, + "worker_id": metadata.worker_id, # Config "tp_size": config.get("tp_size"), "dp_size": config.get("dp_size"), @@ -313,9 +445,9 @@ def _serialize_node_metrics(self, nodes: list) -> pd.DataFrame: for mem in node.memory_snapshots: row = { # Node identification - "node": node_info.get("node", ""), - "worker_type": node_info.get("worker_type", ""), - "worker_id": node_info.get("worker_id", ""), + "node": metadata.node_name, + "worker_type": metadata.worker_type, + "worker_id": metadata.worker_id, # Config "tp_size": config.get("tp_size"), "dp_size": config.get("dp_size"), @@ -336,17 +468,17 @@ def _serialize_node_metrics(self, nodes: list) -> pd.DataFrame: return pd.DataFrame(rows) - def _deserialize_node_metrics(self, df: pd.DataFrame) -> list: - """Deserialize NodeMetrics objects from a cached DataFrame. + def _deserialize_node_metrics(self, df: pd.DataFrame, run_path: str = None) -> list: + """Deserialize NodeInfo objects from a cached DataFrame. 
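+
+        Reconstructs the same NodeInfo structure produced by parse_run_logs(), then
+        re-populates configuration from files, since node_config is not cached.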
Args: df: DataFrame with cached node metrics + run_path: Path to the run directory (for loading config files) Returns: - List of NodeMetrics objects + List of NodeInfo objects """ - import time - from .models import BatchMetrics, MemoryMetrics, NodeMetrics + from .models import BatchMetrics, MemoryMetrics, NodeInfo, NodeMetadata, NodeMetrics start_time = time.time() nodes = [] @@ -355,12 +487,6 @@ def _deserialize_node_metrics(self, df: pd.DataFrame) -> list: for (node_name, worker_type, worker_id), group_df in df.groupby( ["node", "worker_type", "worker_id"], dropna=False ): - node_info = { - "node": node_name, - "worker_type": worker_type, - "worker_id": worker_id, - } - # Extract config (same for all rows in this node) config = {} if not group_df.empty: @@ -427,184 +553,33 @@ def _deserialize_node_metrics(self, df: pd.DataFrame) -> list: ) memory_snapshots.append(mem) - # Create NodeMetrics object - node = NodeMetrics( - node_info=node_info, + # Create NodeMetadata + node_metadata = NodeMetadata( + node_name=node_name, + worker_type=worker_type, + worker_id=worker_id, + ) + + # Create NodeMetrics (NEW structure) + metrics = NodeMetrics( + metadata=node_metadata, batches=batches, memory_snapshots=memory_snapshots, config=config, ) - nodes.append(node) + + # Create NodeInfo with empty config (will be populated below) + node_info = NodeInfo(metrics=metrics, node_config={}) + nodes.append(node_info) elapsed = time.time() - start_time logger.info(f"Deserialized {len(nodes)} nodes in {elapsed:.2f}s") - return nodes - - # Private helper methods - - def _parse_dp_tp_ep_tag(self, line: str) -> tuple[int | None, int | None, int | None, str | None]: - """Extract DP, TP, EP indices and timestamp from log line. - - Supports three formats: - - Full: [2025-11-04 05:31:43 DP0 TP0 EP0] - - Simple TP: [2025-11-04 07:05:55 TP0] (defaults DP=0, EP=0) - - Pipeline: [2025-12-08 14:34:44 PP0] (defaults DP=0, EP=0, TP=PP value) - - Args: - line: Log line to parse - - Returns: - (dp, tp, ep, timestamp) or (None, None, None, None) if pattern not found - """ - # Try full format first: DP0 TP0 EP0 - match = re.search(r"\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) DP(\d+) TP(\d+) EP(\d+)\]", line) - if match: - timestamp, dp, tp, ep = match.groups() - return int(dp), int(tp), int(ep), timestamp - - # Try simple format: TP0 only (1P4D style) - match = re.search(r"\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) TP(\d+)\]", line) - if match: - timestamp, tp = match.groups() - return 0, int(tp), 0, timestamp # Default DP=0, EP=0 - - # Try pipeline parallelism format: PP0 (prefill with PP) - match = re.search(r"\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) PP(\d+)\]", line) - if match: - timestamp, pp = match.groups() - return 0, int(pp), 0, timestamp # Map PP to TP slot, default DP=0, EP=0 - - return None, None, None, None - - def _parse_prefill_batch_line(self, line: str) -> dict | None: - """Parse prefill batch log line for metrics. 
- - Example line: - [2025-11-04 05:31:43 DP0 TP0 EP0] Prefill batch, #new-seq: 18, #new-token: 16384, - #cached-token: 0, token usage: 0.00, #running-req: 0, #queue-req: 0, - #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 0.00, - """ - dp, tp, ep, timestamp = self._parse_dp_tp_ep_tag(line) - if dp is None or "Prefill batch" not in line: - return None - - metrics = {"timestamp": timestamp, "dp": dp, "tp": tp, "ep": ep, "type": "prefill"} - - # Extract metrics using regex - patterns = { - "new_seq": r"#new-seq:\s*(\d+)", - "new_token": r"#new-token:\s*(\d+)", - "cached_token": r"#cached-token:\s*(\d+)", - "token_usage": r"token usage:\s*([\d.]+)", - "running_req": r"#running-req:\s*(\d+)", - "queue_req": r"#queue-req:\s*(\d+)", - "prealloc_req": r"#prealloc-req:\s*(\d+)", - "inflight_req": r"#inflight-req:\s*(\d+)", - "input_throughput": r"input throughput \(token/s\):\s*([\d.]+)", - } - - for key, pattern in patterns.items(): - match = re.search(pattern, line) - if match: - value = match.group(1) - metrics[key] = float(value) if "." in value else int(value) - - return metrics - - def _parse_decode_batch_line(self, line: str) -> dict | None: - """Parse decode batch log line for metrics. - - Example line: - [2025-11-04 05:32:32 DP31 TP31 EP31] Decode batch, #running-req: 7, #token: 7040, - token usage: 0.00, pre-allocated usage: 0.00, #prealloc-req: 0, #transfer-req: 0, - #retracted-req: 0, cuda graph: True, gen throughput (token/s): 6.73, #queue-req: 0, - """ - dp, tp, ep, timestamp = self._parse_dp_tp_ep_tag(line) - if dp is None or "Decode batch" not in line: - return None - - metrics = {"timestamp": timestamp, "dp": dp, "tp": tp, "ep": ep, "type": "decode"} - - # Extract metrics using regex - patterns = { - "running_req": r"#running-req:\s*(\d+)", - "num_tokens": r"#token:\s*(\d+)", - "token_usage": r"token usage:\s*([\d.]+)", - "preallocated_usage": r"pre-allocated usage:\s*([\d.]+)", - "prealloc_req": r"#prealloc-req:\s*(\d+)", - "transfer_req": r"#transfer-req:\s*(\d+)", - "queue_req": r"#queue-req:\s*(\d+)", - "gen_throughput": r"gen throughput \(token/s\):\s*([\d.]+)", - } - - for key, pattern in patterns.items(): - match = re.search(pattern, line) - if match: - value = match.group(1) - metrics[key] = float(value) if "." in value else int(value) - - return metrics - - def _parse_memory_line(self, line: str) -> dict | None: - """Parse memory-related log lines. - - Examples: - [2025-11-04 05:27:13 DP0 TP0 EP0] Load weight end. type=DeepseekV3ForCausalLM, - dtype=torch.bfloat16, avail mem=75.11 GB, mem usage=107.07 GB. - - [2025-11-04 05:27:13 DP0 TP0 EP0] KV Cache is allocated. 
#tokens: 524288, KV size: 17.16 GB - """ - dp, tp, ep, timestamp = self._parse_dp_tp_ep_tag(line) - if dp is None: - return None - - metrics = { - "timestamp": timestamp, - "dp": dp, - "tp": tp, - "ep": ep, - } - # Parse available memory - avail_match = re.search(r"avail mem=([\d.]+)\s*GB", line) - if avail_match: - metrics["avail_mem_gb"] = float(avail_match.group(1)) - metrics["type"] = "memory" + # Populate config from files (environment, launch_command) + if run_path and nodes: + self._populate_config_from_files(run_path, nodes) - # Parse memory usage - usage_match = re.search(r"mem usage=([\d.]+)\s*GB", line) - if usage_match: - metrics["mem_usage_gb"] = float(usage_match.group(1)) - metrics["type"] = "memory" - - # Parse KV cache size - kv_match = re.search(r"KV size:\s*([\d.]+)\s*GB", line) - if kv_match: - metrics["kv_cache_gb"] = float(kv_match.group(1)) - metrics["type"] = "kv_cache" - - # Parse token count for KV cache - token_match = re.search(r"#tokens:\s*(\d+)", line) - if token_match: - metrics["kv_tokens"] = int(token_match.group(1)) - - return metrics if "type" in metrics else None - - def _extract_node_info_from_filename(self, filename: str) -> dict | None: - """Extract node name and worker info from filename. - - Example: watchtower-navy-cn01_prefill_w0.err or r02-p01-dgx-c11_prefill_w0.out - Returns: {'node': 'watchtower-navy-cn01', 'worker_type': 'prefill', 'worker_id': 'w0'} - """ - # Use greedy match for node name up to _(prefill|decode|frontend)_ - match = re.match(r"(.+)_(prefill|decode|frontend)_([^.]+)\.(err|out)", os.path.basename(filename)) - if match: - return { - "node": match.group(1), - "worker_type": match.group(2), - "worker_id": match.group(3), - } - return None + return nodes # Standalone helper function for visualizations diff --git a/analysis/srtlog/models.py b/analysis/srtlog/models.py index 26744184..e5bf4662 100644 --- a/analysis/srtlog/models.py +++ b/analysis/srtlog/models.py @@ -142,10 +142,46 @@ def formatted_date(self) -> str: return self.run_date +@dataclass +class ProfilerMetadata: + """Metadata about the benchmark/profiler configuration. + + This describes what the benchmark was configured to do, + not the actual results. + """ + + profiler_type: str + isl: str + osl: str + concurrencies: str = "" + req_rate: str = "" + + @classmethod + def from_json(cls, json_data: dict) -> "ProfilerMetadata": + """Create from {jobid}.json benchmark section. + + Args: + json_data: Parsed JSON from {jobid}.json file + + Returns: + ProfilerMetadata instance + """ + profiler_meta = json_data.get("benchmark", {}) + + return cls( + profiler_type=profiler_meta.get("type", "unknown"), + isl=str(profiler_meta.get("isl", "")), + osl=str(profiler_meta.get("osl", "")), + concurrencies=profiler_meta.get("concurrencies", ""), + req_rate=profiler_meta.get("req-rate", ""), + ) + + @dataclass class ProfilerResults: """Results from profiler benchmarks. + Contains only the actual metrics, not configuration metadata. Parses 32 out of 39 fields from benchmark JSON output. 
NOT PARSED (7 fields): @@ -154,12 +190,6 @@ class ProfilerResults: - tokenizer_id, best_of, burstiness: Metadata not critical for dashboards """ - profiler_type: str - isl: str - osl: str - concurrencies: str = "" - req_rate: str = "" - # Primary throughput metrics (per concurrency level) output_tps: list[float] = field(default_factory=list) total_tps: list[float] = field(default_factory=list) @@ -204,26 +234,6 @@ class ProfilerResults: completed: list[int] = field(default_factory=list) num_prompts: list[int] = field(default_factory=list) - @classmethod - def from_json(cls, json_data: dict) -> "ProfilerResults": - """Create from {jobid}.json profiler_metadata section. - - Args: - json_data: Parsed JSON from {jobid}.json file - - Returns: - ProfilerResults instance (benchmark data added later from result files) - """ - profiler_meta = json_data.get("benchmark", {}) - - return cls( - profiler_type=profiler_meta.get("type", "unknown"), - isl=str(profiler_meta.get("isl", "")), - osl=str(profiler_meta.get("osl", "")), - concurrencies=profiler_meta.get("concurrencies", ""), - req_rate=profiler_meta.get("req-rate", ""), - ) - def add_benchmark_results(self, results: dict) -> None: """Add actual benchmark results from profiler output files. @@ -275,11 +285,27 @@ def add_benchmark_results(self, results: dict) -> None: self.num_prompts = results.get("num_prompts", []) +@dataclass +class BenchmarkLaunchCommand: + """Parsed benchmark launch command information. + + Source: logs/benchmark.out + Only contains essential fields. All parsed arguments go into extra_args. + """ + + benchmark_type: str + raw_command: str + + # All parsed arguments as dict + extra_args: dict[str, Any] = field(default_factory=dict) + + @dataclass class BenchmarkRun: """Complete benchmark run with metadata and profiler results.""" metadata: RunMetadata + profiler_metadata: ProfilerMetadata profiler: ProfilerResults is_complete: bool = True missing_concurrencies: list[int] = field(default_factory=list) @@ -311,10 +337,16 @@ def from_json_file(cls, run_path: str) -> "BenchmarkRun | None": json_data = json.load(f) metadata = RunMetadata.from_json(json_data, run_path) - profiler = ProfilerResults.from_json(json_data) + profiler_metadata = ProfilerMetadata.from_json(json_data) + profiler = ProfilerResults() tags = json_data.get("tags", []) - return cls(metadata=metadata, profiler=profiler, tags=tags) + return cls( + metadata=metadata, + profiler_metadata=profiler_metadata, + profiler=profiler, + tags=tags, + ) except Exception: return None @@ -335,14 +367,14 @@ def check_completeness(self) -> None: Updates is_complete and missing_concurrencies fields. """ # Parse expected concurrencies from metadata - if not self.profiler.concurrencies: + if not self.profiler_metadata.concurrencies: # No expected concurrencies specified, assume manual run self.is_complete = True self.missing_concurrencies = [] return expected = set() - for val in self.profiler.concurrencies.split("x"): + for val in self.profiler_metadata.concurrencies.split("x"): try: expected.add(int(val.strip())) except ValueError: @@ -406,35 +438,72 @@ class MemoryMetrics: kv_tokens: int | None = None +@dataclass +class NodeMetadata: + """Node identification and worker information. + + This is the equivalent of RunMetadata but for individual worker nodes. 
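+
+    Example: the log file "worker-3_decode_w0.out" maps to
+    NodeMetadata(node_name="worker-3", worker_type="decode", worker_id="w0").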
+ """ + + node_name: str # Node identifier (e.g., "worker-3") + worker_type: str # Worker type: prefill, decode, agg + worker_id: str # Worker ID (e.g., "w0") + + @dataclass class NodeMetrics: - """Metrics from a single node (prefill or decode worker), parsed from log files.""" + """Metrics from a single node (prefill or decode worker), parsed from log files. - node_info: dict # Has node name, worker type, worker_id + This class contains ONLY metrics data. Configuration is in NodeConfig. + """ + + metadata: NodeMetadata batches: list[BatchMetrics] = field(default_factory=list) memory_snapshots: list[MemoryMetrics] = field(default_factory=list) - config: dict = field(default_factory=dict) # TP/DP/EP config + config: dict = field(default_factory=dict) # Runtime config: TP/PP/EP, batch sizes, etc. run_id: str = "" + # Convenience properties for backward compatibility @property def node_name(self) -> str: - """Get node name.""" - return self.node_info.get("node", "Unknown") + """Get node name from metadata.""" + return self.metadata.node_name @property def worker_type(self) -> str: - """Get worker type (prefill/decode/frontend).""" - return self.node_info.get("worker_type", "unknown") + """Get worker type from metadata.""" + return self.metadata.worker_type + + @property + def worker_id(self) -> str: + """Get worker ID from metadata.""" + return self.metadata.worker_id @property def is_prefill(self) -> bool: """Check if this is a prefill node.""" - return self.worker_type == "prefill" + return self.metadata.worker_type == "prefill" @property def is_decode(self) -> bool: """Check if this is a decode node.""" - return self.worker_type == "decode" + return self.metadata.worker_type == "decode" + + +@dataclass +class NodeLaunchCommand: + """Parsed node worker launch command information. + + Source: logs/{node}_{worker_type}_{worker_id}.out or .err + Only contains essential fields. All parsed arguments go into extra_args. + """ + + backend_type: str + worker_type: str # prefill, decode, agg + raw_command: str + + # All parsed arguments as dict + extra_args: dict[str, Any] = field(default_factory=dict) # Config-related TypedDicts (from config_reader.py) @@ -448,6 +517,93 @@ class GPUInfo(TypedDict, total=False): driver_version: str +class NodeConfig(TypedDict, total=False): + """Expected structure of a node config JSON file (*_config.json).""" + + filename: str + gpu_info: GPUInfo + config: dict[str, Any] # Contains 'server_args' and other fields + environment: dict[str, str] + launch_command: NodeLaunchCommand | None # Parsed launch command (added at runtime) + + +@dataclass +class NodeInfo: + """Complete information about a node, combining metrics and configuration. + + This is the top-level container for all node data. 
+ """ + + metrics: NodeMetrics # Performance metrics (batches, memory, throughput) + node_config: NodeConfig | None = None # Configuration (environment, launch_command, gpu_info) + + # Convenience properties that delegate to metrics + @property + def node_name(self) -> str: + """Get node name from metrics.""" + return self.metrics.node_name + + @property + def worker_type(self) -> str: + """Get worker type from metrics.""" + return self.metrics.worker_type + + @property + def worker_id(self) -> str: + """Get worker ID from metrics.""" + return self.metrics.worker_id + + @property + def is_prefill(self) -> bool: + """Check if this is a prefill node.""" + return self.metrics.is_prefill + + @property + def is_decode(self) -> bool: + """Check if this is a decode node.""" + return self.metrics.is_decode + + @property + def batches(self) -> list[BatchMetrics]: + """Get batches from metrics.""" + return self.metrics.batches + + @property + def memory_snapshots(self) -> list[MemoryMetrics]: + """Get memory snapshots from metrics.""" + return self.metrics.memory_snapshots + + @property + def config(self) -> dict: + """Get runtime config from metrics.""" + return self.metrics.config + + @property + def run_id(self) -> str: + """Get run_id from metrics.""" + return self.metrics.run_id + + @run_id.setter + def run_id(self, value: str): + """Set run_id on metrics.""" + self.metrics.run_id = value + + # Convenience properties that delegate to node_config + @property + def environment(self) -> dict[str, str]: + """Get environment variables from node_config.""" + if self.node_config: + return self.node_config.get("environment", {}) + return {} + + @property + def launch_command(self) -> NodeLaunchCommand | None: + """Get launch command from node_config.""" + if self.node_config: + return self.node_config.get("launch_command") + return None + + class ServerArgs(TypedDict, total=False): """Expected structure of server_args in node config. @@ -468,17 +624,13 @@ class ServerArgs(TypedDict, total=False): context_length: int -class NodeConfig(TypedDict, total=False): - """Expected structure of a node config JSON file (*_config.json).""" - - filename: str - gpu_info: GPUInfo - config: dict[str, Any] # Contains 'server_args' and other fields - environment: dict[str, str] - +class TopologyInfo(TypedDict): + """Service topology and configuration information from log files. -class ParsedCommandInfo(TypedDict): - """Expected return structure from parse_command_line_from_err.""" + Returned by parse_command_line_from_err() which analyzes log files to discover: + - Which flags were explicitly set in launch commands + - Physical node to service type mapping + """ explicit_flags: set - services: dict[str, list[str]] + services: dict[str, list[str]] # {node_name: [service_types]} diff --git a/analysis/srtlog/parsers/__init__.py b/analysis/srtlog/parsers/__init__.py new file mode 100644 index 00000000..ec8f0243 --- /dev/null +++ b/analysis/srtlog/parsers/__init__.py @@ -0,0 +1,248 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Parser protocols and registries for benchmark and node log parsing. 
+This module provides extensible parsing infrastructure:
+
+- BenchmarkParser: Parses benchmark.out files based on benchmark type
+- NodeParser: Parses prefill/decode/agg logs based on backend type
+
+Usage:
+    from analysis.srtlog.parsers import get_benchmark_parser, get_node_parser
+
+    # Get parser by type
+    bench_parser = get_benchmark_parser("sa-bench")
+    results = bench_parser.parse(benchmark_out_path)
+
+    node_parser = get_node_parser("sglang")
+    nodes = node_parser.parse_logs(log_dir)
+"""
+
+from pathlib import Path
+from typing import Any, Protocol
+
+from analysis.srtlog.models import BenchmarkLaunchCommand, NodeInfo, NodeLaunchCommand, NodeMetrics
+
+
+class BenchmarkParserProtocol(Protocol):
+    """Protocol for benchmark output parsers.
+
+    Each benchmark type (sa-bench, mooncake-router, etc.) should have
+    a parser that implements this protocol.
+
+    Design principle: JSON files are the primary source of truth.
+    The parse() method is a fallback for when JSON files are unavailable.
+    """
+
+    @property
+    def benchmark_type(self) -> str:
+        """Return the benchmark type this parser handles."""
+        ...
+
+    def parse(self, benchmark_out_path: Path) -> dict[str, Any]:
+        """Parse benchmark.out file and return results (FALLBACK method).
+
+        This is a fallback method used when JSON result files are not available.
+        Prefer using parse_result_directory() which prioritizes JSON files as
+        the source of truth.
+
+        Args:
+            benchmark_out_path: Path to the benchmark.out file
+
+        Returns:
+            Dict with benchmark results including:
+            - output_tps: Output tokens per second
+            - mean_ttft_ms: Mean time to first token
+            - mean_itl_ms: Mean inter-token latency
+            - etc.
+        """
+        ...
+
+    def parse_launch_command(self, log_content: str) -> BenchmarkLaunchCommand | None:
+        """Parse the benchmark launch command from log content.
+
+        Args:
+            log_content: Content of the benchmark log file
+
+        Returns:
+            BenchmarkLaunchCommand with parsed parameters, or None if not found
+        """
+        ...
+
+    def parse_result_json(self, json_path: Path) -> dict[str, Any]:
+        """Parse a benchmark result JSON file (PRIMARY source of truth).
+
+        JSON files contain the complete, accurate benchmark results and should
+        be used as the primary data source whenever available.
+
+        Args:
+            json_path: Path to a result JSON file
+
+        Returns:
+            Dict with parsed benchmark metrics
+        """
+        ...
+
+    def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int | None = None) -> Path | None:
+        """Find the directory containing benchmark results within a run directory.
+
+        This method encapsulates the logic for locating result files, which varies by benchmark type.
+        For example:
+        - sa-bench: looks for directories like "sa-bench_isl_8192_osl_1024"
+        - mooncake-router: looks in "logs/artifacts/" subdirectory
+
+        Args:
+            run_path: Path to the run directory (contains logs/, metadata, etc.)
+            isl: Input sequence length (optional, used for pattern matching)
+            osl: Output sequence length (optional, used for pattern matching)
+
+        Returns:
+            Path to directory containing result files, or None if not found
+        """
+        ...
+
+    def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]:
+        """Parse all result files in a directory.
+
+        This is the primary entry point for parsing benchmark results.
+        Implementation should:
+        1. First attempt to parse JSON result files (primary source of truth)
+        2. Fall back to parsing benchmark.out if no JSON files found
+        3. Return list of results (one per concurrency level or benchmark run)
+
+        Args:
+            result_dir: Directory containing benchmark result files
+
+        Returns:
+            List of result dicts (one per concurrency level or benchmark run)
+        """
+        ...
+
+
+class NodeParserProtocol(Protocol):
+    """Protocol for node log parsers.
+
+    Each backend type (sglang, trtllm, etc.) should have a parser
+    that implements this protocol for parsing prefill/decode/agg logs.
+    """
+
+    @property
+    def backend_type(self) -> str:
+        """Return the backend type this parser handles."""
+        ...
+
+    def parse_logs(self, log_dir: Path) -> list[NodeInfo]:
+        """Parse all node logs in a directory.
+
+        Args:
+            log_dir: Directory containing prefill/decode/agg .out/.err files
+
+        Returns:
+            List of NodeInfo objects, one per worker
+        """
+        ...
+
+    def parse_single_log(self, log_path: Path) -> NodeInfo | None:
+        """Parse a single node log file.
+
+        Args:
+            log_path: Path to a prefill/decode/agg log file
+
+        Returns:
+            NodeInfo object or None if parsing failed
+        """
+        ...
+
+    def parse_launch_command(self, log_content: str, worker_type: str = "unknown") -> NodeLaunchCommand | None:
+        """Parse the worker launch command from log content.
+
+        Args:
+            log_content: Content of the worker log file
+            worker_type: Type of worker (prefill, decode, agg)
+
+        Returns:
+            NodeLaunchCommand with parsed parameters, or None if not found
+        """
+        ...
+
+
+# Registry for benchmark parsers
+_benchmark_parsers: dict[str, type] = {}
+
+# Registry for node parsers
+_node_parsers: dict[str, type] = {}
+
+
+def register_benchmark_parser(benchmark_type: str):
+    """Decorator to register a benchmark parser.
+
+    Usage:
+        @register_benchmark_parser("sa-bench")
+        class SABenchParser:
+            ...
+    """
+
+    def decorator(cls):
+        _benchmark_parsers[benchmark_type] = cls
+        return cls
+
+    return decorator
+
+
+def register_node_parser(backend_type: str):
+    """Decorator to register a node parser.
+
+    Usage:
+        @register_node_parser("sglang")
+        class SGLangNodeParser:
+            ...
+    """
+
+    def decorator(cls):
+        _node_parsers[backend_type] = cls
+        return cls
+
+    return decorator
+
+
+def get_benchmark_parser(benchmark_type: str) -> BenchmarkParserProtocol:
+    """Get a benchmark parser by type.
+
+    Args:
+        benchmark_type: Type of benchmark (e.g., "sa-bench", "mooncake-router")
+
+    Returns:
+        Instance of the appropriate benchmark parser
+
+    Raises:
+        ValueError: If no parser registered for the benchmark type
+    """
+    if benchmark_type not in _benchmark_parsers:
+        available = ", ".join(_benchmark_parsers.keys()) or "none"
+        raise ValueError(f"No benchmark parser registered for '{benchmark_type}'. Available: {available}")
+    return _benchmark_parsers[benchmark_type]()
+
+
+def get_node_parser(backend_type: str) -> NodeParserProtocol:
+    """Get a node parser by backend type.
+
+    Args:
+        backend_type: Type of backend (e.g., "sglang", "trtllm")
+
+    Returns:
+        Instance of the appropriate node parser
+
+    Raises:
+        ValueError: If no parser registered for the backend type
+    """
+    if backend_type not in _node_parsers:
+        available = ", ".join(_node_parsers.keys()) or "none"
+        raise ValueError(f"No node parser registered for '{backend_type}'.
Available: {available}") + return _node_parsers[backend_type]() + + +def list_benchmark_parsers() -> list[str]: + """List all registered benchmark parser types.""" + return list(_benchmark_parsers.keys()) + + +def list_node_parsers() -> list[str]: + """List all registered node parser types.""" + return list(_node_parsers.keys()) + + +# Import parsers to trigger registration +from analysis.srtlog.parsers.benchmark import * # noqa: E402, F401, F403 +from analysis.srtlog.parsers.nodes import * # noqa: E402, F401, F403 + +# Re-export models for convenience +__all__ = [ + "BenchmarkLaunchCommand", + "NodeLaunchCommand", + "NodeMetrics", + "BenchmarkParserProtocol", + "NodeParserProtocol", + "get_benchmark_parser", + "get_node_parser", + "list_benchmark_parsers", + "list_node_parsers", + "register_benchmark_parser", + "register_node_parser", +] diff --git a/analysis/srtlog/parsers/benchmark/__init__.py b/analysis/srtlog/parsers/benchmark/__init__.py new file mode 100644 index 00000000..51efb4a6 --- /dev/null +++ b/analysis/srtlog/parsers/benchmark/__init__.py @@ -0,0 +1,6 @@ +"""Benchmark output parsers.""" + +from analysis.srtlog.parsers.benchmark.mooncake_router import MooncakeRouterParser +from analysis.srtlog.parsers.benchmark.sa_bench import SABenchParser + +__all__ = ["SABenchParser", "MooncakeRouterParser"] diff --git a/analysis/srtlog/parsers/benchmark/mooncake_router.py b/analysis/srtlog/parsers/benchmark/mooncake_router.py new file mode 100644 index 00000000..1d2fbdb6 --- /dev/null +++ b/analysis/srtlog/parsers/benchmark/mooncake_router.py @@ -0,0 +1,336 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Mooncake Router benchmark output parser.""" + +from __future__ import annotations + +import json +import logging +import re +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from analysis.srtlog.parsers import register_benchmark_parser + +if TYPE_CHECKING: + from analysis.srtlog.parsers import BenchmarkLaunchCommand + +logger = logging.getLogger(__name__) + + +@register_benchmark_parser("mooncake-router") +class MooncakeRouterParser: + """Parser for Mooncake Router benchmark output. + Parses benchmark.out files and AIPerf result JSON files from mooncake-router runs. + """ + + @property + def benchmark_type(self) -> str: + return "mooncake-router" + + def parse(self, benchmark_out_path: Path) -> dict[str, Any]: + """Parse benchmark.out file for mooncake-router results (FALLBACK method). + + This is a fallback method used when JSON result files are not available. + Prefer using parse_result_directory() which prioritizes JSON files. 
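# Aside: registry usage sketch. "csv-bench" is a hypothetical benchmark type,
# and the class below stubs only enough of the protocol to register and
# resolve; a real parser would implement every protocol method.
from pathlib import Path
from typing import Any

from analysis.srtlog.parsers import (
    get_benchmark_parser,
    list_benchmark_parsers,
    register_benchmark_parser,
)


@register_benchmark_parser("csv-bench")
class CSVBenchParser:
    @property
    def benchmark_type(self) -> str:
        return "csv-bench"

    def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]:
        return []  # stub

assert "csv-bench" in list_benchmark_parsers()
parser = get_benchmark_parser("csv-bench")  # registry returns a fresh instance
assert parser.benchmark_type == "csv-bench"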
+ + Args: + benchmark_out_path: Path to benchmark.out file + Returns: + Dict with aggregated benchmark results + """ + results = { + "benchmark_type": self.benchmark_type, + "output_tps": None, + "request_throughput": None, + "mean_ttft_ms": None, + "mean_itl_ms": None, + "total_requests": None, + } + + if not benchmark_out_path.exists(): + logger.warning("benchmark.out not found: %s", benchmark_out_path) + return results + + try: + content = benchmark_out_path.read_text() + + # Parse mooncake-router output patterns + # Example: "Request throughput: 3.37 req/s" + # Example: "Output token throughput: 1150.92 tok/s" + req_tpt_pattern = r"[Rr]equest\s+throughput[:\s]+([\d.]+)" + out_tpt_pattern = r"[Oo]utput\s+(?:token\s+)?throughput[:\s]+([\d.]+)" + ttft_pattern = r"[Tt]ime\s+to\s+first\s+token[:\s]+([\d.]+)" + itl_pattern = r"[Ii]nter.?token\s+latency[:\s]+([\d.]+)" + + for line in content.split("\n"): + if req_tpt_match := re.search(req_tpt_pattern, line): + results["request_throughput"] = float(req_tpt_match.group(1)) + if out_tpt_match := re.search(out_tpt_pattern, line): + results["output_tps"] = float(out_tpt_match.group(1)) + if ttft_match := re.search(ttft_pattern, line): + results["mean_ttft_ms"] = float(ttft_match.group(1)) + if itl_match := re.search(itl_pattern, line): + results["mean_itl_ms"] = float(itl_match.group(1)) + + except Exception as e: + logger.warning("Failed to parse benchmark.out: %s", e) + + return results + + def parse_result_json(self, json_path: Path) -> dict[str, Any]: + """Parse an AIPerf result JSON file. + Args: + json_path: Path to profile_export_aiperf.json + Returns: + Dict with benchmark metrics + """ + result = {} + + try: + with open(json_path) as f: + data = json.load(f) + + # AIPerf format has nested structure with unit and values + result = { + "concurrency": 0, # Mooncake uses open-loop, no fixed concurrency + # Throughput metrics + "output_tps": self._get_metric(data, "output_token_throughput", "avg"), + "request_throughput": self._get_metric(data, "request_throughput", "avg"), + # Mean latencies (convert from ms) + "mean_ttft_ms": self._get_metric(data, "time_to_first_token", "avg"), + "mean_tpot_ms": self._get_metric(data, "inter_token_latency", "avg"), + "mean_itl_ms": self._get_metric(data, "inter_token_latency", "avg"), + "mean_e2el_ms": self._get_metric(data, "request_latency", "avg"), + # Median latencies + "median_ttft_ms": self._get_metric(data, "time_to_first_token", "p50"), + "median_tpot_ms": self._get_metric(data, "inter_token_latency", "p50"), + "median_itl_ms": self._get_metric(data, "inter_token_latency", "p50"), + "median_e2el_ms": self._get_metric(data, "request_latency", "p50"), + # P99 latencies + "p99_ttft_ms": self._get_metric(data, "time_to_first_token", "p99"), + "p99_tpot_ms": self._get_metric(data, "inter_token_latency", "p99"), + "p99_itl_ms": self._get_metric(data, "inter_token_latency", "p99"), + "p99_e2el_ms": self._get_metric(data, "request_latency", "p99"), + # Std dev latencies + "std_ttft_ms": self._get_metric(data, "time_to_first_token", "std"), + "std_itl_ms": self._get_metric(data, "inter_token_latency", "std"), + "std_e2el_ms": self._get_metric(data, "request_latency", "std"), + # Request count + "completed": self._get_metric(data, "request_count", "avg"), + "num_prompts": self._get_metric(data, "request_count", "avg"), + } + + # Also extract per-user throughput if available + tps_per_user = self._get_metric(data, "output_token_throughput_per_user", "avg") + if tps_per_user: + result["output_tps_per_user"] = 
tps_per_user + except Exception as e: + logger.warning("Failed to parse %s: %s", json_path, e) + + return result + + def _get_metric(self, data: dict, metric_name: str, stat: str) -> float | None: + """Extract a metric value from AIPerf data structure. + Args: + data: AIPerf JSON data + metric_name: Name of the metric (e.g., "time_to_first_token") + stat: Statistic to extract (e.g., "avg", "p50", "p99") + Returns: + Metric value or None if not found + """ + try: + metric_data = data.get(metric_name, {}) + if isinstance(metric_data, dict): + value = metric_data.get(stat) + if value is not None: + return float(value) + except (KeyError, TypeError, ValueError): + pass + return None + + def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: + """Parse AIPerf result files in a directory. + + Uses JSON files (profile_export_aiperf.json) as the primary source of truth. + Falls back to parsing benchmark.out only if no JSON results are found. + + Args: + result_dir: Directory containing profile_export_aiperf.json + Returns: + List of result dicts (usually just one for mooncake-router) + """ + results = [] + + # Primary: Look for AIPerf JSON files (source of truth) + for json_file in result_dir.rglob("profile_export_aiperf.json"): + result = self.parse_result_json(json_file) + if result.get("output_tps") is not None: + results.append(result) + logger.info(f"Loaded mooncake-router results from JSON: {json_file}") + + # Fallback: If no JSON results found, try parsing benchmark.out + if not results: + benchmark_out = result_dir / "benchmark.out" + if benchmark_out.exists(): + logger.info(f"No JSON results found in {result_dir}, falling back to benchmark.out parsing") + fallback_result = self.parse(benchmark_out) + if fallback_result.get("output_tps"): + # Convert to format expected by caller + results.append( + { + "concurrency": 0, # Mooncake doesn't track concurrency + "output_tps": fallback_result.get("output_tps"), + "request_throughput": fallback_result.get("request_throughput"), + "mean_ttft_ms": fallback_result.get("mean_ttft_ms"), + "mean_itl_ms": fallback_result.get("mean_itl_ms"), + "total_requests": fallback_result.get("total_requests"), + } + ) + else: + logger.warning(f"No results found in {result_dir} (no profile_export_aiperf.json or benchmark.out)") + + return results + + def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int | None = None) -> Path | None: + """Find the directory containing mooncake-router/AIPerf results. + + Mooncake-router results are typically in: + - logs/artifacts/*/profile_export_aiperf.json + + Since results can be in nested subdirectories, we return the logs directory + and let parse_result_directory use rglob to find them. 
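# Aside: the AIPerf JSON shape _get_metric() expects maps each metric name to
# a dict of statistics. The numbers are invented; the lookup below mirrors
# _get_metric()'s tolerant handling of missing keys.
aiperf_data = {
    "time_to_first_token": {"avg": 152.3, "p50": 140.0, "p99": 410.2},
    "output_token_throughput": {"avg": 1150.9},
}

def get_metric(data: dict, metric_name: str, stat: str) -> float | None:
    value = data.get(metric_name, {})
    if isinstance(value, dict) and value.get(stat) is not None:
        return float(value[stat])
    return None

assert get_metric(aiperf_data, "time_to_first_token", "p99") == 410.2
assert get_metric(aiperf_data, "inter_token_latency", "avg") is None  # absent metric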
+ + Args: + run_path: Path to the run directory + isl: Input sequence length (not used for mooncake-router) + osl: Output sequence length (not used for mooncake-router) + + Returns: + Path to logs directory where results can be found, or None + """ + # Mooncake-router results are in logs/artifacts/ subdirectories + logs_dir = run_path / "logs" + if logs_dir.exists(): + # Check if there are any AIPerf result files using iterdir recursively + try: + for root_dir in [logs_dir]: + for item in root_dir.rglob("profile_export_aiperf.json"): + logger.info(f"Found mooncake-router results in: {logs_dir}") + return logs_dir + except (OSError, PermissionError) as e: + logger.warning(f"Error accessing {logs_dir}: {e}") + + # Also check run_path directly in case logs are at root + try: + for item in run_path.rglob("profile_export_aiperf.json"): + logger.info(f"Found mooncake-router results in: {run_path}") + return run_path + except (OSError, PermissionError) as e: + logger.warning(f"Error accessing {run_path}: {e}") + + return None + + def find_aiperf_results(self, log_dir: Path) -> list[Path]: + """Find all AIPerf result files in a log directory. + Args: + log_dir: Root log directory + Returns: + List of paths to profile_export_aiperf.json files + """ + return list(log_dir.rglob("profile_export_aiperf.json")) + + def parse_launch_command(self, log_content: str) -> BenchmarkLaunchCommand | None: + """Parse the mooncake-router launch command from log content. + Looks for command lines like: + [CMD] aiperf profile --model ... --url ... + genai-perf profile --model ... --endpoint ... + Also parses header format: + Endpoint: http://localhost:8000 + Model: Qwen/Qwen3-32B + Workload: conversation + Args: + log_content: Content of the benchmark log file + Returns: + BenchmarkLaunchCommand with parsed parameters, or None if not found + """ + from analysis.srtlog.parsers import BenchmarkLaunchCommand + + raw_command = None + + # First, try to find [CMD] tagged command (preferred - from our scripts) + cmd_match = re.search(r"\[CMD\]\s*(.+)$", log_content, re.MULTILINE) + if cmd_match: + raw_command = cmd_match.group(1).strip() + + # Fallback: pattern to match genai-perf, aiperf or mooncake-router commands + # aiperf format: aiperf profile -m "Model" --url "http://..." 
--concurrency 10 + if not raw_command: + command_patterns = [ + r"(aiperf\s+profile\s+[^\n]+)", + r"(genai-perf\s+profile\s+[^\n]+)", + r"(python[3]?\s+.*genai_perf[^\n]+)", + r"(python[3]?\s+.*aiperf[^\n]+)", + r"(mooncake-router\s+[^\n]+)", + ] + + for pattern in command_patterns: + match = re.search(pattern, log_content, re.IGNORECASE) + if match: + raw_command = match.group(1).strip() + break + + # If no command found, try to build from header format + if not raw_command: + if "Mooncake Router Benchmark" in log_content: + raw_command = "mooncake-router-benchmark (from header)" + + if not raw_command: + return None + + extra_args: dict[str, Any] = {} + + # Parse aiperf/genai-perf arguments from command line + # Supports both --model and -m formats, quoted and unquoted values + arg_patterns = { + "model": r"(?:--model|-m)[=\s]+[\"']?([^\"'\s]+)[\"']?", + "base_url": r"--url[=\s]+[\"']?([^\"'\s]+)[\"']?", + "num_prompts": r"--(?:num-prompts|request-count|request)[=\s]+(\d+)", + "request_rate": r"--request-rate[=\s]+([^\s]+)", + "max_concurrency": r"--concurrency[=\s]+(\d+)", + "input_len": r"--(?:synthetic-input-tokens-mean|input-sequence-length|isl)[=\s]+(\d+)", + "output_len": r"--(?:output-tokens-mean|output-sequence-length|osl)[=\s]+(\d+)", + } + + for field, pattern in arg_patterns.items(): + match = re.search(pattern, raw_command) + if match: + value: Any = match.group(1) + if field in ("num_prompts", "max_concurrency", "input_len", "output_len"): + value = int(value) + elif field == "request_rate" and value != "inf": + try: + value = float(value) + except ValueError: + pass + extra_args[field] = value + + # Also parse from header format (srtctl-style) + header_patterns = { + "model": r"^Model:\s*(.+)$", + "base_url": r"^Endpoint:\s*(.+)$", + "dataset": r"^Workload:\s*(.+)$", + } + + for field, pattern in header_patterns.items(): + if field not in extra_args: + match = re.search(pattern, log_content, re.MULTILINE) + if match: + extra_args[field] = match.group(1).strip() + + return BenchmarkLaunchCommand( + benchmark_type=self.benchmark_type, + raw_command=raw_command, + extra_args=extra_args, + ) diff --git a/analysis/srtlog/parsers/benchmark/sa_bench.py b/analysis/srtlog/parsers/benchmark/sa_bench.py new file mode 100644 index 00000000..e8a6f8ab --- /dev/null +++ b/analysis/srtlog/parsers/benchmark/sa_bench.py @@ -0,0 +1,353 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""SA-Bench benchmark output parser.""" + +from __future__ import annotations + +import json +import logging +import re +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from analysis.srtlog.parsers import register_benchmark_parser + +if TYPE_CHECKING: + from analysis.srtlog.parsers import BenchmarkLaunchCommand + +logger = logging.getLogger(__name__) + + +@register_benchmark_parser("sa-bench") +class SABenchParser: + """Parser for SA-Bench benchmark output. + Parses benchmark.out files and result JSON files from SA-Bench runs. + """ + + @property + def benchmark_type(self) -> str: + return "sa-bench" + + def parse(self, benchmark_out_path: Path) -> dict[str, Any]: + """Parse benchmark.out file for SA-Bench results (FALLBACK method). + + This is a fallback method used when JSON result files are not available. + Prefer using parse_result_directory() which prioritizes JSON files. 
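# Aside: the mooncake-router arg_patterns above applied to a fabricated aiperf
# command line (the command text is an assumption; the regexes are copied from
# the parser):
import re

cmd = 'aiperf profile -m "Qwen/Qwen3-32B" --url "http://localhost:8000" --concurrency 10'
model = re.search(r"(?:--model|-m)[=\s]+[\"']?([^\"'\s]+)[\"']?", cmd)
conc = re.search(r"--concurrency[=\s]+(\d+)", cmd)
assert model and model.group(1) == "Qwen/Qwen3-32B"
assert conc and int(conc.group(1)) == 10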
+ + Args: + benchmark_out_path: Path to benchmark.out file + Returns: + Dict with aggregated benchmark results + """ + results = { + "benchmark_type": self.benchmark_type, + "concurrencies": [], + "output_tps": [], + "mean_ttft_ms": [], + "mean_itl_ms": [], + "mean_tpot_ms": [], + "p99_ttft_ms": [], + "p99_itl_ms": [], + "request_throughput": [], + "completed_requests": [], + } + + if not benchmark_out_path.exists(): + logger.warning("benchmark.out not found: %s", benchmark_out_path) + return results + + try: + content = benchmark_out_path.read_text() + + # Parse summary lines from benchmark output + # Example: "Concurrency: 100, Throughput: 5000 tok/s, TTFT: 150ms, ITL: 20ms" + concurrency_pattern = r"Concurrency[:\s]+(\d+)" + throughput_pattern = r"(?:Output\s+)?[Tt]hroughput[:\s]+([\d.]+)" + ttft_pattern = r"(?:Mean\s+)?TTFT[:\s]+([\d.]+)" + itl_pattern = r"(?:Mean\s+)?ITL[:\s]+([\d.]+)" + + # Try to extract from summary lines + for line in content.split("\n"): + if "concurrency" in line.lower() or "throughput" in line.lower(): + conc_match = re.search(concurrency_pattern, line, re.IGNORECASE) + tpt_match = re.search(throughput_pattern, line, re.IGNORECASE) + ttft_match = re.search(ttft_pattern, line, re.IGNORECASE) + itl_match = re.search(itl_pattern, line, re.IGNORECASE) + + if conc_match and tpt_match: + results["concurrencies"].append(int(conc_match.group(1))) + results["output_tps"].append(float(tpt_match.group(1))) + if ttft_match: + results["mean_ttft_ms"].append(float(ttft_match.group(1))) + if itl_match: + results["mean_itl_ms"].append(float(itl_match.group(1))) + + except Exception as e: + logger.warning("Failed to parse benchmark.out: %s", e) + + return results + + def parse_result_json(self, json_path: Path) -> dict[str, Any]: + """Parse a SA-Bench result JSON file. 
+ Args: + json_path: Path to result JSON (e.g., result_c100.json) + Returns: + Dict with benchmark metrics for this concurrency level + """ + result = {} + + try: + with open(json_path) as f: + data = json.load(f) + + # Return with same field names as original JSON for compatibility + # with downstream processing in _build_rollup_summary + result = { + "max_concurrency": data.get("max_concurrency"), + # Throughput metrics (keep original field names) + "output_throughput": data.get("output_throughput"), + "total_token_throughput": data.get("total_token_throughput"), + "request_throughput": data.get("request_throughput"), + "request_goodput": data.get("request_goodput"), + "request_rate": data.get("request_rate"), + # Mean latencies + "mean_ttft_ms": data.get("mean_ttft_ms"), + "mean_tpot_ms": data.get("mean_tpot_ms"), + "mean_itl_ms": data.get("mean_itl_ms"), + "mean_e2el_ms": data.get("mean_e2el_ms"), + # Median latencies + "median_ttft_ms": data.get("median_ttft_ms"), + "median_tpot_ms": data.get("median_tpot_ms"), + "median_itl_ms": data.get("median_itl_ms"), + "median_e2el_ms": data.get("median_e2el_ms"), + # P99 latencies + "p99_ttft_ms": data.get("p99_ttft_ms"), + "p99_tpot_ms": data.get("p99_tpot_ms"), + "p99_itl_ms": data.get("p99_itl_ms"), + "p99_e2el_ms": data.get("p99_e2el_ms"), + # Std dev latencies + "std_ttft_ms": data.get("std_ttft_ms"), + "std_tpot_ms": data.get("std_tpot_ms"), + "std_itl_ms": data.get("std_itl_ms"), + "std_e2el_ms": data.get("std_e2el_ms"), + # Token counts + "total_input_tokens": data.get("total_input_tokens"), + "total_output_tokens": data.get("total_output_tokens"), + # Metadata + "duration": data.get("duration"), + "completed": data.get("completed"), + "num_prompts": data.get("num_prompts"), + } + + except Exception as e: + logger.warning("Failed to parse %s: %s", json_path, e) + + return result + + def parse_result_directory(self, result_dir: Path) -> list[dict[str, Any]]: + """Parse all result JSON files in a benchmark result directory. + + Uses JSON files as the primary source of truth. Falls back to parsing + benchmark.out only if no JSON results are found. 
+ + Args: + result_dir: Directory containing result_*.json files + Returns: + List of result dicts sorted by concurrency + """ + results = [] + + # Primary: Parse JSON result files (source of truth) + for json_file in result_dir.glob("*.json"): + result = self.parse_result_json(json_file) + if result.get("max_concurrency") is not None: + results.append(result) + + # Fallback: If no JSON results found, try parsing benchmark.out + if not results: + benchmark_out = result_dir / "benchmark.out" + if benchmark_out.exists(): + logger.info(f"No JSON results found in {result_dir}, falling back to benchmark.out parsing") + # Parse benchmark.out and create a single result entry + fallback_result = self.parse(benchmark_out) + if fallback_result.get("output_tps"): + # Wrap in list format expected by caller + results.append( + { + "max_concurrency": fallback_result.get("concurrencies", [0])[0] + if fallback_result.get("concurrencies") + else 0, + "output_tps": fallback_result.get("output_tps"), + "request_throughput": fallback_result.get("request_throughput"), + "mean_ttft_ms": fallback_result.get("mean_ttft_ms"), + "mean_itl_ms": fallback_result.get("mean_itl_ms"), + "mean_tpot_ms": fallback_result.get("mean_tpot_ms"), + "p99_ttft_ms": fallback_result.get("p99_ttft_ms"), + "p99_itl_ms": fallback_result.get("p99_itl_ms"), + "completed": fallback_result.get("completed_requests"), + } + ) + else: + logger.warning(f"No results found in {result_dir} (no JSON files or benchmark.out)") + + # Sort by concurrency + results.sort(key=lambda x: x.get("max_concurrency", 0) or 0) + + return results + + def find_result_directory(self, run_path: Path, isl: int | None = None, osl: int | None = None) -> Path | None: + """Find the directory containing SA-Bench results. + + SA-Bench results are typically in directories named like: + - sa-bench_isl_8192_osl_1024 + - vllm_isl_8192_osl_1024 + + Args: + run_path: Path to the run directory + isl: Input sequence length + osl: Output sequence length + + Returns: + Path to results directory, or None if not found + """ + # Search paths: run_path and run_path/logs + search_paths = [run_path] + logs_dir = run_path / "logs" + if logs_dir.exists(): + search_paths.append(logs_dir) + + # Build prefix patterns + if isl is not None and osl is not None: + prefixes = [ + f"sa-bench_isl_{isl}_osl_{osl}", + f"vllm_isl_{isl}_osl_{osl}", + ] + else: + # Fallback: match any sa-bench or vllm directory + prefixes = ["sa-bench", "vllm"] + + # Search for matching directories + for search_path in search_paths: + if not search_path.exists(): + continue + try: + for entry in search_path.iterdir(): + if not entry.is_dir(): + continue + # Check if directory name starts with any of our prefixes + for prefix in prefixes: + if entry.name.startswith(prefix): + # Verify it contains result files + if list(entry.glob("*.json")): + logger.info(f"Found SA-Bench results in: {entry}") + return entry + except (OSError, PermissionError) as e: + logger.warning(f"Error accessing {search_path}: {e}") + continue + + return None + + def parse_launch_command(self, log_content: str) -> BenchmarkLaunchCommand | None: + """Parse the SA-Bench launch command from log content. + Looks for command lines like: + [CMD] python -m sglang.bench_serving --model ... --base-url ... + python -m sglang.bench_serving --model ... --base-url ... + Also parses SA-Bench Config header format: + SA-Bench Config: endpoint=http://localhost:8000; isl=8192; osl=1024; ... 
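# Aside: sketch of the prefix matching find_result_directory() performs; the
# candidate directory names are invented examples.
isl, osl = 8192, 1024
prefixes = [f"sa-bench_isl_{isl}_osl_{osl}", f"vllm_isl_{isl}_osl_{osl}"]
candidates = ["sa-bench_isl_8192_osl_1024", "logs", "vllm_isl_4096_osl_512"]
matches = [d for d in candidates if any(d.startswith(p) for p in prefixes)]
assert matches == ["sa-bench_isl_8192_osl_1024"]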
+ Args: + log_content: Content of the benchmark log file + Returns: + BenchmarkLaunchCommand with parsed parameters, or None if not found + """ + from analysis.srtlog.parsers import BenchmarkLaunchCommand + + raw_command = None + + # First, try to find [CMD] tagged command (preferred - from our scripts) + cmd_match = re.search(r"\[CMD\]\s*(.+)$", log_content, re.MULTILINE) + if cmd_match: + raw_command = cmd_match.group(1).strip() + + # Fallback: pattern to match sa-bench / sglang.bench_serving command + if not raw_command: + command_patterns = [ + r"(python[3]?\s+-m\s+sglang\.bench_serving\s+[^\n]+)", + r"(sa-bench\s+[^\n]+)", + r"(python[3]?\s+.*bench_serving\.py\s+[^\n]+)", + ] + + for pattern in command_patterns: + match = re.search(pattern, log_content, re.IGNORECASE) + if match: + raw_command = match.group(1).strip() + break + + # Also try SA-Bench Config header format + if not raw_command: + config_match = re.search(r"(SA-Bench Config:[^\n]+)", log_content) + if config_match: + raw_command = config_match.group(1).strip() + + if not raw_command: + return None + + extra_args: dict[str, Any] = {} + + # Parse common arguments from command line + arg_patterns = { + "model": r"--model[=\s]+([^\s]+)", + "base_url": r"--base-url[=\s]+([^\s]+)", + "num_prompts": r"--num-prompts?[=\s]+(\d+)", + "request_rate": r"--request-rate[=\s]+([^\s]+)", + "max_concurrency": r"--max-concurrency[=\s]+(\d+)", + "input_len": r"--(?:input-len|random-input-len)[=\s]+(\d+)", + "output_len": r"--(?:output-len|random-output-len)[=\s]+(\d+)", + "dataset": r"--dataset[=\s]+([^\s]+)", + "dataset_path": r"--dataset-path[=\s]+([^\s]+)", + } + + for field, pattern in arg_patterns.items(): + match = re.search(pattern, raw_command) + if match: + value: Any = match.group(1) + # Convert to appropriate type + if field in ("num_prompts", "max_concurrency", "input_len", "output_len"): + value = int(value) + elif field == "request_rate" and value != "inf": + try: + value = float(value) + except ValueError: + pass + extra_args[field] = value + + # Also parse from SA-Bench Config header format + # Format: SA-Bench Config: endpoint=http://localhost:8000; isl=8192; osl=1024; concurrencies=28; req_rate=inf; model=dsr1-fp8 + header_patterns = { + "base_url": r"endpoint=([^;\s]+)", + "model": r"model=([^;\s]+)", + "input_len": r"isl=(\d+)", + "output_len": r"osl=(\d+)", + "max_concurrency": r"concurrencies=(\d+)", + "request_rate": r"req_rate=([^;\s]+)", + } + + for field, pattern in header_patterns.items(): + if field not in extra_args: + match = re.search(pattern, raw_command) + if match: + value = match.group(1) + if field in ("input_len", "output_len", "max_concurrency"): + value = int(value) + elif field == "request_rate" and value != "inf": + try: + value = float(value) + except ValueError: + pass + extra_args[field] = value + + return BenchmarkLaunchCommand( + benchmark_type=self.benchmark_type, + raw_command=raw_command, + extra_args=extra_args, + ) diff --git a/analysis/srtlog/parsers/nodes/__init__.py b/analysis/srtlog/parsers/nodes/__init__.py new file mode 100644 index 00000000..40fc849e --- /dev/null +++ b/analysis/srtlog/parsers/nodes/__init__.py @@ -0,0 +1,9 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Node log parsers for different backends.""" + +from analysis.srtlog.parsers.nodes.sglang import SGLangNodeParser +from analysis.srtlog.parsers.nodes.trtllm import TRTLLMNodeParser + +__all__ = ["SGLangNodeParser", "TRTLLMNodeParser"] diff --git a/analysis/srtlog/parsers/nodes/sglang.py b/analysis/srtlog/parsers/nodes/sglang.py new file mode 100644 index 00000000..03a4ee9e --- /dev/null +++ b/analysis/srtlog/parsers/nodes/sglang.py @@ -0,0 +1,533 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""SGLang node log parser. +Parses logs with format: + [2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m ... Decode batch, #running-req: 5, ... +This parser handles SGLang structured logging format with ISO 8601 timestamps. +""" + +from __future__ import annotations + +import logging +import os +import re +from datetime import datetime +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from analysis.srtlog.models import BatchMetrics, MemoryMetrics, NodeInfo, NodeMetadata, NodeMetrics +from analysis.srtlog.parsers import register_node_parser + +if TYPE_CHECKING: + from analysis.srtlog.parsers import NodeLaunchCommand + +logger = logging.getLogger(__name__) + + +# ANSI escape code pattern for stripping colors +ANSI_ESCAPE = re.compile(r"\x1b\[[0-9;]*m") + + +@register_node_parser("sglang") +class SGLangNodeParser: + """Parser for SGLang node logs. + Handles SGLang structured logging with ISO 8601 timestamps. + May contain ANSI color codes which are stripped during parsing. + + Timestamp format: YYYY-MM-DDTHH:MM:SS.microsZ (e.g., 2025-12-30T15:52:38.206058Z) + """ + + @property + def backend_type(self) -> str: + return "sglang" + + @staticmethod + def parse_timestamp(timestamp: str) -> datetime: + """Parse SGLang timestamp format to datetime object. + + Args: + timestamp: Timestamp string in ISO 8601 format (e.g., 2025-12-30T15:52:38.206058Z) + + Returns: + datetime object + + Raises: + ValueError: If timestamp format is invalid + """ + # Handle both with and without microseconds and timezone + timestamp = timestamp.rstrip("Z") + if "." in timestamp: + return datetime.fromisoformat(timestamp) + else: + return datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S") + + def parse_logs(self, log_dir: Path) -> list[NodeInfo]: + """Parse all prefill/decode/agg log files in a directory. + Args: + log_dir: Directory containing *_prefill_*.out, *_decode_*.out, *_agg_*.out files + Returns: + List of NodeInfo objects + """ + log_dir = Path(log_dir) + nodes = [] + + if not log_dir.exists(): + logger.error("Log directory does not exist: %s", log_dir) + return nodes + + # Find all worker log files + for file in os.listdir(log_dir): + if not (file.endswith(".err") or file.endswith(".out")): + continue + if not any(wt in file for wt in ("prefill", "decode", "agg")): + continue + + filepath = log_dir / file + node = self.parse_single_log(filepath) + if node: + nodes.append(node) + + logger.info("Parsed %d node log files from %s", len(nodes), log_dir) + return nodes + + def parse_single_log(self, log_path: Path) -> NodeInfo | None: + """Parse a single node log file. + Args: + log_path: Path to a prefill/decode/agg log file + Returns: + NodeInfo object or None if parsing failed + """ + node_info = self._extract_node_info_from_filename(str(log_path)) + if not node_info: + logger.warning( + "Could not extract node info from filename: %s. 
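# Aside: the two SGLang cleanup steps on a fabricated log line -- strip ANSI
# color codes, then parse the leading ISO 8601 timestamp the way
# parse_timestamp() does for values with microseconds.
import re
from datetime import datetime

ANSI = re.compile(r"\x1b\[[0-9;]*m")
line = "\x1b[2m2025-12-30T15:52:38.206058Z\x1b[0m \x1b[32m INFO\x1b[0m Decode batch, #running-req: 5"
clean = ANSI.sub("", line)
parsed = datetime.fromisoformat(clean.split(" ", 1)[0].rstrip("Z"))
assert parsed.year == 2025 and parsed.microsecond == 206058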
" "Expected format: __.err or .out", + log_path, + ) + return None + + batches = [] + memory_snapshots = [] + config = {} + launch_command = None + full_content = [] + + try: + with open(log_path) as f: + for line in f: + full_content.append(line) + # Strip ANSI escape codes + clean_line = ANSI_ESCAPE.sub("", line) + + # Parse prefill batch metrics + batch_metrics = self._parse_prefill_batch_line(clean_line) + if batch_metrics: + batches.append( + BatchMetrics( + timestamp=batch_metrics["timestamp"], + dp=batch_metrics.get("dp", 0), + tp=batch_metrics.get("tp", 0), + ep=batch_metrics.get("ep", 0), + batch_type=batch_metrics["type"], + new_seq=batch_metrics.get("new_seq"), + new_token=batch_metrics.get("new_token"), + cached_token=batch_metrics.get("cached_token"), + token_usage=batch_metrics.get("token_usage"), + running_req=batch_metrics.get("running_req"), + queue_req=batch_metrics.get("queue_req"), + prealloc_req=batch_metrics.get("prealloc_req"), + inflight_req=batch_metrics.get("inflight_req"), + input_throughput=batch_metrics.get("input_throughput"), + ) + ) + + # Parse decode batch metrics + decode_metrics = self._parse_decode_batch_line(clean_line) + if decode_metrics: + batches.append( + BatchMetrics( + timestamp=decode_metrics["timestamp"], + dp=decode_metrics.get("dp", 0), + tp=decode_metrics.get("tp", 0), + ep=decode_metrics.get("ep", 0), + batch_type=decode_metrics["type"], + running_req=decode_metrics.get("running_req"), + queue_req=decode_metrics.get("queue_req"), + prealloc_req=decode_metrics.get("prealloc_req"), + transfer_req=decode_metrics.get("transfer_req"), + token_usage=decode_metrics.get("token_usage"), + preallocated_usage=decode_metrics.get("preallocated_usage"), + num_tokens=decode_metrics.get("num_tokens"), + gen_throughput=decode_metrics.get("gen_throughput"), + ) + ) + + # Parse memory metrics + mem_metrics = self._parse_memory_line(clean_line) + if mem_metrics: + memory_snapshots.append( + MemoryMetrics( + timestamp=mem_metrics["timestamp"], + dp=mem_metrics.get("dp", 0), + tp=mem_metrics.get("tp", 0), + ep=mem_metrics.get("ep", 0), + metric_type=mem_metrics["type"], + avail_mem_gb=mem_metrics.get("avail_mem_gb"), + mem_usage_gb=mem_metrics.get("mem_usage_gb"), + kv_cache_gb=mem_metrics.get("kv_cache_gb"), + kv_tokens=mem_metrics.get("kv_tokens"), + ) + ) + + # Extract TP/DP/EP configuration from server_args + if "tp_size=" in clean_line: + tp_match = re.search(r"tp_size=(\d+)", clean_line) + dp_match = re.search(r"dp_size=(\d+)", clean_line) + ep_match = re.search(r"ep_size=(\d+)", clean_line) + + if tp_match: + config["tp_size"] = int(tp_match.group(1)) + if dp_match: + config["dp_size"] = int(dp_match.group(1)) + if ep_match: + config["ep_size"] = int(ep_match.group(1)) + + # Parse launch command from full content + launch_command = self.parse_launch_command("".join(full_content), node_info["worker_type"]) + + except Exception as e: + logger.error("Error parsing %s: %s", log_path, e) + return None + + total_metrics = len(batches) + len(memory_snapshots) + if total_metrics == 0: + logger.debug("Parsed %s but found no batch/memory metrics", log_path) + + logger.debug("Parsed %s: %d batches, %d memory snapshots", log_path, len(batches), len(memory_snapshots)) + + # Create NodeMetadata + node_metadata = NodeMetadata( + node_name=node_info["node"], + worker_type=node_info["worker_type"], + worker_id=node_info["worker_id"], + ) + + # Create NodeMetrics with metadata + metrics = NodeMetrics( + metadata=node_metadata, + batches=batches, + 
memory_snapshots=memory_snapshots, + config=config, + ) + + # Create NodeConfig with launch_command + node_config = {} + if launch_command: + node_config["launch_command"] = launch_command + node_config["environment"] = {} # Will be populated by NodeAnalyzer if config file exists + + # Return complete NodeInfo + return NodeInfo(metrics=metrics, node_config=node_config if node_config else None) + + def _parse_timestamp(self, line: str) -> str | None: + """Extract timestamp from log line. + + Supports two formats: + - Tagged format: [2025-11-04 05:31:43 DP0 TP0 EP0] + - ISO format: 2025-12-30T15:52:38.206058Z (fallback) + + Returns the timestamp string as-is without conversion. + """ + # Try tagged format first (YYYY-MM-DD HH:MM:SS) + match = re.search(r"\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})", line) + if match: + return match.group(1) + + # Fall back to ISO format + match = re.search(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z?)", line) + if match: + return match.group(1) + + return None + + def _parse_parallelism_tags(self, line: str) -> tuple[int, int, int]: + """Extract DP, TP, EP indices from log line prefix. + + Supports three formats: + - Full: [2025-11-04 05:31:43 DP0 TP0 EP0] + - Simple TP: [2025-11-04 07:05:55 TP0] (defaults DP=0, EP=0) + - Pipeline: [2025-12-08 14:34:44 PP0] (defaults DP=0, EP=0, TP=PP value) + + Args: + line: Log line to parse + + Returns: + (dp, tp, ep) tuple with default values of 0 if not found + """ + # Try full format first: DP0 TP0 EP0 + match = re.search(r"DP(\d+)\s+TP(\d+)\s+EP(\d+)", line) + if match: + return int(match.group(1)), int(match.group(2)), int(match.group(3)) + + # Try simple format: TP0 only (1P4D style) + match = re.search(r"\[\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} TP(\d+)\]", line) + if match: + return 0, int(match.group(1)), 0 # Default DP=0, EP=0 + + # Try pipeline parallelism format: PP0 (prefill with PP) + match = re.search(r"\[\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} PP(\d+)\]", line) + if match: + return 0, int(match.group(1)), 0 # Map PP to TP slot, default DP=0, EP=0 + + # Default: no parallelism tags found + return 0, 0, 0 + + def _parse_prefill_batch_line(self, line: str) -> dict | None: + """Parse prefill batch log line for metrics.""" + if "Prefill batch" not in line: + return None + + # Parse timestamp and parallelism tags separately + timestamp = self._parse_timestamp(line) + if not timestamp: + return None + + dp, tp, ep = self._parse_parallelism_tags(line) + + metrics = { + "timestamp": timestamp, + "type": "prefill", + "dp": dp, + "tp": tp, + "ep": ep, + } + + patterns = { + "new_seq": r"#new-seq:\s*(\d+)", + "new_token": r"#new-token:\s*(\d+)", + "cached_token": r"#cached-token:\s*(\d+)", + "token_usage": r"token usage:\s*([\d.]+)", + "running_req": r"#running-req:\s*(\d+)", + "queue_req": r"#queue-req:\s*(\d+)", + "prealloc_req": r"#prealloc-req:\s*(\d+)", + "inflight_req": r"#inflight-req:\s*(\d+)", + "input_throughput": r"input throughput \(token/s\):\s*([\d.]+)", + } + + for key, pattern in patterns.items(): + match = re.search(pattern, line) + if match: + value = match.group(1) + metrics[key] = float(value) if "." 
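# Aside: the three tag layouts _parse_parallelism_tags() accepts, exercised on
# fabricated log prefixes. The TP-only and PP-only regexes are condensed into
# one alternation here; the parser keeps them separate.
import re

def tags(line: str) -> tuple[int, int, int]:
    m = re.search(r"DP(\d+)\s+TP(\d+)\s+EP(\d+)", line)
    if m:
        return int(m.group(1)), int(m.group(2)), int(m.group(3))
    m = re.search(r"\[\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} (?:TP|PP)(\d+)\]", line)
    if m:
        return 0, int(m.group(1)), 0  # TP-only and PP-only land in the TP slot
    return 0, 0, 0

assert tags("[2025-11-04 05:31:43 DP1 TP2 EP3] Prefill batch") == (1, 2, 3)
assert tags("[2025-11-04 07:05:55 TP4] Decode batch") == (0, 4, 0)
assert tags("[2025-12-08 14:34:44 PP2] Prefill batch") == (0, 2, 0)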
in value else int(value) + + return metrics + + def _parse_decode_batch_line(self, line: str) -> dict | None: + """Parse decode batch log line for metrics.""" + if "Decode batch" not in line: + return None + + # Parse timestamp and parallelism tags separately + timestamp = self._parse_timestamp(line) + if not timestamp: + return None + + dp, tp, ep = self._parse_parallelism_tags(line) + + metrics = { + "timestamp": timestamp, + "type": "decode", + "dp": dp, + "tp": tp, + "ep": ep, + } + + patterns = { + "running_req": r"#running-req:\s*(\d+)", + "num_tokens": r"#token:\s*(\d+)", + "token_usage": r"token usage:\s*([\d.]+)", + "preallocated_usage": r"pre-allocated usage:\s*([\d.]+)", + "prealloc_req": r"#prealloc-req:\s*(\d+)", + "transfer_req": r"#transfer-req:\s*(\d+)", + "queue_req": r"#queue-req:\s*(\d+)", + "gen_throughput": r"gen throughput \(token/s\):\s*([\d.]+)", + } + + for key, pattern in patterns.items(): + match = re.search(pattern, line) + if match: + value = match.group(1) + metrics[key] = float(value) if "." in value else int(value) + + return metrics + + def _parse_memory_line(self, line: str) -> dict | None: + """Parse memory-related log lines.""" + # Parse timestamp and parallelism tags separately + timestamp = self._parse_timestamp(line) + if not timestamp: + return None + + dp, tp, ep = self._parse_parallelism_tags(line) + + metrics = { + "timestamp": timestamp, + "dp": dp, + "tp": tp, + "ep": ep, + } + + # Parse available memory from "avail mem=75.11 GB" + avail_match = re.search(r"avail mem=([\d.]+)\s*GB", line) + if avail_match: + metrics["avail_mem_gb"] = float(avail_match.group(1)) + metrics["type"] = "memory" + + # Parse memory usage from "mem usage=107.07 GB" + usage_match = re.search(r"mem usage=([\d.]+)\s*GB", line) + if usage_match: + metrics["mem_usage_gb"] = float(usage_match.group(1)) + metrics["type"] = "memory" + + # Parse KV cache size from "KV size: 17.16 GB" + kv_match = re.search(r"KV size:\s*([\d.]+)\s*GB", line) + if kv_match: + metrics["kv_cache_gb"] = float(kv_match.group(1)) + metrics["type"] = "kv_cache" + + # Parse token count from "#tokens: 524288" + token_match = re.search(r"#tokens:\s*(\d+)", line) + if token_match: + metrics["kv_tokens"] = int(token_match.group(1)) + + # Parse from "Capturing batches" progress lines + # Example: "Capturing batches (bs=256 avail_mem=6.32 GB)" + capture_match = re.search(r"avail_mem=([\d.]+)\s*GB", line) + if capture_match and "type" not in metrics: + metrics["avail_mem_gb"] = float(capture_match.group(1)) + metrics["type"] = "memory" + + return metrics if "type" in metrics else None + + def _extract_node_info_from_filename(self, filename: str) -> dict | None: + """Extract node name and worker info from filename. + Example: eos0219_prefill_w0.out + Returns: {'node': 'eos0219', 'worker_type': 'prefill', 'worker_id': 'w0'} + """ + match = re.match( + r"(.+)_(prefill|decode|agg|frontend)_([^.]+)\.(err|out)", + os.path.basename(filename), + ) + if match: + return { + "node": match.group(1), + "worker_type": match.group(2), + "worker_id": match.group(3), + } + return None + + def parse_launch_command(self, log_content: str, worker_type: str = "unknown") -> NodeLaunchCommand | None: + """Parse the SGLang worker launch command from log content. + Looks for command lines or ServerArgs in the log. 
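# Aside: the filename convention parsed by _extract_node_info_from_filename();
# the greedy (.+) keeps node names containing underscores intact because the
# worker-type token anchors the split. Filenames below are invented examples.
import re

pattern = re.compile(r"(.+)_(prefill|decode|agg|frontend)_([^.]+)\.(err|out)")
m = pattern.match("r02-p01-dgx-c11_prefill_w0.out")
assert m and m.group(1, 2, 3) == ("r02-p01-dgx-c11", "prefill", "w0")
m = pattern.match("watchtower_navy_cn01_decode_w1.err")
assert m and m.group(1) == "watchtower_navy_cn01"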
+ Args: + log_content: Content of the worker log file + worker_type: Type of worker (prefill, decode, agg) + Returns: + NodeLaunchCommand with parsed parameters, or None if not found + """ + from analysis.srtlog.parsers import NodeLaunchCommand + + # Strip ANSI codes for cleaner parsing + clean_content = ANSI_ESCAPE.sub("", log_content) + + raw_command = None + + # First, try to find [CMD] tagged command (preferred - from our scripts) + cmd_match = re.search(r"\[CMD\]\s*(.+)$", clean_content, re.MULTILINE) + if cmd_match: + raw_command = cmd_match.group(1).strip() + + # Fallback: pattern to match sglang launch commands + if not raw_command: + patterns = [ + r"(python[3]?\s+-m\s+sglang\.launch_server\s+[^\n]+)", + r"(python[3]?\s+.*launch_server\.py\s+[^\n]+)", + r"(sglang\.launch_server\s+[^\n]+)", + ] + + for pattern in patterns: + match = re.search(pattern, clean_content, re.IGNORECASE) + if match: + raw_command = match.group(1).strip() + break + + # Also try to parse from ServerArgs() log line + if not raw_command: + server_args_match = re.search(r"server_args=ServerArgs\((.*?)\)", clean_content, re.DOTALL) + if server_args_match: + raw_command = f"ServerArgs({server_args_match.group(1)[:200]}...)" + + if not raw_command: + return None + + extra_args: dict[str, Any] = {} + + # Parse SGLang server arguments (from command line) + arg_patterns = { + "model_path": r"--model(?:-path)?[=\s]+([^\s]+)", + "served_model_name": r"--served-model-name[=\s]+([^\s]+)", + "tp_size": r"--tp-size[=\s]+(\d+)", + "pp_size": r"--pp-size[=\s]+(\d+)", + "dp_size": r"--dp-size[=\s]+(\d+)", + "ep_size": r"--ep-size[=\s]+(\d+)", + "host": r"--host[=\s]+([^\s]+)", + "port": r"--port[=\s]+(\d+)", + "max_num_seqs": r"--max-(?:num-seqs|running-requests)[=\s]+(\d+)", + "max_model_len": r"--(?:max-model-len|context-length)[=\s]+(\d+)", + "kv_cache_dtype": r"--kv-cache-dtype[=\s]+([^\s]+)", + "gpu_memory_utilization": r"--(?:mem-fraction-static|gpu-memory-utilization)[=\s]+([\d.]+)", + "disaggregation_mode": r"--disaggregation-mode[=\s]+([^\s]+)", + "nccl_init_addr": r"--(?:dist-init-addr|nccl-init-addr)[=\s]+([^\s]+)", + } + + # Also parse from ServerArgs format + server_args_patterns = { + "model_path": r"model_path=['\"]?([^'\"]+)['\"]?", + "served_model_name": r"served_model_name=['\"]?([^'\"]+)['\"]?", + "tp_size": r"tp_size=(\d+)", + "pp_size": r"pp_size=(\d+)", + "dp_size": r"dp_size=(\d+)", + "ep_size": r"ep_size=(\d+)", + "host": r"host=['\"]?([^'\"]+)['\"]?", + "port": r"port=(\d+)", + "max_num_seqs": r"max_running_requests=(\d+)", + "max_model_len": r"context_length=(\d+)", + "disaggregation_mode": r"disaggregation_mode=['\"]?([^'\"]+)['\"]?", + } + + for field, pattern in arg_patterns.items(): + match = re.search(pattern, raw_command) + if match: + value: Any = match.group(1) + if field in ("tp_size", "pp_size", "dp_size", "ep_size", "port", "max_num_seqs", "max_model_len"): + value = int(value) + elif field == "gpu_memory_utilization": + value = float(value) + extra_args[field] = value + + # Try ServerArgs patterns for any missing fields + for field, pattern in server_args_patterns.items(): + if field not in extra_args: + match = re.search(pattern, clean_content) + if match: + value = match.group(1) + if field in ("tp_size", "pp_size", "dp_size", "ep_size", "port", "max_num_seqs", "max_model_len"): + value = int(value) + extra_args[field] = value + + return NodeLaunchCommand( + backend_type="sglang", + worker_type=worker_type, + raw_command=raw_command, + extra_args=extra_args, + ) diff --git 
a/analysis/srtlog/parsers/nodes/trtllm.py b/analysis/srtlog/parsers/nodes/trtllm.py new file mode 100644 index 00000000..0cfc9dc5 --- /dev/null +++ b/analysis/srtlog/parsers/nodes/trtllm.py @@ -0,0 +1,560 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""TRTLLM node log parser. +Parses logs from TensorRT-LLM workers launched via dynamo.trtllm. +Example log format: + [33mRank0 run python3 -m dynamo.trtllm --model-path /model --served-model-name dsr1-fp8 ... + Initializing the worker with config: Config(namespace=dynamo, component=prefill, ...) +""" + +from __future__ import annotations + +import logging +import os +import re +from datetime import datetime +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from analysis.srtlog.models import BatchMetrics, MemoryMetrics, NodeInfo, NodeMetadata, NodeMetrics +from analysis.srtlog.parsers import register_node_parser + +if TYPE_CHECKING: + from analysis.srtlog.parsers import NodeLaunchCommand + +logger = logging.getLogger(__name__) + +# ANSI escape code pattern for stripping colors +ANSI_ESCAPE = re.compile(r"\x1b\[[0-9;]*m") + + +@register_node_parser("trtllm") +class TRTLLMNodeParser: + """Parser for TensorRT-LLM node logs. + Parses logs from TRTLLM workers, including: + - Launch command from dynamo.trtllm + - Worker configuration from Config() dump + - MPI rank and world size information + + Timestamp format: MM/DD/YYYY-HH:MM:SS (e.g., 01/23/2026-08:04:38) + """ + + @property + def backend_type(self) -> str: + return "trtllm" + + @staticmethod + def parse_timestamp(timestamp: str) -> datetime: + """Parse TRTLLM timestamp format to datetime object. + + Args: + timestamp: Timestamp string in format MM/DD/YYYY-HH:MM:SS + + Returns: + datetime object + + Raises: + ValueError: If timestamp format is invalid + """ + return datetime.strptime(timestamp, "%m/%d/%Y-%H:%M:%S") + + def _extract_timestamp(self, line: str) -> str | None: + """Extract timestamp string from log line. + + Supports format: [MM/DD/YYYY-HH:MM:SS ...] + + Returns: + Timestamp string or None if not found + """ + match = re.search(r"\[(\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2})", line) + if match: + return match.group(1) + return None + + def _parse_parallelism_tags(self, line: str) -> tuple[int, int, int]: + """Extract DP, TP, EP indices from TRTLLM log line. + + Supports three formats: + - Full: [01/23/2026-08:04:38 DP1 TP2 EP3] + - Simple TP: [01/23/2026-08:04:38 TP0] (defaults DP=0, EP=0) + - Pipeline: [01/23/2026-08:04:38 PP3] (defaults DP=0, EP=0, TP=PP value) + + Args: + line: Log line to parse + + Returns: + (dp, tp, ep) tuple with default values of 0 if not found + """ + # Try full format first: DP0 TP0 EP0 + match = re.search(r"DP(\d+)\s+TP(\d+)\s+EP(\d+)", line) + if match: + return int(match.group(1)), int(match.group(2)), int(match.group(3)) + + # Try simple format: TP0 only (1P4D style) + match = re.search(r"\[\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2} TP(\d+)\]", line) + if match: + return 0, int(match.group(1)), 0 # Default DP=0, EP=0 + + # Try pipeline parallelism format: PP0 + match = re.search(r"\[\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2} PP(\d+)\]", line) + if match: + return 0, int(match.group(1)), 0 # Map PP to TP slot, default DP=0, EP=0 + + # Default: no parallelism tags found + return 0, 0, 0 + + def parse_logs(self, log_dir: Path) -> list[NodeInfo]: + """Parse all TRTLLM node logs in a directory. 
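# Aside: the TRTLLM timestamp handling on a fabricated log prefix -- extract
# the bracketed MM/DD/YYYY-HH:MM:SS stamp, then parse it with the matching
# strptime format used by parse_timestamp().
import re
from datetime import datetime

line = "[01/23/2026-08:04:38 TP0] Initializing the worker"
m = re.search(r"\[(\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2})", line)
assert m is not None
parsed = datetime.strptime(m.group(1), "%m/%d/%Y-%H:%M:%S")
assert (parsed.month, parsed.day, parsed.year) == (1, 23, 2026)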
+ Args: + log_dir: Directory containing *_prefill_*.out, *_decode_*.out files + Returns: + List of NodeInfo objects + """ + log_dir = Path(log_dir) + nodes = [] + + if not log_dir.exists(): + logger.error("Log directory does not exist: %s", log_dir) + return nodes + + # Find all worker log files + for file in os.listdir(log_dir): + if not (file.endswith(".err") or file.endswith(".out")): + continue + if not any(wt in file for wt in ("prefill", "decode", "agg")): + continue + + filepath = log_dir / file + node = self.parse_single_log(filepath) + if node: + nodes.append(node) + + logger.info("Parsed %d TRTLLM node log files from %s", len(nodes), log_dir) + return nodes + + def parse_single_log(self, log_path: Path) -> NodeInfo | None: + """Parse a single TRTLLM log file. + Args: + log_path: Path to a node log file + Returns: + NodeInfo object or None if parsing failed + """ + node_info = self._extract_node_info_from_filename(str(log_path)) + if not node_info: + logger.warning("Could not extract node info from filename: %s", log_path) + return None + + batches = [] + memory_snapshots = [] + config = {} + launch_command = None + + try: + # Handle encoding issues gracefully + content = log_path.read_text(errors="replace") + clean_content = ANSI_ESCAPE.sub("", content) + + # Parse launch command + launch_command = self.parse_launch_command(clean_content, node_info["worker_type"]) + + # Extract MPI configuration + mpi_size_match = re.search(r"tllm_mpi_size:\s*(\d+)", clean_content) + if mpi_size_match: + config["mpi_world_size"] = int(mpi_size_match.group(1)) + + # Extract TP/PP from Config() dump + config_match = re.search(r"Config\((.*?)\)", clean_content) + if config_match: + config_str = config_match.group(1) + + tp_match = re.search(r"tensor_parallel_size=(\d+)", config_str) + if tp_match: + config["tp_size"] = int(tp_match.group(1)) + + pp_match = re.search(r"pipeline_parallel_size=(\d+)", config_str) + if pp_match: + config["pp_size"] = int(pp_match.group(1)) + + ep_match = re.search(r"expert_parallel_size=(\d+)", config_str) + if ep_match: + config["ep_size"] = int(ep_match.group(1)) + + max_batch_match = re.search(r"max_batch_size=(\d+)", config_str) + if max_batch_match: + config["max_batch_size"] = int(max_batch_match.group(1)) + + max_tokens_match = re.search(r"max_num_tokens=(\d+)", config_str) + if max_tokens_match: + config["max_num_tokens"] = int(max_tokens_match.group(1)) + + max_seq_match = re.search(r"max_seq_len=(\d+)", config_str) + if max_seq_match: + config["max_seq_len"] = int(max_seq_match.group(1)) + + # Extract from separate trtllm_config YAML references + yaml_match = re.search(r"extra_engine_args=([^\s,]+\.yaml)", clean_content) + if yaml_match: + config["extra_engine_args"] = yaml_match.group(1) + + # Also extract from TensorRT-LLM engine args line which has actual parallelism + engine_args_match = re.search(r"TensorRT-LLM engine args:\s*\{([^}]+)", clean_content) + if engine_args_match: + engine_str = engine_args_match.group(1) + + engine_tp_match = re.search(r"'tensor_parallel_size':\s*(\d+)", engine_str) + if engine_tp_match: + config["tp_size"] = int(engine_tp_match.group(1)) + + engine_pp_match = re.search(r"'pipeline_parallel_size':\s*(\d+)", engine_str) + if engine_pp_match: + config["pp_size"] = int(engine_pp_match.group(1)) + + engine_ep_match = re.search(r"'moe_expert_parallel_size':\s*(\d+)", engine_str) + if engine_ep_match: + config["ep_size"] = int(engine_ep_match.group(1)) + + engine_batch_match = re.search(r"'max_batch_size':\s*(\d+)", engine_str) + 
if engine_batch_match: + config["max_batch_size"] = int(engine_batch_match.group(1)) + + engine_tokens_match = re.search(r"'max_num_tokens':\s*(\d+)", engine_str) + if engine_tokens_match: + config["max_num_tokens"] = int(engine_tokens_match.group(1)) + + engine_seq_match = re.search(r"'max_seq_len':\s*(\d+)", engine_str) + if engine_seq_match: + config["max_seq_len"] = int(engine_seq_match.group(1)) + + # Parse iteration logs for batch metrics + # Format: iter = X, ... num_scheduled_requests: X, states = {'num_ctx_requests': X, 'num_ctx_tokens': X, 'num_generation_tokens': X} + batches = self._parse_iteration_logs(clean_content, node_info.get("worker_type", "unknown")) + + # Parse memory info + memory_snapshots = self._parse_memory_info(clean_content) + + except Exception as e: + logger.error("Error parsing %s: %s", log_path, e) + return None + + logger.debug( + "Parsed %s: %d batches, %d memory snapshots, config=%s", + log_path, + len(batches), + len(memory_snapshots), + config, + ) + + # Create NodeMetadata + node_metadata = NodeMetadata( + node_name=node_info["node"], + worker_type=node_info["worker_type"], + worker_id=node_info["worker_id"], + ) + + # Create NodeMetrics with metadata + metrics = NodeMetrics( + metadata=node_metadata, + batches=batches, + memory_snapshots=memory_snapshots, + config=config, + ) + + # Create NodeConfig with launch_command + node_config = {} + if launch_command: + node_config["launch_command"] = launch_command + node_config["environment"] = {} # Will be populated by NodeAnalyzer if config file exists + + # Return complete NodeInfo + return NodeInfo(metrics=metrics, node_config=node_config if node_config else None) + + def _parse_iteration_logs(self, content: str, worker_type: str) -> list[BatchMetrics]: + """Parse TRTLLM iteration logs for batch metrics. 
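+
+ When host_step_time is reported, per-iteration throughput is computed as tokens / step time: with the values from this PR's test fixtures, 40960 ctx tokens in 62.5 ms give a prefill input throughput of 655,360 tok/s.
+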
+ Format: + [01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 5559, ..., num_scheduled_requests: 1, + states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 3} + Args: + content: Log file content (ANSI stripped) + worker_type: Worker type (prefill, decode) + Returns: + List of BatchMetrics objects + """ + batches = [] + + # Pattern to match TRTLLM iteration logs + iter_pattern = re.compile( + r"iter\s*=\s*(\d+).*" r"num_scheduled_requests:\s*(\d+).*" r"states\s*=\s*\{([^}]+)\}" + ) + + for match in iter_pattern.finditer(content): + # Extract timestamp and parallelism from the line + line_start = content.rfind("\n", 0, match.start()) + 1 + line_end = content.find("\n", match.end()) + if line_end == -1: + line_end = len(content) + full_line = content[line_start:line_end] + + timestamp = self._extract_timestamp(full_line) + if not timestamp: + continue + + dp, tp, ep = self._parse_parallelism_tags(full_line) + + # iteration = int(match.group(1)) # Not used currently + num_scheduled = int(match.group(2)) + states_str = match.group(3) + + # Parse states dict + ctx_tokens = 0 + gen_tokens = 0 + + # Skip ctx_requests as it's not used + # ctx_req_match = re.search(r"'num_ctx_requests':\s*(\d+)", states_str) + # if ctx_req_match: + # ctx_requests = int(ctx_req_match.group(1)) + + ctx_tok_match = re.search(r"'num_ctx_tokens':\s*(\d+)", states_str) + if ctx_tok_match: + ctx_tokens = int(ctx_tok_match.group(1)) + + gen_tok_match = re.search(r"'num_generation_tokens':\s*(\d+)", states_str) + if gen_tok_match: + gen_tokens = int(gen_tok_match.group(1)) + + # Determine batch type based on content + if ctx_tokens > 0: + batch_type = "prefill" + elif gen_tokens > 0: + batch_type = "decode" + else: + batch_type = worker_type + + # Parse step time if available + step_time = None + step_match = re.search(r"host_step_time\s*=\s*([\d.]+)ms", match.group(0)) + if step_match: + step_time = float(step_match.group(1)) + + # Compute throughput (tokens/s) + input_throughput = None + gen_throughput = None + if step_time and step_time > 0: + if batch_type == "prefill" and ctx_tokens > 0: + # Prefill throughput: context tokens / step time + input_throughput = (ctx_tokens * 1000.0) / step_time + elif batch_type == "decode" and gen_tokens > 0: + # Decode throughput: generation tokens / step time + gen_throughput = (gen_tokens * 1000.0) / step_time + + batches.append( + BatchMetrics( + timestamp=timestamp, + dp=dp, + tp=tp, + ep=ep, + batch_type=batch_type, + running_req=num_scheduled, + new_token=ctx_tokens if batch_type == "prefill" else None, + num_tokens=gen_tokens if batch_type == "decode" else None, + input_throughput=input_throughput, + gen_throughput=gen_throughput, + ) + ) + + return batches + + def _parse_memory_info(self, content: str) -> list[MemoryMetrics]: + """Parse TRTLLM memory information. + Format: + Peak memory during memory usage profiling (torch + non-torch): 91.46 GiB, + available KV cache memory when calculating max tokens: 41.11 GiB, + fraction is set 0.85, kv size is 35136. device total memory 139.81 GiB + Args: + content: Log file content (ANSI stripped) + Returns: + List of MemoryMetrics objects + """ + memory_snapshots = [] + + # Pattern to match memory info + mem_pattern = re.compile( + r"Peak memory.*?:\s*([\d.]+)\s*GiB.*?" + r"available KV cache memory.*?:\s*([\d.]+)\s*GiB.*?" 
+ r"device total memory\s*([\d.]+)\s*GiB" + ) + + for match in mem_pattern.finditer(content): + # Extract timestamp and parallelism from the line + line_start = content.rfind("\n", 0, match.start()) + 1 + line_end = content.find("\n", match.end()) + if line_end == -1: + line_end = len(content) + full_line = content[line_start:line_end] + + timestamp = self._extract_timestamp(full_line) + if not timestamp: + timestamp = "" # Some memory lines may not have timestamps + + dp, tp, ep = self._parse_parallelism_tags(full_line) + + peak_mem = float(match.group(1)) + avail_kv = float(match.group(2)) + total_mem = float(match.group(3)) + + memory_snapshots.append( + MemoryMetrics( + timestamp=timestamp, + dp=dp, + tp=tp, + ep=ep, + metric_type="memory", + mem_usage_gb=peak_mem, + avail_mem_gb=total_mem - peak_mem, + kv_cache_gb=avail_kv, + ) + ) + + # Also parse KV cache allocation info (no timestamp/DP/TP/EP for these) + kv_alloc_pattern = re.compile(r"\[MemUsageChange\] Allocated\s*([\d.]+)\s*GiB for max tokens.*?\((\d+)\)") + + for match in kv_alloc_pattern.finditer(content): + kv_gb = float(match.group(1)) + max_tokens = int(match.group(2)) + + memory_snapshots.append( + MemoryMetrics( + timestamp="", + dp=0, + tp=0, + ep=0, + metric_type="kv_cache", + kv_cache_gb=kv_gb, + kv_tokens=max_tokens, + ) + ) + + return memory_snapshots + + def _extract_node_info_from_filename(self, filename: str) -> dict | None: + """Extract node name and worker info from filename. + Example: worker-0_prefill_w0.out + Returns: {'node': 'worker-0', 'worker_type': 'prefill', 'worker_id': 'w0'} + """ + match = re.match( + r"(.+)_(prefill|decode|agg|frontend)_([^.]+)\.(err|out)", + os.path.basename(filename), + ) + if match: + return { + "node": match.group(1), + "worker_type": match.group(2), + "worker_id": match.group(3), + } + return None + + def parse_launch_command(self, log_content: str, worker_type: str = "unknown") -> NodeLaunchCommand | None: + """Parse the TRTLLM worker launch command from log content. 
+ Looks for command lines like: + [CMD] python3 -m dynamo.trtllm --model-path /model --served-model-name dsr1-fp8 --disaggregation-mode prefill + python3 -m dynamo.trtllm --model-path /model --served-model-name dsr1-fp8 --disaggregation-mode prefill + Args: + log_content: Content of the worker log file + worker_type: Type of worker (prefill, decode, agg) + Returns: + NodeLaunchCommand with parsed parameters, or None if not found + """ + from analysis.srtlog.parsers import NodeLaunchCommand + + # Strip ANSI codes for cleaner parsing + clean_content = ANSI_ESCAPE.sub("", log_content) + + raw_command = None + + # First, try to find [CMD] tagged command (preferred - from our scripts) + cmd_match = re.search(r"\[CMD\]\s*(.+)$", clean_content, re.MULTILINE) + if cmd_match: + raw_command = cmd_match.group(1).strip() + + # Fallback: pattern to match TRTLLM launch commands (dynamo.trtllm or tensorrt_llm.serve) + if not raw_command: + patterns = [ + r"(?:Rank\d+\s+run\s+)?(python[3]?\s+-m\s+dynamo\.trtllm\s+[^\n]+)", + r"(?:Rank\d+\s+run\s+)?(python[3]?\s+-m\s+tensorrt_llm\.serve\s+[^\n]+)", + r"(trtllm-serve\s+[^\n]+)", + r"(mpirun\s+.*trtllm[^\n]+)", + ] + + for pattern in patterns: + match = re.search(pattern, clean_content) + if match: + raw_command = match.group(1).strip() + # Remove trailing "in background" if present + raw_command = re.sub(r"\s+in\s+background$", "", raw_command) + break + + if not raw_command: + return None + + extra_args: dict[str, Any] = {} + + # Parse dynamo.trtllm / tensorrt_llm server arguments from command line + arg_patterns = { + "model_path": r"--model-path[=\s]+([^\s]+)", + "served_model_name": r"--served-model-name[=\s]+([^\s]+)", + "disaggregation_mode": r"--disaggregation-mode[=\s]+([^\s]+)", + "host": r"--host[=\s]+([^\s]+)", + "port": r"--port[=\s]+(\d+)", + } + + for field, pattern in arg_patterns.items(): + match = re.search(pattern, raw_command) + if match: + value = match.group(1) + if field == "port": + value = int(value) + extra_args[field] = value + + # Also extract from TensorRT-LLM engine args if available (has actual parallelism values) + engine_args_match = re.search(r"TensorRT-LLM engine args:\s*\{([^}]+)", clean_content) + if engine_args_match: + engine_str = engine_args_match.group(1) + + engine_patterns = { + "tp_size": r"'tensor_parallel_size':\s*(\d+)", + "pp_size": r"'pipeline_parallel_size':\s*(\d+)", + "max_num_seqs": r"'max_batch_size':\s*(\d+)", + "max_model_len": r"'max_seq_len':\s*(\d+)", + } + + for field, pattern in engine_patterns.items(): + if field not in extra_args: + match = re.search(pattern, engine_str) + if match: + extra_args[field] = int(match.group(1)) + + # Fallback to Config() dump + if "tp_size" not in extra_args: + config_match = re.search(r"Config\((.*?)\)", clean_content) + if config_match: + config_str = config_match.group(1) + + config_patterns = { + "tp_size": r"tensor_parallel_size=(\d+)", + "pp_size": r"pipeline_parallel_size=(\d+)", + "max_num_seqs": r"max_batch_size=(\d+)", + "max_model_len": r"max_seq_len=(\d+)", + } + + for field, pattern in config_patterns.items(): + if field not in extra_args: + match = re.search(pattern, config_str) + if match: + extra_args[field] = int(match.group(1)) + + return NodeLaunchCommand( + backend_type=self.backend_type, + worker_type=worker_type, + raw_command=raw_command, + extra_args=extra_args, + ) diff --git a/analysis/srtlog/run_loader.py b/analysis/srtlog/run_loader.py index f093b24a..b97c91be 100644 --- a/analysis/srtlog/run_loader.py +++ b/analysis/srtlog/run_loader.py 
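The hunks below reroute benchmark-result loading through the per-benchmark parser registry. A minimal sketch of the dispatch flow, assuming the helper signatures named in this PR (get_benchmark_parser, find_result_directory, parse_result_directory); the run layout and profiler type are illustrative:

    from pathlib import Path

    from analysis.srtlog.parsers import get_benchmark_parser

    def load_results(run_path: str, profiler_type: str, isl: int, osl: int) -> list[dict]:
        # Registry lookup; raises ValueError for unknown benchmark types.
        parser = get_benchmark_parser(profiler_type)
        # The parser owns the layout knowledge (e.g. aiperf's artifacts/ subdirectories).
        result_dir = parser.find_result_directory(Path(run_path), isl=isl, osl=osl)
        if result_dir is None:
            return []
        # One dict per concurrency level, already sorted by the parser.
        return parser.parse_result_directory(result_dir)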
@@ -7,12 +7,14 @@ import json import logging import os -import re +from pathlib import Path import pandas as pd from .cache_manager import CacheManager -from .models import BenchmarkRun +from .log_parser import NodeAnalyzer +from .models import BenchmarkRun, NodeMetrics +from .parsers import get_benchmark_parser logger = logging.getLogger(__name__) @@ -62,7 +64,7 @@ def load_all_with_skipped(self) -> tuple[list[BenchmarkRun], list[tuple[str, str run = BenchmarkRun.from_json_file(path) if run is not None: # Skip profiling jobs (they don't have benchmark results) - if run.profiler.profiler_type == "torch-profiler": + if run.profiler_metadata.profiler_type == "torch-profiler": reason = "Profiling job (no benchmark results)" logger.debug(f"Skipping profiling job {run.job_id}") skipped.append((run.job_id, run_dir, reason)) @@ -214,20 +216,34 @@ def _load_benchmark_results(self, run: BenchmarkRun) -> None: run_path = run.metadata.path # Check both run_path and run_path/logs for benchmark results - search_paths = [run_path] - logs_subdir = os.path.join(run_path, "logs") - if os.path.exists(logs_subdir): - search_paths.append(logs_subdir) - # Initialize cache manager cache_mgr = CacheManager(run_path) - # Use profiler_type from metadata to construct directory name - profiler_type = run.profiler.profiler_type - pattern_strs = [f"{profiler_type}_isl_{run.profiler.isl}_osl_{run.profiler.osl}"] + # Use profiler_type from metadata + profiler_type = run.profiler_metadata.profiler_type - # Define source patterns for cache validation (check all possible patterns) - source_patterns = [f"{pattern}/*.json" for pattern in pattern_strs] + # Get the parser for this benchmark type + try: + parser = get_benchmark_parser(profiler_type) + except ValueError as e: + logger.warning(f"No parser available for {profiler_type}: {e}") + return + + # Let the parser find its result directory + result_dir = parser.find_result_directory( + Path(run_path), isl=run.profiler_metadata.isl, osl=run.profiler_metadata.osl + ) + + if not result_dir: + logger.warning(f"No results directory found for {profiler_type} in {run_path}") + return + + # Define source patterns for cache validation (relative to run_path) + # Use recursive glob to catch nested result files (e.g., artifacts/*/profile_export_aiperf.json) + result_dir_rel = ( + result_dir.relative_to(Path(run_path)) if result_dir.is_relative_to(Path(run_path)) else result_dir.name + ) + source_patterns = [f"{result_dir_rel}/**/*.json"] # Try to load from cache first if cache_mgr.is_cache_valid("benchmark_results", source_patterns): @@ -273,127 +289,88 @@ def _load_benchmark_results(self, run: BenchmarkRun) -> None: run.profiler.add_benchmark_results(results) return - # Cache miss or invalid - parse from JSON files - for pattern_str in pattern_strs: - profiler_pattern = re.compile(pattern_str) - for search_path in search_paths: - for entry in os.listdir(search_path): - if profiler_pattern.match(entry): - result_dir = os.path.join(search_path, entry) - if os.path.isdir(result_dir): - results = self._parse_profiler_results(result_dir) - run.profiler.add_benchmark_results(results) - - # Save to cache - if results["concurrencies"]: - # Convert to DataFrame for caching - cache ALL parsed fields - cache_data = { - "concurrency": results["concurrencies"], - "output_tps": results["output_tps"], - "mean_itl_ms": results["mean_itl_ms"], - "mean_ttft_ms": results["mean_ttft_ms"], - "request_rate": results["request_rate"], - } - - # Add all optional fields if they have data - optional_fields = { 
- "mean_tpot_ms": "mean_tpot_ms", - "total_tps": "total_tps", - "request_throughput": "request_throughput", - "request_goodput": "request_goodput", - "mean_e2el_ms": "mean_e2el_ms", - "median_ttft_ms": "median_ttft_ms", - "median_tpot_ms": "median_tpot_ms", - "median_itl_ms": "median_itl_ms", - "median_e2el_ms": "median_e2el_ms", - "p99_ttft_ms": "p99_ttft_ms", - "p99_tpot_ms": "p99_tpot_ms", - "p99_itl_ms": "p99_itl_ms", - "p99_e2el_ms": "p99_e2el_ms", - "std_ttft_ms": "std_ttft_ms", - "std_tpot_ms": "std_tpot_ms", - "std_itl_ms": "std_itl_ms", - "std_e2el_ms": "std_e2el_ms", - "total_input_tokens": "total_input_tokens", - "total_output_tokens": "total_output_tokens", - } - - for result_key, cache_key in optional_fields.items(): - if results.get(result_key): - cache_data[cache_key] = results[result_key] - - cache_df = pd.DataFrame(cache_data) - cache_mgr.save_to_cache("benchmark_results", cache_df, source_patterns) - - return # Found results, stop searching - - def _parse_profiler_results(self, result_dir: str) -> dict: - """Parse profiler result JSON files. + # Cache miss - parse results + results = self._parse_profiler_results(str(result_dir), profiler_type) + run.profiler.add_benchmark_results(results) + + # Save to cache + if results["concurrencies"]: + # Convert to DataFrame for caching - cache ALL parsed fields + cache_data = { + "concurrency": results["concurrencies"], + "output_tps": results["output_tps"], + "mean_itl_ms": results["mean_itl_ms"], + "mean_ttft_ms": results["mean_ttft_ms"], + "request_rate": results["request_rate"], + } + + # Add all optional fields if they have data + optional_fields = { + "mean_tpot_ms": "mean_tpot_ms", + "total_tps": "total_tps", + "request_throughput": "request_throughput", + "request_goodput": "request_goodput", + "mean_e2el_ms": "mean_e2el_ms", + "median_ttft_ms": "median_ttft_ms", + "median_tpot_ms": "median_tpot_ms", + "median_itl_ms": "median_itl_ms", + "median_e2el_ms": "median_e2el_ms", + "p99_ttft_ms": "p99_ttft_ms", + "p99_tpot_ms": "p99_tpot_ms", + "p99_itl_ms": "p99_itl_ms", + "p99_e2el_ms": "p99_e2el_ms", + "std_ttft_ms": "std_ttft_ms", + "std_tpot_ms": "std_tpot_ms", + "std_itl_ms": "std_itl_ms", + "std_e2el_ms": "std_e2el_ms", + "total_input_tokens": "total_input_tokens", + "total_output_tokens": "total_output_tokens", + } + + for result_key, cache_key in optional_fields.items(): + if results.get(result_key): + cache_data[cache_key] = results[result_key] + + cache_df = pd.DataFrame(cache_data) + cache_mgr.save_to_cache("benchmark_results", cache_df, source_patterns) + + def _parse_profiler_results(self, result_dir: str, profiler_type: str) -> dict: + """Parse profiler result JSON files using the parser infrastructure. Args: result_dir: Path to directory containing benchmark result JSON files + profiler_type: Type of profiler/benchmark (e.g., "sa-bench", "mooncake-router") Returns: Dict with concurrencies, output_tps, mean_itl_ms, etc. 
""" - result = [] + result_dir_path = Path(result_dir) - for file in os.listdir(result_dir): - if not file.endswith(".json"): - continue + try: + # Get the appropriate parser + parser = get_benchmark_parser(profiler_type) - filepath = os.path.join(result_dir, file) - try: - with open(filepath) as f: - content = json.load(f) + # Let the parser find and parse all result files in the directory + # The parser knows where to look (e.g., artifacts/ subdirectories) + results_list = parser.parse_result_directory(result_dir_path) - # Parse all available metrics from benchmark output - res = { - "max_concurrency": content.get("max_concurrency"), - # Throughput metrics - "output_throughput": content.get("output_throughput"), - "total_token_throughput": content.get("total_token_throughput"), - "request_throughput": content.get("request_throughput"), - "request_goodput": content.get("request_goodput"), - "request_rate": content.get("request_rate"), - # Mean latencies - "mean_ttft_ms": content.get("mean_ttft_ms"), - "mean_tpot_ms": content.get("mean_tpot_ms"), - "mean_itl_ms": content.get("mean_itl_ms"), - "mean_e2el_ms": content.get("mean_e2el_ms"), - # Median latencies - "median_ttft_ms": content.get("median_ttft_ms"), - "median_tpot_ms": content.get("median_tpot_ms"), - "median_itl_ms": content.get("median_itl_ms"), - "median_e2el_ms": content.get("median_e2el_ms"), - # P99 latencies - "p99_ttft_ms": content.get("p99_ttft_ms"), - "p99_tpot_ms": content.get("p99_tpot_ms"), - "p99_itl_ms": content.get("p99_itl_ms"), - "p99_e2el_ms": content.get("p99_e2el_ms"), - # Std dev latencies - "std_ttft_ms": content.get("std_ttft_ms"), - "std_tpot_ms": content.get("std_tpot_ms"), - "std_itl_ms": content.get("std_itl_ms"), - "std_e2el_ms": content.get("std_e2el_ms"), - # Token counts - "total_input_tokens": content.get("total_input_tokens"), - "total_output_tokens": content.get("total_output_tokens"), - # Metadata - "backend": content.get("backend"), - "model_id": content.get("model_id"), - "date": content.get("date"), - "duration": content.get("duration"), - "completed": content.get("completed"), - "num_prompts": content.get("num_prompts"), - } + # Convert results to the format expected by the rest of the code + return self._convert_parser_results_to_dict(results_list) - result.append(res) - except Exception as e: - logger.warning(f"Error parsing {filepath}: {e}") - continue + except ValueError as e: + # Parser not found - fall back to manual parsing + logger.warning(f"Parser not available for {profiler_type}, falling back to manual parsing: {e}") + return self._parse_profiler_results_manual(result_dir) + + def _convert_parser_results_to_dict(self, results_list: list[dict]) -> dict: + """Convert parser results to the dict format expected by add_benchmark_results. 
- # Organize results - sort by concurrency + Args: + results_list: List of result dicts from parser (one per concurrency level) + + Returns: + Dict with lists of metrics across concurrency levels + """ out = { # Primary metrics "concurrencies": [], @@ -434,12 +411,30 @@ def _parse_profiler_results(self, result_dir: str) -> dict: "num_prompts": [], } - # Sort by concurrency and aggregate - for data in sorted(result, key=lambda x: x.get("max_concurrency", 0) or 0): - out["concurrencies"].append(data.get("max_concurrency")) - # Throughput - out["output_tps"].append(data.get("output_throughput")) - out["total_tps"].append(data.get("total_token_throughput")) + # results_list is already sorted by the parser + for data in results_list: + # Concurrency - explicit None checks to preserve 0 values + if data.get("max_concurrency") is not None: + concurrency = data.get("max_concurrency") + elif data.get("concurrency") is not None: + concurrency = data.get("concurrency") + else: + concurrency = 0 + out["concurrencies"].append(concurrency) + + # Throughput - normalize field names with explicit None checks to preserve 0.0 + if "output_throughput" in data and data["output_throughput"] is not None: + output_tps = data["output_throughput"] + else: + output_tps = data.get("output_tps") + out["output_tps"].append(output_tps) + + if "total_token_throughput" in data and data["total_token_throughput"] is not None: + total_tps = data["total_token_throughput"] + else: + total_tps = data.get("total_tps") + out["total_tps"].append(total_tps) + out["request_throughput"].append(data.get("request_throughput")) out["request_goodput"].append(data.get("request_goodput")) out["request_rate"].append(data.get("request_rate")) @@ -476,6 +471,78 @@ def _parse_profiler_results(self, result_dir: str) -> dict: return out + def _parse_profiler_results_manual(self, result_dir: str) -> dict: + """Fallback manual parser for benchmark result JSON files. + + This is kept for backward compatibility when parsers are not available. + + Args: + result_dir: Path to directory containing benchmark result JSON files + + Returns: + Dict with concurrencies, output_tps, mean_itl_ms, etc. 
+ """ + result = [] + + for file in os.listdir(result_dir): + if not file.endswith(".json"): + continue + + filepath = os.path.join(result_dir, file) + try: + with open(filepath) as f: + content = json.load(f) + + # Parse all available metrics from benchmark output + res = { + "max_concurrency": content.get("max_concurrency"), + # Throughput metrics + "output_throughput": content.get("output_throughput"), + "total_token_throughput": content.get("total_token_throughput"), + "request_throughput": content.get("request_throughput"), + "request_goodput": content.get("request_goodput"), + "request_rate": content.get("request_rate"), + # Mean latencies + "mean_ttft_ms": content.get("mean_ttft_ms"), + "mean_tpot_ms": content.get("mean_tpot_ms"), + "mean_itl_ms": content.get("mean_itl_ms"), + "mean_e2el_ms": content.get("mean_e2el_ms"), + # Median latencies + "median_ttft_ms": content.get("median_ttft_ms"), + "median_tpot_ms": content.get("median_tpot_ms"), + "median_itl_ms": content.get("median_itl_ms"), + "median_e2el_ms": content.get("median_e2el_ms"), + # P99 latencies + "p99_ttft_ms": content.get("p99_ttft_ms"), + "p99_tpot_ms": content.get("p99_tpot_ms"), + "p99_itl_ms": content.get("p99_itl_ms"), + "p99_e2el_ms": content.get("p99_e2el_ms"), + # Std dev latencies + "std_ttft_ms": content.get("std_ttft_ms"), + "std_tpot_ms": content.get("std_tpot_ms"), + "std_itl_ms": content.get("std_itl_ms"), + "std_e2el_ms": content.get("std_e2el_ms"), + # Token counts + "total_input_tokens": content.get("total_input_tokens"), + "total_output_tokens": content.get("total_output_tokens"), + # Metadata + "backend": content.get("backend"), + "model_id": content.get("model_id"), + "date": content.get("date"), + "duration": content.get("duration"), + "completed": content.get("completed"), + "num_prompts": content.get("num_prompts"), + } + + result.append(res) + except Exception as e: + logger.warning(f"Error parsing {filepath}: {e}") + continue + + # Sort by concurrency and convert to dict format + results_list = sorted(result, key=lambda x: x.get("max_concurrency", 0) or 0) + return self._convert_parser_results_to_dict(results_list) + def get_run_count(self) -> int: """Get count of valid benchmark runs in logs directory. @@ -553,9 +620,9 @@ def to_dataframe(self, runs: list[BenchmarkRun] | None = None): row = { "Run ID": run_id, "Run Date": run.metadata.run_date, - "Profiler": run.profiler.profiler_type, - "ISL": run.profiler.isl, - "OSL": run.profiler.osl, + "Profiler": run.profiler_metadata.profiler_type, + "ISL": run.profiler_metadata.isl, + "OSL": run.profiler_metadata.osl, "Prefill Nodes": run.metadata.prefill_nodes, "Decode Nodes": run.metadata.decode_nodes, "Prefill Workers": run.metadata.prefill_workers, @@ -620,3 +687,46 @@ def update_tags(self, run_path: str, tags: list[str]) -> bool: except Exception as e: logger.error(f"Error updating tags for {json_path}: {e}") return False + + def load_node_metrics(self, run_path: str, backend_type: str = "sglang") -> list[NodeMetrics]: + """Load node metrics from worker log files using NodeAnalyzer. 
+ + Args: + run_path: Path to the run directory + backend_type: Backend type (sglang or trtllm) - deprecated, auto-detected + + Returns: + List of NodeMetrics objects, one per worker + """ + # Handle both relative and absolute paths + if not os.path.isabs(run_path): + run_path = os.path.join(self.logs_dir, run_path) + + # Use NodeAnalyzer which handles caching, backend detection, and config loading + analyzer = NodeAnalyzer() + node_infos = analyzer.parse_run_logs(run_path, return_dicts=False) + + # Extract only the metrics from each NodeInfo + return [node.metrics for node in node_infos] + + def load_node_metrics_for_run(self, run: BenchmarkRun) -> list[NodeMetrics]: + """Load node metrics for a BenchmarkRun. + + Automatically detects backend type from the run's container image. + + Args: + run: BenchmarkRun object + + Returns: + List of NodeMetrics objects + """ + # Detect backend type from container + backend_type = "sglang" # Default + container = run.metadata.container.lower() + + if "trtllm" in container or "dynamo" in container or "tensorrt" in container: + backend_type = "trtllm" + elif "sglang" in container: + backend_type = "sglang" + + return self.load_node_metrics(run.metadata.path, backend_type) diff --git a/pyproject.toml b/pyproject.toml index f2f6a6e4..257463f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,10 @@ dependencies = [ "requests>=2.31.0", "rich>=13.0.0", "questionary>=2.0.0", + "pre-commit>=4.5.1", + "pandas>=2.1.0", + "pyarrow>=23.0.0", + "plotly>=6.5.2", ] [project.scripts] @@ -34,8 +38,21 @@ packages = ["src/srtctl"] dev = [ "pytest>=8.0.0", "pytest-cov>=4.0", + "typeguard>=4.0.0", # Includes pytest integration "ruff>=0.8.0", "ty", # Astral's fast type checker (replaces mypy) + # Analysis dependencies needed for tests + "streamlit>=1.30.0", + "plotly>=5.18.0", + "pandas>=2.1.0", + "pyarrow>=14.0.0", # For parquet caching +] + +analysis = [ + "streamlit>=1.30.0", + "plotly>=5.18.0", + "pandas>=2.1.0", + "pyarrow>=14.0.0", # For parquet caching ] # ============================================================================= diff --git a/src/srtctl/backends/base.py b/src/srtctl/backends/base.py index 9a2dbb34..984cbd27 100644 --- a/src/srtctl/backends/base.py +++ b/src/srtctl/backends/base.py @@ -6,7 +6,7 @@ """ from collections.abc import Sequence -from dataclasses import dataclass, field +from dataclasses import dataclass from enum import Enum from typing import TYPE_CHECKING, Any, Optional, Protocol diff --git a/src/srtctl/backends/sglang.py b/src/srtctl/backends/sglang.py index c34f1e37..756ef3bb 100644 --- a/src/srtctl/backends/sglang.py +++ b/src/srtctl/backends/sglang.py @@ -139,11 +139,8 @@ def get_kv_events_config_for_mode(self, mode: WorkerMode) -> dict[str, str] | No # Per-mode config dict if isinstance(self.kv_events_config, dict): # Normalize mode key: use "aggregated" for aggregated mode - if mode == "agg": - mode_cfg = self.kv_events_config.get("aggregated") - else: - mode_cfg = self.kv_events_config.get(mode) - + mode_cfg = self.kv_events_config.get("aggregated") if mode == "agg" else self.kv_events_config.get(mode) + if mode_cfg is None: return None if mode_cfg is True: diff --git a/src/srtctl/backends/trtllm.py b/src/srtctl/backends/trtllm.py index 572fa351..2553860f 100644 --- a/src/srtctl/backends/trtllm.py +++ b/src/srtctl/backends/trtllm.py @@ -1,7 +1,8 @@ import builtins +from collections.abc import Sequence from dataclasses import field from pathlib import Path -from typing import Any, ClassVar, Literal, Sequence, 
TYPE_CHECKING +from typing import TYPE_CHECKING, Any, ClassVar, Literal import yaml from marshmallow import Schema @@ -15,6 +16,7 @@ # Type alias for worker modes WorkerMode = Literal["prefill", "decode", "agg"] + @dataclass(frozen=True) class TRTLLMServerConfig: """SGLang server CLI configuration per mode (prefill/decode/aggregated). @@ -29,6 +31,7 @@ class TRTLLMServerConfig: Schema: ClassVar[type[Schema]] = Schema + @dataclass(frozen=True) class TRTLLMProtocol: """TRTLLM protocol - implements BackendProtocol. @@ -50,7 +53,7 @@ class TRTLLMProtocol: """ type: Literal["trtllm"] = "trtllm" - + prefill_environment: dict[str, str] = field(default_factory=dict) decode_environment: dict[str, str] = field(default_factory=dict) @@ -82,7 +85,7 @@ def get_config_for_mode(self, mode: WorkerMode) -> dict[str, Any]: elif mode == "decode": return dict(self.trtllm_config.decode or {}) elif mode == "agg": - raise ValueError(f"Aggregated mode is not supported for TRTLLM") + raise ValueError("Aggregated mode is not supported for TRTLLM") return {} def get_environment_for_mode(self, mode: WorkerMode) -> dict[str, str]: @@ -91,9 +94,9 @@ def get_environment_for_mode(self, mode: WorkerMode) -> dict[str, str]: elif mode == "decode": return dict(self.decode_environment) elif mode == "agg": - raise ValueError(f"Aggregated mode is not supported for TRTLLM") + raise ValueError("Aggregated mode is not supported for TRTLLM") return {} - + def allocate_endpoints( self, num_prefill: int, @@ -128,7 +131,7 @@ def endpoints_to_processes( from srtctl.core.topology import endpoints_to_processes return endpoints_to_processes(endpoints, base_sys_port=base_sys_port) - + def build_worker_command( self, process: "Process", @@ -168,7 +171,7 @@ def build_worker_command( "--extra-engine-args", str(container_config_path), "--request-plane", - "nats" + "nats", ] - return cmd \ No newline at end of file + return cmd diff --git a/src/srtctl/benchmarks/scripts/gpqa/bench.sh b/src/srtctl/benchmarks/scripts/gpqa/bench.sh index 01670aa2..bf2c8695 100644 --- a/src/srtctl/benchmarks/scripts/gpqa/bench.sh +++ b/src/srtctl/benchmarks/scripts/gpqa/bench.sh @@ -26,14 +26,18 @@ export OPENAI_API_KEY="${OPENAI_API_KEY:-EMPTY}" echo "Running GPQA evaluation..." -python3 -m sglang.test.run_eval \ - --base-url "${ENDPOINT}" \ - --model "${MODEL_NAME}" \ - --eval-name gpqa \ - --num-examples "${NUM_EXAMPLES}" \ - --max-tokens "${MAX_TOKENS}" \ - --repeat "${REPEAT}" \ - --num-threads "${NUM_THREADS}" +cmd=( + python3 -m sglang.test.run_eval + --base-url "$ENDPOINT" + --model "$MODEL_NAME" + --eval-name gpqa + --num-examples "$NUM_EXAMPLES" + --max-tokens "$MAX_TOKENS" + --repeat "$REPEAT" + --num-threads "$NUM_THREADS" +) +printf "[CMD] %s\n" "${cmd[*]}" +"${cmd[@]}" # Copy result file result_file=$(ls -t /tmp/gpqa_*.json 2>/dev/null | head -n1) diff --git a/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh b/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh index 7a1643b8..5cbc81c7 100644 --- a/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh +++ b/src/srtctl/benchmarks/scripts/longbenchv2/bench.sh @@ -27,26 +27,28 @@ export OPENAI_API_KEY="${OPENAI_API_KEY:-EMPTY}" echo "Running LongBench-v2 evaluation..." 
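The script conversions below replace string-built commands (expanded via eval) with bash arrays. A minimal illustration of the difference, using a made-up tool and an argument containing a space:

    NAME="a b"
    cmd_str="mytool --name $NAME"    # eval "$cmd_str" word-splits this into: mytool --name a b
    cmd=(mytool --name "$NAME")      # array keeps "a b" as one argument
    printf "[CMD] %s\n" "${cmd[*]}"  # one-line form for the [CMD] log marker
    "${cmd[@]}"                      # executes with arguments intact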
-# Build command -cmd="python3 -m sglang.test.run_eval \ - --base-url ${ENDPOINT} \ - --model ${MODEL_NAME} \ - --eval-name longbench_v2 \ - --max-tokens ${MAX_TOKENS} \ - --max-context-length ${MAX_CONTEXT_LENGTH} \ - --num-threads ${NUM_THREADS}" +# Build command array +cmd=( + python3 -m sglang.test.run_eval + --base-url "$ENDPOINT" + --model "$MODEL_NAME" + --eval-name longbench_v2 + --max-tokens "$MAX_TOKENS" + --max-context-length "$MAX_CONTEXT_LENGTH" + --num-threads "$NUM_THREADS" +) # Add optional arguments if [ -n "$NUM_EXAMPLES" ]; then - cmd="$cmd --num-examples ${NUM_EXAMPLES}" + cmd+=(--num-examples "$NUM_EXAMPLES") fi if [ -n "$CATEGORIES" ]; then - cmd="$cmd --categories ${CATEGORIES}" + cmd+=(--categories "$CATEGORIES") fi -echo "Executing: $cmd" -eval "$cmd" +printf "[CMD] %s\n" "${cmd[*]}" +"${cmd[@]}" # Copy result files result_file=$(ls -t /tmp/longbench_v2_*.json 2>/dev/null | head -n1) diff --git a/src/srtctl/benchmarks/scripts/mmlu/bench.sh b/src/srtctl/benchmarks/scripts/mmlu/bench.sh index aff149ce..2ccd9ba3 100644 --- a/src/srtctl/benchmarks/scripts/mmlu/bench.sh +++ b/src/srtctl/benchmarks/scripts/mmlu/bench.sh @@ -26,14 +26,18 @@ export OPENAI_API_KEY="${OPENAI_API_KEY:-EMPTY}" echo "Running MMLU evaluation..." -python3 -m sglang.test.run_eval \ - --base-url "${ENDPOINT}" \ - --model "${MODEL_NAME}" \ - --eval-name mmlu \ - --num-examples "${NUM_EXAMPLES}" \ - --max-tokens "${MAX_TOKENS}" \ - --repeat "${REPEAT}" \ - --num-threads "${NUM_THREADS}" +cmd=( + python3 -m sglang.test.run_eval + --base-url "$ENDPOINT" + --model "$MODEL_NAME" + --eval-name mmlu + --num-examples "$NUM_EXAMPLES" + --max-tokens "$MAX_TOKENS" + --repeat "$REPEAT" + --num-threads "$NUM_THREADS" +) +printf "[CMD] %s\n" "${cmd[*]}" +"${cmd[@]}" # Copy result file result_file=$(ls -t /tmp/mmlu_*.json 2>/dev/null | head -n1) diff --git a/src/srtctl/benchmarks/scripts/mooncake-router/bench.sh b/src/srtctl/benchmarks/scripts/mooncake-router/bench.sh index e84d711c..a21fc1a4 100644 --- a/src/srtctl/benchmarks/scripts/mooncake-router/bench.sh +++ b/src/srtctl/benchmarks/scripts/mooncake-router/bench.sh @@ -56,15 +56,9 @@ if [ ! -f "${INPUT_FILE}" ]; then fi # Run small benchmark for warmup -echo "Running small benchmark for warmup..." -aiperf profile \ - -m "${MODEL_NAME}" \ - --url "${ENDPOINT}" \ - --streaming \ - --ui simple \ - --concurrency 10 \ - --request-count 20 -echo "Small benchmark for warmup complete" +command="aiperf profile -m ${MODEL_NAME} --url ${ENDPOINT} --streaming --ui simple --concurrency 10 --request-count 20" +echo "[CMD-WARMUP] $command" +eval "$command" # Setup artifact directory with model and timestamp MODEL_BASE_NAME="${MODEL_NAME##*/}" @@ -80,18 +74,9 @@ echo "" echo "$(date '+%Y-%m-%d %H:%M:%S') - Starting benchmark" # Run aiperf profile exactly as dynamo does -aiperf profile \ - -m "${MODEL_NAME}" \ - --input-file "${INPUT_FILE}" \ - --custom-dataset-type mooncake_trace \ - --fixed-schedule \ - --url "${ENDPOINT}" \ - --streaming \ - --random-seed 42 \ - --ui simple \ - --artifact-dir "${RUN_ARTIFACT_DIR}" \ - --goodput "time_to_first_token:${TTFT_THRESHOLD} inter_token_latency:${ITL_THRESHOLD}" - +command="aiperf profile -m ${MODEL_NAME} --input-file ${INPUT_FILE} --custom-dataset-type mooncake_trace --fixed-schedule --url ${ENDPOINT} --streaming --random-seed 42 --ui simple --artifact-dir ${RUN_ARTIFACT_DIR} --goodput \"time_to_first_token:${TTFT_THRESHOLD} inter_token_latency:${ITL_THRESHOLD}\"" +echo "[CMD] $command" +eval "$command" BENCH_EXIT_CODE=$? 
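The mooncake commands above stay as strings run through eval, which matters here because the --goodput flag takes a single space-containing value: the escaped quotes embedded in the string are re-parsed by eval, so the spec arrives as one argument. A reduced sketch with illustrative thresholds:

    command="aiperf profile --goodput \"time_to_first_token:200 inter_token_latency:20\""
    echo "[CMD] $command"
    eval "$command"     # the goodput spec is re-quoted into a single argument
    BENCH_EXIT_CODE=$?  # captures the eval'd command's exit status, as above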
echo "" diff --git a/src/srtctl/benchmarks/scripts/profiling/profile.sh b/src/srtctl/benchmarks/scripts/profiling/profile.sh index 6b426c34..92ee4a0b 100644 --- a/src/srtctl/benchmarks/scripts/profiling/profile.sh +++ b/src/srtctl/benchmarks/scripts/profiling/profile.sh @@ -130,27 +130,36 @@ done if [[ "${PROFILING_MODE}" == "prefill" ]]; then echo "" echo "Generating profiling traffic..." - python3 -m sglang.bench_serving \ - --backend sglang \ - --model "${model_name}" \ - --host "${head_node}" --port "${head_port}" \ - --dataset-name random \ - --max-concurrency "${PROFILE_CONCURRENCY}" \ - --num-prompts 128 \ - --random-input-len "${PROFILE_ISL}" \ - --random-output-len "${PROFILE_OSL}" \ - --random-range-ratio 1 \ + + cmd=( + python3 -m sglang.bench_serving + --backend sglang + --model "${model_name}" + --host "${head_node}" + --port "${head_port}" + --dataset-name random + --max-concurrency "${PROFILE_CONCURRENCY}" + --num-prompts 128 + --random-input-len "${PROFILE_ISL}" + --random-output-len "${PROFILE_OSL}" + --random-range-ratio 1 --warmup-request 0 + ) + printf "[CMD] %s\n" "${cmd[*]}" + "${cmd[@]}" # Run lm-eval for additional profiling coverage - echo "" - echo "Running lm-eval..." - pip install lm-eval tenacity > /dev/null 2>&1 - python -m lm_eval \ - --model local-completions \ - --tasks gsm8k \ - --model_args "base_url=http://${head_node}:${head_port}/v1/completions,model=${model_name},tokenized_requests=False,tokenizer_backend=None,num_concurrent=${PROFILE_CONCURRENCY},timeout=6000,max_retries=1" \ + # Note: model_args must be a single array element to prevent splitting + model_args="base_url=http://${head_node}:${head_port}/v1/completions,model=${model_name},tokenized_requests=False,tokenizer_backend=None,num_concurrent=${PROFILE_CONCURRENCY},timeout=6000,max_retries=1" + lm_cmd=( + python -m lm_eval + --model local-completions + --tasks gsm8k + --model_args "${model_args}" --limit 10 + ) + printf "[CMD-LM-EVAL] %s\n" "${lm_cmd[*]}" + "${lm_cmd[@]}" fi exit_code=$? diff --git a/src/srtctl/benchmarks/scripts/router/bench.sh b/src/srtctl/benchmarks/scripts/router/bench.sh index d559b85d..85376cd2 100644 --- a/src/srtctl/benchmarks/scripts/router/bench.sh +++ b/src/srtctl/benchmarks/scripts/router/bench.sh @@ -39,14 +39,18 @@ mkdir -p "$result_dir" echo "Running prefix ratio benchmark..." echo "Results will be saved to: $result_dir" -# shellcheck disable=SC2086 -python prefix_ratio_benchmark.py \ - --prefix-ratios $PREFIX_RATIOS \ - --isl "$ISL" \ - --osl "$OSL" \ - --requests "$REQUESTS" \ - --concurrency "$CONCURRENCY" \ +cmd=( + python prefix_ratio_benchmark.py + --prefix-ratios "$PREFIX_RATIOS" + --isl "$ISL" + --osl "$OSL" + --requests "$REQUESTS" + --concurrency "$CONCURRENCY" --output-dir "$result_dir" +) + +printf "[CMD] %s\n" "${cmd[*]}" +"${cmd[@]}" echo "Router benchmark complete. 
Results in $result_dir" diff --git a/src/srtctl/benchmarks/scripts/sa-bench/bench.sh b/src/srtctl/benchmarks/scripts/sa-bench/bench.sh index 82043666..64ec7fa0 100644 --- a/src/srtctl/benchmarks/scripts/sa-bench/bench.sh +++ b/src/srtctl/benchmarks/scripts/sa-bench/bench.sh @@ -52,20 +52,27 @@ mkdir -p "$result_dir" for concurrency in "${CONCURRENCY_LIST[@]}"; do num_warmup_prompts=$((concurrency * 2)) - python3 -u "${WORK_DIR}/benchmark_serving.py" \ - --model "${MODEL_NAME}" --tokenizer "${MODEL_PATH}" \ - --host "$HOST" --port "$PORT" \ - --backend "dynamo" --endpoint /v1/completions \ - --disable-tqdm \ - --dataset-name random \ - --num-prompts "$num_warmup_prompts" \ - --random-input-len "$ISL" \ - --random-output-len "$OSL" \ - --random-range-ratio 0.8 \ - --ignore-eos \ - --request-rate 250 \ - --percentile-metrics ttft,tpot,itl,e2el \ + cmd=( + python3 -u "${WORK_DIR}/benchmark_serving.py" + --model "${MODEL_NAME}" + --tokenizer "${MODEL_PATH}" + --host "$HOST" + --port "$PORT" + --backend dynamo + --endpoint /v1/completions + --disable-tqdm + --dataset-name random + --num-prompts "$num_warmup_prompts" + --random-input-len "$ISL" + --random-output-len "$OSL" + --random-range-ratio 0.8 + --ignore-eos + --request-rate 250 + --percentile-metrics ttft,tpot,itl,e2el --max-concurrency "$concurrency" + ) + echo "[CMD-WARMUP] ${cmd[*]}" + "${cmd[@]}" num_prompts=$((concurrency * 10)) @@ -79,22 +86,31 @@ for concurrency in "${CONCURRENCY_LIST[@]}"; do echo "Running benchmark with concurrency: $concurrency" echo "$(date '+%Y-%m-%d %H:%M:%S')" - python3 -u "${WORK_DIR}/benchmark_serving.py" \ - --model "${MODEL_NAME}" --tokenizer "${MODEL_PATH}" \ - --host "$HOST" --port "$PORT" \ - --backend "dynamo" --endpoint /v1/completions \ - --disable-tqdm \ - --dataset-name random \ - --num-prompts "$num_prompts" \ - --random-input-len "$ISL" \ - --random-output-len "$OSL" \ - --random-range-ratio 0.8 \ - --ignore-eos \ - --request-rate "${REQ_RATE}" \ - --percentile-metrics ttft,tpot,itl,e2el \ - --max-concurrency "$concurrency" \ - --use-chat-template \ - --save-result --result-dir "$result_dir" --result-filename "$result_filename" + cmd=( + python3 -u "${WORK_DIR}/benchmark_serving.py" + --model "${MODEL_NAME}" + --tokenizer "${MODEL_PATH}" + --host "$HOST" + --port "$PORT" + --backend dynamo + --endpoint /v1/completions + --disable-tqdm + --dataset-name random + --num-prompts "$num_prompts" + --random-input-len "$ISL" + --random-output-len "$OSL" + --random-range-ratio 0.8 + --ignore-eos + --request-rate "${REQ_RATE}" + --percentile-metrics ttft,tpot,itl,e2el + --max-concurrency "$concurrency" + --use-chat-template + --save-result + --result-dir "$result_dir" + --result-filename "$result_filename" + ) + echo "[CMD] ${cmd[*]}" + "${cmd[@]}" echo "$(date '+%Y-%m-%d %H:%M:%S')" echo "Completed benchmark with concurrency: $concurrency" diff --git a/src/srtctl/core/schema.py b/src/srtctl/core/schema.py index 09af2335..7af91efc 100644 --- a/src/srtctl/core/schema.py +++ b/src/srtctl/core/schema.py @@ -600,7 +600,7 @@ def get_install_commands(self) -> str: "cd dynamo && " f"{checkout_cmd + ' && ' if checkout_cmd else ''}" "cd lib/bindings/python/ && " - "export RUSTFLAGS=\"${RUSTFLAGS:-} -C target-cpu=native\" && " + 'export RUSTFLAGS="${RUSTFLAGS:-} -C target-cpu=native" && ' "maturin build -o /tmp && " "pip install /tmp/ai_dynamo_runtime*.whl && " "cd /sgl-workspace/dynamo/ && " diff --git a/src/srtctl/frontends/dynamo.py b/src/srtctl/frontends/dynamo.py index 5158ec0e..f8fbc6c2 100644 --- 
a/src/srtctl/frontends/dynamo.py +++ b/src/srtctl/frontends/dynamo.py @@ -100,7 +100,7 @@ def start_frontends( bash_preamble=bash_preamble, # TODO(jthomson): I don't have the faintest clue of # why this is needed in later versions of Dynamo, but it is. - mpi="pmix", + mpi="pmix", ) processes.append( diff --git a/tests/fixtures_parsers.py b/tests/fixtures_parsers.py new file mode 100644 index 00000000..e84c16ac --- /dev/null +++ b/tests/fixtures_parsers.py @@ -0,0 +1,362 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Test fixtures and sample data for parser tests. + +Provides reusable test data, log samples, and utilities for testing parsers. +""" + +import json +from pathlib import Path +from typing import Any + + +class SampleSABenchData: + """Sample data for SA-Bench parser testing.""" + + @staticmethod + def benchmark_out_content() -> str: + """Sample benchmark.out content.""" + return """ +SA-Bench Config: endpoint=http://localhost:8000; isl=8192; osl=1024; concurrencies=50x100x200; req_rate=inf; model=Qwen/Qwen3-32B + +[CMD] python -m sglang.bench_serving --model Qwen/Qwen3-32B --base-url http://localhost:8000 --num-prompts 1000 --request-rate inf --max-concurrency 50 --random-input-len 8192 --random-output-len 1024 + +Starting benchmark run... +Concurrency: 50, Throughput: 2500.5 tok/s, TTFT: 150.5ms, ITL: 20.0ms +Concurrency: 100, Throughput: 5000.0 tok/s, TTFT: 180.0ms, ITL: 22.0ms +Concurrency: 200, Throughput: 9500.5 tok/s, TTFT: 250.0ms, ITL: 25.0ms +Benchmark complete. + """ + + @staticmethod + def result_json(concurrency: int = 100) -> dict[str, Any]: + """Sample result JSON data.""" + return { + "max_concurrency": concurrency, + "output_throughput": concurrency * 50.0, + "total_token_throughput": concurrency * 60.0, + "request_throughput": concurrency * 0.5, + "request_goodput": concurrency * 0.48, + "request_rate": float("inf"), + # Mean latencies + "mean_ttft_ms": 150.0 + concurrency * 0.5, + "mean_tpot_ms": 20.0 + concurrency * 0.1, + "mean_itl_ms": 18.0 + concurrency * 0.08, + "mean_e2el_ms": 2000.0 + concurrency * 5.0, + # Median latencies + "median_ttft_ms": 140.0 + concurrency * 0.45, + "median_tpot_ms": 19.0 + concurrency * 0.09, + "median_itl_ms": 17.0 + concurrency * 0.07, + "median_e2el_ms": 1900.0 + concurrency * 4.5, + # P99 latencies + "p99_ttft_ms": 250.0 + concurrency * 1.0, + "p99_tpot_ms": 40.0 + concurrency * 0.2, + "p99_itl_ms": 35.0 + concurrency * 0.15, + "p99_e2el_ms": 3000.0 + concurrency * 10.0, + # Std dev + "std_ttft_ms": 25.0, + "std_tpot_ms": 5.0, + "std_itl_ms": 3.0, + "std_e2el_ms": 200.0, + # Token counts + "total_input_tokens": concurrency * 8192, + "total_output_tokens": concurrency * 1024, + # Metadata + "duration": 120.5, + "completed": concurrency * 10, + "num_prompts": concurrency * 10, + } + + +class SampleMooncakeRouterData: + """Sample data for Mooncake Router parser testing.""" + + @staticmethod + def benchmark_out_content() -> str: + """Sample benchmark.out content.""" + return """ +Mooncake Router Benchmark +Endpoint: http://localhost:8000 +Model: Qwen/Qwen3-32B +Workload: conversation + +[CMD] aiperf profile -m "Qwen/Qwen3-32B" --url "http://localhost:8000" --concurrency 10 --synthetic-input-tokens-mean 8192 --output-tokens-mean 1024 + +Starting benchmark... 
+Request throughput: 3.37 req/s +Output token throughput: 1150.92 tok/s +Time to first token: 150.5 ms +Inter-token latency: 18.5 ms + """ + + @staticmethod + def aiperf_result_json() -> dict[str, Any]: + """Sample AIPerf result JSON data.""" + return { + "output_token_throughput": { + "avg": 1150.92, + "p50": 1100.0, + "p99": 1200.0, + "std": 50.0, + }, + "request_throughput": {"avg": 3.37, "p50": 3.3, "p99": 3.5, "std": 0.1}, + "time_to_first_token": { + "avg": 150.5, + "p50": 145.0, + "p99": 200.0, + "std": 25.0, + }, + "inter_token_latency": { + "avg": 18.5, + "p50": 18.0, + "p99": 25.0, + "std": 3.0, + }, + "request_latency": { + "avg": 2000.0, + "p50": 1900.0, + "p99": 2500.0, + "std": 200.0, + }, + "request_count": {"avg": 1000}, + "output_token_throughput_per_user": {"avg": 115.09}, + } + + +class SampleSGLangLogData: + """Sample data for SGLang node parser testing.""" + + @staticmethod + def prefill_log_content() -> str: + """Sample prefill worker log.""" + return """ +[2025-12-30 15:52:38 DP0 TP0 EP0] INFO sglang Starting SGLang prefill worker +[2025-12-30 15:52:38 DP0 TP0 EP0] INFO sglang server_args=ServerArgs(tp_size=8, dp_size=1, ep_size=1, model_path=/models/qwen3-32b, served_model_name=Qwen3-32B, host=10.0.0.1, port=30000, disaggregation_mode=prefill, context_length=131072, max_running_requests=1024, mem_fraction_static=0.85, kv_cache_dtype=fp8_e5m2) + +[CMD] python -m sglang.launch_server --model /models/qwen3-32b --served-model-name Qwen3-32B --tp-size 8 --dp-size 1 --ep-size 1 --host 10.0.0.1 --port 30000 --disaggregation-mode prefill --context-length 131072 --max-running-requests 1024 --mem-fraction-static 0.85 --kv-cache-dtype fp8_e5m2 + +[2025-12-30 15:52:40 DP0 TP0 EP0] INFO sglang Prefill batch, #new-seq: 8, #new-token: 65536, #cached-token: 0, token usage: 0.78, #running-req: 8, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 6500.5 +[2025-12-30 15:52:40 DP0 TP0 EP0] INFO sglang Prefill batch, #new-seq: 5, #new-token: 40960, #cached-token: 0, token usage: 0.85, #running-req: 13, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 5120.0 +[2025-12-30 15:52:41 DP0 TP0 EP0] INFO sglang Prefill batch, #new-seq: 10, #new-token: 81920, #cached-token: 16384, token usage: 0.90, #running-req: 23, #queue-req: 2, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 8192.0 +[2025-12-30 15:52:42 DP0 TP0 EP0] INFO sglang avail mem=75.11 GB, mem usage=107.07 GB +[2025-12-30 15:52:43 DP0 TP0 EP0] INFO sglang KV size: 32.50 GB, #tokens: 1048576 + """ + + @staticmethod + def decode_log_content() -> str: + """Sample decode worker log.""" + return """ +[2025-12-30 15:52:38 DP0 TP0 EP0] INFO sglang Starting SGLang decode worker +[2025-12-30 15:52:38 DP0 TP0 EP0] INFO sglang server_args=ServerArgs(tp_size=4, dp_size=1, ep_size=1, model_path=/models/qwen3-32b, disaggregation_mode=decode) + +[CMD] python -m sglang.launch_server --model /models/qwen3-32b --tp-size 4 --disaggregation-mode decode + +[2025-12-30 15:52:40 DP0 TP0 EP0] INFO sglang Decode batch, #running-req: 15, #token: 512, token usage: 0.65, pre-allocated usage: 0.10, #prealloc-req: 3, #transfer-req: 0, #queue-req: 0, gen throughput (token/s): 2048.0 +[2025-12-30 15:52:40 DP0 TP0 EP0] INFO sglang Decode batch, #running-req: 20, #token: 768, token usage: 0.72, pre-allocated usage: 0.15, #prealloc-req: 5, #transfer-req: 2, #queue-req: 0, gen throughput (token/s): 3072.0 +[2025-12-30 15:52:41 DP0 TP0 EP0] INFO sglang Decode batch, #running-req: 18, #token: 640, 
token usage: 0.70, pre-allocated usage: 0.12, #prealloc-req: 4, #transfer-req: 1, #queue-req: 0, gen throughput (token/s): 2560.0 +[2025-12-30 15:52:42 DP0 TP0 EP0] INFO sglang avail mem=85.00 GB, mem usage=97.00 GB +[2025-12-30 15:52:43 DP0 TP0 EP0] INFO sglang KV size: 48.00 GB, #tokens: 2097152 + """ + + +class SampleTRTLLMLogData: + """Sample data for TRTLLM node parser testing.""" + + @staticmethod + def prefill_log_content() -> str: + """Sample TRTLLM prefill worker log.""" + return """ +[33mRank0 run python3 -m dynamo.trtllm --model-path /models/qwen3-32b --served-model-name Qwen3-32B-fp8 --disaggregation-mode prefill --host 10.0.0.1 --port 30000[0m + +[CMD] python3 -m dynamo.trtllm --model-path /models/qwen3-32b --served-model-name Qwen3-32B-fp8 --disaggregation-mode prefill --host 10.0.0.1 --port 30000 + +Initializing the worker with config: Config(namespace=dynamo, component=prefill, tensor_parallel_size=8, pipeline_parallel_size=1, expert_parallel_size=1, max_batch_size=256, max_num_tokens=16384, max_seq_len=131072) + +TensorRT-LLM engine args: {'tensor_parallel_size': 8, 'pipeline_parallel_size': 1, 'moe_expert_parallel_size': 1, 'max_batch_size': 256, 'max_num_tokens': 16384, 'max_seq_len': 131072} + +[01/16/2026-06:20:15] [TRT-LLM] [I] Peak memory during memory usage profiling (torch + non-torch): 91.46 GiB, available KV cache memory when calculating max tokens: 41.11 GiB, fraction is set 0.85, kv size is 35136. device total memory 139.81 GiB + +[MemUsageChange] Allocated 41.11 GiB for max tokens (524288) + +[01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 5559, host_step_time = 62.5ms, num_scheduled_requests: 5, states = {'num_ctx_requests': 5, 'num_ctx_tokens': 40960, 'num_generation_tokens': 0} +[01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 5560, host_step_time = 80.0ms, num_scheduled_requests: 8, states = {'num_ctx_requests': 8, 'num_ctx_tokens': 65536, 'num_generation_tokens': 0} +[01/16/2026-06:20:18] [TRT-LLM] [RANK 0] [I] iter = 5561, host_step_time = 100.0ms, num_scheduled_requests: 10, states = {'num_ctx_requests': 10, 'num_ctx_tokens': 81920, 'num_generation_tokens': 0} + """ + + @staticmethod + def decode_log_content() -> str: + """Sample TRTLLM decode worker log.""" + return """ +[33mRank0 run python3 -m dynamo.trtllm --model-path /models/qwen3-32b --served-model-name Qwen3-32B-fp8 --disaggregation-mode decode --host 10.0.0.2 --port 30001[0m + +[CMD] python3 -m dynamo.trtllm --model-path /models/qwen3-32b --served-model-name Qwen3-32B-fp8 --disaggregation-mode decode --host 10.0.0.2 --port 30001 + +Initializing the worker with config: Config(tensor_parallel_size=4, pipeline_parallel_size=1, max_batch_size=512) + +TensorRT-LLM engine args: {'tensor_parallel_size': 4, 'pipeline_parallel_size': 1, 'max_batch_size': 512, 'max_seq_len': 131072} + +[01/16/2026-06:20:15] [TRT-LLM] [I] Peak memory during memory usage profiling (torch + non-torch): 75.50 GiB, available KV cache memory when calculating max tokens: 55.00 GiB, fraction is set 0.85, kv size is 45000. 
device total memory 139.81 GiB + +[01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 1000, host_step_time = 40.0ms, num_scheduled_requests: 20, states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 1024} +[01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 1001, host_step_time = 50.0ms, num_scheduled_requests: 25, states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 1280} +[01/16/2026-06:20:18] [TRT-LLM] [RANK 0] [I] iter = 1002, host_step_time = 45.0ms, num_scheduled_requests: 22, states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 1152} + """ + + +class ParserTestHarness: + """Test harness utilities for parser testing.""" + + @staticmethod + def create_sa_bench_run(temp_dir: Path, concurrencies: list[int] | None = None) -> Path: + """Create a complete SA-Bench run directory with result files. + + Args: + temp_dir: Temporary directory to create files in + concurrencies: List of concurrency levels to create (default: [50, 100, 200]) + + Returns: + Path to the run directory + """ + if concurrencies is None: + concurrencies = [50, 100, 200] + + run_dir = temp_dir / "sa_bench_run" + run_dir.mkdir(parents=True, exist_ok=True) + + # Create benchmark.out + benchmark_out = run_dir / "benchmark.out" + benchmark_out.write_text(SampleSABenchData.benchmark_out_content()) + + # Create result JSON files + for concurrency in concurrencies: + result_json = run_dir / f"result_c{concurrency}.json" + with open(result_json, "w") as f: + json.dump(SampleSABenchData.result_json(concurrency), f, indent=2) + + return run_dir + + @staticmethod + def create_mooncake_router_run(temp_dir: Path) -> Path: + """Create a Mooncake Router run directory with result file. + + Args: + temp_dir: Temporary directory to create files in + + Returns: + Path to the run directory + """ + run_dir = temp_dir / "mooncake_router_run" + run_dir.mkdir(parents=True, exist_ok=True) + + # Create benchmark.out + benchmark_out = run_dir / "benchmark.out" + benchmark_out.write_text(SampleMooncakeRouterData.benchmark_out_content()) + + # Create AIPerf result JSON + aiperf_json = run_dir / "profile_export_aiperf.json" + with open(aiperf_json, "w") as f: + json.dump(SampleMooncakeRouterData.aiperf_result_json(), f, indent=2) + + return run_dir + + @staticmethod + def create_sglang_node_logs( + temp_dir: Path, + num_prefill: int = 2, + num_decode: int = 4, + ) -> Path: + """Create SGLang node log directory with worker logs. + + Args: + temp_dir: Temporary directory to create files in + num_prefill: Number of prefill workers + num_decode: Number of decode workers + + Returns: + Path to the log directory + """ + log_dir = temp_dir + log_dir.mkdir(parents=True, exist_ok=True) + + # Create prefill worker logs + for i in range(num_prefill): + log_file = log_dir / f"node{i:02d}_prefill_w{i}.out" + log_file.write_text(SampleSGLangLogData.prefill_log_content()) + + # Create decode worker logs + for i in range(num_decode): + log_file = log_dir / f"node{i+10:02d}_decode_w{i}.out" + log_file.write_text(SampleSGLangLogData.decode_log_content()) + + return log_dir + + @staticmethod + def create_trtllm_node_logs( + temp_dir: Path, + num_prefill: int = 2, + num_decode: int = 4, + ) -> Path: + """Create TRTLLM node log directory with worker logs. 
+ + Args: + temp_dir: Temporary directory to create files in + num_prefill: Number of prefill workers + num_decode: Number of decode workers + + Returns: + Path to the log directory + """ + log_dir = temp_dir + log_dir.mkdir(parents=True, exist_ok=True) + + # Create prefill worker logs + for i in range(num_prefill): + log_file = log_dir / f"worker-{i}_prefill_w{i}.out" + log_file.write_text(SampleTRTLLMLogData.prefill_log_content()) + + # Create decode worker logs + for i in range(num_decode): + log_file = log_dir / f"worker-{i+10}_decode_w{i}.out" + log_file.write_text(SampleTRTLLMLogData.decode_log_content()) + + return log_dir + + @staticmethod + def assert_valid_benchmark_results(results: dict, expected_fields: list[str] | None = None): + """Assert that benchmark results contain valid data. + + Args: + results: Benchmark results dictionary + expected_fields: List of fields that must be present (optional) + """ + if expected_fields is None: + expected_fields = [ + "output_tps", + "mean_ttft_ms", + "mean_itl_ms", + ] + + for field in expected_fields: + assert field in results, f"Missing expected field: {field}" + value = results[field] + # Check it's not None and if it's a list, check it's not empty + assert value is not None and ( + not isinstance(value, list) or len(value) > 0 + ), f"Field {field} is None or empty list" + + @staticmethod + def assert_valid_node_metrics(node_metrics, min_batches: int = 0): + """Assert that node metrics are valid. + + Args: + node_metrics: NodeMetrics object + min_batches: Minimum number of batches expected + """ + assert node_metrics is not None + assert node_metrics.node_name + assert node_metrics.worker_type + assert node_metrics.worker_id + assert len(node_metrics.batches) >= min_batches + assert isinstance(node_metrics.config, dict) diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py index aa95cc28..ac9644f7 100644 --- a/tests/test_benchmarks.py +++ b/tests/test_benchmarks.py @@ -72,9 +72,7 @@ def test_validate_config_valid(self): name="test", model=ModelConfig(path="/model", container="/image", precision="fp4"), resources=ResourceConfig(gpu_type="h100"), - benchmark=BenchmarkConfig( - type="sa-bench", isl=1024, osl=1024, concurrencies="4x8" - ), + benchmark=BenchmarkConfig(type="sa-bench", isl=1024, osl=1024, concurrencies="4x8"), ) errors = runner.validate_config(config) assert errors == [] @@ -96,4 +94,3 @@ def test_mmlu_script_exists(self): """MMLU script exists.""" script = SCRIPTS_DIR / "mmlu" / "bench.sh" assert script.exists() - diff --git a/tests/test_dashboard.py b/tests/test_dashboard.py new file mode 100644 index 00000000..d50a236d --- /dev/null +++ b/tests/test_dashboard.py @@ -0,0 +1,239 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Tests for dashboard components. + +Tests timestamp parsing and other dashboard functionality. 
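+
+Formats exercised below: "YYYY-MM-DD HH:MM:SS", ISO 8601 with or without
+microseconds and a trailing "Z" (SGLang), and "MM/DD/YYYY-HH:MM:SS" (TRTLLM).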
+""" + +from datetime import datetime + +import pytest + + +class TestRateMatchTab: + """Tests for rate_match_tab module.""" + + def test_parse_timestamp_yyyy_mm_dd(self): + """Test parsing YYYY-MM-DD HH:MM:SS format.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + ts = "2025-12-30 15:52:38" + dt = _parse_timestamp(ts) + + assert isinstance(dt, datetime) + assert dt.year == 2025 + assert dt.month == 12 + assert dt.day == 30 + assert dt.hour == 15 + assert dt.minute == 52 + assert dt.second == 38 + + def test_parse_timestamp_iso8601_with_microseconds(self): + """Test parsing ISO 8601 format with microseconds and Z.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + ts = "2025-12-30T15:52:38.206058Z" + dt = _parse_timestamp(ts) + + assert isinstance(dt, datetime) + assert dt.year == 2025 + assert dt.month == 12 + assert dt.day == 30 + assert dt.hour == 15 + assert dt.minute == 52 + assert dt.second == 38 + + def test_parse_timestamp_iso8601_without_z(self): + """Test parsing ISO 8601 format without Z.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + ts = "2025-12-30T15:52:38.206058" + dt = _parse_timestamp(ts) + + assert isinstance(dt, datetime) + assert dt.year == 2025 + assert dt.month == 12 + assert dt.day == 30 + + def test_parse_timestamp_iso8601_without_microseconds(self): + """Test parsing ISO 8601 format without microseconds.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + ts = "2025-12-30T15:52:38" + dt = _parse_timestamp(ts) + + assert isinstance(dt, datetime) + assert dt.year == 2025 + assert dt.hour == 15 + + def test_parse_timestamp_trtllm_format(self): + """Test parsing TRTLLM MM/DD/YYYY-HH:MM:SS format.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + ts = "01/23/2026-08:04:38" + dt = _parse_timestamp(ts) + + assert isinstance(dt, datetime) + assert dt.year == 2026 + assert dt.month == 1 + assert dt.day == 23 + assert dt.hour == 8 + assert dt.minute == 4 + assert dt.second == 38 + + def test_parse_timestamp_trtllm_various_dates(self): + """Test parsing various TRTLLM timestamps.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + # End of year + ts1 = "12/31/2025-23:59:59" + dt1 = _parse_timestamp(ts1) + assert dt1.year == 2025 + assert dt1.month == 12 + assert dt1.day == 31 + assert dt1.hour == 23 + + # Start of year + ts2 = "01/01/2026-00:00:00" + dt2 = _parse_timestamp(ts2) + assert dt2.year == 2026 + assert dt2.month == 1 + assert dt2.day == 1 + assert dt2.hour == 0 + + def test_parse_timestamp_invalid(self): + """Test that invalid timestamps raise ValueError.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + with pytest.raises(ValueError): + _parse_timestamp("invalid-timestamp") + + with pytest.raises(ValueError): + _parse_timestamp("not a date at all") + + with pytest.raises(ValueError): + _parse_timestamp("2025-13-40 25:99:99") # Invalid values + + def test_parse_timestamp_format_fallback(self): + """Test that parser tries multiple formats in order.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + # Should parse successfully with any supported format + formats = [ + ("2025-12-30 15:52:38", 2025), # Standard + ("2025-12-30T15:52:38.206058Z", 2025), # ISO 8601 with Z + ("01/23/2026-08:04:38", 2026), # TRTLLM + ] + + for ts, expected_year in formats: + dt = _parse_timestamp(ts) + assert dt.year == expected_year + + def test_parse_timestamp_time_delta(self): + """Test that timestamps can be used 
for time delta calculations.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + ts1 = "01/23/2026-08:04:38" + ts2 = "01/23/2026-08:04:40" + + dt1 = _parse_timestamp(ts1) + dt2 = _parse_timestamp(ts2) + + delta = dt2 - dt1 + assert delta.total_seconds() == 2.0 + + def test_parse_timestamp_mixed_formats(self): + """Test parsing a sequence of different timestamp formats.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + # Simulate what dashboard might see from different backends + timestamps = [ + "2025-12-30 15:52:38", # Standard (could be from old cache) + "2025-12-30T15:52:39.100000Z", # SGLang + "01/23/2026-08:04:40", # TRTLLM + ] + + dts = [_parse_timestamp(ts) for ts in timestamps] + + # All should parse successfully + assert len(dts) == 3 + assert all(isinstance(dt, datetime) for dt in dts) + + # Should be able to compute deltas (even if not chronological) + delta = dts[1] - dts[0] + assert delta.total_seconds() == 1.1 + + +class TestTimestampIntegration: + """Integration tests for timestamp handling across parsers and dashboard.""" + + def test_sglang_to_dashboard_pipeline(self): + """Test that SGLang timestamps work through the entire pipeline.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + from analysis.srtlog.parsers import get_node_parser + + parser = get_node_parser("sglang") + + # SGLang format timestamp + sglang_ts = "2025-12-30T15:52:38.206058Z" + + # Parser should be able to parse it + dt_parser = parser.parse_timestamp(sglang_ts) + + # Dashboard should be able to parse it + dt_dashboard = _parse_timestamp(sglang_ts) + + # Both should produce same datetime + assert dt_parser.year == dt_dashboard.year + assert dt_parser.month == dt_dashboard.month + assert dt_parser.day == dt_dashboard.day + assert dt_parser.hour == dt_dashboard.hour + assert dt_parser.minute == dt_dashboard.minute + assert dt_parser.second == dt_dashboard.second + + def test_trtllm_to_dashboard_pipeline(self): + """Test that TRTLLM timestamps work through the entire pipeline.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + from analysis.srtlog.parsers import get_node_parser + + parser = get_node_parser("trtllm") + + # TRTLLM format timestamp + trtllm_ts = "01/23/2026-08:04:38" + + # Parser should be able to parse it + dt_parser = parser.parse_timestamp(trtllm_ts) + + # Dashboard should be able to parse it + dt_dashboard = _parse_timestamp(trtllm_ts) + + # Both should produce same datetime + assert dt_parser.year == dt_dashboard.year + assert dt_parser.month == dt_dashboard.month + assert dt_parser.day == dt_dashboard.day + assert dt_parser.hour == dt_dashboard.hour + assert dt_parser.minute == dt_dashboard.minute + assert dt_parser.second == dt_dashboard.second + + def test_mixed_backend_timestamps_in_dashboard(self): + """Test that dashboard can handle timestamps from mixed backends.""" + from analysis.dashboard.rate_match_tab import _parse_timestamp + + # Simulate dashboard receiving timestamps from different backends + mixed_timestamps = [ + "2025-12-30T15:52:38.206058Z", # SGLang + "01/23/2026-08:04:38", # TRTLLM + "2025-12-30 15:52:38", # Standard format + ] + + # All should parse without error + parsed = [] + for ts in mixed_timestamps: + dt = _parse_timestamp(ts) + parsed.append(dt) + assert isinstance(dt, datetime) + + # Should be able to compute time deltas + assert len(parsed) == 3 diff --git a/tests/test_e2e.py b/tests/test_e2e.py index b3a702e6..dad2233d 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ 
-124,9 +124,9 @@ def test_fits_in_rack(self, recipe_path): config = load_config(str(recipe_path)) r = config.resources total_nodes_needed = (r.prefill_nodes or 0) + (r.decode_nodes or 0) + (r.agg_nodes or 0) - assert total_nodes_needed <= self.RACK.NUM_NODES, ( - f"{recipe_path.name}: needs {total_nodes_needed} nodes, rack has {self.RACK.NUM_NODES}" - ) + assert ( + total_nodes_needed <= self.RACK.NUM_NODES + ), f"{recipe_path.name}: needs {total_nodes_needed} nodes, rack has {self.RACK.NUM_NODES}" @pytest.mark.parametrize("recipe_path", RECIPES, ids=lambda p: p.name) def test_endpoint_allocation(self, recipe_path): @@ -154,14 +154,14 @@ def test_endpoint_allocation(self, recipe_path): assert len(decode_eps) == r.num_decode for ep in prefill_eps: - assert ep.total_gpus == r.gpus_per_prefill, ( - f"prefill endpoint {ep.index} has {ep.total_gpus} GPUs, expected {r.gpus_per_prefill}" - ) + assert ( + ep.total_gpus == r.gpus_per_prefill + ), f"prefill endpoint {ep.index} has {ep.total_gpus} GPUs, expected {r.gpus_per_prefill}" for ep in decode_eps: - assert ep.total_gpus == r.gpus_per_decode, ( - f"decode endpoint {ep.index} has {ep.total_gpus} GPUs, expected {r.gpus_per_decode}" - ) + assert ( + ep.total_gpus == r.gpus_per_decode + ), f"decode endpoint {ep.index} has {ep.total_gpus} GPUs, expected {r.gpus_per_decode}" class TestH100Cluster: @@ -234,9 +234,9 @@ def test_multi_node_tp(self, recipe_path): ) for ep in [e for e in endpoints if e.mode == "prefill"]: - assert ep.num_nodes == expected_nodes, ( - f"prefill endpoint should span {expected_nodes} nodes, got {ep.num_nodes}" - ) + assert ( + ep.num_nodes == expected_nodes + ), f"prefill endpoint should span {expected_nodes} nodes, got {ep.num_nodes}" class TestCIConfigs: @@ -375,9 +375,9 @@ def test_disagg_kv_router_shared_node_allocation(self): for ep in decode_eps: node1_decode_gpus.update(ep.gpu_indices) - assert node1_prefill_gpus.isdisjoint(node1_decode_gpus), ( - f"GPU overlap on node1! prefill uses {node1_prefill_gpus}, decode uses {node1_decode_gpus}" - ) + assert node1_prefill_gpus.isdisjoint( + node1_decode_gpus + ), f"GPU overlap on node1! prefill uses {node1_prefill_gpus}, decode uses {node1_decode_gpus}" def test_disagg_kv_router_cuda_visible_devices(self): """Processes on shared node have non-overlapping CUDA_VISIBLE_DEVICES.""" @@ -414,15 +414,20 @@ def test_disagg_kv_router_cuda_visible_devices(self): all_gpus_on_node1 = set() for proc in node1_processes: for gpu in proc.gpu_indices: - assert gpu not in all_gpus_on_node1, ( - f"GPU {gpu} assigned to multiple processes on {nodes[1]}!" - ) + assert gpu not in all_gpus_on_node1, f"GPU {gpu} assigned to multiple processes on {nodes[1]}!" 
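+                # Record this GPU so a second process claiming it fails the assert above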
all_gpus_on_node1.add(gpu) # All 8 GPUs on node1 should be used - assert all_gpus_on_node1 == {0, 1, 2, 3, 4, 5, 6, 7}, ( - f"Expected all 8 GPUs used on node1, got {all_gpus_on_node1}" - ) + assert all_gpus_on_node1 == { + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + }, f"Expected all 8 GPUs used on node1, got {all_gpus_on_node1}" # Verify CUDA_VISIBLE_DEVICES strings are correct for proc in node1_processes: @@ -442,10 +447,7 @@ def test_disagg_kv_router_total_allocation_fits(self): config = load_config(str(recipe_path)) r = config.resources - total_gpus_needed = ( - r.num_prefill * r.gpus_per_prefill - + r.num_decode * r.gpus_per_decode - ) + total_gpus_needed = r.num_prefill * r.gpus_per_prefill + r.num_decode * r.gpus_per_decode total_gpus_available = r.total_nodes * r.gpus_per_node assert total_gpus_needed <= total_gpus_available, ( diff --git a/tests/test_frontend_topology.py b/tests/test_frontend_topology.py index 60251085..05c7d4fe 100644 --- a/tests/test_frontend_topology.py +++ b/tests/test_frontend_topology.py @@ -6,8 +6,6 @@ from pathlib import Path from unittest.mock import MagicMock, patch -import pytest - from srtctl.cli.do_sweep import SweepOrchestrator from srtctl.cli.mixins.frontend_stage import FrontendTopology from srtctl.core.runtime import Nodes, RuntimeContext diff --git a/tests/test_frontends.py b/tests/test_frontends.py index f3f94771..f11db00d 100644 --- a/tests/test_frontends.py +++ b/tests/test_frontends.py @@ -10,7 +10,6 @@ from srtctl.frontends import DynamoFrontend, SGLangFrontend, get_frontend - # ============================================================================ # get_frontend() Tests # ============================================================================ @@ -130,13 +129,15 @@ def test_mixed_args(self): """Mixed arg types are handled correctly.""" frontend = SGLangFrontend() - result = frontend.get_frontend_args_list({ - "policy": "round_robin", - "verbose": True, - "timeout": 60, - "disabled": False, - "optional": None, - }) + result = frontend.get_frontend_args_list( + { + "policy": "round_robin", + "verbose": True, + "timeout": 60, + "disabled": False, + "optional": None, + } + ) # Check all expected args are present assert "--policy" in result @@ -152,10 +153,12 @@ def test_dynamo_frontend_args_list(self): """DynamoFrontend has same args list behavior.""" frontend = DynamoFrontend() - result = frontend.get_frontend_args_list({ - "router-mode": "kv", - "router-reset-states": True, - }) + result = frontend.get_frontend_args_list( + { + "router-mode": "kv", + "router-reset-states": True, + } + ) assert "--router-mode" in result assert "kv" in result @@ -392,9 +395,7 @@ def test_sglang_env_passed_to_process(self, mock_get_ip, mock_srun): frontend = SGLangFrontend() topology = MockTopology(frontend_nodes=["node0"]) config = MockConfig( - frontend=MockFrontendConfig( - env={"MY_VAR": "my_value", "ANOTHER": "123"} - ), + frontend=MockFrontendConfig(env={"MY_VAR": "my_value", "ANOTHER": "123"}), resources=MockResourceConfig(num_agg=1), ) @@ -465,9 +466,7 @@ def test_sglang_frontend_args_in_command(self, mock_get_ip, mock_srun): frontend = SGLangFrontend() topology = MockTopology(frontend_nodes=["node0"]) config = MockConfig( - frontend=MockFrontendConfig( - args={"policy": "cache_aware", "verbose": True} - ), + frontend=MockFrontendConfig(args={"policy": "cache_aware", "verbose": True}), resources=MockResourceConfig(num_agg=1), ) @@ -492,4 +491,3 @@ def test_sglang_frontend_args_in_command(self, mock_get_ip, mock_srun): assert "--policy" in cmd 
assert "cache_aware" in cmd assert "--verbose" in cmd - diff --git a/tests/test_health.py b/tests/test_health.py index 034b7180..b894104e 100644 --- a/tests/test_health.py +++ b/tests/test_health.py @@ -3,15 +3,12 @@ """Tests for health check parsing (Dynamo and SGLang router).""" -import pytest - from srtctl.core.health import ( WorkerHealthResult, check_dynamo_health, check_sglang_router_health, ) - # ============================================================================ # Dynamo Health Check Tests # ============================================================================ @@ -103,7 +100,7 @@ def test_ignores_non_generate_endpoints(self): class TestDynamoHealthAggregated: """Test Dynamo /health parsing for aggregated mode (backend workers). - + In aggregated mode, workers report as "backend" and count as decode. Caller should pass expected_prefill=0, expected_decode=num_agg. """ @@ -391,9 +388,7 @@ def test_empty_response(self): def test_missing_count_fields_defaults_to_zero(self): """Missing count fields default to 0.""" - response = { - "stats": {} - } + response = {"stats": {}} result = check_sglang_router_health(response, expected_prefill=1, expected_decode=1) @@ -432,4 +427,3 @@ def test_with_counts(self): assert result.prefill_ready == 2 assert result.decode_ready == 4 - diff --git a/tests/test_parsers.py b/tests/test_parsers.py new file mode 100644 index 00000000..135bd48c --- /dev/null +++ b/tests/test_parsers.py @@ -0,0 +1,1112 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Tests for benchmark and node log parsers. + +Tests the parsing infrastructure including: +- Parser registry (benchmark and node parsers) +- SA-Bench parser +- Mooncake Router parser +- SGLang node parser +- TRTLLM node parser +""" + +import json +import tempfile +from pathlib import Path + +import pytest + +from analysis.srtlog.parsers import ( + BenchmarkLaunchCommand, + NodeLaunchCommand, + get_benchmark_parser, + get_node_parser, + list_benchmark_parsers, + list_node_parsers, +) +from tests.fixtures_parsers import ( + ParserTestHarness, + SampleMooncakeRouterData, + SampleSABenchData, + SampleSGLangLogData, + SampleTRTLLMLogData, +) + + +class TestParserRegistry: + """Test the parser registration system.""" + + def test_list_benchmark_parsers(self): + """Test listing registered benchmark parsers.""" + parsers = list_benchmark_parsers() + assert "sa-bench" in parsers + assert "mooncake-router" in parsers + assert len(parsers) >= 2 + + def test_list_node_parsers(self): + """Test listing registered node parsers.""" + parsers = list_node_parsers() + assert "sglang" in parsers + assert "trtllm" in parsers + assert len(parsers) >= 2 + + def test_get_benchmark_parser_sa_bench(self): + """Test getting SA-Bench parser.""" + parser = get_benchmark_parser("sa-bench") + assert parser.benchmark_type == "sa-bench" + + def test_get_benchmark_parser_mooncake_router(self): + """Test getting Mooncake Router parser.""" + parser = get_benchmark_parser("mooncake-router") + assert parser.benchmark_type == "mooncake-router" + + def test_get_benchmark_parser_invalid(self): + """Test getting invalid benchmark parser.""" + with pytest.raises(ValueError, match="No benchmark parser registered"): + get_benchmark_parser("invalid-benchmark") + + def test_get_node_parser_sglang(self): + """Test getting SGLang parser.""" + parser = get_node_parser("sglang") + assert parser.backend_type == "sglang" + + def 
test_get_node_parser_trtllm(self): + """Test getting TRTLLM parser.""" + parser = get_node_parser("trtllm") + assert parser.backend_type == "trtllm" + + def test_get_node_parser_invalid(self): + """Test getting invalid node parser.""" + with pytest.raises(ValueError, match="No node parser registered"): + get_node_parser("invalid-backend") + + +class TestSABenchParser: + """Test SA-Bench benchmark parser.""" + + @pytest.fixture + def parser(self): + """Get SA-Bench parser instance.""" + return get_benchmark_parser("sa-bench") + + @pytest.fixture + def temp_dir(self): + """Create temporary directory for test files.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + def test_parser_type(self, parser): + """Test parser type property.""" + assert parser.benchmark_type == "sa-bench" + + def test_parse_result_json(self, parser, temp_dir): + """Test parsing SA-Bench result JSON file.""" + # Create sample result JSON + result_data = { + "max_concurrency": 100, + "output_throughput": 5000.5, + "total_token_throughput": 6000.0, + "request_throughput": 50.5, + "mean_ttft_ms": 150.5, + "mean_tpot_ms": 20.5, + "mean_itl_ms": 18.5, + "mean_e2el_ms": 2000.0, + "p99_ttft_ms": 250.0, + "p99_tpot_ms": 40.0, + "p99_itl_ms": 35.0, + "p99_e2el_ms": 3000.0, + "total_input_tokens": 100000, + "total_output_tokens": 50000, + "completed": 1000, + "duration": 120.5, + } + + json_path = temp_dir / "result_c100.json" + with open(json_path, "w") as f: + json.dump(result_data, f) + + # Parse the file + result = parser.parse_result_json(json_path) + + # Verify parsing + assert result["max_concurrency"] == 100 + assert result["output_throughput"] == 5000.5 + assert result["mean_ttft_ms"] == 150.5 + assert result["p99_ttft_ms"] == 250.0 + assert result["total_input_tokens"] == 100000 + assert result["completed"] == 1000 + + def test_parse_result_directory(self, parser, temp_dir): + """Test parsing multiple result JSON files.""" + # Create multiple result files + for concurrency in [50, 100, 200]: + result_data = { + "max_concurrency": concurrency, + "output_throughput": concurrency * 50.0, + "mean_ttft_ms": 150.0, + } + json_path = temp_dir / f"result_c{concurrency}.json" + with open(json_path, "w") as f: + json.dump(result_data, f) + + # Parse all files + results = parser.parse_result_directory(temp_dir) + + # Verify results are sorted by concurrency + assert len(results) == 3 + assert results[0]["max_concurrency"] == 50 + assert results[1]["max_concurrency"] == 100 + assert results[2]["max_concurrency"] == 200 + + def test_parse_launch_command_tagged(self, parser): + """Test parsing SA-Bench command with [CMD] tag.""" + log_content = """ +[CMD] python -m sglang.bench_serving --model Qwen/Qwen3-32B --base-url http://localhost:8000 --num-prompts 1000 --request-rate inf --max-concurrency 100 --input-len 8192 --output-len 1024 + """ + + cmd = parser.parse_launch_command(log_content) + + assert cmd is not None + assert cmd.benchmark_type == "sa-bench" + assert "python -m sglang.bench_serving" in cmd.raw_command + assert cmd.extra_args["model"] == "Qwen/Qwen3-32B" + assert cmd.extra_args["base_url"] == "http://localhost:8000" + assert cmd.extra_args["num_prompts"] == 1000 + assert cmd.extra_args["max_concurrency"] == 100 + assert cmd.extra_args["input_len"] == 8192 + assert cmd.extra_args["output_len"] == 1024 + + def test_parse_launch_command_header_format(self, parser): + """Test parsing SA-Bench config from header format.""" + log_content = """ +SA-Bench Config: endpoint=http://localhost:8000; 
isl=8192; osl=1024; concurrencies=28; req_rate=inf; model=dsr1-fp8 + """ + + cmd = parser.parse_launch_command(log_content) + + assert cmd is not None + assert cmd.benchmark_type == "sa-bench" + assert cmd.extra_args["base_url"] == "http://localhost:8000" + assert cmd.extra_args["input_len"] == 8192 + assert cmd.extra_args["output_len"] == 1024 + assert cmd.extra_args["max_concurrency"] == 28 + assert cmd.extra_args["request_rate"] == "inf" + assert cmd.extra_args["model"] == "dsr1-fp8" + + def test_parse_launch_command_not_found(self, parser): + """Test parsing when no command is found.""" + log_content = "Some random log content\nNo benchmark commands here\nJust regular logs" + cmd = parser.parse_launch_command(log_content) + assert cmd is None + + +class TestMooncakeRouterParser: + """Test Mooncake Router benchmark parser.""" + + @pytest.fixture + def parser(self): + """Get Mooncake Router parser instance.""" + return get_benchmark_parser("mooncake-router") + + @pytest.fixture + def temp_dir(self): + """Create temporary directory for test files.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + def test_parser_type(self, parser): + """Test parser type property.""" + assert parser.benchmark_type == "mooncake-router" + + def test_parse_result_json(self, parser, temp_dir): + """Test parsing AIPerf result JSON file.""" + # Create sample AIPerf JSON + result_data = { + "output_token_throughput": {"avg": 1150.92, "p50": 1100.0, "p99": 1200.0, "std": 50.0}, + "request_throughput": {"avg": 3.37, "p50": 3.3, "p99": 3.5, "std": 0.1}, + "time_to_first_token": {"avg": 150.5, "p50": 145.0, "p99": 200.0, "std": 25.0}, + "inter_token_latency": {"avg": 18.5, "p50": 18.0, "p99": 25.0, "std": 3.0}, + "request_latency": {"avg": 2000.0, "p50": 1900.0, "p99": 2500.0, "std": 200.0}, + "request_count": {"avg": 1000}, + } + + json_path = temp_dir / "profile_export_aiperf.json" + with open(json_path, "w") as f: + json.dump(result_data, f) + + # Parse the file + result = parser.parse_result_json(json_path) + + # Verify parsing + assert result["output_tps"] == 1150.92 + assert result["request_throughput"] == 3.37 + assert result["mean_ttft_ms"] == 150.5 + assert result["median_ttft_ms"] == 145.0 + assert result["p99_ttft_ms"] == 200.0 + assert result["mean_itl_ms"] == 18.5 + assert result["completed"] == 1000 + + def test_parse_launch_command_aiperf(self, parser): + """Test parsing AIPerf command.""" + log_content = """ +[CMD] aiperf profile -m "Qwen/Qwen3-32B" --url "http://localhost:8000" --concurrency 10 --request-count 1000 + """ + + cmd = parser.parse_launch_command(log_content) + + assert cmd is not None + assert cmd.benchmark_type == "mooncake-router" + assert "aiperf" in cmd.raw_command + assert cmd.extra_args["model"] == "Qwen/Qwen3-32B" + assert cmd.extra_args["base_url"] == "http://localhost:8000" + assert cmd.extra_args["max_concurrency"] == 10 + assert cmd.extra_args["num_prompts"] == 1000 + + def test_parse_launch_command_header(self, parser): + """Test parsing from header format.""" + log_content = """ +Mooncake Router Benchmark +Endpoint: http://localhost:8000 +Model: Qwen/Qwen3-32B +Workload: conversation + """ + + cmd = parser.parse_launch_command(log_content) + + assert cmd is not None + assert cmd.benchmark_type == "mooncake-router" + assert cmd.extra_args["base_url"] == "http://localhost:8000" + assert cmd.extra_args["model"] == "Qwen/Qwen3-32B" + assert cmd.extra_args["dataset"] == "conversation" + + +class TestSGLangNodeParser: + """Test SGLang node log parser.""" 
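+
+    # Log-line shapes exercised by these tests (taken from the sample lines
+    # below; the parser's full grammar may accept more than is shown here):
+    #   [<ts> DP<d> TP<t> EP<e>] Prefill batch, #new-seq: N, #new-token: N, ...
+    #   [<ts> DP<d> TP<t> EP<e>] Decode batch, #running-req: N, #token: N, ...
+    #   ... avail mem=X GB, mem usage=Y GB[, KV size: Z GB, #tokens: N]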
+ + @pytest.fixture + def parser(self): + """Get SGLang parser instance.""" + return get_node_parser("sglang") + + @pytest.fixture + def temp_dir(self): + """Create temporary directory for test files.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + def test_parser_type(self, parser): + """Test parser type property.""" + assert parser.backend_type == "sglang" + + def test_parse_prefill_batch_line(self, parser): + """Test parsing prefill batch log line.""" + line = "[2025-12-30 15:52:38 DP0 TP0 EP0] Prefill batch, #new-seq: 5, #new-token: 40960, #cached-token: 0, token usage: 0.78, #running-req: 5, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 5000.5" + + metrics = parser._parse_prefill_batch_line(line) + + assert metrics is not None + assert metrics["type"] == "prefill" + assert metrics["new_seq"] == 5 + assert metrics["new_token"] == 40960 + assert metrics["cached_token"] == 0 + assert metrics["token_usage"] == 0.78 + assert metrics["running_req"] == 5 + assert metrics["input_throughput"] == 5000.5 + + def test_parse_decode_batch_line(self, parser): + """Test parsing decode batch log line.""" + line = "[2025-12-30 15:52:38 DP0 TP0 EP0] Decode batch, #running-req: 10, #token: 512, token usage: 0.85, pre-allocated usage: 0.10, #prealloc-req: 2, #transfer-req: 0, #queue-req: 0, gen throughput (token/s): 1500.5" + + metrics = parser._parse_decode_batch_line(line) + + assert metrics is not None + assert metrics["type"] == "decode" + assert metrics["running_req"] == 10 + assert metrics["num_tokens"] == 512 + assert metrics["token_usage"] == 0.85 + assert metrics["preallocated_usage"] == 0.10 + assert metrics["gen_throughput"] == 1500.5 + + def test_parse_memory_line(self, parser): + """Test parsing memory log line.""" + line = "[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m avail mem=75.11 GB, mem usage=107.07 GB, KV size: 17.16 GB, #tokens: 524288" + + metrics = parser._parse_memory_line(line) + + assert metrics is not None + # This line has KV size, so it should be marked as kv_cache type + assert metrics["type"] == "kv_cache" + assert metrics["avail_mem_gb"] == 75.11 + assert metrics["mem_usage_gb"] == 107.07 + assert metrics["kv_cache_gb"] == 17.16 + assert metrics["kv_tokens"] == 524288 + + def test_parse_memory_line_without_kv(self, parser): + """Test parsing memory log line without KV info.""" + line = "[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m avail mem=75.11 GB, mem usage=107.07 GB" + + metrics = parser._parse_memory_line(line) + + assert metrics is not None + assert metrics["type"] == "memory" + assert metrics["avail_mem_gb"] == 75.11 + assert metrics["mem_usage_gb"] == 107.07 + + def test_parse_single_log(self, parser, temp_dir): + """Test parsing a complete SGLang log file.""" + log_content = """ +[2025-12-30 15:52:38 DP0 TP0 EP0] Starting SGLang server with server_args=ServerArgs(tp_size=8, dp_size=1, ep_size=1, model_path=/models/qwen3-32b) +[2025-12-30 15:52:40 DP0 TP0 EP0] Prefill batch, #new-seq: 5, #new-token: 40960, #cached-token: 0, token usage: 0.78, #running-req: 5, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 5000.5 +[2025-12-30 15:52:41 DP0 TP0 EP0] Decode batch, #running-req: 5, #token: 512, token usage: 0.85, gen throughput (token/s): 1500.5 +[2025-12-30 15:52:42 DP0 TP0 EP0] avail mem=75.11 GB, mem usage=107.07 GB + """ + + log_path = temp_dir / "eos0219_prefill_w0.out" + log_path.write_text(log_content) + + node = parser.parse_single_log(log_path) + + assert node is not 
None + assert node.node_name == "eos0219" + assert node.worker_type == "prefill" + assert node.worker_id == "w0" + assert len(node.batches) == 2 # 1 prefill + 1 decode + assert len(node.memory_snapshots) == 1 + assert node.config["tp_size"] == 8 + assert node.config["dp_size"] == 1 + assert node.config["ep_size"] == 1 + + def test_parse_launch_command(self, parser): + """Test parsing SGLang launch command.""" + log_content = """ +[CMD] python -m sglang.launch_server --model /models/qwen3-32b --tp-size 8 --dp-size 1 --host 10.0.0.1 --port 30000 --max-num-seqs 1024 --disaggregation-mode prefill + """ + + cmd = parser.parse_launch_command(log_content, "prefill") + + assert cmd is not None + assert cmd.backend_type == "sglang" + assert cmd.worker_type == "prefill" + assert cmd.extra_args["model_path"] == "/models/qwen3-32b" + assert cmd.extra_args["tp_size"] == 8 + assert cmd.extra_args["dp_size"] == 1 + assert cmd.extra_args["host"] == "10.0.0.1" + assert cmd.extra_args["port"] == 30000 + assert cmd.extra_args["max_num_seqs"] == 1024 + assert cmd.extra_args["disaggregation_mode"] == "prefill" + + def test_parse_timestamp(self, parser): + """Test parsing SGLang timestamp format.""" + from datetime import datetime + + # Test ISO 8601 format with microseconds and Z + ts1 = "2025-12-30T15:52:38.206058Z" + dt1 = parser.parse_timestamp(ts1) + assert isinstance(dt1, datetime) + assert dt1.year == 2025 + assert dt1.month == 12 + assert dt1.day == 30 + assert dt1.hour == 15 + assert dt1.minute == 52 + assert dt1.second == 38 + + # Test ISO 8601 format without Z + ts2 = "2025-12-30T15:52:38.206058" + dt2 = parser.parse_timestamp(ts2) + assert isinstance(dt2, datetime) + assert dt2.year == 2025 + + # Test ISO 8601 format without microseconds + ts3 = "2025-12-30T15:52:38" + dt3 = parser.parse_timestamp(ts3) + assert isinstance(dt3, datetime) + assert dt3.year == 2025 + assert dt3.hour == 15 + + def test_parse_timestamp_invalid(self, parser): + """Test parsing invalid timestamp raises ValueError.""" + import pytest + + with pytest.raises(ValueError): + parser.parse_timestamp("invalid-timestamp") + + with pytest.raises(ValueError): + parser.parse_timestamp("2025-13-40T25:99:99") # Invalid date/time + + def test_parse_dp_tp_ep_tag_full_format(self, parser): + """Test parsing full DP/TP/EP tag format.""" + line = "[2025-11-04 05:31:43 DP0 TP2 EP1] Prefill batch, #new-seq: 5" + + timestamp = parser._parse_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "2025-11-04 05:31:43" + assert dp == 0 + assert tp == 2 + assert ep == 1 + + def test_parse_dp_tp_ep_tag_simple_tp(self, parser): + """Test parsing simple TP-only format (1P4D style).""" + line = "[2025-11-04 07:05:55 TP0] Decode batch, #running-req: 10" + + timestamp = parser._parse_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "2025-11-04 07:05:55" + assert dp == 0 # Default + assert tp == 0 + assert ep == 0 # Default + + def test_parse_dp_tp_ep_tag_pipeline(self, parser): + """Test parsing pipeline parallelism format.""" + line = "[2025-12-08 14:34:44 PP3] Prefill batch, #new-seq: 8" + + timestamp = parser._parse_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "2025-12-08 14:34:44" + assert dp == 0 # Default + assert tp == 3 # PP mapped to TP + assert ep == 0 # Default + + def test_parse_dp_tp_ep_tag_no_tags(self, parser): + """Test parsing line without parallelism tags.""" + line = "[2m2025-12-30T15:52:38.206058Z[0m [32m 
INFO[0m Prefill batch" + + timestamp = parser._parse_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "2025-12-30T15:52:38.206058Z" # ISO format fallback + assert dp == 0 + assert tp == 0 + assert ep == 0 + + def test_parse_parallelism_wrapper(self, parser): + """Test _parse_parallelism_tags method.""" + # With full tags + line_with_tags = "[2025-11-04 05:31:43 DP1 TP2 EP3] Prefill batch" + dp, tp, ep = parser._parse_parallelism_tags(line_with_tags) + assert dp == 1 + assert tp == 2 + assert ep == 3 + + # Without tags - should default to 0 + line_without_tags = "Some log line without tags" + dp, tp, ep = parser._parse_parallelism_tags(line_without_tags) + assert dp == 0 + assert tp == 0 + assert ep == 0 + + def test_parse_prefill_batch_with_dp_tp_ep(self, parser): + """Test that prefill batch parsing extracts DP/TP/EP values.""" + line = "[2025-11-04 05:31:43 DP1 TP2 EP3] Prefill batch, #new-seq: 5, #new-token: 40960, #running-req: 5, input throughput (token/s): 5000.5" + + metrics = parser._parse_prefill_batch_line(line) + + assert metrics is not None + assert metrics["dp"] == 1 + assert metrics["tp"] == 2 + assert metrics["ep"] == 3 + assert metrics["timestamp"] == "2025-11-04 05:31:43" + assert metrics["type"] == "prefill" + assert metrics["new_seq"] == 5 + assert metrics["new_token"] == 40960 + + def test_parse_decode_batch_with_dp_tp_ep(self, parser): + """Test that decode batch parsing extracts DP/TP/EP values.""" + line = "[2025-11-04 05:31:45 DP0 TP1 EP0] Decode batch, #running-req: 10, #token: 512, gen throughput (token/s): 1500.5" + + metrics = parser._parse_decode_batch_line(line) + + assert metrics is not None + assert metrics["dp"] == 0 + assert metrics["tp"] == 1 + assert metrics["ep"] == 0 + assert metrics["timestamp"] == "2025-11-04 05:31:45" + assert metrics["type"] == "decode" + assert metrics["running_req"] == 10 + + def test_parse_memory_with_dp_tp_ep(self, parser): + """Test that memory line parsing extracts DP/TP/EP values.""" + line = "[2025-11-04 05:31:50 DP0 TP2 EP1] avail mem=75.11 GB, mem usage=107.07 GB, KV size: 17.16 GB" + + metrics = parser._parse_memory_line(line) + + assert metrics is not None + assert metrics["dp"] == 0 + assert metrics["tp"] == 2 + assert metrics["ep"] == 1 + assert metrics["timestamp"] == "2025-11-04 05:31:50" + assert metrics["type"] == "kv_cache" + assert metrics["kv_cache_gb"] == 17.16 + + def test_parse_batch_fallback_to_iso_timestamp(self, parser): + """Test that parser supports ISO timestamp fallback.""" + # Prefill batch with ISO timestamp (old format) - should parse with default parallelism + line = "[2m2025-12-30T15:52:38.206058Z[0m [32m INFO[0m Prefill batch, #new-seq: 5, #new-token: 40960" + + metrics = parser._parse_prefill_batch_line(line) + + # Should parse successfully with ISO timestamp and default parallelism tags + assert metrics is not None + assert metrics["timestamp"] == "2025-12-30T15:52:38.206058Z" + assert metrics["dp"] == 0 + assert metrics["tp"] == 0 + assert metrics["ep"] == 0 + + def test_parse_batch_with_simple_tp_format(self, parser): + """Test parsing batch with simple TP format (1P4D disaggregated style).""" + line = "[2025-11-04 07:05:55 TP0] Prefill batch, #new-seq: 3, #new-token: 24576, input throughput (token/s): 3000.0" + + metrics = parser._parse_prefill_batch_line(line) + + assert metrics is not None + assert metrics["dp"] == 0 + assert metrics["tp"] == 0 + assert metrics["ep"] == 0 + assert metrics["new_token"] == 24576 + + def 
test_parse_batch_with_pipeline_format(self, parser): + """Test parsing batch with pipeline parallelism format.""" + line = "[2025-12-08 14:34:44 PP2] Prefill batch, #new-seq: 4, #new-token: 32768" + + metrics = parser._parse_prefill_batch_line(line) + + assert metrics is not None + assert metrics["dp"] == 0 + assert metrics["tp"] == 2 # PP mapped to TP + assert metrics["ep"] == 0 + assert metrics["new_token"] == 32768 + + def test_parse_single_log_with_parallelism_tags(self, parser, temp_dir): + """Test parsing complete log file with parallelism tags.""" + log_content = """ +[2025-11-04 05:31:43 DP0 TP0 EP0] Starting SGLang server with server_args=ServerArgs(tp_size=8, dp_size=1, ep_size=1, model_path=/models/qwen3-32b) +[2025-11-04 05:31:45 DP0 TP0 EP0] Prefill batch, #new-seq: 5, #new-token: 40960, #cached-token: 0, token usage: 0.78, #running-req: 5, #queue-req: 0, #prealloc-req: 0, #inflight-req: 0, input throughput (token/s): 5000.5 +[2025-11-04 05:31:46 DP0 TP0 EP0] Decode batch, #running-req: 5, #token: 512, token usage: 0.85, gen throughput (token/s): 1500.5 +[2025-11-04 05:31:47 DP0 TP0 EP0] avail mem=75.11 GB, mem usage=107.07 GB + """ + + log_path = temp_dir / "test_node_prefill_w0.out" + log_path.write_text(log_content) + + node = parser.parse_single_log(log_path) + + assert node is not None + assert node.node_name == "test_node" + assert node.worker_type == "prefill" + assert node.worker_id == "w0" + + # Check that batches have correct DP/TP/EP values + assert len(node.batches) == 2 + for batch in node.batches: + assert batch.dp == 0 + assert batch.tp == 0 + assert batch.ep == 0 + + # Check memory snapshots have correct DP/TP/EP values + assert len(node.memory_snapshots) == 1 + assert node.memory_snapshots[0].dp == 0 + assert node.memory_snapshots[0].tp == 0 + assert node.memory_snapshots[0].ep == 0 + + # Verify config extraction still works + assert node.config["tp_size"] == 8 + assert node.config["dp_size"] == 1 + assert node.config["ep_size"] == 1 + + def test_extract_node_info_from_filename(self, parser): + """Test extracting node info from filename.""" + result = parser._extract_node_info_from_filename("eos0219_prefill_w0.out") + + assert result is not None + assert result["node"] == "eos0219" + assert result["worker_type"] == "prefill" + assert result["worker_id"] == "w0" + + +class TestTRTLLMNodeParser: + """Test TRTLLM node log parser.""" + + @pytest.fixture + def parser(self): + """Get TRTLLM parser instance.""" + return get_node_parser("trtllm") + + @pytest.fixture + def temp_dir(self): + """Create temporary directory for test files.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + def test_parser_type(self, parser): + """Test parser type property.""" + assert parser.backend_type == "trtllm" + + def test_parse_iteration_logs(self, parser): + """Test parsing TRTLLM iteration logs.""" + log_content = """ +[01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 5559, host_step_time = 50.5ms, num_scheduled_requests: 3, states = {'num_ctx_requests': 2, 'num_ctx_tokens': 16384, 'num_generation_tokens': 0} +[01/16/2026-06:20:18] [TRT-LLM] [RANK 0] [I] iter = 5560, host_step_time = 20.0ms, num_scheduled_requests: 5, states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 512} + """ + + batches = parser._parse_iteration_logs(log_content, "prefill") + + assert len(batches) == 2 + + # First batch (prefill) + assert batches[0].batch_type == "prefill" + assert batches[0].running_req == 3 + assert batches[0].new_token == 16384 + assert 
batches[0].input_throughput is not None + assert batches[0].input_throughput > 300000 # 16384 * 1000 / 50.5 + + # Second batch (decode) + assert batches[1].batch_type == "decode" + assert batches[1].running_req == 5 + assert batches[1].num_tokens == 512 + assert batches[1].gen_throughput is not None + assert batches[1].gen_throughput > 25000 # 512 * 1000 / 20 + + def test_parse_memory_info(self, parser): + """Test parsing TRTLLM memory information.""" + log_content = """ +[01/16/2026-06:20:17] [TRT-LLM] [I] Peak memory during memory usage profiling (torch + non-torch): 91.46 GiB, available KV cache memory when calculating max tokens: 41.11 GiB, fraction is set 0.85, kv size is 35136. device total memory 139.81 GiB +[MemUsageChange] Allocated 41.11 GiB for max tokens (524288) + """ + + memory_snapshots = parser._parse_memory_info(log_content) + + assert len(memory_snapshots) == 2 + + # First snapshot (peak memory) + assert memory_snapshots[0].metric_type == "memory" + assert memory_snapshots[0].mem_usage_gb == 91.46 + assert memory_snapshots[0].kv_cache_gb == 41.11 + assert memory_snapshots[0].avail_mem_gb > 48 # 139.81 - 91.46 + + # Second snapshot (KV allocation) + assert memory_snapshots[1].metric_type == "kv_cache" + assert memory_snapshots[1].kv_cache_gb == 41.11 + assert memory_snapshots[1].kv_tokens == 524288 + + def test_parse_single_log(self, parser, temp_dir): + """Test parsing a complete TRTLLM log file.""" + log_content = """ +[33mRank0 run python3 -m dynamo.trtllm --model-path /model --served-model-name dsr1-fp8 --disaggregation-mode prefill[0m +Initializing the worker with config: Config(tensor_parallel_size=8, pipeline_parallel_size=1, expert_parallel_size=1, max_batch_size=256) +TensorRT-LLM engine args: {'tensor_parallel_size': 8, 'pipeline_parallel_size': 1, 'moe_expert_parallel_size': 1, 'max_batch_size': 256, 'max_seq_len': 131072} +[01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 5559, num_scheduled_requests: 3, states = {'num_ctx_requests': 2, 'num_ctx_tokens': 16384, 'num_generation_tokens': 0} + """ + + log_path = temp_dir / "worker-0_prefill_w0.out" + log_path.write_text(log_content) + + node = parser.parse_single_log(log_path) + + assert node is not None + assert node.node_name == "worker-0" + assert node.worker_type == "prefill" + assert node.worker_id == "w0" + assert len(node.batches) == 1 + assert node.config["tp_size"] == 8 + assert node.config["pp_size"] == 1 + assert node.config["ep_size"] == 1 + assert node.config["max_batch_size"] == 256 + assert node.config["max_seq_len"] == 131072 + + def test_parse_launch_command(self, parser): + """Test parsing TRTLLM launch command.""" + log_content = """ +[CMD] python3 -m dynamo.trtllm --model-path /models/qwen3-32b --served-model-name dsr1-fp8 --disaggregation-mode prefill --host 10.0.0.1 --port 30000 +TensorRT-LLM engine args: {'tensor_parallel_size': 8, 'pipeline_parallel_size': 1, 'max_batch_size': 256, 'max_seq_len': 131072} + """ + + cmd = parser.parse_launch_command(log_content, "prefill") + + assert cmd is not None + assert cmd.backend_type == "trtllm" + assert cmd.worker_type == "prefill" + assert cmd.extra_args["model_path"] == "/models/qwen3-32b" + assert cmd.extra_args["served_model_name"] == "dsr1-fp8" + assert cmd.extra_args["disaggregation_mode"] == "prefill" + assert cmd.extra_args["host"] == "10.0.0.1" + assert cmd.extra_args["port"] == 30000 + assert cmd.extra_args["tp_size"] == 8 + assert cmd.extra_args["pp_size"] == 1 + assert cmd.extra_args["max_num_seqs"] == 256 + assert 
cmd.extra_args["max_model_len"] == 131072 + + def test_extract_node_info_from_filename(self, parser): + """Test extracting node info from filename.""" + result = parser._extract_node_info_from_filename("worker-0_decode_w1.err") + + assert result is not None + assert result["node"] == "worker-0" + assert result["worker_type"] == "decode" + assert result["worker_id"] == "w1" + + def test_parse_timestamp(self, parser): + """Test parsing TRTLLM timestamp format.""" + from datetime import datetime + + # Test MM/DD/YYYY-HH:MM:SS format + ts1 = "01/23/2026-08:04:38" + dt1 = parser.parse_timestamp(ts1) + assert isinstance(dt1, datetime) + assert dt1.year == 2026 + assert dt1.month == 1 + assert dt1.day == 23 + assert dt1.hour == 8 + assert dt1.minute == 4 + assert dt1.second == 38 + + # Test another timestamp + ts2 = "12/31/2025-23:59:59" + dt2 = parser.parse_timestamp(ts2) + assert isinstance(dt2, datetime) + assert dt2.year == 2025 + assert dt2.month == 12 + assert dt2.day == 31 + assert dt2.hour == 23 + assert dt2.minute == 59 + assert dt2.second == 59 + + # Test with leading zeros + ts3 = "01/01/2026-00:00:00" + dt3 = parser.parse_timestamp(ts3) + assert isinstance(dt3, datetime) + assert dt3.year == 2026 + assert dt3.month == 1 + assert dt3.day == 1 + assert dt3.hour == 0 + assert dt3.minute == 0 + assert dt3.second == 0 + + def test_parse_timestamp_invalid(self, parser): + """Test parsing invalid timestamp raises ValueError.""" + import pytest + + with pytest.raises(ValueError): + parser.parse_timestamp("invalid-timestamp") + + with pytest.raises(ValueError): + parser.parse_timestamp("13/40/2026-25:99:99") # Invalid date/time + + with pytest.raises(ValueError): + parser.parse_timestamp("2025-12-30 15:52:38") # Wrong format + + def test_timestamp_preserved_in_batches(self, parser): + """Test that timestamps are preserved in their original format.""" + log_content = """ +[01/16/2026-06:20:17] [TRT-LLM] [RANK 0] [I] iter = 5559, host_step_time = 50.5ms, num_scheduled_requests: 3, states = {'num_ctx_requests': 2, 'num_ctx_tokens': 16384, 'num_generation_tokens': 0} + """ + + batches = parser._parse_iteration_logs(log_content, "prefill") + + assert len(batches) == 1 + # Timestamp should be preserved in original format + assert batches[0].timestamp == "01/16/2026-06:20:17" + + def test_timestamp_preserved_in_memory(self, parser): + """Test that timestamps are preserved in memory snapshots.""" + log_content = """ +[01/16/2026-06:20:17] [TRT-LLM] [I] Peak memory during memory usage profiling (torch + non-torch): 91.46 GiB, available KV cache memory when calculating max tokens: 41.11 GiB, fraction is set 0.85, kv size is 35136. 
device total memory 139.81 GiB + """ + + memory_snapshots = parser._parse_memory_info(log_content) + + assert len(memory_snapshots) == 1 + # Timestamp should be preserved in original format + assert memory_snapshots[0].timestamp == "01/16/2026-06:20:17" + + def test_parse_dp_tp_ep_tag_full_format(self, parser): + """Test parsing full DP/TP/EP tag format in TRTLLM logs.""" + line = "[01/23/2026-08:04:38 DP1 TP2 EP3] [TRT-LLM] iter = 100, num_scheduled_requests: 5, states = {'num_ctx_requests': 2, 'num_ctx_tokens': 1024, 'num_generation_tokens': 0}" + + timestamp = parser._extract_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "01/23/2026-08:04:38" + assert dp == 1 + assert tp == 2 + assert ep == 3 + + def test_parse_dp_tp_ep_tag_simple_tp(self, parser): + """Test parsing simple TP-only format (1P4D style) in TRTLLM logs.""" + line = "[01/23/2026-08:04:38 TP0] [TRT-LLM] iter = 100, num_scheduled_requests: 10, states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 512}" + + timestamp = parser._extract_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "01/23/2026-08:04:38" + assert dp == 0 # Default + assert tp == 0 + assert ep == 0 # Default + + def test_parse_dp_tp_ep_tag_pipeline(self, parser): + """Test parsing pipeline parallelism format in TRTLLM logs.""" + line = "[01/23/2026-08:04:38 PP3] [TRT-LLM] iter = 100, num_scheduled_requests: 8, states = {'num_ctx_requests': 0, 'num_ctx_tokens': 0, 'num_generation_tokens': 256}" + + timestamp = parser._extract_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "01/23/2026-08:04:38" + assert dp == 0 # Default + assert tp == 3 # PP mapped to TP + assert ep == 0 # Default + + def test_parse_dp_tp_ep_tag_no_tags(self, parser): + """Test parsing line without parallelism tags in TRTLLM logs.""" + line = "[01/23/2026-08:04:38] [TRT-LLM] iter = 100, num_scheduled_requests: 5" + + timestamp = parser._extract_timestamp(line) + dp, tp, ep = parser._parse_parallelism_tags(line) + + assert timestamp == "01/23/2026-08:04:38" + assert dp == 0 + assert tp == 0 + assert ep == 0 + + def test_parse_iteration_with_dp_tp_ep(self, parser): + """Test that iteration parsing extracts DP/TP/EP values.""" + log_content = """ +[01/23/2026-08:04:38 DP0 TP1 EP0] [TRT-LLM] [RANK 0] [I] iter = 100, num_scheduled_requests: 5, states = {'num_ctx_requests': 2, 'num_ctx_tokens': 1024, 'num_generation_tokens': 0}, host_step_time = 50.0ms + """ + + batches = parser._parse_iteration_logs(log_content, "prefill") + + assert len(batches) == 1 + assert batches[0].dp == 0 + assert batches[0].tp == 1 + assert batches[0].ep == 0 + assert batches[0].timestamp == "01/23/2026-08:04:38" + + def test_parse_memory_with_dp_tp_ep(self, parser): + """Test that memory parsing extracts DP/TP/EP values.""" + log_content = """ +[01/23/2026-08:04:38 DP0 TP2 EP1] [TRT-LLM] [RANK 0] [I] Peak memory during memory usage profiling (torch + non-torch): 91.46 GiB, available KV cache memory when calculating max tokens: 41.11 GiB, fraction is set 0.85, kv size is 35136. 
device total memory 139.81 GiB + """ + + memory_snapshots = parser._parse_memory_info(log_content) + + assert len(memory_snapshots) >= 1 + assert memory_snapshots[0].dp == 0 + assert memory_snapshots[0].tp == 2 + assert memory_snapshots[0].ep == 1 + assert memory_snapshots[0].timestamp == "01/23/2026-08:04:38" + + +class TestBenchmarkLaunchCommand: + """Test BenchmarkLaunchCommand dataclass.""" + + def test_create_benchmark_launch_command(self): + """Test creating BenchmarkLaunchCommand.""" + cmd = BenchmarkLaunchCommand( + benchmark_type="sa-bench", + raw_command="python -m sglang.bench_serving --model test", + extra_args={"model": "test", "num_prompts": 1000}, + ) + + assert cmd.benchmark_type == "sa-bench" + assert "sglang.bench_serving" in cmd.raw_command + assert cmd.extra_args["model"] == "test" + assert cmd.extra_args["num_prompts"] == 1000 + + +class TestNodeLaunchCommand: + """Test NodeLaunchCommand dataclass.""" + + def test_create_node_launch_command(self): + """Test creating NodeLaunchCommand.""" + cmd = NodeLaunchCommand( + backend_type="sglang", + worker_type="prefill", + raw_command="python -m sglang.launch_server --model test", + extra_args={"model_path": "test", "tp_size": 8}, + ) + + assert cmd.backend_type == "sglang" + assert cmd.worker_type == "prefill" + assert "sglang.launch_server" in cmd.raw_command + assert cmd.extra_args["model_path"] == "test" + assert cmd.extra_args["tp_size"] == 8 + + +class TestParserIntegration: + """Integration tests for parser workflows.""" + + @pytest.fixture + def temp_dir(self): + """Create temporary directory for test files.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + def test_parse_complete_sa_bench_run(self, temp_dir): + """Test parsing a complete SA-Bench run with multiple concurrencies.""" + parser = get_benchmark_parser("sa-bench") + + # Use test harness to create run directory + run_dir = ParserTestHarness.create_sa_bench_run(temp_dir, concurrencies=[50, 100, 200]) + + # Parse all results + results = parser.parse_result_directory(run_dir) + + assert len(results) == 3 + # Verify it's sorted by concurrency + assert [r["max_concurrency"] for r in results] == [50, 100, 200] + # Verify throughput scales with concurrency + assert results[0]["output_throughput"] == 2500.0 + assert results[1]["output_throughput"] == 5000.0 + assert results[2]["output_throughput"] == 10000.0 + + # Verify using harness utility + for result in results: + ParserTestHarness.assert_valid_benchmark_results( + result, + expected_fields=[ + "output_throughput", + "mean_ttft_ms", + "mean_itl_ms", + "p99_ttft_ms", + ], + ) + + def test_parse_mooncake_router_run(self, temp_dir): + """Test parsing a complete Mooncake Router run.""" + parser = get_benchmark_parser("mooncake-router") + + # Use test harness to create run directory + run_dir = ParserTestHarness.create_mooncake_router_run(temp_dir) + + # Find and parse AIPerf results + aiperf_files = parser.find_aiperf_results(run_dir) + assert len(aiperf_files) == 1 + + result = parser.parse_result_json(aiperf_files[0]) + ParserTestHarness.assert_valid_benchmark_results( + result, + expected_fields=["output_tps", "request_throughput", "mean_ttft_ms"], + ) + + def test_parse_sglang_node_logs_multiple_workers(self, temp_dir): + """Test parsing multiple SGLang node log files.""" + parser = get_node_parser("sglang") + + # Use test harness to create log directory + log_dir = ParserTestHarness.create_sglang_node_logs(temp_dir, num_prefill=2, num_decode=4) + + # Parse all logs + nodes = 
parser.parse_logs(log_dir) + + assert len(nodes) == 6 # 2 prefill + 4 decode + worker_types = [node.worker_type for node in nodes] + assert worker_types.count("prefill") == 2 + assert worker_types.count("decode") == 4 + + # Verify each node + for node in nodes: + ParserTestHarness.assert_valid_node_metrics(node, min_batches=1) + + def test_parse_trtllm_node_logs_multiple_workers(self, temp_dir): + """Test parsing multiple TRTLLM node log files.""" + parser = get_node_parser("trtllm") + + # Use test harness to create log directory + log_dir = ParserTestHarness.create_trtllm_node_logs(temp_dir, num_prefill=2, num_decode=4) + + # Parse all logs + nodes = parser.parse_logs(log_dir) + + assert len(nodes) == 6 # 2 prefill + 4 decode + worker_types = [node.worker_type for node in nodes] + assert worker_types.count("prefill") == 2 + assert worker_types.count("decode") == 4 + + # Verify each node has config + for node in nodes: + ParserTestHarness.assert_valid_node_metrics(node, min_batches=1) + assert "tp_size" in node.config or "max_batch_size" in node.config + + +class TestParserWithFixtures: + """Tests using sample data fixtures.""" + + def test_sa_bench_sample_data(self): + """Test SA-Bench parser with sample data.""" + parser = get_benchmark_parser("sa-bench") + + # Parse launch command from sample + log_content = SampleSABenchData.benchmark_out_content() + cmd = parser.parse_launch_command(log_content) + + assert cmd is not None + assert cmd.benchmark_type == "sa-bench" + assert "model" in cmd.extra_args + assert "base_url" in cmd.extra_args + + def test_mooncake_router_sample_data(self): + """Test Mooncake Router parser with sample data.""" + parser = get_benchmark_parser("mooncake-router") + + # Parse launch command from sample + log_content = SampleMooncakeRouterData.benchmark_out_content() + cmd = parser.parse_launch_command(log_content) + + assert cmd is not None + assert cmd.benchmark_type == "mooncake-router" + + def test_sglang_prefill_sample_data(self): + """Test SGLang parser with prefill sample data.""" + parser = get_node_parser("sglang") + + # Parse launch command from sample + log_content = SampleSGLangLogData.prefill_log_content() + cmd = parser.parse_launch_command(log_content, "prefill") + + assert cmd is not None + assert cmd.backend_type == "sglang" + assert cmd.worker_type == "prefill" + assert "tp_size" in cmd.extra_args + assert cmd.extra_args["tp_size"] == 8 + + def test_sglang_decode_sample_data(self): + """Test SGLang parser with decode sample data.""" + parser = get_node_parser("sglang") + + # Parse launch command from sample + log_content = SampleSGLangLogData.decode_log_content() + cmd = parser.parse_launch_command(log_content, "decode") + + assert cmd is not None + assert cmd.backend_type == "sglang" + assert cmd.worker_type == "decode" + assert "tp_size" in cmd.extra_args + + def test_trtllm_prefill_sample_data(self): + """Test TRTLLM parser with prefill sample data.""" + parser = get_node_parser("trtllm") + + # Parse launch command from sample + log_content = SampleTRTLLMLogData.prefill_log_content() + cmd = parser.parse_launch_command(log_content, "prefill") + + assert cmd is not None + assert cmd.backend_type == "trtllm" + assert cmd.worker_type == "prefill" + assert "disaggregation_mode" in cmd.extra_args + assert cmd.extra_args["disaggregation_mode"] == "prefill" + + def test_trtllm_decode_sample_data(self): + """Test TRTLLM parser with decode sample data.""" + parser = get_node_parser("trtllm") + + # Parse launch command from sample + log_content = 
+            assert "tp_size" in node.config or "max_batch_size" in node.config
+
+
+class TestParserWithFixtures:
+    """Tests using sample data fixtures."""
+
+    def test_sa_bench_sample_data(self):
+        """Test SA-Bench parser with sample data."""
+        parser = get_benchmark_parser("sa-bench")
+
+        # Parse launch command from sample
+        log_content = SampleSABenchData.benchmark_out_content()
+        cmd = parser.parse_launch_command(log_content)
+
+        assert cmd is not None
+        assert cmd.benchmark_type == "sa-bench"
+        assert "model" in cmd.extra_args
+        assert "base_url" in cmd.extra_args
+
+    def test_mooncake_router_sample_data(self):
+        """Test Mooncake Router parser with sample data."""
+        parser = get_benchmark_parser("mooncake-router")
+
+        # Parse launch command from sample
+        log_content = SampleMooncakeRouterData.benchmark_out_content()
+        cmd = parser.parse_launch_command(log_content)
+
+        assert cmd is not None
+        assert cmd.benchmark_type == "mooncake-router"
+
+    def test_sglang_prefill_sample_data(self):
+        """Test SGLang parser with prefill sample data."""
+        parser = get_node_parser("sglang")
+
+        # Parse launch command from sample
+        log_content = SampleSGLangLogData.prefill_log_content()
+        cmd = parser.parse_launch_command(log_content, "prefill")
+
+        assert cmd is not None
+        assert cmd.backend_type == "sglang"
+        assert cmd.worker_type == "prefill"
+        assert "tp_size" in cmd.extra_args
+        assert cmd.extra_args["tp_size"] == 8
+
+    def test_sglang_decode_sample_data(self):
+        """Test SGLang parser with decode sample data."""
+        parser = get_node_parser("sglang")
+
+        # Parse launch command from sample
+        log_content = SampleSGLangLogData.decode_log_content()
+        cmd = parser.parse_launch_command(log_content, "decode")
+
+        assert cmd is not None
+        assert cmd.backend_type == "sglang"
+        assert cmd.worker_type == "decode"
+        assert "tp_size" in cmd.extra_args
+
+    def test_trtllm_prefill_sample_data(self):
+        """Test TRTLLM parser with prefill sample data."""
+        parser = get_node_parser("trtllm")
+
+        # Parse launch command from sample
+        log_content = SampleTRTLLMLogData.prefill_log_content()
+        cmd = parser.parse_launch_command(log_content, "prefill")
+
+        assert cmd is not None
+        assert cmd.backend_type == "trtllm"
+        assert cmd.worker_type == "prefill"
+        assert "disaggregation_mode" in cmd.extra_args
+        assert cmd.extra_args["disaggregation_mode"] == "prefill"
+
+    def test_trtllm_decode_sample_data(self):
+        """Test TRTLLM parser with decode sample data."""
+        parser = get_node_parser("trtllm")
+
+        # Parse launch command from sample
+        log_content = SampleTRTLLMLogData.decode_log_content()
+        cmd = parser.parse_launch_command(log_content, "decode")
+
+        assert cmd is not None
+        assert cmd.backend_type == "trtllm"
+        assert cmd.worker_type == "decode"
diff --git a/tests/test_process_registry.py b/tests/test_process_registry.py
index 0e556ec3..45e3e793 100644
--- a/tests/test_process_registry.py
+++ b/tests/test_process_registry.py
@@ -7,8 +7,6 @@
 from subprocess import Popen
 from unittest.mock import MagicMock
 
-import pytest
-
 from srtctl.core.processes import ManagedProcess, ProcessRegistry
diff --git a/tests/test_profiling.py b/tests/test_profiling.py
index 3002ccb0..fe1d2f60 100644
--- a/tests/test_profiling.py
+++ b/tests/test_profiling.py
@@ -387,4 +387,3 @@ def test_profiling_script_exists(self):
         """Profiling script exists."""
         script = SCRIPTS_DIR / "profiling" / "profile.sh"
         assert script.exists()
-
diff --git a/tests/test_runloader_parsers.py b/tests/test_runloader_parsers.py
new file mode 100644
index 00000000..4b7b4fff
--- /dev/null
+++ b/tests/test_runloader_parsers.py
@@ -0,0 +1,330 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Tests for RunLoader integration with parsers.
+
+Tests that the RunLoader correctly uses the parser infrastructure.
+"""
+
+import json
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from analysis.srtlog.run_loader import RunLoader
+from tests.fixtures_parsers import ParserTestHarness, SampleSABenchData
+
+
+class TestRunLoaderWithParsers:
+    """Test RunLoader integration with parser infrastructure."""
+
+    @pytest.fixture
+    def temp_dir(self):
+        """Create temporary directory for test files."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            yield Path(tmpdir)
+
+    @pytest.fixture
+    def sample_run_metadata(self):
+        """Sample run metadata JSON."""
+        return {
+            "job_id": "12345",
+            "job_name": "test_run",
+            "generated_at": "20250126_120000",
+            "model": {
+                "path": "/models/test",
+                "container": "sglang:latest",
+            },
+            "resources": {
+                "prefill_nodes": 1,
+                "decode_nodes": 1,
+                "prefill_workers": 2,
+                "decode_workers": 4,
+                "agg_workers": 0,
+                "gpus_per_node": 8,
+                "gpu_type": "H100",
+            },
+            "benchmark": {
+                "type": "sa-bench",
+                "isl": "8192",
+                "osl": "1024",
+                "concurrencies": "50x100x200",
+                "req-rate": "inf",
+            },
+            "tags": ["test"],
+        }
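+
+    # Note: the tests below assume the run-directory naming convention
+    # "{job_id}_{prefill_workers}P_{decode_workers}D_{timestamp}"
+    # (e.g. "12345_2P_4D_20250126_120000"), which RunLoader.load_single()
+    # receives as the run identifier.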
+
+    def test_parse_sa_bench_with_parser(self, temp_dir, sample_run_metadata):
+        """Test that RunLoader uses SA-Bench parser correctly."""
+        # Create run directory
+        run_dir = temp_dir / "12345_2P_4D_20250126_120000"
+        run_dir.mkdir()
+
+        # Create metadata JSON
+        metadata_path = run_dir / "12345.json"
+        with open(metadata_path, "w") as f:
+            json.dump(sample_run_metadata, f)
+
+        # Create benchmark results using test harness
+        bench_dir = run_dir / "sa-bench_isl_8192_osl_1024"
+        bench_dir.mkdir()
+
+        # Create result JSON files directly in bench_dir
+        for concurrency in [50, 100, 200]:
+            result_data = SampleSABenchData.result_json(concurrency)
+            result_path = bench_dir / f"result_c{concurrency}.json"
+            with open(result_path, "w") as f:
+                json.dump(result_data, f)
+
+        # Load the run
+        loader = RunLoader(str(temp_dir))
+        run = loader.load_single("12345_2P_4D_20250126_120000")
+
+        # Verify run was loaded
+        assert run is not None
+        assert run.job_id == "12345"
+
+        # Verify benchmark results were parsed
+        assert len(run.profiler.output_tps) == 3
+        assert run.profiler.output_tps[0] == 2500.0  # 50 * 50
+        assert run.profiler.output_tps[1] == 5000.0  # 100 * 50
+        assert run.profiler.output_tps[2] == 10000.0  # 200 * 50
+
+        # Verify concurrencies
+        assert run.profiler.concurrency_values == [50, 100, 200]
+
+    def test_load_all_runs_with_parsers(self, temp_dir, sample_run_metadata):
+        """Test loading multiple runs with parser infrastructure."""
+        # Create multiple run directories
+        for job_id in [12345, 12346]:
+            run_dir = temp_dir / f"{job_id}_2P_4D_20250126_120000"
+            run_dir.mkdir()
+
+            # Create metadata
+            metadata = sample_run_metadata.copy()
+            metadata["job_id"] = str(job_id)
+            metadata_path = run_dir / f"{job_id}.json"
+            with open(metadata_path, "w") as f:
+                json.dump(metadata, f)
+
+            # Create benchmark results
+            bench_dir = run_dir / "sa-bench_isl_8192_osl_1024"
+            bench_dir.mkdir()
+
+            for concurrency in [50, 100]:
+                result_data = SampleSABenchData.result_json(concurrency)
+                result_path = bench_dir / f"result_c{concurrency}.json"
+                with open(result_path, "w") as f:
+                    json.dump(result_data, f)
+
+        # Load all runs
+        loader = RunLoader(str(temp_dir))
+        runs = loader.load_all()
+
+        # Verify both runs were loaded
+        assert len(runs) == 2
+        job_ids = {run.job_id for run in runs}
+        assert "12345" in job_ids
+        assert "12346" in job_ids
+
+        # Verify each run has benchmark data
+        for run in runs:
+            assert len(run.profiler.output_tps) == 2
+
+    def test_parser_fallback_to_manual(self, temp_dir, sample_run_metadata):
+        """Test fallback to manual parsing when parser fails."""
+        # Create run directory
+        run_dir = temp_dir / "12345_2P_4D_20250126_120000"
+        run_dir.mkdir()
+
+        # Create metadata
+        metadata_path = run_dir / "12345.json"
+        with open(metadata_path, "w") as f:
+            json.dump(sample_run_metadata, f)
+
+        # Create benchmark results with unknown benchmark type
+        bench_dir = run_dir / "unknown-bench_isl_8192_osl_1024"
+        bench_dir.mkdir()
+
+        # Create result JSON file
+        result_data = SampleSABenchData.result_json(100)
+        result_path = bench_dir / "result_c100.json"
+        with open(result_path, "w") as f:
+            json.dump(result_data, f)
+
+        # Load the run - it should fall back to manual parsing
+        loader = RunLoader(str(temp_dir))
+        run = loader.load_single("12345_2P_4D_20250126_120000")
+
+        # Verify the run was loaded via the manual fallback
+        assert run is not None
+        # Note: the fallback won't find results in the unknown-bench directory,
+        # but it shouldn't crash.
+
+    def test_load_node_metrics_sglang(self, temp_dir, sample_run_metadata):
+        """Test loading node metrics for SGLang runs."""
+        # Create run directory
+        run_dir = temp_dir / "12345_2P_4D_20250126_120000"
+        run_dir.mkdir()
+
+        # Create metadata
+        metadata = sample_run_metadata.copy()
+        metadata["model"]["container"] = "sglang:latest"
+        metadata_path = run_dir / "12345.json"
+        with open(metadata_path, "w") as f:
+            json.dump(metadata, f)
+
+        # Create logs subdirectory
+        logs_dir = run_dir / "logs"
+        logs_dir.mkdir()
+
+        # Create SGLang node logs using test harness
+        ParserTestHarness.create_sglang_node_logs(logs_dir, num_prefill=2, num_decode=4)
+
+        # Load node metrics
+        loader = RunLoader(str(temp_dir))
+        nodes = loader.load_node_metrics(str(run_dir), backend_type="sglang")
+
+        # Verify nodes were loaded
+        assert len(nodes) == 6  # 2 prefill + 4 decode
+        worker_types = [node.worker_type for node in nodes]
+        assert worker_types.count("prefill") == 2
+        assert worker_types.count("decode") == 4
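+
+    # Note: the container strings written into the metadata here ("sglang:latest",
+    # "trtllm:latest") are what the loader's backend auto-detection presumably
+    # keys off, as exercised by test_load_node_metrics_for_run below.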
+
+    def test_load_node_metrics_trtllm(self, temp_dir, sample_run_metadata):
+        """Test loading node metrics for TRTLLM runs."""
+        # Create run directory
+        run_dir = temp_dir / "12345_2P_4D_20250126_120000"
+        run_dir.mkdir()
+
+        # Create metadata
+        metadata = sample_run_metadata.copy()
+        metadata["model"]["container"] = "trtllm:latest"
+        metadata_path = run_dir / "12345.json"
+        with open(metadata_path, "w") as f:
+            json.dump(metadata, f)
+
+        # Create logs subdirectory
+        logs_dir = run_dir / "logs"
+        logs_dir.mkdir()
+
+        # Create TRTLLM node logs using test harness
+        ParserTestHarness.create_trtllm_node_logs(logs_dir, num_prefill=2, num_decode=4)
+
+        # Load node metrics
+        loader = RunLoader(str(temp_dir))
+        nodes = loader.load_node_metrics(str(run_dir), backend_type="trtllm")
+
+        # Verify nodes were loaded
+        assert len(nodes) == 6  # 2 prefill + 4 decode
+        worker_types = [node.worker_type for node in nodes]
+        assert worker_types.count("prefill") == 2
+        assert worker_types.count("decode") == 4
+
+    def test_load_node_metrics_for_run(self, temp_dir, sample_run_metadata):
+        """Test loading node metrics with automatic backend detection."""
+        # Create run directory
+        run_dir = temp_dir / "12345_2P_4D_20250126_120000"
+        run_dir.mkdir()
+
+        # Create metadata with SGLang container
+        metadata = sample_run_metadata.copy()
+        metadata["model"]["container"] = "sglang:v0.2.0"
+        metadata_path = run_dir / "12345.json"
+        with open(metadata_path, "w") as f:
+            json.dump(metadata, f)
+
+        # Create benchmark results
+        bench_dir = run_dir / "sa-bench_isl_8192_osl_1024"
+        bench_dir.mkdir()
+        result_data = SampleSABenchData.result_json(100)
+        result_path = bench_dir / "result_c100.json"
+        with open(result_path, "w") as f:
+            json.dump(result_data, f)
+
+        # Create logs subdirectory with SGLang logs
+        logs_dir = run_dir / "logs"
+        logs_dir.mkdir()
+        ParserTestHarness.create_sglang_node_logs(logs_dir, num_prefill=1, num_decode=2)
+
+        # Load the run
+        loader = RunLoader(str(temp_dir))
+        run = loader.load_single("12345_2P_4D_20250126_120000")
+
+        # Load node metrics with automatic detection
+        nodes = loader.load_node_metrics_for_run(run)
+
+        # Verify nodes were loaded
+        assert len(nodes) == 3  # 1 prefill + 2 decode
+
+    def test_convert_parser_results_to_dict(self, temp_dir):
+        """Test conversion of parser results to dict format."""
+        loader = RunLoader(str(temp_dir))
+
+        # Sample parser results
+        parser_results = [
+            {
+                "max_concurrency": 50,
+                "output_throughput": 2500.0,
+                "mean_ttft_ms": 175.0,
+                "mean_itl_ms": 20.0,
+                "p99_ttft_ms": 300.0,
+            },
+            {
+                "max_concurrency": 100,
+                "output_throughput": 5000.0,
+                "mean_ttft_ms": 200.0,
+                "mean_itl_ms": 22.0,
+                "p99_ttft_ms": 350.0,
+            },
+        ]
+
+        # Convert to dict format
+        result_dict = loader._convert_parser_results_to_dict(parser_results)
+
+        # Verify structure
+        assert result_dict["concurrencies"] == [50, 100]
+        assert result_dict["output_tps"] == [2500.0, 5000.0]
+        assert result_dict["mean_ttft_ms"] == [175.0, 200.0]
+        assert result_dict["mean_itl_ms"] == [20.0, 22.0]
+        assert result_dict["p99_ttft_ms"] == [300.0, 350.0]
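+
+    # For reference, the conversion asserted above is a rows-to-columns pivot.
+    # A minimal sketch of the idea (not the actual implementation):
+    #
+    #   def convert(results):
+    #       cols = {k: [r[k] for r in results]
+    #               for k in ("max_concurrency", "output_throughput",
+    #                         "mean_ttft_ms", "mean_itl_ms", "p99_ttft_ms")}
+    #       return {"concurrencies": cols["max_concurrency"],
+    #               "output_tps": cols["output_throughput"],
+    #               "mean_ttft_ms": cols["mean_ttft_ms"],
+    #               "mean_itl_ms": cols["mean_itl_ms"],
+    #               "p99_ttft_ms": cols["p99_ttft_ms"]}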
{"avg": 1150.92}, + "request_throughput": {"avg": 3.37}, + "time_to_first_token": {"avg": 150.5}, + "inter_token_latency": {"avg": 18.5}, + "request_count": {"avg": 1000}, + } + result_path = bench_dir / "profile_export_aiperf.json" + with open(result_path, "w") as f: + json.dump(aiperf_data, f) + + # Load the run + loader = RunLoader(str(temp_dir)) + run = loader.load_single("12345_2P_4D_20250126_120000") + + # Verify run was loaded + assert run is not None + # Verify mooncake-router results were parsed + assert len(run.profiler.output_tps) >= 1 diff --git a/tests/test_sweep.py b/tests/test_sweep.py index 35dfa71d..ce1168df 100644 --- a/tests/test_sweep.py +++ b/tests/test_sweep.py @@ -287,4 +287,3 @@ def test_placeholder_substitution_in_generated_config(self): assert prefill1["mem-fraction-static"] == "0.85" assert prefill2["mem-fraction-static"] == "0.9" -