2 changes: 1 addition & 1 deletion benchmarks/README.md
@@ -28,6 +28,6 @@ pip install -e .
Currently, this will install lightweight tools for:
- Analyzing prefix-structured data (`datagen analyze`)
- Synthesizing structured data customizable for testing purposes (`datagen synthesize`)
Detailed information are provided in the `data_generator` directory.
Detailed information is provided in the `prefix_data_generator` directory.

The benchmarking scripts for the core dynamo components are to come soon (e.g. routing, disagg, Planner).
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from data_generator.cli import main as cli_main
from prefix_data_generator.cli import main as cli_main


def main():
@@ -36,13 +36,13 @@ def main():

if args.command == "analyze":
# Import and run the analyzer main
from data_generator import prefix_analyzer
from prefix_data_generator import prefix_analyzer

sys.argv = [sys.argv[0]] + remaining
prefix_analyzer.main()
elif args.command == "synthesize":
# Import and run the synthesizer main
from data_generator import synthesizer
from prefix_data_generator import synthesizer

sys.argv = [sys.argv[0]] + remaining
synthesizer.main()
@@ -17,8 +17,8 @@
import tempfile

import requests
from data_generator.hasher import hashes_to_texts
from data_generator.synthesizer import Synthesizer
from prefix_data_generator.hasher import hashes_to_texts
from prefix_data_generator.synthesizer import Synthesizer

# download the mooncake trace file
mooncake_trace_permalink = "https://raw.githubusercontent.com/kvcache-ai/Mooncake/f09c501b2a5d73e4d60cdeb612d7d0d54e1ec228/mooncake_trace.jsonl"
@@ -15,8 +15,8 @@

import networkx as nx
import numpy as np
from data_generator.protocols import CACHE_END, END_NODE, SUPER_ROOT
from data_generator.sampler import get_cdf
from prefix_data_generator.protocols import CACHE_END, END_NODE, SUPER_ROOT
from prefix_data_generator.sampler import get_cdf


def _verify_tree(G: nx.DiGraph) -> None:
@@ -16,7 +16,7 @@
import json
from collections import Counter

from data_generator.logging_utils import calculate_and_print_statistics
from prefix_data_generator.logging_utils import calculate_and_print_statistics


class PrefixAnalyzer:
@@ -20,15 +20,15 @@
import networkx as nx
import numpy as np
import pandas as pd
from data_generator.graph_utils import (
from prefix_data_generator.graph_utils import (
_mark_visited,
_merge_chains,
_precompute_transition_cdfs,
_remove_leaves,
_verify_tree,
)
from data_generator.protocols import CACHE_END, END_NODE, SUPER_ROOT
from data_generator.sampler import EmpiricalSampler, sample_from_cdf
from prefix_data_generator.protocols import CACHE_END, END_NODE, SUPER_ROOT
from prefix_data_generator.sampler import EmpiricalSampler, sample_from_cdf


class Synthesizer:
@@ -334,7 +334,7 @@ def main():
import argparse
from pathlib import Path

from data_generator.logging_utils import calculate_and_print_statistics
from prefix_data_generator.logging_utils import calculate_and_print_statistics

parser = argparse.ArgumentParser(description="Synthesize Mooncake-Esque dataset")
parser.add_argument(
@@ -17,7 +17,7 @@
import random

import pytest
from data_generator.hasher import hashes_to_texts, texts_to_hashes
from prefix_data_generator.hasher import hashes_to_texts, texts_to_hashes
from tokenizers import Tokenizer, decoders, models, normalizers, pre_tokenizers
from transformers import AutoTokenizer, PreTrainedTokenizerFast

@@ -16,7 +16,7 @@
from collections import Counter

import numpy as np
from data_generator.sampler import EmpiricalSampler
from prefix_data_generator.sampler import EmpiricalSampler


def test_empirical_sampler_distribution():
@@ -19,7 +19,7 @@
import tempfile
import unittest

from data_generator.synthesizer import Synthesizer
from prefix_data_generator.synthesizer import Synthesizer


# Helper function to create and dump data
6 changes: 3 additions & 3 deletions benchmarks/pyproject.toml
@@ -49,7 +49,7 @@ dependencies = [
]

[project.scripts]
datagen = "data_generator.cli:main"
datagen = "prefix_data_generator.cli:main"

[project.urls]
Repository = "https://github.com/ai-dynamo/dynamo.git"
@@ -59,10 +59,10 @@ requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
packages = ["data_generator"]
packages = ["prefix_data_generator"]

[tool.setuptools.package-data]
data_generator = ["**/*.py"]
prefix_data_generator = ["**/*.py"]

[tool.mypy]
explicit_package_bases = true
123 changes: 123 additions & 0 deletions benchmarks/sin_load_generator/README.md
@@ -0,0 +1,123 @@
<!--
SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
-->

# Sinusoidal Load Generator

`sin_synth.py` is a simple script that generates synthetic load with a sinusoidal request rate and ISL/OSL ratio. The output is in [mooncake-style](https://github.com/kvcache-ai/Mooncake) JSONL format, which can be used directly with [GenAI-Perf](https://github.com/triton-inference-server/perf_analyzer/tree/main/genai-perf/genai_perf).
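
For reference, a mooncake-style record carries an arrival timestamp, the input/output lengths, and the block hash IDs of the prompt. The field names below are illustrative rather than authoritative; see the Mooncake repository for the exact schema.

```
{"timestamp": 0, "input_length": 3000, "output_length": 150, "hash_ids": [0, 1, 2, 3, 4, 5]}
```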

## Usage

```bash
cd benchmarks/sin_load_generator
python sin_synth.py [OPTIONS]
```

### Basic Options

- `--block-size INT` (default: 512)
- Block size for hashing. Since there is no prefix caching, the block size does not need to match the engine's KV block size.

- `--total-blocks INT` (default: 10000)
- ISL prompt blocks are randomly sampled from this range. Use a larger number to reduce the chance of duplicated prompts.

- `--output-file STR` (default: auto-generated)
- Output file name (in jsonl format)
- If not specified, the script will generate a filename based on parameters

- `--time-duration INT` (default: 100)
- Total time duration of the dataset in seconds

- `--process-interval INT` (default: 1)
- Sampling interval, in seconds, used to generate the dataset
- A smaller interval leads to more precise changes in request rate and ISL/OSL ratio, but a longer generation time.

### Request Rate Parameters

The request rate follows a sinusoidal pattern:
```
request_rate(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * π / period * t - π / 2)
```

Note that the phase shift of `-π/2` makes the request rate start at its minimum at `t = 0`.
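
For example, with the defaults (`--request-rate-min 5`, `--request-rate-max 10`, `--request-rate-period 10`), the rate traces one full cycle per period:

```
request_rate(0)  = 7.5 + 2.5 * sin(-π/2) = 5    (minimum)
request_rate(5)  = 7.5 + 2.5 * sin(π/2)  = 10   (maximum)
request_rate(10) = 7.5 + 2.5 * sin(3π/2) = 5    (back to the minimum)
```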

- `--request-rate-min FLOAT` (default: 5)
- Minimum request rate in requests per second

- `--request-rate-max FLOAT` (default: 10)
- Maximum request rate in requests per second

- `--request-rate-period FLOAT` (default: 10)
- Period of the sinusoidal request rate in seconds
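
The following is a minimal Python sketch of this sampling scheme, not the actual `sin_synth.py` implementation; the function names and defaults are illustrative. For each processing interval it draws a Poisson-distributed request count at the sinusoidal rate and spreads the arrivals uniformly within the interval.

```python
import numpy as np


def request_rate(t, rate_min=5.0, rate_max=10.0, period=10.0):
    # Sinusoidal rate; the -pi/2 phase shift puts the minimum at t = 0.
    return (rate_min + rate_max) / 2 + (rate_max - rate_min) / 2 * np.sin(
        2 * np.pi / period * t - np.pi / 2
    )


def sample_arrival_times(time_duration=100, process_interval=1):
    # For each interval [t, t + process_interval), draw a Poisson request
    # count at the current rate, then place the arrivals uniformly in time.
    timestamps = []
    for t in range(0, time_duration, process_interval):
        t_end = min(t + process_interval, time_duration)
        n = np.random.poisson(request_rate(t) * (t_end - t))
        timestamps.extend(np.random.uniform(t, t_end, n).tolist())
    return sorted(timestamps)
```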

### Input/Output Sequence Length Parameters

The script generates load with requests sampled from two preset ISL/OSL combinations.
The ISL/OSL ratio defines the fraction of requests that follow the first preset ISL/OSL pattern. A ratio of 0 means all requests follow the second preset, while a ratio of 1 means all requests follow the first preset.

The ISL/OSL ratio follows a sinusoidal pattern:
```
isl-osl-ratio(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * π / period * t - π / 2)
```

Similarly, the phase shift of `-π/2` makes the ISL/OSL ratio start at its minimum at `t = 0`.

- `--isl1 INT` (default: 100)
- Input sequence length of the first preset

- `--osl1 INT` (default: 2000)
- Output sequence length of the first preset

- `--isl2 INT` (default: 5000)
- Input sequence length of the second preset

- `--osl2 INT` (default: 100)
- Output sequence length of the second preset

- `--isl-osl-ratio-min FLOAT` (default: 0.2)
- Minimum fraction of requests that follow the first preset

- `--isl-osl-ratio-max FLOAT` (default: 0.8)
- Maximum fraction of requests that follow the first preset

- `--isl-osl-ratio-period FLOAT` (default: 10)
- Period of the sinusoidal ISL/OSL ratio in seconds
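
A minimal sketch of this preset mixing, with illustrative names and the documented defaults (not the actual `sin_synth.py` code): at time `t`, a request follows the first preset with probability `isl_osl_ratio(t)` and the second preset otherwise.

```python
import numpy as np


def isl_osl_ratio(t, ratio_min=0.2, ratio_max=0.8, period=10.0):
    # Same sinusoidal shape as the request rate, starting at its minimum.
    return (ratio_min + ratio_max) / 2 + (ratio_max - ratio_min) / 2 * np.sin(
        2 * np.pi / period * t - np.pi / 2
    )


def pick_isl_osl(t, preset1=(100, 2000), preset2=(5000, 100)):
    # With probability isl_osl_ratio(t), use the first preset; otherwise the second.
    if np.random.uniform(0, 1) < isl_osl_ratio(t):
        return preset1  # (isl1, osl1)
    return preset2  # (isl2, osl2)
```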

### Examples

#### Varying Request Rate with Fixed ISL/OSL Ratio

```bash
python sin_synth.py \
--time-duration 60 \
--request-rate-min 2 \
--request-rate-max 8 \
--request-rate-period 20 \
--isl1 3000 \
--osl1 150 \
--isl2 3000 \
--osl2 150 \
--output-file dataset.jsonl
```

This generates a 60-second dataset with the request rate varying between 2 and 8 requests/second over a 20-second period, with ISL 3000 and OSL 150. Because both presets are identical, the ISL/OSL ratio has no effect.

#### Varying ISL/OSL Ratio with Fixed Request Rate

```bash
python sin_synth.py \
--time-duration 60 \
--request-rate-min 5 \
--request-rate-max 5 \
--isl1 3000 \
--osl1 150 \
--isl2 500 \
--osl2 2000 \
--isl-osl-ratio-min 0.2 \
--isl-osl-ratio-max 0.8 \
--isl-osl-ratio-period 20 \
--output-file dataset.jsonl
```

This generates a 60-second dataset with the request rate fixed at 5 requests/second and the ISL/OSL ratio varying between 0.2 and 0.8 over a 20-second period, mixing the I3000/O150 and I500/O2000 presets.
benchmarks/sin_load_generator/sin_synth.py
@@ -31,7 +31,7 @@ def main(args):
def get_isl_osl(t):
isl_osl_ratio = (args.isl_osl_ratio_min + args.isl_osl_ratio_max) / 2 + (
args.isl_osl_ratio_max - args.isl_osl_ratio_min
) / 2 * np.sin(2 * np.pi / args.isl_osl_ratio_period * t)
) / 2 * np.sin(2 * np.pi / args.isl_osl_ratio_period * t - np.pi / 2)
logger.info(f"isl_osl_ratio at {t:.2f}: {isl_osl_ratio:.2f}")
if np.random.uniform(0, 1) < isl_osl_ratio:
return (args.isl1, args.osl1)
@@ -43,7 +43,7 @@ def get_isl_osl(t):
t_e = min(t + args.process_interval, args.time_duration)
request_rate = (args.request_rate_min + args.request_rate_max) / 2 + (
args.request_rate_max - args.request_rate_min
) / 2 * np.sin(2 * np.pi / args.request_rate_period * t)
) / 2 * np.sin(2 * np.pi / args.request_rate_period * t - np.pi / 2)
logger.info(f"request_rate at {t:.2f}: {request_rate:.2f}")
num_requests = np.random.poisson(request_rate * (t_e - t))
for req_idx in range(num_requests):
@@ -100,7 +100,8 @@ def get_isl_osl(t):
# request rate parameters
# for the process interval at [t, t + process_interval), the number of requests to generate is sampled
# from a Poisson distribution with the following parameters:
# request_rate(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * pi / period * t)
# request_rate(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * pi / period * t - pi / 2)
# the phase shift of -pi / 2 makes the request rate start from the minimum at t = 0
# num_requests[t, t + process_interval) ~ Poisson(request_rate(t) * process_interval)
# requests are uniformly distributed in the interval [t, t + process_interval)
parser.add_argument(
@@ -125,7 +126,7 @@ def get_isl_osl(t):
# isl/osl parameters
# isl/osl is randomly sampled from two candidates following the isl-osl-ratio.
# at time t, the isl-osl-ratio is calculated as:
# isl-osl-ratio(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * pi / period * t)
# isl-osl-ratio(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * pi / period * t - pi / 2)
# Then, we sample [isl1/osl1, isl2/osl2] from the distribution [isl-osl-ratio(t), 1 - isl-osl-ratio(t)]
parser.add_argument(
"--isl1", type=int, default=100, help="Minimum input sequence length"