2 changes: 1 addition & 1 deletion benchmarks/README.md
@@ -28,6 +28,6 @@ pip install -e .
Currently, this will install lightweight tools for:
- Analyzing prefix-structured data (`datagen analyze`)
- Synthesizing structured data customizable for testing purposes (`datagen synthesize`)
Detailed information are provided in the `data_generator` directory.
Detailed information is provided in the `prefix_data_generator` directory.

The benchmarking scripts for the core dynamo components are to come soon (e.g. routing, disagg, Planner).
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from data_generator.cli import main as cli_main
from prefix_data_generator.cli import main as cli_main


def main():
@@ -36,13 +36,13 @@ def main():

if args.command == "analyze":
# Import and run the analyzer main
from data_generator import prefix_analyzer
from prefix_data_generator import prefix_analyzer

sys.argv = [sys.argv[0]] + remaining
prefix_analyzer.main()
elif args.command == "synthesize":
# Import and run the synthesizer main
from data_generator import synthesizer
from prefix_data_generator import synthesizer

sys.argv = [sys.argv[0]] + remaining
synthesizer.main()
@@ -17,8 +17,8 @@
import tempfile

import requests
from data_generator.hasher import hashes_to_texts
from data_generator.synthesizer import Synthesizer
from prefix_data_generator.hasher import hashes_to_texts
from prefix_data_generator.synthesizer import Synthesizer

# download the mooncake trace file
mooncake_trace_permalink = "https://raw.githubusercontent.com/kvcache-ai/Mooncake/f09c501b2a5d73e4d60cdeb612d7d0d54e1ec228/mooncake_trace.jsonl"
@@ -15,8 +15,8 @@

import networkx as nx
import numpy as np
from data_generator.protocols import CACHE_END, END_NODE, SUPER_ROOT
from data_generator.sampler import get_cdf
from prefix_data_generator.protocols import CACHE_END, END_NODE, SUPER_ROOT
from prefix_data_generator.sampler import get_cdf


def _verify_tree(G: nx.DiGraph) -> None:
@@ -16,7 +16,7 @@
import json
from collections import Counter

from data_generator.logging_utils import calculate_and_print_statistics
from prefix_data_generator.logging_utils import calculate_and_print_statistics


class PrefixAnalyzer:
@@ -20,15 +20,15 @@
import networkx as nx
import numpy as np
import pandas as pd
from data_generator.graph_utils import (
from prefix_data_generator.graph_utils import (
_mark_visited,
_merge_chains,
_precompute_transition_cdfs,
_remove_leaves,
_verify_tree,
)
from data_generator.protocols import CACHE_END, END_NODE, SUPER_ROOT
from data_generator.sampler import EmpiricalSampler, sample_from_cdf
from prefix_data_generator.protocols import CACHE_END, END_NODE, SUPER_ROOT
from prefix_data_generator.sampler import EmpiricalSampler, sample_from_cdf


class Synthesizer:
@@ -334,7 +334,7 @@ def main():
import argparse
from pathlib import Path

from data_generator.logging_utils import calculate_and_print_statistics
from prefix_data_generator.logging_utils import calculate_and_print_statistics

parser = argparse.ArgumentParser(description="Synthesize Mooncake-Esque dataset")
parser.add_argument(
@@ -17,7 +17,7 @@
import random

import pytest
from data_generator.hasher import hashes_to_texts, texts_to_hashes
from prefix_data_generator.hasher import hashes_to_texts, texts_to_hashes
from tokenizers import Tokenizer, decoders, models, normalizers, pre_tokenizers
from transformers import AutoTokenizer, PreTrainedTokenizerFast

@@ -16,7 +16,7 @@
from collections import Counter

import numpy as np
from data_generator.sampler import EmpiricalSampler
from prefix_data_generator.sampler import EmpiricalSampler


def test_empirical_sampler_distribution():
@@ -19,7 +19,7 @@
import tempfile
import unittest

from data_generator.synthesizer import Synthesizer
from prefix_data_generator.synthesizer import Synthesizer


# Helper function to create and dump data
6 changes: 3 additions & 3 deletions benchmarks/pyproject.toml
@@ -49,7 +49,7 @@ dependencies = [
]

[project.scripts]
datagen = "data_generator.cli:main"
datagen = "prefix_data_generator.cli:main"

[project.urls]
Repository = "https://github.com/ai-dynamo/dynamo.git"
@@ -59,10 +59,10 @@ requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
packages = ["data_generator"]
packages = ["prefix_data_generator"]

[tool.setuptools.package-data]
data_generator = ["**/*.py"]
prefix_data_generator = ["**/*.py"]

[tool.mypy]
explicit_package_bases = true
123 changes: 123 additions & 0 deletions benchmarks/sin_load_generator/README.md
@@ -0,0 +1,123 @@
<!--
SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
-->

# Sinusoidal Load Generator

`sin_synth.py` is a simple script that generates synthetic load with a sinusoidal request rate and ISL/OSL ratio. The output is in [mooncake-style](https://github.com/kvcache-ai/Mooncake) JSONL format, which can be used directly with [GenAI-Perf](https://github.com/triton-inference-server/perf_analyzer/tree/main/genai-perf/genai_perf).
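
For reference, a mooncake-style record carries an arrival timestamp, the input/output lengths, and the block hash IDs of the prompt. The field names below are illustrative rather than authoritative; see the Mooncake repository for the exact schema.

```
{"timestamp": 0, "input_length": 3000, "output_length": 150, "hash_ids": [0, 1, 2, 3, 4, 5]}
```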

## Usage

```bash
cd benchmarks/sin_load_generator
python sin_synth.py [OPTIONS]
```

### Basic Options

- `--block-size INT` (default: 512)
- Block size for hashing. Since there is no prefix caching, the block size does not need to match the engine's KV block size.

- `--total-blocks INT` (default: 10000)
- ISL prompt blocks are randomly sampled from this range. Use a larger number to reduce the chance of duplicated prompts.

- `--output-file STR` (default: auto-generated)
- Output file name (in jsonl format)
- If not specified, the script will generate a filename based on parameters

- `--time-duration INT` (default: 100)
- Total time duration of the dataset in seconds

- `--process-interval INT` (default: 1)
- Sampling interval, in seconds, used to generate the dataset
- A smaller interval leads to more precise changes in request rate and ISL/OSL ratio, but a longer generation time.

### Request Rate Parameters

The request rate follows a sinusoidal pattern:
```
request_rate(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * π / period * t - π / 2)
```

Note that the phase shift of `-π/2` makes the request rate start at its minimum at `t = 0`.
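
For example, with the defaults (`--request-rate-min 5`, `--request-rate-max 10`, `--request-rate-period 10`), the rate traces one full cycle per period:

```
request_rate(0)  = 7.5 + 2.5 * sin(-π/2) = 5    (minimum)
request_rate(5)  = 7.5 + 2.5 * sin(π/2)  = 10   (maximum)
request_rate(10) = 7.5 + 2.5 * sin(3π/2) = 5    (back to the minimum)
```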

- `--request-rate-min FLOAT` (default: 5)
- Minimum request rate in requests per second

- `--request-rate-max FLOAT` (default: 10)
- Maximum request rate in requests per second

- `--request-rate-period FLOAT` (default: 10)
- Period of the sinusoidal request rate in seconds
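
The following is a minimal Python sketch of this sampling scheme, not the actual `sin_synth.py` implementation; the function names and defaults are illustrative. For each processing interval it draws a Poisson-distributed request count at the sinusoidal rate and spreads the arrivals uniformly within the interval.

```python
import numpy as np


def request_rate(t, rate_min=5.0, rate_max=10.0, period=10.0):
    # Sinusoidal rate; the -pi/2 phase shift puts the minimum at t = 0.
    return (rate_min + rate_max) / 2 + (rate_max - rate_min) / 2 * np.sin(
        2 * np.pi / period * t - np.pi / 2
    )


def sample_arrival_times(time_duration=100, process_interval=1):
    # For each interval [t, t + process_interval), draw a Poisson request
    # count at the current rate, then place the arrivals uniformly in time.
    timestamps = []
    for t in range(0, time_duration, process_interval):
        t_end = min(t + process_interval, time_duration)
        n = np.random.poisson(request_rate(t) * (t_end - t))
        timestamps.extend(np.random.uniform(t, t_end, n).tolist())
    return sorted(timestamps)
```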

### Input/Output Sequence Length Parameters

The script generates load with requests sampled from two preset ISL/OSL combinations.
The ISL/OSL ratio defines the fraction of requests that follow the first preset ISL/OSL pattern. A ratio of 0 means all requests follow the second preset, while a ratio of 1 means all requests follow the first preset.

The ISL/OSL ratio follows a sinusoidal pattern:
```
isl-osl-ratio(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * π / period * t - π / 2)
```

Similarly, the phase shift of `-π/2` makes the ISL/OSL ratio start at its minimum at `t = 0`.

- `--isl1 INT` (default: 100)
- Input sequence length of the first preset

- `--osl1 INT` (default: 2000)
- Output sequence length of the first preset

- `--isl2 INT` (default: 5000)
- Input sequence length of the second preset

- `--osl2 INT` (default: 100)
- Output sequence length of the second preset

- `--isl-osl-ratio-min FLOAT` (default: 0.2)
- Minimum fraction of requests that follow the first preset

- `--isl-osl-ratio-max FLOAT` (default: 0.8)
- Maximum fraction of requests that follow the first preset

- `--isl-osl-ratio-period FLOAT` (default: 10)
- Period of the sinusoidal ISL/OSL ratio in seconds
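
A minimal sketch of this preset mixing, with illustrative names and the documented defaults (not the actual `sin_synth.py` code): at time `t`, a request follows the first preset with probability `isl_osl_ratio(t)` and the second preset otherwise.

```python
import numpy as np


def isl_osl_ratio(t, ratio_min=0.2, ratio_max=0.8, period=10.0):
    # Same sinusoidal shape as the request rate, starting at its minimum.
    return (ratio_min + ratio_max) / 2 + (ratio_max - ratio_min) / 2 * np.sin(
        2 * np.pi / period * t - np.pi / 2
    )


def pick_isl_osl(t, preset1=(100, 2000), preset2=(5000, 100)):
    # With probability isl_osl_ratio(t), use the first preset; otherwise the second.
    if np.random.uniform(0, 1) < isl_osl_ratio(t):
        return preset1  # (isl1, osl1)
    return preset2  # (isl2, osl2)
```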

### Examples

#### Varying Request Rate with Fixed ISL/OSL Ratio

```bash
python sin_synth.py \
--time-duration 60 \
--request-rate-min 2 \
--request-rate-max 8 \
--request-rate-period 20 \
--isl1 3000 \
--osl1 150 \
--isl2 3000 \
--osl2 150 \
--output-file dataset.jsonl
```

This generates a 60-second dataset with the request rate varying between 2 and 8 requests/second over a 20-second period, with ISL 3000 and OSL 150. Because both presets are identical, the ISL/OSL ratio has no effect.

#### Varying ISL/OSL Ratio with Fixed Request Rate

```bash
python sin_synth.py \
--time-duration 60 \
--request-rate-min 5 \
--request-rate-max 5 \
--isl1 3000 \
--osl1 150 \
--isl2 500 \
--osl2 2000 \
--isl-osl-ratio-min 0.2 \
--isl-osl-ratio-max 0.8 \
--isl-osl-ratio-period 20 \
--output-file dataset.jsonl
```

This generates a 60-second dataset with the request rate fixed at 5 requests/second and the ISL/OSL ratio varying between 0.2 and 0.8 over a 20-second period, mixing the I3000/O150 and I500/O2000 presets.
benchmarks/sin_load_generator/sin_synth.py
@@ -31,7 +31,7 @@ def main(args):
def get_isl_osl(t):
isl_osl_ratio = (args.isl_osl_ratio_min + args.isl_osl_ratio_max) / 2 + (
args.isl_osl_ratio_max - args.isl_osl_ratio_min
) / 2 * np.sin(2 * np.pi / args.isl_osl_ratio_period * t)
) / 2 * np.sin(2 * np.pi / args.isl_osl_ratio_period * t - np.pi / 2)
logger.info(f"isl_osl_ratio at {t:.2f}: {isl_osl_ratio:.2f}")
if np.random.uniform(0, 1) < isl_osl_ratio:
return (args.isl1, args.osl1)
@@ -43,7 +43,7 @@ def get_isl_osl(t):
t_e = min(t + args.process_interval, args.time_duration)
request_rate = (args.request_rate_min + args.request_rate_max) / 2 + (
args.request_rate_max - args.request_rate_min
) / 2 * np.sin(2 * np.pi / args.request_rate_period * t)
) / 2 * np.sin(2 * np.pi / args.request_rate_period * t - np.pi / 2)
logger.info(f"request_rate at {t:.2f}: {request_rate:.2f}")
num_requests = np.random.poisson(request_rate * (t_e - t))
for req_idx in range(num_requests):
@@ -100,7 +100,8 @@ def get_isl_osl(t):
# request rate parameters
# for the process interval at [t, t + process_interval), the number of requests to generate is sampled
# from a Poisson distribution with the following parameters:
# request_rate(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * pi / period * t)
# request_rate(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * pi / period * t - pi / 2)
# the phase shift of -pi / 2 makes the request rate start from the minimum at t = 0
# num_requests[t, t + process_interval) ~ Poisson(request_rate(t) * process_interval)
# requests are uniformly distributed in the interval [t, t + process_interval)
parser.add_argument(
@@ -125,7 +126,7 @@ def get_isl_osl(t):
# isl/osl parameters
# isl/osl is randomly sampled from two candidates following the isl-osl-ratio.
# at time t, the isl-osl-ratio is calculated as:
# isl-osl-ratio(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * pi / period * t)
# isl-osl-ratio(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * pi / period * t - pi / 2)
# Then, we sample [isl1/osl1, isl2/osl2] from the distribution [isl-osl-ratio(t), 1 - isl-osl-ratio(t)]
parser.add_argument(
"--isl1", type=int, default=100, help="Minimum input sequence length"