Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update TensorRT-LLM #1688

Merged
merged 1 commit into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ __pycache__/
build*/
*.egg-info/
.coverage
*.csv
*.onnx
tmp/
venv/
Expand Down
210 changes: 0 additions & 210 deletions CHANGELOG.md

This file was deleted.

4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ TensorRT-LLM

[![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](https://nvidia.github.io/TensorRT-LLM/)
[![python](https://img.shields.io/badge/python-3.10.12-green)](https://www.python.org/downloads/release/python-31012/)
[![cuda](https://img.shields.io/badge/cuda-12.4.0-green)](https://developer.nvidia.com/cuda-downloads)
[![cuda](https://img.shields.io/badge/cuda-12.4.1-green)](https://developer.nvidia.com/cuda-downloads)
[![trt](https://img.shields.io/badge/TRT-10.0.1-green)](https://developer.nvidia.com/tensorrt)
[![version](https://img.shields.io/badge/release-0.10.0.dev-green)](./setup.py)
[![version](https://img.shields.io/badge/release-0.11.0.dev-green)](./tensorrt_llm/version.py)
[![license](https://img.shields.io/badge/license-Apache%202-blue)](./LICENSE)

[Architecture](./docs/source/architecture/overview.md)   |   [Results](./docs/source/performance/perf-overview.md)   |   [Examples](./examples/)   |   [Documentation](./docs/source/)
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/cpp/gptManagerBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -906,7 +906,7 @@ class GptServer
[this](uint64_t requestId, std::list<NamedTensor> const& response_tensors, bool final_response,
std::string const& errMsg)
{ return sendResponse(requestId, response_tensors, final_response, errMsg); },
nullptr, iterationDataCallback, optionalParams, terminateReqId, std::nullopt, excludeInputInOutput);
nullptr, iterationDataCallback, optionalParams, terminateReqId, excludeInputInOutput);
}

~GptServer()
Expand Down
12 changes: 11 additions & 1 deletion benchmarks/cpp/prepare_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import click
from pydantic import BaseModel, field_validator
from transformers import AutoTokenizer
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
from utils.prepare_real_data import dataset
from utils.prepare_synthetic_data import token_norm_dist

Expand All @@ -27,10 +29,12 @@ class RootArgs(BaseModel):
output: str
random_seed: int
task_id: int
std_out: bool
rand_task_id: Optional[Tuple[int, int]]

@field_validator('tokenizer')
def get_tokenizer(cls, v: str):
def get_tokenizer(cls,
v: str) -> PreTrainedTokenizer | PreTrainedTokenizerFast:
try:
tokenizer = AutoTokenizer.from_pretrained(v, padding_side='left')
except EnvironmentError as e:
Expand All @@ -53,6 +57,11 @@ def get_tokenizer(cls, v: str):
type=str,
help="Output json filename.",
default="preprocessed_dataset.json")
@click.option(
"--stdout",
is_flag=True,
help="Print output to stdout with a JSON dataset entry on each line.",
default=False)
@click.option("--random-seed",
required=False,
type=int,
Expand Down Expand Up @@ -80,6 +89,7 @@ def cli(ctx, **kwargs):

ctx.obj = RootArgs(tokenizer=kwargs['tokenizer'],
output=kwargs['output'],
std_out=kwargs['stdout'],
random_seed=kwargs['random_seed'],
task_id=kwargs['task_id'],
rand_task_id=kwargs['rand_task_id'])
Expand Down
26 changes: 17 additions & 9 deletions benchmarks/cpp/utils/prepare_real_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import click
from datasets import load_dataset
from pydantic import BaseModel, model_validator
from utils.utils import dataset_dump, get_norm_dist_tokens
from utils.utils import dataset_dump, get_norm_dist_tokens, print_dataset


def validate_output_len_dist(ctx, param, value):
Expand Down Expand Up @@ -220,11 +220,19 @@ def dataset(root_args, **kwargs):
logging.debug(f"Input lengths: {[len(i) for i in input_ids]}")
logging.debug(f"Output lengths: {output_lens}")

dataset_dump(
input_lens, input_ids, output_lens, task_ids, {
"workload_type": "dataset",
"tokenizer": root_args.tokenizer.__class__.__name__,
"num_requests": len(input_ids),
"max_input_len": max(input_lens),
"max_output_len": max(output_lens)
}, root_args.output)
if not root_args.std_out:
dataset_dump(
input_lens, input_ids, output_lens, task_ids, {
"workload_type": "dataset",
"tokenizer": root_args.tokenizer.__class__.__name__,
"num_requests": len(input_ids),
"max_input_len": max(input_lens),
"max_output_len": max(output_lens)
}, root_args.output)
else:
print_dataset(
task_ids,
input_ids,
output_lens,
tokenizer=None,
)
33 changes: 20 additions & 13 deletions benchmarks/cpp/utils/prepare_synthetic_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import random

import click
from utils.utils import dataset_dump, gen_random_tokens, get_norm_dist_tokens
from utils.utils import (dataset_dump, gen_random_tokens, get_norm_dist_tokens,
print_dataset)


@click.command()
Expand Down Expand Up @@ -55,15 +56,21 @@ def token_norm_dist(root_args, **kwargs):
min_id, max_id = root_args.rand_task_id
task_ids = [random.randint(min_id, max_id) for _ in range(num_reqs)]

dataset_dump(
input_lens, input_ids, output_lens, task_ids, {
"workload_type": "token-norm-dist",
"input_mean": kwargs['input_mean'],
"input_stdev": kwargs['input_stdev'],
"output_mean": kwargs['output_mean'],
"output_stdev": kwargs['output_stdev'],
"num_requests": kwargs['num_requests'],
"tokenize_vocabsize": root_args.tokenizer.vocab_size,
"max_input_len": max_input_len,
"max_output_len": max_output_len
}, root_args.output)
if not root_args.std_out:
dataset_dump(
input_lens, input_ids, output_lens, task_ids, {
"workload_type": "token-norm-dist",
"input_mean": kwargs['input_mean'],
"input_stdev": kwargs['input_stdev'],
"output_mean": kwargs['output_mean'],
"output_stdev": kwargs['output_stdev'],
"num_requests": kwargs['num_requests'],
"tokenize_vocabsize": root_args.tokenizer.vocab_size,
"max_input_len": max_input_len,
"max_output_len": max_output_len
}, root_args.output)
else:
print_dataset(
input_ids,
output_lens,
)
12 changes: 11 additions & 1 deletion benchmarks/cpp/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,17 @@ def dataset_dump(input_lens, input_ids, output_lens, task_ids, metadata,
task_id=task_ids[i]))
workload = Workload(metadata=metadata, samples=samples)
with open(output_file, 'w') as f:
json.dump(workload.dict(), f)
json.dump(workload.model_dump(), f)


def print_dataset(input_ids, output_lens):
    """Print one compact JSON object per request to stdout.

    Every emitted line carries three keys:

    * ``task_id``: the zero-based position of the request in ``input_ids``.
    * ``logits``: the entry of ``input_ids`` at that position.
    * ``output_tokens``: the entry of ``output_lens`` at the same position.

    Args:
        input_ids: Iterable of JSON-serializable per-request entries.
        output_lens: Indexable holding one value per entry of ``input_ids``.
    """
    for position, tokens in enumerate(input_ids):
        record = dict(
            task_id=position,
            logits=tokens,
            output_tokens=output_lens[position],
        )
        # Compact separators keep each record on a single, minimal line.
        line = json.dumps(record, ensure_ascii=False, separators=(',', ':'))
        print(line)


def get_list_of_delays(delay_dist, mean_time_bet_reqs, num_reqs, random_seed):
Expand Down
Loading