Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions examples/image-to-text/run_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,19 @@ def main():
help="The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"generated when running `huggingface-cli login` (stored in `~/.huggingface`).",
)
parser.add_argument(
"--bucket_size",
Comment thread
ANSHUMAN87 marked this conversation as resolved.
default=-1,
type=int,
help="Bucket size to maintain static shapes. If a positive number is passed \
we increase the bucket in steps of `bucket_size` instead of allocating to max (`prompt_length + max_new_tokens`). \
It can never be negative value.",
)
parser.add_argument(
"--bucket_internal",
action="store_true",
help="Split kv sequence into buckets in decode phase. It improves throughput when max_new_tokens is large.",
)
parser.add_argument("--batch_size", type=int, default=1, help="Input batch size.")
parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.")
parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.")
Expand Down Expand Up @@ -335,6 +348,8 @@ def main():
"ignore_eos": args.ignore_eos,
"use_flash_attention": args.use_flash_attention,
"flash_attention_recompute": args.flash_attention_recompute,
"bucket_internal": args.bucket_internal,
"bucket_size": args.bucket_size,
"limit_hpu_graphs": args.limit_hpu_graphs,
"do_sample": args.do_sample,
}
Expand Down