This repository was archived by the owner on Mar 13, 2025. It is now read-only.

Commit f063f15

Increase input length, reduce batch size (#107)

Signed-off-by: Antoni Baum <[email protected]>

Parent: 8e4e965

8 files changed: +19 -8 lines

aviary/backend/llm/predictor.py (+1 -1)

@@ -76,7 +76,7 @@ def init_model(
     # will raise CUDA errors if use_kernel=True.
     batch_size = max_batch_size or 1
     prompt = [WARMUP_PROMPT] * (
-        int(llm_config.max_input_words / (len(WARMUP_PROMPT) + 1)) + 1
+        int(llm_config.max_input_words / (len(WARMUP_PROMPT.split()) + 1)) + 1
     )
     prompt = " ".join(prompt)
     logger.info(
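
The fix above corrects a units bug in the warmup logic: len(WARMUP_PROMPT) counts characters, while llm_config.max_input_words is a word budget, so the repeated warmup prompt came out far shorter than the configured input length. A minimal sketch of the difference, using a stand-in prompt (the actual WARMUP_PROMPT text is defined elsewhere in aviary):

    # Stand-in prompt; the real WARMUP_PROMPT lives in the aviary source.
    WARMUP_PROMPT = "Write a short story."
    max_input_words = 800  # matches the value added to the model YAMLs below

    # Before the fix: len(WARMUP_PROMPT) is the character count (20), so the
    # prompt was repeated only 39 times, roughly 156 words.
    n_before = int(max_input_words / (len(WARMUP_PROMPT) + 1)) + 1

    # After the fix: len(WARMUP_PROMPT.split()) is the word count (4), so the
    # prompt is repeated 161 times, roughly 644 words, much closer to budget.
    n_after = int(max_input_words / (len(WARMUP_PROMPT.split()) + 1)) + 1

    print(n_before, n_after)  # 39 161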

models/amazon--LightGPT.yaml (+3 -1)

@@ -14,6 +14,7 @@ deployment_config:
     accelerator_type_cpu: 0.01
 model_config:
   model_id: amazon/LightGPT
+  max_input_words: 800
   initialization:
     runtime_env:
       pip:
@@ -26,9 +27,10 @@ model_config:
     from_pretrained_kwargs:
       use_cache: true
       use_kernel: true
+      max_tokens: 1536
     pipeline: default
   generation:
-    max_batch_size: 26
+    max_batch_size: 18
     generate_kwargs:
       do_sample: true
       max_new_tokens: 512
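
Across the model YAMLs the pattern is the same: each model gains max_input_words: 800, the DeepSpeed-kernel models (use_kernel: true) additionally gain max_tokens: 1536, and max_batch_size drops, presumably because longer sequences hold more activation and KV-cache memory per request, so fewer requests fit on the GPU at once. A minimal sketch for reading the affected knobs back out of a config, assuming the key layout shown in these diffs:

    # Hedged sketch: file paths and key names are taken from the diffs in
    # this commit; the layout of intermediate keys is assumed from the hunks.
    import yaml

    def summarize(path: str) -> None:
        with open(path) as f:
            model = yaml.safe_load(f)["model_config"]
        print(
            path,
            "max_input_words =", model.get("max_input_words"),
            "| max_batch_size =", model["generation"]["max_batch_size"],
        )

    summarize("models/amazon--LightGPT.yaml")
    # models/amazon--LightGPT.yaml max_input_words = 800 | max_batch_size = 18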

models/databricks--dolly-v2-12b.yaml (+3 -1)

@@ -14,6 +14,7 @@ deployment_config:
     accelerator_type_cpu: 0.01
 model_config:
   model_id: databricks/dolly-v2-12b
+  max_input_words: 800
   model_description: "Databricks’ dolly-v2-12b, an instruction-following large language model trained on the Databricks machine learning platform that is licensed for commercial use. Based on pythia-12b, Dolly is trained on ~15k instruction/response fine tuning records databricks-dolly-15k generated by Databricks employees in capability domains from the InstructGPT paper, including brainstorming, classification, closed QA, generation, information extraction, open QA and summarization. dolly-v2-12b is not a state-of-the-art model, but does exhibit surprisingly high quality instruction following behavior not characteristic of the foundation model on which it is based.\n\nDolly v2 is also available in these smaller models sizes:\n\ndolly-v2-7b, a 6.9 billion parameter based on pythia-6.9b\ndolly-v2-3b, a 2.8 billion parameter based on pythia-2.8b\nPlease refer to the dolly GitHub repo for tips on running inference for various GPU configurations."
   initialization:
     s3_mirror_config:
@@ -24,9 +25,10 @@ model_config:
     from_pretrained_kwargs:
       use_cache: true
       use_kernel: true
+      max_tokens: 1536
     pipeline: default
   generation:
-    max_batch_size: 6
+    max_batch_size: 4
     generate_kwargs:
       do_sample: true
       max_new_tokens: 512

models/h2oai--h2ogpt-oasst1-512-12b.yaml (+3 -1)

@@ -14,6 +14,7 @@ deployment_config:
     accelerator_type_cpu: 0.01
 model_config:
   model_id: h2oai/h2ogpt-oasst1-512-12b
+  max_input_words: 800
   initialization:
     s3_mirror_config:
       bucket_uri: s3://large-dl-models-mirror/models--h2oai--h2ogpt-oasst1-512-12b/main-safetensors/
@@ -24,9 +25,10 @@ model_config:
       trust_remote_code: true
       use_cache: true
       use_kernel: true
+      max_tokens: 1536
     pipeline: default
   generation:
-    max_batch_size: 6
+    max_batch_size: 4
     generate_kwargs:
       do_sample: true
       num_beams: 1

models/mosaicml--mpt-7b-chat.yaml (+2 -1)

@@ -14,6 +14,7 @@ deployment_config:
     accelerator_type_cpu: 0.01
 model_config:
   model_id: mosaicml/mpt-7b-chat
+  max_input_words: 800
   initialization:
     s3_mirror_config:
       bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-chat/main-safetensors/
@@ -29,7 +30,7 @@ model_config:
       mode: max-autotune
     pipeline: default
   generation:
-    max_batch_size: 22
+    max_batch_size: 8
     generate_kwargs:
       do_sample: true
       max_new_tokens: 512

models/mosaicml--mpt-7b-instruct.yaml (+2 -1)

@@ -14,6 +14,7 @@ deployment_config:
     accelerator_type_cpu: 0.01
 model_config:
   model_id: mosaicml/mpt-7b-instruct
+  max_input_words: 800
   initialization:
     s3_mirror_config:
       bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-instruct/main-safetensors/
@@ -29,7 +30,7 @@ model_config:
      mode: max-autotune
     pipeline: default
   generation:
-    max_batch_size: 22
+    max_batch_size: 8
     generate_kwargs:
       do_sample: true
       max_new_tokens: 512

models/mosaicml--mpt-7b-storywriter.yaml (+2 -1)

@@ -14,6 +14,7 @@ deployment_config:
     accelerator_type_cpu: 0.01
 model_config:
   model_id: mosaicml/mpt-7b-storywriter
+  max_input_words: 800
   initialization:
     s3_mirror_config:
       bucket_uri: s3://large-dl-models-mirror/models--mosaicml--mpt-7b-storywriter/main-safetensors/
@@ -29,7 +30,7 @@ model_config:
       mode: max-autotune
     pipeline: default
   generation:
-    max_batch_size: 12
+    max_batch_size: 8
     generate_kwargs:
       do_sample: true
       max_new_tokens: 512

models/stabilityai--stablelm-tuned-alpha-7b.yaml (+3 -1)

@@ -14,6 +14,7 @@ deployment_config:
     accelerator_type_cpu: 0.01
 model_config:
   model_id: stabilityai/stablelm-tuned-alpha-7b
+  max_input_words: 800
   initialization:
     s3_mirror_config:
       bucket_uri: s3://large-dl-models-mirror/models--stabilityai--stablelm-tuned-alpha-7b/main-safetensors/
@@ -23,9 +24,10 @@ model_config:
     from_pretrained_kwargs:
       use_cache: true
       use_kernel: true
+      max_tokens: 1536
     pipeline: default
   generation:
-    max_batch_size: 14
+    max_batch_size: 8
     generate_kwargs:
       do_sample: true
       max_new_tokens: 512
