Update/23.09 #133

Merged · 2 commits · Oct 24, 2023

4 changes: 2 additions & 2 deletions Dockerfile
@@ -1,4 +1,4 @@
-FROM nvcr.io/nvidia/tensorrt:22.12-py3
+FROM nvcr.io/nvidia/tensorrt:23.08-py3

WORKDIR /app

@@ -16,7 +16,7 @@ COPY requirements.txt .
RUN --mount=type=cache,target=/root/.cache pip install -r requirements.txt

# Install auto-gptq
-RUN --mount=type=cache,target=/root/.cache BUILD_CUDA_EXT=0 pip install auto-gptq[triton]==0.2.2
+RUN --mount=type=cache,target=/root/.cache pip install auto-gptq==0.4.2+cu118 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118

COPY . .

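Note on the auto-gptq change: the 0.4.2 wheel pulled from the autogptq-index is prebuilt against CUDA 11.8, so the `BUILD_CUDA_EXT=0` source build and the `[triton]` extra are dropped. A quick smoke test (a sketch, not part of this PR) to run inside the built image:

```python
# Hypothetical smoke test: confirm the prebuilt auto-gptq wheel imports cleanly
# inside the updated image, e.g. via `docker run --rm <image> python3 smoke.py`.
import auto_gptq

print(auto_gptq.__version__)  # expect 0.4.2 (the cu118 build)
```
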
3 changes: 2 additions & 1 deletion docker-compose-cpu.yml
@@ -15,13 +15,14 @@ services:
      - ${LISTEN_IP}:${MEDIA_PORT_RANGE}:${MEDIA_PORT_RANGE}
    volumes:
      - ./:/app
+      - ./cache:/root/.cache
    command: ./entrypoint.sh

  nginx:
    restart: unless-stopped
    depends_on:
      - wis
-    image: nginx:1.25.0
+    image: nginx:1.25.2
    volumes:
      - ./nginx:/nginx
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf
3 changes: 2 additions & 1 deletion docker-compose.yml
@@ -21,13 +21,14 @@ services:
              capabilities: [gpu]
    volumes:
      - ./:/app
+      - ./cache:/root/.cache
    command: ./entrypoint.sh

  nginx:
    restart: unless-stopped
    depends_on:
      - wis
-    image: nginx:1.25.0
+    image: nginx:1.25.2
    volumes:
      - ./nginx:/nginx
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf
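Reviewer note: the new `./cache:/root/.cache` bind mount, added to both compose files, persists pip and Hugging Face Hub downloads across container recreations; the matching top-level `cache` directory is created by the `mkdir -p` addition in utils.sh at the end of this diff.
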
25 changes: 5 additions & 20 deletions main.py
@@ -216,12 +216,6 @@ async def create_datagram_endpoint(self, protocol_factory, local_addr: Tuple[str
# Path to chatbot model
chatbot_model_path = settings.chatbot_model_path

-# Chatbot model basename
-chatbot_model_basename = settings.chatbot_model_basename
-
-# Chatbot device
-chatbot_device = settings.chatbot_device
-
# Chatbot temperature
chatbot_temperature = settings.chatbot_temperature

@@ -392,19 +386,14 @@ def load_models() -> Models:

    if support_chatbot and device == "cuda":
        logger.info(f'CHATBOT: Using model {chatbot_model_path} and CUDA, attempting load (this takes a while)...')
-        from transformers import AutoTokenizer
-        from auto_gptq import AutoGPTQForCausalLM
+        from transformers import AutoTokenizer, AutoModelForCausalLM

        chatbot_tokenizer = AutoTokenizer.from_pretrained(chatbot_model_path, use_fast=True)

        # load quantized model, currently only support single gpu
-        chatbot_model = AutoGPTQForCausalLM.from_quantized(chatbot_model_path,
-                                                           model_basename=chatbot_model_basename,
-                                                           use_safetensors=True,
-                                                           trust_remote_code=False,
-                                                           device=chatbot_device,
-                                                           use_triton=True,
-                                                           quantize_config=None)
+        chatbot_model = AutoModelForCausalLM.from_pretrained(chatbot_model_path,
+                                                             torch_dtype=torch.float16,
+                                                             device_map="auto")

    else:
        chatbot_tokenizer = None
@@ -437,10 +426,6 @@ def warm_models():
logger.info("Warming TTS... This takes a while on first run.")
do_tts("Hello from Willow")

# Warm chatbot once
if models.chatbot_model is not None:
logger.info("Warming chatbot... This takes a while on first run.")
do_chatbot("Tell me about AI")
else:
logger.info("Skipping warm_models for CPU")
return
@@ -454,7 +439,7 @@ def do_chatbot(text, max_new_tokens=chatbot_max_new_tokens, temperature=chatbot_
    prompt = f'''USER: {text}
ASSISTANT:'''
    logger.debug(f'CHATBOT: Pipeline parameters are max_new_tokens {max_new_tokens} temperature {temperature}'
-                 f'top_p {top_p} repetition_penalty {repetition_penalty}')
+                 f' top_p {top_p} repetition_penalty {repetition_penalty}')
    chatbot_pipeline = transformers.pipeline(
        "text-generation",
        model=models.chatbot_model,
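Context for the load_models() change: with the transformers and optimum versions pinned in requirements.txt, `from_pretrained` detects the GPTQ quantization config shipped in the model repo and loads the quantized weights through auto-gptq on its own, so the explicit `AutoGPTQForCausalLM.from_quantized(...)` call, along with the `chatbot_model_basename` and `chatbot_device` settings that fed it, is no longer needed. A minimal standalone sketch of the new path, assuming a CUDA host and the default model from settings.py:

```python
# Sketch of the simplified chatbot load path; mirrors the diff above but standalone.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_path = "TheBloke/vicuna-13b-v1.3.0-GPTQ"  # default chatbot_model_path

tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
# The GPTQ quantization config ships with the repo; device_map="auto"
# places the already-quantized weights across the available GPUs.
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             torch_dtype=torch.float16,
                                             device_map="auto")

chatbot = pipeline("text-generation", model=model, tokenizer=tokenizer)
print(chatbot("USER: Tell me about AI\nASSISTANT:", max_new_tokens=64)[0]["generated_text"])
```
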
3 changes: 2 additions & 1 deletion nginx/nginx.conf
@@ -49,7 +49,8 @@ http {

    server {
        listen 19001;
-        listen 19000 ssl http2;
+        listen 19000 ssl;
+        http2 on;
        server_name wis;
        ssl_certificate /nginx/cert.pem;
        ssl_certificate_key /nginx/key.pem;
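Reviewer note: nginx 1.25.1 deprecated the `http2` parameter of the `listen` directive in favor of the standalone `http2 on;` directive, so this server-block change and the image bump to nginx:1.25.2 in the compose files belong together.
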
152 changes: 79 additions & 73 deletions requirements.txt
@@ -1,100 +1,104 @@
-accelerate==0.21.0
-aiofiles==23.1.0
+accelerate==0.22.0
+aiofiles==23.2.1
aiohttp==3.8.5
aioice==0.9.0
aiortc==1.5.0
aiosignal==1.3.1
-altair==5.0.1
-anyio==3.7.0
+altair==5.1.1
+anyio==3.7.1
appdirs==1.4.4
-async-timeout==4.0.2
+async-timeout==4.0.3
attrs==23.1.0
audioread==3.0.0
av==10.0.0
-certifi==2023.7.22
+certifi==2022.12.7
cffi==1.15.1
charset-normalizer==2.1.1
-click==8.1.3
+click==8.1.7
cmake==3.25.0
coloredlogs==15.0.1
colorlog==6.7.0
contourpy==1.1.0
-cryptography==41.0.2
-ctranslate2==3.19.0
+cryptography==41.0.3
+ctranslate2==3.20.0
cycler==0.11.0
-datasets==2.13.0
+datasets==2.14.5
decorator==5.1.1
-dill==0.3.6
-dnspython==2.3.0
+dill==0.3.7
+dnspython==2.4.2
docker-pycreds==0.4.0
docopt==0.6.2
email-validator==2.0.0.post2
entrypoints==0.4
-exceptiongroup==1.1.1
-fastapi==0.97.0
-ffmpy==0.3.0
+exceptiongroup==1.1.3
+fastapi==0.103.1
+ffmpy==0.3.1
filelock==3.9.0
-fonttools==4.40.0
-frozenlist==1.3.3
+fonttools==4.42.1
+frozenlist==1.4.0
fsspec==2023.6.0
gitdb==4.0.10
-GitPython==3.1.32
+GitPython==3.1.35
google-crc32c==1.5.0
-gunicorn==20.1.0
+gunicorn==21.2.0
h11==0.14.0
-httpcore==0.17.2
-httptools==0.5.0
+httpcore==0.17.3
+httptools==0.6.0
httpx==0.24.1
-huggingface-hub==0.15.1
+huggingface-hub==0.16.4
humanfriendly==10.0
HyperPyYAML==1.2.1
idna==3.4
ifaddr==0.2.0
-importlib-metadata==6.6.0
-importlib-resources==5.12.0
+importlib-metadata==6.8.0
+importlib-resources==6.0.1
itsdangerous==2.1.2
Jinja2==3.1.2
-joblib==1.2.0
-jsonschema==4.17.3
-kiwisolver==1.4.4
-lazy_loader==0.2
-librosa==0.10.0.post2
+joblib==1.3.2
+jsonschema==4.19.0
+jsonschema-specifications==2023.7.1
+kiwisolver==1.4.5
+lazy_loader==0.3
+librosa==0.10.1
linkify-it-py==2.0.2
lit==15.0.7
-llvmlite==0.40.0
+llvmlite==0.40.1
Mako==1.2.4
-markdown-it-py==2.2.0
-markdown2==2.4.8
-MarkupSafe==2.1.1
-matplotlib==3.7.1
-mdit-py-plugins==0.3.3
+markdown-it-py==3.0.0
+markdown2==2.4.10
+MarkupSafe==2.1.3
+matplotlib==3.7.2
+mdit-py-plugins==0.4.0
mdurl==0.1.2
-mpmath==1.3.0
+mpmath==1.2.1
msgpack==1.0.5
multidict==6.0.4
-multiprocess==0.70.14
+multiprocess==0.70.15
networkx==3.0
-nh3==0.2.13
+nh3==0.2.14
num2words==0.5.12
-numba==0.57.0
+numba==0.57.1
numpy==1.23.5
-orjson==3.9.1
+optimum==1.13.1
+orjson==3.9.7
packaging==23.1
-pandas==2.0.2
+pandas==2.1.0
pathtools==0.1.2
-peft==0.3.0
-Pillow==9.3.0
+peft==0.5.0
+Pillow==10.0.0
pkgutil_resolve_name==1.3.10
-platformdirs==2.6.0
-pooch==1.6.0
-prompt-toolkit==3.0.38
-protobuf==4.21.12
+platformdirs==3.10.0
+pooch==1.7.0
+prompt-toolkit==3.0.39
+protobuf==4.23.4
psutil==5.9.5
-pyarrow==12.0.1
+pyarrow==13.0.0
pycparser==2.21
pycuda==2022.2.2
-pydantic==1.10.9
+pydantic==1.10.12
pydub==0.25.1
-pyee==10.0.1
-Pygments==2.15.1
+pyee==11.0.0
+Pygments==2.16.1
pylibsrtp==0.8.0
pyOpenSSL==23.2.0
pyparsing==3.0.9
@@ -104,54 +108,56 @@ pyston-lite-autoload==2.3.5
python-dateutil==2.8.2
python-dotenv==1.0.0
python-multipart==0.0.6
-pytools==2022.1.13
-pytz==2023.3
-PyYAML==6.0
-regex==2023.6.3
+pytools==2023.1.1
+pytz==2023.3.post1
+PyYAML==6.0.1
+referencing==0.30.2
+regex==2023.8.8
requests==2.31.0
-responses==0.23.1
+responses==0.23.3
rfc3986==2.0.0
-rich==13.4.2
+rich==13.5.2
rouge==1.0.1
+rpds-py==0.10.2
ruamel.yaml==0.17.28
ruamel.yaml.clib==0.2.7
-safetensors==0.3.1
-scikit-learn==1.2.2
-scipy==1.10.1
+safetensors==0.3.3
+scikit-learn==1.3.0
+scipy==1.11.2
semantic-version==2.10.0
sentencepiece==0.1.99
-sentry-sdk==1.25.1
+sentry-sdk==1.30.0
setproctitle==1.3.2
shortuuid==1.0.11
six==1.16.0
smmap==5.0.0
sniffio==1.3.0
soundfile==0.12.1
-soxr==0.3.5
-speechbrain==0.5.14
+soxr==0.3.6
+speechbrain==0.5.15
starlette==0.27.0
svgwrite==1.4.3
sympy==1.11.1
-threadpoolctl==3.1.0
+threadpoolctl==3.2.0
tiktoken==0.4.0
tokenizers==0.13.3
toolz==0.12.0
-tqdm==4.65.0
-transformers==4.31.0
+tqdm==4.66.1
+transformers==4.33.1
triton==2.0.0
-types-PyYAML==6.0.12.10
-typing_extensions==4.4.0
+types-PyYAML==6.0.12.11
+typing_extensions==4.7.1
tzdata==2023.3
uc-micro-py==1.0.2
ujson==5.8.0
urllib3==1.26.13
-uvicorn==0.22.0
+uvicorn==0.23.2
uvloop==0.17.0
-wandb==0.15.4
-watchfiles==0.19.0
+wandb==0.15.10
+watchfiles==0.20.0
wavedrom==2.0.3.post3
wcwidth==0.2.6
websockets==11.0.3
-xxhash==3.2.0
+xxhash==3.3.0
yarl==1.9.2
-zipp==3.15.0
+zipp==3.16.2
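A few of these bumps are load-bearing rather than routine: `optimum==1.13.1` is newly added and `transformers` moves to 4.33.1, which together provide the native GPTQ loading that the simplified `from_pretrained` call in main.py relies on, while `jsonschema-specifications`, `referencing`, and `rpds-py` arrive as new dependencies of jsonschema 4.19.
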
8 changes: 1 addition & 7 deletions settings.py
@@ -62,13 +62,7 @@ class APISettings(BaseSettings):
    support_chatbot: bool = False

    # Path to chatbot model - download from HuggingFace at runtime by default (gets cached)
-    chatbot_model_path: str = 'TheBloke/vicuna-13b-v1.3-GPTQ'
-
-    # Chatbot model basename
-    chatbot_model_basename: str = 'vicuna-13b-v1.3-GPTQ-4bit-128g.no-act.order'
-
-    # Chatbot device
-    chatbot_device: str = 'cuda:0'
+    chatbot_model_path: str = 'TheBloke/vicuna-13b-v1.3.0-GPTQ'

    # Chatbot pipeline default temperature
    chatbot_temperature: float = 0.7
2 changes: 1 addition & 1 deletion utils.sh
@@ -153,7 +153,7 @@ dep_check() {
    fi

    # Make sure we have it just in case
-    mkdir -p speakers/custom_tts speakers/voice_auth nginx/cache
+    mkdir -p speakers/custom_tts speakers/voice_auth nginx/cache cache

    # Check for new certs
    if [ ! -r nginx/cert.pem ] || [ ! -r nginx/key.pem ]; then