
Commit dcefe12

Release Neuronx TGI 0.0.22 (#74)
* release 0.0.22
* increase test timeout
* update test to use Llama2-7b again
* test 0.0.21
* placeholder change
* update
* update
* trigger tests
1 parent 991cf1d commit dcefe12


3 files changed: 177 additions, 6 deletions

Lines changed: 164 additions & 0 deletions
@@ -0,0 +1,164 @@
# Fetch and extract the TGI sources (pinned to v2.0.2)
FROM alpine AS tgi
RUN mkdir -p /tgi
ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v2.0.2.tar.gz /tgi/sources.tar.gz
RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1

# Build cargo components (adapted from TGI original Dockerfile)
# Note that the build image is aligned on the same Linux version as the base image (Debian bookworm / Ubuntu 22.04)
FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef
WORKDIR /usr/src

ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse

FROM chef AS planner
COPY --from=tgi /tgi/Cargo.toml Cargo.toml
COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml
COPY --from=tgi /tgi/proto proto
COPY --from=tgi /tgi/benchmark benchmark
COPY --from=tgi /tgi/router router
COPY --from=tgi /tgi/launcher launcher
RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder

RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
    rm -f $PROTOC_ZIP

COPY --from=planner /usr/src/recipe.json recipe.json
RUN cargo chef cook --release --recipe-path recipe.json

COPY --from=tgi /tgi/Cargo.toml Cargo.toml
COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml
COPY --from=tgi /tgi/proto proto
COPY --from=tgi /tgi/benchmark benchmark
COPY --from=tgi /tgi/router router
COPY --from=tgi /tgi/launcher launcher
RUN cargo build --release --workspace --exclude benchmark

# Fetch optimum-neuron sources
FROM alpine/git AS optimum-neuron
RUN git clone --depth 1 --branch v0.0.22 https://github.com/huggingface/optimum-neuron.git /optimum-neuron

# Python base image
FROM ubuntu:22.04 AS base

RUN apt-get update -y \
 && apt-get install -y --no-install-recommends \
    python3-pip \
    python3-setuptools \
    python-is-python3 \
 && rm -rf /var/lib/apt/lists/* \
 && apt-get clean
RUN pip3 --no-cache-dir install --upgrade pip

# Python server build image
FROM base AS pyserver

RUN apt-get update -y \
 && apt-get install -y --no-install-recommends \
    make \
    python3-venv \
 && rm -rf /var/lib/apt/lists/* \
 && apt-get clean

RUN install -d /pyserver
WORKDIR /pyserver
COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server
COPY --from=tgi /tgi/proto proto
RUN pip3 install -r server/build-requirements.txt
RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server

# Neuron base image (used for deployment)
FROM base AS neuron

# Install system prerequisites
RUN apt-get update -y \
 && apt-get install -y --no-install-recommends \
    gnupg2 \
    wget \
    python3-dev \
 && rm -rf /var/lib/apt/lists/* \
 && apt-get clean

RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list
RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -

# Install neuronx packages
RUN apt-get update -y \
 && apt-get install -y --no-install-recommends \
    aws-neuronx-dkms=2.16.7.0 \
    aws-neuronx-collectives=2.20.22.0-c101c322e \
    aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 \
    aws-neuronx-tools=2.17.1.0 \
 && rm -rf /var/lib/apt/lists/* \
 && apt-get clean

ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}"

RUN pip3 install \
    neuronx-cc==2.13.66.0 \
    torch-neuronx==2.1.2.2.1.0 \
    transformers-neuronx==0.10.0.21 \
    --extra-index-url=https://pip.repos.neuron.amazonaws.com

# Install HuggingFace packages
RUN pip3 install \
    hf_transfer huggingface_hub

# Install optimum-neuron
COPY --from=optimum-neuron /optimum-neuron optimum-neuron
RUN pip3 install ./optimum-neuron

# TGI base env
ENV HUGGINGFACE_HUB_CACHE=/tmp \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PORT=80

# Disable color logs as they are not supported by CloudWatch
ENV LOGURU_COLORIZE=NO

# Install router
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
# Install launcher
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
# Install python server
COPY --from=pyserver /pyserver/build/dist dist
RUN pip install dist/text_generation_server*.tar.gz

# AWS SageMaker compatible image
FROM neuron AS sagemaker

COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh
RUN chmod +x entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]

RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \
 && rm -rf /var/lib/apt/lists/*
RUN HOME_DIR=/root && \
    pip install requests && \
    curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \
    unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \
    cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \
    chmod +x /usr/local/bin/testOSSCompliance && \
    chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \
    ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \
    rm -rf ${HOME_DIR}/oss_compliance*

RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \
has an indirect documentation dependency on third party <docutils/tools/editors/emacs/rst.el> project. The \
<docutils/tools/editors/emacs/rst.el> project's licensing includes the <GPL v3> license. \
\n\n\
N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \
third party <Text Generation Inference (TGI)> project. The <Text Generation Inference (TGI)> project's licensing \
includes the <HFOIL --> https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \
license." > /root/THIRD_PARTY_LICENSES

LABEL dlc_major_version="1"
LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true"
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true"
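
The Dockerfile is multi-stage: the neuron stage produces the deployable TGI-on-Neuron image, and sagemaker layers the SageMaker entrypoint and compliance artifacts on top of it. A rough sketch of a local build follows; the file name and image tags are assumptions, not part of this commit:

# Build the SageMaker-compatible final stage (file name and tag are assumed)
docker build -f Dockerfile.neuronx --target sagemaker -t neuronx-tgi:0.0.22 .

# Or stop at the plain Neuron stage, which installs the launcher and router
# binaries but does not set the SageMaker entrypoint
docker build -f Dockerfile.neuronx --target neuron -t neuronx-tgi:0.0.22-base .

Because both targets share every earlier stage, the expensive Rust build in the builder stage runs once and is reused from the Docker layer cache.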

releases.json

Lines changed: 11 additions & 4 deletions
@@ -43,17 +43,24 @@
       "os_version": "ubuntu22.04",
       "python_version": "py310",
       "pytorch_version": "1.13.1"
+    },
+    {
+      "device": "inf2",
+      "min_version": "0.0.22",
+      "max_version": "0.0.22",
+      "os_version": "ubuntu22.04",
+      "python_version": "py310",
+      "pytorch_version": "2.1.2"
     }
   ],
   "ignore_vulnerabilities": [],
   "releases": [
     {
-      "device": "gpu",
-      "version": "2.0.2",
+      "device": "inf2",
+      "version": "0.0.22",
       "os_version": "ubuntu22.04",
       "python_version": "py310",
-      "cuda_version": "cu121",
-      "pytorch_version": "2.3.0"
+      "pytorch_version": "2.1.2"
     }
   ]
 }
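
As a quick sanity check that the new entry is well-formed, the release list can be queried with jq (a sketch; assumes jq is installed and the command is run from the directory containing releases.json):

# Should print the single inf2 release added above (version 0.0.22, PyTorch 2.1.2)
jq '.releases[] | select(.device == "inf2")' releases.json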

tests/huggingface/sagemaker_dlc_test.py

Lines changed: 2 additions & 2 deletions
@@ -79,7 +79,7 @@ def run_test(args):
     pytest.param("google/flan-t5-xxl", None, "ml.g5.12xlarge", marks=pytest.mark.gpu),
     pytest.param("HuggingFaceTB/cosmo-1b", None, "ml.inf2.8xlarge", marks=pytest.mark.inf2),
 ])
-def test(model_id: str, model_revision: str, instance_type: str, timeout: str = "1800"):
+def test(model_id: str, model_revision: str, instance_type: str, timeout: str = "2400"):
     image_uri = os.getenv("IMAGE_URI")
     test_role_arn = os.getenv("TEST_ROLE_ARN")
     assert image_uri, f"Please set IMAGE_URI environment variable."
@@ -92,7 +92,7 @@ def test(model_id: str, model_revision: str, instance_type: str, timeout: str =
         role=test_role_arn,
         timeout=timeout)

-    logging.info(f"Running sanity test with the following args: {args}.")
+    logging.info(f"Running sanity test with the following arguments: {args}.")
     run_test(args)
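The test module reads IMAGE_URI and TEST_ROLE_ARN from the environment and tags each parametrized case with a gpu or inf2 marker, so a run against the new Neuron image might look like the following (a sketch; the placeholder values are assumptions):

# Run only the inf2-marked case; the increased default timeout (2400) applies
# unless overridden
IMAGE_URI=<account>.dkr.ecr.<region>.amazonaws.com/<repository>:<tag> \
TEST_ROLE_ARN=arn:aws:iam::<account>:role/<sagemaker-execution-role> \
pytest -m inf2 tests/huggingface/sagemaker_dlc_test.py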