Skip to content

Commit e37025d

Browse files
committed
release 0.0.22
1 parent 83779d5 commit e37025d

File tree

2 files changed

+175
-4
lines changed

2 files changed

+175
-4
lines changed
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
# Fetch and extract the TGI sources (pinned to the v2.0.2 release tag)
2+
FROM alpine AS tgi
3+
RUN mkdir -p /tgi
4+
ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v2.0.2.tar.gz /tgi/sources.tar.gz
5+
RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1
6+
7+
# Build cargo components (adapted from TGI original Dockerfile)
8+
# Note: the build image uses the same Linux generation as the base image (Debian bookworm / Ubuntu 22.04)
9+
FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef
10+
WORKDIR /usr/src
11+
12+
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
13+
14+
# Planner stage: generates the cargo-chef recipe from the TGI workspace sources
FROM chef AS planner
15+
COPY --from=tgi /tgi/Cargo.toml Cargo.toml
16+
COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml
17+
COPY --from=tgi /tgi/proto proto
18+
COPY --from=tgi /tgi/benchmark benchmark
19+
COPY --from=tgi /tgi/router router
20+
COPY --from=tgi /tgi/launcher launcher
21+
RUN cargo chef prepare --recipe-path recipe.json
22+
23+
FROM chef AS builder
24+
25+
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
26+
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
27+
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
28+
unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
29+
rm -f $PROTOC_ZIP
30+
31+
COPY --from=planner /usr/src/recipe.json recipe.json
32+
RUN cargo chef cook --release --recipe-path recipe.json
33+
34+
COPY --from=tgi /tgi/Cargo.toml Cargo.toml
35+
COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml
36+
COPY --from=tgi /tgi/proto proto
37+
COPY --from=tgi /tgi/benchmark benchmark
38+
COPY --from=tgi /tgi/router router
39+
COPY --from=tgi /tgi/launcher launcher
40+
RUN cargo build --release --workspace --exclude benchmark
41+
42+
# Fetch optimum-neuron sources
43+
FROM alpine/git AS optimum-neuron
44+
RUN git clone --depth 1 --branch v0.0.22 https://github.com/huggingface/optimum-neuron.git /optimum-neuron
45+
46+
# Python base image
47+
FROM ubuntu:22.04 AS base
48+
49+
RUN apt-get update -y \
50+
&& apt-get install -y --no-install-recommends \
51+
python3-pip \
52+
python3-setuptools \
53+
python-is-python3 \
54+
&& rm -rf /var/lib/apt/lists/* \
55+
&& apt-get clean
56+
RUN pip3 --no-cache-dir install --upgrade pip
57+
58+
# Python server build image
59+
FROM base AS pyserver
60+
61+
RUN apt-get update -y \
62+
&& apt-get install -y --no-install-recommends \
63+
make \
64+
python3-venv \
65+
&& rm -rf /var/lib/apt/lists/* \
66+
&& apt-get clean
67+
68+
RUN install -d /pyserver
69+
WORKDIR /pyserver
70+
COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server
71+
COPY --from=tgi /tgi/proto proto
72+
RUN pip3 install -r server/build-requirements.txt
73+
RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server
74+
75+
# Neuron base image (used for deployment)
76+
FROM base AS neuron
77+
78+
# Install system prerequisites
79+
RUN apt-get update -y \
80+
&& apt-get install -y --no-install-recommends \
81+
gnupg2 \
82+
wget \
83+
python3-dev \
84+
&& rm -rf /var/lib/apt/lists/* \
85+
&& apt-get clean
86+
87+
RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list
88+
# Import the Neuron apt repository signing key.
# The key is downloaded to a file first: piping wget into apt-key would hide a
# failed download, because the default /bin/sh does not run with pipefail.
RUN wget -qO /tmp/neuron.pub https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB \
 && apt-key add /tmp/neuron.pub \
 && rm -f /tmp/neuron.pub
89+
90+
# Install neuronx packages
91+
RUN apt-get update -y \
92+
&& apt-get install -y --no-install-recommends \
93+
aws-neuronx-dkms=2.16.7.0 \
94+
aws-neuronx-collectives=2.20.22.0-c101c322e \
95+
aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 \
96+
aws-neuronx-tools=2.17.1.0 \
97+
&& rm -rf /var/lib/apt/lists/* \
98+
&& apt-get clean
99+
100+
ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}"
101+
102+
# Install the pinned Neuron SDK python packages from the Neuron pip repository.
# --no-cache-dir keeps pip's download cache out of this deployment-image layer.
RUN pip3 install --no-cache-dir \
103+
neuronx-cc==2.13.66.0 \
104+
torch-neuronx==2.1.2.2.1.0 \
105+
transformers-neuronx==0.10.0.21 \
106+
--extra-index-url=https://pip.repos.neuron.amazonaws.com
107+
108+
# Install HuggingFace packages
109+
# --no-cache-dir keeps pip's download cache out of this deployment-image layer.
RUN pip3 install --no-cache-dir \
110+
hf_transfer huggingface_hub
111+
112+
# Install optimum-neuron
113+
COPY --from=optimum-neuron /optimum-neuron optimum-neuron
114+
# Install from the copied source tree; --no-cache-dir avoids baking pip's cache into the layer.
RUN pip3 install --no-cache-dir ./optimum-neuron
115+
116+
# TGI base env
117+
ENV HUGGINGFACE_HUB_CACHE=/tmp \
118+
HF_HUB_ENABLE_HF_TRANSFER=1 \
119+
PORT=80
120+
121+
# Disable color logs as they are not supported by CloudWatch
122+
ENV LOGURU_COLORIZE=NO
123+
124+
# Install router
125+
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
126+
# Install launcher
127+
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
128+
# Install python server
129+
COPY --from=pyserver /pyserver/build/dist dist
130+
# Install the server sdist built in the pyserver stage; --no-cache-dir avoids baking pip's cache into the layer.
RUN pip install --no-cache-dir dist/text_generation_server*.tar.gz
131+
132+
# AWS Sagemaker compatible image
133+
FROM neuron AS sagemaker
134+
135+
COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh
136+
RUN chmod +x entrypoint.sh
137+
138+
# Absolute path: no WORKDIR is set in this stage chain, so the script is copied
# to /entrypoint.sh. An absolute ENTRYPOINT keeps working when the container is
# started with a different working directory.
ENTRYPOINT ["/entrypoint.sh"]
139+
140+
141+
RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \
142+
&& rm -rf /var/lib/apt/lists/*
143+
RUN HOME_DIR=/root && \
144+
pip install requests && \
145+
curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \
146+
unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \
147+
cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \
148+
chmod +x /usr/local/bin/testOSSCompliance && \
149+
chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \
150+
${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \
151+
rm -rf ${HOME_DIR}/oss_compliance*
152+
153+
RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \
154+
has an indirect documentation dependency on third party <docutils/tools/editors/emacs/rst.el> project. The \
155+
<docutils/tools/editors/emacs/rst.el> project's licensing includes the <GPL v3> license. \
156+
\n\n\
157+
N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \
158+
third party <Text Generation Inference (TGI)> project. The <Text Generation Inference (TGI)> project's licensing \
159+
includes the <HFOIL --> https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \
160+
license." > /root/THIRD_PARTY_LICENSES
161+
162+
LABEL dlc_major_version="1"
163+
LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true"
164+
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true"

releases.json

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,24 @@
3434
"os_version": "ubuntu22.04",
3535
"python_version": "py310",
3636
"pytorch_version": "1.13.1"
37+
},
38+
{
39+
"device": "inf2",
40+
"min_version": "0.0.22",
41+
"max_version": "0.0.22",
42+
"os_version": "ubuntu22.04",
43+
"python_version": "py310",
44+
"pytorch_version": "2.1.2"
3745
}
3846
],
3947
"ignore_vulnerabilities": [],
4048
"releases": [
4149
{
42-
"device": "gpu",
43-
"version": "2.0.1",
50+
"device": "inf2",
51+
"version": "0.0.22",
4452
"os_version": "ubuntu22.04",
4553
"python_version": "py310",
46-
"cuda_version": "cu121",
47-
"pytorch_version": "2.1.1"
54+
"pytorch_version": "2.1.2"
4855
}
4956
]
5057
}

0 commit comments

Comments
 (0)