# Fetch and extract the TGI sources (TGI_VERSION selects the release tag)
FROM alpine:3.19 AS tgi
ARG TGI_VERSION=2.0.2
RUN mkdir -p /tgi
# NOTE(review): remote ADD is not checksum-verified — consider
# `ADD --checksum=sha256:...` once the release digest is recorded.
ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v${TGI_VERSION}.tar.gz /tgi/sources.tar.gz
RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1

# Build cargo components (adapted from TGI original Dockerfile)
# Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04)
# cargo-chef lets the planner/builder stages below cache the compilation of
# third-party Rust dependencies independently of the TGI source code.
FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef
WORKDIR /usr/src

# Use the sparse crates.io index protocol (faster than cloning the git index).
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse

# Compute the cargo-chef dependency "recipe" from the TGI manifests only, so
# the expensive dependency-build layer in the builder stage stays cached until
# the workspace manifests actually change.
FROM chef AS planner
COPY --from=tgi /tgi/Cargo.toml Cargo.toml
COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml
COPY --from=tgi /tgi/proto proto
COPY --from=tgi /tgi/benchmark benchmark
COPY --from=tgi /tgi/router router
COPY --from=tgi /tgi/launcher launcher
RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder

# Install a pinned protoc (v21.12) from the official protobuf release; it is
# required to compile the gRPC .proto definitions during the cargo build.
# NOTE(review): the downloaded zip is not checksum-verified — TODO compare
# against the digest published with the protobuf release.
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
rm -f $PROTOC_ZIP

# First build only the third-party dependencies from the planner's recipe
# (this layer is cached until the recipe changes)...
COPY --from=planner /usr/src/recipe.json recipe.json
RUN cargo chef cook --release --recipe-path recipe.json

# ...then copy the real sources and build the workspace binaries
# (router + launcher; the benchmark crate is excluded from the build).
COPY --from=tgi /tgi/Cargo.toml Cargo.toml
COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml
COPY --from=tgi /tgi/proto proto
COPY --from=tgi /tgi/benchmark benchmark
COPY --from=tgi /tgi/router router
COPY --from=tgi /tgi/launcher launcher
RUN cargo build --release --workspace --exclude benchmark

# Fetch optimum-neuron sources at a pinned release tag (v0.0.22).
# NOTE(review): alpine/git is untagged (implicit :latest) — pin a tag or
# digest for reproducible builds.
FROM alpine/git AS optimum-neuron
RUN git clone --depth 1 --branch v0.0.22 https://github.com/huggingface/optimum-neuron.git /optimum-neuron

# Python base image: Ubuntu 22.04 with pip, setuptools and the
# `python` -> python3 shim, shared by the build and deployment stages.
FROM ubuntu:22.04 AS base

RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
    python-is-python3 \
    python3-pip \
    python3-setuptools \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Upgrade pip itself without leaving a download cache in the layer.
RUN pip3 --no-cache-dir install --upgrade pip

# Python server build image: generates the text-generation-server python
# package (gRPC stubs + sdist) from the optimum-neuron server sources and
# the TGI proto definitions.
FROM base AS pyserver

RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
    make \
    python3-venv \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# WORKDIR creates /pyserver if it does not exist — no explicit mkdir needed.
WORKDIR /pyserver
COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server
COPY --from=tgi /tgi/proto proto
# --no-cache-dir keeps the pip download cache out of the image layer (DL3042).
RUN pip3 install --no-cache-dir -r server/build-requirements.txt
RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server

# Neuron base image (used for deployment)
FROM base AS neuron

# Install system prerequisites (gnupg2 provides gpg for the keyring below)
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
    gnupg2 \
    python3-dev \
    wget \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Register the AWS Neuron apt repository. `apt-key add` is deprecated: store
# the dearmored key in a dedicated keyring and bind it with [signed-by=...]
# so it only authenticates this repository.
RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB \
    | gpg --dearmor -o /usr/share/keyrings/aws-neuron.gpg
RUN echo "deb [signed-by=/usr/share/keyrings/aws-neuron.gpg] https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list

# Install pinned neuronx system packages (driver, collectives, runtime, tools)
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
    aws-neuronx-dkms=2.16.7.0 \
    aws-neuronx-collectives=2.20.22.0-c101c322e \
    aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 \
    aws-neuronx-tools=2.17.1.0 \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Expose the Neuron CLI tools (neuron-ls, neuron-top, ...) on PATH
ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}"

# Pinned Neuron compiler / torch integration from the AWS pip repository.
# --no-cache-dir keeps the pip download cache out of the image layer.
RUN pip3 install --no-cache-dir \
    neuronx-cc==2.13.66.0 \
    torch-neuronx==2.1.2.2.1.0 \
    transformers-neuronx==0.10.0.21 \
    --extra-index-url=https://pip.repos.neuron.amazonaws.com

# Install HuggingFace hub client (hf_transfer accelerates model downloads).
# NOTE(review): these two packages are unpinned — TODO pin versions for
# reproducible images.
RUN pip3 install --no-cache-dir \
    hf_transfer huggingface_hub

# Install optimum-neuron from the sources fetched above (tag v0.0.22)
COPY --from=optimum-neuron /optimum-neuron optimum-neuron
RUN pip3 install --no-cache-dir ./optimum-neuron

# TGI base env
ENV HUGGINGFACE_HUB_CACHE=/tmp \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PORT=80

# Disable color logs as they are not supported by CloudWatch
ENV LOGURU_COLORIZE=NO

# Install router
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
# Install launcher
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
# Install python server (pip3 for consistency with the rest of the file;
# --no-cache-dir keeps the wheel cache out of the layer)
COPY --from=pyserver /pyserver/build/dist dist
RUN pip3 install --no-cache-dir dist/text_generation_server*.tar.gz

# AWS Sagemaker compatible image
FROM neuron AS sagemaker

# The entrypoint script maps SageMaker environment/config onto the TGI
# launcher invocation.
COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh
RUN chmod +x entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]


# OSS compliance tooling required for AWS Deep Learning Container releases:
# downloads the compliance bundle, generates license attribution files under
# ${HOME_DIR}, then removes the bundle in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \
    && rm -rf /var/lib/apt/lists/*
# NOTE(review): the compliance bundle is fetched without checksum
# verification — TODO pin/verify the S3 object before executing it.
RUN HOME_DIR=/root && \
    pip3 install --no-cache-dir requests && \
    curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \
    unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \
    cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \
    chmod +x /usr/local/bin/testOSSCompliance && \
    chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \
    ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \
    rm -rf ${HOME_DIR}/oss_compliance*

# Write the third-party license attribution file. NOTE: relies on Ubuntu's
# /bin/sh (dash), whose builtin `echo` interprets the embedded \n\n escapes;
# backslash-newline pairs inside the quotes are shell line continuations.
RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \
has an indirect documentation dependency on third party <docutils/tools/editors/emacs/rst.el> project. The \
<docutils/tools/editors/emacs/rst.el> project's licensing includes the <GPL v3> license. \
\n\n \
N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \
third party <Text Generation Inference (TGI)> project. The <Text Generation Inference (TGI)> project's licensing \
includes the <HFOIL --> https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \
license." > /root/THIRD_PARTY_LICENSES

# Image metadata consumed by the SageMaker DLC tooling (single LABEL
# instruction; the resulting label set is identical).
LABEL dlc_major_version="1" \
      com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" \
      com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true"
# (removed stray web-scrape artifact: "0 commit comments" — not Dockerfile content)