Skip to content
This repository was archived by the owner on Mar 13, 2025. It is now read-only.

Commit 54d0ebb

Browse files
committed
Slim down docker, fix OSS cluster config (#106)
Closes https://github.com/anyscale/aviary/issues/92
Closes #5

Signed-off-by: Antoni Baum <[email protected]>
1 parent f063f15 commit 54d0ebb

File tree

4 files changed

+28
-26
lines changed

4 files changed

+28
-26
lines changed

deploy/_internal/backend/cluster-env.yaml

-7
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@ post_build_cmds:
1515
pip install \
1616
"async_timeout" \
1717
"markdown-it-py[plugins]" \
18-
"git+https://github.com/huggingface/diffusers.git" \
1918
"accelerate" \
2019
"transformers>=4.25.1" \
2120
"datasets" \
@@ -29,14 +28,8 @@ post_build_cmds:
2928
"bitsandbytes" \
3029
"git+https://github.com/Yard1/DeepSpeed.git@aviary" \
3130
"numpy<1.24" \
32-
"pytorch-lightning" \
3331
"ninja" \
3432
"protobuf<3.21.0" \
3533
"git+https://github.com/huggingface/optimum.git" \
3634
"torchmetrics" \
37-
"git+https://github.com/EleutherAI/lm_dataformat.git@4eec05349977071bf67fc072290b95e31c8dd836" \
38-
"lm_eval==0.3.0" \
39-
"tiktoken==0.1.2" \
40-
"pybind11==2.6.2" \
41-
"einops==0.3.0" \
4235
"safetensors"

deploy/ray/Dockerfile

+8 -12
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,18 @@
1-
FROM rayproject/ray-ml:nightly-gpu
1+
FROM rayproject/ray:nightly-cu118
22

3-
ENV HF_HUB_ENABLE_HF_TRANSFER=1
4-
5-
RUN sudo apt-get update
6-
RUN sudo apt-get install -y libaio-dev git-lfs awscli
3+
RUN sudo apt-get update && sudo apt-get install -y libaio-dev git-lfs awscli && sudo rm -rf /var/lib/apt/lists/*
74

8-
RUN pip install --upgrade pip
9-
RUN pip uninstall -y ray torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric tensorflow
105
RUN conda install python=3.10
11-
RUN pip install "ray[default,serve] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl"
6+
RUN pip install --upgrade pip && pip install "ray[default,serve] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl"
7+
RUN pip install -i https://download.pytorch.org/whl/cu118 torch torchvision torchaudio
128
COPY "./dist" "/home/ray/dist"
139
RUN cd /home/ray/dist && pip install "$(ls *.whl | head -n1)[backend]"
1410

15-
16-
COPY "./deploy/ray/backend.yaml" "/home/ray/abcd"
17-
1811
# The build context should be the root of the repo
1912
# So this gives the model definitions
2013
COPY "./models" "/home/ray/models"
2114

22-
RUN echo "Testing aviary install" && python -c "import aviary.backend"
15+
ENV HF_HUB_ENABLE_HF_TRANSFER=1
16+
RUN echo "Testing aviary install" && python -c "import aviary.backend"
17+
18+
RUN pip cache purge && conda clean -a && rm -rf ~/.cache

deploy/ray/aviary-cluster.yaml

+20
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,26 @@ docker:
1010
image: "anyscale/aviary:latest"
1111
container_name: "aviary"
1212

13+
# All the 'conda activate' are necessary to ensure we are in the
14+
# python 3.10 conda env.
15+
setup_commands:
16+
- echo "conda activate" >> ~/.bashrc
17+
18+
head_setup_commands:
19+
- conda activate && pip install 'boto3>=1.4.8'
20+
21+
worker_setup_commands: []
22+
23+
head_start_ray_commands:
24+
- conda activate && ray stop
25+
- conda activate && ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host=0.0.0.0
26+
27+
worker_start_ray_commands:
28+
- conda activate && ray stop
29+
# We need to make sure RAY_HEAD_IP env var is accessible
30+
# after conda activate.
31+
- export RAY_HEAD_IP && echo "export RAY_HEAD_IP=$RAY_HEAD_IP" >> ~/.bashrc && conda activate && ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076
32+
1333
available_node_types:
1434
head_node_type:
1535
node_config:

setup.py

-7
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@
2121
"torch>=2.0.0",
2222
"torchaudio>=2.0.0",
2323
"torchvision>=0.15.2",
24-
"diffusers @ git+https://github.com/huggingface/diffusers.git",
2524
"accelerate",
2625
"transformers>=4.25.1",
2726
"datasets",
@@ -35,16 +34,10 @@
3534
"bitsandbytes",
3635
"deepspeed @ git+https://github.com/Yard1/DeepSpeed.git@aviary",
3736
"numpy<1.24",
38-
"pytorch-lightning",
3937
"ninja",
4038
"protobuf<3.21.0",
4139
"optimum @ git+https://github.com/huggingface/optimum.git",
4240
"torchmetrics",
43-
"lm_dataformat @ git+https://github.com/EleutherAI/lm_dataformat.git@4eec05349977071bf67fc072290b95e31c8dd836",
44-
"lm_eval==0.3.0",
45-
"tiktoken==0.1.2",
46-
"pybind11==2.6.2",
47-
"einops==0.3.0",
4841
"safetensors",
4942
"pydantic==1.10.7",
5043
"markdown-it-py[plugins]",

0 commit comments

Comments
 (0)