@@ -44,19 +44,27 @@ RUN apt-get update && \
44
44
45
45
WORKDIR /workspace/
46
46
47
- WORKDIR /tmp/
47
+ # Install megatron core, this can be removed once 0.3 pip package is released
48
+ # We leave it here in case we need to work off of a specific commit in main
49
+ RUN git clone https://github.com/NVIDIA/Megatron-LM.git && \
50
+ cd Megatron-LM && \
51
+ git checkout 375395c187ff64b8d56a1cd40572bc779864b1bd && \
52
+ pip install .
48
53
49
54
# Distributed Adam support for multiple dtypes
50
55
RUN git clone https://github.com/NVIDIA/apex.git && \
51
56
cd apex && \
52
57
git checkout 52e18c894223800cb611682dce27d88050edf1de && \
53
- pip3 install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" --global-option="--distributed_adam" --global-option="--deprecated_fused_adam" ./
58
+ pip install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam" ./
54
59
55
- # install megatron core, this can be removed once 0.3 pip package is released
56
- RUN git clone https://github.com/NVIDIA/Megatron-LM.git && \
57
- cd Megatron-LM && \
58
- git checkout ab0336a5c8eab77aa74ae604ba1e73decbf6d560 && \
59
- pip install -e .
60
+ RUN git clone https://github.com/NVIDIA/TransformerEngine.git && \
61
+ cd TransformerEngine && \
62
+ git fetch origin a03f8bc9ae004e69aae4902fdd4a6d81fd95bc89 && \
63
+ git checkout FETCH_HEAD && \
64
+ git submodule init && git submodule update && \
65
+ NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi pip install .
66
+
67
+ WORKDIR /tmp/
60
68
61
69
# uninstall stuff from base container
62
70
RUN pip3 uninstall -y sacrebleu torchtext
0 commit comments