Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 9 additions & 14 deletions docker/Dockerfile-cloud-uv
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
ARG BASE_TAG=main
FROM axolotlai/axolotl-uv:$BASE_TAG

USER root

ENV HF_DATASETS_CACHE="/workspace/data/huggingface-cache/datasets"
ENV HF_HUB_CACHE="/workspace/data/huggingface-cache/hub"
ENV HF_HOME="/workspace/data/huggingface-cache/hub"
Expand All @@ -11,7 +9,7 @@ ENV HF_HUB_ENABLE_HF_TRANSFER="1"
EXPOSE 8888
EXPOSE 22

COPY scripts/cloud-entrypoint.sh /etc/cloud-entrypoint.sh
COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh
COPY scripts/motd /etc/motd

RUN uv pip install jupyterlab notebook ipywidgets && \
Expand All @@ -20,16 +18,13 @@ RUN apt update && \
apt install --yes --no-install-recommends openssh-server tmux iproute2 nvtop && \
rm -rf /var/cache/apt/archives && \
rm -rf /var/lib/apt/lists/* && \
mkdir -p /home/ubuntu/.ssh && \
chmod 700 /home/ubuntu/.ssh && \
printf "\n[[ -z \"\$TMUX\" ]] && tty -s && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> /home/ubuntu/.bashrc && \
printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> /home/ubuntu/.bashrc && \
printf "\n[[ -z \"\$AXOLOTL_SKIP_SWITCH\" ]] && exec sudo -u ubuntu AXOLOTL_SKIP_SWITCH=1 -i\n" >> /root/.bashrc && \
chmod +x /etc/cloud-entrypoint.sh && \
echo 'set-option -g history-limit 5000' >> /home/ubuntu/.tmux.conf && \
chown -R ubuntu:ubuntu /home/ubuntu /workspace

# USER ubuntu
mkdir -p ~/.ssh && \
chmod 700 ~/.ssh && \
printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> ~/.bashrc && \
chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \
chmod +x /root/cloud-entrypoint.sh && \
echo 'set-option -g history-limit 5000' >> ~/.tmux.conf

ENTRYPOINT ["/etc/cloud-entrypoint.sh"]
ENTRYPOINT ["/root/cloud-entrypoint.sh"]
CMD ["sleep", "infinity"]
18 changes: 3 additions & 15 deletions docker/Dockerfile-uv
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,6 @@ RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
git config --get remote.origin.fetch && \
git config --global credential.helper store

COPY .axolotl-complete.bash /home/ubuntu/.axolotl-complete.bash
RUN chmod +x /home/ubuntu/.axolotl-complete.bash && \
echo 'source /home/ubuntu/.axolotl-complete.bash' >> /home/ubuntu/.bashrc

# Ensure ubuntu user exists (may already exist from base image)
RUN id ubuntu &>/dev/null || ( \
useradd -m -s /bin/bash -u 1000 ubuntu && \
apt-get update && apt-get install -y --no-install-recommends sudo && rm -rf /var/lib/apt/lists/* \
); \
echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/ubuntu && \
chmod 0440 /etc/sudoers.d/ubuntu

RUN chown -R ubuntu:ubuntu /workspace /home/ubuntu

USER ubuntu
COPY .axolotl-complete.bash /root/.axolotl-complete.bash
RUN chmod +x /root/.axolotl-complete.bash && \
echo 'source /root/.axolotl-complete.bash' >> ~/.bashrc
16 changes: 4 additions & 12 deletions docker/Dockerfile-uv-base
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,13 @@ ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
ENV UV_TORCH_BACKEND="cu${CUDA}"

RUN apt-get update \
&& apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev pkg-config curl sudo && rm -rf /var/lib/apt/lists/* \
&& apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev pkg-config curl && rm -rf /var/lib/apt/lists/* \
&& git lfs install --skip-repo \
&& curl -LsSf https://astral.sh/uv/install.sh | env UV_INSTALL_DIR="/usr/local/bin" sh
&& curl -LsSf https://astral.sh/uv/install.sh | sh

# Create ubuntu user with passwordless sudo
RUN useradd -m -s /bin/bash -u 1000 ubuntu 2>/dev/null; \
usermod -aG sudo ubuntu && \
echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/ubuntu && \
chmod 0440 /etc/sudoers.d/ubuntu
ENV PATH="/root/.local/bin:${PATH}"

ENV UV_PYTHON_INSTALL_DIR="/opt/uv/python"
RUN uv python install ${PYTHON_VERSION} && \
chmod -R a+rX /opt/uv
RUN uv python install ${PYTHON_VERSION}

WORKDIR /workspace

Expand Down Expand Up @@ -61,5 +55,3 @@ RUN PYTHON_CP="cp$(echo $PYTHON_VERSION | tr -d '.')" && \
wget -nv "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/${WHL_VERSION}/${WHL_FILE}" && \
uv pip install --no-cache-dir "${WHL_FILE}" && \
rm "${WHL_FILE}"

RUN chown -R ubuntu:ubuntu /workspace
84 changes: 20 additions & 64 deletions scripts/cloud-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,37 +1,19 @@
#!/bin/bash

# Detect if running as non-root and set sudo prefix accordingly
if [ "$(id -u)" -ne 0 ]; then
SUDO="sudo"
RUN_AS_USER=""
else
SUDO=""
RUN_AS_USER="sudo -u ubuntu"
fi

# Export specific ENV variables to /etc/rp_environment
echo "Exporting environment variables..."
printenv | grep -E '^HF_|^BNB_|^CUDA_|^NCCL_|^NV|^RUNPOD_|^PATH=|^_=' | sed 's/^\([^=]*\)=\(.*\)$/export \1="\2"/' | grep -v 'printenv' | $SUDO tee /etc/rp_environment > /dev/null

# Add rp_environment sourcing to ubuntu's bashrc (if ubuntu user exists and line not already present)
if id ubuntu &>/dev/null; then
grep -q 'source /etc/rp_environment' /home/ubuntu/.bashrc 2>/dev/null || \
echo 'source /etc/rp_environment' >> /home/ubuntu/.bashrc
fi
# Also add to current user's bashrc if different from ubuntu
grep -q 'source /etc/rp_environment' ~/.bashrc 2>/dev/null || \
echo 'source /etc/rp_environment' >> ~/.bashrc
printenv | grep -E '^HF_|^BNB_|^CUDA_|^NCCL_|^NV|^RUNPOD_|^PATH=|^_=' | sed 's/^\([^=]*\)=\(.*\)$/export \1="\2"/' | grep -v 'printenv' >> /etc/rp_environment
echo 'source /etc/rp_environment' >> ~/.bashrc

add_keys_to_authorized() {
local key_value=$1
local target_home=$2

# Create the .ssh directory and set permissions
mkdir -p "$target_home/.ssh"
chmod 700 "$target_home/.ssh"
# Create the ~/.ssh directory and set permissions
mkdir -p ~/.ssh
chmod 700 ~/.ssh

# Create the authorized_keys file if it doesn't exist
touch "$target_home/.ssh/authorized_keys"
touch ~/.ssh/authorized_keys

# Initialize an empty key variable
local key=""
Expand All @@ -42,7 +24,7 @@ add_keys_to_authorized() {
if [[ $word == ssh-* ]]; then
# If there's a key being built, add it to the authorized_keys file
if [[ -n $key ]]; then
echo $key >> "$target_home/.ssh/authorized_keys"
echo $key >> ~/.ssh/authorized_keys
fi
# Start a new key
key=$word
Expand All @@ -54,42 +36,29 @@ add_keys_to_authorized() {

# Add the last key to the authorized_keys file
if [[ -n $key ]]; then
echo $key >> "$target_home/.ssh/authorized_keys"
echo $key >> ~/.ssh/authorized_keys
fi

# Set the correct permissions
chmod 600 "$target_home/.ssh/authorized_keys"
chmod 700 -R "$target_home/.ssh"
}

setup_ssh_keys() {
local key_value=$1

# Set up keys for the current user
add_keys_to_authorized "$key_value" "$HOME"

# Also set up keys for ubuntu user if we're root and ubuntu exists
if [ "$(id -u)" -eq 0 ] && id ubuntu &>/dev/null; then
add_keys_to_authorized "$key_value" "/home/ubuntu"
chown -R ubuntu:ubuntu /home/ubuntu/.ssh
fi
chmod 600 ~/.ssh/authorized_keys
chmod 700 -R ~/.ssh
}

# Set SSH port
if [ ! -z "$SSH_PORT" ]; then
$SUDO sed -i "s/#Port 22/Port $SSH_PORT/" /etc/ssh/sshd_config
sed -i "s/#Port 22/Port $SSH_PORT/" /etc/ssh/sshd_config
fi

if [[ $PUBLIC_KEY ]]; then
# runpod, prime intellect
setup_ssh_keys "$PUBLIC_KEY"
add_keys_to_authorized "$PUBLIC_KEY"
# Start the SSH service in the background
$SUDO service ssh start
service ssh start
elif [[ $SSH_KEY ]]; then
# latitude.sh
setup_ssh_keys "$SSH_KEY"
add_keys_to_authorized "$SSH_KEY"
# Start the SSH service in the background
$SUDO service ssh start
service ssh start
else
echo "No PUBLIC_KEY or SSH_KEY environment variable provided, not starting openSSH daemon"
fi
Expand All @@ -101,16 +70,8 @@ if [ -n "$JUPYTER_PASSWORD" ]; then
fi

if [ "$JUPYTER_DISABLE" != "1" ]; then
# Run Jupyter Lab as ubuntu user when possible
JUPYTER_ARGS="--port=8888 --ip=* --ServerApp.allow_origin=*"
if [ "$(id -u)" -eq 0 ] && id ubuntu &>/dev/null; then
sudo --preserve-env=PATH,JUPYTER_TOKEN -u ubuntu jupyter lab $JUPYTER_ARGS &
else
if [ "$(id -u)" -eq 0 ]; then
JUPYTER_ARGS="$JUPYTER_ARGS --allow-root"
fi
jupyter lab $JUPYTER_ARGS &
fi
# Run Jupyter Lab in the background
jupyter lab --port=8888 --ip=* --allow-root --ServerApp.allow_origin=* &
fi

if [ ! -d "/workspace/data/axolotl-artifacts" ]; then
Expand All @@ -119,19 +80,14 @@ fi
if [ ! -L "/workspace/axolotl/outputs" ]; then
ln -sf /workspace/data/axolotl-artifacts /workspace/axolotl/outputs
fi
chown -R ubuntu:ubuntu /workspace 2>/dev/null || true

# start the runpod slurm init
SLURM_INIT="${SLURM_INIT:-/slurm-init.sh}"

if [[ -f "$SLURM_INIT" ]]; then
echo "[entrypoint] running $SLURM_INIT..."
$SUDO bash "$SLURM_INIT"
bash "$SLURM_INIT"
fi

# Execute the passed arguments (CMD) as ubuntu when possible
if [ "$(id -u)" -eq 0 ] && id ubuntu &>/dev/null; then
exec sudo --preserve-env=PATH -u ubuntu "$@"
else
exec "$@"
fi
# Execute the passed arguments (CMD)
exec "$@"
Loading