From f0f29214c74677fad0879dd453ff811a9940367d Mon Sep 17 00:00:00 2001
From: Keiven C <213854356+keivenchang@users.noreply.github.com>
Date: Thu, 25 Sep 2025 21:50:02 -0700
Subject: [PATCH] feat: add network isolation modes to container/run.sh script (#3237)

Signed-off-by: Keiven Chang
Signed-off-by: Harrison King Saturley-Hall

---
 container/README.md | 137 +++++++++++++++++++++++++++++++++++++-------
 container/run.sh    |  26 ++++++++-
 2 files changed, 142 insertions(+), 21 deletions(-)

diff --git a/container/README.md b/container/README.md
index 2acd6152ac..fa2722592b 100644
--- a/container/README.md
+++ b/container/README.md
@@ -167,36 +167,112 @@ The `run.sh` script launches Docker containers with the appropriate configuratio
 - **GPU Management**: Automatic GPU detection and allocation
 - **Volume Mounting**: Workspace and HuggingFace cache mounting
 - **User Management**: Root or user-based container execution
-- **Network Configuration**: Host networking for service communication
+- **Network Configuration**: Configurable networking modes (host, bridge, none, container sharing)
 - **Resource Limits**: Memory, file descriptors, and IPC configuration
 
 **Common Usage Examples:**
 
 ```bash
-# Basic container launch (inference/production)
-./run.sh --image dynamo:latest-vllm
+# Basic container launch (inference/production, runs as root user)
+./run.sh --image dynamo:latest-vllm -v $HOME/.cache:/home/ubuntu/.cache
 
-# Mount workspace for development (use local-dev image for local user permissions)
-./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace
+# Mount workspace for development (use local-dev image for local host user permissions)
+./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/ubuntu/.cache
 
 # Use specific image and framework for development
-./run.sh --image v0.1.0.dev.08cc44965-vllm-local-dev --framework vllm --mount-workspace
+./run.sh --image v0.1.0.dev.08cc44965-vllm-local-dev --framework vllm --mount-workspace -v $HOME/.cache:/home/ubuntu/.cache
 
 # Interactive development shell with workspace mounted
-./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -it -- bash
+./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/ubuntu/.cache -it -- bash
 
 # Development with custom environment variables
-./run.sh --image dynamo:latest-vllm-local-dev -e CUDA_VISIBLE_DEVICES=0,1 --mount-workspace
-
-# Production inference without GPU access
-./run.sh --image dynamo:latest-vllm --gpus none
+./run.sh --image dynamo:latest-vllm-local-dev -e CUDA_VISIBLE_DEVICES=0,1 --mount-workspace -v $HOME/.cache:/home/ubuntu/.cache
 
 # Dry run to see docker command
 ./run.sh --dry-run
 
 # Development with custom volume mounts
-./run.sh --image dynamo:latest-vllm-local-dev -v /host/path:/container/path --mount-workspace
+./run.sh --image dynamo:latest-vllm-local-dev -v /host/path:/container/path --mount-workspace -v $HOME/.cache:/home/ubuntu/.cache
+```
+
+### Network Configuration Options
+
+The `run.sh` script supports different networking modes via the `--network` flag (defaults to `host`):
+
+#### Host Networking (Default)
+```bash
+# Host networking is the default, so these two commands are equivalent
+./run.sh --image dynamo:latest-vllm-local-dev --network host -v $HOME/.cache:/home/ubuntu/.cache
+./run.sh --image dynamo:latest-vllm-local-dev -v $HOME/.cache:/home/ubuntu/.cache
+```
+**Use cases:**
+- High-performance ML inference (default for GPU workloads)
+- Services that need direct host port access
+- Maximum network performance with minimal overhead
+- Sharing services with the host machine (NATS, etcd, etc.)
+
+**⚠️ Port Sharing Limitation:** Host networking shares all ports with the host machine, which means you can only run **one instance** of services like NATS (port 4222) or etcd (port 2379) across all containers and the host.
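+
+To spot such conflicts before launching, you can check from the host whether the well-known ports already have listeners (a sketch; `ss` ships with iproute2 on most Linux hosts):
+
+```bash
+# Any output means a NATS or etcd instance already owns the port,
+# so a second host-networked copy would fail to bind
+ss -ltn '( sport = :4222 or sport = :2379 )'
+```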
+
+#### Bridge Networking (Isolated)
+```bash
+# CI/testing with isolated bridge networking and host cache sharing
+./run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/ubuntu/.cache
+```
+**Use cases:**
+- Secure isolation from the host network
+- CI/CD pipelines requiring complete isolation
+- When you need full control over which ports are exposed
+- Exposing specific services to the host while maintaining isolation
+
+**Note:** To share specific ports with the host, use the `--port` or `-p` option with the format `host_port:container_port` (e.g., `--port 8000:8000` or `-p 9081:8081`) to expose individual container ports.
+
+#### No Networking ⚠️ **LIMITED FUNCTIONALITY**
+```bash
+# Complete network isolation - no external connectivity
+./run.sh --image dynamo:latest-vllm --network none --mount-workspace -v $HOME/.cache:/home/ubuntu/.cache
+
+# Same with local user permissions
+./run.sh --image dynamo:latest-vllm-local-dev --network none --mount-workspace -v $HOME/.cache:/home/ubuntu/.cache
+```
+**⚠️ WARNING: `--network none` severely limits Dynamo functionality:**
+- **No model downloads** - HuggingFace models cannot be downloaded
+- **No API access** - cannot reach external APIs or services
+- **No distributed inference** - multi-node setups won't work
+- **No monitoring/logging** - external monitoring systems are unreachable
+- **Limited debugging** - cannot access external debugging tools
+
+**Very limited use cases:**
+- Pre-downloaded models with purely local processing
+- Air-gapped security environments (models must be pre-staged)
+
+#### Container Network Sharing
+Use `--network container:name` to share the network namespace with another container.
+
+**Use cases:**
+- Sidecar patterns (logging, monitoring, caching)
+- Service mesh architectures
+- Sharing network namespaces between related containers
+
+See the Docker documentation for `--network container:name` usage.
+
+#### Custom Networks
+Use custom Docker networks for multi-container applications: create one with `docker network create` and pass its name via `--network network-name`, as in the sketch below.
+
+**Use cases:**
+- Multi-container applications
+- Service discovery by container name
+
+See the Docker documentation for custom network creation and management.
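+
+As a concrete illustration of both patterns (a sketch; the network name `dynamo-net` and the container name `nats` are placeholders, not part of the script):
+
+```bash
+# Create a user-defined bridge network
+docker network create dynamo-net
+
+# Start a dependency on that network under a known name
+docker run -d --name nats --network dynamo-net nats:latest -js
+
+# Run the Dynamo container on the same network; the dependency is
+# resolvable by container name (nats://nats:4222)
+./run.sh --image dynamo:latest-vllm --network dynamo-net -v $HOME/.cache:/home/ubuntu/.cache
+
+# Alternatively, share an existing container's network namespace outright
+./run.sh --image dynamo:latest-vllm --network container:nats -v $HOME/.cache:/home/ubuntu/.cache
+```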
+
+#### Network Mode Comparison
+
+| Mode | Performance | Security | Use Case | Dynamo Compatibility | Port Sharing | Port Publishing |
+|------|-------------|----------|----------|---------------------|---------------|-----------------|
+| `host` | Highest | Lower | ML/GPU workloads, high-performance services | ✅ Full | ⚠️ **Shared with host** (one NATS/etcd only) | ❌ Not needed |
+| `bridge` | Good | Higher | General web services, controlled port exposure | ✅ Full | ✅ Isolated ports | ✅ `-p host:container` |
+| `none` | N/A | Highest | Air-gapped environments only | ⚠️ **Very limited** | ✅ No network | ❌ No network |
+| `container:name` | Good | Medium | Sidecar patterns, shared network stacks | ✅ Full | ⚠️ Shared with target container | ❌ Use target's ports |
+| Custom networks | Good | Medium | Multi-container applications | ✅ Full | ✅ Isolated ports | ✅ `-p host:container` |
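+
+As an end-to-end check of bridge networking plus port publishing (a sketch; it assumes the frontend serves its OpenAI-compatible API on its default port 8000 inside the container):
+
+```bash
+# Launch isolated, publishing only the frontend port to the host
+./run.sh --image dynamo:latest-vllm --network bridge -p 8000:8000 -v $HOME/.cache:/home/ubuntu/.cache -it -- bash
+
+# Inside the container: start the supporting services and the frontend
+nats-server -js &
+etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://0.0.0.0:2379 --data-dir /tmp/etcd &
+python -m dynamo.frontend &
+
+# From the host: only the published port is reachable
+curl http://localhost:8000/v1/models
+```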
 
 ## Workflow Examples
 
@@ -206,14 +282,14 @@ The `run.sh` script launches Docker containers with the appropriate configuratio
 ./build.sh --framework vllm --target local-dev
 
 # 2. Run development container using the local-dev image
-./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -it
+./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/ubuntu/.cache -it
 
 # 3. Inside container, run inference (requires both frontend and backend)
 # Start frontend
 python -m dynamo.frontend &
 
 # Start backend (vLLM example)
-python -m dynamo.vllm --model Qwen/Qwen3-0.6B --gpu-memory-utilization 0.50 &
+python -m dynamo.vllm --model Qwen/Qwen3-0.6B --gpu-memory-utilization 0.20 &
 ```
 
 ### Production Workflow
@@ -221,15 +297,36 @@
 # 1. Build production image
 ./build.sh --framework vllm --release-build
 
-# 2. Run production container
-./run.sh --image dynamo:latest-vllm-local-dev --gpus all
+# 2. Run production container (runs as root)
+./run.sh --image dynamo:latest-vllm --gpus all
 ```
 
-### Testing Workflow
+### CI/CD Workflow
 ```bash
-# 1. Build with no cache for clean build
+# 1. Build image for CI
 ./build.sh --framework vllm --no-cache
 
-# 2. Test container functionality (--image defaults to dynamo:latest-vllm)
-./run.sh --mount-workspace -it -- python -m pytest tests/
+# 2. Run tests with network isolation for reproducible results
+./run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/ubuntu/.cache -- python -m pytest tests/
+
+# 3. Inside the container with bridge networking, start services
+# Note: Services are accessible only from within this container - no port conflicts with the host
+nats-server -js &
+etcd --listen-client-urls http://0.0.0.0:2379 --advertise-client-urls http://0.0.0.0:2379 --data-dir /tmp/etcd &
+python -m dynamo.frontend &
+
+# 4. Start worker backend (choose one framework):
+# vLLM
+DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 python -m dynamo.vllm --model Qwen/Qwen3-0.6B --gpu-memory-utilization 0.20 --enforce-eager --no-enable-prefix-caching --max-num-seqs 64 &
+
+# SGLang
+DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 python -m dynamo.sglang --model Qwen/Qwen3-0.6B --mem-fraction-static 0.20 --max-running-requests 64 &
+
+# TensorRT-LLM
+DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 python -m dynamo.trtllm --model Qwen/Qwen3-0.6B --free-gpu-memory-fraction 0.20 --max-num-tokens 8192 --max-batch-size 64 &
 ```
+
+**Framework-Specific GPU Memory Arguments:**
+- **vLLM**: `--gpu-memory-utilization 0.20` (use 20% of GPU memory), `--enforce-eager` (disable CUDA graphs), `--no-enable-prefix-caching` (save memory), `--max-num-seqs 64` (max concurrent sequences)
+- **SGLang**: `--mem-fraction-static 0.20` (20% of GPU memory for static allocation), `--max-running-requests 64` (max concurrent requests)
+- **TensorRT-LLM**: `--free-gpu-memory-fraction 0.20` (reserve 20% of GPU memory), `--max-num-tokens 8192` (max tokens per batch), `--max-batch-size 64` (max batch size)
diff --git a/container/run.sh b/container/run.sh
index 4e09c8cbd2..046ef57f54 100755
--- a/container/run.sh
+++ b/container/run.sh
@@ -36,6 +36,7 @@ DEFAULT_HF_CACHE=${SOURCE_DIR}/.cache/huggingface
 GPUS="all"
 PRIVILEGED=
 VOLUME_MOUNTS=
+PORT_MAPPINGS=
 MOUNT_WORKSPACE=
 ENVIRONMENT_VARIABLES=
 REMAINING_ARGS=
@@ -43,6 +44,7 @@ INTERACTIVE=
 USE_NIXL_GDS=
 RUNTIME=nvidia
 WORKDIR=/workspace
+NETWORK=host
 
 get_options() {
     while :; do
@@ -148,6 +150,14 @@ get_options() {
                 missing_requirement "$1"
             fi
             ;;
+        -p|--port)
+            if [ "$2" ]; then
+                PORT_MAPPINGS+=" -p $2 "
+                shift
+            else
+                missing_requirement "$1"
+            fi
+            ;;
         -e)
             if [ "$2" ]; then
                 ENVIRONMENT_VARIABLES+=" -e $2 "
@@ -165,6 +175,14 @@ get_options() {
         --use-nixl-gds)
             USE_NIXL_GDS=TRUE
             ;;
+        --network)
+            if [ "$2" ]; then
+                NETWORK=$2
+                shift
+            else
+                missing_requirement "$1"
+            fi
+            ;;
         --dry-run)
             RUN_PREFIX="echo"
             echo ""
@@ -304,7 +322,12 @@ show_help() {
     echo " [--hf-cache directory to volume mount as the hf cache, default is NONE unless mounting workspace]"
     echo " [--gpus gpus to enable, default is 'all', 'none' disables gpu support]"
     echo " [--use-nixl-gds add volume mounts and capabilities needed for NVIDIA GPUDirect Storage]"
+    echo " [--network network mode for container, default is 'host']"
+    echo "     Options: 'host' (default), 'bridge', 'none', 'container:name'"
+    echo "     Examples: --network bridge (isolated), --network none (no network - WARNING: breaks most functionality)"
+    echo "               --network container:redis (share network with 'redis' container)"
     echo " [-v add volume mount]"
+    echo " [-p|--port add port mapping (host_port:container_port)]"
     echo " [-e add environment variable]"
     echo " [--mount-workspace set up for local development]"
     echo " [-- stop processing and pass remaining args as command to docker run]"
@@ -335,7 +358,7 @@ ${RUN_PREFIX} docker run \
     ${GPU_STRING} \
     ${INTERACTIVE} \
     ${RM_STRING} \
-    --network host \
+    --network "$NETWORK" \
     ${RUNTIME:+--runtime "$RUNTIME"} \
     --shm-size=10G \
     --ulimit memlock=-1 \
@@ -343,6 +366,7 @@
     --ulimit nofile=65536:65536 \
     ${ENVIRONMENT_VARIABLES} \
     ${VOLUME_MOUNTS} \
+    ${PORT_MAPPINGS} \
     -w "$WORKDIR" \
     --cap-add CAP_SYS_PTRACE \
     ${NIXL_GDS_CAPS} \