To specify which GPUs to use, set the environment variable `CUDA_VISIBLE_DEVICES`.
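Since `CUDA_VISIBLE_DEVICES` is an ordinary environment variable, it can be scoped to a single worker process rather than the whole shell; a minimal Python sketch (the GPU indices are arbitrary examples):

```python
import os
import subprocess
import sys

# Restrict a child process to GPUs 0 and 2 (example indices): the CUDA
# runtime in the child will only enumerate the devices listed here.
env = dict(os.environ, CUDA_VISIBLE_DEVICES="0,2")
out = subprocess.run(
    [sys.executable, "-c", "import os; print(os.environ['CUDA_VISIBLE_DEVICES'])"],
    env=env,
    capture_output=True,
    text=True,
)
print(out.stdout.strip())  # → 0,2
```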
## SGLang

```
# Install libnuma
apt install -y libnuma-dev

uv pip install ai-dynamo[sglang]
```

Run the backend/worker like this:
```
# Note the '.worker' in the module path for SGLang
python -m dynamo.sglang.worker --help
```

You can pass any sglang flags directly to this worker; see https://docs.sglang.ai/backend/server_arguments.html, which also covers using multiple GPUs.
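The pass-through works because everything after the module name lands in the worker's `sys.argv`, which sglang's own argument parser then consumes; a small illustration (the inline `-c` script stands in for the worker module, and the flag is just a representative example):

```python
import subprocess
import sys

# Everything after the "module" arrives verbatim in the child process's
# sys.argv, exactly as if it had been typed on the worker's command line.
out = subprocess.run(
    [sys.executable, "-c", "import sys; print(sys.argv[1:])", "--tp-size", "2"],
    capture_output=True,
    text=True,
)
print(out.stdout.strip())  # → ['--tp-size', '2']
```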
sudo apt-get -y install libopenmpi-dev

### After installing the pre-requisites above, install Dynamo
```
uv pip install ai-dynamo[trtllm]
```

Run the backend/worker like this:
maturin develop --uv
```

```
cd $PROJECT_ROOT
uv pip install .
# For development, use
export PYTHONPATH="${PYTHONPATH}:$(pwd)/components/frontend/src:$(pwd)/components/planner/src:$(pwd)/components/backends/vllm/src:$(pwd)/components/backends/sglang/src:$(pwd)/components/backends/trtllm/src:$(pwd)/components/backends/llama_cpp/src:$(pwd)/components/backends/mocker/src"
```

> [!NOTE]
> Editable (`-e`) does not work because the `dynamo` package is split over multiple directories, one per backend.

You should now be able to run `python -m dynamo.frontend`.
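The editable-install restriction exists because `dynamo` is a namespace package assembled from several per-backend `src` directories; the sketch below (directory and module names are hypothetical) shows how Python merges such a split package at import time, which is what the `PYTHONPATH` export above relies on:

```python
import os
import sys
import tempfile

# Build two independent directories that each contribute one module to the
# same top-level "demo_ns" package. No __init__.py anywhere, so Python
# treats demo_ns as an implicit namespace package (PEP 420).
root = tempfile.mkdtemp()
for backend, module in [("backend_a", "worker_a"), ("backend_b", "worker_b")]:
    pkg_dir = os.path.join(root, backend, "demo_ns")
    os.makedirs(pkg_dir)
    with open(os.path.join(pkg_dir, module + ".py"), "w") as f:
        f.write("NAME = %r\n" % module)
    sys.path.insert(0, os.path.join(root, backend))

# Both halves are importable under the single demo_ns namespace.
from demo_ns import worker_a, worker_b
print(worker_a.NAME, worker_b.NAME)  # → worker_a worker_b
```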
# llama.cpp engine for Dynamo

Usage:
```
# Install ai-dynamo llama.cpp backend (CPU Mode)
pip install "ai-dynamo[llama_cpp]"

# [Optional] To build llama.cpp for CUDA (needs a recent pip)
pip install --force-reinstall -r requirements.gpu.txt

python -m dynamo.llama_cpp --model-path /data/models/Qwen3-0.6B-Q8_0.gguf [args]
```
## Request Migration