Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ A collection of docker containers I use for various hobby projects, currently co

- `ghost` -> ghost6 based blog setup
- `homebridge` -> latest Homebridge container for HomeKit integration (armv8 architecture)
- `llama-vision` -> Llama 3.2 Vision model with CPU inference (Q4_K_M quantization) and Flask webhook API
- `python` -> base "hello-world" docker setup with poetry for dependency resolution and VSCode debug capabilities
- `verilator` -> minimal Verilator simulator for RTL simulation (based on OpenTitan container setup)
- `watchtower` -> automatic Docker image updates for all containers
42 changes: 42 additions & 0 deletions llama-vision/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Models directory — GGUF weights are multi-GB and mounted as a volume,
# never committed
models/
*.gguf

# Python bytecode, build artifacts, and virtualenvs
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
venv/
ENV/
.venv

# Flask instance data and asset cache
instance/
.webassets-cache

# Testing artifacts (pytest cache, coverage reports)
.pytest_cache/
.coverage
htmlcov/

# IDE / editor state
.vscode/
.idea/
*.swp
*.swo
*~

# OS metadata files
.DS_Store
Thumbs.db

# Logs
*.log

# Temporary files
tmp/
temp/
*.tmp
58 changes: 58 additions & 0 deletions llama-vision/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Multi-stage build for llama.cpp with Llama 3.2 Vision support.
# Stage 1 compiles llama.cpp from source; stage 2 is a slim runtime image
# carrying only the built binaries, the Python deps, and the Flask app.
FROM python:3.11-slim AS builder

# Build-time dependencies only — dropped entirely in the final stage
RUN apt-get update && apt-get install -y \
    build-essential \
    cmake \
    git \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Build llama.cpp from source. --depth 1 avoids pulling full history.
# NOTE(review): the clone is unpinned (tracks master HEAD), so builds are
# not reproducible — consider `--branch <release-tag>` to pin a version.
WORKDIR /build
RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp.git
WORKDIR /build/llama.cpp
RUN cmake -B build -DLLAMA_CURL=ON
# Parallel build: use all available cores instead of compiling serially
RUN cmake --build build --config Release -j"$(nproc)"

# Final runtime image
FROM python:3.11-slim

# Runtime dependencies: curl is needed by the HEALTHCHECK below
RUN apt-get update && apt-get install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy only the llama.cpp binaries from the builder stage
COPY --from=builder /build/llama.cpp/build/bin/llama-server /usr/local/bin/
COPY --from=builder /build/llama.cpp/build/bin/llama-cli /usr/local/bin/

# Set working directory
WORKDIR /app

# Install Python deps before copying app code so this layer caches
# across app-only changes
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

COPY app/ /app/

# Default model filenames and paths; overridable at `docker run -e ...`
ENV MODEL_NAME="llama-3.2-11b-vision-instruct-q4_k_m.gguf"
ENV CLIP_MODEL_NAME="mmproj-model-f16.gguf"
ENV MODEL_PATH="/models"
ENV FLASK_APP=webhook.py
ENV PYTHONUNBUFFERED=1

# Mount point for model weights (bound as a volume at run time)
RUN mkdir -p /models

# Expose Flask port
EXPOSE 5000

# Health check against the Flask /health endpoint; generous start period
# because model loading is slow
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:5000/health || exit 1

# Run the Flask webhook app (expected in app/webhook.py)
CMD ["python", "webhook.py"]
65 changes: 65 additions & 0 deletions llama-vision/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Makefile for the llama-vision container.
# Every target here is a command, not a file, so all of them are .PHONY —
# the original omitted logs/shell/health and the compose-* targets, which
# a stray file named `logs` or `shell` would have silently broken.
.PHONY: build run stop clean logs shell health test help \
        compose-up compose-down compose-logs compose-build

# Make `make` with no arguments show usage instead of relying on rule order.
.DEFAULT_GOAL := help

# Default model names (override on the command line, e.g.
# `make run MODEL_NAME=my-model.gguf`)
MODEL_NAME ?= llama-3.2-11b-vision-instruct-q4_k_m.gguf
CLIP_MODEL_NAME ?= mmproj-model-f16.gguf

help:
	@echo "Llama Vision Docker - Available targets:"
	@echo ""
	@echo "  make build   - Build the Docker image"
	@echo "  make run     - Run the container with models volume"
	@echo "  make stop    - Stop the running container"
	@echo "  make clean   - Stop and remove the container"
	@echo "  make logs    - Show container logs"
	@echo "  make test    - Run API tests"
	@echo "  make shell   - Open shell in running container"
	@echo "  make health  - Check service health"
	@echo ""
	@echo "Environment variables:"
	@echo "  MODEL_NAME      - Model filename (default: $(MODEL_NAME))"
	@echo "  CLIP_MODEL_NAME - CLIP projector filename (default: $(CLIP_MODEL_NAME))"

build:
	docker build --pull --rm -t llama-vision:latest .

# $(CURDIR) instead of $$(pwd): no subshell, and correct even if make
# was invoked with -C from another directory.
run:
	docker run -d \
		-p 5000:5000 \
		-v $(CURDIR)/models:/models \
		-e MODEL_NAME=$(MODEL_NAME) \
		-e CLIP_MODEL_NAME=$(CLIP_MODEL_NAME) \
		--name llama-vision \
		llama-vision:latest

# `|| true` so stop/clean are idempotent when the container is absent.
stop:
	docker stop llama-vision || true

clean: stop
	docker rm llama-vision || true

logs:
	docker logs -f llama-vision

shell:
	docker exec -it llama-vision /bin/bash

# python3 rather than bare python: many hosts ship only the python3 binary.
health:
	@echo "Checking health..."
	@curl -s http://localhost:5000/health | python3 -m json.tool

test:
	python3 test_api.py

# Docker compose targets
compose-up:
	docker compose up -d

compose-down:
	docker compose down

compose-logs:
	docker compose logs -f

compose-build:
	docker compose build
Loading