vlm-run · spillai · Feb 8, 2026 · Feb 7, 2026 · gemini-code-assist · Feb 8, 2026
diff --git a/.gitignore b/.gitignore
@@ -208,3 +208,4 @@ cython_debug/
 marimo/_static/
 marimo/_lsp/
 __marimo__/
+.subtask/
diff --git a/README.md b/README.md
@@ -255,7 +255,7 @@ See [MODELS.md](.claude/skills/vlmbench/MODELS.md) for tested models and their r
 | Type | Extensions | Processing |
 |---|---|---|
 | Image | `.png`, `.jpg`, `.jpeg`, `.webp`, `.tiff`, `.bmp` | Base64 encode |
-| PDF | `.pdf` | `pdf2image` per-page -> base64 |
+| PDF | `.pdf` | `pypdfium2` per-page -> base64 |
 | Video | `.mp4`, `.mov`, `.avi`, `.mkv`, `.webm` | `ffmpeg` 1fps -> frames -> base64 |
 
 Directories processed recursively, sorted alphabetically.
@@ -272,4 +272,4 @@ Results saved as JSON to `./results/{model-slug}-{timestamp}.json` with model me
 - vLLM (`uv pip install vllm`) for native `--backend vllm`
 - tmux (for server management and monitoring)
 - macmon (`brew install macmon`) or nvitop (GPU monitoring)
-- ffmpeg (video input), poppler (PDF input) — optional
+- ffmpeg (video input) — optional
diff --git a/pyproject.toml b/pyproject.toml
@@ -31,7 +31,7 @@ dependencies = [
     "openai>=1.0",
     "tenacity>=8",
     "Pillow>=10",
-    "pdf2image>=1.16",
+    "pypdfium2>=4",
 ]
 
 [project.optional-dependencies]

diff --git a/uv.lock b/uv.lock
diff --git a/vlmbench/cli.py b/vlmbench/cli.py
@@ -6,7 +6,7 @@
 #   "openai>=1.0",
 #   "tenacity>=8",
 #   "Pillow>=10",
-#   "pdf2image>=1.16",
+#   "pypdfium2>=4",
 # ]
 # ///
 """
@@ -972,17 +972,21 @@ def image_to_base64(path: Path) -> str:
     return f"data:{mime};base64,{b64}"
 
 
-def pdf_to_base64_images(path: Path) -> list[str]:
-    """Convert PDF pages to base64 data URIs using pdf2image."""
-    from pdf2image import convert_from_path
+def pdf_to_base64_images(path: Path, dpi: int = 150) -> list[str]:
+    """Render each PDF page at ``dpi`` via pypdfium2; return one PNG base64 data URI per page."""
+    import pypdfium2 as pdfium
 
-    images = convert_from_path(str(path))
+    doc = pdfium.PdfDocument(str(path))
     results = []
-    for img in images:
+    for idx in range(len(doc)):
+        page = doc[idx]
+        bitmap = page.render(scale=dpi / 72)  # pypdfium2 scale is pixels per PDF point (1/72 in), so dpi / 72 gives the requested DPI
+        img = bitmap.to_pil()
         buf = io.BytesIO()
         img.save(buf, format="PNG")
         b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
         results.append(f"data:image/png;base64,{b64}")
+    doc.close()  # NOTE(review): not reached if rendering or encoding raises; consider try/finally
     return results
-    doc = pdfium.PdfDocument(str(path))
-    results = []
-    for img in images:
-    for idx in range(len(doc)):
-        page = doc[idx]
-        bitmap = page.render(scale=dpi / 72)
-        img = bitmap.to_pil()
-        buf = io.BytesIO()
-        img.save(buf, format="PNG")
-        b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
-        results.append(f"data:image/png;base64,{b64}")
-    doc.close()
-    return results
+    with pdfium.PdfDocument(str(path)) as doc:
+        results = []
+        for page in doc:
+            bitmap = page.render(scale=dpi / 72)
+            img = bitmap.to_pil()
+            buf = io.BytesIO()
+            img.save(buf, format="PNG")
+            b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
+            results.append(f"data:image/png;base64,{b64}")
+    return results
-    doc = pdfium.PdfDocument(str(path))
-    results = []
-    for img in images:
-    for idx in range(len(doc)):
-        page = doc[idx]
-        bitmap = page.render(scale=dpi / 72)
-        img = bitmap.to_pil()
-        buf = io.BytesIO()
-        img.save(buf, format="PNG")
-        b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
-        results.append(f"data:image/png;base64,{b64}")
-    doc.close()
-    return results
+    with pdfium.PdfDocument(str(path)) as doc:
+        results = []
+        for page in doc:
+            bitmap = page.render(scale=dpi / 72)
+            img = bitmap.to_pil()
+            buf = io.BytesIO()
+            img.save(buf, format="PNG")
+            b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
+            results.append(f"data:image/png;base64,{b64}")
+    return results