From 7b178243e9f09328d6c3f099d85eaf25b1515e1b Mon Sep 17 00:00:00 2001
From: Nid01 <30300755+Nid01@users.noreply.github.com>
Date: Sun, 5 Oct 2025 23:20:20 +0200
Subject: [PATCH 1/4] BUG: PageObject.scale() scales media box incorrectly (#3487)

---
 pypdf/_page.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index e7b47882c..f41342b73 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -1503,7 +1503,7 @@ def scale(self, sx: float, sy: float) -> None:
         Scale a page by the given factors by applying a transformation matrix
         to its content and updating the page size.
 
-        This updates the various page boundaries (mediabox, cropbox, etc.)
+        This updates the various page boundaries (artbox, cropbox, etc.)
         and the contents of the page.
 
         Args:
@@ -1512,11 +1512,11 @@ def scale(self, sx: float, sy: float) -> None:
         """
         self.add_transformation((sx, 0, 0, sy, 0, 0))
 
-        self.mediabox = self.mediabox.scale(sx, sy)
+        self.artbox = self.artbox.scale(sx, sy)
         self.cropbox = self.cropbox.scale(sx, sy)
         self.bleedbox = self.bleedbox.scale(sx, sy)
         self.trimbox = self.trimbox.scale(sx, sy)
-        self.artbox = self.artbox.scale(sx, sy)
+        self.mediabox = self.mediabox.scale(sx, sy)
 
         if PG.ANNOTS in self:
             annotations = self[PG.ANNOTS]

From c33abad7c20b1828cf9e21326cdc0257d0f59ad5 Mon Sep 17 00:00:00 2001
From: Nid01 <30300755+Nid01@users.noreply.github.com>
Date: Mon, 6 Oct 2025 23:29:23 +0200
Subject: [PATCH 2/4] BUG: Fix order of scaling for artbox and cropbox in PageObject (#3487)

---
 pypdf/_page.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index f41342b73..5fed53636 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -1512,10 +1512,10 @@ def scale(self, sx: float, sy: float) -> None:
         """
         self.add_transformation((sx, 0, 0, sy, 0, 0))
 
-        self.artbox = self.artbox.scale(sx, sy)
-        self.cropbox = self.cropbox.scale(sx, sy)
         self.bleedbox = self.bleedbox.scale(sx, sy)
         self.trimbox = self.trimbox.scale(sx, sy)
+        self.artbox = self.artbox.scale(sx, sy)
+        self.cropbox = self.cropbox.scale(sx, sy)
         self.mediabox = self.mediabox.scale(sx, sy)
 
         if PG.ANNOTS in self:

From ac8a80269cc7b929a8bce10f316342b9765da4e5 Mon Sep 17 00:00:00 2001
From: Nid01 <30300755+Nid01@users.noreply.github.com>
Date: Mon, 6 Oct 2025 23:37:48 +0200
Subject: [PATCH 3/4] TEST: Add test for scaling boxes in PageObject to address issue #3487

---
 tests/test_page.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/test_page.py b/tests/test_page.py
index f7f1b9430..56ed41b8c 100644
--- a/tests/test_page.py
+++ b/tests/test_page.py
@@ -1504,3 +1504,27 @@ def __getitem__(self, item) -> Any:
     page[NameObject("/Resources")] = resources
     with mock.patch.object(none_reference, "get_object", return_value=None):
         assert page.extract_text() == ""
+
+
+@pytest.mark.enable_socket
+def test_scale_by():
+    """Tests for #3487"""
+    url = "https://github.com/user-attachments/files/22685841/input.pdf"
+    name = "issue3487.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+
+    original_box = RectangleObject((0, 0, 595.275604, 841.88974))
+    expected_box = RectangleObject((0.0, 0.0, 297.637802, 420.94487))
+    for page in reader.pages:
+        assert page.artbox == original_box
+        assert page.bleedbox == original_box
+        assert page.cropbox == original_box
+        assert page.mediabox == original_box
+        assert page.trimbox == original_box
+
+        page.scale_by(0.5)
+        assert page.artbox == expected_box
+        assert page.bleedbox == expected_box
+        assert page.cropbox == expected_box
+        assert page.mediabox == expected_box
+        assert page.trimbox == expected_box

From 398a748955313f06b16d2dcbd5a132a6c49564b0 Mon Sep 17 00:00:00 2001
From: Stefan <96178532+stefan6419846@users.noreply.github.com>
Date: Thu, 16 Oct 2025 15:07:20 +0200
Subject: [PATCH 4/4] Add Ghostscript-based test

---
 tests/test_page.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/tests/test_page.py b/tests/test_page.py
index 56ed41b8c..5bbbd45fd 100644
--- a/tests/test_page.py
+++ b/tests/test_page.py
@@ -2,6 +2,8 @@
 import json
 import logging
 import math
+import shutil
+import subprocess
 from copy import deepcopy
 from io import BytesIO
 from pathlib import Path
@@ -28,11 +30,13 @@
 )
 
 from . import get_data_from_url, normalize_warnings
+from .test_images import image_similarity
 
 TESTS_ROOT = Path(__file__).parent.resolve()
 PROJECT_ROOT = TESTS_ROOT.parent
 RESOURCE_ROOT = PROJECT_ROOT / "resources"
 SAMPLE_ROOT = PROJECT_ROOT / "sample-files"
+GHOSTSCRIPT_BINARY = shutil.which("gs")
 
 
 def get_all_sample_files():
@@ -1528,3 +1532,43 @@ def test_scale_by():
         assert page.cropbox == expected_box
         assert page.mediabox == expected_box
         assert page.trimbox == expected_box
+
+
+@pytest.mark.enable_socket
+@pytest.mark.skipif(GHOSTSCRIPT_BINARY is None, reason="Requires Ghostscript")
+def test_box_rendering(tmp_path):
+    """Tests for issue #3487."""
+    url = "https://github.com/user-attachments/files/22685841/input.pdf"
+    name = "issue3487.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+
+    for page in reader.pages:
+        page.scale_by(0.5)
+        writer.add_page(page)
+
+    target_png_path = tmp_path / "target.png"
+    url = "https://github.com/user-attachments/assets/e9c2271c-bfc3-4a6f-8c91-ffefa24502e2"
+    name = "issue3487.png"
+    target_png_path.write_bytes(get_data_from_url(url, name=name))
+
+    pdf_path = tmp_path / "out.pdf"
+    writer.write(pdf_path)
+
+    for box in ["Art", "Bleed", "Crop", "Media", "Trim"]:
+        png_path = tmp_path / f"{box}.png"
+        # False positive: https://github.com/PyCQA/bandit/issues/333
+        subprocess.run(  # noqa: S603
+            [
+                GHOSTSCRIPT_BINARY,
+                f"-dUse{box}Box",
+                "-dFirstPage=1",
+                "-dLastPage=1",
+                "-sDEVICE=pngalpha",
+                "-o",
+                png_path,
+                pdf_path,
+            ]
+        )
+        assert png_path.is_file(), box
+        assert image_similarity(png_path, target_png_path) >= 0.95, box