From b02863e47efa4512aa8a8290b2131d3e8e20899d Mon Sep 17 00:00:00 2001 From: "William G. Gagnon" Date: Fri, 2 Aug 2024 09:38:51 -0400 Subject: [PATCH 1/4] Handle images with empty data when processing an image from bytes --- CONTRIBUTORS.md | 1 + pypdf/_xobj_image_helpers.py | 7 +++++-- tests/test_xobject_image_helpers.py | 11 ++++++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 84f0b6ee4..89fec3b14 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -19,6 +19,7 @@ history and [GitHub's 'Contributors' feature](https://github.com/py-pdf/pypdf/gr * [ediamondscience](https://github.com/ediamondscience) * [Ermeson, Felipe](https://github.com/FelipeErmeson) * [Freitag, François](https://github.com/francoisfreitag) +* [Gagnon, William G.](https://github.com/williamgagnon) * [Górny, Michał](https://github.com/mgorny) * [Grillo, Miguel](https://github.com/Ineffable22) * [Gutteridge, David H.](https://github.com/dhgutteridge) diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py index 45b0c145b..43e353535 100644 --- a/pypdf/_xobj_image_helpers.py +++ b/pypdf/_xobj_image_helpers.py @@ -148,9 +148,12 @@ def _extended_image_frombytes( img = Image.frombytes(mode, size, data) except ValueError as exc: nb_pix = size[0] * size[1] - if len(data) % nb_pix != 0: + data_length = len(data) + if data_length == 0: + raise ValueError("Data is 0 bytes, cannot process an image from empty data.") from exc + if data_length % nb_pix != 0: raise exc - k = nb_pix * len(mode) / len(data) + k = nb_pix * len(mode) / data_length data = b"".join([bytes((x,) * int(k)) for x in data]) img = Image.frombytes(mode, size, data) return img diff --git a/tests/test_xobject_image_helpers.py b/tests/test_xobject_image_helpers.py index 63ecebd9b..ff12cc535 100644 --- a/tests/test_xobject_image_helpers.py +++ b/tests/test_xobject_image_helpers.py @@ -4,7 +4,7 @@ import pytest from pypdf import PdfReader -from pypdf._xobj_image_helpers import _handle_flate +from pypdf._xobj_image_helpers import _handle_flate, _extended_image_frombytes from pypdf.errors import PdfReadError from pypdf.generic import ArrayObject, DecodedStreamObject, NameObject, NumberObject @@ -113,3 +113,12 @@ def test_handle_flate__image_mode_1(): colors=2, obj_as_text="dummy", ) + + +def test_extended_image_frombytes_zero_data(): + mode = "RGB" + size = (1, 1) + data = b"" + + with pytest.raises(ValueError, match="Data is 0 bytes, cannot process an image from empty data."): + _extended_image_frombytes(mode, size, data) \ No newline at end of file From 13b5bf3de690c9fe50b71b95f2ff8a5e612af82d Mon Sep 17 00:00:00 2001 From: "William G. Gagnon" Date: Fri, 2 Aug 2024 10:58:07 -0400 Subject: [PATCH 2/4] Fix formatting error --- tests/test_xobject_image_helpers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_xobject_image_helpers.py b/tests/test_xobject_image_helpers.py index ff12cc535..a7ce109ff 100644 --- a/tests/test_xobject_image_helpers.py +++ b/tests/test_xobject_image_helpers.py @@ -7,7 +7,6 @@ from pypdf._xobj_image_helpers import _handle_flate, _extended_image_frombytes from pypdf.errors import PdfReadError from pypdf.generic import ArrayObject, DecodedStreamObject, NameObject, NumberObject - from . import get_data_from_url @@ -121,4 +120,4 @@ def test_extended_image_frombytes_zero_data(): data = b"" with pytest.raises(ValueError, match="Data is 0 bytes, cannot process an image from empty data."): - _extended_image_frombytes(mode, size, data) \ No newline at end of file + _extended_image_frombytes(mode, size, data) From 8ca9555e2e8bb2b4d1e31dd5f3445df42e9d2ee1 Mon Sep 17 00:00:00 2001 From: "William G. Gagnon" Date: Fri, 2 Aug 2024 11:02:07 -0400 Subject: [PATCH 3/4] Use appropriate PyPdf error --- pypdf/_xobj_image_helpers.py | 4 ++-- pypdf/errors.py | 4 ++++ tests/test_xobject_image_helpers.py | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py index 43e353535..bd7ef6721 100644 --- a/pypdf/_xobj_image_helpers.py +++ b/pypdf/_xobj_image_helpers.py @@ -6,7 +6,7 @@ from ._utils import check_if_whitespace_only, logger_warning from .constants import ColorSpaces -from .errors import PdfReadError +from .errors import PdfReadError, EmptyImageDataError from .generic import ( ArrayObject, DecodedStreamObject, @@ -150,7 +150,7 @@ def _extended_image_frombytes( nb_pix = size[0] * size[1] data_length = len(data) if data_length == 0: - raise ValueError("Data is 0 bytes, cannot process an image from empty data.") from exc + raise EmptyImageDataError("Data is 0 bytes, cannot process an image from empty data.") from exc if data_length % nb_pix != 0: raise exc k = nb_pix * len(mode) / data_length diff --git a/pypdf/errors.py b/pypdf/errors.py index c962dec66..ad197ffc1 100644 --- a/pypdf/errors.py +++ b/pypdf/errors.py @@ -59,4 +59,8 @@ class EmptyFileError(PdfReadError): """Raised when a PDF file is empty or has no content.""" +class EmptyImageDataError(PyPdfError): + """Raised when trying to process an image that has no data.""" + + STREAM_TRUNCATED_PREMATURELY = "Stream has ended unexpectedly" diff --git a/tests/test_xobject_image_helpers.py b/tests/test_xobject_image_helpers.py index a7ce109ff..9780e96a4 100644 --- a/tests/test_xobject_image_helpers.py +++ b/tests/test_xobject_image_helpers.py @@ -5,7 +5,7 @@ from pypdf import PdfReader from pypdf._xobj_image_helpers import _handle_flate, _extended_image_frombytes -from pypdf.errors import PdfReadError +from pypdf.errors import PdfReadError, EmptyImageDataError from pypdf.generic import ArrayObject, DecodedStreamObject, NameObject, NumberObject from . import get_data_from_url @@ -119,5 +119,5 @@ def test_extended_image_frombytes_zero_data(): size = (1, 1) data = b"" - with pytest.raises(ValueError, match="Data is 0 bytes, cannot process an image from empty data."): + with pytest.raises(EmptyImageDataError, match="Data is 0 bytes, cannot process an image from empty data."): _extended_image_frombytes(mode, size, data) From ee031d844afe84f5623333dfd2dbf3ff8b15a777 Mon Sep 17 00:00:00 2001 From: "William G. Gagnon" Date: Fri, 2 Aug 2024 11:06:39 -0400 Subject: [PATCH 4/4] Fix formatting errors --- pypdf/_xobj_image_helpers.py | 2 +- tests/test_xobject_image_helpers.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py index bd7ef6721..5ae8894fa 100644 --- a/pypdf/_xobj_image_helpers.py +++ b/pypdf/_xobj_image_helpers.py @@ -6,7 +6,7 @@ from ._utils import check_if_whitespace_only, logger_warning from .constants import ColorSpaces -from .errors import PdfReadError, EmptyImageDataError +from .errors import EmptyImageDataError, PdfReadError from .generic import ( ArrayObject, DecodedStreamObject, diff --git a/tests/test_xobject_image_helpers.py b/tests/test_xobject_image_helpers.py index 9780e96a4..39b7131fc 100644 --- a/tests/test_xobject_image_helpers.py +++ b/tests/test_xobject_image_helpers.py @@ -4,9 +4,10 @@ import pytest from pypdf import PdfReader -from pypdf._xobj_image_helpers import _handle_flate, _extended_image_frombytes -from pypdf.errors import PdfReadError, EmptyImageDataError +from pypdf._xobj_image_helpers import _extended_image_frombytes, _handle_flate +from pypdf.errors import EmptyImageDataError, PdfReadError from pypdf.generic import ArrayObject, DecodedStreamObject, NameObject, NumberObject + from . import get_data_from_url