From b449664b0396163d809b5335b48c68fe460d4b20 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Fri, 3 May 2024 23:04:15 +0200
Subject: [PATCH 01/42] ROB: improve inline image extraction

Closes #2598
---
 pypdf/generic/_data_structures.py |  99 +++++-------
 pypdf/generic/_image_inline.py    | 242 ++++++++++++++++++++++++++++++
 tests/test_images.py              |  12 ++
 3 files changed, 294 insertions(+), 59 deletions(-)
 create mode 100644 pypdf/generic/_image_inline.py

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 3ca761403..c70f5421a 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -49,7 +49,6 @@
 
 from .._protocols import PdfReaderProtocol, PdfWriterProtocol, XmpInformationProtocol
 from .._utils import (
-    WHITESPACES,
     StreamType,
     b_,
     deprecate_no_replacement,
@@ -81,6 +80,13 @@
     TextStringObject,
 )
 from ._fit import Fit
+from ._image_inline import (
+    extract_inline_A85,
+    extract_inline_AHex,
+    extract_inline_DCT,
+    extract_inline_default,
+    extract_inline_RL,
+)
 from ._utils import read_hex_string_from_stream, read_string_from_stream
 
 if sys.version_info >= (3, 11):
@@ -1152,65 +1158,40 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
         # left at beginning of ID
         tmp = stream.read(3)
         assert tmp[:2] == b"ID"
-        data = BytesIO()
-        # Read the inline image, while checking for EI (End Image) operator.
-        while True:
-            # Read 8 kB at a time and check if the chunk contains the E operator.
-            buf = stream.read(8192)
-            # We have reached the end of the stream, but haven't found the EI operator.
-            if not buf:
-                raise PdfReadError("Unexpected end of stream")
-            loc = buf.find(
-                b"E"
-            )  # we can not look straight for "EI" because it may not have been loaded in the buffer
-
-            if loc == -1:
-                data.write(buf)
+        filtr = settings.get("/F", "not set")
+        # print("inline", stream.tell(),filtr,"*",settings)
+        if isinstance(filtr, list):
+            filtr = filtr[0]  # used forencoding
+        if filtr == "AHx":
+            data = extract_inline_AHex(stream)
+        elif filtr == "A85":
+            data = extract_inline_A85(stream)
+        elif filtr == "RL":
+            data = extract_inline_RL(stream)
+        elif filtr == "DCT":
+            data = extract_inline_DCT(stream)
+        elif filtr == "not set":
+            cs = settings["/CS"]
+            if cs == "/I" or cs == "/G":
+                lcs = 1
+            elif cs == "/RGB":
+                lcs = 3
+            elif cs == "/CMYK":
+                lcs = 4
             else:
-                # Write out everything before the E.
-                data.write(buf[0:loc])
-
-                # Seek back in the stream to read the E next.
-                stream.seek(loc - len(buf), 1)
-                tok = stream.read(1)  # E of "EI"
-                # Check for End Image
-                tok2 = stream.read(1)  # I of "EI"
-                if tok2 != b"I":
-                    stream.seek(-1, 1)
-                    data.write(tok)
-                    continue
-                # for further debug : print("!!!!",buf[loc-1:loc+10])
-                info = tok + tok2
-                tok3 = stream.read(
-                    1
-                )  # possible space after "EI" may not been loaded  in buf
-                if tok3 not in WHITESPACES:
-                    stream.seek(-2, 1)  # to step back on I
-                    data.write(tok)
-                elif buf[loc - 1 : loc] in WHITESPACES:  # and tok3 in WHITESPACES:
-                    # Data can contain [\s]EI[\s]: 4 chars sufficient, checking Q operator not required.
-                    while tok3 in WHITESPACES:
-                        # needed ???? : info += tok3
-                        tok3 = stream.read(1)
-                    stream.seek(-1, 1)
-                    # we do not insert EI
-                    break
-                else:  # buf[loc - 1 : loc] not in WHITESPACES and tok3 in WHITESPACES:
-                    # Data can contain [!\s]EI[\s],  so check for Q or EMC operator is required to have 4 chars.
-                    while tok3 in WHITESPACES:
-                        info += tok3
-                        tok3 = stream.read(1)
-                    stream.seek(-1, 1)
-                    if tok3 == b"Q":
-                        break
-                    elif tok3 == b"E":
-                        ope = stream.read(3)
-                        stream.seek(-3, 1)
-                        if ope == b"EMC":
-                            break
-                    else:
-                        data.write(info)
-        return {"settings": settings, "data": data.getvalue()}
+                raise PdfReadError("Invalid CS value:", cs)
+            data = stream.read(
+                cast(int, settings["/W"]) * cast(int, settings["/H"]) * lcs
+            )
+            ei = read_non_whitespace(stream)
+            ei += stream.read(1)
+            stream.seek(-2, 1)
+        else:
+            data = extract_inline_default(stream)
+
+        ei = stream.read(2)
+        assert ei == b"EI"
+        return {"settings": settings, "data": data}
 
     # This overrides the parent method:
     def get_data(self) -> bytes:
diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
new file mode 100644
index 000000000..4c1ab1b62
--- /dev/null
+++ b/pypdf/generic/_image_inline.py
@@ -0,0 +1,242 @@
+# Copyright (c) 2024, PubPub-ZZ
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+import logging
+from io import BytesIO
+
+from .._utils import (
+    WHITESPACES,
+    StreamType,
+    read_non_whitespace,
+)
+from ..errors import PdfReadError
+
+logger = logging.getLogger(__name__)
+
+BUFFER_SIZE = 8192
+
+
+def extract_inline_AHex(stream: StreamType) -> bytes:
+    """
+    Extract HexEncoded Stream from Inline Image.
+    the stream will be moved onto the EI
+    """
+    data: bytes = b""
+    # Read data until delimiter > and EI as backup
+    # ignoring backup.
+    while True:
+        buf = stream.read(BUFFER_SIZE)
+        if not buf:
+            raise PdfReadError("Unexpected end of stream")
+        loc = buf.find(b">")
+        if loc >= 0:  # found >
+            data += buf[: (loc + 1)]
+            stream.seek(-BUFFER_SIZE + loc + 1)
+            break
+        loc = buf.find(b"EI")
+        if loc >= 0:  # found EI
+            stream.seek(-BUFFER_SIZE + loc - 1, 1)
+            c = stream.read(1)
+            while c in WHITESPACES:
+                stream.seek(-2, 1)
+                c = stream.read(1)
+                loc -= 1
+            data += buf[:loc]
+        else:  # > nor EI found
+            data += buf[:-1]
+            stream.seek(-1, 1)
+
+    ei = read_non_whitespace(stream)
+    ei += stream.read(1)
+    stream.seek(-2, 1)
+    if ei != b"EI":
+        raise PdfReadError("EI stream not found")
+    return data
+
+
+def extract_inline_A85(stream: StreamType) -> bytes:
+    """
+    Extract A85 Stream from Inline Image.
+    the stream will be moved onto the EI
+    """
+    data: bytes = b""
+    # Read data up to delimiter ~>
+    # see §3.3.2 from PDF ref 1.7
+    while True:
+        buf = stream.read(BUFFER_SIZE)
+        if not buf:
+            raise PdfReadError("Unexpected end of stream")
+        loc = buf.find(b"~>")
+        if loc >= 0:  # found!
+            data += buf[: loc + 2]
+            stream.seek(-BUFFER_SIZE + loc + 2, 1)
+            break
+        data += buf[:-1]  # back by one char in case of in the middle of ~>
+        stream.seek(-1, 1)
+
+    ei = read_non_whitespace(stream)
+    ei += stream.read(1)
+    stream.seek(-2, 1)
+    if ei != b"EI":
+        raise PdfReadError("EI stream not found")
+    return data
+
+
+def extract_inline_RL(stream: StreamType) -> bytes:
+    """
+    Extract RL Stream from Inline Image.
+    the stream will be moved onto the EI
+    """
+    data: bytes = b""
+    # Read data up to delimiter ~>
+    # see §3.3.4 from PDF ref 1.7
+    while True:
+        buf = stream.read(BUFFER_SIZE)
+        if not buf:
+            raise PdfReadError("Unexpected end of stream")
+        loc = buf.find(b"\x80")
+        if loc >= 0:  # found
+            data = buf[: loc + 1]
+            stream.seek(-BUFFER_SIZE + loc + 1, 1)
+            break
+        data += buf  # back by one char in case of in the middle of ~>
+
+    data += buf[:loc]
+    ei = read_non_whitespace(stream)
+    ei += stream.read(1)
+    stream.seek(-2, 1)
+    if ei != b"EI":
+        raise PdfReadError("EI stream not found")
+    return data
+
+
+def extract_inline_DCT(stream: StreamType) -> bytes:
+    """
+    Extract DCT (JPEG) Stream from Inline Image.
+    the stream will be moved onto the EI
+    """
+    data: bytes = b""
+    # Read Blocks of data (ID/Size/data) up to ID=FF/D9
+    # see https://www.digicamsoft.com/itu/itu-t81-36.html
+    while True:
+        c = stream.read(1)
+        data += c
+        if c != b"\xff":
+            continue
+        c = stream.read(1)
+        if c == b"\xff":
+            stream.seek(-1, 1)
+        elif c == b"\x00":  # stuffing
+            data += c
+        elif c == b"\xd9":  # end
+            data += c
+            break
+        elif c in (
+            b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc9\xca\xcb\xcc\xcd\xce\xcf"
+            b"\xda\xdb\xdc\xdd\xde\xdf"
+            b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xfe"
+        ):
+            data += c
+            c = stream.read(2)
+            data += c
+            sz = ord(c[0]) * 256 + c[1]
+            data += stream.read(sz - 2)
+        else:
+            data += c
+
+    ei = read_non_whitespace(stream)
+    ei += stream.read(1)
+    stream.seek(-2, 1)
+    if ei != b"EI":
+        raise PdfReadError("EI stream not found")
+    return data
+
+
+def extract_inline_default(stream: StreamType) -> bytes:
+    """
+    Legacy method
+    used by default
+    """
+    data = BytesIO()
+    # Read the inline image, while checking for EI (End Image) operator.
+    while True:
+        buf = stream.read(BUFFER_SIZE)
+        if not buf:
+            raise PdfReadError("Unexpected end of stream")
+        loc = buf.find(
+            b"E"
+        )  # we can not look straight for "EI" because it may not have been loaded in the buffer
+
+        if loc == -1:
+            data.write(buf)
+        else:
+            # Write out everything before the E.
+            data.write(buf[0:loc])
+
+            # Seek back in the stream to read the E next.
+            stream.seek(loc - len(buf), 1)
+            saved_pos = stream.tell()
+            tok = stream.read(1)  # E of "EI"
+            # Check for End Image
+            tok2 = stream.read(1)  # I of "EI"
+            if tok2 != b"I":
+                stream.seek(-1, 1)
+                data.write(tok)
+                continue
+            # for further debug : print("!!!!",buf[loc-1:loc+10])
+            info = tok + tok2
+            tok3 = stream.read(
+                1
+            )  # possible space after "EI" may not been loaded  in buf
+            if tok3 not in WHITESPACES:
+                stream.seek(-2, 1)  # to step back on I
+                data.write(tok)
+            elif buf[loc - 1 : loc] in WHITESPACES:  # and tok3 in WHITESPACES:
+                # Data can contain [\s]EI[\s]: 4 chars sufficient, checking Q operator not required.
+                while tok3 in WHITESPACES:
+                    # needed ???? : info += tok3
+                    tok3 = stream.read(1)
+                stream.seek(-1, 1)
+                # we do not insert EI
+                break
+            else:  # buf[loc - 1 : loc] not in WHITESPACES and tok3 in WHITESPACES:
+                # Data can contain [!\s]EI[\s],  so check for Q or EMC operator is required to have 4 chars.
+                while tok3 in WHITESPACES:
+                    info += tok3
+                    tok3 = stream.read(1)
+                stream.seek(-1, 1)
+                if tok3 == b"Q":
+                    break
+                elif tok3 == b"E":
+                    ope = stream.read(3)
+                    stream.seek(-3, 1)
+                    if ope == b"EMC":
+                        break
+                else:
+                    data.write(info)
+    stream.seek(saved_pos, 0)
+    return data.getvalue()
diff --git a/tests/test_images.py b/tests/test_images.py
index ad694d669..148893abb 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -346,3 +346,15 @@ def test_corrupted_jpeg_iss2266(pdf, pdf_name, images, images_name, filtr):
             print(fn)  # noqa: T201
             img = Image.open(BytesIO(zf.read(fn)))
             assert image_similarity(reader.pages[p].images[i].image, img) >= 0.99
+
+
+@pytest.mark.enable_socket()
+def test_inline_image_extraction():
+    """Cf #2598"""
+    url = "https://github.com/py-pdf/pypdf/files/14982414/lebo102.pdf"
+    name = "iss2598.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    # there is no error because images are correctly extracted
+    reader.pages[1].extract_text()
+    reader.pages[2].extract_text()
+    reader.pages[3].extract_text()

From 44b41a7a78a0450d49253931f20eb5c7f20bff46 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sat, 4 May 2024 15:04:07 +0200
Subject: [PATCH 02/42] Accept long filter/colour-space names for inline images and fall back to legacy EI scan

---
 pypdf/generic/_data_structures.py | 30 +++++++++++++++++++-----------
 tests/test_workflows.py           |  4 +---
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index c70f5421a..9463efe38 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -33,6 +33,7 @@
 import re
 import sys
 from io import BytesIO
+from math import ceil
 from typing import (
     Any,
     Callable,
@@ -1159,29 +1160,34 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
         tmp = stream.read(3)
         assert tmp[:2] == b"ID"
         filtr = settings.get("/F", "not set")
+        savpos = stream.tell()
         # print("inline", stream.tell(),filtr,"*",settings)
         if isinstance(filtr, list):
             filtr = filtr[0]  # used forencoding
-        if filtr == "AHx":
+        if filtr == "AHx" or "ASCIIHexDecode" in filtr:
             data = extract_inline_AHex(stream)
-        elif filtr == "A85":
+        elif filtr == "A85" or "ASCII85Decode" in filtr:
             data = extract_inline_A85(stream)
-        elif filtr == "RL":
+        elif filtr == "RL" or "RunLengthDecode" in filtr:
             data = extract_inline_RL(stream)
-        elif filtr == "DCT":
+        elif filtr == "DCT" or "DCTDecode" in filtr:
             data = extract_inline_DCT(stream)
         elif filtr == "not set":
-            cs = settings["/CS"]
-            if cs == "/I" or cs == "/G":
+            cs = settings.get("/CS", "")
+            if cs == "/I" or cs == "/G" or cs == "/Indexed" or cs == "/DeviceGray":
                 lcs = 1
-            elif cs == "/RGB":
+            elif "RGB" in cs:
                 lcs = 3
-            elif cs == "/CMYK":
+            elif "CMYK" in cs:
                 lcs = 4
             else:
-                raise PdfReadError("Invalid CS value:", cs)
+                bits = settings.get("/BPC", -1)
+                if bits > 0:
+                    lcs = bits / 8.0
+                else:
+                    raise PdfReadError("Invalid CS value:", cs)
             data = stream.read(
-                cast(int, settings["/W"]) * cast(int, settings["/H"]) * lcs
+                ceil(cast(int, settings["/W"]) * lcs) * cast(int, settings["/H"])
             )
             ei = read_non_whitespace(stream)
             ei += stream.read(1)
@@ -1190,7 +1196,9 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
             data = extract_inline_default(stream)
 
         ei = stream.read(2)
-        assert ei == b"EI"
+        if ei != b"EI":
+            stream.seek(savpos, 0)
+            data = extract_inline_default(stream)
         return {"settings": settings, "data": data}
 
     # This overrides the parent method:
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 94e380dca..c79a36b51 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -935,9 +935,7 @@ def test_extra_test_iss1541():
     stream = BytesIO()
     cs.write_to_stream(stream)
     stream.seek(0)
-    with pytest.raises(PdfReadError) as exc:
-        ContentStream(read_object(stream, None, None), None, None).operations
-    assert exc.value.args[0] == "Unexpected end of stream"
+    ContentStream(read_object(stream, None, None), None, None).operations
 
     b = BytesIO(data.getbuffer())
     reader = PdfReader(

From 0952fee4a34a46f80730bcbe6b8811129dbaf745 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sat, 4 May 2024 16:39:57 +0200
Subject: [PATCH 03/42] Use legacy EI scan instead of raising on unknown colour space; add test PDF

---
 pypdf/generic/_data_structures.py | 16 +++++++++-------
 tests/test_images.py              |  6 ++++++
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 9463efe38..941c020fb 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1185,13 +1185,15 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
                 if bits > 0:
                     lcs = bits / 8.0
                 else:
-                    raise PdfReadError("Invalid CS value:", cs)
-            data = stream.read(
-                ceil(cast(int, settings["/W"]) * lcs) * cast(int, settings["/H"])
-            )
-            ei = read_non_whitespace(stream)
-            ei += stream.read(1)
-            stream.seek(-2, 1)
+                    data = extract_inline_default(stream)
+                    lcs = -1
+            if lcs > 0:
+                data = stream.read(
+                    ceil(cast(int, settings["/W"]) * lcs) * cast(int, settings["/H"])
+                )
+                ei = read_non_whitespace(stream)
+                ei += stream.read(1)
+                stream.seek(-2, 1)
         else:
             data = extract_inline_default(stream)
 
diff --git a/tests/test_images.py b/tests/test_images.py
index 148893abb..4bfcfd0a3 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -358,3 +358,9 @@ def test_inline_image_extraction():
     reader.pages[1].extract_text()
     reader.pages[2].extract_text()
     reader.pages[3].extract_text()
+
+    url = "https://github.com/py-pdf/pypdf/files/15210011/Pages.62.73.from.0560-22_WSP.Plan_July.2022_Version.1.pdf"
+    name = "iss2598a.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    reader.pages[0].extract_text()
+    reader.pages[1].extract_text()

From 0ba5ae41d3617c7fd4e85911381a76898e632b44 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 5 May 2024 22:17:02 +0200
Subject: [PATCH 04/42] Translate inline image keys/values (filter arrays, long names); support 2/4-bit image modes

---
 pypdf/_page.py               | 117 ++++++++++++++++++++++-------------
 pypdf/_xobj_image_helpers.py |  42 +++++++------
 pypdf/filters.py             |   2 +-
 tests/test_images.py         |   9 +++
 4 files changed, 107 insertions(+), 63 deletions(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index 47cbc866b..a0e5b96fc 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -82,6 +82,7 @@
     NameObject,
     NullObject,
     NumberObject,
+    PdfObject,
     RectangleObject,
     StreamObject,
 )
@@ -551,6 +552,46 @@ def images(self) -> List[ImageFile]:
         """
         return _VirtualListImages(self._get_ids_image, self._get_image)  # type: ignore
 
+    def _translate_value_inlineimage(self, k: str, v: PdfObject) -> PdfObject:
+        """Translate values used in inline image"""
+        try:
+            v = NameObject(
+                {
+                    "/G": "/DeviceGray",
+                    "/RGB": "/DeviceRGB",
+                    "/CMYK": "/DeviceCMYK",
+                    "/I": "/Indexed",
+                    "/AHx": "/ASCIIHexDecode",
+                    "/A85": "/ASCII85Decode",
+                    "/LZW": "/LZWDecode",
+                    "/Fl": "/FlateDecode",
+                    "/RL": "/RunLengthDecode",
+                    "/CCF": "/CCITTFaxDecode",
+                    "/DCT": "/DCTDecode",
+                    "/DeviceGray": "/DeviceGray",
+                    "/DeviceRGB": "/DeviceRGB",
+                    "/DeviceCMYK": "/DeviceCMYK",
+                    "/Indexed": "/Indexed",
+                    "/ASCIIHexDecode": "/ASCIIHexDecode",
+                    "/ASCII85Decode": "/ASCII85Decode",
+                    "/LZWDecode": "/LZWDecode",
+                    "/FlateDecode": "/FlateDecode",
+                    "/RunLengthDecode": "/RunLengthDecode",
+                    "/CCITTFaxDecode": "/CCITTFaxDecode",
+                    "/DCTDecode": "/DCTDecode",
+                }[cast(str, v)]
+            )
+        except (TypeError, KeyError):
+            if isinstance(v, NameObject):
+                #  it is a custom name : we have to look in resources :
+                # the only applicable case is for ColorSpace
+                try:
+                    res = cast(DictionaryObject, self["/Resources"])["/ColorSpace"]
+                    v = cast(DictionaryObject, res)[v]
+                except KeyError:  # for res and v
+                    raise PdfReadError(f"Can not find resource entry {v} for {k}")
+        return v
+
     def _get_inline_images(self) -> Dict[str, ImageFile]:
         """
         get inline_images
@@ -593,51 +634,39 @@ def _get_inline_images(self) -> Dict[str, ImageFile]:
                 "/Length": len(ii["__streamdata__"]),
             }
             for k, v in ii["settings"].items():
-                try:
-                    v = NameObject(
-                        {
-                            "/G": "/DeviceGray",
-                            "/RGB": "/DeviceRGB",
-                            "/CMYK": "/DeviceCMYK",
-                            "/I": "/Indexed",
-                            "/AHx": "/ASCIIHexDecode",
-                            "/A85": "/ASCII85Decode",
-                            "/LZW": "/LZWDecode",
-                            "/Fl": "/FlateDecode",
-                            "/RL": "/RunLengthDecode",
-                            "/CCF": "/CCITTFaxDecode",
-                            "/DCT": "/DCTDecode",
-                        }[v]
+                if k in ("/Length", "/L"):  # no length is expected
+                    continue
+                if isinstance(v, list):
+                    v = ArrayObject(
+                        [self._translate_value_inlineimage(k, x) for x in v]
                     )
-                except (TypeError, KeyError):
-                    if isinstance(v, NameObject):
-                        #  it is a custom name : we have to look in resources :
-                        # the only applicable case is for ColorSpace
-                        try:
-                            res = cast(DictionaryObject, self["/Resources"])[
-                                "/ColorSpace"
-                            ]
-                            v = cast(DictionaryObject, res)[v]
-                        except KeyError:  # for res and v
-                            raise PdfReadError(
-                                f"Can not find resource entry {v} for {k}"
-                            )
-                init[
-                    NameObject(
-                        {
-                            "/BPC": "/BitsPerComponent",
-                            "/CS": "/ColorSpace",
-                            "/D": "/Decode",
-                            "/DP": "/DecodeParms",
-                            "/F": "/Filter",
-                            "/H": "/Height",
-                            "/W": "/Width",
-                            "/I": "/Interpolate",
-                            "/Intent": "/Intent",
-                            "/IM": "/ImageMask",
-                        }[k]
-                    )
-                ] = v
+                else:
+                    v = self._translate_value_inlineimage(k, v)
+                k = NameObject(
+                    {
+                        "/BPC": "/BitsPerComponent",
+                        "/CS": "/ColorSpace",
+                        "/D": "/Decode",
+                        "/DP": "/DecodeParms",
+                        "/F": "/Filter",
+                        "/H": "/Height",
+                        "/W": "/Width",
+                        "/I": "/Interpolate",
+                        "/Intent": "/Intent",
+                        "/IM": "/ImageMask",
+                        "/BitsPerComponent": "/BitsPerComponent",
+                        "/ColorSpace": "/ColorSpace",
+                        "/Decode": "/Decode",
+                        "/DecodeParms": "/DecodeParms",
+                        "/Filter": "/Filter",
+                        "/Height": "/Height",
+                        "/Width": "/Width",
+                        "/Interpolate": "/Interpolate",
+                        "/ImageMask": "/ImageMask",
+                    }[k]
+                )
+                if k not in init:
+                    init[k] = v
             ii["object"] = EncodedStreamObject.initialize_from_dictionary(init)
             extension, byte_stream, img = _xobj_to_image(ii["object"])
             files[f"~{num}~"] = ImageFile(
diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py
index cc0123ff2..cd1cdca17 100644
--- a/pypdf/_xobj_image_helpers.py
+++ b/pypdf/_xobj_image_helpers.py
@@ -123,10 +123,34 @@ def _get_imagemode(
     return mode, mode == "CMYK"
 
 
+def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
+    mask = (1 << bits) - 1
+    nbuff = bytearray(size[0] * size[1])
+    by = 0
+    bit = 8 - bits
+    for y in range(size[1]):
+        if (bit != 0) and (bit != 8 - bits):
+            by += 1
+            bit = 8 - bits
+        for x in range(size[0]):
+            nbuff[y * size[0] + x] = (data[by] >> bit) & mask
+            bit -= bits
+            if bit < 0:
+                by += 1
+                bit = 8 - bits
+    return bytes(nbuff)
+
+
 def _extended_image_frombytes(
     mode: str, size: Tuple[int, int], data: bytes
 ) -> Image.Image:
     try:
+        if mode == "2bits":
+            mode = "P"
+            data = bits2byte(data, size, 2)
+        elif mode == "4bits":
+            mode = "P"
+            data = bits2byte(data, size, 4)
         img = Image.frombytes(mode, size, data)
     except ValueError as exc:
         nb_pix = size[0] * size[1]
@@ -150,24 +174,6 @@ def _handle_flate(
     Process image encoded in flateEncode
     Returns img, image_format, extension, color inversion
     """
-
-    def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
-        mask = (2 << bits) - 1
-        nbuff = bytearray(size[0] * size[1])
-        by = 0
-        bit = 8 - bits
-        for y in range(size[1]):
-            if (bit != 0) and (bit != 8 - bits):
-                by += 1
-                bit = 8 - bits
-            for x in range(size[0]):
-                nbuff[y * size[0] + x] = (data[by] >> bit) & mask
-                bit -= bits
-                if bit < 0:
-                    by += 1
-                    bit = 8 - bits
-        return bytes(nbuff)
-
     extension = ".png"  # mime_type = "image/png"
     image_format = "PNG"
     lookup: Any
diff --git a/pypdf/filters.py b/pypdf/filters.py
index d62cf7842..26d71229d 100644
--- a/pypdf/filters.py
+++ b/pypdf/filters.py
@@ -803,7 +803,7 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes,
         # I'm not sure if the following logic is correct.
         # There might not be any relationship between the filters and the
         # extension
-        if x_object_obj[SA.FILTER] in [[FT.LZW_DECODE], [FT.CCITT_FAX_DECODE]]:
+        if lfilters in (FT.LZW_DECODE, FT.CCITT_FAX_DECODE):
             extension = ".tiff"  # mime_type = "image/tiff"
             image_format = "TIFF"
         else:
diff --git a/tests/test_images.py b/tests/test_images.py
index 4bfcfd0a3..90732e8f8 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -364,3 +364,12 @@ def test_inline_image_extraction():
     reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
     reader.pages[0].extract_text()
     reader.pages[1].extract_text()
+
+    url = "https://github.com/mozilla/pdf.js/raw/master/test/pdfs/issue14256.pdf"
+    name = "iss2598b.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    url = "https://github.com/py-pdf/pypdf/assets/4083478/71bc5053-cfc7-44ba-b7be-8e2333e2c749"
+    name = "iss2598b.png"
+    img = Image.open(BytesIO(get_data_from_url(url, name=name)))
+    for i in range(8):
+        assert image_similarity(reader.pages[0].images[i].image, img) == 1

From fdbc0923adee909e4ffc2f476f5d8f77f2c75e21 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 5 May 2024 22:40:57 +0200
Subject: [PATCH 05/42] TST: also run extract_text on iss2598b.pdf

---
 tests/test_images.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_images.py b/tests/test_images.py
index 90732e8f8..e860aece1 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -373,3 +373,4 @@ def test_inline_image_extraction():
     img = Image.open(BytesIO(get_data_from_url(url, name=name)))
     for i in range(8):
         assert image_similarity(reader.pages[0].images[i].image, img) == 1
+    reader.pages[0].extract_text()

From fd57ef7803f971b1cced234aadb3c3e5f6ad6b9c Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 6 May 2024 08:51:43 +0200
Subject: [PATCH 06/42] Fix filter-name matching, EI probe placement and relative seek in inline image extraction

---
 pypdf/generic/_data_structures.py | 14 +++++++-------
 pypdf/generic/_image_inline.py    |  8 ++++----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 941c020fb..f3d6b7868 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1164,13 +1164,13 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
         # print("inline", stream.tell(),filtr,"*",settings)
         if isinstance(filtr, list):
             filtr = filtr[0]  # used forencoding
-        if filtr == "AHx" or "ASCIIHexDecode" in filtr:
+        if "AHx" in filtr or "ASCIIHexDecode" in filtr:
             data = extract_inline_AHex(stream)
-        elif filtr == "A85" or "ASCII85Decode" in filtr:
+        elif "A85" in filtr or "ASCII85Decode" in filtr:
             data = extract_inline_A85(stream)
-        elif filtr == "RL" or "RunLengthDecode" in filtr:
+        elif "RL" in filtr or "RunLengthDecode" in filtr:
             data = extract_inline_RL(stream)
-        elif filtr == "DCT" or "DCTDecode" in filtr:
+        elif "DCT" in filtr or "DCTDecode" in filtr:
             data = extract_inline_DCT(stream)
         elif filtr == "not set":
             cs = settings.get("/CS", "")
@@ -1191,9 +1191,9 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
                 data = stream.read(
                     ceil(cast(int, settings["/W"]) * lcs) * cast(int, settings["/H"])
                 )
-                ei = read_non_whitespace(stream)
-                ei += stream.read(1)
-                stream.seek(-2, 1)
+            ei = read_non_whitespace(stream)
+            ei += stream.read(1)
+            stream.seek(-2, 1)
         else:
             data = extract_inline_default(stream)
 
diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index 4c1ab1b62..0ca1b5d1c 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -55,11 +55,11 @@ def extract_inline_AHex(stream: StreamType) -> bytes:
         loc = buf.find(b">")
         if loc >= 0:  # found >
             data += buf[: (loc + 1)]
-            stream.seek(-BUFFER_SIZE + loc + 1)
+            stream.seek(-len(buf) + loc + 1, 1)
             break
         loc = buf.find(b"EI")
         if loc >= 0:  # found EI
-            stream.seek(-BUFFER_SIZE + loc - 1, 1)
+            stream.seek(-len(buf) + loc - 1, 1)
             c = stream.read(1)
             while c in WHITESPACES:
                 stream.seek(-2, 1)
@@ -93,7 +93,7 @@ def extract_inline_A85(stream: StreamType) -> bytes:
         loc = buf.find(b"~>")
         if loc >= 0:  # found!
             data += buf[: loc + 2]
-            stream.seek(-BUFFER_SIZE + loc + 2, 1)
+            stream.seek(-len(buf) + loc + 2, 1)
             break
         data += buf[:-1]  # back by one char in case of in the middle of ~>
         stream.seek(-1, 1)
@@ -121,7 +121,7 @@ def extract_inline_RL(stream: StreamType) -> bytes:
         loc = buf.find(b"\x80")
         if loc >= 0:  # found
             data = buf[: loc + 1]
-            stream.seek(-BUFFER_SIZE + loc + 1, 1)
+            stream.seek(-len(buf) + loc + 1, 1)
             break
         data += buf  # back by one char in case of in the middle of ~>
 

From 70f9c02ffc35db0c29f5f3ca4ce12e341634fa0d Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Tue, 7 May 2024 12:09:44 +0200
Subject: [PATCH 07/42] fix DCT

---
 pypdf/generic/_data_structures.py | 14 +++++++-------
 pypdf/generic/_image_inline.py    | 24 +++++++++++++-----------
 tests/test_images.py              |  8 ++++++++
 3 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 941c020fb..f3d6b7868 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1164,13 +1164,13 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
         # print("inline", stream.tell(),filtr,"*",settings)
         if isinstance(filtr, list):
             filtr = filtr[0]  # used forencoding
-        if filtr == "AHx" or "ASCIIHexDecode" in filtr:
+        if "AHx" in filtr or "ASCIIHexDecode" in filtr:
             data = extract_inline_AHex(stream)
-        elif filtr == "A85" or "ASCII85Decode" in filtr:
+        elif "A85" in filtr or "ASCII85Decode" in filtr:
             data = extract_inline_A85(stream)
-        elif filtr == "RL" or "RunLengthDecode" in filtr:
+        elif "RL" in filtr or "RunLengthDecode" in filtr:
             data = extract_inline_RL(stream)
-        elif filtr == "DCT" or "DCTDecode" in filtr:
+        elif "DCT" in filtr or "DCTDecode" in filtr:
             data = extract_inline_DCT(stream)
         elif filtr == "not set":
             cs = settings.get("/CS", "")
@@ -1191,9 +1191,9 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
                 data = stream.read(
                     ceil(cast(int, settings["/W"]) * lcs) * cast(int, settings["/H"])
                 )
-                ei = read_non_whitespace(stream)
-                ei += stream.read(1)
-                stream.seek(-2, 1)
+            ei = read_non_whitespace(stream)
+            ei += stream.read(1)
+            stream.seek(-2, 1)
         else:
             data = extract_inline_default(stream)
 
diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index 4c1ab1b62..17f1e9c97 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -55,11 +55,11 @@ def extract_inline_AHex(stream: StreamType) -> bytes:
         loc = buf.find(b">")
         if loc >= 0:  # found >
             data += buf[: (loc + 1)]
-            stream.seek(-BUFFER_SIZE + loc + 1)
+            stream.seek(-len(buf) + loc + 1, 1)
             break
         loc = buf.find(b"EI")
         if loc >= 0:  # found EI
-            stream.seek(-BUFFER_SIZE + loc - 1, 1)
+            stream.seek(-len(buf) + loc - 1, 1)
             c = stream.read(1)
             while c in WHITESPACES:
                 stream.seek(-2, 1)
@@ -93,7 +93,7 @@ def extract_inline_A85(stream: StreamType) -> bytes:
         loc = buf.find(b"~>")
         if loc >= 0:  # found!
             data += buf[: loc + 2]
-            stream.seek(-BUFFER_SIZE + loc + 2, 1)
+            stream.seek(-len(buf) + loc + 2, 1)
             break
         data += buf[:-1]  # back by one char in case of in the middle of ~>
         stream.seek(-1, 1)
@@ -121,7 +121,7 @@ def extract_inline_RL(stream: StreamType) -> bytes:
         loc = buf.find(b"\x80")
         if loc >= 0:  # found
             data = buf[: loc + 1]
-            stream.seek(-BUFFER_SIZE + loc + 1, 1)
+            stream.seek(-len(buf) + loc + 1, 1)
             break
         data += buf  # back by one char in case of in the middle of ~>
 
@@ -142,31 +142,33 @@ def extract_inline_DCT(stream: StreamType) -> bytes:
     data: bytes = b""
     # Read Blocks of data (ID/Size/data) up to ID=FF/D9
     # see https://www.digicamsoft.com/itu/itu-t81-36.html
+    notfirst = False
     while True:
         c = stream.read(1)
-        data += c
+        if notfirst or (c == b"\xff"):
+            data += c
         if c != b"\xff":
             continue
+        else:
+            notfirst = True
         c = stream.read(1)
+        data += c
         if c == b"\xff":
             stream.seek(-1, 1)
         elif c == b"\x00":  # stuffing
-            data += c
+            pass
         elif c == b"\xd9":  # end
-            data += c
             break
         elif c in (
             b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc9\xca\xcb\xcc\xcd\xce\xcf"
             b"\xda\xdb\xdc\xdd\xde\xdf"
             b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xfe"
         ):
-            data += c
             c = stream.read(2)
             data += c
-            sz = ord(c[0]) * 256 + c[1]
+            sz = c[0] * 256 + c[1]
             data += stream.read(sz - 2)
-        else:
-            data += c
+        # else: pass
 
     ei = read_non_whitespace(stream)
     ei += stream.read(1)
diff --git a/tests/test_images.py b/tests/test_images.py
index e860aece1..3674a6870 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -374,3 +374,11 @@ def test_inline_image_extraction():
     for i in range(8):
         assert image_similarity(reader.pages[0].images[i].image, img) == 1
     reader.pages[0].extract_text()
+
+    url = "https://github.com/py-pdf/pypdf/files/15233597/bug1065245.pdf"
+    name = "iss2598c.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    url = "https://github.com/py-pdf/pypdf/assets/4083478/bfb221be-11bd-46fe-8129-55a58088a4b6"
+    name = "iss2598c.jpg"
+    img = Image.open(BytesIO(get_data_from_url(url, name=name)))
+    image_similarity(reader.pages[0].images[0].image, img) >= 0.99

From 8996a739fad674bfab6e94da640e2fdcf3acaa9c Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Tue, 7 May 2024 12:22:35 +0200
Subject: [PATCH 08/42] Fix A85

---
 pypdf/_utils.py                   |  3 +-
 pypdf/_xobj_image_helpers.py      |  2 +-
 pypdf/filters.py                  | 57 ++++++++++++++++++-------------
 pypdf/generic/_data_structures.py |  3 +-
 pypdf/generic/_image_inline.py    |  4 +--
 tests/test_filters.py             | 10 +++---
 6 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/pypdf/_utils.py b/pypdf/_utils.py
index 366a24eb4..d98162205 100644
--- a/pypdf/_utils.py
+++ b/pypdf/_utils.py
@@ -391,7 +391,8 @@ def ord_(b: Union[int, str, bytes]) -> Union[int, bytes]:
 
 
 WHITESPACES = (b" ", b"\n", b"\r", b"\t", b"\x00")
-WHITESPACES_AS_REGEXP = b"[ \n\r\t\x00]"
+WHITESPACES_AS_BYTES = b"".join(WHITESPACES)
+WHITESPACES_AS_REGEXP = b"[" + WHITESPACES_AS_BYTES + b"]"
 
 
 def paeth_predictor(left: int, up: int, up_left: int) -> int:
diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py
index cd1cdca17..ba49f1179 100644
--- a/pypdf/_xobj_image_helpers.py
+++ b/pypdf/_xobj_image_helpers.py
@@ -29,7 +29,7 @@
 
 
 try:
-    from PIL import Image
+    from PIL import Image, UnidentifiedImageError  # noqa: F401
 except ImportError:
     raise ImportError(
         "pillow is required to do image extraction. "
diff --git a/pypdf/filters.py b/pypdf/filters.py
index 26d71229d..cc47d051c 100644
--- a/pypdf/filters.py
+++ b/pypdf/filters.py
@@ -37,10 +37,12 @@
 import math
 import struct
 import zlib
+from base64 import a85decode, a85encode
 from io import BytesIO
 from typing import Any, Dict, List, Optional, Tuple, Union, cast
 
 from ._utils import (
+    WHITESPACES_AS_BYTES,
     b_,
     deprecate_with_replacement,
     deprecation_no_replacement,
@@ -462,7 +464,7 @@ def decode(
         Decode an LZW encoded data stream.
 
         Args:
-          data: bytes`` or ``str`` text to decode.
+          data: ``bytes`` or ``str`` text to decode.
           decode_parms: a dictionary of parameter values.
 
         Returns:
@@ -482,29 +484,34 @@ def decode(
         decode_parms: Optional[DictionaryObject] = None,
         **kwargs: Any,
     ) -> bytes:
-        # decode_parms is unused here
+        """
+        Decode an Ascii85 encoded data stream.
+
+        Args:
+          data: ``bytes`` or ``str`` text to decode.
+          decode_parms: a dictionary of parameter values.
 
+        Returns:
+          decoded data.
+        """
         if isinstance(data, str):
-            data = data.encode("ascii")
-        group_index = b = 0
-        out = bytearray()
-        for char in data:
-            if ord("!") <= char <= ord("u"):
-                group_index += 1
-                b = b * 85 + (char - 33)
-                if group_index == 5:
-                    out += struct.pack(b">L", b)
-                    group_index = b = 0
-            elif char == ord("z"):
-                assert group_index == 0
-                out += b"\0\0\0\0"
-            elif char == ord("~"):
-                if group_index:
-                    for _ in range(5 - group_index):
-                        b = b * 85 + 84
-                    out += struct.pack(b">L", b)[: group_index - 1]
-                break
-        return bytes(out)
+            data = data.encode()
+        data = data.strip(WHITESPACES_AS_BYTES)
+        return a85decode(data, adobe=True, ignorechars=WHITESPACES_AS_BYTES)
+
+    @staticmethod
+    def encode(data: bytes, level: int = -1) -> bytes:
+        """
+        Compress the input data using A85 encoding in Adobe format.
+
+        Args:
+            data: The data to be compressed.
+            level: See https://docs.python.org/3/library/zlib.html#zlib.compress
+
+        Returns:
+            The compressed data.
+        """
+        return a85encode(data, adobe=True, wrapcol=32)
 
 
 class DCTDecode:
@@ -737,6 +744,7 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes,
     """
     from ._xobj_image_helpers import (
         Image,
+        UnidentifiedImageError,
         _extended_image_frombytes,
         _get_imagemode,
         _handle_flate,
@@ -809,7 +817,10 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes,
         else:
             extension = ".png"  # mime_type = "image/png"
             image_format = "PNG"
-        img = Image.open(BytesIO(data), formats=("TIFF", "PNG"))
+        try:
+            img = Image.open(BytesIO(data), formats=("TIFF", "PNG"))
+        except UnidentifiedImageError:
+            img = _extended_image_frombytes(mode, size, data)
     elif lfilters == FT.DCT_DECODE:
         img, image_format, extension = Image.open(BytesIO(data)), "JPEG", ".jpg"
         # invert_color kept unchanged
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index f3d6b7868..31594f439 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1159,8 +1159,9 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
         # left at beginning of ID
         tmp = stream.read(3)
         assert tmp[:2] == b"ID"
-        filtr = settings.get("/F", "not set")
+        filtr = settings.get("/F", settings.get("/Filter", "not set"))
         savpos = stream.tell()
+        # import pdb;pdb.set_trace()
         # print("inline", stream.tell(),filtr,"*",settings)
         if isinstance(filtr, list):
             filtr = filtr[0]  # used forencoding
diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index 17f1e9c97..0fb75a586 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -49,7 +49,7 @@ def extract_inline_AHex(stream: StreamType) -> bytes:
     # Read data until delimiter > and EI as backup
     # ignoring backup.
     while True:
-        buf = stream.read(BUFFER_SIZE)
+        buf = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
         if not buf:
             raise PdfReadError("Unexpected end of stream")
         loc = buf.find(b">")
@@ -87,7 +87,7 @@ def extract_inline_A85(stream: StreamType) -> bytes:
     # Read data up to delimiter ~>
     # see §3.3.2 from PDF ref 1.7
     while True:
-        buf = stream.read(BUFFER_SIZE)
+        buf = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
         if not buf:
             raise PdfReadError("Unexpected end of stream")
         loc = buf.find(b"~>")
diff --git a/tests/test_filters.py b/tests/test_filters.py
index d3980be0b..146ce43cb 100644
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -147,11 +147,10 @@ def test_decode_ahx():
         _ = list(p.images.keys())
 
 
-@pytest.mark.xfail()
 def test_ascii85decode_with_overflow():
     inputs = (
         v + "~>"
-        for v in "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0e\x0f"
+        for v in "\x01\x02\x03\x04\x05\x06\x07\x08\x0e\x0f"
         "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a"
         "\x1b\x1c\x1d\x1e\x1fvwxy{|}~\x7f\x80\x81\x82"
         "\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d"
@@ -161,9 +160,8 @@ def test_ascii85decode_with_overflow():
     )
 
     for i in inputs:
-        with pytest.raises(ValueError) as exc:
+        with pytest.raises(ValueError):
             ASCII85Decode.decode(i)
-        assert exc.value.args[0] == ""
 
 
 def test_ascii85decode_five_zero_bytes():
@@ -183,10 +181,10 @@ def test_ascii85decode_five_zero_bytes():
         b"\x00\x00\x00\x00" * 3,
     )
 
-    assert ASCII85Decode.decode("!!!!!") == ASCII85Decode.decode("z")
+    assert ASCII85Decode.decode("!!!!!~>") == ASCII85Decode.decode("z~>")
 
     for expected, i in zip(exp_outputs, inputs):
-        assert ASCII85Decode.decode(i) == expected
+        assert ASCII85Decode.decode(i + "~>") == expected
 
 
 def test_ccitparameters():

From 5b38f344713ceaea4f27139b50f86d5fb2dcb08c Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Tue, 7 May 2024 13:27:55 +0200
Subject: [PATCH 09/42] tests: blank the URL of test_iss1710

---
 tests/test_reader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_reader.py b/tests/test_reader.py
index ff39189e0..837b9b8c2 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -1408,7 +1408,7 @@ def test_iss1689():
 
 @pytest.mark.enable_socket()
 def test_iss1710():
-    url = "https://nlp.stanford.edu/IR-book/pdf/irbookonlinereading.pdf"
+    url = ""
     name = "irbookonlinereading.pdf"
     reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
     reader.outline

From 67d51ea3e8b893988a334ff04475a7d263d40631 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Tue, 7 May 2024 13:31:38 +0200
Subject: [PATCH 10/42] tests: use new download link in test_iss1710

---
 tests/test_reader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_reader.py b/tests/test_reader.py
index 837b9b8c2..83b61bc59 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -1408,7 +1408,7 @@ def test_iss1689():
 
 @pytest.mark.enable_socket()
 def test_iss1710():
-    url = ""
+    url = "https://github.com/py-pdf/pypdf/files/15234776/irbookonlinereading.pdf"
     name = "irbookonlinereading.pdf"
     reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
     reader.outline

From 092e2a5fcef3878dead9caedcce83b097d3c7857 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Tue, 7 May 2024 13:36:16 +0200
Subject: [PATCH 11/42] fix test: add missing assert on image similarity

---
 tests/test_images.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_images.py b/tests/test_images.py
index 3674a6870..95ce5f413 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -381,4 +381,4 @@ def test_inline_image_extraction():
     url = "https://github.com/py-pdf/pypdf/assets/4083478/bfb221be-11bd-46fe-8129-55a58088a4b6"
     name = "iss2598c.jpg"
     img = Image.open(BytesIO(get_data_from_url(url, name=name)))
-    image_similarity(reader.pages[0].images[0].image, img) >= 0.99
+    assert image_similarity(reader.pages[0].images[0].image, img) >= 0.99

From c5d62a344ddf98a345cd51eb4763819e56e5406f Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Wed, 8 May 2024 11:18:05 +0200
Subject: [PATCH 12/42] BUG: Incorrect number of inline images

closes #2629
---
 pypdf/_page.py          | 24 ++++++------------------
 tests/test_workflows.py |  5 +++++
 2 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index 6f44aa522..1cd665ee1 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -28,7 +28,6 @@
 # POSSIBILITY OF SUCH DAMAGE.
 
 import math
-import re
 import sys
 from decimal import Decimal
 from pathlib import Path
@@ -58,7 +57,6 @@
     mult,
 )
 from ._utils import (
-    WHITESPACES_AS_REGEXP,
     CompressedTransformationMatrix,
     File,
     ImageFile,
@@ -335,7 +333,6 @@ def __init__(
         self.pdf = pdf
         self.inline_images: Optional[Dict[str, ImageFile]] = None
         # below Union for mypy but actually Optional[List[str]]
-        self.inline_images_keys: Optional[List[Union[str, List[str]]]] = None
         self.indirect_reference = indirect_reference
 
     def hash_value_data(self) -> bytes:
@@ -439,19 +436,8 @@ def _get_ids_image(
             return []
         else:
             call_stack.append(_i)
-        if self.inline_images_keys is None:
-            content = self._get_contents_as_bytes() or b""
-            nb_inlines = 0
-            for matching in re.finditer(
-                WHITESPACES_AS_REGEXP + b"BI" + WHITESPACES_AS_REGEXP,
-                content,
-            ):
-                start_of_string = content[: matching.start()]
-                if len(re.findall(b"[^\\\\]\\(", start_of_string)) == len(
-                    re.findall(b"[^\\\\]\\)", start_of_string)
-                ):
-                    nb_inlines += 1
-            self.inline_images_keys = [f"~{x}~" for x in range(nb_inlines)]
+        if self.inline_images is None:
+            self.inline_images = self._get_inline_images()
         if obj is None:
             obj = self
         if ancest is None:
@@ -460,7 +446,7 @@ def _get_ids_image(
         if PG.RESOURCES not in obj or RES.XOBJECT not in cast(
             DictionaryObject, obj[PG.RESOURCES]
         ):
-            return self.inline_images_keys
+            return [] if self.inline_images is None else list(self.inline_images.keys())
 
         x_object = obj[PG.RESOURCES][RES.XOBJECT].get_object()  # type: ignore
         for o in x_object:
@@ -470,7 +456,9 @@ def _get_ids_image(
                 lst.append(o if len(ancest) == 0 else ancest + [o])
             else:  # is a form with possible images inside
                 lst.extend(self._get_ids_image(x_object[o], ancest + [o], call_stack))
-        return lst + self.inline_images_keys
+        if self.inline_images is not None:
+            lst.extend(list(self.inline_images.keys()))
+        return lst
 
     def _get_image(
         self,
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 94e380dca..8e9c1b219 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -1025,6 +1025,11 @@ def test_inline_images():
     with pytest.raises(KeyError) as exc:
         reader.pages[2]._get_image(("test",))
 
+    url = "https://github.com/py-pdf/pypdf/files/15233597/bug1065245.pdf"
+    name = "iss2598c.pdf"  # test coming from another test in test_image.py
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    assert len(reader.pages[0].images) == 3
+
 
 @pytest.mark.enable_socket()
 def test_iss():

From 51bea2cfa4458b21489de86e67ab04d6b47f84a6 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sat, 11 May 2024 15:26:34 +0200
Subject: [PATCH 13/42] add test for RL + fix

---
 pypdf/generic/_image_inline.py | 1 -
 tests/test_images.py           | 8 ++++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index 0fb75a586..8836d1991 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -125,7 +125,6 @@ def extract_inline_RL(stream: StreamType) -> bytes:
             break
         data += buf  # back by one char in case of in the middle of ~>
 
-    data += buf[:loc]
     ei = read_non_whitespace(stream)
     ei += stream.read(1)
     stream.seek(-2, 1)
diff --git a/tests/test_images.py b/tests/test_images.py
index 95ce5f413..56ceb418a 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -382,3 +382,11 @@ def test_inline_image_extraction():
     name = "iss2598c.jpg"
     img = Image.open(BytesIO(get_data_from_url(url, name=name)))
     assert image_similarity(reader.pages[0].images[0].image, img) >= 0.99
+
+    url = "https://github.com/py-pdf/pypdf/files/15282904/tt.pdf"
+    name = "iss2598d.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    url = "https://github.com/py-pdf/pypdf/assets/4083478/1a770e1b-9ad2-4125-89ae-6069992dda23"
+    name = "iss2598d.png"
+    img = Image.open(BytesIO(get_data_from_url(url, name=name)))
+    assert image_similarity(reader.pages[0].images[0].image, img) == 1

From bd8449600075e9bfd9a7809d08f0f87ef3ce8975 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sat, 11 May 2024 16:25:42 +0200
Subject: [PATCH 14/42] remove encode as not used for the moment

---
 pypdf/filters.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/pypdf/filters.py b/pypdf/filters.py
index cc47d051c..896f9dd76 100644
--- a/pypdf/filters.py
+++ b/pypdf/filters.py
@@ -37,7 +37,7 @@
 import math
 import struct
 import zlib
-from base64 import a85decode, a85encode
+from base64 import a85decode
 from io import BytesIO
 from typing import Any, Dict, List, Optional, Tuple, Union, cast
 
@@ -499,9 +499,9 @@ def decode(
         data = data.strip(WHITESPACES_AS_BYTES)
         return a85decode(data, adobe=True, ignorechars=WHITESPACES_AS_BYTES)
 
-    @staticmethod
+    """@staticmethod
     def encode(data: bytes, level: int = -1) -> bytes:
-        """
+        '''
         Compress the input data using A85 encoding in Adobe format.
 
         Args:
@@ -510,8 +510,9 @@ def encode(data: bytes, level: int = -1) -> bytes:
 
         Returns:
             The compressed data.
-        """
+        '''
         return a85encode(data, adobe=True, wrapcol=32)
+    """
 
 
 class DCTDecode:

From 770aabaf02b64c088219131fca89ea6d79e4fb8d Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sat, 11 May 2024 16:27:14 +0200
Subject: [PATCH 15/42] Fix + Test

---
 pypdf/generic/_image_inline.py | 10 +++++++---
 tests/test_generic.py          | 28 ++++++++++++++++++++++++++--
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index 8836d1991..4486f091c 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -66,9 +66,13 @@ def extract_inline_AHex(stream: StreamType) -> bytes:
                 c = stream.read(1)
                 loc -= 1
             data += buf[:loc]
+            break
+        elif len(buf) == 2:
+            data += buf
+            break
         else:  # > nor EI found
-            data += buf[:-1]
-            stream.seek(-1, 1)
+            data += buf[:-2]
+            stream.seek(-2, 1)
 
     ei = read_non_whitespace(stream)
     ei += stream.read(1)
@@ -153,7 +157,7 @@ def extract_inline_DCT(stream: StreamType) -> bytes:
         c = stream.read(1)
         data += c
         if c == b"\xff":
-            stream.seek(-1, 1)
+            stream.seek(-1, 1)  # pragma: no cover
         elif c == b"\x00":  # stuffing
             pass
         elif c == b"\xd9":  # end
diff --git a/tests/test_generic.py b/tests/test_generic.py
index 24da063a2..e42aae5b7 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -35,6 +35,11 @@
     read_object,
     read_string_from_stream,
 )
+from pypdf.generic._image_inline import (
+    extract_inline_A85,
+    extract_inline_AHex,
+    extract_inline_RL,
+)
 
 from . import ReaderDummy, get_data_from_url
 
@@ -883,7 +888,7 @@ def test_annotation_builder_highlight(pdf_file_path):
                     FloatObject(705.4493),
                 ]
             ),
-            printing=False
+            printing=False,
         )
     writer.add_annotation(0, highlight_annotation)
     for annot in writer.pages[0]["/Annots"]:
@@ -910,7 +915,7 @@ def test_annotation_builder_highlight(pdf_file_path):
                     FloatObject(705.4493),
                 ]
             ),
-            printing=True
+            printing=True,
         )
     writer.add_annotation(1, highlight_annotation)
     for annot in writer.pages[1]["/Annots"]:
@@ -1350,3 +1355,22 @@ def test_array_operators():
     la = len(a)
     a -= 300
     assert len(a) == la
+
+
+def test_unitary_extract_inline_buffer_empty():
+    with pytest.raises(PdfReadError):
+        extract_inline_AHex(BytesIO())
+    with pytest.raises(PdfReadError):
+        extract_inline_A85(BytesIO())
+    with pytest.raises(PdfReadError):
+        extract_inline_RL(BytesIO())
+
+
+def test_unitary_extract_inline_ahx():
+    b = 16000 * b"00"
+    b += b" EI"
+    assert len(extract_inline_AHex(BytesIO(b))) == 16000 * 2
+    b = 16000 * b"00"
+    b += b">"
+    with pytest.raises(PdfReadError):
+        extract_inline_AHex(BytesIO(b))

From a37b73f9acb0d461b054d3e15fc05cf432f7b538 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sat, 11 May 2024 18:41:41 +0200
Subject: [PATCH 16/42] test+fix

---
 pypdf/generic/_image_inline.py | 3 +++
 tests/test_generic.py          | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index 4486f091c..314c71c7f 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -99,6 +99,9 @@ def extract_inline_A85(stream: StreamType) -> bytes:
             data += buf[: loc + 2]
             stream.seek(-len(buf) + loc + 2, 1)
             break
+        elif len(buf) == 2:  # end of buffer
+            data += buf
+            break
         data += buf[:-1]  # back by one char in case of in the middle of ~>
         stream.seek(-1, 1)
 
diff --git a/tests/test_generic.py b/tests/test_generic.py
index e42aae5b7..6cdf2d352 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -1,5 +1,6 @@
 """Test the pypdf.generic module."""
 
+from base64 import a85encode
 from copy import deepcopy
 from io import BytesIO
 from pathlib import Path
@@ -1360,8 +1361,12 @@ def test_array_operators():
 def test_unitary_extract_inline_buffer_empty():
     with pytest.raises(PdfReadError):
         extract_inline_AHex(BytesIO())
+    with pytest.raises(PdfReadError):
+        extract_inline_AHex(BytesIO(4095 * b"00" + b"   "))
     with pytest.raises(PdfReadError):
         extract_inline_A85(BytesIO())
+    with pytest.raises(PdfReadError):
+        extract_inline_A85(BytesIO(a85encode(b"1")))
     with pytest.raises(PdfReadError):
         extract_inline_RL(BytesIO())
 

From 184e141b697160f6ccae392a0489b9113b13daa6 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sat, 11 May 2024 18:52:25 +0200
Subject: [PATCH 17/42] test

---
 pypdf/generic/_image_inline.py | 2 +-
 tests/test_generic.py          | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index 314c71c7f..ac448c0a6 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -130,7 +130,7 @@ def extract_inline_RL(stream: StreamType) -> bytes:
             data = buf[: loc + 1]
             stream.seek(-len(buf) + loc + 1, 1)
             break
-        data += buf  # back by one char in case of in the middle of ~>
+        data += buf
 
     ei = read_non_whitespace(stream)
     ei += stream.read(1)
diff --git a/tests/test_generic.py b/tests/test_generic.py
index 6cdf2d352..3ab68a569 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -39,6 +39,7 @@
 from pypdf.generic._image_inline import (
     extract_inline_A85,
     extract_inline_AHex,
+    extract_inline_DCT,
     extract_inline_RL,
 )
 
@@ -1369,6 +1370,10 @@ def test_unitary_extract_inline_buffer_empty():
         extract_inline_A85(BytesIO(a85encode(b"1")))
     with pytest.raises(PdfReadError):
         extract_inline_RL(BytesIO())
+    with pytest.raises(PdfReadError):
+        extract_inline_RL(BytesIO(b"\x01\x01\x80"))
+    with pytest.raises(PdfReadError):
+        extract_inline_DCT(BytesIO(b"\xFF\xD9"))
 
 
 def test_unitary_extract_inline_ahx():

From 85e08bb66bf9504d19c0e8f6f5aed3bce9a2a460 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sat, 11 May 2024 19:16:15 +0200
Subject: [PATCH 18/42] test + fix

---
 pypdf/generic/_image_inline.py | 4 ++--
 tests/test_generic.py          | 6 +++++-
 tests/test_images.py           | 1 +
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index ac448c0a6..d3df959d3 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -102,8 +102,8 @@ def extract_inline_A85(stream: StreamType) -> bytes:
         elif len(buf) == 2:  # end of buffer
             data += buf
             break
-        data += buf[:-1]  # back by one char in case of in the middle of ~>
-        stream.seek(-1, 1)
+        data += buf[:-2]  # back by one char in case of in the middle of ~>
+        stream.seek(-2, 1)
 
     ei = read_non_whitespace(stream)
     ei += stream.read(1)
diff --git a/tests/test_generic.py b/tests/test_generic.py
index 3ab68a569..4d12bbadd 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -1359,15 +1359,19 @@ def test_array_operators():
     assert len(a) == la
 
 
-def test_unitary_extract_inline_buffer_empty():
+def test_unitary_extract_inline_buffer_invalid():
     with pytest.raises(PdfReadError):
         extract_inline_AHex(BytesIO())
     with pytest.raises(PdfReadError):
         extract_inline_AHex(BytesIO(4095 * b"00" + b"   "))
+    with pytest.raises(PdfReadError):
+        extract_inline_AHex(BytesIO(b"00"))
     with pytest.raises(PdfReadError):
         extract_inline_A85(BytesIO())
     with pytest.raises(PdfReadError):
         extract_inline_A85(BytesIO(a85encode(b"1")))
+    with pytest.raises(PdfReadError):
+        extract_inline_A85(BytesIO(a85encode(b"1234578" * 990)))
     with pytest.raises(PdfReadError):
         extract_inline_RL(BytesIO())
     with pytest.raises(PdfReadError):
diff --git a/tests/test_images.py b/tests/test_images.py
index 56ceb418a..6f8a35e12 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -373,6 +373,7 @@ def test_inline_image_extraction():
     img = Image.open(BytesIO(get_data_from_url(url, name=name)))
     for i in range(8):
         assert image_similarity(reader.pages[0].images[i].image, img) == 1
+    reader.pages[0].images[i].image  # to test acceleration of second call
     reader.pages[0].extract_text()
 
     url = "https://github.com/py-pdf/pypdf/files/15233597/bug1065245.pdf"

From a7ce07cbdc453628be0044281532ced4c793dc87 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sat, 11 May 2024 22:52:26 +0200
Subject: [PATCH 19/42] test + fix +refactor

---
 pypdf/generic/_image_inline.py | 58 +++++++++++-----------------------
 tests/test_generic.py          | 13 ++++----
 2 files changed, 26 insertions(+), 45 deletions(-)

diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index d3df959d3..ae701cec4 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -69,7 +69,7 @@ def extract_inline_AHex(stream: StreamType) -> bytes:
             break
         elif len(buf) == 2:
             data += buf
-            break
+            raise PdfReadError("Unexpected end of stream")
         else:  # > nor EI found
             data += buf[:-2]
             stream.seek(-2, 1)
@@ -101,7 +101,7 @@ def extract_inline_A85(stream: StreamType) -> bytes:
             break
         elif len(buf) == 2:  # end of buffer
             data += buf
-            break
+            raise PdfReadError("Unexpected end of stream")
         data += buf[:-2]  # back by one char in case of in the middle of ~>
         stream.seek(-2, 1)
 
@@ -127,7 +127,7 @@ def extract_inline_RL(stream: StreamType) -> bytes:
             raise PdfReadError("Unexpected end of stream")
         loc = buf.find(b"\x80")
         if loc >= 0:  # found
-            data = buf[: loc + 1]
+            data += buf[: loc + 1]
             stream.seek(-len(buf) + loc + 1, 1)
             break
         data += buf
@@ -203,48 +203,28 @@ def extract_inline_default(stream: StreamType) -> bytes:
             data.write(buf)
         else:
             # Write out everything before the E.
-            data.write(buf[0:loc])
+            data.write(buf[0 : (loc + 1)])
 
             # Seek back in the stream to read the E next.
-            stream.seek(loc - len(buf), 1)
+            stream.seek(loc + 1 - len(buf), 1)
             saved_pos = stream.tell()
-            tok = stream.read(1)  # E of "EI"
             # Check for End Image
             tok2 = stream.read(1)  # I of "EI"
             if tok2 != b"I":
-                stream.seek(-1, 1)
-                data.write(tok)
+                stream.seek(saved_pos, 0)
                 continue
-            # for further debug : print("!!!!",buf[loc-1:loc+10])
-            info = tok + tok2
-            tok3 = stream.read(
-                1
-            )  # possible space after "EI" may not been loaded  in buf
+            tok3 = stream.read(1)  # possible space after "EI"
             if tok3 not in WHITESPACES:
-                stream.seek(-2, 1)  # to step back on I
-                data.write(tok)
-            elif buf[loc - 1 : loc] in WHITESPACES:  # and tok3 in WHITESPACES:
-                # Data can contain [\s]EI[\s]: 4 chars sufficient, checking Q operator not required.
-                while tok3 in WHITESPACES:
-                    # needed ???? : info += tok3
-                    tok3 = stream.read(1)
-                stream.seek(-1, 1)
-                # we do not insert EI
-                break
-            else:  # buf[loc - 1 : loc] not in WHITESPACES and tok3 in WHITESPACES:
-                # Data can contain [!\s]EI[\s],  so check for Q or EMC operator is required to have 4 chars.
-                while tok3 in WHITESPACES:
-                    info += tok3
-                    tok3 = stream.read(1)
-                stream.seek(-1, 1)
-                if tok3 == b"Q":
-                    break
-                elif tok3 == b"E":
-                    ope = stream.read(3)
-                    stream.seek(-3, 1)
-                    if ope == b"EMC":
-                        break
-                else:
-                    data.write(info)
-    stream.seek(saved_pos, 0)
+                stream.seek(saved_pos, 0)
+                continue
+            while tok3 in WHITESPACES:
+                tok3 = stream.read(1)
+            if buf[loc - 1 : loc] not in WHITESPACES and tok3 not in (
+                b"Q",
+                b"E",
+            ):  # for Q ou EMC
+                stream.seek(saved_pos, 0)
+                continue
+            # Data contains [\s]EI[\s](Q|EMC): 4 chars sufficient, checking Q operator not required.
+            break
     return data.getvalue()
diff --git a/tests/test_generic.py b/tests/test_generic.py
index 4d12bbadd..5133e162d 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -1380,11 +1380,12 @@ def test_unitary_extract_inline_buffer_invalid():
         extract_inline_DCT(BytesIO(b"\xFF\xD9"))
 
 
-def test_unitary_extract_inline_ahx():
+def test_unitary_extract_inline():
+    # AHx
     b = 16000 * b"00"
-    b += b" EI"
-    assert len(extract_inline_AHex(BytesIO(b))) == 16000 * 2
-    b = 16000 * b"00"
-    b += b">"
+    assert len(extract_inline_AHex(BytesIO(b + b" EI"))) == len(b)
     with pytest.raises(PdfReadError):
-        extract_inline_AHex(BytesIO(b))
+        extract_inline_AHex(BytesIO(b + b"> "))
+    # RL
+    b = 8200 * b"\x00\xAB" + b"\x80"
+    assert len(extract_inline_RL(BytesIO(b + b" EI"))) == len(b)

From d17d1927872b50012f2d23b98f36487dc03dac14 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 12 May 2024 10:39:51 +0200
Subject: [PATCH 20/42] fix regeneration of inline images

---
 pypdf/_page.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index b91288ffe..6173b3bb7 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -951,6 +951,8 @@ def replace_contents(
                 # as a backup solution, we put content as an object although not in accordance with pdf ref
                 # this will be fixed with the _add_object
                 self[NameObject(PG.CONTENTS)] = content
+        # forces recalculation of inline_images
+        self.inline_images = None
 
     def merge_page(
         self, page2: "PageObject", expand: bool = False, over: bool = True

From 6807f3c6d43c804df1519b7cc085c5d5c34758ad Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 12 May 2024 10:43:08 +0200
Subject: [PATCH 21/42] coverage

---
 tests/test_generic.py |  2 ++
 tests/test_images.py  | 54 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/tests/test_generic.py b/tests/test_generic.py
index 5133e162d..23d6289b0 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -1370,6 +1370,8 @@ def test_unitary_extract_inline_buffer_invalid():
         extract_inline_A85(BytesIO())
     with pytest.raises(PdfReadError):
         extract_inline_A85(BytesIO(a85encode(b"1")))
+    with pytest.raises(PdfReadError):
+        extract_inline_A85(BytesIO(a85encode(b"1") + b"~> Q"))
     with pytest.raises(PdfReadError):
         extract_inline_A85(BytesIO(a85encode(b"1234578" * 990)))
     with pytest.raises(PdfReadError):
diff --git a/tests/test_images.py b/tests/test_images.py
index 6f8a35e12..c97d4bbb1 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -13,7 +13,7 @@
 import pytest
 from PIL import Image, ImageChops, ImageDraw
 
-from pypdf import PageObject, PdfReader
+from pypdf import PageObject, PdfReader, PdfWriter
 from pypdf.generic import NameObject, NullObject
 
 from . import get_data_from_url
@@ -367,14 +367,58 @@ def test_inline_image_extraction():
 
     url = "https://github.com/mozilla/pdf.js/raw/master/test/pdfs/issue14256.pdf"
     name = "iss2598b.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter(BytesIO(get_data_from_url(url, name=name)))
     url = "https://github.com/py-pdf/pypdf/assets/4083478/71bc5053-cfc7-44ba-b7be-8e2333e2c749"
     name = "iss2598b.png"
     img = Image.open(BytesIO(get_data_from_url(url, name=name)))
     for i in range(8):
-        assert image_similarity(reader.pages[0].images[i].image, img) == 1
-    reader.pages[0].images[i].image  # to test acceleration of second call
-    reader.pages[0].extract_text()
+        assert image_similarity(writer.pages[0].images[i].image, img) == 1
+    writer.pages[0].extract_text()
+    # check recalculation of inline images
+    assert writer.pages[0].inline_images is not None
+    writer.pages[0].merge_scaled_page(writer.pages[0], 0.25)
+    assert writer.pages[0].inline_images is None
+    reader = PdfReader(RESOURCE_ROOT / "imagemagick-ASCII85Decode.pdf")
+    writer.pages[0].merge_page(reader.pages[0])
+    assert list(writer.pages[0].images.keys()) == [
+        "/Im0",
+        "~0~",
+        "~1~",
+        "~2~",
+        "~3~",
+        "~4~",
+        "~5~",
+        "~6~",
+        "~7~",
+        "~8~",
+        "~9~",
+        "~10~",
+        "~11~",
+        "~12~",
+        "~13~",
+        "~14~",
+        "~15~",
+    ]
+    # 2nd call for acceleration test
+    assert list(writer.pages[0].images.keys()) == [
+        "/Im0",
+        "~0~",
+        "~1~",
+        "~2~",
+        "~3~",
+        "~4~",
+        "~5~",
+        "~6~",
+        "~7~",
+        "~8~",
+        "~9~",
+        "~10~",
+        "~11~",
+        "~12~",
+        "~13~",
+        "~14~",
+        "~15~",
+    ]
 
     url = "https://github.com/py-pdf/pypdf/files/15233597/bug1065245.pdf"
     name = "iss2598c.pdf"

From 5d713fcedd94479761a14cc5b5d73ca3ffba6471 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 12 May 2024 11:58:08 +0200
Subject: [PATCH 22/42] coverage

---
 pypdf/_xobj_image_helpers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py
index ba49f1179..183b28ed5 100644
--- a/pypdf/_xobj_image_helpers.py
+++ b/pypdf/_xobj_image_helpers.py
@@ -30,7 +30,7 @@
 
 try:
     from PIL import Image, UnidentifiedImageError  # noqa: F401
-except ImportError:
+except ImportError:  # deprecated
     raise ImportError(
         "pillow is required to do image extraction. "
         "It can be installed via 'pip install pypdf[image]'"

From 623b7153a48cf0d12f2743492c97999c2349bcaf Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 12 May 2024 12:14:29 +0200
Subject: [PATCH 23/42] check for space after EI

---
 pypdf/generic/_data_structures.py |  6 ++++--
 pypdf/generic/_image_inline.py    | 24 ++++++++++++------------
 tests/test_generic.py             | 28 ++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 31594f439..9a2671016 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -50,6 +50,7 @@
 
 from .._protocols import PdfReaderProtocol, PdfWriterProtocol, XmpInformationProtocol
 from .._utils import (
+    WHITESPACES,
     StreamType,
     b_,
     deprecate_no_replacement,
@@ -1198,8 +1199,9 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
         else:
             data = extract_inline_default(stream)
 
-        ei = stream.read(2)
-        if ei != b"EI":
+        ei = stream.read(3)
+        stream.seek(-1, 1)
+        if ei[0:2] != b"EI" or ei[2:3] not in WHITESPACES:
             stream.seek(savpos, 0)
             data = extract_inline_default(stream)
         return {"settings": settings, "data": data}
diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index ae701cec4..5804f8389 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -75,9 +75,9 @@ def extract_inline_AHex(stream: StreamType) -> bytes:
             stream.seek(-2, 1)
 
     ei = read_non_whitespace(stream)
-    ei += stream.read(1)
-    stream.seek(-2, 1)
-    if ei != b"EI":
+    ei += stream.read(2)
+    stream.seek(-3, 1)
+    if ei[0:2] != b"EI" or not (ei[2:3] == b"" or ei[2:3] in WHITESPACES):
         raise PdfReadError("EI stream not found")
     return data
 
@@ -106,9 +106,9 @@ def extract_inline_A85(stream: StreamType) -> bytes:
         stream.seek(-2, 1)
 
     ei = read_non_whitespace(stream)
-    ei += stream.read(1)
-    stream.seek(-2, 1)
-    if ei != b"EI":
+    ei += stream.read(2)
+    stream.seek(-3, 1)
+    if ei[0:2] != b"EI" or not (ei[2:3] == b"" or ei[2:3] in WHITESPACES):
         raise PdfReadError("EI stream not found")
     return data
 
@@ -133,9 +133,9 @@ def extract_inline_RL(stream: StreamType) -> bytes:
         data += buf
 
     ei = read_non_whitespace(stream)
-    ei += stream.read(1)
-    stream.seek(-2, 1)
-    if ei != b"EI":
+    ei += stream.read(2)
+    stream.seek(-3, 1)
+    if ei[0:2] != b"EI" or not (ei[2:3] == b"" or ei[2:3] in WHITESPACES):
         raise PdfReadError("EI stream not found")
     return data
 
@@ -177,9 +177,9 @@ def extract_inline_DCT(stream: StreamType) -> bytes:
         # else: pass
 
     ei = read_non_whitespace(stream)
-    ei += stream.read(1)
-    stream.seek(-2, 1)
-    if ei != b"EI":
+    ei += stream.read(2)
+    stream.seek(-3, 1)
+    if ei[0:2] != b"EI" or not (ei[2:3] == b"" or ei[2:3] in WHITESPACES):
         raise PdfReadError("EI stream not found")
     return data
 
diff --git a/tests/test_generic.py b/tests/test_generic.py
index 23d6289b0..a72d2f4fb 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -16,6 +16,8 @@
     ArrayObject,
     BooleanObject,
     ByteStringObject,
+    ContentStream,
+    DecodedStreamObject,
     Destination,
     DictionaryObject,
     Fit,
@@ -1391,3 +1393,29 @@ def test_unitary_extract_inline():
     # RL
     b = 8200 * b"\x00\xAB" + b"\x80"
     assert len(extract_inline_RL(BytesIO(b + b" EI"))) == len(b)
+
+    # default
+    # EIDD instead of EI; using A85
+    b = b"""1 0 0 1 0 0 cm  BT /F1 12 Tf 14.4 TL ET\nq 100 0 0 100 100 100 cm
+BI\n/W 16 /H 16 /BPC 8 /CS /RGB /F [/A85 /Fl]\nID
+Gar8O(o6*is8QV#;;JAuTq2lQ8J;%6#\'d5b"Q[+ZD?\'\\+CGj9~>
+EIDD
+Q\nBT 1 0 0 1 200 100 Tm (Test) Tj T* ET\n \n"""
+    ec = DecodedStreamObject()
+    ec.set_data(b)
+    co = ContentStream(ec, None)
+    with pytest.raises(PdfReadError) as exc:
+        co.operations
+    assert "EI stream not found" in exc.value.args[0]
+    # EIDD instead of EI; using /Fl (default extraction)
+    b = b"""1 0 0 1 0 0 cm  BT /F1 12 Tf 14.4 TL ET\nq 100 0 0 100 100 100 cm
+BI\n/W 16 /H 16 /BPC 8 /CS /RGB /F /Fl \nID
+Gar8O(o6*is8QV#;;JAuTq2lQ8J;%6#\'d5b"Q[+ZD?\'\\+CGj9~>
+EIDD
+Q\nBT 1 0 0 1 200 100 Tm (Test) Tj T* ET\n \n"""
+    ec = DecodedStreamObject()
+    ec.set_data(b)
+    co = ContentStream(ec, None)
+    with pytest.raises(PdfReadError) as exc:
+        co.operations
+    assert "Unexpected end of stream" in exc.value.args[0]

From 0da933e3748eb4ed1aa7ca2ba4724e3cf2e4cc91 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 12 May 2024 12:44:33 +0200
Subject: [PATCH 24/42] coverage

---
 pypdf/_xobj_image_helpers.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py
index 183b28ed5..d797feda0 100644
--- a/pypdf/_xobj_image_helpers.py
+++ b/pypdf/_xobj_image_helpers.py
@@ -31,7 +31,7 @@
 try:
     from PIL import Image, UnidentifiedImageError  # noqa: F401
 except ImportError:  # deprecated
-    raise ImportError(
+    raise ImportError(  # deprecated
         "pillow is required to do image extraction. "
         "It can be installed via 'pip install pypdf[image]'"
     )
@@ -145,12 +145,6 @@ def _extended_image_frombytes(
     mode: str, size: Tuple[int, int], data: bytes
 ) -> Image.Image:
     try:
-        if mode == "2bits":
-            mode = "P"
-            data = bits2byte(data, size, 2)
-        elif mode == "4bits":
-            mode = "P"
-            data = bits2byte(data, size, 4)
         img = Image.frombytes(mode, size, data)
     except ValueError as exc:
         nb_pix = size[0] * size[1]

From 422eb180f1503568c4c5a88753eea4dc50b86149 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 12 May 2024 13:04:49 +0200
Subject: [PATCH 25/42] coverage

---
 pypdf/_page.py        |  4 ++--
 tests/test_generic.py | 11 +++++++++++
 tests/test_images.py  | 20 --------------------
 3 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index 6173b3bb7..dcaf8adbc 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -457,8 +457,8 @@ def _get_ids_image(
                 lst.append(o if len(ancest) == 0 else ancest + [o])
             else:  # is a form with possible images inside
                 lst.extend(self._get_ids_image(x_object[o], ancest + [o], call_stack))
-        if self.inline_images is not None:
-            lst.extend(list(self.inline_images.keys()))
+        assert self.inline_images is not None
+        lst.extend(list(self.inline_images.keys()))
         return lst
 
     def _get_image(
diff --git a/tests/test_generic.py b/tests/test_generic.py
index a72d2f4fb..3ae434e1e 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -1419,3 +1419,14 @@ def test_unitary_extract_inline():
     with pytest.raises(PdfReadError) as exc:
         co.operations
     assert "Unexpected end of stream" in exc.value.args[0]
+
+    b = b"""1 0 0 1 0 0 cm  BT /F1 12 Tf 14.4 TL ET\nq 100 0 0 100 100 100 cm
+BI\n/W 16 /H 16 /BPC 8 /CS /RGB /F /Fl \nID
+Gar8O(o6*is8QV#;;JAuTq2lQ8J;%6#\'d5b"Q[+ZD?\'\\+CGj9~>EI
+BT\nQ\nBT 1 0 0 1 200 100 Tm (Test) Tj T* ET\n \n"""
+    ec = DecodedStreamObject()
+    ec.set_data(b)
+    co = ContentStream(ec, None)
+    with pytest.raises(PdfReadError) as exc:
+        co.operations
+    assert "Unexpected end of stream" in exc.value.args[0]
diff --git a/tests/test_images.py b/tests/test_images.py
index c97d4bbb1..dbd3f9109 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -399,26 +399,6 @@ def test_inline_image_extraction():
         "~14~",
         "~15~",
     ]
-    # 2nd call for acceleration test
-    assert list(writer.pages[0].images.keys()) == [
-        "/Im0",
-        "~0~",
-        "~1~",
-        "~2~",
-        "~3~",
-        "~4~",
-        "~5~",
-        "~6~",
-        "~7~",
-        "~8~",
-        "~9~",
-        "~10~",
-        "~11~",
-        "~12~",
-        "~13~",
-        "~14~",
-        "~15~",
-    ]
 
     url = "https://github.com/py-pdf/pypdf/files/15233597/bug1065245.pdf"
     name = "iss2598c.pdf"

From b79164ed75ab2e085fef3eceeaa2d4d9d61e20a1 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 12 May 2024 14:50:01 +0200
Subject: [PATCH 26/42] test / fix /refactoring

---
 pypdf/generic/_data_structures.py | 17 +++++++++++------
 pypdf/generic/_image_inline.py    |  2 +-
 tests/test_generic.py             | 18 ++++++++++++++++++
 3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 9a2671016..89f972c3e 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1176,14 +1176,20 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
             data = extract_inline_DCT(stream)
         elif filtr == "not set":
             cs = settings.get("/CS", "")
-            if cs == "/I" or cs == "/G" or cs == "/Indexed" or cs == "/DeviceGray":
-                lcs = 1
-            elif "RGB" in cs:
+            if "RGB" in cs:
                 lcs = 3
             elif "CMYK" in cs:
                 lcs = 4
             else:
-                bits = settings.get("/BPC", -1)
+                bits = settings.get(
+                    "/BPC",
+                    8
+                    if cs == "/I"
+                    or cs == "/G"
+                    or cs == "/Indexed"
+                    or cs == "/DeviceGray"
+                    else -1,
+                )
                 if bits > 0:
                     lcs = bits / 8.0
                 else:
@@ -1194,8 +1200,7 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
                     ceil(cast(int, settings["/W"]) * lcs) * cast(int, settings["/H"])
                 )
             ei = read_non_whitespace(stream)
-            ei += stream.read(1)
-            stream.seek(-2, 1)
+            stream.seek(-1, 1)
         else:
             data = extract_inline_default(stream)
 
diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index 5804f8389..56f5de72a 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -203,7 +203,7 @@ def extract_inline_default(stream: StreamType) -> bytes:
             data.write(buf)
         else:
             # Write out everything before the E.
-            data.write(buf[0 : (loc + 1)])
+            data.write(buf[0:loc])
 
             # Seek back in the stream to read the E next.
             stream.seek(loc + 1 - len(buf), 1)
diff --git a/tests/test_generic.py b/tests/test_generic.py
index 3ae434e1e..12d64c9f5 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -1430,3 +1430,21 @@ def test_unitary_extract_inline():
     with pytest.raises(PdfReadError) as exc:
         co.operations
     assert "Unexpected end of stream" in exc.value.args[0]
+
+    b = b"""1 0 0 1 0 0 cm  BT /F1 12 Tf 14.4 TL ET\nq 100 0 0 100 100 100 cm
+BI\n/W 4 /H 4 /CS /G \nID
+abcdefghijklmnopEI
+Q\nQ\nBT 1 0 0 1 200 100 Tm (Test) Tj T* ET\n \n"""
+    ec = DecodedStreamObject()
+    ec.set_data(b)
+    co = ContentStream(ec, None)
+    assert co.operations[7][0]["data"] == b"abcdefghijklmnop"
+
+    b = b"""1 0 0 1 0 0 cm  BT /F1 12 Tf 14.4 TL ET\nq 100 0 0 100 100 100 cm
+BI\n/W 4 /H 4 \nID
+abcdefghijklmnopEI
+Q\nQ\nBT 1 0 0 1 200 100 Tm (Test) Tj T* ET\n \n"""
+    ec = DecodedStreamObject()
+    ec.set_data(b)
+    co = ContentStream(ec, None)
+    assert co.operations[7][0]["data"] == b"abcdefghijklmnop"

From 66f858cc5256f03501f5ef82d12f6e8db85ebe4a Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Tue, 14 May 2024 22:13:17 +0200
Subject: [PATCH 27/42] fix

---
 pypdf/generic/_image_inline.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index 56f5de72a..1c1bde079 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -203,7 +203,7 @@ def extract_inline_default(stream: StreamType) -> bytes:
             data.write(buf)
         else:
             # Write out everything before the E.
-            data.write(buf[0:loc])
+            data.write(buf[0 : loc + 1])
 
             # Seek back in the stream to read the E next.
             stream.seek(loc + 1 - len(buf), 1)
@@ -227,4 +227,5 @@ def extract_inline_default(stream: StreamType) -> bytes:
                 continue
             # Data contains [\s]EI[\s](Q|EMC): 4 chars sufficient, checking Q operator not required.
             break
+
     return data.getvalue()

From ee637c02e611d71fe18aaf15fbe9c14a02c126e8 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Tue, 14 May 2024 22:36:44 +0200
Subject: [PATCH 28/42] fix2

---
 pypdf/generic/_image_inline.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index 1c1bde079..579514692 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -202,9 +202,9 @@ def extract_inline_default(stream: StreamType) -> bytes:
         if loc == -1:
             data.write(buf)
         else:
-            # Write out everything before the E.
+            # Write out everything including E (the one from EI to be removed).
             data.write(buf[0 : loc + 1])
-
+            dataposE = data.tell() - 1
             # Seek back in the stream to read the E next.
             stream.seek(loc + 1 - len(buf), 1)
             saved_pos = stream.tell()
@@ -225,7 +225,9 @@ def extract_inline_default(stream: StreamType) -> bytes:
             ):  # for Q ou EMC
                 stream.seek(saved_pos, 0)
                 continue
-            # Data contains [\s]EI[\s](Q|EMC): 4 chars sufficient, checking Q operator not required.
+            # Data contains [\s]EI[\s](Q|EMC): 4 chars are sufficients
+            # remove E(I) wrongly inserted earlier
+            data.truncate(dataposE)
             break
 
     return data.getvalue()

From 2874e567e98a40e0c95009025b0d54ff64f94cf8 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 20 May 2024 10:36:04 +0200
Subject: [PATCH 29/42] Update pypdf/_page.py

Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com>
---
 pypdf/_page.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index cdd541502..1c1976b3e 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -571,7 +571,7 @@ def _translate_value_inlineimage(self, k: str, v: PdfObject) -> PdfObject:
             )
         except (TypeError, KeyError):
             if isinstance(v, NameObject):
-                #  it is a custom name : we have to look in resources :
+                # It is a custom name, thus we have to look in resources.
                 # the only applicable case is for ColorSpace
                 try:
                     res = cast(DictionaryObject, self["/Resources"])["/ColorSpace"]

From 81e1f30837f7fe7525a9f7573f03dbd931ed03f9 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 20 May 2024 10:36:17 +0200
Subject: [PATCH 30/42] Update pypdf/_page.py

Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com>
---
 pypdf/_page.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index 1c1976b3e..ddd47b017 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -572,7 +572,7 @@ def _translate_value_inlineimage(self, k: str, v: PdfObject) -> PdfObject:
         except (TypeError, KeyError):
             if isinstance(v, NameObject):
                 # It is a custom name, thus we have to look in resources.
-                # the only applicable case is for ColorSpace
+                # The only applicable case is for ColorSpace.
                 try:
                     res = cast(DictionaryObject, self["/Resources"])["/ColorSpace"]
                     v = cast(DictionaryObject, res)[v]

From 90fe4598787cf35fd972a9380135504d4d19f8cc Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 20 May 2024 10:36:26 +0200
Subject: [PATCH 31/42] Update pypdf/_page.py

Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com>
---
 pypdf/_page.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index ddd47b017..a897434e0 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -577,7 +577,7 @@ def _translate_value_inlineimage(self, k: str, v: PdfObject) -> PdfObject:
                     res = cast(DictionaryObject, self["/Resources"])["/ColorSpace"]
                     v = cast(DictionaryObject, res)[v]
                 except KeyError:  # for res and v
-                    raise PdfReadError(f"Can not find resource entry {v} for {k}")
+                    raise PdfReadError(f"Cannot find resource entry {v} for {k}")
         return v
 
     def _get_inline_images(self) -> Dict[str, ImageFile]:

From 54e4c1d7c30bd387fc51a0121e3eb3a2e6dcd798 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 20 May 2024 10:37:05 +0200
Subject: [PATCH 32/42] Update pypdf/_page.py

Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com>
---
 pypdf/_page.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index a897434e0..50d030250 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -622,7 +622,7 @@ def _get_inline_images(self) -> Dict[str, ImageFile]:
                 "/Length": len(ii["__streamdata__"]),
             }
             for k, v in ii["settings"].items():
-                if k in ("/Length", "/L"):  # no length is expected
+                if k in {"/Length", "/L"}:  # no length is expected
                     continue
                 if isinstance(v, list):
                     v = ArrayObject(

From d9841ddc40b1d1ab438ae474abec9d9351de1119 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 20 May 2024 10:39:07 +0200
Subject: [PATCH 33/42] Update pypdf/generic/_data_structures.py

Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com>
---
 pypdf/generic/_data_structures.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 4e5b0ad0f..4365c4929 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1162,7 +1162,6 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
         assert tmp[:2] == b"ID"
         filtr = settings.get("/F", settings.get("/Filter", "not set"))
         savpos = stream.tell()
-        # import pdb;pdb.set_trace()
         # print("inline", stream.tell(),filtr,"*",settings)
         if isinstance(filtr, list):
             filtr = filtr[0]  # used forencoding

From ecdba022214ef425cde8357adf10009365b17500 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 20 May 2024 10:39:23 +0200
Subject: [PATCH 34/42] Update pypdf/generic/_data_structures.py

Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com>
---
 pypdf/generic/_data_structures.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 4365c4929..e3de2259b 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1162,7 +1162,6 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
         assert tmp[:2] == b"ID"
         filtr = settings.get("/F", settings.get("/Filter", "not set"))
         savpos = stream.tell()
-        # print("inline", stream.tell(),filtr,"*",settings)
         if isinstance(filtr, list):
             filtr = filtr[0]  # used forencoding
         if "AHx" in filtr or "ASCIIHexDecode" in filtr:

From ae9fdfc2105c2421ebe639c0e6804c1591da5780 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 20 May 2024 10:58:37 +0200
Subject: [PATCH 35/42] update from comments: rename locals, use set membership, drop dead code

---
 pypdf/_xobj_image_helpers.py      |   4 +-
 pypdf/filters.py                  |  15 ----
 pypdf/generic/_data_structures.py |   7 +-
 pypdf/generic/_image_inline.py    | 136 +++++++++++++++---------------
 4 files changed, 72 insertions(+), 90 deletions(-)

diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py
index d797feda0..33905d850 100644
--- a/pypdf/_xobj_image_helpers.py
+++ b/pypdf/_xobj_image_helpers.py
@@ -30,8 +30,8 @@
 
 try:
     from PIL import Image, UnidentifiedImageError  # noqa: F401
-except ImportError:  # deprecated
-    raise ImportError(  # deprecated
+except ImportError:  # pragma: no cover
+    raise ImportError(  # pragma: no cover
         "pillow is required to do image extraction. "
         "It can be installed via 'pip install pypdf[image]'"
     )
diff --git a/pypdf/filters.py b/pypdf/filters.py
index 39a7f8e3d..069a3d023 100644
--- a/pypdf/filters.py
+++ b/pypdf/filters.py
@@ -504,21 +504,6 @@ def decode(
         data = data.strip(WHITESPACES_AS_BYTES)
         return a85decode(data, adobe=True, ignorechars=WHITESPACES_AS_BYTES)
 
-    """@staticmethod
-    def encode(data: bytes, level: int = -1) -> bytes:
-        '''
-        Compress the input data using A85 encoding in Adobe format.
-
-        Args:
-            data: The data to be compressed.
-            level: See https://docs.python.org/3/library/zlib.html#zlib.compress
-
-        Returns:
-            The compressed data.
-        '''
-        return a85encode(data, adobe=True, wrapcol=32)
-    """
-
 
 class DCTDecode:
     @staticmethod
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index e3de2259b..f983fc625 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1181,12 +1181,7 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
             else:
                 bits = settings.get(
                     "/BPC",
-                    8
-                    if cs == "/I"
-                    or cs == "/G"
-                    or cs == "/Indexed"
-                    or cs == "/DeviceGray"
-                    else -1,
+                    8 if cs in {"/I", "/G", "/Indexed", "/DeviceGray"} else -1,
                 )
                 if bits > 0:
                     lcs = bits / 8.0
diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index 579514692..14c302902 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -45,41 +45,41 @@ def extract_inline_AHex(stream: StreamType) -> bytes:
     Extract HexEncoded Stream from Inline Image.
     the stream will be moved onto the EI
     """
-    data: bytes = b""
+    data_out: bytes = b""
     # Read data until delimiter > and EI as backup
     # ignoring backup.
     while True:
-        buf = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
-        if not buf:
+        data_buffered = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
+        if not data_buffered:
             raise PdfReadError("Unexpected end of stream")
-        loc = buf.find(b">")
-        if loc >= 0:  # found >
-            data += buf[: (loc + 1)]
-            stream.seek(-len(buf) + loc + 1, 1)
+        pos_tok = data_buffered.find(b">")
+        if pos_tok >= 0:  # found >
+            data_out += data_buffered[: (pos_tok + 1)]
+            stream.seek(-len(data_buffered) + pos_tok + 1, 1)
             break
-        loc = buf.find(b"EI")
-        if loc >= 0:  # found EI
-            stream.seek(-len(buf) + loc - 1, 1)
+        pos_ei = data_buffered.find(b"EI")
+        if pos_ei >= 0:  # found EI
+            stream.seek(-len(data_buffered) + pos_ei - 1, 1)
             c = stream.read(1)
             while c in WHITESPACES:
                 stream.seek(-2, 1)
                 c = stream.read(1)
-                loc -= 1
-            data += buf[:loc]
+                pos_ei -= 1
+            data_out += data_buffered[:pos_ei]
             break
-        elif len(buf) == 2:
-            data += buf
+        elif len(data_buffered) == 2:
+            data_out += data_buffered
             raise PdfReadError("Unexpected end of stream")
         else:  # > nor EI found
-            data += buf[:-2]
+            data_out += data_buffered[:-2]
             stream.seek(-2, 1)
 
-    ei = read_non_whitespace(stream)
-    ei += stream.read(2)
+    ei_tok = read_non_whitespace(stream)
+    ei_tok += stream.read(2)
     stream.seek(-3, 1)
-    if ei[0:2] != b"EI" or not (ei[2:3] == b"" or ei[2:3] in WHITESPACES):
+    if ei_tok[0:2] != b"EI" or not (ei_tok[2:3] == b"" or ei_tok[2:3] in WHITESPACES):
         raise PdfReadError("EI stream not found")
-    return data
+    return data_out
 
 
 def extract_inline_A85(stream: StreamType) -> bytes:
@@ -87,30 +87,32 @@ def extract_inline_A85(stream: StreamType) -> bytes:
     Extract A85 Stream from Inline Image.
     the stream will be moved onto the EI
     """
-    data: bytes = b""
+    data_out: bytes = b""
     # Read data up to delimiter ~>
     # see §3.3.2 from PDF ref 1.7
     while True:
-        buf = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
-        if not buf:
+        data_buffered = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
+        if not data_buffered:
             raise PdfReadError("Unexpected end of stream")
-        loc = buf.find(b"~>")
-        if loc >= 0:  # found!
-            data += buf[: loc + 2]
-            stream.seek(-len(buf) + loc + 2, 1)
+        pos_tok = data_buffered.find(b"~>")
+        if pos_tok >= 0:  # found!
+            data_out += data_buffered[: pos_tok + 2]
+            stream.seek(-len(data_buffered) + pos_tok + 2, 1)
             break
-        elif len(buf) == 2:  # end of buffer
-            data += buf
+        elif len(data_buffered) == 2:  # end of buffer
+            data_out += data_buffered
             raise PdfReadError("Unexpected end of stream")
-        data += buf[:-2]  # back by one char in case of in the middle of ~>
+        data_out += data_buffered[
+            :-2
+        ]  # back by two chars in case the buffer ends in the middle of ~>
         stream.seek(-2, 1)
 
-    ei = read_non_whitespace(stream)
-    ei += stream.read(2)
+    ei_tok = read_non_whitespace(stream)
+    ei_tok += stream.read(2)
     stream.seek(-3, 1)
-    if ei[0:2] != b"EI" or not (ei[2:3] == b"" or ei[2:3] in WHITESPACES):
+    if ei_tok[0:2] != b"EI" or not (ei_tok[2:3] == b"" or ei_tok[2:3] in WHITESPACES):
         raise PdfReadError("EI stream not found")
-    return data
+    return data_out
 
 
 def extract_inline_RL(stream: StreamType) -> bytes:
@@ -118,26 +120,26 @@ def extract_inline_RL(stream: StreamType) -> bytes:
     Extract RL Stream from Inline Image.
     the stream will be moved onto the EI
     """
-    data: bytes = b""
+    data_out: bytes = b""
     # Read data up to delimiter ~>
     # see §3.3.4 from PDF ref 1.7
     while True:
-        buf = stream.read(BUFFER_SIZE)
-        if not buf:
+        data_buffered = stream.read(BUFFER_SIZE)
+        if not data_buffered:
             raise PdfReadError("Unexpected end of stream")
-        loc = buf.find(b"\x80")
-        if loc >= 0:  # found
-            data += buf[: loc + 1]
-            stream.seek(-len(buf) + loc + 1, 1)
+        pos_tok = data_buffered.find(b"\x80")
+        if pos_tok >= 0:  # found
+            data_out += data_buffered[: pos_tok + 1]
+            stream.seek(-len(data_buffered) + pos_tok + 1, 1)
             break
-        data += buf
+        data_out += data_buffered
 
-    ei = read_non_whitespace(stream)
-    ei += stream.read(2)
+    ei_tok = read_non_whitespace(stream)
+    ei_tok += stream.read(2)
     stream.seek(-3, 1)
-    if ei[0:2] != b"EI" or not (ei[2:3] == b"" or ei[2:3] in WHITESPACES):
+    if ei_tok[0:2] != b"EI" or not (ei_tok[2:3] == b"" or ei_tok[2:3] in WHITESPACES):
         raise PdfReadError("EI stream not found")
-    return data
+    return data_out
 
 
 def extract_inline_DCT(stream: StreamType) -> bytes:
@@ -145,20 +147,20 @@ def extract_inline_DCT(stream: StreamType) -> bytes:
     Extract DCT (JPEG) Stream from Inline Image.
     the stream will be moved onto the EI
     """
-    data: bytes = b""
+    data_out: bytes = b""
     # Read Blocks of data (ID/Size/data) up to ID=FF/D9
     # see https://www.digicamsoft.com/itu/itu-t81-36.html
     notfirst = False
     while True:
         c = stream.read(1)
         if notfirst or (c == b"\xff"):
-            data += c
+            data_out += c
         if c != b"\xff":
             continue
         else:
             notfirst = True
         c = stream.read(1)
-        data += c
+        data_out += c
         if c == b"\xff":
             stream.seek(-1, 1)  # pragma: no cover
         elif c == b"\x00":  # stuffing
@@ -171,17 +173,17 @@ def extract_inline_DCT(stream: StreamType) -> bytes:
             b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xfe"
         ):
             c = stream.read(2)
-            data += c
+            data_out += c
             sz = c[0] * 256 + c[1]
-            data += stream.read(sz - 2)
+            data_out += stream.read(sz - 2)
         # else: pass
 
-    ei = read_non_whitespace(stream)
-    ei += stream.read(2)
+    ei_tok = read_non_whitespace(stream)
+    ei_tok += stream.read(2)
     stream.seek(-3, 1)
-    if ei[0:2] != b"EI" or not (ei[2:3] == b"" or ei[2:3] in WHITESPACES):
+    if ei_tok[0:2] != b"EI" or not (ei_tok[2:3] == b"" or ei_tok[2:3] in WHITESPACES):
         raise PdfReadError("EI stream not found")
-    return data
+    return data_out
 
 
 def extract_inline_default(stream: StreamType) -> bytes:
@@ -189,24 +191,24 @@ def extract_inline_default(stream: StreamType) -> bytes:
     Legacy method
     used by default
     """
-    data = BytesIO()
+    stream_out = BytesIO()
     # Read the inline image, while checking for EI (End Image) operator.
     while True:
-        buf = stream.read(BUFFER_SIZE)
-        if not buf:
+        data_buffered = stream.read(BUFFER_SIZE)
+        if not data_buffered:
             raise PdfReadError("Unexpected end of stream")
-        loc = buf.find(
+        pos_ei = data_buffered.find(
             b"E"
         )  # we can not look straight for "EI" because it may not have been loaded in the buffer
 
-        if loc == -1:
-            data.write(buf)
+        if pos_ei == -1:
+            stream_out.write(data_buffered)
         else:
             # Write out everything including E (the one from EI to be removed).
-            data.write(buf[0 : loc + 1])
-            dataposE = data.tell() - 1
+            stream_out.write(data_buffered[0 : pos_ei + 1])
+            sav_pos_ei = stream_out.tell() - 1
             # Seek back in the stream to read the E next.
-            stream.seek(loc + 1 - len(buf), 1)
+            stream.seek(pos_ei + 1 - len(data_buffered), 1)
             saved_pos = stream.tell()
             # Check for End Image
             tok2 = stream.read(1)  # I of "EI"
@@ -219,15 +221,15 @@ def extract_inline_default(stream: StreamType) -> bytes:
                 continue
             while tok3 in WHITESPACES:
                 tok3 = stream.read(1)
-            if buf[loc - 1 : loc] not in WHITESPACES and tok3 not in (
+            if data_buffered[pos_ei - 1 : pos_ei] not in WHITESPACES and tok3 not in {
                 b"Q",
                 b"E",
-            ):  # for Q ou EMC
+            }:  # for Q or EMC
                 stream.seek(saved_pos, 0)
                 continue
             # Data contains [\s]EI[\s](Q|EMC): 4 chars are sufficients
             # remove E(I) wrongly inserted earlier
-            data.truncate(dataposE)
+            stream_out.truncate(sav_pos_ei)
             break
 
-    return data.getvalue()
+    return stream_out.getvalue()

From bcabdc8fc70c0076e0a7871ac7d91f14dca6eeff Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 20 May 2024 18:10:50 +0200
Subject: [PATCH 36/42] Update _data_structures.py

---
 pypdf/generic/_data_structures.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index f983fc625..1688d5d5c 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -84,7 +84,7 @@
 from ._fit import Fit
 from ._image_inline import (
     extract_inline_A85,
-    extract_inline_AHex,
+    extract_inline_AHx,
     extract_inline_DCT,
     extract_inline_default,
     extract_inline_RL,
@@ -1165,7 +1165,7 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]:
         if isinstance(filtr, list):
             filtr = filtr[0]  # used forencoding
         if "AHx" in filtr or "ASCIIHexDecode" in filtr:
-            data = extract_inline_AHex(stream)
+            data = extract_inline_AHx(stream)
         elif "A85" in filtr or "ASCII85Decode" in filtr:
             data = extract_inline_A85(stream)
         elif "RL" in filtr or "RunLengthDecode" in filtr:

From dc045b6fda3bf0a5e79990e032a7e80935293835 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 20 May 2024 18:11:26 +0200
Subject: [PATCH 37/42] Update _image_inline.py

---
 pypdf/generic/_image_inline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index 14c302902..776f69660 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -40,7 +40,7 @@
 BUFFER_SIZE = 8192
 
 
-def extract_inline_AHex(stream: StreamType) -> bytes:
+def extract_inline_AHx(stream: StreamType) -> bytes:
     """
     Extract HexEncoded Stream from Inline Image.
     the stream will be moved onto the EI

From 9c03aa77855b04d32eca5af19827c8763448051d Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 20 May 2024 18:29:14 +0200
Subject: [PATCH 38/42] Update test_generic.py

---
 tests/test_generic.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/test_generic.py b/tests/test_generic.py
index 12d64c9f5..f59c559e0 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -40,7 +40,7 @@
 )
 from pypdf.generic._image_inline import (
     extract_inline_A85,
-    extract_inline_AHex,
+    extract_inline_AHx,
     extract_inline_DCT,
     extract_inline_RL,
 )
@@ -1363,11 +1363,11 @@ def test_array_operators():
 
 def test_unitary_extract_inline_buffer_invalid():
     with pytest.raises(PdfReadError):
-        extract_inline_AHex(BytesIO())
+        extract_inline_AHx(BytesIO())
     with pytest.raises(PdfReadError):
-        extract_inline_AHex(BytesIO(4095 * b"00" + b"   "))
+        extract_inline_AHx(BytesIO(4095 * b"00" + b"   "))
     with pytest.raises(PdfReadError):
-        extract_inline_AHex(BytesIO(b"00"))
+        extract_inline_AHx(BytesIO(b"00"))
     with pytest.raises(PdfReadError):
         extract_inline_A85(BytesIO())
     with pytest.raises(PdfReadError):
@@ -1387,9 +1387,9 @@ def test_unitary_extract_inline_buffer_invalid():
 def test_unitary_extract_inline():
     # AHx
     b = 16000 * b"00"
-    assert len(extract_inline_AHex(BytesIO(b + b" EI"))) == len(b)
+    assert len(extract_inline_AHx(BytesIO(b + b" EI"))) == len(b)
     with pytest.raises(PdfReadError):
-        extract_inline_AHex(BytesIO(b + b"> "))
+        extract_inline_AHx(BytesIO(b + b"> "))
     # RL
     b = 8200 * b"\x00\xAB" + b"\x80"
     assert len(extract_inline_RL(BytesIO(b + b" EI"))) == len(b)

From a56959899c01870a0a86f962e5b3d0a9b6e49cf0 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 26 May 2024 23:13:48 +0200
Subject: [PATCH 39/42] Update test_workflows.py

---
 tests/test_workflows.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 8e3fd6e42..93bc0c9e5 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -1024,7 +1024,7 @@ def test_inline_images():
         reader.pages[2]._get_image(("test",))
 
     url = "https://github.com/py-pdf/pypdf/files/15233597/bug1065245.pdf"
-    name = "iss2598c.pdf"  # test coming from another test in test_image.py
+    name = "iss2598c.pdf"  # test data also used in test_images.py/test_inline_image_extraction()
     reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
     assert len(reader.pages[0].images) == 3
 

From a52541e23b230dc5b77e8b53fb618e6b6ecd540c Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 26 May 2024 23:14:12 +0200
Subject: [PATCH 40/42] Update _image_inline.py

---
 pypdf/generic/_image_inline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index 776f69660..f6c48e883 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, PubPub-ZZ
+# Copyright (c) 2024, pypdf contributors 
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without

From cfe61a91ad19bba4d2a4fc06997cd1480f8904f4 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 26 May 2024 23:59:46 +0200
Subject: [PATCH 41/42] Update _image_inline.py

---
 pypdf/generic/_image_inline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/generic/_image_inline.py b/pypdf/generic/_image_inline.py
index f6c48e883..41826ac31 100644
--- a/pypdf/generic/_image_inline.py
+++ b/pypdf/generic/_image_inline.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, pypdf contributors 
+# Copyright (c) 2024, pypdf contributors
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without

From 7be1fd6c7f3b328cf23da32ac0059a584e368279 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 27 May 2024 08:40:17 +0200
Subject: [PATCH 42/42] remove coverage ignore on PIL import

---
 pypdf/_xobj_image_helpers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py
index 33905d850..45b0c145b 100644
--- a/pypdf/_xobj_image_helpers.py
+++ b/pypdf/_xobj_image_helpers.py
@@ -30,8 +30,8 @@
 
 try:
     from PIL import Image, UnidentifiedImageError  # noqa: F401
-except ImportError:  # pragma: no cover
-    raise ImportError(  # pragma: no cover
+except ImportError:
+    raise ImportError(
         "pillow is required to do image extraction. "
         "It can be installed via 'pip install pypdf[image]'"
     )