From 0b589cff93dd29c1c7b8d3803c4deb6a8683a463 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 29 Aug 2022 10:35:08 +0200
Subject: [PATCH 1/3] ROB : Multi-line entries in bfrange(cmap)

Fixes #1285
---
 PyPDF2/_cmap.py | 46 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 9 deletions(-)

diff --git a/PyPDF2/_cmap.py b/PyPDF2/_cmap.py
index afce26088..2863c03d3 100644
--- a/PyPDF2/_cmap.py
+++ b/PyPDF2/_cmap.py
@@ -180,10 +180,13 @@ def parse_to_unicode(
         return {}, space_code, []
     process_rg: bool = False
     process_char: bool = False
+    multiline_rg: Union[
+        None, Tuple[int, int]
+    ] = None  # tuple = (next_char, range_end) ; cf #1285 for example of file
     cm = prepare_cm(ft)
     for l in cm.split(b"\n"):
-        process_rg, process_char = process_cm_line(
-            l.strip(b" "), process_rg, process_char, map_dict, int_entry
+        process_rg, process_char, multiline_rg = process_cm_line(
+            l.strip(b" "), process_rg, process_char, multiline_rg, map_dict, int_entry
         )
 
     for a, value in map_dict.items():
@@ -228,11 +231,12 @@ def process_cm_line(
     l: bytes,
     process_rg: bool,
     process_char: bool,
+    multiline_rg: Union[None, Tuple[int, int]],
     map_dict: Dict[Any, Any],
     int_entry: List[int],
-) -> Tuple[bool, bool]:
+) -> Tuple[bool, bool, Union[None, Tuple[int, int]]]:
     if l in (b"", b" ") or l[0] == 37:  # 37 = %
-        return process_rg, process_char
+        return process_rg, process_char, multiline_rg
     if b"beginbfrange" in l:
         process_rg = True
     elif b"endbfrange" in l:
@@ -242,22 +246,44 @@ def process_cm_line(
     elif b"endbfchar" in l:
         process_char = False
     elif process_rg:
-        parse_bfrange(l, map_dict, int_entry)
+        multiline_rg = parse_bfrange(l, map_dict, int_entry, multiline_rg)
     elif process_char:
         parse_bfchar(l, map_dict, int_entry)
-    return process_rg, process_char
+    return process_rg, process_char, multiline_rg
 
 
-def parse_bfrange(l: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> None:
+def parse_bfrange(
+    l: bytes,
+    map_dict: Dict[Any, Any],
+    int_entry: List[int],
+    multiline_rg: Union[None, Tuple[int, int]],
+) -> Union[None, Tuple[int, int]]:
     lst = [x for x in l.split(b" ") if x]
     a = int(lst[0], 16)
     b = int(lst[1], 16)
     nbi = len(lst[0])
+    closure_found = False
     map_dict[-1] = nbi // 2
     fmt = b"%%0%dX" % nbi
-    if lst[2] == b"[":
+    if multiline_rg is not None:
+        a = multiline_rg[0]  # a, b not in the current line
+        b = multiline_rg[1]
+        for sq in lst[1:]:
+            if sq == b"]":
+                closure_found = True
+                break
+            map_dict[
+                unhexlify(fmt % a).decode(
+                    "charmap" if map_dict[-1] == 1 else "utf-16-be",
+                    "surrogatepass",
+                )
+            ] = unhexlify(sq).decode("utf-16-be", "surrogatepass")
+            int_entry.append(a)
+            a += 1
+    elif lst[2] == b"[":
         for sq in lst[3:]:
             if sq == b"]":
+                closure_found = True
                 break
             map_dict[
                 unhexlify(fmt % a).decode(
@@ -267,9 +293,10 @@ def parse_bfrange(l: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> N
             ] = unhexlify(sq).decode("utf-16-be", "surrogatepass")
             int_entry.append(a)
             a += 1
-    else:
+    else:  # case without list
         c = int(lst[2], 16)
         fmt2 = b"%%0%dX" % max(4, len(lst[2]))
+        closure_found = True
         while a <= b:
             map_dict[
                 unhexlify(fmt % a).decode(
@@ -280,6 +307,7 @@ def parse_bfrange(l: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> N
             int_entry.append(a)
             a += 1
             c += 1
+    return None if closure_found else (a, b)
 
 
 def parse_bfchar(l: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> None:

From 91fc95e38ec08ad4763bdfe355c075d58c8d13dd Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 29 Aug 2022 10:42:20 +0200
Subject: [PATCH 2/3] add test

---
 tests/test_cmap.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/test_cmap.py b/tests/test_cmap.py
index 8aa151436..3d991c115 100644
--- a/tests/test_cmap.py
+++ b/tests/test_cmap.py
@@ -46,3 +46,12 @@ def test_get_font_width_from_default():  # L40
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     for page in reader.pages:
         page.extract_text()
+
+
+def test_multiline_bfrange():
+    # regression test for issue #1285
+    url = "https://github.com/alexanderquispe/1REI05/raw/main/reports/report_1/The%20lean%20times%20in%20the%20Peruvian%20economy.pdf"
+    name = "tika-908104.pdf"
+    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
+    for page in reader.pages:
+        page.extract_text()

From f3aa9031ef66d066f79b8cae21de6e6d36fb08d4 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 29 Aug 2022 20:46:40 +0200
Subject: [PATCH 3/3] ROB : ending list with only one item on the line

fixes #1274
---
 PyPDF2/_cmap.py    | 61 +++++++++++++++++++++++-----------------------
 tests/test_cmap.py |  5 ++++
 2 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/PyPDF2/_cmap.py b/PyPDF2/_cmap.py
index 2863c03d3..13dc9a906 100644
--- a/PyPDF2/_cmap.py
+++ b/PyPDF2/_cmap.py
@@ -259,10 +259,8 @@ def parse_bfrange(
     multiline_rg: Union[None, Tuple[int, int]],
 ) -> Union[None, Tuple[int, int]]:
     lst = [x for x in l.split(b" ") if x]
-    a = int(lst[0], 16)
-    b = int(lst[1], 16)
-    nbi = len(lst[0])
     closure_found = False
+    nbi = len(lst[0])
     map_dict[-1] = nbi // 2
     fmt = b"%%0%dX" % nbi
     if multiline_rg is not None:
@@ -280,33 +278,36 @@ def parse_bfrange(
             ] = unhexlify(sq).decode("utf-16-be", "surrogatepass")
             int_entry.append(a)
             a += 1
-    elif lst[2] == b"[":
-        for sq in lst[3:]:
-            if sq == b"]":
-                closure_found = True
-                break
-            map_dict[
-                unhexlify(fmt % a).decode(
-                    "charmap" if map_dict[-1] == 1 else "utf-16-be",
-                    "surrogatepass",
-                )
-            ] = unhexlify(sq).decode("utf-16-be", "surrogatepass")
-            int_entry.append(a)
-            a += 1
-    else:  # case without list
-        c = int(lst[2], 16)
-        fmt2 = b"%%0%dX" % max(4, len(lst[2]))
-        closure_found = True
-        while a <= b:
-            map_dict[
-                unhexlify(fmt % a).decode(
-                    "charmap" if map_dict[-1] == 1 else "utf-16-be",
-                    "surrogatepass",
-                )
-            ] = unhexlify(fmt2 % c).decode("utf-16-be", "surrogatepass")
-            int_entry.append(a)
-            a += 1
-            c += 1
+    else:
+        a = int(lst[0], 16)
+        b = int(lst[1], 16)
+        if lst[2] == b"[":
+            for sq in lst[3:]:
+                if sq == b"]":
+                    closure_found = True
+                    break
+                map_dict[
+                    unhexlify(fmt % a).decode(
+                        "charmap" if map_dict[-1] == 1 else "utf-16-be",
+                        "surrogatepass",
+                    )
+                ] = unhexlify(sq).decode("utf-16-be", "surrogatepass")
+                int_entry.append(a)
+                a += 1
+        else:  # case without list
+            c = int(lst[2], 16)
+            fmt2 = b"%%0%dX" % max(4, len(lst[2]))
+            closure_found = True
+            while a <= b:
+                map_dict[
+                    unhexlify(fmt % a).decode(
+                        "charmap" if map_dict[-1] == 1 else "utf-16-be",
+                        "surrogatepass",
+                    )
+                ] = unhexlify(fmt2 % c).decode("utf-16-be", "surrogatepass")
+                int_entry.append(a)
+                a += 1
+                c += 1
     return None if closure_found else (a, b)
 
 
diff --git a/tests/test_cmap.py b/tests/test_cmap.py
index 3d991c115..4a8053669 100644
--- a/tests/test_cmap.py
+++ b/tests/test_cmap.py
@@ -55,3 +55,8 @@ def test_multiline_bfrange():
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     for page in reader.pages:
         page.extract_text()
+    url = "https://github.com/yxj-HGNwmb5kdp8ewr/yxj-HGNwmb5kdp8ewr.github.io/raw/master/files/Giacalone%20Llobell%20Jaeger%20(2022)%20Food%20Qual%20Prefer.pdf"
+    name = "Giacalone.pdf"
+    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
+    for page in reader.pages:
+        page.extract_text()