From de1f55430d40400f5d4fc003bfeaeffd3f85f804 Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 24 Jul 2024 08:52:55 +0100 Subject: [PATCH 1/3] STY: Comment and refactor b_ Comment courtesy of @shartzog, issue #2726. --- pypdf/_utils.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pypdf/_utils.py b/pypdf/_utils.py index 38c0d67d7..296d366c8 100644 --- a/pypdf/_utils.py +++ b/pypdf/_utils.py @@ -347,14 +347,15 @@ def b_(s: Union[str, bytes]) -> bytes: return bc[s] try: r = s.encode("latin-1") - if len(s) < 2: - bc[s] = r - return r except Exception: r = s.encode("utf-8") - if len(s) < 2: - bc[s] = r - return r + if len(s) < 2: + # Automated reporting frameworks and 'document to PDF' conversion tools love to + # use a 'render every character one at a time' paradigm for creating a + # 'justified' text layout, and many of them just behave that way by default to + # simplify their implementation. + bc[s] = r + return r def str_(b: Any) -> str: From 06baf4fb48deece2188044eb1b0fd46acd02af02 Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Thu, 25 Jul 2024 08:07:16 +0100 Subject: [PATCH 2/3] STY: Small refactor of b_ --- pypdf/_utils.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pypdf/_utils.py b/pypdf/_utils.py index 296d366c8..33776249d 100644 --- a/pypdf/_utils.py +++ b/pypdf/_utils.py @@ -350,10 +350,6 @@ def b_(s: Union[str, bytes]) -> bytes: except Exception: r = s.encode("utf-8") if len(s) < 2: - # Automated reporting frameworks and 'document to PDF' conversion tools love to - # use a 'render every character one at a time' paradigm for creating a - # 'justified' text layout, and many of them just behave that way by default to - # simplify their implementation. bc[s] = r return r From c22b6e233e43733cee2bcd0d0554877768d35d08 Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 7 Aug 2024 09:59:25 +0100 Subject: [PATCH 3/3] STY: Refactor b_ --- pypdf/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/_utils.py b/pypdf/_utils.py index 139c2a27b..5fecb38e7 100644 --- a/pypdf/_utils.py +++ b/pypdf/_utils.py @@ -347,7 +347,7 @@ def b_(s: Union[str, bytes]) -> bytes: return bc[s] try: r = s.encode("latin-1") - except Exception: + except UnicodeEncodeError: r = s.encode("utf-8") if len(s) < 2: bc[s] = r