Skip to content

Commit

Permalink
MAINT: Remove b_ and str_ (#2792)
Browse files Browse the repository at this point in the history
Closes #2726. Closes #2791.
  • Loading branch information
pubpub-zz authored Aug 12, 2024
1 parent 219eb13 commit 46c89dd
Show file tree
Hide file tree
Showing 18 changed files with 193 additions and 174 deletions.
30 changes: 11 additions & 19 deletions pypdf/_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
from typing import Any, Dict, List, Tuple, Union, cast

from ._codecs import adobe_glyphs, charset_encoding
from ._utils import b_, logger_error, logger_warning
from ._utils import logger_error, logger_warning
from .generic import (
DecodedStreamObject,
DictionaryObject,
IndirectObject,
NullObject,
StreamObject,
)
Expand Down Expand Up @@ -258,7 +257,7 @@ def prepare_cm(ft: DictionaryObject) -> bytes:
tu = ft["/ToUnicode"]
cm: bytes
if isinstance(tu, StreamObject):
cm = b_(cast(DecodedStreamObject, ft["/ToUnicode"]).get_data())
cm = cast(DecodedStreamObject, ft["/ToUnicode"]).get_data()
elif isinstance(tu, str) and tu.startswith("/Identity"):
# the full range 0000-FFFF will be processed
cm = b"beginbfrange\n<0000> <0001> <0000>\nendbfrange"
Expand Down Expand Up @@ -448,34 +447,27 @@ def compute_space_width(
en: int = cast(int, ft["/LastChar"])
if st > space_code or en < space_code:
raise Exception("Not in range")
if w[space_code - st] == 0:
if w[space_code - st].get_object() == 0:
raise Exception("null width")
sp_width = w[space_code - st]
sp_width = w[space_code - st].get_object()
except Exception:
if "/FontDescriptor" in ft and "/MissingWidth" in cast(
DictionaryObject, ft["/FontDescriptor"]
):
sp_width = ft["/FontDescriptor"]["/MissingWidth"] # type: ignore
sp_width = ft["/FontDescriptor"]["/MissingWidth"].get_object() # type: ignore
else:
# will consider width of char as avg(width)/2
m = 0
cpt = 0
for x in w:
if x > 0:
m += x
for xx in w:
xx = xx.get_object()
if xx > 0:
m += xx
cpt += 1
sp_width = m / max(1, cpt) / 2

if isinstance(sp_width, IndirectObject):
# According to
# 'Table 122 - Entries common to all font descriptors (continued)'
# the MissingWidth should be a number, but according to #2286 it can
# be an indirect object
obj = sp_width.get_object()
if obj is None or isinstance(obj, NullObject):
return 0.0
return obj # type: ignore

if sp_width is None or isinstance(sp_width, NullObject):
sp_width = 0.0
return sp_width


Expand Down
3 changes: 1 addition & 2 deletions pypdf/_doc_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
from ._page import PageObject, _VirtualList
from ._page_labels import index2label as page_index2page_label
from ._utils import (
b_,
deprecate_with_replacement,
logger_warning,
parse_iso8824_date,
Expand Down Expand Up @@ -1258,7 +1257,7 @@ def xfa(self) -> Optional[Dict[str, Any]]:
if isinstance(f, IndirectObject):
field = cast(Optional[EncodedStreamObject], f.get_object())
if field:
es = zlib.decompress(b_(field._data))
es = zlib.decompress(field._data)
retval[tag] = es
return retval

Expand Down
6 changes: 3 additions & 3 deletions pypdf/_encryption.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
rc4_encrypt,
)

from ._utils import b_, logger_warning
from ._utils import logger_warning
from .generic import (
ArrayObject,
ByteStringObject,
Expand Down Expand Up @@ -78,7 +78,7 @@ def encrypt_object(self, obj: PdfObject) -> PdfObject:
elif isinstance(obj, StreamObject):
obj2 = StreamObject()
obj2.update(obj)
obj2.set_data(self.stm_crypt.encrypt(b_(obj._data)))
obj2.set_data(self.stm_crypt.encrypt(obj._data))
for key, value in obj.items(): # Dont forget the Stream dict.
obj2[key] = self.encrypt_object(value)
obj = obj2
Expand All @@ -96,7 +96,7 @@ def decrypt_object(self, obj: PdfObject) -> PdfObject:
data = self.str_crypt.decrypt(obj.original_bytes)
obj = create_string_object(data)
elif isinstance(obj, StreamObject):
obj._data = self.stm_crypt.decrypt(b_(obj._data))
obj._data = self.stm_crypt.decrypt(obj._data)
for key, value in obj.items(): # Dont forget the Stream dict.
obj[key] = self.decrypt_object(value)
elif isinstance(obj, DictionaryObject):
Expand Down
10 changes: 9 additions & 1 deletion pypdf/_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
from ._utils import (
StrByteType,
deprecate_with_replacement,
str_,
)
from ._writer import PdfWriter
from .constants import GoToActionArguments, TypArguments, TypFitArguments
Expand Down Expand Up @@ -82,6 +81,15 @@ def __init__(self, pagedata: PageObject, src: PdfReader, id: int) -> None:
self.id = id


# transferred from _utils, as this function is only required here
# and the merger will soon be deprecated
def str_(b: Any) -> str:  # pragma: no cover
    """
    Return a text representation of *b*.

    ``bytes`` are decoded as Latin-1; any other object is passed to
    ``str()``, which uses ``b.__str__()`` if defined.
    """
    return b.decode("latin-1") if isinstance(b, bytes) else str(b)


class PdfMerger:
"""
Use :class:`PdfWriter` instead.
Expand Down
16 changes: 8 additions & 8 deletions pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -846,7 +846,7 @@ def _add_transformation_matrix(
FloatObject(e),
FloatObject(f),
],
" cm",
b"cm",
],
)
return contents
Expand All @@ -864,7 +864,7 @@ def _get_contents_as_bytes(self) -> Optional[bytes]:
if isinstance(obj, list):
return b"".join(x.get_object().get_data() for x in obj)
else:
return cast(bytes, cast(EncodedStreamObject, obj).get_data())
return cast(EncodedStreamObject, obj).get_data()
else:
return None

Expand Down Expand Up @@ -1057,11 +1057,11 @@ def _merge_page(
rect.height,
],
),
"re",
b"re",
),
)
page2content.operations.insert(1, ([], "W"))
page2content.operations.insert(2, ([], "n"))
page2content.operations.insert(1, ([], b"W"))
page2content.operations.insert(2, ([], b"n"))
if page2transformation is not None:
page2content = page2transformation(page2content)
page2content = PageObject._content_stream_rename(
Expand Down Expand Up @@ -1195,11 +1195,11 @@ def _merge_page_writer(
rect.height,
],
),
"re",
b"re",
),
)
page2content.operations.insert(1, ([], "W"))
page2content.operations.insert(2, ([], "n"))
page2content.operations.insert(1, ([], b"W"))
page2content.operations.insert(2, ([], b"n"))
if page2transformation is not None:
page2content = page2transformation(page2content)
page2content = PageObject._content_stream_rename(
Expand Down
5 changes: 2 additions & 3 deletions pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
from ._utils import (
StrByteType,
StreamType,
b_,
logger_warning,
read_non_whitespace,
read_previous_line,
Expand Down Expand Up @@ -328,7 +327,7 @@ def _get_object_from_stream(
assert cast(str, obj_stm["/Type"]) == "/ObjStm"
# /N is the number of indirect objects in the stream
assert idx < obj_stm["/N"]
stream_data = BytesIO(b_(obj_stm.get_data()))
stream_data = BytesIO(obj_stm.get_data())
for i in range(obj_stm["/N"]): # type: ignore
read_non_whitespace(stream_data)
stream_data.seek(-1, 1)
Expand Down Expand Up @@ -932,7 +931,7 @@ def _read_pdf15_xref_stream(
xrefstream = cast(ContentStream, read_object(stream, self))
assert cast(str, xrefstream["/Type"]) == "/XRef"
self.cache_indirect_object(generation, idnum, xrefstream)
stream_data = BytesIO(b_(xrefstream.get_data()))
stream_data = BytesIO(xrefstream.get_data())
# Index pairs specify the subsections in the dictionary. If
# none create one subsection that spans everything.
idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
Expand Down
34 changes: 7 additions & 27 deletions pypdf/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,31 +336,6 @@ def mark_location(stream: StreamType) -> None:
stream.seek(-radius, 1)


# Memo of already-encoded strings; ``b_`` only stores strings shorter than
# two characters here.
B_CACHE: Dict[str, bytes] = {}


def b_(s: Union[str, bytes]) -> bytes:
    """
    Coerce *s* to ``bytes``.

    ``bytes`` input is returned unchanged. A ``str`` is encoded as Latin-1,
    falling back to UTF-8 when it holds characters Latin-1 cannot represent.
    Results for strings of length 0 or 1 are remembered in ``B_CACHE``.
    """
    if isinstance(s, bytes):
        return s

    # Serve a previously encoded string straight from the cache.
    try:
        return B_CACHE[s]
    except KeyError:
        pass

    try:
        encoded = s.encode("latin-1")
    except UnicodeEncodeError:
        encoded = s.encode("utf-8")

    # Only very short strings are cached.
    if len(s) <= 1:
        B_CACHE[s] = encoded
    return encoded


def str_(b: Any) -> str:
    """
    Return a text representation of *b*.

    ``bytes`` are decoded as Latin-1; everything else is converted with
    ``str()``.
    """
    if not isinstance(b, bytes):
        # str() will return b.__str__() if defined
        return str(b)
    return b.decode("latin-1")


@overload
def ord_(b: str) -> int:
...
Expand Down Expand Up @@ -397,12 +372,17 @@ def deprecation(msg: str) -> None:

def deprecate_with_replacement(old_name: str, new_name: str, removed_in: str) -> None:
"""Raise an exception that a feature will be removed, but has a replacement."""
# Formats a message from old_name, removed_in and new_name and passes it,
# together with the literal 4, to ``deprecate``.
# NOTE(review): the meaning of the second argument (4) is not visible here;
# presumably a warning stack level — confirm against ``deprecate``.
# NOTE(review): this span comes from a diff view. The single-line call below
# looks like the removed form and the multi-line call after it the
# reformatted replacement, so presumably only one exists in the real file.
deprecate(f"{old_name} is deprecated and will be removed in pypdf {removed_in}. Use {new_name} instead.", 4)
deprecate(
f"{old_name} is deprecated and will be removed in pypdf {removed_in}. Use {new_name} instead.",
4,
)


def deprecation_with_replacement(old_name: str, new_name: str, removed_in: str) -> None:
"""Raise an exception that a feature was already removed, but has a replacement."""
# Formats a message from old_name, removed_in and new_name and passes it to
# ``deprecation``.
# NOTE(review): this span comes from a diff view. The single-line call below
# looks like the removed form and the multi-line call after it the
# reformatted replacement, so presumably only one exists in the real file.
deprecation(f"{old_name} is deprecated and was removed in pypdf {removed_in}. Use {new_name} instead.")
deprecation(
f"{old_name} is deprecated and was removed in pypdf {removed_in}. Use {new_name} instead."
)


def deprecate_no_replacement(name: str, removed_in: str) -> None:
Expand Down
6 changes: 3 additions & 3 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
StrByteType,
StreamType,
_get_max_pdf_version_header,
b_,
deprecate_with_replacement,
logger_warning,
)
Expand Down Expand Up @@ -678,9 +677,10 @@ def add_attachment(self, filename: str, data: Union[str, bytes]) -> None:
# Hello world!
# endstream
# endobj

if isinstance(data, str):
data = data.encode("latin-1")
file_entry = DecodedStreamObject()
file_entry.set_data(b_(data))
file_entry.set_data(data)
file_entry.update({NameObject(PA.TYPE): NameObject("/EmbeddedFile")})

# The Filespec entry
Expand Down
Loading

0 comments on commit 46c89dd

Please sign in to comment.