Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

STY: Same attributes between PdfReader and PdfWriter #1870

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions pypdf/_page_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
Example 1
---------

>>> reader.trailer["/Root"]["/PageLabels"]["/Nums"]
>>> reader.root_object["/PageLabels"]["/Nums"]
[0, IndirectObject(18, 0, 139929798197504),
8, IndirectObject(19, 0, 139929798197504)]
>>> reader.get_object(reader.trailer["/Root"]["/PageLabels"]["/Nums"][1])
>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][1])
{'/S': '/r'}
>>> reader.get_object(reader.trailer["/Root"]["/PageLabels"]["/Nums"][3])
>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][3])
{'/S': '/D'}

Example 2
Expand Down Expand Up @@ -57,7 +57,7 @@
aa to zz for the next 26, and so on)
"""

from typing import Iterator, Optional, Tuple
from typing import Iterator, Optional, Tuple, cast

from ._protocols import PdfReaderProtocol
from ._utils import logger_warning
Expand Down Expand Up @@ -127,10 +127,10 @@ def index2label(reader: PdfReaderProtocol, index: int) -> str:
Returns:
The label of the page, e.g. "iv" or "4".
"""
root = reader.trailer["/Root"]
root = cast(DictionaryObject, reader.root_object)
if "/PageLabels" not in root:
return str(index + 1) # Fallback
number_tree = root["/PageLabels"]
number_tree = cast(DictionaryObject, root["/PageLabels"])
if "/Nums" in number_tree:
# [Nums] shall be an array of the form
# [ key 1 value 1 key 2 value 2 ... key n value n ]
Expand All @@ -139,7 +139,7 @@ def index2label(reader: PdfReaderProtocol, index: int) -> str:
# The keys shall be sorted in numerical order,
# analogously to the arrangement of keys in a name tree
# as described in 7.9.6, "Name Trees."
nums = number_tree["/Nums"]
nums = cast(ArrayObject, number_tree["/Nums"])
i = 0
value = None
start_index = 0
Expand Down
8 changes: 8 additions & 0 deletions pypdf/_protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ def pages(self) -> List[Any]:
def trailer(self) -> Dict[str, Any]:
...

@property
def root_object(self) -> PdfObjectProtocol:
    """Root object of the document (PdfReader returns the trailer's "/Root" entry)."""
    ...

def get_object(self, indirect_reference: Any) -> Optional[PdfObjectProtocol]:
    """
    Return the object designated by ``indirect_reference``.

    NOTE(review): presumably None is returned when the reference cannot be
    resolved -- confirm against the concrete implementations.
    """
    ...

Expand All @@ -83,3 +87,7 @@ def pages(self) -> List[Any]:
@property
def pdf_header(self) -> bytes:
    """
    Header of the PDF document.

    NOTE(review): annotated as ``bytes`` here, while ``PdfReader.pdf_header``
    in ``_reader.py`` is annotated as returning ``str`` -- confirm which type
    is intended.
    """
    ...

@property
def root_object(self) -> PdfObjectProtocol:
    """
    Root object of the document.

    NOTE(review): presumably the writer-side counterpart of
    ``PdfReader.root_object`` -- the enclosing protocol class is not visible
    here, confirm.
    """
    ...
108 changes: 65 additions & 43 deletions pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,8 +329,7 @@ def __init__(
# Some documents may not have a /ID, use two empty
# byte strings instead. Solves
# https://github.com/py-pdf/pypdf/issues/608
id_entry = self.trailer.get(TK.ID)
id1_entry = id_entry[0].get_object().original_bytes if id_entry else b""
id1_entry = self._ID[0].get_object().original_bytes if self._ID else b""
encrypt_entry = cast(
DictionaryObject, self.trailer[TK.ENCRYPT].get_object()
)
Expand All @@ -348,6 +347,34 @@ def __init__(
elif password is not None:
raise PdfReadError("Not encrypted file")

@property
def root_object(self) -> DictionaryObject:
    """
    Return the trailer's "/Root" entry.

    Exposed under the same name as on PdfWriter so both classes share the
    attribute.
    """
    catalog = self.trailer[TK.ROOT]
    return cast(DictionaryObject, catalog)

@property
def _info(self) -> Optional[DictionaryObject]:
    """
    Give access to the trailer's "/Info" entry, standardized with PdfWriter.

    Returns:
        The /Info dictionary, or None if the entry does not exist.
    """
    entry = self.trailer.get(TK.INFO, None)
    if entry is None:
        return None
    # The trailer value may be an indirect reference; resolve it first.
    return cast(DictionaryObject, entry.get_object())

@property
def _ID(self) -> Optional[ArrayObject]:
    """
    Provide access to "/ID", standardized with PdfWriter.

    Returns:
        The /ID array, or None if the entry does not exist.
    """
    # Named ``id_entry`` so the builtin ``id`` is not shadowed.
    id_entry = self.trailer.get(TK.ID, None)
    return None if id_entry is None else cast(ArrayObject, id_entry.get_object())

@property
def pdf_header(self) -> str:
"""
Expand Down Expand Up @@ -375,9 +402,9 @@ def metadata(self) -> Optional[DocumentInformation]:
"""
if TK.INFO not in self.trailer:
return None
obj = self.trailer[TK.INFO]
obj = self._info
retval = DocumentInformation()
if isinstance(obj, type(None)):
if obj is None:
raise PdfReadError(
"trailer not found or does not point to document information directory"
)
Expand Down Expand Up @@ -408,7 +435,7 @@ def xmp_metadata(self) -> Optional[XmpInformation]:
"""XMP (Extensible Metadata Platform) data."""
try:
self._override_encryption = True
return self.trailer[TK.ROOT].xmp_metadata # type: ignore
return self.root_object.xmp_metadata # type: ignore
finally:
self._override_encryption = False

Expand Down Expand Up @@ -446,7 +473,7 @@ def _get_num_pages(self) -> int:
# the PDF file's page count is used in this case. Otherwise,
# the original method (flattened page count) is used.
if self.is_encrypted:
return self.trailer[TK.ROOT]["/Pages"]["/Count"] # type: ignore
return self.root_object["/Pages"]["/Count"] # type: ignore
else:
if self.flattened_pages is None:
self._flatten()
Expand Down Expand Up @@ -546,10 +573,9 @@ def get_fields(
field_attributes.update(CheckboxRadioButtonAttributes.attributes_dict())
if retval is None:
retval = {}
catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
# get the AcroForm tree
if CD.ACRO_FORM in catalog:
tree = cast(Optional[TreeObject], catalog[CD.ACRO_FORM])
if CD.ACRO_FORM in self.root_object:
tree = cast(Optional[TreeObject], self.root_object[CD.ACRO_FORM])
else:
return None
if tree is None:
Expand Down Expand Up @@ -739,13 +765,12 @@ def _get_named_destinations(
"""
if retval is None:
retval = {}
catalog = cast(DictionaryObject, self.trailer[TK.ROOT])

# get the name tree
if CA.DESTS in catalog:
tree = cast(TreeObject, catalog[CA.DESTS])
elif CA.NAMES in catalog:
names = cast(DictionaryObject, catalog[CA.NAMES])
if CA.DESTS in self.root_object:
tree = cast(TreeObject, self.root_object[CA.DESTS])
elif CA.NAMES in self.root_object:
names = cast(DictionaryObject, self.root_object[CA.NAMES])
if CA.DESTS in names:
tree = cast(TreeObject, names[CA.DESTS])

Expand Down Expand Up @@ -825,11 +850,10 @@ def _get_outline(
) -> OutlineType:
if outline is None:
outline = []
catalog = cast(DictionaryObject, self.trailer[TK.ROOT])

# get the outline dictionary and named destinations
if CO.OUTLINES in catalog:
lines = cast(DictionaryObject, catalog[CO.OUTLINES])
if CO.OUTLINES in self.root_object:
lines = cast(DictionaryObject, self.root_object[CO.OUTLINES])

if isinstance(lines, NullObject):
return outline
Expand Down Expand Up @@ -882,9 +906,8 @@ def threads(self) -> Optional[ArrayObject]:
It's an array of dictionaries with "/F" and "/I" properties or
None if there are no articles.
"""
catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
if CO.THREADS in catalog:
return cast("ArrayObject", catalog[CO.THREADS])
if CO.THREADS in self.root_object:
return cast("ArrayObject", self.root_object[CO.THREADS])
else:
return None

Expand Down Expand Up @@ -1097,9 +1120,8 @@ def page_layout(self) -> Optional[str]:
* - /TwoPageRight
- Show two pages at a time, odd-numbered pages on the right
"""
trailer = cast(DictionaryObject, self.trailer[TK.ROOT])
if CD.PAGE_LAYOUT in trailer:
return cast(NameObject, trailer[CD.PAGE_LAYOUT])
if CD.PAGE_LAYOUT in self.root_object:
return cast(NameObject, self.root_object[CD.PAGE_LAYOUT])
return None

def getPageLayout(self) -> Optional[str]: # deprecated
Expand Down Expand Up @@ -1143,7 +1165,7 @@ def page_mode(self) -> Optional[PagemodeType]:
- Show attachments panel
"""
try:
return self.trailer[TK.ROOT]["/PageMode"] # type: ignore
return self.root_object["/PageMode"] # type: ignore
except KeyError:
return None

Expand Down Expand Up @@ -1183,7 +1205,7 @@ def _flatten(
if pages is None:
# Fix issue 327: set flattened_pages attribute only for
# decrypted file
catalog = self.trailer[TK.ROOT].get_object()
catalog = self.root_object.get_object()
pages = catalog["/Pages"].get_object() # type: ignore
self.flattened_pages = []

Expand Down Expand Up @@ -2073,12 +2095,13 @@ def isEncrypted(self) -> bool: # deprecated
def xfa(self) -> Optional[Dict[str, Any]]:
tree: Optional[TreeObject] = None
retval: Dict[str, Any] = {}
catalog = cast(DictionaryObject, self.trailer[TK.ROOT])

if "/AcroForm" not in catalog or not catalog["/AcroForm"]:
if isinstance(
self.root_object.get("/AcroForm", None), (type(None), NullObject)
):
return None

tree = cast(TreeObject, catalog["/AcroForm"])
tree = cast(TreeObject, self.root_object["/AcroForm"])

if "/XFA" in tree:
fields = cast(ArrayObject, tree["/XFA"])
Expand All @@ -2103,13 +2126,12 @@ def add_form_topname(self, name: str) -> Optional[DictionaryObject]:
Returns:
The created object. ``None`` means no object was created.
"""
catalog = cast(DictionaryObject, self.trailer[TK.ROOT])

if "/AcroForm" not in catalog or not isinstance(
catalog["/AcroForm"], DictionaryObject
if isinstance(
self.root_object.get("/AcroForm", None), (type(None), NullObject)
):
return None
acroform = cast(DictionaryObject, catalog[NameObject("/AcroForm")])

acroform = cast(DictionaryObject, self.root_object["/AcroForm"])
if "/Fields" not in acroform:
# TODO: :No error returns but may be extended for XFA Forms
return None
Expand Down Expand Up @@ -2145,13 +2167,12 @@ def rename_form_topname(self, name: str) -> Optional[DictionaryObject]:
Returns:
The modified object. ``None`` means no object was modified.
"""
catalog = cast(DictionaryObject, self.trailer[TK.ROOT])

if "/AcroForm" not in catalog or not isinstance(
catalog["/AcroForm"], DictionaryObject
if isinstance(
self.root_object.get("/AcroForm", None), (type(None), NullObject)
):
return None
acroform = cast(DictionaryObject, catalog[NameObject("/AcroForm")])

acroform = cast(DictionaryObject, self.root_object["/AcroForm"])
if "/Fields" not in acroform:
return None

Expand All @@ -2178,14 +2199,14 @@ def _list_attachments(self) -> List[str]:
Returns:
list of filenames
"""
catalog = cast(DictionaryObject, self.trailer["/Root"])
# From the catalog get the embedded file names
try:
filenames = cast(
ArrayObject,
cast(
DictionaryObject,
cast(DictionaryObject, catalog["/Names"])["/EmbeddedFiles"],
cast(DictionaryObject, self.root_object["/Names"])[
"/EmbeddedFiles"
],
)["/Names"],
)
except KeyError:
Expand Down Expand Up @@ -2220,14 +2241,15 @@ def _get_attachments(
dictionary of filename -> Union[bytestring or List[ByteString]]
if the filename exists multiple times a List of the different version will be provided
"""
catalog = cast(DictionaryObject, self.trailer["/Root"])
# From the catalog get the embedded file names
try:
filenames = cast(
ArrayObject,
cast(
DictionaryObject,
cast(DictionaryObject, catalog["/Names"])["/EmbeddedFiles"],
cast(DictionaryObject, self.root_object["/Names"])[
"/EmbeddedFiles"
],
)["/Names"],
)
except KeyError:
Expand Down
Loading