Skip to content

Commit

Permalink
ROB: Merge documents with named destinations with invalid page (#2857)
Browse files Browse the repository at this point in the history
Closes #2842.
  • Loading branch information
pubpub-zz authored Sep 20, 2024
1 parent 7e4a0d6 commit 36e1245
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 5 deletions.
4 changes: 2 additions & 2 deletions pypdf/_doc_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,7 @@ def _get_outline(
# §12.3.3 Document outline, entries in the outline dictionary
if not is_null_or_none(lines) and "/First" in lines:
node = cast(DictionaryObject, lines["/First"])
self._namedDests = self._get_named_destinations()
self._named_destinations = self._get_named_destinations()

if node is None:
return outline
Expand Down Expand Up @@ -996,7 +996,7 @@ def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
# TODO : keep named destination instead of replacing it ?
try:
outline_item = self._build_destination(
title, self._namedDests[dest].dest_array
title, self._named_destinations[dest].dest_array
)
except KeyError:
# named destination not found in Name Dict
Expand Down
15 changes: 12 additions & 3 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2699,10 +2699,13 @@ def merge(
position += 1
srcpages[pg.indirect_reference.idnum].original_page = pg

reader._namedDests = (
reader._named_destinations = (
reader.named_destinations
) # need for the outline processing below
for dest in reader._namedDests.values():

arr: Any

def _process_named_dests(dest: Any) -> None:
arr = dest.dest_array
if "/Names" in self._root_object and dest["/Title"] in cast(
List[Any],
Expand All @@ -2718,7 +2721,10 @@ def merge(
elif isinstance(dest["/Page"], int):
# the page reference is a page number normally not a PDF Reference
# page numbers as int are normally accepted only in external goto
p = reader.pages[dest["/Page"]]
try:
p = reader.pages[dest["/Page"]]
except IndexError:
return
assert p.indirect_reference is not None
try:
arr[NumberObject(0)] = NumberObject(
Expand All @@ -2733,6 +2739,9 @@ def merge(
].indirect_reference
self.add_named_destination_array(dest["/Title"], arr)

for dest in reader._named_destinations.values():
_process_named_dests(dest)

outline_item_typ: TreeObject
if outline_item is not None:
outline_item_typ = cast(
Expand Down
12 changes: 12 additions & 0 deletions tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2468,3 +2468,15 @@ def test_increment_writer(caplog):
assert writer.metadata is None
b = BytesIO()
writer.write(b)


@pytest.mark.enable_socket()
def test_append_pdf_with_dest_without_page(caplog):
    """
    Regression test for #2842.

    Appends a downloaded sample PDF to a fresh writer and checks the named
    destinations that end up in the writer: ``/__WKANCHOR_8`` must be absent
    and exactly three named destinations must remain.
    """
    sample_url = "https://github.com/user-attachments/files/16990834/test.pdf"
    cached_name = "iss2842.pdf"
    pdf_stream = BytesIO(get_data_from_url(sample_url, name=cached_name))
    source = PdfReader(pdf_stream)
    merged = PdfWriter()
    merged.append(source)
    # The property is read once per assertion, exactly as many times as before.
    assert "/__WKANCHOR_8" not in merged.named_destinations
    assert len(merged.named_destinations) == 3

0 comments on commit 36e1245

Please sign in to comment.