From 819370bf05ecac028cca0c283496109b73ab3a92 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 26 Jun 2022 09:11:29 +0200 Subject: [PATCH] ROB: Deal with invalid Destinations Issue: An AttributeError: 'NoneType' object has no attribute 'get_object' was raised when trying to write a page that was merged. Fix: Guard IndirectObject.get_object access Closes #997 --- PyPDF2/_merger.py | 2 ++ PyPDF2/generic.py | 5 ++++- tests/test_generic.py | 20 +++++++++++++++++++- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index bb37a797c..7cb27e605 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -435,6 +435,8 @@ def _trim_outline( else: prev_header_added = False for j in range(*pages): + if o["/Page"] is None: + continue if pdf.pages[j].get_object() == o["/Page"].get_object(): o[NameObject("/Page")] = o["/Page"].get_object() new_outline.append(o) diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py index 3b2a7c8d4..4711a052d 100644 --- a/PyPDF2/generic.py +++ b/PyPDF2/generic.py @@ -218,7 +218,10 @@ def __init__(self, idnum: int, generation: int, pdf: Any) -> None: # PdfReader self.pdf = pdf def get_object(self) -> Optional[PdfObject]: - return self.pdf.get_object(self).get_object() + obj = self.pdf.get_object(self) + if obj is None: + return None + return obj.get_object() def __repr__(self) -> str: return f"IndirectObject({self.idnum!r}, {self.generation!r})" diff --git a/tests/test_generic.py b/tests/test_generic.py index e6e2d5c63..c7b79b308 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -3,7 +3,7 @@ import pytest -from PyPDF2 import PdfReader, PdfWriter +from PyPDF2 import PdfMerger, PdfReader, PdfWriter from PyPDF2.constants import TypFitArguments as TF from PyPDF2.errors import PdfReadError, PdfReadWarning, PdfStreamError from PyPDF2.generic import ( @@ -473,3 +473,21 @@ def test_bool_repr(): # cleanup os.remove("tmp-fields-report.txt") + + +def test_issue_997(): + url = "https://github.com/py-pdf/PyPDF2/files/8908874/Exhibit_A-2_930_Enterprise_Zone_Tax_Credits_final.pdf" + name = "gh-issue-997.pdf" + + merger = PdfMerger() + merged_filename = "tmp-out.pdf" + with pytest.warns(PdfReadWarning, match="not defined"): + merger.append( + BytesIO(get_pdf_from_url(url, name=name)) + ) # here the error raises + with open(merged_filename, "wb") as f: + merger.write(f) + merger.close() + + # cleanup + os.remove(merged_filename)