diff --git a/pypdf/_reader.py b/pypdf/_reader.py
index ac32f0ffa..a3be9203a 100644
--- a/pypdf/_reader.py
+++ b/pypdf/_reader.py
@@ -1230,7 +1230,10 @@ def _flatten(
                 addt = {}
                 if isinstance(page, IndirectObject):
                     addt["indirect_reference"] = page
-                self._flatten(page.get_object(), inherit, **addt)
+                obj = page.get_object()
+                if obj:
+                    # a damaged file may have an invalid child in /Pages: skip it instead of recursing
+                    self._flatten(obj, inherit, **addt)
         elif t == "/Page":
             for attr_in, value in list(inherit.items()):
                 # if the page has it's own value, it does not inherit the
diff --git a/tests/test_reader.py b/tests/test_reader.py
index 69ef80e94..87c5a688f 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -1450,3 +1450,12 @@ def test_iss2082():
     bb[b.find(b"xref") + 2] = ord(b"E")
     with pytest.raises(PdfReadError):
         reader = PdfReader(BytesIO(bb))
+
+
+@pytest.mark.enable_socket()
+def test_issue_140():
+    url = "https://github.com/py-pdf/pypdf/files/12168578/bad_pdf_example.pdf"
+    name = "issue-140.pdf"
+    b = get_data_from_url(url, name=name)
+    reader = PdfReader(BytesIO(b))
+    assert len(reader.pages) == 54