From d224430bc44413cd27c4ed7d5034f2e68ea0b5f7 Mon Sep 17 00:00:00 2001 From: exiledkingcc Date: Fri, 18 Aug 2023 13:37:55 +0800 Subject: [PATCH] SEC: Avoid endless recursion of reading damaged PDF file (#2093) Fixes #140 --- pypdf/_reader.py | 5 ++++- tests/test_reader.py | 9 +++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pypdf/_reader.py b/pypdf/_reader.py index ac32f0ffa..a3be9203a 100644 --- a/pypdf/_reader.py +++ b/pypdf/_reader.py @@ -1230,7 +1230,10 @@ def _flatten( addt = {} if isinstance(page, IndirectObject): addt["indirect_reference"] = page - self._flatten(page.get_object(), inherit, **addt) + obj = page.get_object() + if obj: + # damaged file may have invalid child in /Pages + self._flatten(obj, inherit, **addt) elif t == "/Page": for attr_in, value in list(inherit.items()): # if the page has it's own value, it does not inherit the diff --git a/tests/test_reader.py b/tests/test_reader.py index 69ef80e94..87c5a688f 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -1450,3 +1450,12 @@ def test_iss2082(): bb[b.find(b"xref") + 2] = ord(b"E") with pytest.raises(PdfReadError): reader = PdfReader(BytesIO(bb)) + + +@pytest.mark.enable_socket() +def test_issue_140(): + url = "https://github.com/py-pdf/pypdf/files/12168578/bad_pdf_example.pdf" + name = "issue-140.pdf" + b = get_data_from_url(url, name=name) + reader = PdfReader(BytesIO(b)) + assert (len(reader.pages) == 54)