Skip to content

Commit f2864d6

Browse files
SEC: Avoid infinite loop when reading broken DCT-based inline images (#3501)
1 parent b751ca2 commit f2864d6

File tree

2 files changed: +31 −7 lines changed

pypdf/generic/_image_inline.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -148,18 +148,26 @@ def extract_inline_DCT(stream: StreamType) -> bytes:
148148
Extract DCT (JPEG) stream from inline image.
149149
The stream will be moved onto the EI.
150150
"""
151+
def read(length: int) -> bytes:
152+
# If 0 bytes are returned, and *size* was not 0, this indicates end of file.
153+
# If the object is in non-blocking mode and no bytes are available, `None` is returned.
154+
_result = stream.read(length)
155+
if _result is None or len(_result) != length:
156+
raise PdfReadError("Unexpected end of stream")
157+
return _result
158+
151159
data_out: bytes = b""
152160
# Read Blocks of data (ID/Size/data) up to ID=FF/D9
153161
# https://www.digicamsoft.com/itu/itu-t81-36.html
154-
notfirst = False
162+
not_first = False
155163
while True:
156-
c = stream.read(1)
157-
if notfirst or (c == b"\xff"):
164+
c = read(1)
165+
if not_first or (c == b"\xff"):
158166
data_out += c
159167
if c != b"\xff":
160168
continue
161-
notfirst = True
162-
c = stream.read(1)
169+
not_first = True
170+
c = read(1)
163171
data_out += c
164172
if c == b"\xff":
165173
stream.seek(-1, 1) # pragma: no cover
@@ -172,10 +180,10 @@ def extract_inline_DCT(stream: StreamType) -> bytes:
172180
b"\xda\xdb\xdc\xdd\xde\xdf"
173181
b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xfe"
174182
):
175-
c = stream.read(2)
183+
c = read(2)
176184
data_out += c
177185
sz = c[0] * 256 + c[1]
178-
data_out += stream.read(sz - 2)
186+
data_out += read(sz - 2)
179187

180188
ei_tok = read_non_whitespace(stream)
181189
ei_tok += stream.read(2)

tests/generic/test_image_inline.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,12 @@
11
"""Test the pypdf.generic._image_inline module."""
22
from io import BytesIO
33

4+
import pytest
5+
6+
from pypdf import PdfReader
7+
from pypdf.errors import PdfReadError
48
from pypdf.generic._image_inline import is_followed_by_binary_data
9+
from tests import get_data_from_url
510

611

712
def test_is_followed_by_binary_data():
@@ -59,3 +64,14 @@ def test_is_followed_by_binary_data():
5964

6065
stream = BytesIO(b"1234.56 42 13 37 10 20 c\n")
6166
assert not is_followed_by_binary_data(stream)
67+
68+
69+
@pytest.mark.enable_socket
70+
def test_extract_inline_dct__early_end_of_file():
71+
url = "https://github.com/user-attachments/files/23056988/inline_dct__early_eof.pdf"
72+
name = "inline_dct__early_eof.pdf"
73+
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
74+
page = reader.pages[0]
75+
76+
with pytest.raises(expected_exception=PdfReadError, match=r"^Unexpected end of stream$"):
77+
page.images[0].image.load()

0 commit comments

Comments
 (0)