2 files changed: +31 -7 lines changed
lines changed Original file line number Diff line number Diff line change @@ -148,18 +148,26 @@ def extract_inline_DCT(stream: StreamType) -> bytes:
148148 Extract DCT (JPEG) stream from inline image.
149149 The stream will be moved onto the EI.
150150 """
151+ def read (length : int ) -> bytes :
152+ # A result shorter than *length* (including 0 bytes when *length* was not 0) indicates end of file.
153+ # If the object is in non-blocking mode and no bytes are available, `None` is returned.
154+ _result = stream .read (length )
155+ if _result is None or len (_result ) != length :
156+ raise PdfReadError ("Unexpected end of stream" )
157+ return _result
158+
151159 data_out : bytes = b""
152160 # Read Blocks of data (ID/Size/data) up to ID=FF/D9
153161 # https://www.digicamsoft.com/itu/itu-t81-36.html
154- notfirst = False
162+ not_first = False
155163 while True :
156- c = stream . read (1 )
157- if notfirst or (c == b"\xff " ):
164+ c = read (1 )
165+ if not_first or (c == b"\xff " ):
158166 data_out += c
159167 if c != b"\xff " :
160168 continue
161- notfirst = True
162- c = stream . read (1 )
169+ not_first = True
170+ c = read (1 )
163171 data_out += c
164172 if c == b"\xff " :
165173 stream .seek (- 1 , 1 ) # pragma: no cover
@@ -172,10 +180,10 @@ def extract_inline_DCT(stream: StreamType) -> bytes:
172180 b"\xda \xdb \xdc \xdd \xde \xdf "
173181 b"\xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xfe "
174182 ):
175- c = stream . read (2 )
183+ c = read (2 )
176184 data_out += c
177185 sz = c [0 ] * 256 + c [1 ]
178- data_out += stream . read (sz - 2 )
186+ data_out += read (sz - 2 )
179187
180188 ei_tok = read_non_whitespace (stream )
181189 ei_tok += stream .read (2 )
Original file line number Diff line number Diff line change 11"""Test the pypdf.generic._image_inline module."""
22from io import BytesIO
33
4+ import pytest
5+
6+ from pypdf import PdfReader
7+ from pypdf .errors import PdfReadError
48from pypdf .generic ._image_inline import is_followed_by_binary_data
9+ from tests import get_data_from_url
510
611
712def test_is_followed_by_binary_data ():
@@ -59,3 +64,14 @@ def test_is_followed_by_binary_data():
5964
6065 stream = BytesIO (b"1234.56 42 13 37 10 20 c\n " )
6166 assert not is_followed_by_binary_data (stream )
67+
68+
69+ @pytest .mark .enable_socket
70+ def test_extract_inline_dct__early_end_of_file ():
71+ url = "https://github.com/user-attachments/files/23056988/inline_dct__early_eof.pdf"
72+ name = "inline_dct__early_eof.pdf"
73+ reader = PdfReader (BytesIO (get_data_from_url (url , name = name )))
74+ page = reader .pages [0 ]
75+
76+ with pytest .raises (expected_exception = PdfReadError , match = r"^Unexpected end of stream$" ):
77+ page .images [0 ].image .load ()
You can’t perform that action at this time.
0 commit comments