@@ -43,6 +43,7 @@ use datafusion_common_runtime::SpawnedTask;
4343use datafusion_execution:: disk_manager:: RefCountedTempFile ;
4444use datafusion_execution:: RecordBatchStream ;
4545use futures:: { FutureExt as _, Stream } ;
46+ use log:: warn;
4647
4748/// Stream that reads spill files from disk where each batch is read in a spawned blocking task
4849/// It will read one batch at a time and will not do any buffering, to buffer data use [`crate::common::spawn_buffered`]
@@ -54,12 +55,15 @@ use futures::{FutureExt as _, Stream};
5455struct SpillReaderStream {
5556 schema : SchemaRef ,
5657 state : SpillReaderStreamState ,
57- /// how much memory the largest memory batch is taking
58- pub max_record_batch_memory : Option < usize > ,
58+ /// Maximum memory size observed among spilling sorted record batches.
59+ /// This is used for validation purposes during reading each RecordBatch from spill.
60+ /// For context on why this value is recorded and validated,
61+ /// see `physical_plan/sort/multi_level_merge.rs`.
62+ max_record_batch_memory : Option < usize > ,
5963}
6064
6165// Small margin allowed to accommodate slight memory accounting variation
62- const MEMORY_MARGIN : usize = 4096 ;
66+ const SPILL_BATCH_MEMORY_MARGIN : usize = 4096 ;
6367
6468/// When we poll for the next batch, we will get back both the batch and the reader,
6569/// so we can call `next` again.
@@ -141,19 +145,15 @@ impl SpillReaderStream {
141145 let actual_size =
142146 get_record_batch_memory_size ( & batch) ;
143147 if actual_size
144- > max_record_batch_memory + MEMORY_MARGIN
148+ > max_record_batch_memory
149+ + SPILL_BATCH_MEMORY_MARGIN
145150 {
146- return Poll :: Ready ( Some ( Err (
147- DataFusionError :: ResourcesExhausted (
148- format ! (
149- "Record batch memory usage ({actual_size} bytes) exceeds the expected limit ({max_record_batch_memory} bytes)\n
150- by more than the allowed tolerance ({MEMORY_MARGIN} bytes).\n
151- This likely indicates a bug in memory accounting during spilling.\n
152- Please report this issue" ,
153- )
154- . to_owned ( ) ,
155- ) ,
156- ) ) ) ;
151+ warn ! (
152+ "Record batch memory usage ({actual_size} bytes) exceeds the expected limit ({max_record_batch_memory} bytes) \n \
153+ by more than the allowed tolerance ({SPILL_BATCH_MEMORY_MARGIN} bytes).\n \
154+ This likely indicates a bug in memory accounting during spilling.\n \
155+ Please report this issue in https://github.com/apache/datafusion/issues/17340."
156+ ) ;
157157 }
158158 }
159159 self . state = SpillReaderStreamState :: Waiting ( reader) ;
0 commit comments