Skip to content

Commit ae1ed6d

Browse files
committed
double the amount of memory needed to sort
1 parent af6b5c5 commit ae1ed6d

File tree

2 files changed

+11
-3
lines changed

2 files changed

+11
-3
lines changed

datafusion/physical-plan/src/sorts/multi_level_merge.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ use datafusion_execution::memory_pool::{
3232
MemoryConsumer, MemoryPool, MemoryReservation, UnboundedMemoryPool,
3333
};
3434

35+
use crate::sorts::sort::get_reserved_byte_for_record_batch_size;
3536
use crate::sorts::streaming_merge::{SortedSpillFile, StreamingMergeBuilder};
3637
use crate::stream::RecordBatchStreamAdapter;
3738
use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream};
@@ -249,7 +250,9 @@ impl MultiLevelMergeBuilder {
249250
for spill in &self.sorted_spill_files {
250251
// For memory pools that are not shared this is good, for other this is not
251252
// and there should be some upper limit to memory reservation so we won't starve the system
252-
match reservation.try_grow(spill.max_record_batch_memory * buffer_len) {
253+
match reservation.try_grow(get_reserved_byte_for_record_batch_size(
254+
spill.max_record_batch_memory * buffer_len,
255+
)) {
253256
Ok(_) => {
254257
number_of_spills_to_read_for_current_phase += 1;
255258
}

datafusion/physical-plan/src/sorts/sort.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -784,11 +784,16 @@ impl ExternalSorter {
784784
/// in sorting and merging. The sorted copies are in either row format or array format.
785785
/// Please refer to cursor.rs and stream.rs for more details. No matter what format the
786786
/// sorted copies are, they will use more memory than the original record batch.
787-
fn get_reserved_byte_for_record_batch(batch: &RecordBatch) -> usize {
787+
pub(crate) fn get_reserved_byte_for_record_batch_size(record_batch_size: usize) -> usize {
788788
// 2x may not be enough for some cases, but it's a good start.
789789
// If 2x is not enough, user can set a larger value for `sort_spill_reservation_bytes`
790790
// to compensate for the extra memory needed.
791-
get_record_batch_memory_size(batch) * 2
791+
record_batch_size * 2
792+
}
793+
794+
/// Estimate how much memory is needed to sort a `RecordBatch`.
795+
fn get_reserved_byte_for_record_batch(batch: &RecordBatch) -> usize {
796+
get_reserved_byte_for_record_batch_size(get_record_batch_memory_size(batch))
792797
}
793798

794799
impl Debug for ExternalSorter {

0 commit comments

Comments
 (0)