Skip to content

Commit 58f086f

Browse files
emilk and tustvold authored
Add Array::shrink_to_fit(&mut self) to 53.4.0 (#6790) (#6817) (#6962)
* Add `Array::shrink_to_fit(&mut self)` (#6790)
* Add `Array::shrink_to_fit`
* Test that shrink_to_fit actually frees memory
* Make sure the buffer isn't shared in the test of shrink_to_fit
* Remove `#[inline]`
* Use `realloc` to reallocate the bytes
* Clean up test
* Improve docstring for `Array::shrink_to_fit`
Co-authored-by: Raphael Taylor-Davies <[email protected]>
* `Buffer::shrink_to_fit`: ignore shared buffers
* Improve comment in `ArrayRef::shrink_to_fit`
* Document why `try_realloc` is safe, and actually make it safe :)
* Improve testing of shrink_to_fit
* Fix a few corner cases, and improve test
* Add license header to new test file
---------
Co-authored-by: Raphael Taylor-Davies <[email protected]>
* Support shrink to empty (#6817)
* Support shrink to empty
* Docs
* Format
---------
Co-authored-by: Raphael Taylor-Davies <[email protected]>
1 parent 962c92f commit 58f086f

23 files changed

+428
-20
lines changed

arrow-array/src/array/boolean_array.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,13 @@ impl Array for BooleanArray {
308308
self.values.is_empty()
309309
}
310310

311+
fn shrink_to_fit(&mut self) {
312+
self.values.shrink_to_fit();
313+
if let Some(nulls) = &mut self.nulls {
314+
nulls.shrink_to_fit();
315+
}
316+
}
317+
311318
fn offset(&self) -> usize {
312319
self.values.offset()
313320
}

arrow-array/src/array/byte_array.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,14 @@ impl<T: ByteArrayType> Array for GenericByteArray<T> {
453453
self.value_offsets.len() <= 1
454454
}
455455

456+
fn shrink_to_fit(&mut self) {
457+
self.value_offsets.shrink_to_fit();
458+
self.value_data.shrink_to_fit();
459+
if let Some(nulls) = &mut self.nulls {
460+
nulls.shrink_to_fit();
461+
}
462+
}
463+
456464
fn offset(&self) -> usize {
457465
0
458466
}

arrow-array/src/array/byte_view_array.rs

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -430,31 +430,31 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
430430
///
431431
/// Before GC:
432432
/// ```text
433-
/// ┌──────┐
434-
/// │......│
435-
/// │......│
436-
/// ┌────────────────────┐ ┌ ─ ─ ─ ▶ │Data1 │ Large buffer
433+
/// ┌──────┐
434+
/// │......│
435+
/// │......│
436+
/// ┌────────────────────┐ ┌ ─ ─ ─ ▶ │Data1 │ Large buffer
437437
/// │ View 1 │─ ─ ─ ─ │......│ with data that
438438
/// ├────────────────────┤ │......│ is not referred
439439
/// │ View 2 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data2 │ to by View 1 or
440-
/// └────────────────────┘ │......│ View 2
441-
/// │......│
442-
/// 2 views, refer to │......│
443-
/// small portions of a └──────┘
444-
/// large buffer
440+
/// └────────────────────┘ │......│ View 2
441+
/// │......│
442+
/// 2 views, refer to │......│
443+
/// small portions of a └──────┘
444+
/// large buffer
445445
/// ```
446-
///
446+
///
447447
/// After GC:
448448
///
449449
/// ```text
450450
/// ┌────────────────────┐ ┌─────┐ After gc, only
451-
/// │ View 1 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data1│ data that is
452-
/// ├────────────────────┤ ┌ ─ ─ ─ ▶ │Data2│ pointed to by
453-
/// │ View 2 │─ ─ ─ ─ └─────┘ the views is
454-
/// └────────────────────┘ left
455-
///
456-
///
457-
/// 2 views
451+
/// │ View 1 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data1│ data that is
452+
/// ├────────────────────┤ ┌ ─ ─ ─ ▶ │Data2│ pointed to by
453+
/// │ View 2 │─ ─ ─ ─ └─────┘ the views is
454+
/// └────────────────────┘ left
455+
///
456+
///
457+
/// 2 views
458458
/// ```
459459
/// This method will compact the data buffers by recreating the view array and only include the data
460460
/// that is pointed to by the views.
@@ -575,6 +575,15 @@ impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
575575
self.views.is_empty()
576576
}
577577

578+
fn shrink_to_fit(&mut self) {
579+
self.views.shrink_to_fit();
580+
self.buffers.iter_mut().for_each(|b| b.shrink_to_fit());
581+
self.buffers.shrink_to_fit();
582+
if let Some(nulls) = &mut self.nulls {
583+
nulls.shrink_to_fit();
584+
}
585+
}
586+
578587
fn offset(&self) -> usize {
579588
0
580589
}

arrow-array/src/array/dictionary_array.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,11 @@ impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
720720
self.keys.is_empty()
721721
}
722722

723+
fn shrink_to_fit(&mut self) {
724+
self.keys.shrink_to_fit();
725+
self.values.shrink_to_fit();
726+
}
727+
723728
fn offset(&self) -> usize {
724729
self.keys.offset()
725730
}

arrow-array/src/array/fixed_size_binary_array.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,13 @@ impl Array for FixedSizeBinaryArray {
602602
self.len == 0
603603
}
604604

605+
fn shrink_to_fit(&mut self) {
606+
self.value_data.shrink_to_fit();
607+
if let Some(nulls) = &mut self.nulls {
608+
nulls.shrink_to_fit();
609+
}
610+
}
611+
605612
fn offset(&self) -> usize {
606613
0
607614
}

arrow-array/src/array/fixed_size_list_array.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,13 @@ impl Array for FixedSizeListArray {
401401
self.len == 0
402402
}
403403

404+
fn shrink_to_fit(&mut self) {
405+
self.values.shrink_to_fit();
406+
if let Some(nulls) = &mut self.nulls {
407+
nulls.shrink_to_fit();
408+
}
409+
}
410+
404411
fn offset(&self) -> usize {
405412
0
406413
}

arrow-array/src/array/list_array.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,14 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
485485
self.value_offsets.len() <= 1
486486
}
487487

488+
fn shrink_to_fit(&mut self) {
489+
if let Some(nulls) = &mut self.nulls {
490+
nulls.shrink_to_fit();
491+
}
492+
self.values.shrink_to_fit();
493+
self.value_offsets.shrink_to_fit();
494+
}
495+
488496
fn offset(&self) -> usize {
489497
0
490498
}

arrow-array/src/array/list_view_array.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,15 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {
326326
self.value_sizes.is_empty()
327327
}
328328

329+
fn shrink_to_fit(&mut self) {
330+
if let Some(nulls) = &mut self.nulls {
331+
nulls.shrink_to_fit();
332+
}
333+
self.values.shrink_to_fit();
334+
self.value_offsets.shrink_to_fit();
335+
self.value_sizes.shrink_to_fit();
336+
}
337+
329338
fn offset(&self) -> usize {
330339
0
331340
}

arrow-array/src/array/map_array.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,14 @@ impl Array for MapArray {
372372
self.value_offsets.len() <= 1
373373
}
374374

375+
fn shrink_to_fit(&mut self) {
376+
if let Some(nulls) = &mut self.nulls {
377+
nulls.shrink_to_fit();
378+
}
379+
self.entries.shrink_to_fit();
380+
self.value_offsets.shrink_to_fit();
381+
}
382+
375383
fn offset(&self) -> usize {
376384
0
377385
}

arrow-array/src/array/mod.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,12 @@ pub trait Array: std::fmt::Debug + Send + Sync {
167167
/// ```
168168
fn is_empty(&self) -> bool;
169169

170+
/// Shrinks the capacity of any exclusively owned buffer as much as possible
171+
///
172+
/// Shared or externally allocated buffers will be ignored, and
173+
/// any buffer offsets will be preserved.
174+
fn shrink_to_fit(&mut self) {}
175+
170176
/// Returns the offset into the underlying data used by this array(-slice).
171177
/// Note that the underlying data can be shared by many arrays.
172178
/// This defaults to `0`.
@@ -366,6 +372,15 @@ impl Array for ArrayRef {
366372
self.as_ref().is_empty()
367373
}
368374

375+
/// For shared buffers, this is a no-op.
376+
fn shrink_to_fit(&mut self) {
377+
if let Some(slf) = Arc::get_mut(self) {
378+
slf.shrink_to_fit();
379+
} else {
380+
// We ignore shared buffers.
381+
}
382+
}
383+
369384
fn offset(&self) -> usize {
370385
self.as_ref().offset()
371386
}

0 commit comments

Comments
 (0)