Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
231 changes: 231 additions & 0 deletions arrow-ipc/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1727,6 +1727,26 @@ fn write_array_data(
write_options,
)?;
return Ok(offset);
} else if let DataType::FixedSizeList(_, fixed_size) = data_type {
assert_eq!(array_data.child_data().len(), 1);
let fixed_size = *fixed_size as usize;

let child_offset = array_data.offset() * fixed_size;
let child_length = array_data.len() * fixed_size;
let child_data = array_data.child_data()[0].slice(child_offset, child_length);

offset = write_array_data(
&child_data,
buffers,
arrow_data,
nodes,
offset,
child_data.len(),
child_data.null_count(),
compression_codec,
write_options,
)?;
return Ok(offset);
} else {
for buffer in array_data.buffers() {
offset = write_buffer(
Expand Down Expand Up @@ -1837,6 +1857,9 @@ mod tests {
use std::io::Cursor;
use std::io::Seek;

use arrow_array::builder::FixedSizeListBuilder;
use arrow_array::builder::Float32Builder;
use arrow_array::builder::Int64Builder;
use arrow_array::builder::MapBuilder;
use arrow_array::builder::UnionBuilder;
use arrow_array::builder::{GenericListBuilder, ListBuilder, StringBuilder};
Expand Down Expand Up @@ -3075,4 +3098,212 @@ mod tests {
assert_eq!(stream_bytes_written_on_flush, expected_stream_flushed_bytes);
assert_eq!(file_bytes_written_on_flush, expected_file_flushed_bytes);
}

#[test]
fn test_roundtrip_list_of_fixed_list() -> Result<(), ArrowError> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I verified that this test fails without the code in this PR

assertion `left == right` failed
  left: RecordBatch { schema: Schema { fields: [Field { name: "points", data_type: List(Field { name: "item", data_type: FixedSizeList(Field { name: "item", data_type: Float32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }, columns: [ListArray
[
  FixedSizeListArray<3>
[
  PrimitiveArray<Float32>
[
  10.0,
  11.0,
  12.0,
],
],
]], row_count: 1 }
 right: RecordBatch { schema: Schema { fields: [Field { name: "points", data_type: List(Field { name: "item", data_type: FixedSizeList(Field { name: "item", data_type: Float32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }, columns: [ListArray
[
  FixedSizeListArray<3>
[
  PrimitiveArray<Float32>
[
  1.0,
  2.0,
  3.0,
],
],
]], row_count: 1 }

<Click to see difference>

thread 'writer::tests::test_roundtrip_list_of_fixed_list' panicked at arrow-ipc/src/writer.rs:3150:9:
assertion `left == right` failed
  left: RecordBatch { schema: Schema { fields: [Field { name: "points", data_type: List(Field { name: "item", data_type: FixedSizeList(Field { name: "item", data_type: Float32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }, columns: [ListArray
[
  FixedSizeListArray<3>
[
  PrimitiveArray<Float32>
[
  10.0,
  11.0,
  12.0,
],
],
]], row_count: 1 }
 right: RecordBatch { schema: Schema { fields: [Field { name: "points", data_type: List(Field { name: "item", data_type: FixedSizeList(Field { name: "item", data_type: Float32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 3), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }, columns: [ListArray
[
  FixedSizeListArray<3>
[
  PrimitiveArray<Float32>
[
  1.0,
  2.0,
  3.0,
],
],
]], row_count: 1 }
stack backtrace:
   0: rust_begin_unwind
             at /rustc/4eb161250e340c8f48f66e2b929ef4a5bed7c181/library/std/src/panicking.rs:692:5
   1: core::panicking::panic_fmt
             at /rustc/4eb161250e340c8f48f66e2b929ef4a5bed7c181/library/core/src/panicking.rs:75:14
   2: core::panicking::assert_failed_inner
   3: core::panicking::assert_failed
             at /Users/andrewlamb/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/panicking.rs:364:5
   4: arrow_ipc::writer::tests::test_subarray
             at ./src/writer.rs:3150:9
   5: arrow_ipc::writer::tests::test_roundtrip_list_of_fixed_list
             at ./src/writer.rs:3127:9
   6: arrow_ipc::writer::tests::test_roundtrip_list_of_fixed_list::{{closure}}
             at ./src/writer.rs:3083:47
   7: core::ops::function::FnOnce::call_once
             at /Users/andrewlamb/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/ops/function.rs:250:5
   8: core::ops::function::FnOnce::call_once
             at /rustc/4eb161250e340c8f48f66e2b929ef4a5bed7c181/library/core/src/ops/function.rs:250:5
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.

let l1_type =
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, false)), 3);
let l2_type = DataType::List(Arc::new(Field::new("item", l1_type.clone(), false)));

let l0_builder = Float32Builder::new();
let l1_builder = FixedSizeListBuilder::new(l0_builder, 3).with_field(Arc::new(Field::new(
"item",
DataType::Float32,
false,
)));
let mut l2_builder =
ListBuilder::new(l1_builder).with_field(Arc::new(Field::new("item", l1_type, false)));

for point in [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]] {
l2_builder.values().values().append_value(point[0]);
l2_builder.values().values().append_value(point[1]);
l2_builder.values().values().append_value(point[2]);

l2_builder.values().append(true);
}
l2_builder.append(true);

let point = [10., 11., 12.];
l2_builder.values().values().append_value(point[0]);
l2_builder.values().values().append_value(point[1]);
l2_builder.values().values().append_value(point[2]);

l2_builder.values().append(true);
l2_builder.append(true);

let array = Arc::new(l2_builder.finish()) as ArrayRef;

let schema = Arc::new(Schema::new_with_metadata(
vec![Field::new("points", l2_type, false)],
HashMap::default(),
));

// Test a variety of combinations that include 0 and non-zero offsets
// and also portions or the rest of the array
test_slices(&array, &schema, 0, 1)?;
test_slices(&array, &schema, 0, 2)?;
test_slices(&array, &schema, 1, 1)?;

Ok(())
}

#[test]
fn test_roundtrip_list_of_fixed_list_w_nulls() -> Result<(), ArrowError> {
let l0_builder = Float32Builder::new();
let l1_builder = FixedSizeListBuilder::new(l0_builder, 3);
let mut l2_builder = ListBuilder::new(l1_builder);

for point in [
[Some(1.0), Some(2.0), None],
[Some(4.0), Some(5.0), Some(6.0)],
[None, Some(8.0), Some(9.0)],
] {
for p in point {
match p {
Some(p) => l2_builder.values().values().append_value(p),
None => l2_builder.values().values().append_null(),
}
}

l2_builder.values().append(true);
}
l2_builder.append(true);

let point = [Some(10.), None, None];
for p in point {
match p {
Some(p) => l2_builder.values().values().append_value(p),
None => l2_builder.values().values().append_null(),
}
}

l2_builder.values().append(true);
l2_builder.append(true);
Comment on lines +3172 to +3181
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I wonder why this point `[Some(10.), None, None]` needs to be appended separately. Can't it be in the above loop too?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To make this entry a different row of the outer list.


let array = Arc::new(l2_builder.finish()) as ArrayRef;

let schema = Arc::new(Schema::new_with_metadata(
vec![Field::new(
"points",
DataType::List(Arc::new(Field::new(
"item",
DataType::FixedSizeList(
Arc::new(Field::new("item", DataType::Float32, true)),
3,
),
true,
))),
true,
)],
HashMap::default(),
));

// Test a variety of combinations that include 0 and non-zero offsets
// and also portions or the rest of the array
test_slices(&array, &schema, 0, 1)?;
test_slices(&array, &schema, 0, 2)?;
test_slices(&array, &schema, 1, 1)?;

Ok(())
}

/// Round-trips one slice of `parent_array` through the IPC stream format.
///
/// Slices `parent_array` at `offset` for `length` rows, wraps the slice in a
/// single-column [`RecordBatch`] using `schema`, writes it with a
/// [`StreamWriter`] into an in-memory buffer, reads the first batch back with
/// a [`StreamReader`], and asserts that it equals the batch that was written.
///
/// # Errors
///
/// Propagates any [`ArrowError`] raised while building the batch, writing the
/// stream, or reading it back.
///
/// # Panics
///
/// Panics if the reader yields no batch, or if the decoded batch differs from
/// the one that was written.
fn test_slices(
    parent_array: &ArrayRef,
    schema: &SchemaRef,
    offset: usize,
    length: usize,
) -> Result<(), ArrowError> {
    let sliced = parent_array.slice(offset, length);
    let expected = RecordBatch::try_new(schema.clone(), vec![sliced])?;

    // Encode: the writer mutably borrows the buffer only inside this scope.
    let mut encoded = Vec::new();
    {
        let mut stream_writer = StreamWriter::try_new(&mut encoded, schema)?;
        stream_writer.write(&expected)?;
        stream_writer.finish()?;
    }

    // Decode: only the first batch is inspected.
    let mut stream_reader = StreamReader::try_new(std::io::Cursor::new(encoded), None)?;
    let actual = stream_reader.next().unwrap()?;

    assert_eq!(expected, actual);

    Ok(())
}

#[test]
fn test_roundtrip_fixed_list() -> Result<(), ArrowError> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add another test with null values too?

let int_builder = Int64Builder::new();
let mut fixed_list_builder = FixedSizeListBuilder::new(int_builder, 3)
.with_field(Arc::new(Field::new("item", DataType::Int64, false)));

for point in [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] {
fixed_list_builder.values().append_value(point[0]);
fixed_list_builder.values().append_value(point[1]);
fixed_list_builder.values().append_value(point[2]);

fixed_list_builder.append(true);
}

let array = Arc::new(fixed_list_builder.finish()) as ArrayRef;

let schema = Arc::new(Schema::new_with_metadata(
vec![Field::new(
"points",
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, false)), 3),
false,
)],
HashMap::default(),
));

// Test a variety of combinations that include 0 and non-zero offsets
// and also portions or the rest of the array
test_slices(&array, &schema, 0, 4)?;
test_slices(&array, &schema, 0, 2)?;
test_slices(&array, &schema, 1, 3)?;
test_slices(&array, &schema, 2, 1)?;

Ok(())
}

/// Checks that slices of a fixed-size list of `Int64` (list size 3) whose
/// child values include nulls survive an IPC round trip via `test_slices`.
#[test]
fn test_roundtrip_fixed_list_w_nulls() -> Result<(), ArrowError> {
    let rows = [
        [Some(1), Some(2), None],
        [Some(4), Some(5), Some(6)],
        [None, Some(8), Some(9)],
        [Some(10), None, None],
    ];

    let mut list_builder = FixedSizeListBuilder::new(Int64Builder::new(), 3);
    for row in rows {
        for cell in row {
            // `Some` appends a value, `None` appends a null child entry.
            list_builder.values().append_option(cell);
        }

        // The list slot itself is always valid; only child values may be null.
        list_builder.append(true);
    }
    let array: ArrayRef = Arc::new(list_builder.finish());

    let item_field = Arc::new(Field::new("item", DataType::Int64, true));
    let points_field = Field::new("points", DataType::FixedSizeList(item_field, 3), true);
    let schema = Arc::new(Schema::new_with_metadata(
        vec![points_field],
        HashMap::default(),
    ));

    // (offset, length) pairs covering zero and non-zero offsets, as well as
    // slices that stop early and slices that run to the end of the array.
    for (offset, length) in [(0, 4), (0, 2), (1, 3), (2, 1)] {
        test_slices(&array, &schema, offset, length)?;
    }

    Ok(())
}
}
Loading