-
Notifications
You must be signed in to change notification settings - Fork 1.1k
bugfix: correct offsets when serializing a list of fixed sized list and non-zero start offset #7318
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
90a1016
1014c9f
1eb654e
a98f3bc
34b204a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1727,6 +1727,26 @@ fn write_array_data( | |
| write_options, | ||
| )?; | ||
| return Ok(offset); | ||
| } else if let DataType::FixedSizeList(_, fixed_size) = data_type { | ||
| assert_eq!(array_data.child_data().len(), 1); | ||
| let fixed_size = *fixed_size as usize; | ||
|
|
||
| let child_offset = array_data.offset() * fixed_size; | ||
| let child_length = array_data.len() * fixed_size; | ||
| let child_data = array_data.child_data()[0].slice(child_offset, child_length); | ||
|
|
||
| offset = write_array_data( | ||
| &child_data, | ||
| buffers, | ||
| arrow_data, | ||
| nodes, | ||
| offset, | ||
| child_data.len(), | ||
| child_data.null_count(), | ||
| compression_codec, | ||
| write_options, | ||
| )?; | ||
| return Ok(offset); | ||
| } else { | ||
| for buffer in array_data.buffers() { | ||
| offset = write_buffer( | ||
|
|
@@ -1837,6 +1857,9 @@ mod tests { | |
| use std::io::Cursor; | ||
| use std::io::Seek; | ||
|
|
||
| use arrow_array::builder::FixedSizeListBuilder; | ||
| use arrow_array::builder::Float32Builder; | ||
| use arrow_array::builder::Int64Builder; | ||
| use arrow_array::builder::MapBuilder; | ||
| use arrow_array::builder::UnionBuilder; | ||
| use arrow_array::builder::{GenericListBuilder, ListBuilder, StringBuilder}; | ||
|
|
@@ -3075,4 +3098,212 @@ mod tests { | |
| assert_eq!(stream_bytes_written_on_flush, expected_stream_flushed_bytes); | ||
| assert_eq!(file_bytes_written_on_flush, expected_file_flushed_bytes); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_roundtrip_list_of_fixed_list() -> Result<(), ArrowError> { | ||
| let l1_type = | ||
| DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, false)), 3); | ||
| let l2_type = DataType::List(Arc::new(Field::new("item", l1_type.clone(), false))); | ||
|
|
||
| let l0_builder = Float32Builder::new(); | ||
| let l1_builder = FixedSizeListBuilder::new(l0_builder, 3).with_field(Arc::new(Field::new( | ||
| "item", | ||
| DataType::Float32, | ||
| false, | ||
| ))); | ||
| let mut l2_builder = | ||
| ListBuilder::new(l1_builder).with_field(Arc::new(Field::new("item", l1_type, false))); | ||
|
|
||
| for point in [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]] { | ||
timsaucer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| l2_builder.values().values().append_value(point[0]); | ||
| l2_builder.values().values().append_value(point[1]); | ||
| l2_builder.values().values().append_value(point[2]); | ||
|
|
||
| l2_builder.values().append(true); | ||
| } | ||
| l2_builder.append(true); | ||
|
|
||
| let point = [10., 11., 12.]; | ||
| l2_builder.values().values().append_value(point[0]); | ||
| l2_builder.values().values().append_value(point[1]); | ||
| l2_builder.values().values().append_value(point[2]); | ||
|
|
||
| l2_builder.values().append(true); | ||
| l2_builder.append(true); | ||
|
|
||
| let array = Arc::new(l2_builder.finish()) as ArrayRef; | ||
|
|
||
| let schema = Arc::new(Schema::new_with_metadata( | ||
| vec![Field::new("points", l2_type, false)], | ||
| HashMap::default(), | ||
| )); | ||
|
|
||
| // Test a variety of combinations that include 0 and non-zero offsets | ||
| // and also portions or the rest of the array | ||
| test_slices(&array, &schema, 0, 1)?; | ||
| test_slices(&array, &schema, 0, 2)?; | ||
| test_slices(&array, &schema, 1, 1)?; | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_roundtrip_list_of_fixed_list_w_nulls() -> Result<(), ArrowError> { | ||
| let l0_builder = Float32Builder::new(); | ||
| let l1_builder = FixedSizeListBuilder::new(l0_builder, 3); | ||
| let mut l2_builder = ListBuilder::new(l1_builder); | ||
|
|
||
| for point in [ | ||
| [Some(1.0), Some(2.0), None], | ||
| [Some(4.0), Some(5.0), Some(6.0)], | ||
| [None, Some(8.0), Some(9.0)], | ||
| ] { | ||
| for p in point { | ||
| match p { | ||
| Some(p) => l2_builder.values().values().append_value(p), | ||
| None => l2_builder.values().values().append_null(), | ||
| } | ||
| } | ||
|
|
||
| l2_builder.values().append(true); | ||
| } | ||
| l2_builder.append(true); | ||
|
|
||
| let point = [Some(10.), None, None]; | ||
| for p in point { | ||
| match p { | ||
| Some(p) => l2_builder.values().values().append_value(p), | ||
| None => l2_builder.values().values().append_null(), | ||
| } | ||
| } | ||
|
|
||
| l2_builder.values().append(true); | ||
| l2_builder.append(true); | ||
|
Comment on lines
+3172
to
+3181
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, I wonder why this point
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To make this entry a different row of the outer list. |
||
|
|
||
| let array = Arc::new(l2_builder.finish()) as ArrayRef; | ||
|
|
||
| let schema = Arc::new(Schema::new_with_metadata( | ||
| vec![Field::new( | ||
| "points", | ||
| DataType::List(Arc::new(Field::new( | ||
| "item", | ||
| DataType::FixedSizeList( | ||
| Arc::new(Field::new("item", DataType::Float32, true)), | ||
| 3, | ||
| ), | ||
| true, | ||
| ))), | ||
| true, | ||
| )], | ||
| HashMap::default(), | ||
| )); | ||
|
|
||
| // Test a variety of combinations that include 0 and non-zero offsets | ||
| // and also portions or the rest of the array | ||
| test_slices(&array, &schema, 0, 1)?; | ||
| test_slices(&array, &schema, 0, 2)?; | ||
| test_slices(&array, &schema, 1, 1)?; | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| fn test_slices( | ||
| parent_array: &ArrayRef, | ||
| schema: &SchemaRef, | ||
| offset: usize, | ||
| length: usize, | ||
| ) -> Result<(), ArrowError> { | ||
| let subarray = parent_array.slice(offset, length); | ||
| let original_batch = RecordBatch::try_new(schema.clone(), vec![subarray])?; | ||
|
|
||
| let mut bytes = Vec::new(); | ||
| let mut writer = StreamWriter::try_new(&mut bytes, schema)?; | ||
| writer.write(&original_batch)?; | ||
| writer.finish()?; | ||
|
|
||
| let mut cursor = std::io::Cursor::new(bytes); | ||
| let mut reader = StreamReader::try_new(&mut cursor, None)?; | ||
| let returned_batch = reader.next().unwrap()?; | ||
|
|
||
| assert_eq!(original_batch, returned_batch); | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_roundtrip_fixed_list() -> Result<(), ArrowError> { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we add another test with null value too? |
||
| let int_builder = Int64Builder::new(); | ||
| let mut fixed_list_builder = FixedSizeListBuilder::new(int_builder, 3) | ||
| .with_field(Arc::new(Field::new("item", DataType::Int64, false))); | ||
|
|
||
| for point in [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] { | ||
| fixed_list_builder.values().append_value(point[0]); | ||
| fixed_list_builder.values().append_value(point[1]); | ||
| fixed_list_builder.values().append_value(point[2]); | ||
|
|
||
| fixed_list_builder.append(true); | ||
| } | ||
|
|
||
| let array = Arc::new(fixed_list_builder.finish()) as ArrayRef; | ||
|
|
||
| let schema = Arc::new(Schema::new_with_metadata( | ||
| vec![Field::new( | ||
| "points", | ||
| DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, false)), 3), | ||
| false, | ||
| )], | ||
| HashMap::default(), | ||
| )); | ||
|
|
||
| // Test a variety of combinations that include 0 and non-zero offsets | ||
| // and also portions or the rest of the array | ||
| test_slices(&array, &schema, 0, 4)?; | ||
| test_slices(&array, &schema, 0, 2)?; | ||
| test_slices(&array, &schema, 1, 3)?; | ||
| test_slices(&array, &schema, 2, 1)?; | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_roundtrip_fixed_list_w_nulls() -> Result<(), ArrowError> { | ||
| let int_builder = Int64Builder::new(); | ||
| let mut fixed_list_builder = FixedSizeListBuilder::new(int_builder, 3); | ||
|
|
||
| for point in [ | ||
| [Some(1), Some(2), None], | ||
| [Some(4), Some(5), Some(6)], | ||
| [None, Some(8), Some(9)], | ||
| [Some(10), None, None], | ||
| ] { | ||
| for p in point { | ||
| match p { | ||
| Some(p) => fixed_list_builder.values().append_value(p), | ||
| None => fixed_list_builder.values().append_null(), | ||
| } | ||
| } | ||
|
|
||
| fixed_list_builder.append(true); | ||
| } | ||
|
|
||
| let array = Arc::new(fixed_list_builder.finish()) as ArrayRef; | ||
|
|
||
| let schema = Arc::new(Schema::new_with_metadata( | ||
| vec![Field::new( | ||
| "points", | ||
| DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, true)), 3), | ||
| true, | ||
timsaucer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| )], | ||
| HashMap::default(), | ||
| )); | ||
|
|
||
| // Test a variety of combinations that include 0 and non-zero offsets | ||
| // and also portions or the rest of the array | ||
| test_slices(&array, &schema, 0, 4)?; | ||
| test_slices(&array, &schema, 0, 2)?; | ||
| test_slices(&array, &schema, 1, 3)?; | ||
| test_slices(&array, &schema, 2, 1)?; | ||
|
|
||
| Ok(()) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I verified that this test fails without the code in this PR