diff --git a/datafusion/functions-nested/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs index 1b74af643c0c..49f4110faeaa 100644 --- a/datafusion/functions-nested/src/flatten.rs +++ b/datafusion/functions-nested/src/flatten.rs @@ -96,6 +96,7 @@ impl ScalarUDFImpl for Flatten { let data_type = match &arg_types[0] { List(field) => match field.data_type() { List(field) | FixedSizeList(field, _) => List(Arc::clone(field)), + LargeList(field) => LargeList(Arc::clone(field)), _ => arg_types[0].clone(), }, LargeList(field) => match field.data_type() { @@ -143,7 +144,8 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { List(_) => { let (inner_field, inner_offsets, inner_values, _) = as_list_array(&values)?.clone().into_parts(); - let offsets = get_offsets_for_flatten::(inner_offsets, offsets); + let offsets = + get_offsets_for_flatten::(inner_offsets, offsets); let flattened_array = GenericListArray::::new( inner_field, offsets, @@ -154,7 +156,17 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { Ok(Arc::new(flattened_array) as ArrayRef) } LargeList(_) => { - exec_err!("flatten does not support type '{:?}'", array.data_type())? + let (inner_field, inner_offsets, inner_values, _) = + as_large_list_array(&values)?.clone().into_parts(); + let offsets = + get_offsets_for_flatten::(inner_offsets, offsets); + let flattened_array = GenericListArray::::new( + inner_field, + offsets, + inner_values, + nulls, + ); + Ok(Arc::new(flattened_array) as ArrayRef) } _ => Ok(Arc::clone(array) as ArrayRef), } @@ -179,9 +191,10 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { Ok(Arc::new(flattened_array) as ArrayRef) } LargeList(_) => { - let (inner_field, inner_offsets, inner_values, nulls) = + let (inner_field, inner_offsets, inner_values, _) = as_large_list_array(&values)?.clone().into_parts(); - let offsets = get_offsets_for_flatten::(inner_offsets, offsets); + let offsets = + get_offsets_for_flatten::(inner_offsets, offsets); let flattened_array = GenericListArray::::new( inner_field, offsets, @@ -202,12 +215,12 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { } // Create new offsets that are equivalent to `flatten` the array. -fn get_offsets_for_flatten( - offsets: OffsetBuffer, - indexes: OffsetBuffer, +fn get_offsets_for_flatten( + inner_offsets: OffsetBuffer, + outer_offsets: OffsetBuffer

, ) -> OffsetBuffer { - let buffer = offsets.into_inner(); - let offsets: Vec = indexes + let buffer = inner_offsets.into_inner(); + let offsets: Vec = outer_offsets .iter() .map(|i| buffer[i.to_usize().unwrap()]) .collect(); @@ -216,11 +229,11 @@ fn get_offsets_for_flatten( // Create new large offsets that are equivalent to `flatten` the array. fn get_large_offsets_for_flatten( - offsets: OffsetBuffer, - indexes: OffsetBuffer

, + inner_offsets: OffsetBuffer, + outer_offsets: OffsetBuffer

, ) -> OffsetBuffer { - let buffer = offsets.into_inner(); - let offsets: Vec = indexes + let buffer = inner_offsets.into_inner(); + let offsets: Vec = outer_offsets .iter() .map(|i| buffer[i.to_usize().unwrap()].to_i64().unwrap()) .collect(); diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 5c74f3ddc613..38bdd7f3e3eb 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -7757,7 +7757,7 @@ select flatten(make_array(1, 2, 1, 3, 2)), query ??? select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'LargeList(Int64)')), - flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'LargeList(LargeList(Int64))')), + flatten(arrow_cast(make_array([1], null, [2, 3], [null], make_array(4, null, 5)), 'LargeList(LargeList(Int64))')), flatten(arrow_cast(make_array([[1.1]], [[2.2]], [[3.3], [4.4]]), 'LargeList(LargeList(LargeList(Float64)))')); ---- [1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] @@ -7769,6 +7769,14 @@ select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'FixedSizeList(5, Int64)')) ---- [1, 2, 1, 3, 2] [1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] +query ??TT +select flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, LargeList(Int64))')), + flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'List(LargeList(FixedSizeList(1, Float64)))')), + arrow_typeof(flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, LargeList(Int64))'))), + arrow_typeof(flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'List(LargeList(FixedSizeList(1, Float64)))'))); +---- +[1, 2, 3, NULL, 4, NULL, 5] [[1.1], [2.2], [3.3], [4.4]] LargeList(nullable Int64) LargeList(nullable FixedSizeList(1 x nullable Float64)) + # flatten with column values query ???? select flatten(column1),