Skip to content

Commit a1a33ce

Browse files
committed
fix: adopt the right array item name which changed in kernel 0.3.1
see delta-io/delta-kernel-rs#301
1 parent b325e27 commit a1a33ce

File tree

1 file changed

+24
-8
lines changed

1 file changed

+24
-8
lines changed

crates/core/src/writer/stats.rs

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,10 @@ impl AddAssign for AggregatedStats {
474474
/// the list and items fields from the path, but also need to handle the
475475
/// peculiar case where the user named the list field "list" or "element".
476476
///
477+
/// NOTE: As of delta_kernel 0.3.1 the list item field is named `element` rather than `item`, to
478+
/// line up with the Parquet spec; see the
479+
/// [Parquet list spec](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists).
480+
///
477481
/// For example:
478482
///
479483
/// * ["some_nested_list", "list", "element", "list", "element"] -> "some_nested_list"
@@ -495,9 +499,9 @@ fn get_list_field_name(column_descr: &Arc<ColumnDescriptor>) -> Option<String> {
495499
while let Some(part) = column_path_parts.pop() {
496500
match (part.as_str(), lists_seen, items_seen) {
497501
("list", seen, _) if seen == max_rep_levels => return Some("list".to_string()),
498-
("item", _, seen) if seen == max_rep_levels => return Some("item".to_string()),
502+
("element", _, seen) if seen == max_rep_levels => return Some("element".to_string()),
499503
("list", _, _) => lists_seen += 1,
500-
("item", _, _) => items_seen += 1,
504+
("element", _, _) => items_seen += 1,
501505
(other, _, _) => return Some(other.to_string()),
502506
}
503507
}
@@ -789,9 +793,21 @@ mod tests {
789793
let mut null_count_keys = vec!["some_list", "some_nested_list"];
790794
null_count_keys.extend_from_slice(min_max_keys.as_slice());
791795

792-
assert_eq!(min_max_keys.len(), stats.min_values.len());
793-
assert_eq!(min_max_keys.len(), stats.max_values.len());
794-
assert_eq!(null_count_keys.len(), stats.null_count.len());
796+
assert_eq!(
797+
min_max_keys.len(),
798+
stats.min_values.len(),
799+
"min values don't match"
800+
);
801+
assert_eq!(
802+
min_max_keys.len(),
803+
stats.max_values.len(),
804+
"max values don't match"
805+
);
806+
assert_eq!(
807+
null_count_keys.len(),
808+
stats.null_count.len(),
809+
"null counts don't match"
810+
);
795811

796812
// assert on min values
797813
for (k, v) in stats.min_values.iter() {
@@ -820,7 +836,7 @@ mod tests {
820836
("uuid", ColumnValueStat::Value(v)) => {
821837
assert_eq!("176c770d-92af-4a21-bf76-5d8c5261d659", v.as_str().unwrap())
822838
}
823-
_ => panic!("Key should not be present"),
839+
k => panic!("Key {k:?} should not be present in min_values"),
824840
}
825841
}
826842

@@ -851,7 +867,7 @@ mod tests {
851867
("uuid", ColumnValueStat::Value(v)) => {
852868
assert_eq!("a98bea04-d119-4f21-8edc-eb218b5849af", v.as_str().unwrap())
853869
}
854-
_ => panic!("Key should not be present"),
870+
k => panic!("Key {k:?} should not be present in max_values"),
855871
}
856872
}
857873

@@ -878,7 +894,7 @@ mod tests {
878894
("some_nested_list", ColumnCountStat::Value(v)) => assert_eq!(100, *v),
879895
("date", ColumnCountStat::Value(v)) => assert_eq!(0, *v),
880896
("uuid", ColumnCountStat::Value(v)) => assert_eq!(0, *v),
881-
_ => panic!("Key should not be present"),
897+
k => panic!("Key {k:?} should not be present in null_count"),
882898
}
883899
}
884900
}

0 commit comments

Comments
 (0)