Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-data/src/equal/list.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::data::{count_nulls, ArrayData};
19
use arrow_buffer::ArrowNativeType;
20
use num::Integer;
21
22
use super::equal_range;
23
24
48
/// Checks whether two equally long offset slices encode the same sequence of lengths.
///
/// Each adjacent pair of offsets `(a, b)` encodes the length `b - a`. Two slices may
/// start at different offsets and still describe identical lengths, e.g.
/// `[0, 2, 4, 6, 9]` and `[4, 6, 8, 10, 13]`.
///
/// Returns `true` for empty input.
fn lengths_equal<T: ArrowNativeType + Integer>(lhs: &[T], rhs: &[T]) -> bool {
    // invariant from `base_equal`
    debug_assert_eq!(lhs.len(), rhs.len());

    if lhs.is_empty() {
        return true;
    }

    // Cheap case: when both slices start at zero, the encoded lengths match exactly
    // when the offsets themselves match, so a plain slice comparison is enough.
    let both_start_at_zero = lhs[0] == T::zero() && rhs[0] == T::zero();
    if both_start_at_zero {
        return lhs == rhs;
    }

    // Expensive case: the slices are shifted relative to each other, so compare the
    // difference of every adjacent offset pair instead of the raw offsets.
    let any_length_differs = lhs
        .windows(2)
        .zip(rhs.windows(2))
        .any(|(lhs_pair, rhs_pair)| (lhs_pair[1] - lhs_pair[0]) != (rhs_pair[1] - rhs_pair[0]));

    !any_length_differs
}
45
46
79
pub(super) fn list_equal<T: ArrowNativeType + Integer>(
47
79
    lhs: &ArrayData,
48
79
    rhs: &ArrayData,
49
79
    lhs_start: usize,
50
79
    rhs_start: usize,
51
79
    len: usize,
52
79
) -> bool {
53
79
    let lhs_offsets = lhs.buffer::<T>(0);
54
79
    let rhs_offsets = rhs.buffer::<T>(0);
55
56
    // There is an edge-case where a n-length list that has 0 children, results in panics.
57
    // For example; an array with offsets [0, 0, 0, 0, 0] has 4 slots, but will have
58
    // no valid children.
59
    // Under logical equality, the child null bitmap will be an empty buffer, as there are
60
    // no child values. This causes panics when trying to count set bits.
61
    //
62
    // We caught this by chance from an accidental test-case, but due to the nature of this
63
    // crash only occurring on list equality checks, we are adding a check here, instead of
64
    // on the buffer/bitmap utilities, as a length check would incur a penalty for almost all
65
    // other use-cases.
66
    //
67
    // The solution is to check the number of child values from offsets, and return `true` if
68
    // they = 0. Empty arrays are equal, so this is correct.
69
    //
70
    // It's unlikely that one would create a n-length list array with no values, where n > 0,
71
    // however, one is more likely to slice into a list array and get a region that has 0
72
    // child values.
73
    // The test that triggered this behaviour had [4, 4] as a slice of 1 value slot.
74
    // For the edge case that zero length list arrays are always equal.
75
79
    if len == 0 {
76
2
        return true;
77
77
    }
78
79
77
    let lhs_child_length = lhs_offsets[lhs_start + len].to_usize().unwrap()
80
77
        - lhs_offsets[lhs_start].to_usize().unwrap();
81
82
77
    let rhs_child_length = rhs_offsets[rhs_start + len].to_usize().unwrap()
83
77
        - rhs_offsets[rhs_start].to_usize().unwrap();
84
85
77
    if lhs_child_length == 0 && 
lhs_child_length == rhs_child_length16
{
86
16
        return true;
87
61
    }
88
89
61
    let lhs_values = &lhs.child_data()[0];
90
61
    let rhs_values = &rhs.child_data()[0];
91
92
61
    let lhs_null_count = count_nulls(lhs.nulls(), lhs_start, len);
93
61
    let rhs_null_count = count_nulls(rhs.nulls(), rhs_start, len);
94
95
61
    if lhs_null_count != rhs_null_count {
96
0
        return false;
97
61
    }
98
99
61
    if lhs_null_count == 0 && 
rhs_null_count == 048
{
100
48
        lhs_child_length == rhs_child_length
101
48
            && lengths_equal(
102
48
                &lhs_offsets[lhs_start..lhs_start + len],
103
48
                &rhs_offsets[rhs_start..rhs_start + len],
104
            )
105
48
            && equal_range(
106
48
                lhs_values,
107
48
                rhs_values,
108
48
                lhs_offsets[lhs_start].to_usize().unwrap(),
109
48
                rhs_offsets[rhs_start].to_usize().unwrap(),
110
48
                lhs_child_length,
111
            )
112
    } else {
113
        // get a ref of the parent null buffer bytes, to use in testing for nullness
114
13
        let lhs_nulls = lhs.nulls().unwrap();
115
13
        let rhs_nulls = rhs.nulls().unwrap();
116
117
        // with nulls, we need to compare item by item whenever it is not null
118
        // TODO: Could potentially compare runs of not NULL values
119
78
        
(0..len)13
.
all13
(|i| {
120
78
            let lhs_pos = lhs_start + i;
121
78
            let rhs_pos = rhs_start + i;
122
123
78
            let lhs_is_null = lhs_nulls.is_null(lhs_pos);
124
78
            let rhs_is_null = rhs_nulls.is_null(rhs_pos);
125
126
78
            if lhs_is_null != rhs_is_null {
127
0
                return false;
128
78
            }
129
130
78
            let lhs_offset_start = lhs_offsets[lhs_pos].to_usize().unwrap();
131
78
            let lhs_offset_end = lhs_offsets[lhs_pos + 1].to_usize().unwrap();
132
78
            let rhs_offset_start = rhs_offsets[rhs_pos].to_usize().unwrap();
133
78
            let rhs_offset_end = rhs_offsets[rhs_pos + 1].to_usize().unwrap();
134
135
78
            let lhs_len = lhs_offset_end - lhs_offset_start;
136
78
            let rhs_len = rhs_offset_end - rhs_offset_start;
137
138
78
            lhs_is_null
139
54
                || (lhs_len == rhs_len
140
54
                    && equal_range(
141
54
                        lhs_values,
142
54
                        rhs_values,
143
54
                        lhs_offset_start,
144
54
                        rhs_offset_start,
145
54
                        lhs_len,
146
                    ))
147
78
        })
148
    }
149
79
}