Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-string/src/like.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Provide SQL's LIKE operators for Arrow's string arrays
19
20
use crate::predicate::Predicate;
21
22
use arrow_array::cast::AsArray;
23
use arrow_array::*;
24
use arrow_schema::*;
25
use arrow_select::take::take;
26
27
use std::sync::Arc;
28
29
use crate::binary_like::binary_apply;
30
pub use arrow_array::StringArrayType;
31
32
/// The string predicate evaluated by the kernels in this module.
///
/// The `bool` carried by [`Op::Like`] and [`Op::ILike`] is the negation flag:
/// `true` selects the `NOT LIKE` / `NOT ILIKE` form.
#[derive(Debug)]
pub(crate) enum Op {
    /// SQL `LIKE` (`false`) or `NOT LIKE` (`true`)
    Like(bool),
    /// SQL `ILIKE` (`false`) or `NOT ILIKE` (`true`)
    ILike(bool),
    /// Substring test
    Contains,
    /// Prefix test
    StartsWith,
    /// Suffix test
    EndsWith,
}

impl std::fmt::Display for Op {
    /// Writes the upper-case operator name; this is the text interpolated into
    /// the "Invalid string/binary operation" error message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match *self {
            Op::Like(negated) => {
                if negated {
                    "NLIKE"
                } else {
                    "LIKE"
                }
            }
            Op::ILike(negated) => {
                if negated {
                    "NILIKE"
                } else {
                    "ILIKE"
                }
            }
            Op::Contains => "CONTAINS",
            Op::StartsWith => "STARTS_WITH",
            Op::EndsWith => "ENDS_WITH",
        };
        f.write_str(name)
    }
}
54
55
/// Perform SQL `left LIKE right`
56
///
57
/// # Supported DataTypes
58
///
59
/// `left` and `right` must be the same type, and one of
60
/// - Utf8
61
/// - LargeUtf8
62
/// - Utf8View
63
///
64
/// There are two wildcards supported with the LIKE operator:
65
///
66
/// 1. `%` - The percent sign represents zero, one, or multiple characters
67
/// 2. `_` - The underscore represents a single character
68
///
69
/// Example
70
/// ```
71
/// # use arrow_array::{StringArray, BooleanArray};
72
/// # use arrow_string::like::like;
73
/// let strings = StringArray::from(vec!["Arrow", "Arrow", "Arrow", "Ar"]);
74
/// let patterns = StringArray::from(vec!["A%", "B%", "A.", "A_"]);
75
///
76
/// let result = like(&strings, &patterns).unwrap();
77
/// assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
78
/// ```
79
0
pub fn like(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
80
0
    like_op(Op::Like(false), left, right)
81
0
}
82
83
/// Perform SQL `left ILIKE right`
84
///
85
/// # Notes
86
/// - This is a case-insensitive version of [`like`]
87
/// - See the documentation on [`like`] for more details
88
/// - Implements loose matching as defined by the Unicode standard. For example,
89
///   the `ff` ligature is not equivalent to `FF` and `ß` is not equivalent to `SS`
90
0
pub fn ilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
91
0
    like_op(Op::ILike(false), left, right)
92
0
}
93
94
/// Perform SQL `left NOT LIKE right`
95
///
96
/// # Notes
97
/// - This is a negative of [`like`]
98
/// - See the documentation on [`like`] for more details
99
0
pub fn nlike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
100
0
    like_op(Op::Like(true), left, right)
101
0
}
102
103
/// Perform SQL `left NOT ILIKE right`
104
///
105
/// # Notes
106
/// - This is a negative of [`like`]
107
/// - See the documentation on [`ilike`] for more details
108
0
pub fn nilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
109
0
    like_op(Op::ILike(true), left, right)
110
0
}
111
112
/// Perform SQL `STARTSWITH(left, right)`
113
///
114
/// # Supported DataTypes
115
///
116
/// `left` and `right` must be the same type, and one of
117
/// - Utf8
118
/// - LargeUtf8
119
/// - Utf8View
120
/// - Binary
121
/// - LargeBinary
122
/// - BinaryView
123
///
124
/// # Example
125
/// ```
126
/// # use arrow_array::{StringArray, BooleanArray};
127
/// # use arrow_string::like::{like, starts_with};
128
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs", "arrow-rs", "Parquet"]);
129
/// let patterns = StringArray::from(vec!["arr", "arrow", "arrow-cpp", "p"]);
130
///
131
/// let result = starts_with(&strings, &patterns).unwrap();
132
/// assert_eq!(result, BooleanArray::from(vec![true, true, false, false]));
133
/// ```
134
0
pub fn starts_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
135
0
    like_op(Op::StartsWith, left, right)
136
0
}
137
138
/// Perform SQL `ENDSWITH(left, right)`
139
///
140
/// # Supported DataTypes
141
///
142
/// `left` and `right` must be the same type, and one of
143
/// - Utf8
144
/// - LargeUtf8
145
/// - Utf8View
146
/// - Binary
147
/// - LargeBinary
148
/// - BinaryView
149
///
150
/// # Example
151
/// ```
152
/// # use arrow_array::{StringArray, BooleanArray};
153
/// # use arrow_string::like::{ends_with, like, starts_with};
154
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs",  "Parquet"]);
155
/// let patterns = StringArray::from(vec!["arr", "-rs", "t"]);
156
///
157
/// let result = ends_with(&strings, &patterns).unwrap();
158
/// assert_eq!(result, BooleanArray::from(vec![false, true, true]));
159
/// ```
160
0
pub fn ends_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
161
0
    like_op(Op::EndsWith, left, right)
162
0
}
163
164
/// Perform SQL `CONTAINS(left, right)`
165
///
166
/// # Supported DataTypes
167
///
168
/// `left` and `right` must be the same type, and one of
169
/// - Utf8
170
/// - LargeUtf8
171
/// - Utf8View
172
/// - Binary
173
/// - LargeBinary
174
/// - BinaryView
175
///
176
/// # Example
177
/// ```
178
/// # use arrow_array::{StringArray, BooleanArray};
179
/// # use arrow_string::like::{contains, like, starts_with};
180
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs", "arrow-rs", "Parquet"]);
181
/// let patterns = StringArray::from(vec!["arr", "-rs", "arrow-cpp", "X"]);
182
///
183
/// let result = contains(&strings, &patterns).unwrap();
184
/// assert_eq!(result, BooleanArray::from(vec![true, true, false, false]));
185
/// ```
186
0
pub fn contains(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
187
0
    like_op(Op::Contains, left, right)
188
0
}
189
190
0
fn like_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<BooleanArray, ArrowError> {
191
    use arrow_schema::DataType::*;
192
0
    let (l, l_s) = lhs.get();
193
0
    let (r, r_s) = rhs.get();
194
195
0
    if l.len() != r.len() && !l_s && !r_s {
196
0
        return Err(ArrowError::InvalidArgumentError(format!(
197
0
            "Cannot compare arrays of different lengths, got {} vs {}",
198
0
            l.len(),
199
0
            r.len()
200
0
        )));
201
0
    }
202
203
0
    let l_v = l.as_any_dictionary_opt();
204
0
    let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l);
205
206
0
    let r_v = r.as_any_dictionary_opt();
207
0
    let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r);
208
209
0
    match (l.data_type(), r.data_type()) {
210
0
        (Utf8, Utf8) => string_apply::<&GenericStringArray<i32>>(
211
0
            op,
212
0
            l.as_string(),
213
0
            l_s,
214
0
            l_v,
215
0
            r.as_string(),
216
0
            r_s,
217
0
            r_v,
218
        ),
219
0
        (LargeUtf8, LargeUtf8) => string_apply::<&GenericStringArray<i64>>(
220
0
            op,
221
0
            l.as_string(),
222
0
            l_s,
223
0
            l_v,
224
0
            r.as_string(),
225
0
            r_s,
226
0
            r_v,
227
        ),
228
0
        (Utf8View, Utf8View) => string_apply::<&StringViewArray>(
229
0
            op,
230
0
            l.as_string_view(),
231
0
            l_s,
232
0
            l_v,
233
0
            r.as_string_view(),
234
0
            r_s,
235
0
            r_v,
236
        ),
237
0
        (Binary, Binary) => binary_apply::<&GenericBinaryArray<i32>>(
238
0
            op.try_into()?,
239
0
            l.as_binary(),
240
0
            l_s,
241
0
            l_v,
242
0
            r.as_binary(),
243
0
            r_s,
244
0
            r_v,
245
        ),
246
0
        (LargeBinary, LargeBinary) => binary_apply::<&GenericBinaryArray<i64>>(
247
0
            op.try_into()?,
248
0
            l.as_binary(),
249
0
            l_s,
250
0
            l_v,
251
0
            r.as_binary(),
252
0
            r_s,
253
0
            r_v,
254
        ),
255
0
        (BinaryView, BinaryView) => binary_apply::<&BinaryViewArray>(
256
0
            op.try_into()?,
257
0
            l.as_binary_view(),
258
0
            l_s,
259
0
            l_v,
260
0
            r.as_binary_view(),
261
0
            r_s,
262
0
            r_v,
263
        ),
264
0
        (l_t, r_t) => Err(ArrowError::InvalidArgumentError(format!(
265
0
            "Invalid string/binary operation: {l_t} {op} {r_t}"
266
0
        ))),
267
    }
268
0
}
269
270
0
fn string_apply<'a, T: StringArrayType<'a> + 'a>(
271
0
    op: Op,
272
0
    l: T,
273
0
    l_s: bool,
274
0
    l_v: Option<&'a dyn AnyDictionaryArray>,
275
0
    r: T,
276
0
    r_s: bool,
277
0
    r_v: Option<&'a dyn AnyDictionaryArray>,
278
0
) -> Result<BooleanArray, ArrowError> {
279
0
    let l_len = l_v.map(|l| l.len()).unwrap_or(l.len());
280
0
    if r_s {
281
0
        let idx = match r_v {
282
0
            Some(dict) if dict.null_count() != 0 => return Ok(BooleanArray::new_null(l_len)),
283
0
            Some(dict) => dict.normalized_keys()[0],
284
0
            None => 0,
285
        };
286
0
        if r.is_null(idx) {
287
0
            return Ok(BooleanArray::new_null(l_len));
288
0
        }
289
0
        op_scalar::<T>(op, l, l_v, r.value(idx))
290
    } else {
291
0
        match (l_s, l_v, r_v) {
292
            (true, None, None) => {
293
0
                let v = l.is_valid(0).then(|| l.value(0));
294
0
                op_binary(op, std::iter::repeat(v), r.iter())
295
            }
296
0
            (true, Some(l_v), None) => {
297
0
                let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
298
0
                let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
299
0
                op_binary(op, std::iter::repeat(v), r.iter())
300
            }
301
0
            (true, None, Some(r_v)) => {
302
0
                let v = l.is_valid(0).then(|| l.value(0));
303
0
                op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
304
            }
305
0
            (true, Some(l_v), Some(r_v)) => {
306
0
                let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
307
0
                let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
308
0
                op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
309
            }
310
0
            (false, None, None) => op_binary(op, l.iter(), r.iter()),
311
0
            (false, Some(l_v), None) => op_binary(op, vectored_iter(l, l_v), r.iter()),
312
0
            (false, None, Some(r_v)) => op_binary(op, l.iter(), vectored_iter(r, r_v)),
313
0
            (false, Some(l_v), Some(r_v)) => {
314
0
                op_binary(op, vectored_iter(l, l_v), vectored_iter(r, r_v))
315
            }
316
        }
317
    }
318
0
}
319
320
#[inline(never)]
321
0
fn op_scalar<'a, T: StringArrayType<'a>>(
322
0
    op: Op,
323
0
    l: T,
324
0
    l_v: Option<&dyn AnyDictionaryArray>,
325
0
    r: &str,
326
0
) -> Result<BooleanArray, ArrowError> {
327
0
    let r = match op {
328
0
        Op::Like(neg) => Predicate::like(r)?.evaluate_array(l, neg),
329
0
        Op::ILike(neg) => Predicate::ilike(r, l.is_ascii())?.evaluate_array(l, neg),
330
0
        Op::Contains => Predicate::contains(r).evaluate_array(l, false),
331
0
        Op::StartsWith => Predicate::StartsWith(r).evaluate_array(l, false),
332
0
        Op::EndsWith => Predicate::EndsWith(r).evaluate_array(l, false),
333
    };
334
335
0
    Ok(match l_v {
336
0
        Some(v) => take(&r, v.keys(), None)?.as_boolean().clone(),
337
0
        None => r,
338
    })
339
0
}
340
341
0
fn vectored_iter<'a, T: StringArrayType<'a> + 'a>(
342
0
    a: T,
343
0
    a_v: &'a dyn AnyDictionaryArray,
344
0
) -> impl Iterator<Item = Option<&'a str>> + 'a {
345
0
    let nulls = a_v.nulls();
346
0
    let keys = a_v.normalized_keys();
347
0
    keys.into_iter().enumerate().map(move |(idx, key)| {
348
0
        if nulls.map(|n| n.is_null(idx)).unwrap_or_default() || a.is_null(key) {
349
0
            return None;
350
0
        }
351
0
        Some(a.value(key))
352
0
    })
353
0
}
354
355
#[inline(never)]
356
0
fn op_binary<'a>(
357
0
    op: Op,
358
0
    l: impl Iterator<Item = Option<&'a str>>,
359
0
    r: impl Iterator<Item = Option<&'a str>>,
360
0
) -> Result<BooleanArray, ArrowError> {
361
0
    match op {
362
0
        Op::Like(neg) => binary_predicate(l, r, neg, Predicate::like),
363
0
        Op::ILike(neg) => binary_predicate(l, r, neg, |s| Predicate::ilike(s, false)),
364
0
        Op::Contains => Ok(l.zip(r).map(|(l, r)| Some(str_contains(l?, r?))).collect()),
365
0
        Op::StartsWith => Ok(l
366
0
            .zip(r)
367
0
            .map(|(l, r)| Some(Predicate::StartsWith(r?).evaluate(l?)))
368
0
            .collect()),
369
0
        Op::EndsWith => Ok(l
370
0
            .zip(r)
371
0
            .map(|(l, r)| Some(Predicate::EndsWith(r?).evaluate(l?)))
372
0
            .collect()),
373
    }
374
0
}
375
376
0
fn str_contains(haystack: &str, needle: &str) -> bool {
377
0
    memchr::memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some()
378
0
}
379
380
0
fn binary_predicate<'a>(
381
0
    l: impl Iterator<Item = Option<&'a str>>,
382
0
    r: impl Iterator<Item = Option<&'a str>>,
383
0
    neg: bool,
384
0
    f: impl Fn(&'a str) -> Result<Predicate<'a>, ArrowError>,
385
0
) -> Result<BooleanArray, ArrowError> {
386
0
    let mut previous = None;
387
0
    l.zip(r)
388
0
        .map(|(l, r)| match (l, r) {
389
0
            (Some(l), Some(r)) => {
390
0
                let p: &Predicate = match previous {
391
0
                    Some((expr, ref predicate)) if expr == r => predicate,
392
0
                    _ => &previous.insert((r, f(r)?)).1,
393
                };
394
0
                Ok(Some(p.evaluate(l) != neg))
395
            }
396
0
            _ => Ok(None),
397
0
        })
398
0
        .collect()
399
0
}
400
401
// Deprecated kernels
402
403
0
fn make_scalar(data_type: &DataType, scalar: &str) -> Result<ArrayRef, ArrowError> {
404
0
    match data_type {
405
0
        DataType::Utf8 => Ok(Arc::new(StringArray::from_iter_values([scalar]))),
406
0
        DataType::LargeUtf8 => Ok(Arc::new(LargeStringArray::from_iter_values([scalar]))),
407
0
        DataType::Dictionary(_, v) => make_scalar(v.as_ref(), scalar),
408
0
        d => Err(ArrowError::InvalidArgumentError(format!(
409
0
            "Unsupported string scalar data type {d:?}",
410
0
        ))),
411
    }
412
0
}
413
414
// Generates the four deprecated, `#[doc(hidden)]` entry points that forward to
// the `Datum`-based kernel `$fn_datum`:
//
// - `$fn_array`:      two `GenericStringArray`s
// - `$fn_scalar`:     a `GenericStringArray` and a `&str` pattern
// - `$fn_array_dyn`:  two `&dyn Array`s
// - `$fn_scalar_dyn`: a `&dyn Array` and a `&str` pattern
//
// `$deprecation` is the note attached to each `#[deprecated]` attribute.
macro_rules! legacy_kernels {
    ($fn_datum:ident, $fn_array:ident, $fn_scalar:ident, $fn_array_dyn:ident, $fn_scalar_dyn:ident, $deprecation:expr) => {
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &GenericStringArray<O>,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(left, right)
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &str,
        ) -> Result<BooleanArray, ArrowError> {
            // Wrap the pattern in a one-element array so it can be passed as a scalar datum.
            let pattern = GenericStringArray::<O>::from_iter_values([right]);
            let pattern = Scalar::new(&pattern);
            $fn_datum(left, &pattern)
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array_dyn(
            left: &dyn Array,
            right: &dyn Array,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(&left, &right)
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar_dyn(left: &dyn Array, right: &str) -> Result<BooleanArray, ArrowError> {
            // The scalar's type follows the left array's (value) type.
            let pattern = make_scalar(left.data_type(), right)?;
            let pattern = Scalar::new(&pattern);
            $fn_datum(&left, &pattern)
        }
    };
}
452
453
legacy_kernels!(
454
    like,
455
    like_utf8,
456
    like_utf8_scalar,
457
    like_dyn,
458
    like_utf8_scalar_dyn,
459
    "Use arrow_string::like::like"
460
);
461
legacy_kernels!(
462
    ilike,
463
    ilike_utf8,
464
    ilike_utf8_scalar,
465
    ilike_dyn,
466
    ilike_utf8_scalar_dyn,
467
    "Use arrow_string::like::ilike"
468
);
469
legacy_kernels!(
470
    nlike,
471
    nlike_utf8,
472
    nlike_utf8_scalar,
473
    nlike_dyn,
474
    nlike_utf8_scalar_dyn,
475
    "Use arrow_string::like::nlike"
476
);
477
legacy_kernels!(
478
    nilike,
479
    nilike_utf8,
480
    nilike_utf8_scalar,
481
    nilike_dyn,
482
    nilike_utf8_scalar_dyn,
483
    "Use arrow_string::like::nilike"
484
);
485
legacy_kernels!(
486
    contains,
487
    contains_utf8,
488
    contains_utf8_scalar,
489
    contains_dyn,
490
    contains_utf8_scalar_dyn,
491
    "Use arrow_string::like::contains"
492
);
493
legacy_kernels!(
494
    starts_with,
495
    starts_with_utf8,
496
    starts_with_utf8_scalar,
497
    starts_with_dyn,
498
    starts_with_utf8_scalar_dyn,
499
    "Use arrow_string::like::starts_with"
500
);
501
502
legacy_kernels!(
503
    ends_with,
504
    ends_with_utf8,
505
    ends_with_utf8_scalar,
506
    ends_with_dyn,
507
    ends_with_utf8_scalar_dyn,
508
    "Use arrow_string::like::ends_with"
509
);
510
511
#[cfg(test)]
512
#[allow(deprecated)]
513
mod tests {
514
    use super::*;
515
    use arrow_array::builder::BinaryDictionaryBuilder;
516
    use arrow_array::types::{ArrowDictionaryKeyType, Int8Type};
517
    use std::iter::zip;
518
519
    fn convert_binary_iterator_to_binary_dictionary<
520
        'a,
521
        K: ArrowDictionaryKeyType,
522
        I: IntoIterator<Item = &'a [u8]>,
523
    >(
524
        iter: I,
525
    ) -> DictionaryArray<K> {
526
        let it = iter.into_iter();
527
        let (lower, _) = it.size_hint();
528
        let mut builder = BinaryDictionaryBuilder::with_capacity(lower, 256, 1024);
529
        it.for_each(|i| {
530
            builder
531
                .append(i)
532
                .expect("Unable to append a value to a dictionary array.");
533
        });
534
535
        builder.finish()
536
    }
537
538
    /// Generates a `#[test]` named `$test_name` that checks
    /// `$op($left, $right) == $expected` with both operands passed as arrays.
    ///
    /// The same inputs are run through four array implementations:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray` (with `Int8Type` keys)
    macro_rules! test_utf8 {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let want = BooleanArray::from($expected);

                // Utf8
                let lhs = StringArray::from($left);
                let rhs = StringArray::from($right);
                assert_eq!($op(&lhs, &rhs).unwrap(), want);

                // LargeUtf8
                let lhs = LargeStringArray::from($left);
                let rhs = LargeStringArray::from($right);
                assert_eq!($op(&lhs, &rhs).unwrap(), want);

                // Utf8View
                let lhs = StringViewArray::from($left);
                let rhs = StringViewArray::from($right);
                assert_eq!($op(&lhs, &rhs).unwrap(), want);

                // Dictionary-encoded strings
                let lhs: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let rhs: DictionaryArray<Int8Type> = $right.into_iter().collect();
                assert_eq!($op(&lhs, &rhs).unwrap(), want);
            }
        };
    }
572
573
    /// Applying `op(left, right)`, both sides are arrays
    /// The macro tests seven array implementations, first with the inputs as
    /// strings and then with the same inputs as their UTF-8 bytes:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray` of strings
    /// - `BinaryArray`
    /// - `LargeBinaryArray`
    /// - `DictionaryArray` of binary values
    macro_rules! test_utf8_and_binary {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                // Same inputs again, as raw bytes.
                let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
                let right_binary = $right.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();

                let left = BinaryArray::from(left_binary.clone());
                let right = BinaryArray::from(right_binary.clone());
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = LargeBinaryArray::from(left_binary.clone());
                let right = LargeBinaryArray::from(right_binary.clone());
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(left_binary);
                let right: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(right_binary);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
627
628
    /// Generates a `#[test]` named `$test_name` that checks
    /// `$op($left, $right) == $expected` where `$left` is an array and `$right`
    /// is a single pattern wrapped in a `Scalar`.
    ///
    /// The same inputs are run through four array implementations:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray` (with `Int8Type` keys)
    macro_rules! test_utf8_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let want = BooleanArray::from($expected);

                // Utf8
                let lhs = StringArray::from($left);
                let pattern = StringArray::from_iter_values([$right]);
                assert_eq!($op(&lhs, &Scalar::new(&pattern)).unwrap(), want);

                // LargeUtf8
                let lhs = LargeStringArray::from($left);
                let pattern = LargeStringArray::from_iter_values([$right]);
                assert_eq!($op(&lhs, &Scalar::new(&pattern)).unwrap(), want);

                // Utf8View
                let lhs = StringViewArray::from($left);
                let pattern = StringViewArray::from_iter_values([$right]);
                assert_eq!($op(&lhs, &Scalar::new(&pattern)).unwrap(), want);

                // Dictionary-encoded strings
                let lhs: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let pattern: DictionaryArray<Int8Type> = [$right].into_iter().collect();
                assert_eq!($op(&lhs, &Scalar::new(&pattern)).unwrap(), want);
            }
        };
    }
662
663
    /// Applying `op(left, right)`, left side is array, right side is scalar
    /// The macro tests seven array implementations, first with the inputs as
    /// strings and then with the same inputs as their UTF-8 bytes:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray` of strings
    /// - `BinaryArray`
    /// - `LargeBinaryArray`
    /// - `DictionaryArray` of binary values
    macro_rules! test_utf8_and_binary_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // Same inputs again, as raw bytes.
                let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
                let right_binary = $right.as_bytes();

                let left = BinaryArray::from(left_binary.clone());
                let right = BinaryArray::from_iter_values([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = LargeBinaryArray::from(left_binary.clone());
                let right = LargeBinaryArray::from_iter_values([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(left_binary);
                let right: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
717
718
    test_utf8!(
719
        test_utf8_array_like,
720
        vec![
721
            "arrow",
722
            "arrow_long_string_more than 12 bytes",
723
            "arrow",
724
            "arrow",
725
            "arrow",
726
            "arrows",
727
            "arrow",
728
            "arrow"
729
        ],
730
        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_", ".*"],
731
        like,
732
        vec![true, true, true, false, false, true, false, false]
733
    );
734
735
    test_utf8_scalar!(
736
        test_utf8_array_like_scalar_escape_testing,
737
        vec![
738
            "varchar(255)",
739
            "int(255)longer than 12 bytes",
740
            "varchar",
741
            "int"
742
        ],
743
        "%(%)%",
744
        like,
745
        vec![true, true, false, false]
746
    );
747
748
    test_utf8_scalar!(
749
        test_utf8_array_like_scalar_escape_regex,
750
        vec![".*", "a", "*"],
751
        ".*",
752
        like,
753
        vec![true, false, false]
754
    );
755
756
    test_utf8_scalar!(
757
        test_utf8_array_like_scalar_escape_regex_dot,
758
        vec![".", "a", "*"],
759
        ".",
760
        like,
761
        vec![true, false, false]
762
    );
763
764
    test_utf8_scalar!(
765
        test_utf8_array_like_scalar,
766
        vec![
767
            "arrow",
768
            "parquet",
769
            "datafusion",
770
            "flight",
771
            "long string arrow test 12 bytes"
772
        ],
773
        "%ar%",
774
        like,
775
        vec![true, true, false, false, true]
776
    );
777
778
    test_utf8_scalar!(
779
        test_utf8_array_like_scalar_start,
780
        vec![
781
            "arrow",
782
            "parrow",
783
            "arrows",
784
            "arr",
785
            "arrow long string longer than 12 bytes"
786
        ],
787
        "arrow%",
788
        like,
789
        vec![true, false, true, false, true]
790
    );
791
792
    // Replicates `test_utf8_array_like_scalar_start` and `test_utf8_array_like_scalar_dyn_start` to
793
    // demonstrate that `SQL STARTSWITH` works as expected.
794
    test_utf8_and_binary_scalar!(
795
        test_utf8_and_binary_array_starts_with_scalar_start,
796
        vec![
797
            "arrow",
798
            "parrow",
799
            "arrows",
800
            "arr",
801
            "arrow long string longer than 12 bytes"
802
        ],
803
        "arrow",
804
        starts_with,
805
        vec![true, false, true, false, true]
806
    );
807
808
    test_utf8_and_binary!(
809
        test_utf8_and_binary_array_starts_with,
810
        vec![
811
            "arrow",
812
            "arrow_long_string_more than 12 bytes",
813
            "arrow",
814
            "arrow",
815
            "arrow",
816
            "arrows",
817
            "arrow",
818
            "arrow"
819
        ],
820
        vec!["arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"],
821
        starts_with,
822
        vec![true, false, false, false, true, false, false, false]
823
    );
824
825
    test_utf8_scalar!(
826
        test_utf8_array_like_scalar_end,
827
        vec![
828
            "arrow",
829
            "parrow",
830
            "arrows",
831
            "arr",
832
            "arrow long string longer than 12 bytes"
833
        ],
834
        "%arrow",
835
        like,
836
        vec![true, true, false, false, false]
837
    );
838
839
    // Replicates `test_utf8_array_like_scalar_end` and `test_utf8_array_like_scalar_dyn_end` to
840
    // demonstrate that `SQL ENDSWITH` works as expected.
841
    test_utf8_and_binary_scalar!(
842
        test_utf8_and_binary_array_ends_with_scalar_end,
843
        vec![
844
            "arrow",
845
            "parrow",
846
            "arrows",
847
            "arr",
848
            "arrow long string longer than 12 bytes"
849
        ],
850
        "arrow",
851
        ends_with,
852
        vec![true, true, false, false, false]
853
    );
854
855
    test_utf8_and_binary!(
856
        test_utf8_and_binary_array_ends_with,
857
        vec![
858
            "arrow",
859
            "arrow_long_string_more than 12 bytes",
860
            "arrow",
861
            "arrow",
862
            "arrow",
863
            "arrows",
864
            "arrow",
865
            "arrow"
866
        ],
867
        vec!["arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"],
868
        ends_with,
869
        vec![true, false, true, false, false, false, false, false]
870
    );
871
872
    test_utf8_scalar!(
873
        test_utf8_array_like_scalar_equals,
874
        vec![
875
            "arrow",
876
            "parrow",
877
            "arrows",
878
            "arr",
879
            "arrow long string longer than 12 bytes"
880
        ],
881
        "arrow",
882
        like,
883
        vec![true, false, false, false, false]
884
    );
885
886
    test_utf8_scalar!(
887
        test_utf8_array_like_scalar_one,
888
        vec![
889
            "arrow",
890
            "arrows",
891
            "parrow",
892
            "arr",
893
            "arrow long string longer than 12 bytes"
894
        ],
895
        "arrow_",
896
        like,
897
        vec![false, true, false, false, false]
898
    );
899
900
    test_utf8_scalar!(
901
        test_utf8_scalar_like_escape,
902
        vec!["a%", "a\\x", "arrow long string longer than 12 bytes"],
903
        "a\\%",
904
        like,
905
        vec![true, false, false]
906
    );
907
908
    test_utf8_scalar!(
909
        test_utf8_scalar_like_escape_contains,
910
        vec!["ba%", "ba\\x", "arrow long string longer than 12 bytes"],
911
        "%a\\%",
912
        like,
913
        vec![true, false, false]
914
    );
915
916
    test_utf8!(
917
        test_utf8_scalar_ilike_regex,
918
        vec!["%%%"],
919
        vec![r"\%_\%"],
920
        ilike,
921
        vec![true]
922
    );
923
924
    test_utf8!(
925
        test_utf8_array_nlike,
926
        vec![
927
            "arrow",
928
            "arrow",
929
            "arrow long string longer than 12 bytes",
930
            "arrow",
931
            "arrow",
932
            "arrows",
933
            "arrow"
934
        ],
935
        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
936
        nlike,
937
        vec![false, false, false, true, true, false, true]
938
    );
939
940
    test_utf8_scalar!(
941
        test_utf8_array_nlike_escape_testing,
942
        vec![
943
            "varchar(255)",
944
            "int(255) arrow long string longer than 12 bytes",
945
            "varchar",
946
            "int"
947
        ],
948
        "%(%)%",
949
        nlike,
950
        vec![false, false, true, true]
951
    );
952
953
    test_utf8_scalar!(
954
        test_utf8_array_nlike_scalar_escape_regex,
955
        vec![".*", "a", "*"],
956
        ".*",
957
        nlike,
958
        vec![false, true, true]
959
    );
960
961
    test_utf8_scalar!(
962
        test_utf8_array_nlike_scalar_escape_regex_dot,
963
        vec![".", "a", "*"],
964
        ".",
965
        nlike,
966
        vec![false, true, true]
967
    );
968
    test_utf8_scalar!(
969
        test_utf8_array_nlike_scalar,
970
        vec![
971
            "arrow",
972
            "parquet",
973
            "datafusion",
974
            "flight",
975
            "arrow long string longer than 12 bytes"
976
        ],
977
        "%ar%",
978
        nlike,
979
        vec![false, false, true, true, false]
980
    );
981
982
    test_utf8_scalar!(
983
        test_utf8_array_nlike_scalar_start,
984
        vec![
985
            "arrow",
986
            "parrow",
987
            "arrows",
988
            "arr",
989
            "arrow long string longer than 12 bytes"
990
        ],
991
        "arrow%",
992
        nlike,
993
        vec![false, true, false, true, false]
994
    );
995
996
    test_utf8_scalar!(
997
        test_utf8_array_nlike_scalar_end,
998
        vec![
999
            "arrow",
1000
            "parrow",
1001
            "arrows",
1002
            "arr",
1003
            "arrow long string longer than 12 bytes"
1004
        ],
1005
        "%arrow",
1006
        nlike,
1007
        vec![false, false, true, true, true]
1008
    );
1009
1010
    test_utf8_scalar!(
1011
        test_utf8_array_nlike_scalar_equals,
1012
        vec![
1013
            "arrow",
1014
            "parrow",
1015
            "arrows",
1016
            "arr",
1017
            "arrow long string longer than 12 bytes"
1018
        ],
1019
        "arrow",
1020
        nlike,
1021
        vec![false, true, true, true, true]
1022
    );
1023
1024
    test_utf8_scalar!(
1025
        test_utf8_array_nlike_scalar_one,
1026
        vec![
1027
            "arrow",
1028
            "arrows",
1029
            "parrow",
1030
            "arr",
1031
            "arrow long string longer than 12 bytes"
1032
        ],
1033
        "arrow_",
1034
        nlike,
1035
        vec![true, false, true, true, true]
1036
    );
1037
1038
    test_utf8!(
1039
        test_utf8_array_ilike,
1040
        vec![
1041
            "arrow",
1042
            "arrow",
1043
            "ARROW long string longer than 12 bytes",
1044
            "arrow",
1045
            "ARROW",
1046
            "ARROWS",
1047
            "arROw"
1048
        ],
1049
        vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
1050
        ilike,
1051
        vec![true, true, true, false, false, true, false]
1052
    );
1053
1054
    test_utf8_scalar!(
1055
        ilike_utf8_scalar_escape_testing,
1056
        vec![
1057
            "varchar(255)",
1058
            "int(255) long string longer than 12 bytes",
1059
            "varchar",
1060
            "int"
1061
        ],
1062
        "%(%)%",
1063
        ilike,
1064
        vec![true, true, false, false]
1065
    );
1066
1067
    test_utf8_scalar!(
1068
        test_utf8_array_ilike_scalar,
1069
        vec![
1070
            "arrow",
1071
            "parquet",
1072
            "datafusion",
1073
            "flight",
1074
            "arrow long string longer than 12 bytes"
1075
        ],
1076
        "%AR%",
1077
        ilike,
1078
        vec![true, true, false, false, true]
1079
    );
1080
1081
    test_utf8_scalar!(
1082
        test_utf8_array_ilike_scalar_start,
1083
        vec![
1084
            "arrow",
1085
            "parrow",
1086
            "arrows",
1087
            "ARR",
1088
            "arrow long string longer than 12 bytes"
1089
        ],
1090
        "aRRow%",
1091
        ilike,
1092
        vec![true, false, true, false, true]
1093
    );
1094
1095
    test_utf8_scalar!(
1096
        test_utf8_array_ilike_scalar_end,
1097
        vec![
1098
            "ArroW",
1099
            "parrow",
1100
            "ARRowS",
1101
            "arr",
1102
            "arrow long string longer than 12 bytes"
1103
        ],
1104
        "%arrow",
1105
        ilike,
1106
        vec![true, true, false, false, false]
1107
    );
1108
1109
    test_utf8_scalar!(
1110
        test_utf8_array_ilike_scalar_equals,
1111
        vec![
1112
            "arrow",
1113
            "parrow",
1114
            "arrows",
1115
            "arr",
1116
            "arrow long string longer than 12 bytes"
1117
        ],
1118
        "Arrow",
1119
        ilike,
1120
        vec![true, false, false, false, false]
1121
    );
1122
1123
    // We only implement loose matching
1124
    test_utf8_scalar!(
1125
        test_utf8_array_ilike_unicode,
1126
        vec![
1127
            "FFkoß",
1128
            "FFkoSS",
1129
            "FFkoss",
1130
            "FFkoS",
1131
            "FFkos",
1132
            "ffkoSS",
1133
            "ffkoß",
1134
            "FFKoSS",
1135
            "longer than 12 bytes FFKoSS"
1136
        ],
1137
        "FFkoSS",
1138
        ilike,
1139
        vec![false, true, true, false, false, false, false, true, false]
1140
    );
1141
1142
    test_utf8_scalar!(
1143
        test_utf8_array_ilike_unicode_starts,
1144
        vec![
1145
            "FFkoßsdlkdf",
1146
            "FFkoSSsdlkdf",
1147
            "FFkosssdlkdf",
1148
            "FFkoS",
1149
            "FFkos",
1150
            "ffkoSS",
1151
            "ffkoß",
1152
            "FfkosSsdfd",
1153
            "FFKoSS",
1154
            "longer than 12 bytes FFKoSS",
1155
        ],
1156
        "FFkoSS%",
1157
        ilike,
1158
        vec![false, true, true, false, false, false, false, true, true, false]
1159
    );
1160
1161
    test_utf8_scalar!(
1162
        test_utf8_array_ilike_unicode_ends,
1163
        vec![
1164
            "sdlkdfFFkoß",
1165
            "sdlkdfFFkoSS",
1166
            "sdlkdfFFkoss",
1167
            "FFkoS",
1168
            "FFkos",
1169
            "ffkoSS",
1170
            "ffkoß",
1171
            "h😃klFfkosS",
1172
            "FFKoSS",
1173
            "longer than 12 bytes FFKoSS",
1174
        ],
1175
        "%FFkoSS",
1176
        ilike,
1177
        vec![false, true, true, false, false, false, false, true, true, true]
1178
    );
1179
1180
    test_utf8_scalar!(
1181
        test_utf8_array_ilike_unicode_contains,
1182
        vec![
1183
            "sdlkdfFkoßsdfs",
1184
            "sdlkdfFkoSSdggs",
1185
            "sdlkdfFkosssdsd",
1186
            "FkoS",
1187
            "Fkos",
1188
            "ffkoSS",
1189
            "ffkoß",
1190
            "😃sadlksffkosSsh😃klF",
1191
            "😱slgffkosSsh😃klF",
1192
            "FFKoSS",
1193
            "longer than 12 bytes FFKoSS",
1194
        ],
1195
        "%FFkoSS%",
1196
        ilike,
1197
        vec![false, true, true, false, false, false, false, true, true, true, true]
1198
    );
1199
1200
    // Replicates `test_utf8_array_ilike_unicode_contains` and
1201
    // `test_utf8_array_ilike_unicode_contains_dyn` to
1202
    // demonstrate that `SQL CONTAINS` works as expected.
1203
    //
1204
    // NOTE: 5 of the values were changed because the original used a case insensitive `ilike`.
1205
    test_utf8_and_binary_scalar!(
1206
        test_utf8_and_binary_array_contains_unicode_contains,
1207
        vec![
1208
            "sdlkdfFkoßsdfs",
1209
            "sdlkdFFkoSSdggs", // Original was case insensitive "sdlkdfFkoSSdggs"
1210
            "sdlkdFFkoSSsdsd", // Original was case insensitive "sdlkdfFkosssdsd"
1211
            "FkoS",
1212
            "Fkos",
1213
            "ffkoSS",
1214
            "ffkoß",
1215
            "😃sadlksFFkoSSsh😃klF", // Original was case insensitive "😃sadlksffkosSsh😃klF"
1216
            "😱slgFFkoSSsh😃klF",    // Original was case insensitive "😱slgffkosSsh😃klF"
1217
            "FFkoSS",                // "FFKoSS"
1218
            "longer than 12 bytes FFKoSS",
1219
        ],
1220
        "FFkoSS",
1221
        contains,
1222
        vec![false, true, true, false, false, false, false, true, true, true, false]
1223
    );
1224
1225
    test_utf8_scalar!(
1226
        test_utf8_array_ilike_unicode_complex,
1227
        vec![
1228
            "sdlkdfFooßsdfs",
1229
            "sdlkdfFooSSdggs",
1230
            "sdlkdfFoosssdsd",
1231
            "FooS",
1232
            "Foos",
1233
            "ffooSS",
1234
            "ffooß",
1235
            "😃sadlksffofsSsh😃klF",
1236
            "😱slgffoesSsh😃klF",
1237
            "FFKoSS",
1238
            "longer than 12 bytes FFKoSS",
1239
        ],
1240
        "%FF__SS%",
1241
        ilike,
1242
        vec![false, true, true, false, false, false, false, true, true, true, true]
1243
    );
1244
1245
    // 😈 is four bytes long.
1246
    test_utf8_scalar!(
1247
        test_uff8_array_like_multibyte,
1248
        vec![
1249
            "sdlkdfFooßsdfs",
1250
            "sdlkdfFooSSdggs",
1251
            "sdlkdfFoosssdsd",
1252
            "FooS",
1253
            "Foos",
1254
            "ffooSS",
1255
            "ffooß",
1256
            "😃sadlksffofsSsh😈klF",
1257
            "😱slgffoesSsh😈klF",
1258
            "FFKoSS",
1259
            "longer than 12 bytes FFKoSS",
1260
        ],
1261
        "%Ssh😈klF",
1262
        like,
1263
        vec![false, false, false, false, false, false, false, true, true, false, false]
1264
    );
1265
1266
    test_utf8_scalar!(
1267
        test_utf8_array_ilike_scalar_one,
1268
        vec![
1269
            "arrow",
1270
            "arrows",
1271
            "parrow",
1272
            "arr",
1273
            "arrow long string longer than 12 bytes"
1274
        ],
1275
        "arrow_",
1276
        ilike,
1277
        vec![false, true, false, false, false]
1278
    );
1279
1280
    test_utf8!(
1281
        test_utf8_array_nilike,
1282
        vec![
1283
            "arrow",
1284
            "arrow",
1285
            "ARROW longer than 12 bytes string",
1286
            "arrow",
1287
            "ARROW",
1288
            "ARROWS",
1289
            "arROw"
1290
        ],
1291
        vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
1292
        nilike,
1293
        vec![false, false, false, true, true, false, true]
1294
    );
1295
1296
    test_utf8_scalar!(
1297
        nilike_utf8_scalar_escape_testing,
1298
        vec![
1299
            "varchar(255)",
1300
            "int(255) longer than 12 bytes string",
1301
            "varchar",
1302
            "int"
1303
        ],
1304
        "%(%)%",
1305
        nilike,
1306
        vec![false, false, true, true]
1307
    );
1308
1309
    test_utf8_scalar!(
1310
        test_utf8_array_nilike_scalar,
1311
        vec![
1312
            "arrow",
1313
            "parquet",
1314
            "datafusion",
1315
            "flight",
1316
            "arrow long string longer than 12 bytes"
1317
        ],
1318
        "%AR%",
1319
        nilike,
1320
        vec![false, false, true, true, false]
1321
    );
1322
1323
    test_utf8_scalar!(
1324
        test_utf8_array_nilike_scalar_start,
1325
        vec![
1326
            "arrow",
1327
            "parrow",
1328
            "arrows",
1329
            "ARR",
1330
            "arrow long string longer than 12 bytes"
1331
        ],
1332
        "aRRow%",
1333
        nilike,
1334
        vec![false, true, false, true, false]
1335
    );
1336
1337
    test_utf8_scalar!(
1338
        test_utf8_array_nilike_scalar_end,
1339
        vec![
1340
            "ArroW",
1341
            "parrow",
1342
            "ARRowS",
1343
            "arr",
1344
            "arrow long string longer than 12 bytes"
1345
        ],
1346
        "%arrow",
1347
        nilike,
1348
        vec![false, false, true, true, true]
1349
    );
1350
1351
    test_utf8_scalar!(
1352
        test_utf8_array_nilike_scalar_equals,
1353
        vec![
1354
            "arRow",
1355
            "parrow",
1356
            "arrows",
1357
            "arr",
1358
            "arrow long string longer than 12 bytes"
1359
        ],
1360
        "Arrow",
1361
        nilike,
1362
        vec![false, true, true, true, true]
1363
    );
1364
1365
    test_utf8_scalar!(
1366
        test_utf8_array_nilike_scalar_one,
1367
        vec![
1368
            "arrow",
1369
            "arrows",
1370
            "parrow",
1371
            "arr",
1372
            "arrow long string longer than 12 bytes"
1373
        ],
1374
        "arrow_",
1375
        nilike,
1376
        vec![true, false, true, true, true]
1377
    );
1378
1379
    #[test]
1380
    fn test_dict_like_kernels() {
1381
        let data = vec![
1382
            Some("Earth"),
1383
            Some("Fire"),
1384
            Some("Water"),
1385
            Some("Air"),
1386
            None,
1387
            Some("Air"),
1388
            Some("bbbbb\nAir"),
1389
        ];
1390
1391
        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1392
1393
        assert_eq!(
1394
            like_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1395
            BooleanArray::from(vec![
1396
                Some(false),
1397
                Some(false),
1398
                Some(false),
1399
                Some(true),
1400
                None,
1401
                Some(true),
1402
                Some(false),
1403
            ]),
1404
        );
1405
1406
        assert_eq!(
1407
            like_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1408
            BooleanArray::from(vec![
1409
                Some(false),
1410
                Some(false),
1411
                Some(false),
1412
                Some(true),
1413
                None,
1414
                Some(true),
1415
                Some(false),
1416
            ]),
1417
        );
1418
1419
        assert_eq!(
1420
            like_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1421
            BooleanArray::from(vec![
1422
                Some(false),
1423
                Some(false),
1424
                Some(true),
1425
                Some(false),
1426
                None,
1427
                Some(false),
1428
                Some(false),
1429
            ]),
1430
        );
1431
1432
        assert_eq!(
1433
            like_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1434
            BooleanArray::from(vec![
1435
                Some(false),
1436
                Some(false),
1437
                Some(true),
1438
                Some(false),
1439
                None,
1440
                Some(false),
1441
                Some(false),
1442
            ]),
1443
        );
1444
1445
        assert_eq!(
1446
            like_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1447
            BooleanArray::from(vec![
1448
                Some(false),
1449
                Some(false),
1450
                Some(true),
1451
                Some(true),
1452
                None,
1453
                Some(true),
1454
                Some(true),
1455
            ]),
1456
        );
1457
1458
        assert_eq!(
1459
            like_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1460
            BooleanArray::from(vec![
1461
                Some(false),
1462
                Some(false),
1463
                Some(true),
1464
                Some(true),
1465
                None,
1466
                Some(true),
1467
                Some(true),
1468
            ]),
1469
        );
1470
1471
        assert_eq!(
1472
            like_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1473
            BooleanArray::from(vec![
1474
                Some(false),
1475
                Some(true),
1476
                Some(false),
1477
                Some(true),
1478
                None,
1479
                Some(true),
1480
                Some(true),
1481
            ]),
1482
        );
1483
1484
        assert_eq!(
1485
            like_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1486
            BooleanArray::from(vec![
1487
                Some(false),
1488
                Some(true),
1489
                Some(false),
1490
                Some(true),
1491
                None,
1492
                Some(true),
1493
                Some(true),
1494
            ]),
1495
        );
1496
1497
        assert_eq!(
1498
            like_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1499
            BooleanArray::from(vec![
1500
                Some(true),
1501
                Some(false),
1502
                Some(true),
1503
                Some(false),
1504
                None,
1505
                Some(false),
1506
                Some(false),
1507
            ]),
1508
        );
1509
1510
        assert_eq!(
1511
            like_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1512
            BooleanArray::from(vec![
1513
                Some(true),
1514
                Some(false),
1515
                Some(true),
1516
                Some(false),
1517
                None,
1518
                Some(false),
1519
                Some(false),
1520
            ]),
1521
        );
1522
    }
1523
1524
    #[test]
1525
    fn test_dict_nlike_kernels() {
1526
        let data = vec![
1527
            Some("Earth"),
1528
            Some("Fire"),
1529
            Some("Water"),
1530
            Some("Air"),
1531
            None,
1532
            Some("Air"),
1533
            Some("bbbbb\nAir"),
1534
        ];
1535
1536
        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1537
1538
        assert_eq!(
1539
            nlike_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1540
            BooleanArray::from(vec![
1541
                Some(true),
1542
                Some(true),
1543
                Some(true),
1544
                Some(false),
1545
                None,
1546
                Some(false),
1547
                Some(true),
1548
            ]),
1549
        );
1550
1551
        assert_eq!(
1552
            nlike_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1553
            BooleanArray::from(vec![
1554
                Some(true),
1555
                Some(true),
1556
                Some(true),
1557
                Some(false),
1558
                None,
1559
                Some(false),
1560
                Some(true),
1561
            ]),
1562
        );
1563
1564
        assert_eq!(
1565
            nlike_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1566
            BooleanArray::from(vec![
1567
                Some(true),
1568
                Some(true),
1569
                Some(false),
1570
                Some(true),
1571
                None,
1572
                Some(true),
1573
                Some(true),
1574
            ]),
1575
        );
1576
1577
        assert_eq!(
1578
            nlike_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1579
            BooleanArray::from(vec![
1580
                Some(true),
1581
                Some(true),
1582
                Some(false),
1583
                Some(true),
1584
                None,
1585
                Some(true),
1586
                Some(true),
1587
            ]),
1588
        );
1589
1590
        assert_eq!(
1591
            nlike_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1592
            BooleanArray::from(vec![
1593
                Some(true),
1594
                Some(true),
1595
                Some(false),
1596
                Some(false),
1597
                None,
1598
                Some(false),
1599
                Some(false),
1600
            ]),
1601
        );
1602
1603
        assert_eq!(
1604
            nlike_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1605
            BooleanArray::from(vec![
1606
                Some(true),
1607
                Some(true),
1608
                Some(false),
1609
                Some(false),
1610
                None,
1611
                Some(false),
1612
                Some(false),
1613
            ]),
1614
        );
1615
1616
        assert_eq!(
1617
            nlike_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1618
            BooleanArray::from(vec![
1619
                Some(true),
1620
                Some(false),
1621
                Some(true),
1622
                Some(false),
1623
                None,
1624
                Some(false),
1625
                Some(false),
1626
            ]),
1627
        );
1628
1629
        assert_eq!(
1630
            nlike_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1631
            BooleanArray::from(vec![
1632
                Some(true),
1633
                Some(false),
1634
                Some(true),
1635
                Some(false),
1636
                None,
1637
                Some(false),
1638
                Some(false),
1639
            ]),
1640
        );
1641
1642
        assert_eq!(
1643
            nlike_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1644
            BooleanArray::from(vec![
1645
                Some(false),
1646
                Some(true),
1647
                Some(false),
1648
                Some(true),
1649
                None,
1650
                Some(true),
1651
                Some(true),
1652
            ]),
1653
        );
1654
1655
        assert_eq!(
1656
            nlike_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1657
            BooleanArray::from(vec![
1658
                Some(false),
1659
                Some(true),
1660
                Some(false),
1661
                Some(true),
1662
                None,
1663
                Some(true),
1664
                Some(true),
1665
            ]),
1666
        );
1667
    }
1668
1669
    #[test]
1670
    fn test_dict_ilike_kernels() {
1671
        let data = vec![
1672
            Some("Earth"),
1673
            Some("Fire"),
1674
            Some("Water"),
1675
            Some("Air"),
1676
            None,
1677
            Some("Air"),
1678
            Some("bbbbb\nAir"),
1679
        ];
1680
1681
        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1682
1683
        assert_eq!(
1684
            ilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1685
            BooleanArray::from(vec![
1686
                Some(false),
1687
                Some(false),
1688
                Some(false),
1689
                Some(true),
1690
                None,
1691
                Some(true),
1692
                Some(false),
1693
            ]),
1694
        );
1695
1696
        assert_eq!(
1697
            ilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1698
            BooleanArray::from(vec![
1699
                Some(false),
1700
                Some(false),
1701
                Some(false),
1702
                Some(true),
1703
                None,
1704
                Some(true),
1705
                Some(false),
1706
            ]),
1707
        );
1708
1709
        assert_eq!(
1710
            ilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1711
            BooleanArray::from(vec![
1712
                Some(false),
1713
                Some(false),
1714
                Some(true),
1715
                Some(false),
1716
                None,
1717
                Some(false),
1718
                Some(false),
1719
            ]),
1720
        );
1721
1722
        assert_eq!(
1723
            ilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1724
            BooleanArray::from(vec![
1725
                Some(false),
1726
                Some(false),
1727
                Some(true),
1728
                Some(false),
1729
                None,
1730
                Some(false),
1731
                Some(false),
1732
            ]),
1733
        );
1734
1735
        assert_eq!(
1736
            ilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1737
            BooleanArray::from(vec![
1738
                Some(false),
1739
                Some(false),
1740
                Some(true),
1741
                Some(true),
1742
                None,
1743
                Some(true),
1744
                Some(true),
1745
            ]),
1746
        );
1747
1748
        assert_eq!(
1749
            ilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1750
            BooleanArray::from(vec![
1751
                Some(false),
1752
                Some(false),
1753
                Some(true),
1754
                Some(true),
1755
                None,
1756
                Some(true),
1757
                Some(true),
1758
            ]),
1759
        );
1760
1761
        assert_eq!(
1762
            ilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1763
            BooleanArray::from(vec![
1764
                Some(false),
1765
                Some(true),
1766
                Some(false),
1767
                Some(true),
1768
                None,
1769
                Some(true),
1770
                Some(true),
1771
            ]),
1772
        );
1773
1774
        assert_eq!(
1775
            ilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1776
            BooleanArray::from(vec![
1777
                Some(false),
1778
                Some(true),
1779
                Some(false),
1780
                Some(true),
1781
                None,
1782
                Some(true),
1783
                Some(true),
1784
            ]),
1785
        );
1786
1787
        assert_eq!(
1788
            ilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1789
            BooleanArray::from(vec![
1790
                Some(true),
1791
                Some(false),
1792
                Some(true),
1793
                Some(true),
1794
                None,
1795
                Some(true),
1796
                Some(true),
1797
            ]),
1798
        );
1799
1800
        assert_eq!(
1801
            ilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1802
            BooleanArray::from(vec![
1803
                Some(true),
1804
                Some(false),
1805
                Some(true),
1806
                Some(true),
1807
                None,
1808
                Some(true),
1809
                Some(true),
1810
            ]),
1811
        );
1812
    }
1813
1814
    #[test]
1815
    fn test_dict_nilike_kernels() {
1816
        let data = vec![
1817
            Some("Earth"),
1818
            Some("Fire"),
1819
            Some("Water"),
1820
            Some("Air"),
1821
            None,
1822
            Some("Air"),
1823
            Some("bbbbb\nAir"),
1824
        ];
1825
1826
        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1827
1828
        assert_eq!(
1829
            nilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1830
            BooleanArray::from(vec![
1831
                Some(true),
1832
                Some(true),
1833
                Some(true),
1834
                Some(false),
1835
                None,
1836
                Some(false),
1837
                Some(true),
1838
            ]),
1839
        );
1840
1841
        assert_eq!(
1842
            nilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1843
            BooleanArray::from(vec![
1844
                Some(true),
1845
                Some(true),
1846
                Some(true),
1847
                Some(false),
1848
                None,
1849
                Some(false),
1850
                Some(true),
1851
            ]),
1852
        );
1853
1854
        assert_eq!(
1855
            nilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1856
            BooleanArray::from(vec![
1857
                Some(true),
1858
                Some(true),
1859
                Some(false),
1860
                Some(true),
1861
                None,
1862
                Some(true),
1863
                Some(true),
1864
            ]),
1865
        );
1866
1867
        assert_eq!(
1868
            nilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1869
            BooleanArray::from(vec![
1870
                Some(true),
1871
                Some(true),
1872
                Some(false),
1873
                Some(true),
1874
                None,
1875
                Some(true),
1876
                Some(true),
1877
            ]),
1878
        );
1879
1880
        assert_eq!(
1881
            nilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1882
            BooleanArray::from(vec![
1883
                Some(true),
1884
                Some(true),
1885
                Some(false),
1886
                Some(false),
1887
                None,
1888
                Some(false),
1889
                Some(false),
1890
            ]),
1891
        );
1892
1893
        assert_eq!(
1894
            nilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1895
            BooleanArray::from(vec![
1896
                Some(true),
1897
                Some(true),
1898
                Some(false),
1899
                Some(false),
1900
                None,
1901
                Some(false),
1902
                Some(false),
1903
            ]),
1904
        );
1905
1906
        assert_eq!(
1907
            nilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1908
            BooleanArray::from(vec![
1909
                Some(true),
1910
                Some(false),
1911
                Some(true),
1912
                Some(false),
1913
                None,
1914
                Some(false),
1915
                Some(false),
1916
            ]),
1917
        );
1918
1919
        assert_eq!(
1920
            nilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1921
            BooleanArray::from(vec![
1922
                Some(true),
1923
                Some(false),
1924
                Some(true),
1925
                Some(false),
1926
                None,
1927
                Some(false),
1928
                Some(false),
1929
            ]),
1930
        );
1931
1932
        assert_eq!(
1933
            nilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1934
            BooleanArray::from(vec![
1935
                Some(false),
1936
                Some(true),
1937
                Some(false),
1938
                Some(false),
1939
                None,
1940
                Some(false),
1941
                Some(false),
1942
            ]),
1943
        );
1944
1945
        assert_eq!(
1946
            nilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1947
            BooleanArray::from(vec![
1948
                Some(false),
1949
                Some(true),
1950
                Some(false),
1951
                Some(false),
1952
                None,
1953
                Some(false),
1954
                Some(false),
1955
            ]),
1956
        );
1957
    }
1958
1959
    #[test]
    fn string_null_like_pattern() {
        // Checks that a kernel result holds exactly one row and that the row is null.
        fn assert_single_null(result: &BooleanArray, pattern: &str) {
            assert_eq!(result.len(), 1, "With pattern {pattern}");
            assert_eq!(result.null_count(), 1, "With pattern {pattern}");
            assert!(result.is_null(0), "With pattern {pattern}");
        }

        // Each pattern shape can be routed through a different execution code path
        let patterns = [
            "",           // can execute as equality check
            "_",          // can execute as length check
            "%",          // can execute as starts_with("") or non-null check
            "a%",         // can execute as starts_with("a")
            "%a",         // can execute as ends_with("a")
            "a%b",        // can execute as starts_with("a") && ends_with("b")
            "%a%",        // can execute as contains("a")
            "%a%b_c_d%e", // can execute as regular expression
        ];

        for pattern in patterns {
            // Only null handling is under test, so every kernel variant
            // is expected to give the same (null) answer.
            for like_f in [like, ilike, nlike, nilike] {
                let null_scalar = Scalar::new(StringArray::new_null(1));
                let null_array = StringArray::new_null(1);
                let pattern_scalar = StringArray::new_scalar(pattern);
                let pattern_array = StringArray::from_iter_values([pattern]);

                // null scalar value, scalar pattern
                let r = like_f(&null_scalar, &pattern_scalar).unwrap();
                assert_single_null(&r, pattern);

                // null scalar value, array pattern
                let r = like_f(&null_scalar, &pattern_array).unwrap();
                assert_single_null(&r, pattern);

                // null array value, array pattern
                let r = like_f(&null_array, &pattern_array).unwrap();
                assert_single_null(&r, pattern);

                // null array value, scalar pattern
                let r = like_f(&null_array, &pattern_scalar).unwrap();
                assert_single_null(&r, pattern);
            }
        }
    }
2004
2005
    #[test]
    fn string_view_null_like_pattern() {
        // Checks that a kernel result holds exactly one row and that the row is null.
        fn assert_single_null(result: &BooleanArray, pattern: &str) {
            assert_eq!(result.len(), 1, "With pattern {pattern}");
            assert_eq!(result.null_count(), 1, "With pattern {pattern}");
            assert!(result.is_null(0), "With pattern {pattern}");
        }

        // Each pattern shape can be routed through a different execution code path
        let patterns = [
            "",           // can execute as equality check
            "_",          // can execute as length check
            "%",          // can execute as starts_with("") or non-null check
            "a%",         // can execute as starts_with("a")
            "%a",         // can execute as ends_with("a")
            "a%b",        // can execute as starts_with("a") && ends_with("b")
            "%a%",        // can execute as contains("a")
            "%a%b_c_d%e", // can execute as regular expression
        ];

        for pattern in patterns {
            // Only null handling is under test, so every kernel variant
            // is expected to give the same (null) answer.
            for like_f in [like, ilike, nlike, nilike] {
                let null_scalar = Scalar::new(StringViewArray::new_null(1));
                let null_array = StringViewArray::new_null(1);
                let pattern_scalar = StringViewArray::new_scalar(pattern);
                let pattern_array = StringViewArray::from_iter_values([pattern]);

                // null scalar value, scalar pattern
                let r = like_f(&null_scalar, &pattern_scalar).unwrap();
                assert_single_null(&r, pattern);

                // null scalar value, array pattern
                let r = like_f(&null_scalar, &pattern_array).unwrap();
                assert_single_null(&r, pattern);

                // null array value, array pattern
                let r = like_f(&null_array, &pattern_array).unwrap();
                assert_single_null(&r, pattern);

                // null array value, scalar pattern
                let r = like_f(&null_array, &pattern_scalar).unwrap();
                assert_single_null(&r, pattern);
            }
        }
    }
2050
2051
    #[test]
    fn string_like_scalar_null() {
        // Checks that a kernel result holds exactly one row and that the row is null.
        fn assert_single_null(result: &BooleanArray) {
            assert_eq!(result.len(), 1);
            assert_eq!(result.null_count(), 1);
            assert!(result.is_null(0));
        }

        // A null pattern must produce a null result for every kernel variant,
        // whichever side is passed as a scalar and whichever as an array.
        for like_f in [like, ilike, nlike, nilike] {
            let value_scalar = StringArray::new_scalar("a");
            let value_array = StringArray::from_iter_values(["a"]);
            let null_scalar = Scalar::new(StringArray::new_null(1));
            let null_array = StringArray::new_null(1);

            // scalar value, null scalar pattern
            assert_single_null(&like_f(&value_scalar, &null_scalar).unwrap());

            // array value, null scalar pattern
            assert_single_null(&like_f(&value_array, &null_scalar).unwrap());

            // array value, null array pattern
            assert_single_null(&like_f(&value_array, &null_array).unwrap());

            // scalar value, null array pattern
            assert_single_null(&like_f(&value_scalar, &null_array).unwrap());
        }
    }
2083
2084
    #[test]
    fn string_view_like_scalar_null() {
        // Checks that a kernel result holds exactly one row and that the row is null.
        fn assert_single_null(result: &BooleanArray) {
            assert_eq!(result.len(), 1);
            assert_eq!(result.null_count(), 1);
            assert!(result.is_null(0));
        }

        // A null pattern must produce a null result for every kernel variant,
        // whichever side is passed as a scalar and whichever as an array.
        for like_f in [like, ilike, nlike, nilike] {
            let value_scalar = StringViewArray::new_scalar("a");
            let value_array = StringViewArray::from_iter_values(["a"]);
            let null_scalar = Scalar::new(StringViewArray::new_null(1));
            let null_array = StringViewArray::new_null(1);

            // scalar value, null scalar pattern
            assert_single_null(&like_f(&value_scalar, &null_scalar).unwrap());

            // array value, null scalar pattern
            assert_single_null(&like_f(&value_array, &null_scalar).unwrap());

            // array value, null array pattern
            assert_single_null(&like_f(&value_array, &null_array).unwrap());

            // scalar value, null array pattern
            assert_single_null(&like_f(&value_scalar, &null_array).unwrap());
        }
    }
2116
2117
    #[test]
    fn like_escape() {
        // Table of (value, pattern, whether `value LIKE pattern` should hold).
        // Backslash is the escape character; raw strings keep the rows readable.
        let cases = [
            // Pattern is empty
            (r"", r"", true),
            (r"\", r"", false),
            // Pattern is a lone, dangling escape (some engines reject such a pattern)
            (r"", r"\", false),
            (r"\", r"\", true),
            (r"\\", r"\", false),
            (r"a", r"\", false),
            (r"\a", r"\", false),
            (r"\\a", r"\", false),
            // Pattern is one escaped backslash
            (r"", r"\\", false),
            (r"\", r"\\", true),
            (r"\\", r"\\", false),
            (r"a", r"\\", false),
            (r"\a", r"\\", false),
            (r"\\a", r"\\", false),
            // Pattern is one escaped backslash followed by a dangling escape
            (r"", r"\\\", false),
            (r"\", r"\\\", false),
            (r"\\", r"\\\", true),
            (r"\\\", r"\\\", false),
            (r"\\\\", r"\\\", false),
            (r"a", r"\\\", false),
            (r"\a", r"\\\", false),
            (r"\\a", r"\\\", false),
            // Pattern is two escaped backslashes
            (r"", r"\\\\", false),
            (r"\", r"\\\\", false),
            (r"\\", r"\\\\", true),
            (r"\\\", r"\\\\", false),
            (r"\\\\", r"\\\\", false),
            (r"\\\\\", r"\\\\", false),
            (r"a", r"\\\\", false),
            (r"\a", r"\\\\", false),
            (r"\\a", r"\\\\", false),
            // Escape applied to a non-wildcard character
            (r"", r"\a", false),
            (r"\", r"\a", false),
            (r"\\", r"\a", false),
            (r"a", r"\a", true),
            (r"\a", r"\a", false),
            (r"\\a", r"\a", false),
            // Escape applied to the _ wildcard
            (r"", r"\_", false),
            (r"\", r"\_", false),
            (r"\\", r"\_", false),
            (r"a", r"\_", false),
            (r"_", r"\_", true),
            (r"%", r"\_", false),
            (r"\a", r"\_", false),
            (r"\\a", r"\_", false),
            (r"\_", r"\_", false),
            (r"\\_", r"\_", false),
            // Escape applied to the % wildcard
            (r"", r"\%", false),
            (r"\", r"\%", false),
            (r"\\", r"\%", false),
            (r"a", r"\%", false),
            (r"_", r"\%", false),
            (r"%", r"\%", true),
            (r"\a", r"\%", false),
            (r"\\a", r"\%", false),
            (r"\%", r"\%", false),
            (r"\\%", r"\%", false),
            // Escaped backslash followed by a non-wildcard character
            (r"", r"\\a", false),
            (r"\", r"\\a", false),
            (r"\\", r"\\a", false),
            (r"a", r"\\a", false),
            (r"\a", r"\\a", true),
            (r"\\a", r"\\a", false),
            (r"\\\a", r"\\a", false),
            // Escaped backslash followed by the _ wildcard
            (r"", r"\\_", false),
            (r"\", r"\\_", false),
            (r"\\", r"\\_", true),
            (r"a", r"\\_", false),
            (r"_", r"\\_", false),
            (r"%", r"\\_", false),
            (r"\a", r"\\_", true),
            (r"\\a", r"\\_", false),
            (r"\_", r"\\_", true),
            (r"\\_", r"\\_", false),
            (r"\\\_", r"\\_", false),
            // Escaped backslash followed by the % wildcard
            (r"", r"\\%", false),
            (r"\", r"\\%", true),
            (r"\\", r"\\%", true),
            (r"a", r"\\%", false),
            (r"ab", r"\\%", false),
            (r"a%", r"\\%", false),
            (r"_", r"\\%", false),
            (r"%", r"\\%", false),
            (r"\a", r"\\%", true),
            (r"\\a", r"\\%", true),
            (r"\%", r"\\%", true),
            (r"\\%", r"\\%", true),
            (r"\\\%", r"\\%", true),
            // %... pattern ending in a dangling escape
            (r"\", r"%\", true),
            (r"\\", r"%\", true),
            (r"%\", r"%\", true),
            (r"%\\", r"%\", true),
            (r"abc\", r"%\", true),
            (r"abc", r"%\", false),
            // %... pattern ending in an escaped backslash
            (r"\", r"%\\", true),
            (r"\\", r"%\\", true),
            (r"%\\", r"%\\", true),
            (r"%\\\", r"%\\", true),
            (r"abc\", r"%\\", true),
            (r"abc", r"%\\", false),
            // %... pattern containing an escaped non-wildcard
            (r"ac", r"%a\c", true),
            (r"xyzac", r"%a\c", true),
            (r"abc", r"%a\c", false),
            (r"a\c", r"%a\c", false),
            (r"%a\c", r"%a\c", false),
            // %... pattern containing an escaped backslash
            (r"\", r"%a\\c", false),
            (r"\\", r"%a\\c", false),
            (r"ac", r"%a\\c", false),
            (r"a\c", r"%a\\c", true),
            (r"a\\c", r"%a\\c", false),
            (r"abc", r"%a\\c", false),
            (r"xyza\c", r"%a\\c", true),
            (r"xyza\\c", r"%a\\c", false),
            (r"%a\\c", r"%a\\c", false),
            // ...% pattern starting with an escaped backslash
            (r"\", r"\\%", true),
            (r"\\", r"\\%", true),
            (r"\\%", r"\\%", true),
            (r"\\\%", r"\\%", true),
            (r"\abc", r"\\%", true),
            (r"a", r"\\%", false),
            (r"abc", r"\\%", false),
            // ...% pattern containing an escaped non-wildcard
            (r"ac", r"a\c%", true),
            (r"acxyz", r"a\c%", true),
            (r"abc", r"a\c%", false),
            (r"a\c", r"a\c%", false),
            (r"a\c%", r"a\c%", false),
            (r"a\\c%", r"a\c%", false),
            // ...% pattern containing an escaped backslash
            (r"ac", r"a\\c%", false),
            (r"a\c", r"a\\c%", true),
            (r"a\cxyz", r"a\\c%", true),
            (r"a\\c", r"a\\c%", false),
            (r"a\\cxyz", r"a\\c%", false),
            (r"abc", r"a\\c%", false),
            (r"abcxyz", r"a\\c%", false),
            (r"a\\c%", r"a\\c%", false),
            // %...% pattern containing an escaped non-wildcard
            (r"ac", r"%a\c%", true),
            (r"xyzacxyz", r"%a\c%", true),
            (r"abc", r"%a\c%", false),
            (r"a\c", r"%a\c%", false),
            (r"xyza\cxyz", r"%a\c%", false),
            (r"%a\c%", r"%a\c%", false),
            (r"%a\\c%", r"%a\c%", false),
            // %...% pattern containing an escaped backslash
            (r"ac", r"%a\\c%", false),
            (r"a\c", r"%a\\c%", true),
            (r"xyza\cxyz", r"%a\\c%", true),
            (r"a\\c", r"%a\\c%", false),
            (r"xyza\\cxyz", r"%a\\c%", false),
            (r"abc", r"%a\\c%", false),
            (r"xyzabcxyz", r"%a\\c%", false),
            (r"%a\\c%", r"%a\\c%", false),
            // Odd number (7) of backslashes, then % (so the % is escaped)
            (r"\\%", r"\\\\\\\%", false),
            (r"\\\", r"\\\\\\\%", false),
            (r"\\\%", r"\\\\\\\%", true),
            (r"\\\\", r"\\\\\\\%", false),
            (r"\\\\%", r"\\\\\\\%", false),
            (r"\\\\\\\%", r"\\\\\\\%", false),
            // Odd number (7) of backslashes, then _ (so the _ is escaped)
            (r"\\\", r"\\\\\\\_", false),
            (r"\\\\", r"\\\\\\\_", false),
            (r"\\\_", r"\\\\\\\_", true),
            // NOTE(review): the next row repeats the second row of this group
            (r"\\\\", r"\\\\\\\_", false),
            (r"\\\a", r"\\\\\\\_", false),
            (r"\\\\_", r"\\\\\\\_", false),
            (r"\\\\\\\_", r"\\\\\\\_", false),
            // Even number (8) of backslashes, then % (so the % stays a wildcard)
            (r"\\\", r"\\\\\\\\%", false),
            (r"\\\\", r"\\\\\\\\%", true),
            (r"\\\\\", r"\\\\\\\\%", true),
            (r"\\\\xyz", r"\\\\\\\\%", true),
            (r"\\\\\\\\%", r"\\\\\\\\%", true),
            // Even number (8) of backslashes, then _ (so the _ stays a wildcard)
            (r"\\\", r"\\\\\\\\_", false),
            (r"\\\\", r"\\\\\\\\_", false),
            (r"\\\\\", r"\\\\\\\\_", true),
            (r"\\\\a", r"\\\\\\\\_", true),
            (r"\\\\\a", r"\\\\\\\\_", false),
            (r"\\\\ab", r"\\\\\\\\_", false),
            (r"\\\\\\\\_", r"\\\\\\\\_", false),
        ];

        for (value, pattern, is_match) in cases {
            // like/ilike must report `is_match`; nlike/nilike must report its negation.
            let matching = BooleanArray::from(vec![is_match]);
            let negated = BooleanArray::from(vec![!is_match]);

            // Run every case against each string representation.
            for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
                let value_datums = make_datums(value, &string_type);
                let pattern_datums = make_datums(pattern, &string_type);

                // Value and pattern datums are paired up position by position.
                for ((lhs, value_type), (rhs, pattern_type)) in zip(value_datums, pattern_datums) {
                    let lhs = lhs.as_ref();
                    let rhs = rhs.as_ref();

                    assert_eq!(
                        like(lhs, rhs).unwrap(),
                        matching,
                        "{value_type:?} «{value}» like {pattern_type:?} «{pattern}»"
                    );
                    assert_eq!(
                        ilike(lhs, rhs).unwrap(),
                        matching,
                        "{value_type:?} «{value}» ilike {pattern_type:?} «{pattern}»"
                    );
                    assert_eq!(
                        nlike(lhs, rhs).unwrap(),
                        negated,
                        "{value_type:?} «{value}» nlike {pattern_type:?} «{pattern}»"
                    );
                    assert_eq!(
                        nilike(lhs, rhs).unwrap(),
                        negated,
                        "{value_type:?} «{value}» nilike {pattern_type:?} «{pattern}»"
                    );
                }
            }
        }
    }
2357
2358
    #[test]
2359
    fn like_escape_many() {
2360
        // (value, pattern, expected)
2361
        let test_cases = vec![
2362
            (r"", r"", true),
2363
            (r"\", r"", false),
2364
            (r"\\", r"", false),
2365
            (r"\\\", r"", false),
2366
            (r"\\\\", r"", false),
2367
            (r"a", r"", false),
2368
            (r"\a", r"", false),
2369
            (r"\\a", r"", false),
2370
            (r"%", r"", false),
2371
            (r"\%", r"", false),
2372
            (r"\\%", r"", false),
2373
            (r"%%", r"", false),
2374
            (r"\%%", r"", false),
2375
            (r"\\%%", r"", false),
2376
            (r"_", r"", false),
2377
            (r"\_", r"", false),
2378
            (r"\\_", r"", false),
2379
            (r"__", r"", false),
2380
            (r"\__", r"", false),
2381
            (r"\\__", r"", false),
2382
            (r"abc", r"", false),
2383
            (r"a_c", r"", false),
2384
            (r"a\bc", r"", false),
2385
            (r"a\_c", r"", false),
2386
            (r"%abc", r"", false),
2387
            (r"\%abc", r"", false),
2388
            (r"a\\_c%", r"", false),
2389
            (r"", r"\", false),
2390
            (r"\", r"\", true),
2391
            (r"\\", r"\", false),
2392
            (r"\\\", r"\", false),
2393
            (r"\\\\", r"\", false),
2394
            (r"a", r"\", false),
2395
            (r"\a", r"\", false),
2396
            (r"\\a", r"\", false),
2397
            (r"%", r"\", false),
2398
            (r"\%", r"\", false),
2399
            (r"\\%", r"\", false),
2400
            (r"%%", r"\", false),
2401
            (r"\%%", r"\", false),
2402
            (r"\\%%", r"\", false),
2403
            (r"_", r"\", false),
2404
            (r"\_", r"\", false),
2405
            (r"\\_", r"\", false),
2406
            (r"__", r"\", false),
2407
            (r"\__", r"\", false),
2408
            (r"\\__", r"\", false),
2409
            (r"abc", r"\", false),
2410
            (r"a_c", r"\", false),
2411
            (r"a\bc", r"\", false),
2412
            (r"a\_c", r"\", false),
2413
            (r"%abc", r"\", false),
2414
            (r"\%abc", r"\", false),
2415
            (r"a\\_c%", r"\", false),
2416
            (r"", r"\\", false),
2417
            (r"\", r"\\", true),
2418
            (r"\\", r"\\", false),
2419
            (r"\\\", r"\\", false),
2420
            (r"\\\\", r"\\", false),
2421
            (r"a", r"\\", false),
2422
            (r"\a", r"\\", false),
2423
            (r"\\a", r"\\", false),
2424
            (r"%", r"\\", false),
2425
            (r"\%", r"\\", false),
2426
            (r"\\%", r"\\", false),
2427
            (r"%%", r"\\", false),
2428
            (r"\%%", r"\\", false),
2429
            (r"\\%%", r"\\", false),
2430
            (r"_", r"\\", false),
2431
            (r"\_", r"\\", false),
2432
            (r"\\_", r"\\", false),
2433
            (r"__", r"\\", false),
2434
            (r"\__", r"\\", false),
2435
            (r"\\__", r"\\", false),
2436
            (r"abc", r"\\", false),
2437
            (r"a_c", r"\\", false),
2438
            (r"a\bc", r"\\", false),
2439
            (r"a\_c", r"\\", false),
2440
            (r"%abc", r"\\", false),
2441
            (r"\%abc", r"\\", false),
2442
            (r"a\\_c%", r"\\", false),
2443
            (r"", r"\\\", false),
2444
            (r"\", r"\\\", false),
2445
            (r"\\", r"\\\", true),
2446
            (r"\\\", r"\\\", false),
2447
            (r"\\\\", r"\\\", false),
2448
            (r"a", r"\\\", false),
2449
            (r"\a", r"\\\", false),
2450
            (r"\\a", r"\\\", false),
2451
            (r"%", r"\\\", false),
2452
            (r"\%", r"\\\", false),
2453
            (r"\\%", r"\\\", false),
2454
            (r"%%", r"\\\", false),
2455
            (r"\%%", r"\\\", false),
2456
            (r"\\%%", r"\\\", false),
2457
            (r"_", r"\\\", false),
2458
            (r"\_", r"\\\", false),
2459
            (r"\\_", r"\\\", false),
2460
            (r"__", r"\\\", false),
2461
            (r"\__", r"\\\", false),
2462
            (r"\\__", r"\\\", false),
2463
            (r"abc", r"\\\", false),
2464
            (r"a_c", r"\\\", false),
2465
            (r"a\bc", r"\\\", false),
2466
            (r"a\_c", r"\\\", false),
2467
            (r"%abc", r"\\\", false),
2468
            (r"\%abc", r"\\\", false),
2469
            (r"a\\_c%", r"\\\", false),
2470
            (r"", r"\\\\", false),
2471
            (r"\", r"\\\\", false),
2472
            (r"\\", r"\\\\", true),
2473
            (r"\\\", r"\\\\", false),
2474
            (r"\\\\", r"\\\\", false),
2475
            (r"a", r"\\\\", false),
2476
            (r"\a", r"\\\\", false),
2477
            (r"\\a", r"\\\\", false),
2478
            (r"%", r"\\\\", false),
2479
            (r"\%", r"\\\\", false),
2480
            (r"\\%", r"\\\\", false),
2481
            (r"%%", r"\\\\", false),
2482
            (r"\%%", r"\\\\", false),
2483
            (r"\\%%", r"\\\\", false),
2484
            (r"_", r"\\\\", false),
2485
            (r"\_", r"\\\\", false),
2486
            (r"\\_", r"\\\\", false),
2487
            (r"__", r"\\\\", false),
2488
            (r"\__", r"\\\\", false),
2489
            (r"\\__", r"\\\\", false),
2490
            (r"abc", r"\\\\", false),
2491
            (r"a_c", r"\\\\", false),
2492
            (r"a\bc", r"\\\\", false),
2493
            (r"a\_c", r"\\\\", false),
2494
            (r"%abc", r"\\\\", false),
2495
            (r"\%abc", r"\\\\", false),
2496
            (r"a\\_c%", r"\\\\", false),
2497
            (r"", r"a", false),
2498
            (r"\", r"a", false),
2499
            (r"\\", r"a", false),
2500
            (r"\\\", r"a", false),
2501
            (r"\\\\", r"a", false),
2502
            (r"a", r"a", true),
2503
            (r"\a", r"a", false),
2504
            (r"\\a", r"a", false),
2505
            (r"%", r"a", false),
2506
            (r"\%", r"a", false),
2507
            (r"\\%", r"a", false),
2508
            (r"%%", r"a", false),
2509
            (r"\%%", r"a", false),
2510
            (r"\\%%", r"a", false),
2511
            (r"_", r"a", false),
2512
            (r"\_", r"a", false),
2513
            (r"\\_", r"a", false),
2514
            (r"__", r"a", false),
2515
            (r"\__", r"a", false),
2516
            (r"\\__", r"a", false),
2517
            (r"abc", r"a", false),
2518
            (r"a_c", r"a", false),
2519
            (r"a\bc", r"a", false),
2520
            (r"a\_c", r"a", false),
2521
            (r"%abc", r"a", false),
2522
            (r"\%abc", r"a", false),
2523
            (r"a\\_c%", r"a", false),
2524
            (r"", r"\a", false),
2525
            (r"\", r"\a", false),
2526
            (r"\\", r"\a", false),
2527
            (r"\\\", r"\a", false),
2528
            (r"\\\\", r"\a", false),
2529
            (r"a", r"\a", true),
2530
            (r"\a", r"\a", false),
2531
            (r"\\a", r"\a", false),
2532
            (r"%", r"\a", false),
2533
            (r"\%", r"\a", false),
2534
            (r"\\%", r"\a", false),
2535
            (r"%%", r"\a", false),
2536
            (r"\%%", r"\a", false),
2537
            (r"\\%%", r"\a", false),
2538
            (r"_", r"\a", false),
2539
            (r"\_", r"\a", false),
2540
            (r"\\_", r"\a", false),
2541
            (r"__", r"\a", false),
2542
            (r"\__", r"\a", false),
2543
            (r"\\__", r"\a", false),
2544
            (r"abc", r"\a", false),
2545
            (r"a_c", r"\a", false),
2546
            (r"a\bc", r"\a", false),
2547
            (r"a\_c", r"\a", false),
2548
            (r"%abc", r"\a", false),
2549
            (r"\%abc", r"\a", false),
2550
            (r"a\\_c%", r"\a", false),
2551
            (r"", r"\\a", false),
2552
            (r"\", r"\\a", false),
2553
            (r"\\", r"\\a", false),
2554
            (r"\\\", r"\\a", false),
2555
            (r"\\\\", r"\\a", false),
2556
            (r"a", r"\\a", false),
2557
            (r"\a", r"\\a", true),
2558
            (r"\\a", r"\\a", false),
2559
            (r"%", r"\\a", false),
2560
            (r"\%", r"\\a", false),
2561
            (r"\\%", r"\\a", false),
2562
            (r"%%", r"\\a", false),
2563
            (r"\%%", r"\\a", false),
2564
            (r"\\%%", r"\\a", false),
2565
            (r"_", r"\\a", false),
2566
            (r"\_", r"\\a", false),
2567
            (r"\\_", r"\\a", false),
2568
            (r"__", r"\\a", false),
2569
            (r"\__", r"\\a", false),
2570
            (r"\\__", r"\\a", false),
2571
            (r"abc", r"\\a", false),
2572
            (r"a_c", r"\\a", false),
2573
            (r"a\bc", r"\\a", false),
2574
            (r"a\_c", r"\\a", false),
2575
            (r"%abc", r"\\a", false),
2576
            (r"\%abc", r"\\a", false),
2577
            (r"a\\_c%", r"\\a", false),
2578
            (r"", r"%", true),
2579
            (r"\", r"%", true),
2580
            (r"\\", r"%", true),
2581
            (r"\\\", r"%", true),
2582
            (r"\\\\", r"%", true),
2583
            (r"a", r"%", true),
2584
            (r"\a", r"%", true),
2585
            (r"\\a", r"%", true),
2586
            (r"%", r"%", true),
2587
            (r"\%", r"%", true),
2588
            (r"\\%", r"%", true),
2589
            (r"%%", r"%", true),
2590
            (r"\%%", r"%", true),
2591
            (r"\\%%", r"%", true),
2592
            (r"_", r"%", true),
2593
            (r"\_", r"%", true),
2594
            (r"\\_", r"%", true),
2595
            (r"__", r"%", true),
2596
            (r"\__", r"%", true),
2597
            (r"\\__", r"%", true),
2598
            (r"abc", r"%", true),
2599
            (r"a_c", r"%", true),
2600
            (r"a\bc", r"%", true),
2601
            (r"a\_c", r"%", true),
2602
            (r"%abc", r"%", true),
2603
            (r"\%abc", r"%", true),
2604
            (r"a\\_c%", r"%", true),
2605
            (r"", r"\%", false),
2606
            (r"\", r"\%", false),
2607
            (r"\\", r"\%", false),
2608
            (r"\\\", r"\%", false),
2609
            (r"\\\\", r"\%", false),
2610
            (r"a", r"\%", false),
2611
            (r"\a", r"\%", false),
2612
            (r"\\a", r"\%", false),
2613
            (r"%", r"\%", true),
2614
            (r"\%", r"\%", false),
2615
            (r"\\%", r"\%", false),
2616
            (r"%%", r"\%", false),
2617
            (r"\%%", r"\%", false),
2618
            (r"\\%%", r"\%", false),
2619
            (r"_", r"\%", false),
2620
            (r"\_", r"\%", false),
2621
            (r"\\_", r"\%", false),
2622
            (r"__", r"\%", false),
2623
            (r"\__", r"\%", false),
2624
            (r"\\__", r"\%", false),
2625
            (r"abc", r"\%", false),
2626
            (r"a_c", r"\%", false),
2627
            (r"a\bc", r"\%", false),
2628
            (r"a\_c", r"\%", false),
2629
            (r"%abc", r"\%", false),
2630
            (r"\%abc", r"\%", false),
2631
            (r"a\\_c%", r"\%", false),
2632
            (r"", r"\\%", false),
2633
            (r"\", r"\\%", true),
2634
            (r"\\", r"\\%", true),
2635
            (r"\\\", r"\\%", true),
2636
            (r"\\\\", r"\\%", true),
2637
            (r"a", r"\\%", false),
2638
            (r"\a", r"\\%", true),
2639
            (r"\\a", r"\\%", true),
2640
            (r"%", r"\\%", false),
2641
            (r"\%", r"\\%", true),
2642
            (r"\\%", r"\\%", true),
2643
            (r"%%", r"\\%", false),
2644
            (r"\%%", r"\\%", true),
2645
            (r"\\%%", r"\\%", true),
2646
            (r"_", r"\\%", false),
2647
            (r"\_", r"\\%", true),
2648
            (r"\\_", r"\\%", true),
2649
            (r"__", r"\\%", false),
2650
            (r"\__", r"\\%", true),
2651
            (r"\\__", r"\\%", true),
2652
            (r"abc", r"\\%", false),
2653
            (r"a_c", r"\\%", false),
2654
            (r"a\bc", r"\\%", false),
2655
            (r"a\_c", r"\\%", false),
2656
            (r"%abc", r"\\%", false),
2657
            (r"\%abc", r"\\%", true),
2658
            (r"a\\_c%", r"\\%", false),
2659
            (r"", r"%%", true),
2660
            (r"\", r"%%", true),
2661
            (r"\\", r"%%", true),
2662
            (r"\\\", r"%%", true),
2663
            (r"\\\\", r"%%", true),
2664
            (r"a", r"%%", true),
2665
            (r"\a", r"%%", true),
2666
            (r"\\a", r"%%", true),
2667
            (r"%", r"%%", true),
2668
            (r"\%", r"%%", true),
2669
            (r"\\%", r"%%", true),
2670
            (r"%%", r"%%", true),
2671
            (r"\%%", r"%%", true),
2672
            (r"\\%%", r"%%", true),
2673
            (r"_", r"%%", true),
2674
            (r"\_", r"%%", true),
2675
            (r"\\_", r"%%", true),
2676
            (r"__", r"%%", true),
2677
            (r"\__", r"%%", true),
2678
            (r"\\__", r"%%", true),
2679
            (r"abc", r"%%", true),
2680
            (r"a_c", r"%%", true),
2681
            (r"a\bc", r"%%", true),
2682
            (r"a\_c", r"%%", true),
2683
            (r"%abc", r"%%", true),
2684
            (r"\%abc", r"%%", true),
2685
            (r"a\\_c%", r"%%", true),
2686
            (r"", r"\%%", false),
2687
            (r"\", r"\%%", false),
2688
            (r"\\", r"\%%", false),
2689
            (r"\\\", r"\%%", false),
2690
            (r"\\\\", r"\%%", false),
2691
            (r"a", r"\%%", false),
2692
            (r"\a", r"\%%", false),
2693
            (r"\\a", r"\%%", false),
2694
            (r"%", r"\%%", true),
2695
            (r"\%", r"\%%", false),
2696
            (r"\\%", r"\%%", false),
2697
            (r"%%", r"\%%", true),
2698
            (r"\%%", r"\%%", false),
2699
            (r"\\%%", r"\%%", false),
2700
            (r"_", r"\%%", false),
2701
            (r"\_", r"\%%", false),
2702
            (r"\\_", r"\%%", false),
2703
            (r"__", r"\%%", false),
2704
            (r"\__", r"\%%", false),
2705
            (r"\\__", r"\%%", false),
2706
            (r"abc", r"\%%", false),
2707
            (r"a_c", r"\%%", false),
2708
            (r"a\bc", r"\%%", false),
2709
            (r"a\_c", r"\%%", false),
2710
            (r"%abc", r"\%%", true),
2711
            (r"\%abc", r"\%%", false),
2712
            (r"a\\_c%", r"\%%", false),
2713
            (r"", r"\\%%", false),
2714
            (r"\", r"\\%%", true),
2715
            (r"\\", r"\\%%", true),
2716
            (r"\\\", r"\\%%", true),
2717
            (r"\\\\", r"\\%%", true),
2718
            (r"a", r"\\%%", false),
2719
            (r"\a", r"\\%%", true),
2720
            (r"\\a", r"\\%%", true),
2721
            (r"%", r"\\%%", false),
2722
            (r"\%", r"\\%%", true),
2723
            (r"\\%", r"\\%%", true),
2724
            (r"%%", r"\\%%", false),
2725
            (r"\%%", r"\\%%", true),
2726
            (r"\\%%", r"\\%%", true),
2727
            (r"_", r"\\%%", false),
2728
            (r"\_", r"\\%%", true),
2729
            (r"\\_", r"\\%%", true),
2730
            (r"__", r"\\%%", false),
2731
            (r"\__", r"\\%%", true),
2732
            (r"\\__", r"\\%%", true),
2733
            (r"abc", r"\\%%", false),
2734
            (r"a_c", r"\\%%", false),
2735
            (r"a\bc", r"\\%%", false),
2736
            (r"a\_c", r"\\%%", false),
2737
            (r"%abc", r"\\%%", false),
2738
            (r"\%abc", r"\\%%", true),
2739
            (r"a\\_c%", r"\\%%", false),
2740
            (r"", r"_", false),
2741
            (r"\", r"_", true),
2742
            (r"\\", r"_", false),
2743
            (r"\\\", r"_", false),
2744
            (r"\\\\", r"_", false),
2745
            (r"a", r"_", true),
2746
            (r"\a", r"_", false),
2747
            (r"\\a", r"_", false),
2748
            (r"%", r"_", true),
2749
            (r"\%", r"_", false),
2750
            (r"\\%", r"_", false),
2751
            (r"%%", r"_", false),
2752
            (r"\%%", r"_", false),
2753
            (r"\\%%", r"_", false),
2754
            (r"_", r"_", true),
2755
            (r"\_", r"_", false),
2756
            (r"\\_", r"_", false),
2757
            (r"__", r"_", false),
2758
            (r"\__", r"_", false),
2759
            (r"\\__", r"_", false),
2760
            (r"abc", r"_", false),
2761
            (r"a_c", r"_", false),
2762
            (r"a\bc", r"_", false),
2763
            (r"a\_c", r"_", false),
2764
            (r"%abc", r"_", false),
2765
            (r"\%abc", r"_", false),
2766
            (r"a\\_c%", r"_", false),
2767
            (r"", r"\_", false),
2768
            (r"\", r"\_", false),
2769
            (r"\\", r"\_", false),
2770
            (r"\\\", r"\_", false),
2771
            (r"\\\\", r"\_", false),
2772
            (r"a", r"\_", false),
2773
            (r"\a", r"\_", false),
2774
            (r"\\a", r"\_", false),
2775
            (r"%", r"\_", false),
2776
            (r"\%", r"\_", false),
2777
            (r"\\%", r"\_", false),
2778
            (r"%%", r"\_", false),
2779
            (r"\%%", r"\_", false),
2780
            (r"\\%%", r"\_", false),
2781
            (r"_", r"\_", true),
2782
            (r"\_", r"\_", false),
2783
            (r"\\_", r"\_", false),
2784
            (r"__", r"\_", false),
2785
            (r"\__", r"\_", false),
2786
            (r"\\__", r"\_", false),
2787
            (r"abc", r"\_", false),
2788
            (r"a_c", r"\_", false),
2789
            (r"a\bc", r"\_", false),
2790
            (r"a\_c", r"\_", false),
2791
            (r"%abc", r"\_", false),
2792
            (r"\%abc", r"\_", false),
2793
            (r"a\\_c%", r"\_", false),
2794
            (r"", r"\\_", false),
2795
            (r"\", r"\\_", false),
2796
            (r"\\", r"\\_", true),
2797
            (r"\\\", r"\\_", false),
2798
            (r"\\\\", r"\\_", false),
2799
            (r"a", r"\\_", false),
2800
            (r"\a", r"\\_", true),
2801
            (r"\\a", r"\\_", false),
2802
            (r"%", r"\\_", false),
2803
            (r"\%", r"\\_", true),
2804
            (r"\\%", r"\\_", false),
2805
            (r"%%", r"\\_", false),
2806
            (r"\%%", r"\\_", false),
2807
            (r"\\%%", r"\\_", false),
2808
            (r"_", r"\\_", false),
2809
            (r"\_", r"\\_", true),
2810
            (r"\\_", r"\\_", false),
2811
            (r"__", r"\\_", false),
2812
            (r"\__", r"\\_", false),
2813
            (r"\\__", r"\\_", false),
2814
            (r"abc", r"\\_", false),
2815
            (r"a_c", r"\\_", false),
2816
            (r"a\bc", r"\\_", false),
2817
            (r"a\_c", r"\\_", false),
2818
            (r"%abc", r"\\_", false),
2819
            (r"\%abc", r"\\_", false),
2820
            (r"a\\_c%", r"\\_", false),
2821
            (r"", r"__", false),
2822
            (r"\", r"__", false),
2823
            (r"\\", r"__", true),
2824
            (r"\\\", r"__", false),
2825
            (r"\\\\", r"__", false),
2826
            (r"a", r"__", false),
2827
            (r"\a", r"__", true),
2828
            (r"\\a", r"__", false),
2829
            (r"%", r"__", false),
2830
            (r"\%", r"__", true),
2831
            (r"\\%", r"__", false),
2832
            (r"%%", r"__", true),
2833
            (r"\%%", r"__", false),
2834
            (r"\\%%", r"__", false),
2835
            (r"_", r"__", false),
2836
            (r"\_", r"__", true),
2837
            (r"\\_", r"__", false),
2838
            (r"__", r"__", true),
2839
            (r"\__", r"__", false),
2840
            (r"\\__", r"__", false),
2841
            (r"abc", r"__", false),
2842
            (r"a_c", r"__", false),
2843
            (r"a\bc", r"__", false),
2844
            (r"a\_c", r"__", false),
2845
            (r"%abc", r"__", false),
2846
            (r"\%abc", r"__", false),
2847
            (r"a\\_c%", r"__", false),
2848
            (r"", r"\__", false),
2849
            (r"\", r"\__", false),
2850
            (r"\\", r"\__", false),
2851
            (r"\\\", r"\__", false),
2852
            (r"\\\\", r"\__", false),
2853
            (r"a", r"\__", false),
2854
            (r"\a", r"\__", false),
2855
            (r"\\a", r"\__", false),
2856
            (r"%", r"\__", false),
2857
            (r"\%", r"\__", false),
2858
            (r"\\%", r"\__", false),
2859
            (r"%%", r"\__", false),
2860
            (r"\%%", r"\__", false),
2861
            (r"\\%%", r"\__", false),
2862
            (r"_", r"\__", false),
2863
            (r"\_", r"\__", false),
2864
            (r"\\_", r"\__", false),
2865
            (r"__", r"\__", true),
2866
            (r"\__", r"\__", false),
2867
            (r"\\__", r"\__", false),
2868
            (r"abc", r"\__", false),
2869
            (r"a_c", r"\__", false),
2870
            (r"a\bc", r"\__", false),
2871
            (r"a\_c", r"\__", false),
2872
            (r"%abc", r"\__", false),
2873
            (r"\%abc", r"\__", false),
2874
            (r"a\\_c%", r"\__", false),
2875
            (r"", r"\\__", false),
2876
            (r"\", r"\\__", false),
2877
            (r"\\", r"\\__", false),
2878
            (r"\\\", r"\\__", true),
2879
            (r"\\\\", r"\\__", false),
2880
            (r"a", r"\\__", false),
2881
            (r"\a", r"\\__", false),
2882
            (r"\\a", r"\\__", true),
2883
            (r"%", r"\\__", false),
2884
            (r"\%", r"\\__", false),
2885
            (r"\\%", r"\\__", true),
2886
            (r"%%", r"\\__", false),
2887
            (r"\%%", r"\\__", true),
2888
            (r"\\%%", r"\\__", false),
2889
            (r"_", r"\\__", false),
2890
            (r"\_", r"\\__", false),
2891
            (r"\\_", r"\\__", true),
2892
            (r"__", r"\\__", false),
2893
            (r"\__", r"\\__", true),
2894
            (r"\\__", r"\\__", false),
2895
            (r"abc", r"\\__", false),
2896
            (r"a_c", r"\\__", false),
2897
            (r"a\bc", r"\\__", false),
2898
            (r"a\_c", r"\\__", false),
2899
            (r"%abc", r"\\__", false),
2900
            (r"\%abc", r"\\__", false),
2901
            (r"a\\_c%", r"\\__", false),
2902
            (r"", r"abc", false),
2903
            (r"\", r"abc", false),
2904
            (r"\\", r"abc", false),
2905
            (r"\\\", r"abc", false),
2906
            (r"\\\\", r"abc", false),
2907
            (r"a", r"abc", false),
2908
            (r"\a", r"abc", false),
2909
            (r"\\a", r"abc", false),
2910
            (r"%", r"abc", false),
2911
            (r"\%", r"abc", false),
2912
            (r"\\%", r"abc", false),
2913
            (r"%%", r"abc", false),
2914
            (r"\%%", r"abc", false),
2915
            (r"\\%%", r"abc", false),
2916
            (r"_", r"abc", false),
2917
            (r"\_", r"abc", false),
2918
            (r"\\_", r"abc", false),
2919
            (r"__", r"abc", false),
2920
            (r"\__", r"abc", false),
2921
            (r"\\__", r"abc", false),
2922
            (r"abc", r"abc", true),
2923
            (r"a_c", r"abc", false),
2924
            (r"a\bc", r"abc", false),
2925
            (r"a\_c", r"abc", false),
2926
            (r"%abc", r"abc", false),
2927
            (r"\%abc", r"abc", false),
2928
            (r"a\\_c%", r"abc", false),
2929
            (r"", r"a_c", false),
2930
            (r"\", r"a_c", false),
2931
            (r"\\", r"a_c", false),
2932
            (r"\\\", r"a_c", false),
2933
            (r"\\\\", r"a_c", false),
2934
            (r"a", r"a_c", false),
2935
            (r"\a", r"a_c", false),
2936
            (r"\\a", r"a_c", false),
2937
            (r"%", r"a_c", false),
2938
            (r"\%", r"a_c", false),
2939
            (r"\\%", r"a_c", false),
2940
            (r"%%", r"a_c", false),
2941
            (r"\%%", r"a_c", false),
2942
            (r"\\%%", r"a_c", false),
2943
            (r"_", r"a_c", false),
2944
            (r"\_", r"a_c", false),
2945
            (r"\\_", r"a_c", false),
2946
            (r"__", r"a_c", false),
2947
            (r"\__", r"a_c", false),
2948
            (r"\\__", r"a_c", false),
2949
            (r"abc", r"a_c", true),
2950
            (r"a_c", r"a_c", true),
2951
            (r"a\bc", r"a_c", false),
2952
            (r"a\_c", r"a_c", false),
2953
            (r"%abc", r"a_c", false),
2954
            (r"\%abc", r"a_c", false),
2955
            (r"a\\_c%", r"a_c", false),
2956
            (r"", r"a\bc", false),
2957
            (r"\", r"a\bc", false),
2958
            (r"\\", r"a\bc", false),
2959
            (r"\\\", r"a\bc", false),
2960
            (r"\\\\", r"a\bc", false),
2961
            (r"a", r"a\bc", false),
2962
            (r"\a", r"a\bc", false),
2963
            (r"\\a", r"a\bc", false),
2964
            (r"%", r"a\bc", false),
2965
            (r"\%", r"a\bc", false),
2966
            (r"\\%", r"a\bc", false),
2967
            (r"%%", r"a\bc", false),
2968
            (r"\%%", r"a\bc", false),
2969
            (r"\\%%", r"a\bc", false),
2970
            (r"_", r"a\bc", false),
2971
            (r"\_", r"a\bc", false),
2972
            (r"\\_", r"a\bc", false),
2973
            (r"__", r"a\bc", false),
2974
            (r"\__", r"a\bc", false),
2975
            (r"\\__", r"a\bc", false),
2976
            (r"abc", r"a\bc", true),
2977
            (r"a_c", r"a\bc", false),
2978
            (r"a\bc", r"a\bc", false),
2979
            (r"a\_c", r"a\bc", false),
2980
            (r"%abc", r"a\bc", false),
2981
            (r"\%abc", r"a\bc", false),
2982
            (r"a\\_c%", r"a\bc", false),
2983
            (r"", r"a\_c", false),
2984
            (r"\", r"a\_c", false),
2985
            (r"\\", r"a\_c", false),
2986
            (r"\\\", r"a\_c", false),
2987
            (r"\\\\", r"a\_c", false),
2988
            (r"a", r"a\_c", false),
2989
            (r"\a", r"a\_c", false),
2990
            (r"\\a", r"a\_c", false),
2991
            (r"%", r"a\_c", false),
2992
            (r"\%", r"a\_c", false),
2993
            (r"\\%", r"a\_c", false),
2994
            (r"%%", r"a\_c", false),
2995
            (r"\%%", r"a\_c", false),
2996
            (r"\\%%", r"a\_c", false),
2997
            (r"_", r"a\_c", false),
2998
            (r"\_", r"a\_c", false),
2999
            (r"\\_", r"a\_c", false),
3000
            (r"__", r"a\_c", false),
3001
            (r"\__", r"a\_c", false),
3002
            (r"\\__", r"a\_c", false),
3003
            (r"abc", r"a\_c", false),
3004
            (r"a_c", r"a\_c", true),
3005
            (r"a\bc", r"a\_c", false),
3006
            (r"a\_c", r"a\_c", false),
3007
            (r"%abc", r"a\_c", false),
3008
            (r"\%abc", r"a\_c", false),
3009
            (r"a\\_c%", r"a\_c", false),
3010
            (r"", r"%abc", false),
3011
            (r"\", r"%abc", false),
3012
            (r"\\", r"%abc", false),
3013
            (r"\\\", r"%abc", false),
3014
            (r"\\\\", r"%abc", false),
3015
            (r"a", r"%abc", false),
3016
            (r"\a", r"%abc", false),
3017
            (r"\\a", r"%abc", false),
3018
            (r"%", r"%abc", false),
3019
            (r"\%", r"%abc", false),
3020
            (r"\\%", r"%abc", false),
3021
            (r"%%", r"%abc", false),
3022
            (r"\%%", r"%abc", false),
3023
            (r"\\%%", r"%abc", false),
3024
            (r"_", r"%abc", false),
3025
            (r"\_", r"%abc", false),
3026
            (r"\\_", r"%abc", false),
3027
            (r"__", r"%abc", false),
3028
            (r"\__", r"%abc", false),
3029
            (r"\\__", r"%abc", false),
3030
            (r"abc", r"%abc", true),
3031
            (r"a_c", r"%abc", false),
3032
            (r"a\bc", r"%abc", false),
3033
            (r"a\_c", r"%abc", false),
3034
            (r"%abc", r"%abc", true),
3035
            (r"\%abc", r"%abc", true),
3036
            (r"a\\_c%", r"%abc", false),
3037
            (r"", r"\%abc", false),
3038
            (r"\", r"\%abc", false),
3039
            (r"\\", r"\%abc", false),
3040
            (r"\\\", r"\%abc", false),
3041
            (r"\\\\", r"\%abc", false),
3042
            (r"a", r"\%abc", false),
3043
            (r"\a", r"\%abc", false),
3044
            (r"\\a", r"\%abc", false),
3045
            (r"%", r"\%abc", false),
3046
            (r"\%", r"\%abc", false),
3047
            (r"\\%", r"\%abc", false),
3048
            (r"%%", r"\%abc", false),
3049
            (r"\%%", r"\%abc", false),
3050
            (r"\\%%", r"\%abc", false),
3051
            (r"_", r"\%abc", false),
3052
            (r"\_", r"\%abc", false),
3053
            (r"\\_", r"\%abc", false),
3054
            (r"__", r"\%abc", false),
3055
            (r"\__", r"\%abc", false),
3056
            (r"\\__", r"\%abc", false),
3057
            (r"abc", r"\%abc", false),
3058
            (r"a_c", r"\%abc", false),
3059
            (r"a\bc", r"\%abc", false),
3060
            (r"a\_c", r"\%abc", false),
3061
            (r"%abc", r"\%abc", true),
3062
            (r"\%abc", r"\%abc", false),
3063
            (r"a\\_c%", r"\%abc", false),
3064
            (r"", r"a\\_c%", false),
3065
            (r"\", r"a\\_c%", false),
3066
            (r"\\", r"a\\_c%", false),
3067
            (r"\\\", r"a\\_c%", false),
3068
            (r"\\\\", r"a\\_c%", false),
3069
            (r"a", r"a\\_c%", false),
3070
            (r"\a", r"a\\_c%", false),
3071
            (r"\\a", r"a\\_c%", false),
3072
            (r"%", r"a\\_c%", false),
3073
            (r"\%", r"a\\_c%", false),
3074
            (r"\\%", r"a\\_c%", false),
3075
            (r"%%", r"a\\_c%", false),
3076
            (r"\%%", r"a\\_c%", false),
3077
            (r"\\%%", r"a\\_c%", false),
3078
            (r"_", r"a\\_c%", false),
3079
            (r"\_", r"a\\_c%", false),
3080
            (r"\\_", r"a\\_c%", false),
3081
            (r"__", r"a\\_c%", false),
3082
            (r"\__", r"a\\_c%", false),
3083
            (r"\\__", r"a\\_c%", false),
3084
            (r"abc", r"a\\_c%", false),
3085
            (r"a_c", r"a\\_c%", false),
3086
            (r"a\bc", r"a\\_c%", true),
3087
            (r"a\_c", r"a\\_c%", true),
3088
            (r"%abc", r"a\\_c%", false),
3089
            (r"\%abc", r"a\\_c%", false),
3090
            (r"a\\_c%", r"a\\_c%", false),
3091
        ];
3092
3093
        let values = test_cases
3094
            .iter()
3095
            .map(|(value, _, _)| *value)
3096
            .collect::<Vec<_>>();
3097
        let patterns = test_cases
3098
            .iter()
3099
            .map(|(_, pattern, _)| *pattern)
3100
            .collect::<Vec<_>>();
3101
        let expected = BooleanArray::from(
3102
            test_cases
3103
                .iter()
3104
                .map(|(_, _, expected)| *expected)
3105
                .collect::<Vec<_>>(),
3106
        );
3107
        let unexpected = BooleanArray::from(
3108
            test_cases
3109
                .iter()
3110
                .map(|(_, _, expected)| !*expected)
3111
                .collect::<Vec<_>>(),
3112
        );
3113
3114
        for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
3115
            let values = make_array(values.iter(), &string_type);
3116
            let patterns = make_array(patterns.iter(), &string_type);
3117
            let (values, patterns) = (values.as_ref(), patterns.as_ref());
3118
3119
            assert_eq!(like(&values, &patterns).unwrap(), expected,);
3120
            assert_eq!(ilike(&values, &patterns).unwrap(), expected,);
3121
            assert_eq!(nlike(&values, &patterns).unwrap(), unexpected,);
3122
            assert_eq!(nilike(&values, &patterns).unwrap(), unexpected,);
3123
        }
3124
    }
3125
3126
    fn make_datums(
3127
        value: impl AsRef<str>,
3128
        data_type: &DataType,
3129
    ) -> Vec<(Box<dyn Datum>, DatumType)> {
3130
        match data_type {
3131
            DataType::Utf8 => {
3132
                let array = StringArray::from_iter_values([value]);
3133
                vec![
3134
                    (Box::new(array.clone()), DatumType::Array),
3135
                    (Box::new(Scalar::new(array)), DatumType::Scalar),
3136
                ]
3137
            }
3138
            DataType::LargeUtf8 => {
3139
                let array = LargeStringArray::from_iter_values([value]);
3140
                vec![
3141
                    (Box::new(array.clone()), DatumType::Array),
3142
                    (Box::new(Scalar::new(array)), DatumType::Scalar),
3143
                ]
3144
            }
3145
            DataType::Utf8View => {
3146
                let array = StringViewArray::from_iter_values([value]);
3147
                vec![
3148
                    (Box::new(array.clone()), DatumType::Array),
3149
                    (Box::new(Scalar::new(array)), DatumType::Scalar),
3150
                ]
3151
            }
3152
            _ => unimplemented!(),
3153
        }
3154
    }
3155
3156
    fn make_array(
3157
        values: impl IntoIterator<Item: AsRef<str>>,
3158
        data_type: &DataType,
3159
    ) -> Box<dyn Array> {
3160
        match data_type {
3161
            DataType::Utf8 => Box::new(StringArray::from_iter_values(values)),
3162
            DataType::LargeUtf8 => Box::new(LargeStringArray::from_iter_values(values)),
3163
            DataType::Utf8View => Box::new(StringViewArray::from_iter_values(values)),
3164
            _ => unimplemented!(),
3165
        }
3166
    }
3167
3168
    /// Tags which `Datum` form a test input was built as.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    enum DatumType {
        /// The input is passed as a plain array.
        Array,
        /// The input is a one-element array wrapped in `Scalar`.
        Scalar,
    }
3173
}