/Users/andrewlamb/Software/arrow-rs/arrow-string/src/like.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Provide SQL's LIKE operators for Arrow's string arrays |
19 | | |
20 | | use crate::predicate::Predicate; |
21 | | |
22 | | use arrow_array::cast::AsArray; |
23 | | use arrow_array::*; |
24 | | use arrow_schema::*; |
25 | | use arrow_select::take::take; |
26 | | |
27 | | use std::sync::Arc; |
28 | | |
29 | | use crate::binary_like::binary_apply; |
30 | | pub use arrow_array::StringArrayType; |
31 | | |
/// The string-matching operation requested by one of the kernels in this module.
///
/// The `bool` carried by [`Op::Like`] and [`Op::ILike`] is the negation flag:
/// `false` selects the plain operator and `true` selects its `NOT` form.
#[derive(Debug)]
pub(crate) enum Op {
    /// `LIKE`, or `NOT LIKE` when the flag is `true`
    Like(bool),
    /// `ILIKE`, or `NOT ILIKE` when the flag is `true`
    ILike(bool),
    /// Substring containment
    Contains,
    /// Prefix test
    StartsWith,
    /// Suffix test
    EndsWith,
}

impl std::fmt::Display for Op {
    /// Writes the upper-case operator name, e.g. `LIKE` or `NLIKE`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Resolve the name first so that there is a single write call.
        let name = match self {
            Op::Like(negated) => {
                if *negated {
                    "NLIKE"
                } else {
                    "LIKE"
                }
            }
            Op::ILike(negated) => {
                if *negated {
                    "NILIKE"
                } else {
                    "ILIKE"
                }
            }
            Op::Contains => "CONTAINS",
            Op::StartsWith => "STARTS_WITH",
            Op::EndsWith => "ENDS_WITH",
        };
        f.write_str(name)
    }
}
53 | | } |
54 | | |
55 | | /// Perform SQL `left LIKE right` |
56 | | /// |
57 | | /// # Supported DataTypes |
58 | | /// |
59 | | /// `left` and `right` must be the same type, and one of |
60 | | /// - Utf8 |
61 | | /// - LargeUtf8 |
62 | | /// - Utf8View |
63 | | /// |
64 | | /// There are two wildcards supported with the LIKE operator: |
65 | | /// |
66 | | /// 1. `%` - The percent sign represents zero, one, or multiple characters |
67 | | /// 2. `_` - The underscore represents a single character |
68 | | /// |
69 | | /// Example |
70 | | /// ``` |
71 | | /// # use arrow_array::{StringArray, BooleanArray}; |
72 | | /// # use arrow_string::like::like; |
73 | | /// let strings = StringArray::from(vec!["Arrow", "Arrow", "Arrow", "Ar"]); |
74 | | /// let patterns = StringArray::from(vec!["A%", "B%", "A.", "A_"]); |
75 | | /// |
76 | | /// let result = like(&strings, &patterns).unwrap(); |
77 | | /// assert_eq!(result, BooleanArray::from(vec![true, false, false, true])); |
78 | | /// ``` |
79 | 0 | pub fn like(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> { |
80 | 0 | like_op(Op::Like(false), left, right) |
81 | 0 | } |
82 | | |
83 | | /// Perform SQL `left ILIKE right` |
84 | | /// |
85 | | /// # Notes |
86 | | /// - This is a case-insensitive version of [`like`] |
87 | | /// - See the documentation on [`like`] for more details |
88 | | /// - Implements loose matching as defined by the Unicode standard. For example, |
89 | | /// the `ff` ligature is not equivalent to `FF` and `ß` is not equivalent to `SS` |
90 | 0 | pub fn ilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> { |
91 | 0 | like_op(Op::ILike(false), left, right) |
92 | 0 | } |
93 | | |
94 | | /// Perform SQL `left NOT LIKE right` |
95 | | /// |
96 | | /// # Notes |
97 | | /// - This is a negative of [`like`] |
98 | | /// - See the documentation on [`like`] for more details |
99 | 0 | pub fn nlike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> { |
100 | 0 | like_op(Op::Like(true), left, right) |
101 | 0 | } |
102 | | |
103 | | /// Perform SQL `left NOT ILIKE right` |
104 | | /// |
105 | | /// # Notes |
106 | | /// - This is a negative of [`like`] |
107 | | /// - See the documentation on [`ilike`] for more details |
108 | 0 | pub fn nilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> { |
109 | 0 | like_op(Op::ILike(true), left, right) |
110 | 0 | } |
111 | | |
112 | | /// Perform SQL `STARTSWITH(left, right)` |
113 | | /// |
114 | | /// # Supported DataTypes |
115 | | /// |
116 | | /// `left` and `right` must be the same type, and one of |
117 | | /// - Utf8 |
118 | | /// - LargeUtf8 |
119 | | /// - Utf8View |
120 | | /// - Binary |
121 | | /// - LargeBinary |
122 | | /// - BinaryView |
123 | | /// |
124 | | /// # Example |
125 | | /// ``` |
126 | | /// # use arrow_array::{StringArray, BooleanArray}; |
127 | | /// # use arrow_string::like::{like, starts_with}; |
128 | | /// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs", "arrow-rs", "Parquet"]); |
129 | | /// let patterns = StringArray::from(vec!["arr", "arrow", "arrow-cpp", "p"]); |
130 | | /// |
131 | | /// let result = starts_with(&strings, &patterns).unwrap(); |
132 | | /// assert_eq!(result, BooleanArray::from(vec![true, true, false, false])); |
133 | | /// ``` |
134 | 0 | pub fn starts_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> { |
135 | 0 | like_op(Op::StartsWith, left, right) |
136 | 0 | } |
137 | | |
138 | | /// Perform SQL `ENDSWITH(left, right)` |
139 | | /// |
140 | | /// # Supported DataTypes |
141 | | /// |
142 | | /// `left` and `right` must be the same type, and one of |
143 | | /// - Utf8 |
144 | | /// - LargeUtf8 |
145 | | /// - Utf8View |
146 | | /// - Binary |
147 | | /// - LargeBinary |
148 | | /// - BinaryView |
149 | | /// |
150 | | /// # Example |
151 | | /// ``` |
152 | | /// # use arrow_array::{StringArray, BooleanArray}; |
153 | | /// # use arrow_string::like::{ends_with, like, starts_with}; |
154 | | /// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs", "Parquet"]); |
155 | | /// let patterns = StringArray::from(vec!["arr", "-rs", "t"]); |
156 | | /// |
157 | | /// let result = ends_with(&strings, &patterns).unwrap(); |
158 | | /// assert_eq!(result, BooleanArray::from(vec![false, true, true])); |
159 | | /// ``` |
160 | 0 | pub fn ends_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> { |
161 | 0 | like_op(Op::EndsWith, left, right) |
162 | 0 | } |
163 | | |
164 | | /// Perform SQL `CONTAINS(left, right)` |
165 | | /// |
166 | | /// # Supported DataTypes |
167 | | /// |
168 | | /// `left` and `right` must be the same type, and one of |
169 | | /// - Utf8 |
170 | | /// - LargeUtf8 |
171 | | /// - Utf8View |
172 | | /// - Binary |
173 | | /// - LargeBinary |
174 | | /// - BinaryView |
175 | | /// |
176 | | /// # Example |
177 | | /// ``` |
178 | | /// # use arrow_array::{StringArray, BooleanArray}; |
179 | | /// # use arrow_string::like::{contains, like, starts_with}; |
180 | | /// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs", "arrow-rs", "Parquet"]); |
181 | | /// let patterns = StringArray::from(vec!["arr", "-rs", "arrow-cpp", "X"]); |
182 | | /// |
183 | | /// let result = contains(&strings, &patterns).unwrap(); |
184 | | /// assert_eq!(result, BooleanArray::from(vec![true, true, false, false])); |
185 | | /// ``` |
186 | 0 | pub fn contains(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> { |
187 | 0 | like_op(Op::Contains, left, right) |
188 | 0 | } |
189 | | |
190 | 0 | fn like_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<BooleanArray, ArrowError> { |
191 | | use arrow_schema::DataType::*; |
192 | 0 | let (l, l_s) = lhs.get(); |
193 | 0 | let (r, r_s) = rhs.get(); |
194 | | |
195 | 0 | if l.len() != r.len() && !l_s && !r_s { |
196 | 0 | return Err(ArrowError::InvalidArgumentError(format!( |
197 | 0 | "Cannot compare arrays of different lengths, got {} vs {}", |
198 | 0 | l.len(), |
199 | 0 | r.len() |
200 | 0 | ))); |
201 | 0 | } |
202 | | |
203 | 0 | let l_v = l.as_any_dictionary_opt(); |
204 | 0 | let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l); |
205 | | |
206 | 0 | let r_v = r.as_any_dictionary_opt(); |
207 | 0 | let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r); |
208 | | |
209 | 0 | match (l.data_type(), r.data_type()) { |
210 | 0 | (Utf8, Utf8) => string_apply::<&GenericStringArray<i32>>( |
211 | 0 | op, |
212 | 0 | l.as_string(), |
213 | 0 | l_s, |
214 | 0 | l_v, |
215 | 0 | r.as_string(), |
216 | 0 | r_s, |
217 | 0 | r_v, |
218 | | ), |
219 | 0 | (LargeUtf8, LargeUtf8) => string_apply::<&GenericStringArray<i64>>( |
220 | 0 | op, |
221 | 0 | l.as_string(), |
222 | 0 | l_s, |
223 | 0 | l_v, |
224 | 0 | r.as_string(), |
225 | 0 | r_s, |
226 | 0 | r_v, |
227 | | ), |
228 | 0 | (Utf8View, Utf8View) => string_apply::<&StringViewArray>( |
229 | 0 | op, |
230 | 0 | l.as_string_view(), |
231 | 0 | l_s, |
232 | 0 | l_v, |
233 | 0 | r.as_string_view(), |
234 | 0 | r_s, |
235 | 0 | r_v, |
236 | | ), |
237 | 0 | (Binary, Binary) => binary_apply::<&GenericBinaryArray<i32>>( |
238 | 0 | op.try_into()?, |
239 | 0 | l.as_binary(), |
240 | 0 | l_s, |
241 | 0 | l_v, |
242 | 0 | r.as_binary(), |
243 | 0 | r_s, |
244 | 0 | r_v, |
245 | | ), |
246 | 0 | (LargeBinary, LargeBinary) => binary_apply::<&GenericBinaryArray<i64>>( |
247 | 0 | op.try_into()?, |
248 | 0 | l.as_binary(), |
249 | 0 | l_s, |
250 | 0 | l_v, |
251 | 0 | r.as_binary(), |
252 | 0 | r_s, |
253 | 0 | r_v, |
254 | | ), |
255 | 0 | (BinaryView, BinaryView) => binary_apply::<&BinaryViewArray>( |
256 | 0 | op.try_into()?, |
257 | 0 | l.as_binary_view(), |
258 | 0 | l_s, |
259 | 0 | l_v, |
260 | 0 | r.as_binary_view(), |
261 | 0 | r_s, |
262 | 0 | r_v, |
263 | | ), |
264 | 0 | (l_t, r_t) => Err(ArrowError::InvalidArgumentError(format!( |
265 | 0 | "Invalid string/binary operation: {l_t} {op} {r_t}" |
266 | 0 | ))), |
267 | | } |
268 | 0 | } |
269 | | |
270 | 0 | fn string_apply<'a, T: StringArrayType<'a> + 'a>( |
271 | 0 | op: Op, |
272 | 0 | l: T, |
273 | 0 | l_s: bool, |
274 | 0 | l_v: Option<&'a dyn AnyDictionaryArray>, |
275 | 0 | r: T, |
276 | 0 | r_s: bool, |
277 | 0 | r_v: Option<&'a dyn AnyDictionaryArray>, |
278 | 0 | ) -> Result<BooleanArray, ArrowError> { |
279 | 0 | let l_len = l_v.map(|l| l.len()).unwrap_or(l.len()); |
280 | 0 | if r_s { |
281 | 0 | let idx = match r_v { |
282 | 0 | Some(dict) if dict.null_count() != 0 => return Ok(BooleanArray::new_null(l_len)), |
283 | 0 | Some(dict) => dict.normalized_keys()[0], |
284 | 0 | None => 0, |
285 | | }; |
286 | 0 | if r.is_null(idx) { |
287 | 0 | return Ok(BooleanArray::new_null(l_len)); |
288 | 0 | } |
289 | 0 | op_scalar::<T>(op, l, l_v, r.value(idx)) |
290 | | } else { |
291 | 0 | match (l_s, l_v, r_v) { |
292 | | (true, None, None) => { |
293 | 0 | let v = l.is_valid(0).then(|| l.value(0)); |
294 | 0 | op_binary(op, std::iter::repeat(v), r.iter()) |
295 | | } |
296 | 0 | (true, Some(l_v), None) => { |
297 | 0 | let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]); |
298 | 0 | let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx))); |
299 | 0 | op_binary(op, std::iter::repeat(v), r.iter()) |
300 | | } |
301 | 0 | (true, None, Some(r_v)) => { |
302 | 0 | let v = l.is_valid(0).then(|| l.value(0)); |
303 | 0 | op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v)) |
304 | | } |
305 | 0 | (true, Some(l_v), Some(r_v)) => { |
306 | 0 | let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]); |
307 | 0 | let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx))); |
308 | 0 | op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v)) |
309 | | } |
310 | 0 | (false, None, None) => op_binary(op, l.iter(), r.iter()), |
311 | 0 | (false, Some(l_v), None) => op_binary(op, vectored_iter(l, l_v), r.iter()), |
312 | 0 | (false, None, Some(r_v)) => op_binary(op, l.iter(), vectored_iter(r, r_v)), |
313 | 0 | (false, Some(l_v), Some(r_v)) => { |
314 | 0 | op_binary(op, vectored_iter(l, l_v), vectored_iter(r, r_v)) |
315 | | } |
316 | | } |
317 | | } |
318 | 0 | } |
319 | | |
320 | | #[inline(never)] |
321 | 0 | fn op_scalar<'a, T: StringArrayType<'a>>( |
322 | 0 | op: Op, |
323 | 0 | l: T, |
324 | 0 | l_v: Option<&dyn AnyDictionaryArray>, |
325 | 0 | r: &str, |
326 | 0 | ) -> Result<BooleanArray, ArrowError> { |
327 | 0 | let r = match op { |
328 | 0 | Op::Like(neg) => Predicate::like(r)?.evaluate_array(l, neg), |
329 | 0 | Op::ILike(neg) => Predicate::ilike(r, l.is_ascii())?.evaluate_array(l, neg), |
330 | 0 | Op::Contains => Predicate::contains(r).evaluate_array(l, false), |
331 | 0 | Op::StartsWith => Predicate::StartsWith(r).evaluate_array(l, false), |
332 | 0 | Op::EndsWith => Predicate::EndsWith(r).evaluate_array(l, false), |
333 | | }; |
334 | | |
335 | 0 | Ok(match l_v { |
336 | 0 | Some(v) => take(&r, v.keys(), None)?.as_boolean().clone(), |
337 | 0 | None => r, |
338 | | }) |
339 | 0 | } |
340 | | |
341 | 0 | fn vectored_iter<'a, T: StringArrayType<'a> + 'a>( |
342 | 0 | a: T, |
343 | 0 | a_v: &'a dyn AnyDictionaryArray, |
344 | 0 | ) -> impl Iterator<Item = Option<&'a str>> + 'a { |
345 | 0 | let nulls = a_v.nulls(); |
346 | 0 | let keys = a_v.normalized_keys(); |
347 | 0 | keys.into_iter().enumerate().map(move |(idx, key)| { |
348 | 0 | if nulls.map(|n| n.is_null(idx)).unwrap_or_default() || a.is_null(key) { |
349 | 0 | return None; |
350 | 0 | } |
351 | 0 | Some(a.value(key)) |
352 | 0 | }) |
353 | 0 | } |
354 | | |
355 | | #[inline(never)] |
356 | 0 | fn op_binary<'a>( |
357 | 0 | op: Op, |
358 | 0 | l: impl Iterator<Item = Option<&'a str>>, |
359 | 0 | r: impl Iterator<Item = Option<&'a str>>, |
360 | 0 | ) -> Result<BooleanArray, ArrowError> { |
361 | 0 | match op { |
362 | 0 | Op::Like(neg) => binary_predicate(l, r, neg, Predicate::like), |
363 | 0 | Op::ILike(neg) => binary_predicate(l, r, neg, |s| Predicate::ilike(s, false)), |
364 | 0 | Op::Contains => Ok(l.zip(r).map(|(l, r)| Some(str_contains(l?, r?))).collect()), |
365 | 0 | Op::StartsWith => Ok(l |
366 | 0 | .zip(r) |
367 | 0 | .map(|(l, r)| Some(Predicate::StartsWith(r?).evaluate(l?))) |
368 | 0 | .collect()), |
369 | 0 | Op::EndsWith => Ok(l |
370 | 0 | .zip(r) |
371 | 0 | .map(|(l, r)| Some(Predicate::EndsWith(r?).evaluate(l?))) |
372 | 0 | .collect()), |
373 | | } |
374 | 0 | } |
375 | | |
376 | 0 | fn str_contains(haystack: &str, needle: &str) -> bool { |
377 | 0 | memchr::memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some() |
378 | 0 | } |
379 | | |
380 | 0 | fn binary_predicate<'a>( |
381 | 0 | l: impl Iterator<Item = Option<&'a str>>, |
382 | 0 | r: impl Iterator<Item = Option<&'a str>>, |
383 | 0 | neg: bool, |
384 | 0 | f: impl Fn(&'a str) -> Result<Predicate<'a>, ArrowError>, |
385 | 0 | ) -> Result<BooleanArray, ArrowError> { |
386 | 0 | let mut previous = None; |
387 | 0 | l.zip(r) |
388 | 0 | .map(|(l, r)| match (l, r) { |
389 | 0 | (Some(l), Some(r)) => { |
390 | 0 | let p: &Predicate = match previous { |
391 | 0 | Some((expr, ref predicate)) if expr == r => predicate, |
392 | 0 | _ => &previous.insert((r, f(r)?)).1, |
393 | | }; |
394 | 0 | Ok(Some(p.evaluate(l) != neg)) |
395 | | } |
396 | 0 | _ => Ok(None), |
397 | 0 | }) |
398 | 0 | .collect() |
399 | 0 | } |
400 | | |
401 | | // Deprecated kernels |
402 | | |
403 | 0 | fn make_scalar(data_type: &DataType, scalar: &str) -> Result<ArrayRef, ArrowError> { |
404 | 0 | match data_type { |
405 | 0 | DataType::Utf8 => Ok(Arc::new(StringArray::from_iter_values([scalar]))), |
406 | 0 | DataType::LargeUtf8 => Ok(Arc::new(LargeStringArray::from_iter_values([scalar]))), |
407 | 0 | DataType::Dictionary(_, v) => make_scalar(v.as_ref(), scalar), |
408 | 0 | d => Err(ArrowError::InvalidArgumentError(format!( |
409 | 0 | "Unsupported string scalar data type {d:?}", |
410 | 0 | ))), |
411 | | } |
412 | 0 | } |
413 | | |
// Generates the four deprecated entry points that predate the `Datum`-based
// kernels. Each generated function forwards to `$fn_datum`:
//
// - `$fn_array`:      typed string array against typed string array
// - `$fn_scalar`:     typed string array against a `&str` pattern
// - `$fn_array_dyn`:  `&dyn Array` against `&dyn Array`
// - `$fn_scalar_dyn`: `&dyn Array` against a `&str` pattern
//
// `$deprecation` is the note attached to every generated function.
macro_rules! legacy_kernels {
    ($fn_datum:ident, $fn_array:ident, $fn_scalar:ident, $fn_array_dyn:ident, $fn_scalar_dyn:ident, $deprecation:expr) => {
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &GenericStringArray<O>,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(left, right)
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &str,
        ) -> Result<BooleanArray, ArrowError> {
            // Wrap the pattern in a one-element array so it can act as a scalar datum.
            let pattern = GenericStringArray::<O>::from_iter_values([right]);
            let pattern = Scalar::new(&pattern);
            $fn_datum(left, &pattern)
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array_dyn(
            left: &dyn Array,
            right: &dyn Array,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(&left, &right)
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar_dyn(left: &dyn Array, right: &str) -> Result<BooleanArray, ArrowError> {
            // The scalar has to match the (value) type of `left`; unsupported
            // types are reported as an error by `make_scalar`.
            let pattern = make_scalar(left.data_type(), right)?;
            let pattern = Scalar::new(&pattern);
            $fn_datum(&left, &pattern)
        }
    };
}
452 | | |
453 | | legacy_kernels!( |
454 | | like, |
455 | | like_utf8, |
456 | | like_utf8_scalar, |
457 | | like_dyn, |
458 | | like_utf8_scalar_dyn, |
459 | | "Use arrow_string::like::like" |
460 | | ); |
461 | | legacy_kernels!( |
462 | | ilike, |
463 | | ilike_utf8, |
464 | | ilike_utf8_scalar, |
465 | | ilike_dyn, |
466 | | ilike_utf8_scalar_dyn, |
467 | | "Use arrow_string::like::ilike" |
468 | | ); |
469 | | legacy_kernels!( |
470 | | nlike, |
471 | | nlike_utf8, |
472 | | nlike_utf8_scalar, |
473 | | nlike_dyn, |
474 | | nlike_utf8_scalar_dyn, |
475 | | "Use arrow_string::like::nlike" |
476 | | ); |
477 | | legacy_kernels!( |
478 | | nilike, |
479 | | nilike_utf8, |
480 | | nilike_utf8_scalar, |
481 | | nilike_dyn, |
482 | | nilike_utf8_scalar_dyn, |
483 | | "Use arrow_string::like::nilike" |
484 | | ); |
485 | | legacy_kernels!( |
486 | | contains, |
487 | | contains_utf8, |
488 | | contains_utf8_scalar, |
489 | | contains_dyn, |
490 | | contains_utf8_scalar_dyn, |
491 | | "Use arrow_string::like::contains" |
492 | | ); |
493 | | legacy_kernels!( |
494 | | starts_with, |
495 | | starts_with_utf8, |
496 | | starts_with_utf8_scalar, |
497 | | starts_with_dyn, |
498 | | starts_with_utf8_scalar_dyn, |
499 | | "Use arrow_string::like::starts_with" |
500 | | ); |
501 | | |
502 | | legacy_kernels!( |
503 | | ends_with, |
504 | | ends_with_utf8, |
505 | | ends_with_utf8_scalar, |
506 | | ends_with_dyn, |
507 | | ends_with_utf8_scalar_dyn, |
508 | | "Use arrow_string::like::ends_with" |
509 | | ); |
510 | | |
511 | | #[cfg(test)] |
512 | | #[allow(deprecated)] |
513 | | mod tests { |
514 | | use super::*; |
515 | | use arrow_array::builder::BinaryDictionaryBuilder; |
516 | | use arrow_array::types::{ArrowDictionaryKeyType, Int8Type}; |
517 | | use std::iter::zip; |
518 | | |
519 | | fn convert_binary_iterator_to_binary_dictionary< |
520 | | 'a, |
521 | | K: ArrowDictionaryKeyType, |
522 | | I: IntoIterator<Item = &'a [u8]>, |
523 | | >( |
524 | | iter: I, |
525 | | ) -> DictionaryArray<K> { |
526 | | let it = iter.into_iter(); |
527 | | let (lower, _) = it.size_hint(); |
528 | | let mut builder = BinaryDictionaryBuilder::with_capacity(lower, 256, 1024); |
529 | | it.for_each(|i| { |
530 | | builder |
531 | | .append(i) |
532 | | .expect("Unable to append a value to a dictionary array."); |
533 | | }); |
534 | | |
535 | | builder.finish() |
536 | | } |
537 | | |
/// Generates a test that checks `op(left, right)` where both sides are arrays.
///
/// The same inputs are run through four array implementations:
/// - `StringArray`
/// - `LargeStringArray`
/// - `StringViewArray`
/// - `DictionaryArray`
macro_rules! test_utf8 {
    ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
        #[test]
        fn $test_name() {
            let expected = BooleanArray::from($expected);

            // Utf8
            let values = StringArray::from($left);
            let patterns = StringArray::from($right);
            assert_eq!($op(&values, &patterns).unwrap(), expected);

            // LargeUtf8
            let values = LargeStringArray::from($left);
            let patterns = LargeStringArray::from($right);
            assert_eq!($op(&values, &patterns).unwrap(), expected);

            // Utf8View
            let values = StringViewArray::from($left);
            let patterns = StringViewArray::from($right);
            assert_eq!($op(&values, &patterns).unwrap(), expected);

            // Dictionary of strings
            let values: DictionaryArray<Int8Type> = $left.into_iter().collect();
            let patterns: DictionaryArray<Int8Type> = $right.into_iter().collect();
            assert_eq!($op(&values, &patterns).unwrap(), expected);
        }
    };
}
572 | | |
/// Applying `op(left, right)`, both sides are arrays
///
/// The macro tests seven array implementations, first with the inputs as
/// strings and then with the same inputs as bytes:
/// - `StringArray`
/// - `LargeStringArray`
/// - `StringViewArray`
/// - `DictionaryArray` with string values
/// - `BinaryArray`
/// - `LargeBinaryArray`
/// - `DictionaryArray` with binary values
macro_rules! test_utf8_and_binary {
    ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
        #[test]
        fn $test_name() {
            let expected = BooleanArray::from($expected);

            // String-typed inputs
            let left = StringArray::from($left);
            let right = StringArray::from($right);
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            let left = LargeStringArray::from($left);
            let right = LargeStringArray::from($right);
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            let left = StringViewArray::from($left);
            let right = StringViewArray::from($right);
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
            let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            // Binary-typed inputs: the same values, viewed as raw bytes
            let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
            let right_binary = $right.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();

            let left = BinaryArray::from(left_binary.clone());
            let right = BinaryArray::from(right_binary.clone());
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            let left = LargeBinaryArray::from(left_binary.clone());
            let right = LargeBinaryArray::from(right_binary.clone());
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            let left: DictionaryArray<Int8Type> =
                convert_binary_iterator_to_binary_dictionary(left_binary);
            let right: DictionaryArray<Int8Type> =
                convert_binary_iterator_to_binary_dictionary(right_binary);
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);
        }
    };
}
627 | | |
/// Generates a test that checks `op(left, right)` where the left side is an
/// array and the right side is a scalar.
///
/// The same inputs are run through four array implementations:
/// - `StringArray`
/// - `LargeStringArray`
/// - `StringViewArray`
/// - `DictionaryArray`
macro_rules! test_utf8_scalar {
    ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
        #[test]
        fn $test_name() {
            let expected = BooleanArray::from($expected);

            // Utf8
            let values = StringArray::from($left);
            let pattern = StringArray::from_iter_values([$right]);
            assert_eq!($op(&values, &Scalar::new(&pattern)).unwrap(), expected);

            // LargeUtf8
            let values = LargeStringArray::from($left);
            let pattern = LargeStringArray::from_iter_values([$right]);
            assert_eq!($op(&values, &Scalar::new(&pattern)).unwrap(), expected);

            // Utf8View
            let values = StringViewArray::from($left);
            let pattern = StringViewArray::from_iter_values([$right]);
            assert_eq!($op(&values, &Scalar::new(&pattern)).unwrap(), expected);

            // Dictionary of strings
            let values: DictionaryArray<Int8Type> = $left.into_iter().collect();
            let pattern: DictionaryArray<Int8Type> = [$right].into_iter().collect();
            assert_eq!($op(&values, &Scalar::new(&pattern)).unwrap(), expected);
        }
    };
}
662 | | |
/// Applying `op(left, right)`, left side is array, right side is scalar
///
/// The macro tests seven array implementations, first with the inputs as
/// strings and then with the same inputs as bytes:
/// - `StringArray`
/// - `LargeStringArray`
/// - `StringViewArray`
/// - `DictionaryArray` with string values
/// - `BinaryArray`
/// - `LargeBinaryArray`
/// - `DictionaryArray` with binary values
macro_rules! test_utf8_and_binary_scalar {
    ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
        #[test]
        fn $test_name() {
            let expected = BooleanArray::from($expected);

            // String-typed inputs
            let left = StringArray::from($left);
            let right = StringArray::from_iter_values([$right]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            let left = LargeStringArray::from($left);
            let right = LargeStringArray::from_iter_values([$right]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            let left = StringViewArray::from($left);
            let right = StringViewArray::from_iter_values([$right]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
            let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            // Binary-typed inputs: the same values, viewed as raw bytes
            let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
            let right_binary = $right.as_bytes();

            let left = BinaryArray::from(left_binary.clone());
            let right = BinaryArray::from_iter_values([right_binary]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            let left = LargeBinaryArray::from(left_binary.clone());
            let right = LargeBinaryArray::from_iter_values([right_binary]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            let left: DictionaryArray<Int8Type> =
                convert_binary_iterator_to_binary_dictionary(left_binary);
            let right: DictionaryArray<Int8Type> =
                convert_binary_iterator_to_binary_dictionary([right_binary]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);
        }
    };
}
717 | | |
718 | | test_utf8!( |
719 | | test_utf8_array_like, |
720 | | vec![ |
721 | | "arrow", |
722 | | "arrow_long_string_more than 12 bytes", |
723 | | "arrow", |
724 | | "arrow", |
725 | | "arrow", |
726 | | "arrows", |
727 | | "arrow", |
728 | | "arrow" |
729 | | ], |
730 | | vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_", ".*"], |
731 | | like, |
732 | | vec![true, true, true, false, false, true, false, false] |
733 | | ); |
734 | | |
735 | | test_utf8_scalar!( |
736 | | test_utf8_array_like_scalar_escape_testing, |
737 | | vec![ |
738 | | "varchar(255)", |
739 | | "int(255)longer than 12 bytes", |
740 | | "varchar", |
741 | | "int" |
742 | | ], |
743 | | "%(%)%", |
744 | | like, |
745 | | vec![true, true, false, false] |
746 | | ); |
747 | | |
748 | | test_utf8_scalar!( |
749 | | test_utf8_array_like_scalar_escape_regex, |
750 | | vec![".*", "a", "*"], |
751 | | ".*", |
752 | | like, |
753 | | vec![true, false, false] |
754 | | ); |
755 | | |
756 | | test_utf8_scalar!( |
757 | | test_utf8_array_like_scalar_escape_regex_dot, |
758 | | vec![".", "a", "*"], |
759 | | ".", |
760 | | like, |
761 | | vec![true, false, false] |
762 | | ); |
763 | | |
764 | | test_utf8_scalar!( |
765 | | test_utf8_array_like_scalar, |
766 | | vec![ |
767 | | "arrow", |
768 | | "parquet", |
769 | | "datafusion", |
770 | | "flight", |
771 | | "long string arrow test 12 bytes" |
772 | | ], |
773 | | "%ar%", |
774 | | like, |
775 | | vec![true, true, false, false, true] |
776 | | ); |
777 | | |
778 | | test_utf8_scalar!( |
779 | | test_utf8_array_like_scalar_start, |
780 | | vec![ |
781 | | "arrow", |
782 | | "parrow", |
783 | | "arrows", |
784 | | "arr", |
785 | | "arrow long string longer than 12 bytes" |
786 | | ], |
787 | | "arrow%", |
788 | | like, |
789 | | vec![true, false, true, false, true] |
790 | | ); |
791 | | |
792 | | // Replicates `test_utf8_array_like_scalar_start` `test_utf8_array_like_scalar_dyn_start` to |
793 | | // demonstrate that `SQL STARTSWITH` works as expected. |
794 | | test_utf8_and_binary_scalar!( |
795 | | test_utf8_and_binary_array_starts_with_scalar_start, |
796 | | vec![ |
797 | | "arrow", |
798 | | "parrow", |
799 | | "arrows", |
800 | | "arr", |
801 | | "arrow long string longer than 12 bytes" |
802 | | ], |
803 | | "arrow", |
804 | | starts_with, |
805 | | vec![true, false, true, false, true] |
806 | | ); |
807 | | |
808 | | test_utf8_and_binary!( |
809 | | test_utf8_and_binary_array_starts_with, |
810 | | vec![ |
811 | | "arrow", |
812 | | "arrow_long_string_more than 12 bytes", |
813 | | "arrow", |
814 | | "arrow", |
815 | | "arrow", |
816 | | "arrows", |
817 | | "arrow", |
818 | | "arrow" |
819 | | ], |
820 | | vec!["arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"], |
821 | | starts_with, |
822 | | vec![true, false, false, false, true, false, false, false] |
823 | | ); |
824 | | |
825 | | test_utf8_scalar!( |
826 | | test_utf8_array_like_scalar_end, |
827 | | vec![ |
828 | | "arrow", |
829 | | "parrow", |
830 | | "arrows", |
831 | | "arr", |
832 | | "arrow long string longer than 12 bytes" |
833 | | ], |
834 | | "%arrow", |
835 | | like, |
836 | | vec![true, true, false, false, false] |
837 | | ); |
838 | | |
839 | | // Replicates `test_utf8_array_like_scalar_end` `test_utf8_array_like_scalar_dyn_end` to |
840 | | // demonstrate that `SQL ENDSWITH` works as expected. |
841 | | test_utf8_and_binary_scalar!( |
842 | | test_utf8_and_binary_array_ends_with_scalar_end, |
843 | | vec![ |
844 | | "arrow", |
845 | | "parrow", |
846 | | "arrows", |
847 | | "arr", |
848 | | "arrow long string longer than 12 bytes" |
849 | | ], |
850 | | "arrow", |
851 | | ends_with, |
852 | | vec![true, true, false, false, false] |
853 | | ); |
854 | | |
855 | | test_utf8_and_binary!( |
856 | | test_utf8_and_binary_array_ends_with, |
857 | | vec![ |
858 | | "arrow", |
859 | | "arrow_long_string_more than 12 bytes", |
860 | | "arrow", |
861 | | "arrow", |
862 | | "arrow", |
863 | | "arrows", |
864 | | "arrow", |
865 | | "arrow" |
866 | | ], |
867 | | vec!["arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"], |
868 | | ends_with, |
869 | | vec![true, false, true, false, false, false, false, false] |
870 | | ); |
871 | | |
// `like` with the wildcard-free pattern "arrow": only the exact value "arrow" is expected to match.
872 | | test_utf8_scalar!( |
873 | | test_utf8_array_like_scalar_equals, |
874 | | vec![ |
875 | | "arrow", |
876 | | "parrow", |
877 | | "arrows", |
878 | | "arr", |
879 | | "arrow long string longer than 12 bytes" |
880 | | ], |
881 | | "arrow", |
882 | | like, |
883 | | vec![true, false, false, false, false] |
884 | | ); |
885 | | |
// `like` with "arrow_": `_` stands for exactly one character, so only "arrows" is expected to match.
886 | | test_utf8_scalar!( |
887 | | test_utf8_array_like_scalar_one, |
888 | | vec![ |
889 | | "arrow", |
890 | | "arrows", |
891 | | "parrow", |
892 | | "arr", |
893 | | "arrow long string longer than 12 bytes" |
894 | | ], |
895 | | "arrow_", |
896 | | like, |
897 | | vec![false, true, false, false, false] |
898 | | ); |
899 | | |
// `like` with an escaped percent (`a\%`): only the literal value "a%" is expected to match.
900 | | test_utf8_scalar!( |
901 | | test_utf8_scalar_like_escape, |
902 | | vec!["a%", "a\\x", "arrow long string longer than 12 bytes"], |
903 | | "a\\%", |
904 | | like, |
905 | | vec![true, false, false] |
906 | | ); |
907 | | |
// `like` with `%a\%`: any prefix followed by a literal "a%"; only "ba%" is expected to match.
908 | | test_utf8_scalar!( |
909 | | test_utf8_scalar_like_escape_contains, |
910 | | vec!["ba%", "ba\\x", "arrow long string longer than 12 bytes"], |
911 | | "%a\\%", |
912 | | like, |
913 | | vec![true, false, false] |
914 | | ); |
915 | | |
// `ilike` with `\%_\%`: a literal `%`, any single character, then a literal `%`;
// the value "%%%" is expected to match.
916 | | test_utf8!( |
917 | | test_utf8_scalar_ilike_regex, |
918 | | vec!["%%%"], |
919 | | vec![r"\%_\%"], |
920 | | ilike, |
921 | | vec![true] |
922 | | ); |
923 | | |
// Element-wise `nlike`: the expected value is `true` exactly where the paired pattern does NOT match.
924 | | test_utf8!( |
925 | | test_utf8_array_nlike, |
926 | | vec![ |
927 | | "arrow", |
928 | | "arrow", |
929 | | "arrow long string longer than 12 bytes", |
930 | | "arrow", |
931 | | "arrow", |
932 | | "arrows", |
933 | | "arrow" |
934 | | ], |
935 | | vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"], |
936 | | nlike, |
937 | | vec![false, false, false, true, true, false, true] |
938 | | ); |
939 | | |
// `nlike` with "%(%)%": parentheses are literal characters, so `true` is expected only for
// the values that contain no "(...)" section.
940 | | test_utf8_scalar!( |
941 | | test_utf8_array_nlike_escape_testing, |
942 | | vec![ |
943 | | "varchar(255)", |
944 | | "int(255) arrow long string longer than 12 bytes", |
945 | | "varchar", |
946 | | "int" |
947 | | ], |
948 | | "%(%)%", |
949 | | nlike, |
950 | | vec![false, false, true, true] |
951 | | ); |
952 | | |
// `nlike` with ".*": only the literal value ".*" matches (expected `false`), so regex syntax
// in the pattern is not interpreted.
953 | | test_utf8_scalar!( |
954 | | test_utf8_array_nlike_scalar_escape_regex, |
955 | | vec![".*", "a", "*"], |
956 | | ".*", |
957 | | nlike, |
958 | | vec![false, true, true] |
959 | | ); |
960 | | |
// `nlike` with ".": only the literal value "." matches (expected `false`); `.` is not treated
// as a single-character wildcard.
961 | | test_utf8_scalar!( |
962 | | test_utf8_array_nlike_scalar_escape_regex_dot, |
963 | | vec![".", "a", "*"], |
964 | | ".", |
965 | | nlike, |
966 | | vec![false, true, true] |
967 | | ); |
// `nlike` with "%ar%": `true` is expected only for the values that do not contain "ar".
968 | | test_utf8_scalar!( |
969 | | test_utf8_array_nlike_scalar, |
970 | | vec![ |
971 | | "arrow", |
972 | | "parquet", |
973 | | "datafusion", |
974 | | "flight", |
975 | | "arrow long string longer than 12 bytes" |
976 | | ], |
977 | | "%ar%", |
978 | | nlike, |
979 | | vec![false, false, true, true, false] |
980 | | ); |
981 | | |
// `nlike` with "arrow%": `true` is expected only for the values that do not start with "arrow".
982 | | test_utf8_scalar!( |
983 | | test_utf8_array_nlike_scalar_start, |
984 | | vec![ |
985 | | "arrow", |
986 | | "parrow", |
987 | | "arrows", |
988 | | "arr", |
989 | | "arrow long string longer than 12 bytes" |
990 | | ], |
991 | | "arrow%", |
992 | | nlike, |
993 | | vec![false, true, false, true, false] |
994 | | ); |
995 | | |
// `nlike` with "%arrow": `true` is expected only for the values that do not end in "arrow".
996 | | test_utf8_scalar!( |
997 | | test_utf8_array_nlike_scalar_end, |
998 | | vec![ |
999 | | "arrow", |
1000 | | "parrow", |
1001 | | "arrows", |
1002 | | "arr", |
1003 | | "arrow long string longer than 12 bytes" |
1004 | | ], |
1005 | | "%arrow", |
1006 | | nlike, |
1007 | | vec![false, false, true, true, true] |
1008 | | ); |
1009 | | |
// `nlike` with the wildcard-free pattern "arrow": `true` is expected for every value except "arrow".
1010 | | test_utf8_scalar!( |
1011 | | test_utf8_array_nlike_scalar_equals, |
1012 | | vec![ |
1013 | | "arrow", |
1014 | | "parrow", |
1015 | | "arrows", |
1016 | | "arr", |
1017 | | "arrow long string longer than 12 bytes" |
1018 | | ], |
1019 | | "arrow", |
1020 | | nlike, |
1021 | | vec![false, true, true, true, true] |
1022 | | ); |
1023 | | |
// `nlike` with "arrow_": `true` is expected for every value except "arrows" (the only one
// that is "arrow" plus exactly one more character).
1024 | | test_utf8_scalar!( |
1025 | | test_utf8_array_nlike_scalar_one, |
1026 | | vec![ |
1027 | | "arrow", |
1028 | | "arrows", |
1029 | | "parrow", |
1030 | | "arr", |
1031 | | "arrow long string longer than 12 bytes" |
1032 | | ], |
1033 | | "arrow_", |
1034 | | nlike, |
1035 | | vec![true, false, true, true, true] |
1036 | | ); |
1037 | | |
// Element-wise `ilike`: the paired patterns are matched ignoring case
// (e.g. "ARROWS" is expected to match "arrow_").
1038 | | test_utf8!( |
1039 | | test_utf8_array_ilike, |
1040 | | vec![ |
1041 | | "arrow", |
1042 | | "arrow", |
1043 | | "ARROW long string longer than 12 bytes", |
1044 | | "arrow", |
1045 | | "ARROW", |
1046 | | "ARROWS", |
1047 | | "arROw" |
1048 | | ], |
1049 | | vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"], |
1050 | | ilike, |
1051 | | vec![true, true, true, false, false, true, false] |
1052 | | ); |
1053 | | |
// `ilike` with "%(%)%": parentheses are literal characters, so only the values containing
// a "(...)" section are expected to match.
1054 | | test_utf8_scalar!( |
1055 | | ilike_utf8_scalar_escape_testing, |
1056 | | vec![ |
1057 | | "varchar(255)", |
1058 | | "int(255) long string longer than 12 bytes", |
1059 | | "varchar", |
1060 | | "int" |
1061 | | ], |
1062 | | "%(%)%", |
1063 | | ilike, |
1064 | | vec![true, true, false, false] |
1065 | | ); |
1066 | | |
// `ilike` with the upper-case pattern "%AR%": values containing lower-case "ar" are expected to match.
1067 | | test_utf8_scalar!( |
1068 | | test_utf8_array_ilike_scalar, |
1069 | | vec![ |
1070 | | "arrow", |
1071 | | "parquet", |
1072 | | "datafusion", |
1073 | | "flight", |
1074 | | "arrow long string longer than 12 bytes" |
1075 | | ], |
1076 | | "%AR%", |
1077 | | ilike, |
1078 | | vec![true, true, false, false, true] |
1079 | | ); |
1080 | | |
// `ilike` with the mixed-case pattern "aRRow%": values starting with "arrow" in any case
// are expected to match; "ARR" is too short and "parrow" has the wrong prefix.
1081 | | test_utf8_scalar!( |
1082 | | test_utf8_array_ilike_scalar_start, |
1083 | | vec![ |
1084 | | "arrow", |
1085 | | "parrow", |
1086 | | "arrows", |
1087 | | "ARR", |
1088 | | "arrow long string longer than 12 bytes" |
1089 | | ], |
1090 | | "aRRow%", |
1091 | | ilike, |
1092 | | vec![true, false, true, false, true] |
1093 | | ); |
1094 | | |
// `ilike` with "%arrow": values ending in "arrow" in any case ("ArroW", "parrow") are expected to match.
1095 | | test_utf8_scalar!( |
1096 | | test_utf8_array_ilike_scalar_end, |
1097 | | vec![ |
1098 | | "ArroW", |
1099 | | "parrow", |
1100 | | "ARRowS", |
1101 | | "arr", |
1102 | | "arrow long string longer than 12 bytes" |
1103 | | ], |
1104 | | "%arrow", |
1105 | | ilike, |
1106 | | vec![true, true, false, false, false] |
1107 | | ); |
1108 | | |
// `ilike` with the wildcard-free pattern "Arrow": only "arrow" is expected to match (case ignored).
1109 | | test_utf8_scalar!( |
1110 | | test_utf8_array_ilike_scalar_equals, |
1111 | | vec![ |
1112 | | "arrow", |
1113 | | "parrow", |
1114 | | "arrows", |
1115 | | "arr", |
1116 | | "arrow long string longer than 12 bytes" |
1117 | | ], |
1118 | | "Arrow", |
1119 | | ilike, |
1120 | | vec![true, false, false, false, false] |
1121 | | ); |
1122 | | |
// NOTE(review): the "ffko..." inputs below are expected NOT to match even though `ilike` ignores
// case; presumably the real source spells them with the `ﬀ` ligature (U+FB00) and this listing
// normalized it to ASCII "ff" -- confirm against the original file.
1123 | | // We only implement loose matching: `ß` is expected NOT to match "SS"/"ss" under `ilike` |
1124 | | test_utf8_scalar!( |
1125 | | test_utf8_array_ilike_unicode, |
1126 | | vec![ |
1127 | | "FFkoß", |
1128 | | "FFkoSS", |
1129 | | "FFkoss", |
1130 | | "FFkoS", |
1131 | | "FFkos", |
1132 | | "ffkoSS", |
1133 | | "ffkoß", |
1134 | | "FFKoSS", |
1135 | | "longer than 12 bytes FFKoSS" |
1136 | | ], |
1137 | | "FFkoSS", |
1138 | | ilike, |
1139 | | vec![false, true, true, false, false, false, false, true, false] |
1140 | | ); |
1141 | | |
// `ilike` with "FFkoSS%" (case-insensitive prefix): values containing `ß` in place of "SS" are
// expected not to match.
// NOTE(review): "ffkoSS" is also expected not to match; presumably the real source uses the `ﬀ`
// ligature (U+FB00) there and this listing normalized it to ASCII "ff" -- confirm.
1142 | | test_utf8_scalar!( |
1143 | | test_utf8_array_ilike_unicode_starts, |
1144 | | vec![ |
1145 | | "FFkoßsdlkdf", |
1146 | | "FFkoSSsdlkdf", |
1147 | | "FFkosssdlkdf", |
1148 | | "FFkoS", |
1149 | | "FFkos", |
1150 | | "ffkoSS", |
1151 | | "ffkoß", |
1152 | | "FfkosSsdfd", |
1153 | | "FFKoSS", |
1154 | | "longer than 12 bytes FFKoSS", |
1155 | | ], |
1156 | | "FFkoSS%", |
1157 | | ilike, |
1158 | | vec![false, true, true, false, false, false, false, true, true, false] |
1159 | | ); |
1160 | | |
// `ilike` with "%FFkoSS" (case-insensitive suffix), including a value with a multi-byte emoji
// before the matched suffix.
// NOTE(review): "ffkoSS" is expected not to match; presumably the real source uses the `ﬀ`
// ligature (U+FB00) there and this listing normalized it to ASCII "ff" -- confirm.
1161 | | test_utf8_scalar!( |
1162 | | test_utf8_array_ilike_unicode_ends, |
1163 | | vec![ |
1164 | | "sdlkdfFFkoß", |
1165 | | "sdlkdfFFkoSS", |
1166 | | "sdlkdfFFkoss", |
1167 | | "FFkoS", |
1168 | | "FFkos", |
1169 | | "ffkoSS", |
1170 | | "ffkoß", |
1171 | | "h😃klFfkosS", |
1172 | | "FFKoSS", |
1173 | | "longer than 12 bytes FFKoSS", |
1174 | | ], |
1175 | | "%FFkoSS", |
1176 | | ilike, |
1177 | | vec![false, true, true, false, false, false, false, true, true, true] |
1178 | | ); |
1179 | | |
// `ilike` with "%FFkoSS%" (case-insensitive substring), including values with multi-byte emoji
// around the matched section.
// NOTE(review): "ffkoSS" is expected not to match; presumably the real source uses the `ﬀ`
// ligature (U+FB00) there and this listing normalized it to ASCII "ff" -- confirm.
1180 | | test_utf8_scalar!( |
1181 | | test_utf8_array_ilike_unicode_contains, |
1182 | | vec![ |
1183 | | "sdlkdfFkoßsdfs", |
1184 | | "sdlkdfFkoSSdggs", |
1185 | | "sdlkdfFkosssdsd", |
1186 | | "FkoS", |
1187 | | "Fkos", |
1188 | | "ffkoSS", |
1189 | | "ffkoß", |
1190 | | "😃sadlksffkosSsh😃klF", |
1191 | | "😱slgffkosSsh😃klF", |
1192 | | "FFKoSS", |
1193 | | "longer than 12 bytes FFKoSS", |
1194 | | ], |
1195 | | "%FFkoSS%", |
1196 | | ilike, |
1197 | | vec![false, true, true, false, false, false, false, true, true, true, true] |
1198 | | ); |
1199 | | |
1200 | | // Replicates `test_utf8_array_ilike_unicode_contains` and |
1201 | | // `test_utf8_array_ilike_unicode_contains_dyn` to |
1202 | | // demonstrate that `SQL CONTAINS` works as expected. |
1203 | | // |
1204 | | // NOTE: 5 of the values were changed because the original used a case-insensitive `ilike`, |
1205 | | test_utf8_and_binary_scalar!( |
1206 | | test_utf8_and_binary_array_contains_unicode_contains, |
1207 | | vec![ |
1208 | | "sdlkdfFkoßsdfs", |
1209 | | "sdlkdFFkoSSdggs", // Original was case insensitive "sdlkdfFkoSSdggs" |
1210 | | "sdlkdFFkoSSsdsd", // Original was case insensitive "sdlkdfFkosssdsd" |
1211 | | "FkoS", |
1212 | | "Fkos", |
1213 | | "ffkoSS", |
1214 | | "ffkoß", |
1215 | | "😃sadlksFFkoSSsh😃klF", // Original was case insensitive "😃sadlksffkosSsh😃klF" |
1216 | | "😱slgFFkoSSsh😃klF", // Original was case insensitive "😱slgffkosSsh😃klF" |
1217 | | "FFkoSS", // Original was case insensitive "FFKoSS" |
1218 | | "longer than 12 bytes FFKoSS", |
1219 | | ], |
1220 | | "FFkoSS", |
1221 | | contains, |
1222 | | vec![false, true, true, false, false, false, false, true, true, true, false] |
1223 | | ); |
1224 | | |
// `ilike` with "%FF__SS%": case-insensitive "ff", exactly two arbitrary characters, then "ss",
// anywhere in the value.
// NOTE(review): "ffooSS" is expected not to match; presumably the real source uses the `ﬀ`
// ligature (U+FB00) there and this listing normalized it to ASCII "ff" -- confirm.
1225 | | test_utf8_scalar!( |
1226 | | test_utf8_array_ilike_unicode_complex, |
1227 | | vec![ |
1228 | | "sdlkdfFooßsdfs", |
1229 | | "sdlkdfFooSSdggs", |
1230 | | "sdlkdfFoosssdsd", |
1231 | | "FooS", |
1232 | | "Foos", |
1233 | | "ffooSS", |
1234 | | "ffooß", |
1235 | | "😃sadlksffofsSsh😃klF", |
1236 | | "😱slgffoesSsh😃klF", |
1237 | | "FFKoSS", |
1238 | | "longer than 12 bytes FFKoSS", |
1239 | | ], |
1240 | | "%FF__SS%", |
1241 | | ilike, |
1242 | | vec![false, true, true, false, false, false, false, true, true, true, true] |
1243 | | ); |
1244 | | |
1245 | | // 😈 is four bytes long, so this checks a case-sensitive suffix match whose pattern contains a multi-byte character. |
1246 | | test_utf8_scalar!( |
1247 | | test_utf8_array_like_multibyte, |
1248 | | vec![ |
1249 | | "sdlkdfFooßsdfs", |
1250 | | "sdlkdfFooSSdggs", |
1251 | | "sdlkdfFoosssdsd", |
1252 | | "FooS", |
1253 | | "Foos", |
1254 | | "ffooSS", |
1255 | | "ffooß", |
1256 | | "😃sadlksffofsSsh😈klF", |
1257 | | "😱slgffoesSsh😈klF", |
1258 | | "FFKoSS", |
1259 | | "longer than 12 bytes FFKoSS", |
1260 | | ], |
1261 | | "%Ssh😈klF", |
1262 | | like, |
1263 | | vec![false, false, false, false, false, false, false, true, true, false, false] |
1264 | | ); |
1265 | | |
// `ilike` with "arrow_": `_` stands for exactly one character, so only "arrows" is expected to match.
1266 | | test_utf8_scalar!( |
1267 | | test_utf8_array_ilike_scalar_one, |
1268 | | vec![ |
1269 | | "arrow", |
1270 | | "arrows", |
1271 | | "parrow", |
1272 | | "arr", |
1273 | | "arrow long string longer than 12 bytes" |
1274 | | ], |
1275 | | "arrow_", |
1276 | | ilike, |
1277 | | vec![false, true, false, false, false] |
1278 | | ); |
1279 | | |
// Element-wise `nilike`: the expected value is `true` exactly where the paired pattern does NOT
// match when case is ignored.
1280 | | test_utf8!( |
1281 | | test_utf8_array_nilike, |
1282 | | vec![ |
1283 | | "arrow", |
1284 | | "arrow", |
1285 | | "ARROW longer than 12 bytes string", |
1286 | | "arrow", |
1287 | | "ARROW", |
1288 | | "ARROWS", |
1289 | | "arROw" |
1290 | | ], |
1291 | | vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"], |
1292 | | nilike, |
1293 | | vec![false, false, false, true, true, false, true] |
1294 | | ); |
1295 | | |
// `nilike` with "%(%)%": parentheses are literal characters, so `true` is expected only for
// the values that contain no "(...)" section.
1296 | | test_utf8_scalar!( |
1297 | | nilike_utf8_scalar_escape_testing, |
1298 | | vec![ |
1299 | | "varchar(255)", |
1300 | | "int(255) longer than 12 bytes string", |
1301 | | "varchar", |
1302 | | "int" |
1303 | | ], |
1304 | | "%(%)%", |
1305 | | nilike, |
1306 | | vec![false, false, true, true] |
1307 | | ); |
1308 | | |
// `nilike` with "%AR%": `true` is expected only for the values that do not contain "ar" in any case.
1309 | | test_utf8_scalar!( |
1310 | | test_utf8_array_nilike_scalar, |
1311 | | vec![ |
1312 | | "arrow", |
1313 | | "parquet", |
1314 | | "datafusion", |
1315 | | "flight", |
1316 | | "arrow long string longer than 12 bytes" |
1317 | | ], |
1318 | | "%AR%", |
1319 | | nilike, |
1320 | | vec![false, false, true, true, false] |
1321 | | ); |
1322 | | |
// `nilike` with "aRRow%": `true` is expected only for the values that do not start with "arrow"
// in any case.
1323 | | test_utf8_scalar!( |
1324 | | test_utf8_array_nilike_scalar_start, |
1325 | | vec![ |
1326 | | "arrow", |
1327 | | "parrow", |
1328 | | "arrows", |
1329 | | "ARR", |
1330 | | "arrow long string longer than 12 bytes" |
1331 | | ], |
1332 | | "aRRow%", |
1333 | | nilike, |
1334 | | vec![false, true, false, true, false] |
1335 | | ); |
1336 | | |
// `nilike` with "%arrow": `true` is expected only for the values that do not end in "arrow"
// in any case.
1337 | | test_utf8_scalar!( |
1338 | | test_utf8_array_nilike_scalar_end, |
1339 | | vec![ |
1340 | | "ArroW", |
1341 | | "parrow", |
1342 | | "ARRowS", |
1343 | | "arr", |
1344 | | "arrow long string longer than 12 bytes" |
1345 | | ], |
1346 | | "%arrow", |
1347 | | nilike, |
1348 | | vec![false, false, true, true, true] |
1349 | | ); |
1350 | | |
// `nilike` with the wildcard-free pattern "Arrow": `true` is expected for every value except
// "arRow", which equals the pattern when case is ignored.
1351 | | test_utf8_scalar!( |
1352 | | test_utf8_array_nilike_scalar_equals, |
1353 | | vec![ |
1354 | | "arRow", |
1355 | | "parrow", |
1356 | | "arrows", |
1357 | | "arr", |
1358 | | "arrow long string longer than 12 bytes" |
1359 | | ], |
1360 | | "Arrow", |
1361 | | nilike, |
1362 | | vec![false, true, true, true, true] |
1363 | | ); |
1364 | | |
// `nilike` with "arrow_": `true` is expected for every value except "arrows" (the only one
// that is "arrow" plus exactly one more character).
1365 | | test_utf8_scalar!( |
1366 | | test_utf8_array_nilike_scalar_one, |
1367 | | vec![ |
1368 | | "arrow", |
1369 | | "arrows", |
1370 | | "parrow", |
1371 | | "arr", |
1372 | | "arrow long string longer than 12 bytes" |
1373 | | ], |
1374 | | "arrow_", |
1375 | | nilike, |
1376 | | vec![true, false, true, true, true] |
1377 | | ); |
1378 | | |
// Checks `like_utf8_scalar_dyn` on a dictionary-encoded string array (Int8 keys) that contains
// a repeated value, a null, and a value with an embedded newline. Each pattern is asserted once;
// the verbatim duplicate of every assertion was removed because it repeated the identical call.
// A null input is expected to produce a null output for every pattern.
1379 | | #[test] |
1380 | | fn test_dict_like_kernels() { |
1381 | | let data = vec![ |
1382 | | Some("Earth"), |
1383 | | Some("Fire"), |
1384 | | Some("Water"), |
1385 | | Some("Air"), |
1386 | | None, |
1387 | | Some("Air"), |
1388 | | Some("bbbbb\nAir"), |
1389 | | ]; |
1390 | | |
1391 | | let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect(); |
1392 | | |
// Exact pattern: "bbbbb\nAir" must not match even though its last line is "Air".
1393 | | assert_eq!( |
1394 | | like_utf8_scalar_dyn(&dict_array, "Air").unwrap(), |
1395 | | BooleanArray::from(vec![ |
1396 | | Some(false), |
1397 | | Some(false), |
1398 | | Some(false), |
1399 | | Some(true), |
1400 | | None, |
1401 | | Some(true), |
1402 | | Some(false), |
1403 | | ]), |
1404 | | ); |
1405 | | |
// Prefix pattern.
1419 | | assert_eq!( |
1420 | | like_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(), |
1421 | | BooleanArray::from(vec![ |
1422 | | Some(false), |
1423 | | Some(false), |
1424 | | Some(true), |
1425 | | Some(false), |
1426 | | None, |
1427 | | Some(false), |
1428 | | Some(false), |
1429 | | ]), |
1430 | | ); |
1431 | | |
// Suffix pattern: `%` is expected to span the embedded newline in "bbbbb\nAir".
1445 | | assert_eq!( |
1446 | | like_utf8_scalar_dyn(&dict_array, "%r").unwrap(), |
1447 | | BooleanArray::from(vec![ |
1448 | | Some(false), |
1449 | | Some(false), |
1450 | | Some(true), |
1451 | | Some(true), |
1452 | | None, |
1453 | | Some(true), |
1454 | | Some(true), |
1455 | | ]), |
1456 | | ); |
1457 | | |
// Substring pattern.
1471 | | assert_eq!( |
1472 | | like_utf8_scalar_dyn(&dict_array, "%i%").unwrap(), |
1473 | | BooleanArray::from(vec![ |
1474 | | Some(false), |
1475 | | Some(true), |
1476 | | Some(false), |
1477 | | Some(true), |
1478 | | None, |
1479 | | Some(true), |
1480 | | Some(true), |
1481 | | ]), |
1482 | | ); |
1483 | | |
// Multi-wildcard pattern; matching is case-sensitive, so "Air" (capital A) must not match.
1497 | | assert_eq!( |
1498 | | like_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(), |
1499 | | BooleanArray::from(vec![ |
1500 | | Some(true), |
1501 | | Some(false), |
1502 | | Some(true), |
1503 | | Some(false), |
1504 | | None, |
1505 | | Some(false), |
1506 | | Some(false), |
1507 | | ]), |
1508 | | ); |
1522 | | } |
1523 | | |
// Checks `nlike_utf8_scalar_dyn` on a dictionary-encoded string array (Int8 keys) that contains
// a repeated value, a null, and a value with an embedded newline. Each pattern is asserted once;
// the verbatim duplicate of every assertion was removed because it repeated the identical call.
// Expected values are the negation of the `like` results; a null input stays null.
1524 | | #[test] |
1525 | | fn test_dict_nlike_kernels() { |
1526 | | let data = vec![ |
1527 | | Some("Earth"), |
1528 | | Some("Fire"), |
1529 | | Some("Water"), |
1530 | | Some("Air"), |
1531 | | None, |
1532 | | Some("Air"), |
1533 | | Some("bbbbb\nAir"), |
1534 | | ]; |
1535 | | |
1536 | | let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect(); |
1537 | | |
// Exact pattern.
1538 | | assert_eq!( |
1539 | | nlike_utf8_scalar_dyn(&dict_array, "Air").unwrap(), |
1540 | | BooleanArray::from(vec![ |
1541 | | Some(true), |
1542 | | Some(true), |
1543 | | Some(true), |
1544 | | Some(false), |
1545 | | None, |
1546 | | Some(false), |
1547 | | Some(true), |
1548 | | ]), |
1549 | | ); |
1550 | | |
// Prefix pattern.
1564 | | assert_eq!( |
1565 | | nlike_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(), |
1566 | | BooleanArray::from(vec![ |
1567 | | Some(true), |
1568 | | Some(true), |
1569 | | Some(false), |
1570 | | Some(true), |
1571 | | None, |
1572 | | Some(true), |
1573 | | Some(true), |
1574 | | ]), |
1575 | | ); |
1576 | | |
// Suffix pattern.
1590 | | assert_eq!( |
1591 | | nlike_utf8_scalar_dyn(&dict_array, "%r").unwrap(), |
1592 | | BooleanArray::from(vec![ |
1593 | | Some(true), |
1594 | | Some(true), |
1595 | | Some(false), |
1596 | | Some(false), |
1597 | | None, |
1598 | | Some(false), |
1599 | | Some(false), |
1600 | | ]), |
1601 | | ); |
1602 | | |
// Substring pattern.
1616 | | assert_eq!( |
1617 | | nlike_utf8_scalar_dyn(&dict_array, "%i%").unwrap(), |
1618 | | BooleanArray::from(vec![ |
1619 | | Some(true), |
1620 | | Some(false), |
1621 | | Some(true), |
1622 | | Some(false), |
1623 | | None, |
1624 | | Some(false), |
1625 | | Some(false), |
1626 | | ]), |
1627 | | ); |
1628 | | |
// Multi-wildcard pattern (case-sensitive).
1642 | | assert_eq!( |
1643 | | nlike_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(), |
1644 | | BooleanArray::from(vec![ |
1645 | | Some(false), |
1646 | | Some(true), |
1647 | | Some(false), |
1648 | | Some(true), |
1649 | | None, |
1650 | | Some(true), |
1651 | | Some(true), |
1652 | | ]), |
1653 | | ); |
1667 | | } |
1668 | | |
// Checks `ilike_utf8_scalar_dyn` on a dictionary-encoded string array (Int8 keys) that contains
// a repeated value, a null, and a value with an embedded newline. Patterns deliberately use a
// different case than the data. Each pattern is asserted once; the verbatim duplicate of every
// assertion was removed because it repeated the identical call. A null input stays null.
1669 | | #[test] |
1670 | | fn test_dict_ilike_kernels() { |
1671 | | let data = vec![ |
1672 | | Some("Earth"), |
1673 | | Some("Fire"), |
1674 | | Some("Water"), |
1675 | | Some("Air"), |
1676 | | None, |
1677 | | Some("Air"), |
1678 | | Some("bbbbb\nAir"), |
1679 | | ]; |
1680 | | |
1681 | | let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect(); |
1682 | | |
// Exact pattern in lower case.
1683 | | assert_eq!( |
1684 | | ilike_utf8_scalar_dyn(&dict_array, "air").unwrap(), |
1685 | | BooleanArray::from(vec![ |
1686 | | Some(false), |
1687 | | Some(false), |
1688 | | Some(false), |
1689 | | Some(true), |
1690 | | None, |
1691 | | Some(true), |
1692 | | Some(false), |
1693 | | ]), |
1694 | | ); |
1695 | | |
// Prefix pattern.
1709 | | assert_eq!( |
1710 | | ilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(), |
1711 | | BooleanArray::from(vec![ |
1712 | | Some(false), |
1713 | | Some(false), |
1714 | | Some(true), |
1715 | | Some(false), |
1716 | | None, |
1717 | | Some(false), |
1718 | | Some(false), |
1719 | | ]), |
1720 | | ); |
1721 | | |
// Suffix pattern: `%` is expected to span the embedded newline in "bbbbb\nAir".
1735 | | assert_eq!( |
1736 | | ilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(), |
1737 | | BooleanArray::from(vec![ |
1738 | | Some(false), |
1739 | | Some(false), |
1740 | | Some(true), |
1741 | | Some(true), |
1742 | | None, |
1743 | | Some(true), |
1744 | | Some(true), |
1745 | | ]), |
1746 | | ); |
1747 | | |
// Substring pattern.
1761 | | assert_eq!( |
1762 | | ilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(), |
1763 | | BooleanArray::from(vec![ |
1764 | | Some(false), |
1765 | | Some(true), |
1766 | | Some(false), |
1767 | | Some(true), |
1768 | | None, |
1769 | | Some(true), |
1770 | | Some(true), |
1771 | | ]), |
1772 | | ); |
1773 | | |
// Multi-wildcard pattern: unlike the case-sensitive `like`, "Air" is expected to match here.
1787 | | assert_eq!( |
1788 | | ilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(), |
1789 | | BooleanArray::from(vec![ |
1790 | | Some(true), |
1791 | | Some(false), |
1792 | | Some(true), |
1793 | | Some(true), |
1794 | | None, |
1795 | | Some(true), |
1796 | | Some(true), |
1797 | | ]), |
1798 | | ); |
1812 | | } |
1813 | | |
// Checks `nilike_utf8_scalar_dyn` on a dictionary-encoded string array (Int8 keys) that contains
// a repeated value, a null, and a value with an embedded newline. Each pattern is asserted once;
// the verbatim duplicate of every assertion was removed because it repeated the identical call.
// Expected values are the negation of the `ilike` results; a null input stays null.
1814 | | #[test] |
1815 | | fn test_dict_nilike_kernels() { |
1816 | | let data = vec![ |
1817 | | Some("Earth"), |
1818 | | Some("Fire"), |
1819 | | Some("Water"), |
1820 | | Some("Air"), |
1821 | | None, |
1822 | | Some("Air"), |
1823 | | Some("bbbbb\nAir"), |
1824 | | ]; |
1825 | | |
1826 | | let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect(); |
1827 | | |
// Exact pattern in lower case.
1828 | | assert_eq!( |
1829 | | nilike_utf8_scalar_dyn(&dict_array, "air").unwrap(), |
1830 | | BooleanArray::from(vec![ |
1831 | | Some(true), |
1832 | | Some(true), |
1833 | | Some(true), |
1834 | | Some(false), |
1835 | | None, |
1836 | | Some(false), |
1837 | | Some(true), |
1838 | | ]), |
1839 | | ); |
1840 | | |
// Prefix pattern.
1854 | | assert_eq!( |
1855 | | nilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(), |
1856 | | BooleanArray::from(vec![ |
1857 | | Some(true), |
1858 | | Some(true), |
1859 | | Some(false), |
1860 | | Some(true), |
1861 | | None, |
1862 | | Some(true), |
1863 | | Some(true), |
1864 | | ]), |
1865 | | ); |
1866 | | |
// Suffix pattern.
1880 | | assert_eq!( |
1881 | | nilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(), |
1882 | | BooleanArray::from(vec![ |
1883 | | Some(true), |
1884 | | Some(true), |
1885 | | Some(false), |
1886 | | Some(false), |
1887 | | None, |
1888 | | Some(false), |
1889 | | Some(false), |
1890 | | ]), |
1891 | | ); |
1892 | | |
// Substring pattern.
1906 | | assert_eq!( |
1907 | | nilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(), |
1908 | | BooleanArray::from(vec![ |
1909 | | Some(true), |
1910 | | Some(false), |
1911 | | Some(true), |
1912 | | Some(false), |
1913 | | None, |
1914 | | Some(false), |
1915 | | Some(false), |
1916 | | ]), |
1917 | | ); |
1918 | | |
// Multi-wildcard pattern: only "Fire" fails to match case-insensitively.
1932 | | assert_eq!( |
1933 | | nilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(), |
1934 | | BooleanArray::from(vec![ |
1935 | | Some(false), |
1936 | | Some(true), |
1937 | | Some(false), |
1938 | | Some(false), |
1939 | | None, |
1940 | | Some(false), |
1941 | | Some(false), |
1942 | | ]), |
1943 | | ); |
1957 | | } |
1958 | | |
// Verifies that a null `StringArray` value on the left yields a single null result for every
// pattern shape and every kernel, across all four scalar/array combinations of the two operands.
1959 | | #[test] |
1960 | | fn string_null_like_pattern() { |
1961 | | // Different patterns have different execution code paths |
1962 | | for pattern in &[ |
1963 | | "", // can execute as equality check |
1964 | | "_", // can execute as length check |
1965 | | "%", // can execute as starts_with("") or non-null check |
1966 | | "a%", // can execute as starts_with("a") |
1967 | | "%a", // can execute as ends_with("a") |
1968 | | "a%b", // can execute as starts_with("a") && ends_with("b") |
1969 | | "%a%", // can execute as contains("a") |
1970 | | "%a%b_c_d%e", // can execute as regular expression |
1971 | | ] { |
1972 | | // These tests focus on null handling, so the same assertions run for all four kernels |
1973 | | for like_f in [like, ilike, nlike, nilike] { |
1974 | | let a = Scalar::new(StringArray::new_null(1)); |
1975 | | let b = StringArray::new_scalar(pattern); |
1976 | | let r = like_f(&a, &b).unwrap(); |
1977 | | assert_eq!(r.len(), 1, "With pattern {pattern}"); |
1978 | | assert_eq!(r.null_count(), 1, "With pattern {pattern}"); |
1979 | | assert!(r.is_null(0), "With pattern {pattern}"); |
1980 | | |
1981 | | let a = Scalar::new(StringArray::new_null(1)); |
1982 | | let b = StringArray::from_iter_values([pattern]); |
1983 | | let r = like_f(&a, &b).unwrap(); |
1984 | | assert_eq!(r.len(), 1, "With pattern {pattern}"); |
1985 | | assert_eq!(r.null_count(), 1, "With pattern {pattern}"); |
1986 | | assert!(r.is_null(0), "With pattern {pattern}"); |
1987 | | |
1988 | | let a = StringArray::new_null(1); |
1989 | | let b = StringArray::from_iter_values([pattern]); |
1990 | | let r = like_f(&a, &b).unwrap(); |
1991 | | assert_eq!(r.len(), 1, "With pattern {pattern}"); |
1992 | | assert_eq!(r.null_count(), 1, "With pattern {pattern}"); |
1993 | | assert!(r.is_null(0), "With pattern {pattern}"); |
1994 | | |
1995 | | let a = StringArray::new_null(1); |
1996 | | let b = StringArray::new_scalar(pattern); |
1997 | | let r = like_f(&a, &b).unwrap(); |
1998 | | assert_eq!(r.len(), 1, "With pattern {pattern}"); |
1999 | | assert_eq!(r.null_count(), 1, "With pattern {pattern}"); |
2000 | | assert!(r.is_null(0), "With pattern {pattern}"); |
2001 | | } |
2002 | | } |
2003 | | } |
2004 | | |
// Verifies that a null `StringViewArray` value on the left yields a single null result for every
// pattern shape and every kernel, across all four scalar/array combinations of the two operands.
2005 | | #[test] |
2006 | | fn string_view_null_like_pattern() { |
2007 | | // Different patterns have different execution code paths |
2008 | | for pattern in &[ |
2009 | | "", // can execute as equality check |
2010 | | "_", // can execute as length check |
2011 | | "%", // can execute as starts_with("") or non-null check |
2012 | | "a%", // can execute as starts_with("a") |
2013 | | "%a", // can execute as ends_with("a") |
2014 | | "a%b", // can execute as starts_with("a") && ends_with("b") |
2015 | | "%a%", // can execute as contains("a") |
2016 | | "%a%b_c_d%e", // can execute as regular expression |
2017 | | ] { |
2018 | | // These tests focus on null handling, so the same assertions run for all four kernels |
2019 | | for like_f in [like, ilike, nlike, nilike] { |
2020 | | let a = Scalar::new(StringViewArray::new_null(1)); |
2021 | | let b = StringViewArray::new_scalar(pattern); |
2022 | | let r = like_f(&a, &b).unwrap(); |
2023 | | assert_eq!(r.len(), 1, "With pattern {pattern}"); |
2024 | | assert_eq!(r.null_count(), 1, "With pattern {pattern}"); |
2025 | | assert!(r.is_null(0), "With pattern {pattern}"); |
2026 | | |
2027 | | let a = Scalar::new(StringViewArray::new_null(1)); |
2028 | | let b = StringViewArray::from_iter_values([pattern]); |
2029 | | let r = like_f(&a, &b).unwrap(); |
2030 | | assert_eq!(r.len(), 1, "With pattern {pattern}"); |
2031 | | assert_eq!(r.null_count(), 1, "With pattern {pattern}"); |
2032 | | assert!(r.is_null(0), "With pattern {pattern}"); |
2033 | | |
2034 | | let a = StringViewArray::new_null(1); |
2035 | | let b = StringViewArray::from_iter_values([pattern]); |
2036 | | let r = like_f(&a, &b).unwrap(); |
2037 | | assert_eq!(r.len(), 1, "With pattern {pattern}"); |
2038 | | assert_eq!(r.null_count(), 1, "With pattern {pattern}"); |
2039 | | assert!(r.is_null(0), "With pattern {pattern}"); |
2040 | | |
2041 | | let a = StringViewArray::new_null(1); |
2042 | | let b = StringViewArray::new_scalar(pattern); |
2043 | | let r = like_f(&a, &b).unwrap(); |
2044 | | assert_eq!(r.len(), 1, "With pattern {pattern}"); |
2045 | | assert_eq!(r.null_count(), 1, "With pattern {pattern}"); |
2046 | | assert!(r.is_null(0), "With pattern {pattern}"); |
2047 | | } |
2048 | | } |
2049 | | } |
2050 | | |
2051 | | #[test] |
2052 | | fn string_like_scalar_null() { /* A NULL pattern must give a NULL result for like, ilike, nlike and nilike */ |
2053 | | for like_f in [like, ilike, nlike, nilike] { |
2054 | | let a = StringArray::new_scalar("a"); /* case 1: scalar value, scalar NULL pattern */ |
2055 | | let b = Scalar::new(StringArray::new_null(1)); |
2056 | | let r = like_f(&a, &b).unwrap(); |
2057 | | assert_eq!(r.len(), 1); |
2058 | | assert_eq!(r.null_count(), 1); |
2059 | | assert!(r.is_null(0)); |
2060 | | |
2061 | | let a = StringArray::from_iter_values(["a"]); /* case 2: one-element value array, scalar NULL pattern */ |
2062 | | let b = Scalar::new(StringArray::new_null(1)); |
2063 | | let r = like_f(&a, &b).unwrap(); |
2064 | | assert_eq!(r.len(), 1); |
2065 | | assert_eq!(r.null_count(), 1); |
2066 | | assert!(r.is_null(0)); |
2067 | | |
2068 | | let a = StringArray::from_iter_values(["a"]); /* case 3: one-element value array, one-element NULL pattern array */ |
2069 | | let b = StringArray::new_null(1); |
2070 | | let r = like_f(&a, &b).unwrap(); |
2071 | | assert_eq!(r.len(), 1); |
2072 | | assert_eq!(r.null_count(), 1); |
2073 | | assert!(r.is_null(0)); |
2074 | | |
2075 | | let a = StringArray::new_scalar("a"); /* case 4: scalar value, one-element NULL pattern array */ |
2076 | | let b = StringArray::new_null(1); |
2077 | | let r = like_f(&a, &b).unwrap(); |
2078 | | assert_eq!(r.len(), 1); |
2079 | | assert_eq!(r.null_count(), 1); |
2080 | | assert!(r.is_null(0)); |
2081 | | } |
2082 | | } |
2083 | | |
2084 | | #[test] |
2085 | | fn string_view_like_scalar_null() { /* StringView variant: a NULL pattern must give a NULL result for like, ilike, nlike and nilike */ |
2086 | | for like_f in [like, ilike, nlike, nilike] { |
2087 | | let a = StringViewArray::new_scalar("a"); /* case 1: scalar value, scalar NULL pattern */ |
2088 | | let b = Scalar::new(StringViewArray::new_null(1)); |
2089 | | let r = like_f(&a, &b).unwrap(); |
2090 | | assert_eq!(r.len(), 1); |
2091 | | assert_eq!(r.null_count(), 1); |
2092 | | assert!(r.is_null(0)); |
2093 | | |
2094 | | let a = StringViewArray::from_iter_values(["a"]); /* case 2: one-element value array, scalar NULL pattern */ |
2095 | | let b = Scalar::new(StringViewArray::new_null(1)); |
2096 | | let r = like_f(&a, &b).unwrap(); |
2097 | | assert_eq!(r.len(), 1); |
2098 | | assert_eq!(r.null_count(), 1); |
2099 | | assert!(r.is_null(0)); |
2100 | | |
2101 | | let a = StringViewArray::from_iter_values(["a"]); /* case 3: one-element value array, one-element NULL pattern array */ |
2102 | | let b = StringViewArray::new_null(1); |
2103 | | let r = like_f(&a, &b).unwrap(); |
2104 | | assert_eq!(r.len(), 1); |
2105 | | assert_eq!(r.null_count(), 1); |
2106 | | assert!(r.is_null(0)); |
2107 | | |
2108 | | let a = StringViewArray::new_scalar("a"); /* case 4: scalar value, one-element NULL pattern array */ |
2109 | | let b = StringViewArray::new_null(1); |
2110 | | let r = like_f(&a, &b).unwrap(); |
2111 | | assert_eq!(r.len(), 1); |
2112 | | assert_eq!(r.null_count(), 1); |
2113 | | assert!(r.is_null(0)); |
2114 | | } |
2115 | | } |
2116 | | |
2117 | | #[test] |
2118 | | fn like_escape() { |
2119 | | // (value, pattern, expected): `expected` is the LIKE result; nlike/nilike must return its negation |
2120 | | let test_cases = vec![ |
2121 | | // Empty pattern |
2122 | | (r"", r"", true), |
2123 | | (r"\", r"", false), |
2124 | | // Sole (dangling) escape (some engines consider this invalid pattern) |
2125 | | (r"", r"\", false), |
2126 | | (r"\", r"\", true), |
2127 | | (r"\\", r"\", false), |
2128 | | (r"a", r"\", false), |
2129 | | (r"\a", r"\", false), |
2130 | | (r"\\a", r"\", false), |
2131 | | // Sole escape (an escaped backslash, i.e. one literal backslash) |
2132 | | (r"", r"\\", false), |
2133 | | (r"\", r"\\", true), |
2134 | | (r"\\", r"\\", false), |
2135 | | (r"a", r"\\", false), |
2136 | | (r"\a", r"\\", false), |
2137 | | (r"\\a", r"\\", false), |
2138 | | // Sole escape and dangling escape |
2139 | | (r"", r"\\\", false), |
2140 | | (r"\", r"\\\", false), |
2141 | | (r"\\", r"\\\", true), |
2142 | | (r"\\\", r"\\\", false), |
2143 | | (r"\\\\", r"\\\", false), |
2144 | | (r"a", r"\\\", false), |
2145 | | (r"\a", r"\\\", false), |
2146 | | (r"\\a", r"\\\", false), |
2147 | | // Two escapes only (two literal backslashes) |
2148 | | (r"", r"\\\\", false), |
2149 | | (r"\", r"\\\\", false), |
2150 | | (r"\\", r"\\\\", true), |
2151 | | (r"\\\", r"\\\\", false), |
2152 | | (r"\\\\", r"\\\\", false), |
2153 | | (r"\\\\\", r"\\\\", false), |
2154 | | (r"a", r"\\\\", false), |
2155 | | (r"\a", r"\\\\", false), |
2156 | | (r"\\a", r"\\\\", false), |
2157 | | // Escaped non-wildcard |
2158 | | (r"", r"\a", false), |
2159 | | (r"\", r"\a", false), |
2160 | | (r"\\", r"\a", false), |
2161 | | (r"a", r"\a", true), |
2162 | | (r"\a", r"\a", false), |
2163 | | (r"\\a", r"\a", false), |
2164 | | // Escaped _ wildcard |
2165 | | (r"", r"\_", false), |
2166 | | (r"\", r"\_", false), |
2167 | | (r"\\", r"\_", false), |
2168 | | (r"a", r"\_", false), |
2169 | | (r"_", r"\_", true), |
2170 | | (r"%", r"\_", false), |
2171 | | (r"\a", r"\_", false), |
2172 | | (r"\\a", r"\_", false), |
2173 | | (r"\_", r"\_", false), |
2174 | | (r"\\_", r"\_", false), |
2175 | | // Escaped % wildcard |
2176 | | (r"", r"\%", false), |
2177 | | (r"\", r"\%", false), |
2178 | | (r"\\", r"\%", false), |
2179 | | (r"a", r"\%", false), |
2180 | | (r"_", r"\%", false), |
2181 | | (r"%", r"\%", true), |
2182 | | (r"\a", r"\%", false), |
2183 | | (r"\\a", r"\%", false), |
2184 | | (r"\%", r"\%", false), |
2185 | | (r"\\%", r"\%", false), |
2186 | | // Escape and non-wildcard |
2187 | | (r"", r"\\a", false), |
2188 | | (r"\", r"\\a", false), |
2189 | | (r"\\", r"\\a", false), |
2190 | | (r"a", r"\\a", false), |
2191 | | (r"\a", r"\\a", true), |
2192 | | (r"\\a", r"\\a", false), |
2193 | | (r"\\\a", r"\\a", false), |
2194 | | // Escape and _ wildcard |
2195 | | (r"", r"\\_", false), |
2196 | | (r"\", r"\\_", false), |
2197 | | (r"\\", r"\\_", true), |
2198 | | (r"a", r"\\_", false), |
2199 | | (r"_", r"\\_", false), |
2200 | | (r"%", r"\\_", false), |
2201 | | (r"\a", r"\\_", true), |
2202 | | (r"\\a", r"\\_", false), |
2203 | | (r"\_", r"\\_", true), |
2204 | | (r"\\_", r"\\_", false), |
2205 | | (r"\\\_", r"\\_", false), |
2206 | | // Escape and % wildcard |
2207 | | (r"", r"\\%", false), |
2208 | | (r"\", r"\\%", true), |
2209 | | (r"\\", r"\\%", true), |
2210 | | (r"a", r"\\%", false), |
2211 | | (r"ab", r"\\%", false), |
2212 | | (r"a%", r"\\%", false), |
2213 | | (r"_", r"\\%", false), |
2214 | | (r"%", r"\\%", false), |
2215 | | (r"\a", r"\\%", true), |
2216 | | (r"\\a", r"\\%", true), |
2217 | | (r"\%", r"\\%", true), |
2218 | | (r"\\%", r"\\%", true), |
2219 | | (r"\\\%", r"\\%", true), |
2220 | | // %... pattern with dangling escape |
2221 | | (r"\", r"%\", true), |
2222 | | (r"\\", r"%\", true), |
2223 | | (r"%\", r"%\", true), |
2224 | | (r"%\\", r"%\", true), |
2225 | | (r"abc\", r"%\", true), |
2226 | | (r"abc", r"%\", false), |
2227 | | // %... pattern with escape |
2228 | | (r"\", r"%\\", true), |
2229 | | (r"\\", r"%\\", true), |
2230 | | (r"%\\", r"%\\", true), |
2231 | | (r"%\\\", r"%\\", true), |
2232 | | (r"abc\", r"%\\", true), |
2233 | | (r"abc", r"%\\", false), |
2234 | | // %... pattern including escaped non-wildcard |
2235 | | (r"ac", r"%a\c", true), |
2236 | | (r"xyzac", r"%a\c", true), |
2237 | | (r"abc", r"%a\c", false), |
2238 | | (r"a\c", r"%a\c", false), |
2239 | | (r"%a\c", r"%a\c", false), |
2240 | | // %... pattern including escape |
2241 | | (r"\", r"%a\\c", false), |
2242 | | (r"\\", r"%a\\c", false), |
2243 | | (r"ac", r"%a\\c", false), |
2244 | | (r"a\c", r"%a\\c", true), |
2245 | | (r"a\\c", r"%a\\c", false), |
2246 | | (r"abc", r"%a\\c", false), |
2247 | | (r"xyza\c", r"%a\\c", true), |
2248 | | (r"xyza\\c", r"%a\\c", false), |
2249 | | (r"%a\\c", r"%a\\c", false), |
2250 | | // ...% pattern with escape |
2251 | | (r"\", r"\\%", true), |
2252 | | (r"\\", r"\\%", true), |
2253 | | (r"\\%", r"\\%", true), |
2254 | | (r"\\\%", r"\\%", true), |
2255 | | (r"\abc", r"\\%", true), |
2256 | | (r"a", r"\\%", false), |
2257 | | (r"abc", r"\\%", false), |
2258 | | // ...% pattern including escaped non-wildcard |
2259 | | (r"ac", r"a\c%", true), |
2260 | | (r"acxyz", r"a\c%", true), |
2261 | | (r"abc", r"a\c%", false), |
2262 | | (r"a\c", r"a\c%", false), |
2263 | | (r"a\c%", r"a\c%", false), |
2264 | | (r"a\\c%", r"a\c%", false), |
2265 | | // ...% pattern including escape |
2266 | | (r"ac", r"a\\c%", false), |
2267 | | (r"a\c", r"a\\c%", true), |
2268 | | (r"a\cxyz", r"a\\c%", true), |
2269 | | (r"a\\c", r"a\\c%", false), |
2270 | | (r"a\\cxyz", r"a\\c%", false), |
2271 | | (r"abc", r"a\\c%", false), |
2272 | | (r"abcxyz", r"a\\c%", false), |
2273 | | (r"a\\c%", r"a\\c%", false), |
2274 | | // %...% pattern including escaped non-wildcard |
2275 | | (r"ac", r"%a\c%", true), |
2276 | | (r"xyzacxyz", r"%a\c%", true), |
2277 | | (r"abc", r"%a\c%", false), |
2278 | | (r"a\c", r"%a\c%", false), |
2279 | | (r"xyza\cxyz", r"%a\c%", false), |
2280 | | (r"%a\c%", r"%a\c%", false), |
2281 | | (r"%a\\c%", r"%a\c%", false), |
2282 | | // %...% pattern including escape |
2283 | | (r"ac", r"%a\\c%", false), |
2284 | | (r"a\c", r"%a\\c%", true), |
2285 | | (r"xyza\cxyz", r"%a\\c%", true), |
2286 | | (r"a\\c", r"%a\\c%", false), |
2287 | | (r"xyza\\cxyz", r"%a\\c%", false), |
2288 | | (r"abc", r"%a\\c%", false), |
2289 | | (r"xyzabcxyz", r"%a\\c%", false), |
2290 | | (r"%a\\c%", r"%a\\c%", false), |
2291 | | // Odd (7) backslashes and % wildcard |
2292 | | (r"\\%", r"\\\\\\\%", false), |
2293 | | (r"\\\", r"\\\\\\\%", false), |
2294 | | (r"\\\%", r"\\\\\\\%", true), |
2295 | | (r"\\\\", r"\\\\\\\%", false), |
2296 | | (r"\\\\%", r"\\\\\\\%", false), |
2297 | | (r"\\\\\\\%", r"\\\\\\\%", false), |
2298 | | // Odd (7) backslashes and _ wildcard |
2299 | | (r"\\\", r"\\\\\\\_", false), |
2300 | | (r"\\\\", r"\\\\\\\_", false), |
2301 | | (r"\\\_", r"\\\\\\\_", true), |
2302 | | (r"\\\\", r"\\\\\\\_", false), /* NOTE(review): repeats the case two rows above */ |
2303 | | (r"\\\a", r"\\\\\\\_", false), |
2304 | | (r"\\\\_", r"\\\\\\\_", false), |
2305 | | (r"\\\\\\\_", r"\\\\\\\_", false), |
2306 | | // Even (8) backslashes and % wildcard |
2307 | | (r"\\\", r"\\\\\\\\%", false), |
2308 | | (r"\\\\", r"\\\\\\\\%", true), |
2309 | | (r"\\\\\", r"\\\\\\\\%", true), |
2310 | | (r"\\\\xyz", r"\\\\\\\\%", true), |
2311 | | (r"\\\\\\\\%", r"\\\\\\\\%", true), |
2312 | | // Even (8) backslashes and _ wildcard |
2313 | | (r"\\\", r"\\\\\\\\_", false), |
2314 | | (r"\\\\", r"\\\\\\\\_", false), |
2315 | | (r"\\\\\", r"\\\\\\\\_", true), |
2316 | | (r"\\\\a", r"\\\\\\\\_", true), |
2317 | | (r"\\\\\a", r"\\\\\\\\_", false), |
2318 | | (r"\\\\ab", r"\\\\\\\\_", false), |
2319 | | (r"\\\\\\\\_", r"\\\\\\\\_", false), |
2320 | | ]; |
2321 | | |
2322 | | for (value, pattern, expected) in test_cases { |
2323 | | let unexpected = BooleanArray::from(vec![!expected]); /* negated result, compared against nlike/nilike below */ |
2324 | | let expected = BooleanArray::from(vec![expected]); |
2325 | | |
2326 | | for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] { /* repeat every case for each string type */ |
2327 | | for ((value_datum, value_type), (pattern_datum, pattern_type)) in zip( /* NOTE(review): make_datums is defined outside this view; presumably it yields array and scalar forms with a type label - confirm */ |
2328 | | make_datums(value, &string_type), |
2329 | | make_datums(pattern, &string_type), |
2330 | | ) { |
2331 | | let value_datum = value_datum.as_ref(); |
2332 | | let pattern_datum = pattern_datum.as_ref(); |
2333 | | assert_eq!( |
2334 | | like(value_datum, pattern_datum).unwrap(), |
2335 | | expected, |
2336 | | "{value_type:?} «{value}» like {pattern_type:?} «{pattern}»" |
2337 | | ); |
2338 | | assert_eq!( |
2339 | | ilike(value_datum, pattern_datum).unwrap(), |
2340 | | expected, |
2341 | | "{value_type:?} «{value}» ilike {pattern_type:?} «{pattern}»" |
2342 | | ); |
2343 | | assert_eq!( |
2344 | | nlike(value_datum, pattern_datum).unwrap(), |
2345 | | unexpected, |
2346 | | "{value_type:?} «{value}» nlike {pattern_type:?} «{pattern}»" |
2347 | | ); |
2348 | | assert_eq!( |
2349 | | nilike(value_datum, pattern_datum).unwrap(), |
2350 | | unexpected, |
2351 | | "{value_type:?} «{value}» nilike {pattern_type:?} «{pattern}»" |
2352 | | ); |
2353 | | } |
2354 | | } |
2355 | | } |
2356 | | } |
2357 | | |
2358 | | #[test] |
2359 | | fn like_escape_many() { |
2360 | | // (value, pattern, expected) |
2361 | | let test_cases = vec![ |
2362 | | (r"", r"", true), |
2363 | | (r"\", r"", false), |
2364 | | (r"\\", r"", false), |
2365 | | (r"\\\", r"", false), |
2366 | | (r"\\\\", r"", false), |
2367 | | (r"a", r"", false), |
2368 | | (r"\a", r"", false), |
2369 | | (r"\\a", r"", false), |
2370 | | (r"%", r"", false), |
2371 | | (r"\%", r"", false), |
2372 | | (r"\\%", r"", false), |
2373 | | (r"%%", r"", false), |
2374 | | (r"\%%", r"", false), |
2375 | | (r"\\%%", r"", false), |
2376 | | (r"_", r"", false), |
2377 | | (r"\_", r"", false), |
2378 | | (r"\\_", r"", false), |
2379 | | (r"__", r"", false), |
2380 | | (r"\__", r"", false), |
2381 | | (r"\\__", r"", false), |
2382 | | (r"abc", r"", false), |
2383 | | (r"a_c", r"", false), |
2384 | | (r"a\bc", r"", false), |
2385 | | (r"a\_c", r"", false), |
2386 | | (r"%abc", r"", false), |
2387 | | (r"\%abc", r"", false), |
2388 | | (r"a\\_c%", r"", false), |
2389 | | (r"", r"\", false), |
2390 | | (r"\", r"\", true), |
2391 | | (r"\\", r"\", false), |
2392 | | (r"\\\", r"\", false), |
2393 | | (r"\\\\", r"\", false), |
2394 | | (r"a", r"\", false), |
2395 | | (r"\a", r"\", false), |
2396 | | (r"\\a", r"\", false), |
2397 | | (r"%", r"\", false), |
2398 | | (r"\%", r"\", false), |
2399 | | (r"\\%", r"\", false), |
2400 | | (r"%%", r"\", false), |
2401 | | (r"\%%", r"\", false), |
2402 | | (r"\\%%", r"\", false), |
2403 | | (r"_", r"\", false), |
2404 | | (r"\_", r"\", false), |
2405 | | (r"\\_", r"\", false), |
2406 | | (r"__", r"\", false), |
2407 | | (r"\__", r"\", false), |
2408 | | (r"\\__", r"\", false), |
2409 | | (r"abc", r"\", false), |
2410 | | (r"a_c", r"\", false), |
2411 | | (r"a\bc", r"\", false), |
2412 | | (r"a\_c", r"\", false), |
2413 | | (r"%abc", r"\", false), |
2414 | | (r"\%abc", r"\", false), |
2415 | | (r"a\\_c%", r"\", false), |
2416 | | (r"", r"\\", false), |
2417 | | (r"\", r"\\", true), |
2418 | | (r"\\", r"\\", false), |
2419 | | (r"\\\", r"\\", false), |
2420 | | (r"\\\\", r"\\", false), |
2421 | | (r"a", r"\\", false), |
2422 | | (r"\a", r"\\", false), |
2423 | | (r"\\a", r"\\", false), |
2424 | | (r"%", r"\\", false), |
2425 | | (r"\%", r"\\", false), |
2426 | | (r"\\%", r"\\", false), |
2427 | | (r"%%", r"\\", false), |
2428 | | (r"\%%", r"\\", false), |
2429 | | (r"\\%%", r"\\", false), |
2430 | | (r"_", r"\\", false), |
2431 | | (r"\_", r"\\", false), |
2432 | | (r"\\_", r"\\", false), |
2433 | | (r"__", r"\\", false), |
2434 | | (r"\__", r"\\", false), |
2435 | | (r"\\__", r"\\", false), |
2436 | | (r"abc", r"\\", false), |
2437 | | (r"a_c", r"\\", false), |
2438 | | (r"a\bc", r"\\", false), |
2439 | | (r"a\_c", r"\\", false), |
2440 | | (r"%abc", r"\\", false), |
2441 | | (r"\%abc", r"\\", false), |
2442 | | (r"a\\_c%", r"\\", false), |
2443 | | (r"", r"\\\", false), |
2444 | | (r"\", r"\\\", false), |
2445 | | (r"\\", r"\\\", true), |
2446 | | (r"\\\", r"\\\", false), |
2447 | | (r"\\\\", r"\\\", false), |
2448 | | (r"a", r"\\\", false), |
2449 | | (r"\a", r"\\\", false), |
2450 | | (r"\\a", r"\\\", false), |
2451 | | (r"%", r"\\\", false), |
2452 | | (r"\%", r"\\\", false), |
2453 | | (r"\\%", r"\\\", false), |
2454 | | (r"%%", r"\\\", false), |
2455 | | (r"\%%", r"\\\", false), |
2456 | | (r"\\%%", r"\\\", false), |
2457 | | (r"_", r"\\\", false), |
2458 | | (r"\_", r"\\\", false), |
2459 | | (r"\\_", r"\\\", false), |
2460 | | (r"__", r"\\\", false), |
2461 | | (r"\__", r"\\\", false), |
2462 | | (r"\\__", r"\\\", false), |
2463 | | (r"abc", r"\\\", false), |
2464 | | (r"a_c", r"\\\", false), |
2465 | | (r"a\bc", r"\\\", false), |
2466 | | (r"a\_c", r"\\\", false), |
2467 | | (r"%abc", r"\\\", false), |
2468 | | (r"\%abc", r"\\\", false), |
2469 | | (r"a\\_c%", r"\\\", false), |
2470 | | (r"", r"\\\\", false), |
2471 | | (r"\", r"\\\\", false), |
2472 | | (r"\\", r"\\\\", true), |
2473 | | (r"\\\", r"\\\\", false), |
2474 | | (r"\\\\", r"\\\\", false), |
2475 | | (r"a", r"\\\\", false), |
2476 | | (r"\a", r"\\\\", false), |
2477 | | (r"\\a", r"\\\\", false), |
2478 | | (r"%", r"\\\\", false), |
2479 | | (r"\%", r"\\\\", false), |
2480 | | (r"\\%", r"\\\\", false), |
2481 | | (r"%%", r"\\\\", false), |
2482 | | (r"\%%", r"\\\\", false), |
2483 | | (r"\\%%", r"\\\\", false), |
2484 | | (r"_", r"\\\\", false), |
2485 | | (r"\_", r"\\\\", false), |
2486 | | (r"\\_", r"\\\\", false), |
2487 | | (r"__", r"\\\\", false), |
2488 | | (r"\__", r"\\\\", false), |
2489 | | (r"\\__", r"\\\\", false), |
2490 | | (r"abc", r"\\\\", false), |
2491 | | (r"a_c", r"\\\\", false), |
2492 | | (r"a\bc", r"\\\\", false), |
2493 | | (r"a\_c", r"\\\\", false), |
2494 | | (r"%abc", r"\\\\", false), |
2495 | | (r"\%abc", r"\\\\", false), |
2496 | | (r"a\\_c%", r"\\\\", false), |
2497 | | (r"", r"a", false), |
2498 | | (r"\", r"a", false), |
2499 | | (r"\\", r"a", false), |
2500 | | (r"\\\", r"a", false), |
2501 | | (r"\\\\", r"a", false), |
2502 | | (r"a", r"a", true), |
2503 | | (r"\a", r"a", false), |
2504 | | (r"\\a", r"a", false), |
2505 | | (r"%", r"a", false), |
2506 | | (r"\%", r"a", false), |
2507 | | (r"\\%", r"a", false), |
2508 | | (r"%%", r"a", false), |
2509 | | (r"\%%", r"a", false), |
2510 | | (r"\\%%", r"a", false), |
2511 | | (r"_", r"a", false), |
2512 | | (r"\_", r"a", false), |
2513 | | (r"\\_", r"a", false), |
2514 | | (r"__", r"a", false), |
2515 | | (r"\__", r"a", false), |
2516 | | (r"\\__", r"a", false), |
2517 | | (r"abc", r"a", false), |
2518 | | (r"a_c", r"a", false), |
2519 | | (r"a\bc", r"a", false), |
2520 | | (r"a\_c", r"a", false), |
2521 | | (r"%abc", r"a", false), |
2522 | | (r"\%abc", r"a", false), |
2523 | | (r"a\\_c%", r"a", false), |
2524 | | (r"", r"\a", false), |
2525 | | (r"\", r"\a", false), |
2526 | | (r"\\", r"\a", false), |
2527 | | (r"\\\", r"\a", false), |
2528 | | (r"\\\\", r"\a", false), |
2529 | | (r"a", r"\a", true), |
2530 | | (r"\a", r"\a", false), |
2531 | | (r"\\a", r"\a", false), |
2532 | | (r"%", r"\a", false), |
2533 | | (r"\%", r"\a", false), |
2534 | | (r"\\%", r"\a", false), |
2535 | | (r"%%", r"\a", false), |
2536 | | (r"\%%", r"\a", false), |
2537 | | (r"\\%%", r"\a", false), |
2538 | | (r"_", r"\a", false), |
2539 | | (r"\_", r"\a", false), |
2540 | | (r"\\_", r"\a", false), |
2541 | | (r"__", r"\a", false), |
2542 | | (r"\__", r"\a", false), |
2543 | | (r"\\__", r"\a", false), |
2544 | | (r"abc", r"\a", false), |
2545 | | (r"a_c", r"\a", false), |
2546 | | (r"a\bc", r"\a", false), |
2547 | | (r"a\_c", r"\a", false), |
2548 | | (r"%abc", r"\a", false), |
2549 | | (r"\%abc", r"\a", false), |
2550 | | (r"a\\_c%", r"\a", false), |
2551 | | (r"", r"\\a", false), |
2552 | | (r"\", r"\\a", false), |
2553 | | (r"\\", r"\\a", false), |
2554 | | (r"\\\", r"\\a", false), |
2555 | | (r"\\\\", r"\\a", false), |
2556 | | (r"a", r"\\a", false), |
2557 | | (r"\a", r"\\a", true), |
2558 | | (r"\\a", r"\\a", false), |
2559 | | (r"%", r"\\a", false), |
2560 | | (r"\%", r"\\a", false), |
2561 | | (r"\\%", r"\\a", false), |
2562 | | (r"%%", r"\\a", false), |
2563 | | (r"\%%", r"\\a", false), |
2564 | | (r"\\%%", r"\\a", false), |
2565 | | (r"_", r"\\a", false), |
2566 | | (r"\_", r"\\a", false), |
2567 | | (r"\\_", r"\\a", false), |
2568 | | (r"__", r"\\a", false), |
2569 | | (r"\__", r"\\a", false), |
2570 | | (r"\\__", r"\\a", false), |
2571 | | (r"abc", r"\\a", false), |
2572 | | (r"a_c", r"\\a", false), |
2573 | | (r"a\bc", r"\\a", false), |
2574 | | (r"a\_c", r"\\a", false), |
2575 | | (r"%abc", r"\\a", false), |
2576 | | (r"\%abc", r"\\a", false), |
2577 | | (r"a\\_c%", r"\\a", false), |
2578 | | (r"", r"%", true), |
2579 | | (r"\", r"%", true), |
2580 | | (r"\\", r"%", true), |
2581 | | (r"\\\", r"%", true), |
2582 | | (r"\\\\", r"%", true), |
2583 | | (r"a", r"%", true), |
2584 | | (r"\a", r"%", true), |
2585 | | (r"\\a", r"%", true), |
2586 | | (r"%", r"%", true), |
2587 | | (r"\%", r"%", true), |
2588 | | (r"\\%", r"%", true), |
2589 | | (r"%%", r"%", true), |
2590 | | (r"\%%", r"%", true), |
2591 | | (r"\\%%", r"%", true), |
2592 | | (r"_", r"%", true), |
2593 | | (r"\_", r"%", true), |
2594 | | (r"\\_", r"%", true), |
2595 | | (r"__", r"%", true), |
2596 | | (r"\__", r"%", true), |
2597 | | (r"\\__", r"%", true), |
2598 | | (r"abc", r"%", true), |
2599 | | (r"a_c", r"%", true), |
2600 | | (r"a\bc", r"%", true), |
2601 | | (r"a\_c", r"%", true), |
2602 | | (r"%abc", r"%", true), |
2603 | | (r"\%abc", r"%", true), |
2604 | | (r"a\\_c%", r"%", true), |
2605 | | (r"", r"\%", false), |
2606 | | (r"\", r"\%", false), |
2607 | | (r"\\", r"\%", false), |
2608 | | (r"\\\", r"\%", false), |
2609 | | (r"\\\\", r"\%", false), |
2610 | | (r"a", r"\%", false), |
2611 | | (r"\a", r"\%", false), |
2612 | | (r"\\a", r"\%", false), |
2613 | | (r"%", r"\%", true), |
2614 | | (r"\%", r"\%", false), |
2615 | | (r"\\%", r"\%", false), |
2616 | | (r"%%", r"\%", false), |
2617 | | (r"\%%", r"\%", false), |
2618 | | (r"\\%%", r"\%", false), |
2619 | | (r"_", r"\%", false), |
2620 | | (r"\_", r"\%", false), |
2621 | | (r"\\_", r"\%", false), |
2622 | | (r"__", r"\%", false), |
2623 | | (r"\__", r"\%", false), |
2624 | | (r"\\__", r"\%", false), |
2625 | | (r"abc", r"\%", false), |
2626 | | (r"a_c", r"\%", false), |
2627 | | (r"a\bc", r"\%", false), |
2628 | | (r"a\_c", r"\%", false), |
2629 | | (r"%abc", r"\%", false), |
2630 | | (r"\%abc", r"\%", false), |
2631 | | (r"a\\_c%", r"\%", false), |
2632 | | (r"", r"\\%", false), |
2633 | | (r"\", r"\\%", true), |
2634 | | (r"\\", r"\\%", true), |
2635 | | (r"\\\", r"\\%", true), |
2636 | | (r"\\\\", r"\\%", true), |
2637 | | (r"a", r"\\%", false), |
2638 | | (r"\a", r"\\%", true), |
2639 | | (r"\\a", r"\\%", true), |
2640 | | (r"%", r"\\%", false), |
2641 | | (r"\%", r"\\%", true), |
2642 | | (r"\\%", r"\\%", true), |
2643 | | (r"%%", r"\\%", false), |
2644 | | (r"\%%", r"\\%", true), |
2645 | | (r"\\%%", r"\\%", true), |
2646 | | (r"_", r"\\%", false), |
2647 | | (r"\_", r"\\%", true), |
2648 | | (r"\\_", r"\\%", true), |
2649 | | (r"__", r"\\%", false), |
2650 | | (r"\__", r"\\%", true), |
2651 | | (r"\\__", r"\\%", true), |
2652 | | (r"abc", r"\\%", false), |
2653 | | (r"a_c", r"\\%", false), |
2654 | | (r"a\bc", r"\\%", false), |
2655 | | (r"a\_c", r"\\%", false), |
2656 | | (r"%abc", r"\\%", false), |
2657 | | (r"\%abc", r"\\%", true), |
2658 | | (r"a\\_c%", r"\\%", false), |
2659 | | (r"", r"%%", true), |
2660 | | (r"\", r"%%", true), |
2661 | | (r"\\", r"%%", true), |
2662 | | (r"\\\", r"%%", true), |
2663 | | (r"\\\\", r"%%", true), |
2664 | | (r"a", r"%%", true), |
2665 | | (r"\a", r"%%", true), |
2666 | | (r"\\a", r"%%", true), |
2667 | | (r"%", r"%%", true), |
2668 | | (r"\%", r"%%", true), |
2669 | | (r"\\%", r"%%", true), |
2670 | | (r"%%", r"%%", true), |
2671 | | (r"\%%", r"%%", true), |
2672 | | (r"\\%%", r"%%", true), |
2673 | | (r"_", r"%%", true), |
2674 | | (r"\_", r"%%", true), |
2675 | | (r"\\_", r"%%", true), |
2676 | | (r"__", r"%%", true), |
2677 | | (r"\__", r"%%", true), |
2678 | | (r"\\__", r"%%", true), |
2679 | | (r"abc", r"%%", true), |
2680 | | (r"a_c", r"%%", true), |
2681 | | (r"a\bc", r"%%", true), |
2682 | | (r"a\_c", r"%%", true), |
2683 | | (r"%abc", r"%%", true), |
2684 | | (r"\%abc", r"%%", true), |
2685 | | (r"a\\_c%", r"%%", true), |
2686 | | (r"", r"\%%", false), |
2687 | | (r"\", r"\%%", false), |
2688 | | (r"\\", r"\%%", false), |
2689 | | (r"\\\", r"\%%", false), |
2690 | | (r"\\\\", r"\%%", false), |
2691 | | (r"a", r"\%%", false), |
2692 | | (r"\a", r"\%%", false), |
2693 | | (r"\\a", r"\%%", false), |
2694 | | (r"%", r"\%%", true), |
2695 | | (r"\%", r"\%%", false), |
2696 | | (r"\\%", r"\%%", false), |
2697 | | (r"%%", r"\%%", true), |
2698 | | (r"\%%", r"\%%", false), |
2699 | | (r"\\%%", r"\%%", false), |
2700 | | (r"_", r"\%%", false), |
2701 | | (r"\_", r"\%%", false), |
2702 | | (r"\\_", r"\%%", false), |
2703 | | (r"__", r"\%%", false), |
2704 | | (r"\__", r"\%%", false), |
2705 | | (r"\\__", r"\%%", false), |
2706 | | (r"abc", r"\%%", false), |
2707 | | (r"a_c", r"\%%", false), |
2708 | | (r"a\bc", r"\%%", false), |
2709 | | (r"a\_c", r"\%%", false), |
2710 | | (r"%abc", r"\%%", true), |
2711 | | (r"\%abc", r"\%%", false), |
2712 | | (r"a\\_c%", r"\%%", false), |
2713 | | (r"", r"\\%%", false), |
2714 | | (r"\", r"\\%%", true), |
2715 | | (r"\\", r"\\%%", true), |
2716 | | (r"\\\", r"\\%%", true), |
2717 | | (r"\\\\", r"\\%%", true), |
2718 | | (r"a", r"\\%%", false), |
2719 | | (r"\a", r"\\%%", true), |
2720 | | (r"\\a", r"\\%%", true), |
2721 | | (r"%", r"\\%%", false), |
2722 | | (r"\%", r"\\%%", true), |
2723 | | (r"\\%", r"\\%%", true), |
2724 | | (r"%%", r"\\%%", false), |
2725 | | (r"\%%", r"\\%%", true), |
2726 | | (r"\\%%", r"\\%%", true), |
2727 | | (r"_", r"\\%%", false), |
2728 | | (r"\_", r"\\%%", true), |
2729 | | (r"\\_", r"\\%%", true), |
2730 | | (r"__", r"\\%%", false), |
2731 | | (r"\__", r"\\%%", true), |
2732 | | (r"\\__", r"\\%%", true), |
2733 | | (r"abc", r"\\%%", false), |
2734 | | (r"a_c", r"\\%%", false), |
2735 | | (r"a\bc", r"\\%%", false), |
2736 | | (r"a\_c", r"\\%%", false), |
2737 | | (r"%abc", r"\\%%", false), |
2738 | | (r"\%abc", r"\\%%", true), |
2739 | | (r"a\\_c%", r"\\%%", false), |
2740 | | (r"", r"_", false), |
2741 | | (r"\", r"_", true), |
2742 | | (r"\\", r"_", false), |
2743 | | (r"\\\", r"_", false), |
2744 | | (r"\\\\", r"_", false), |
2745 | | (r"a", r"_", true), |
2746 | | (r"\a", r"_", false), |
2747 | | (r"\\a", r"_", false), |
2748 | | (r"%", r"_", true), |
2749 | | (r"\%", r"_", false), |
2750 | | (r"\\%", r"_", false), |
2751 | | (r"%%", r"_", false), |
2752 | | (r"\%%", r"_", false), |
2753 | | (r"\\%%", r"_", false), |
2754 | | (r"_", r"_", true), |
2755 | | (r"\_", r"_", false), |
2756 | | (r"\\_", r"_", false), |
2757 | | (r"__", r"_", false), |
2758 | | (r"\__", r"_", false), |
2759 | | (r"\\__", r"_", false), |
2760 | | (r"abc", r"_", false), |
2761 | | (r"a_c", r"_", false), |
2762 | | (r"a\bc", r"_", false), |
2763 | | (r"a\_c", r"_", false), |
2764 | | (r"%abc", r"_", false), |
2765 | | (r"\%abc", r"_", false), |
2766 | | (r"a\\_c%", r"_", false), |
2767 | | (r"", r"\_", false), |
2768 | | (r"\", r"\_", false), |
2769 | | (r"\\", r"\_", false), |
2770 | | (r"\\\", r"\_", false), |
2771 | | (r"\\\\", r"\_", false), |
2772 | | (r"a", r"\_", false), |
2773 | | (r"\a", r"\_", false), |
2774 | | (r"\\a", r"\_", false), |
2775 | | (r"%", r"\_", false), |
2776 | | (r"\%", r"\_", false), |
2777 | | (r"\\%", r"\_", false), |
2778 | | (r"%%", r"\_", false), |
2779 | | (r"\%%", r"\_", false), |
2780 | | (r"\\%%", r"\_", false), |
2781 | | (r"_", r"\_", true), |
2782 | | (r"\_", r"\_", false), |
2783 | | (r"\\_", r"\_", false), |
2784 | | (r"__", r"\_", false), |
2785 | | (r"\__", r"\_", false), |
2786 | | (r"\\__", r"\_", false), |
2787 | | (r"abc", r"\_", false), |
2788 | | (r"a_c", r"\_", false), |
2789 | | (r"a\bc", r"\_", false), |
2790 | | (r"a\_c", r"\_", false), |
2791 | | (r"%abc", r"\_", false), |
2792 | | (r"\%abc", r"\_", false), |
2793 | | (r"a\\_c%", r"\_", false), |
2794 | | (r"", r"\\_", false), |
2795 | | (r"\", r"\\_", false), |
2796 | | (r"\\", r"\\_", true), |
2797 | | (r"\\\", r"\\_", false), |
2798 | | (r"\\\\", r"\\_", false), |
2799 | | (r"a", r"\\_", false), |
2800 | | (r"\a", r"\\_", true), |
2801 | | (r"\\a", r"\\_", false), |
2802 | | (r"%", r"\\_", false), |
2803 | | (r"\%", r"\\_", true), |
2804 | | (r"\\%", r"\\_", false), |
2805 | | (r"%%", r"\\_", false), |
2806 | | (r"\%%", r"\\_", false), |
2807 | | (r"\\%%", r"\\_", false), |
2808 | | (r"_", r"\\_", false), |
2809 | | (r"\_", r"\\_", true), |
2810 | | (r"\\_", r"\\_", false), |
2811 | | (r"__", r"\\_", false), |
2812 | | (r"\__", r"\\_", false), |
2813 | | (r"\\__", r"\\_", false), |
2814 | | (r"abc", r"\\_", false), |
2815 | | (r"a_c", r"\\_", false), |
2816 | | (r"a\bc", r"\\_", false), |
2817 | | (r"a\_c", r"\\_", false), |
2818 | | (r"%abc", r"\\_", false), |
2819 | | (r"\%abc", r"\\_", false), |
2820 | | (r"a\\_c%", r"\\_", false), |
2821 | | (r"", r"__", false), |
2822 | | (r"\", r"__", false), |
2823 | | (r"\\", r"__", true), |
2824 | | (r"\\\", r"__", false), |
2825 | | (r"\\\\", r"__", false), |
2826 | | (r"a", r"__", false), |
2827 | | (r"\a", r"__", true), |
2828 | | (r"\\a", r"__", false), |
2829 | | (r"%", r"__", false), |
2830 | | (r"\%", r"__", true), |
2831 | | (r"\\%", r"__", false), |
2832 | | (r"%%", r"__", true), |
2833 | | (r"\%%", r"__", false), |
2834 | | (r"\\%%", r"__", false), |
2835 | | (r"_", r"__", false), |
2836 | | (r"\_", r"__", true), |
2837 | | (r"\\_", r"__", false), |
2838 | | (r"__", r"__", true), |
2839 | | (r"\__", r"__", false), |
2840 | | (r"\\__", r"__", false), |
2841 | | (r"abc", r"__", false), |
2842 | | (r"a_c", r"__", false), |
2843 | | (r"a\bc", r"__", false), |
2844 | | (r"a\_c", r"__", false), |
2845 | | (r"%abc", r"__", false), |
2846 | | (r"\%abc", r"__", false), |
2847 | | (r"a\\_c%", r"__", false), |
2848 | | (r"", r"\__", false), |
2849 | | (r"\", r"\__", false), |
2850 | | (r"\\", r"\__", false), |
2851 | | (r"\\\", r"\__", false), |
2852 | | (r"\\\\", r"\__", false), |
2853 | | (r"a", r"\__", false), |
2854 | | (r"\a", r"\__", false), |
2855 | | (r"\\a", r"\__", false), |
2856 | | (r"%", r"\__", false), |
2857 | | (r"\%", r"\__", false), |
2858 | | (r"\\%", r"\__", false), |
2859 | | (r"%%", r"\__", false), |
2860 | | (r"\%%", r"\__", false), |
2861 | | (r"\\%%", r"\__", false), |
2862 | | (r"_", r"\__", false), |
2863 | | (r"\_", r"\__", false), |
2864 | | (r"\\_", r"\__", false), |
2865 | | (r"__", r"\__", true), |
2866 | | (r"\__", r"\__", false), |
2867 | | (r"\\__", r"\__", false), |
2868 | | (r"abc", r"\__", false), |
2869 | | (r"a_c", r"\__", false), |
2870 | | (r"a\bc", r"\__", false), |
2871 | | (r"a\_c", r"\__", false), |
2872 | | (r"%abc", r"\__", false), |
2873 | | (r"\%abc", r"\__", false), |
2874 | | (r"a\\_c%", r"\__", false), |
2875 | | (r"", r"\\__", false), |
2876 | | (r"\", r"\\__", false), |
2877 | | (r"\\", r"\\__", false), |
2878 | | (r"\\\", r"\\__", true), |
2879 | | (r"\\\\", r"\\__", false), |
2880 | | (r"a", r"\\__", false), |
2881 | | (r"\a", r"\\__", false), |
2882 | | (r"\\a", r"\\__", true), |
2883 | | (r"%", r"\\__", false), |
2884 | | (r"\%", r"\\__", false), |
2885 | | (r"\\%", r"\\__", true), |
2886 | | (r"%%", r"\\__", false), |
2887 | | (r"\%%", r"\\__", true), |
2888 | | (r"\\%%", r"\\__", false), |
2889 | | (r"_", r"\\__", false), |
2890 | | (r"\_", r"\\__", false), |
2891 | | (r"\\_", r"\\__", true), |
2892 | | (r"__", r"\\__", false), |
2893 | | (r"\__", r"\\__", true), |
2894 | | (r"\\__", r"\\__", false), |
2895 | | (r"abc", r"\\__", false), |
2896 | | (r"a_c", r"\\__", false), |
2897 | | (r"a\bc", r"\\__", false), |
2898 | | (r"a\_c", r"\\__", false), |
2899 | | (r"%abc", r"\\__", false), |
2900 | | (r"\%abc", r"\\__", false), |
2901 | | (r"a\\_c%", r"\\__", false), |
2902 | | (r"", r"abc", false), |
2903 | | (r"\", r"abc", false), |
2904 | | (r"\\", r"abc", false), |
2905 | | (r"\\\", r"abc", false), |
2906 | | (r"\\\\", r"abc", false), |
2907 | | (r"a", r"abc", false), |
2908 | | (r"\a", r"abc", false), |
2909 | | (r"\\a", r"abc", false), |
2910 | | (r"%", r"abc", false), |
2911 | | (r"\%", r"abc", false), |
2912 | | (r"\\%", r"abc", false), |
2913 | | (r"%%", r"abc", false), |
2914 | | (r"\%%", r"abc", false), |
2915 | | (r"\\%%", r"abc", false), |
2916 | | (r"_", r"abc", false), |
2917 | | (r"\_", r"abc", false), |
2918 | | (r"\\_", r"abc", false), |
2919 | | (r"__", r"abc", false), |
2920 | | (r"\__", r"abc", false), |
2921 | | (r"\\__", r"abc", false), |
2922 | | (r"abc", r"abc", true), |
2923 | | (r"a_c", r"abc", false), |
2924 | | (r"a\bc", r"abc", false), |
2925 | | (r"a\_c", r"abc", false), |
2926 | | (r"%abc", r"abc", false), |
2927 | | (r"\%abc", r"abc", false), |
2928 | | (r"a\\_c%", r"abc", false), |
2929 | | (r"", r"a_c", false), |
2930 | | (r"\", r"a_c", false), |
2931 | | (r"\\", r"a_c", false), |
2932 | | (r"\\\", r"a_c", false), |
2933 | | (r"\\\\", r"a_c", false), |
2934 | | (r"a", r"a_c", false), |
2935 | | (r"\a", r"a_c", false), |
2936 | | (r"\\a", r"a_c", false), |
2937 | | (r"%", r"a_c", false), |
2938 | | (r"\%", r"a_c", false), |
2939 | | (r"\\%", r"a_c", false), |
2940 | | (r"%%", r"a_c", false), |
2941 | | (r"\%%", r"a_c", false), |
2942 | | (r"\\%%", r"a_c", false), |
2943 | | (r"_", r"a_c", false), |
2944 | | (r"\_", r"a_c", false), |
2945 | | (r"\\_", r"a_c", false), |
2946 | | (r"__", r"a_c", false), |
2947 | | (r"\__", r"a_c", false), |
2948 | | (r"\\__", r"a_c", false), |
2949 | | (r"abc", r"a_c", true), |
2950 | | (r"a_c", r"a_c", true), |
2951 | | (r"a\bc", r"a_c", false), |
2952 | | (r"a\_c", r"a_c", false), |
2953 | | (r"%abc", r"a_c", false), |
2954 | | (r"\%abc", r"a_c", false), |
2955 | | (r"a\\_c%", r"a_c", false), |
2956 | | (r"", r"a\bc", false), |
2957 | | (r"\", r"a\bc", false), |
2958 | | (r"\\", r"a\bc", false), |
2959 | | (r"\\\", r"a\bc", false), |
2960 | | (r"\\\\", r"a\bc", false), |
2961 | | (r"a", r"a\bc", false), |
2962 | | (r"\a", r"a\bc", false), |
2963 | | (r"\\a", r"a\bc", false), |
2964 | | (r"%", r"a\bc", false), |
2965 | | (r"\%", r"a\bc", false), |
2966 | | (r"\\%", r"a\bc", false), |
2967 | | (r"%%", r"a\bc", false), |
2968 | | (r"\%%", r"a\bc", false), |
2969 | | (r"\\%%", r"a\bc", false), |
2970 | | (r"_", r"a\bc", false), |
2971 | | (r"\_", r"a\bc", false), |
2972 | | (r"\\_", r"a\bc", false), |
2973 | | (r"__", r"a\bc", false), |
2974 | | (r"\__", r"a\bc", false), |
2975 | | (r"\\__", r"a\bc", false), |
2976 | | (r"abc", r"a\bc", true), |
2977 | | (r"a_c", r"a\bc", false), |
2978 | | (r"a\bc", r"a\bc", false), |
2979 | | (r"a\_c", r"a\bc", false), |
2980 | | (r"%abc", r"a\bc", false), |
2981 | | (r"\%abc", r"a\bc", false), |
2982 | | (r"a\\_c%", r"a\bc", false), |
2983 | | (r"", r"a\_c", false), |
2984 | | (r"\", r"a\_c", false), |
2985 | | (r"\\", r"a\_c", false), |
2986 | | (r"\\\", r"a\_c", false), |
2987 | | (r"\\\\", r"a\_c", false), |
2988 | | (r"a", r"a\_c", false), |
2989 | | (r"\a", r"a\_c", false), |
2990 | | (r"\\a", r"a\_c", false), |
2991 | | (r"%", r"a\_c", false), |
2992 | | (r"\%", r"a\_c", false), |
2993 | | (r"\\%", r"a\_c", false), |
2994 | | (r"%%", r"a\_c", false), |
2995 | | (r"\%%", r"a\_c", false), |
2996 | | (r"\\%%", r"a\_c", false), |
2997 | | (r"_", r"a\_c", false), |
2998 | | (r"\_", r"a\_c", false), |
2999 | | (r"\\_", r"a\_c", false), |
3000 | | (r"__", r"a\_c", false), |
3001 | | (r"\__", r"a\_c", false), |
3002 | | (r"\\__", r"a\_c", false), |
3003 | | (r"abc", r"a\_c", false), |
3004 | | (r"a_c", r"a\_c", true), |
3005 | | (r"a\bc", r"a\_c", false), |
3006 | | (r"a\_c", r"a\_c", false), |
3007 | | (r"%abc", r"a\_c", false), |
3008 | | (r"\%abc", r"a\_c", false), |
3009 | | (r"a\\_c%", r"a\_c", false), |
3010 | | (r"", r"%abc", false), |
3011 | | (r"\", r"%abc", false), |
3012 | | (r"\\", r"%abc", false), |
3013 | | (r"\\\", r"%abc", false), |
3014 | | (r"\\\\", r"%abc", false), |
3015 | | (r"a", r"%abc", false), |
3016 | | (r"\a", r"%abc", false), |
3017 | | (r"\\a", r"%abc", false), |
3018 | | (r"%", r"%abc", false), |
3019 | | (r"\%", r"%abc", false), |
3020 | | (r"\\%", r"%abc", false), |
3021 | | (r"%%", r"%abc", false), |
3022 | | (r"\%%", r"%abc", false), |
3023 | | (r"\\%%", r"%abc", false), |
3024 | | (r"_", r"%abc", false), |
3025 | | (r"\_", r"%abc", false), |
3026 | | (r"\\_", r"%abc", false), |
3027 | | (r"__", r"%abc", false), |
3028 | | (r"\__", r"%abc", false), |
3029 | | (r"\\__", r"%abc", false), |
3030 | | (r"abc", r"%abc", true), |
3031 | | (r"a_c", r"%abc", false), |
3032 | | (r"a\bc", r"%abc", false), |
3033 | | (r"a\_c", r"%abc", false), |
3034 | | (r"%abc", r"%abc", true), |
3035 | | (r"\%abc", r"%abc", true), |
3036 | | (r"a\\_c%", r"%abc", false), |
3037 | | (r"", r"\%abc", false), |
3038 | | (r"\", r"\%abc", false), |
3039 | | (r"\\", r"\%abc", false), |
3040 | | (r"\\\", r"\%abc", false), |
3041 | | (r"\\\\", r"\%abc", false), |
3042 | | (r"a", r"\%abc", false), |
3043 | | (r"\a", r"\%abc", false), |
3044 | | (r"\\a", r"\%abc", false), |
3045 | | (r"%", r"\%abc", false), |
3046 | | (r"\%", r"\%abc", false), |
3047 | | (r"\\%", r"\%abc", false), |
3048 | | (r"%%", r"\%abc", false), |
3049 | | (r"\%%", r"\%abc", false), |
3050 | | (r"\\%%", r"\%abc", false), |
3051 | | (r"_", r"\%abc", false), |
3052 | | (r"\_", r"\%abc", false), |
3053 | | (r"\\_", r"\%abc", false), |
3054 | | (r"__", r"\%abc", false), |
3055 | | (r"\__", r"\%abc", false), |
3056 | | (r"\\__", r"\%abc", false), |
3057 | | (r"abc", r"\%abc", false), |
3058 | | (r"a_c", r"\%abc", false), |
3059 | | (r"a\bc", r"\%abc", false), |
3060 | | (r"a\_c", r"\%abc", false), |
3061 | | (r"%abc", r"\%abc", true), |
3062 | | (r"\%abc", r"\%abc", false), |
3063 | | (r"a\\_c%", r"\%abc", false), |
3064 | | (r"", r"a\\_c%", false), |
3065 | | (r"\", r"a\\_c%", false), |
3066 | | (r"\\", r"a\\_c%", false), |
3067 | | (r"\\\", r"a\\_c%", false), |
3068 | | (r"\\\\", r"a\\_c%", false), |
3069 | | (r"a", r"a\\_c%", false), |
3070 | | (r"\a", r"a\\_c%", false), |
3071 | | (r"\\a", r"a\\_c%", false), |
3072 | | (r"%", r"a\\_c%", false), |
3073 | | (r"\%", r"a\\_c%", false), |
3074 | | (r"\\%", r"a\\_c%", false), |
3075 | | (r"%%", r"a\\_c%", false), |
3076 | | (r"\%%", r"a\\_c%", false), |
3077 | | (r"\\%%", r"a\\_c%", false), |
3078 | | (r"_", r"a\\_c%", false), |
3079 | | (r"\_", r"a\\_c%", false), |
3080 | | (r"\\_", r"a\\_c%", false), |
3081 | | (r"__", r"a\\_c%", false), |
3082 | | (r"\__", r"a\\_c%", false), |
3083 | | (r"\\__", r"a\\_c%", false), |
3084 | | (r"abc", r"a\\_c%", false), |
3085 | | (r"a_c", r"a\\_c%", false), |
3086 | | (r"a\bc", r"a\\_c%", true), |
3087 | | (r"a\_c", r"a\\_c%", true), |
3088 | | (r"%abc", r"a\\_c%", false), |
3089 | | (r"\%abc", r"a\\_c%", false), |
3090 | | (r"a\\_c%", r"a\\_c%", false), |
3091 | | ]; |
3092 | | |
3093 | | let values = test_cases |
3094 | | .iter() |
3095 | | .map(|(value, _, _)| *value) |
3096 | | .collect::<Vec<_>>(); |
3097 | | let patterns = test_cases |
3098 | | .iter() |
3099 | | .map(|(_, pattern, _)| *pattern) |
3100 | | .collect::<Vec<_>>(); |
3101 | | let expected = BooleanArray::from( |
3102 | | test_cases |
3103 | | .iter() |
3104 | | .map(|(_, _, expected)| *expected) |
3105 | | .collect::<Vec<_>>(), |
3106 | | ); |
3107 | | let unexpected = BooleanArray::from( |
3108 | | test_cases |
3109 | | .iter() |
3110 | | .map(|(_, _, expected)| !*expected) |
3111 | | .collect::<Vec<_>>(), |
3112 | | ); |
3113 | | |
3114 | | for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] { |
3115 | | let values = make_array(values.iter(), &string_type); |
3116 | | let patterns = make_array(patterns.iter(), &string_type); |
3117 | | let (values, patterns) = (values.as_ref(), patterns.as_ref()); |
3118 | | |
3119 | | assert_eq!(like(&values, &patterns).unwrap(), expected,); |
3120 | | assert_eq!(ilike(&values, &patterns).unwrap(), expected,); |
3121 | | assert_eq!(nlike(&values, &patterns).unwrap(), unexpected,); |
3122 | | assert_eq!(nilike(&values, &patterns).unwrap(), unexpected,); |
3123 | | } |
3124 | | } |
3125 | | |
3126 | | fn make_datums( |
3127 | | value: impl AsRef<str>, |
3128 | | data_type: &DataType, |
3129 | | ) -> Vec<(Box<dyn Datum>, DatumType)> { |
3130 | | match data_type { |
3131 | | DataType::Utf8 => { |
3132 | | let array = StringArray::from_iter_values([value]); |
3133 | | vec![ |
3134 | | (Box::new(array.clone()), DatumType::Array), |
3135 | | (Box::new(Scalar::new(array)), DatumType::Scalar), |
3136 | | ] |
3137 | | } |
3138 | | DataType::LargeUtf8 => { |
3139 | | let array = LargeStringArray::from_iter_values([value]); |
3140 | | vec![ |
3141 | | (Box::new(array.clone()), DatumType::Array), |
3142 | | (Box::new(Scalar::new(array)), DatumType::Scalar), |
3143 | | ] |
3144 | | } |
3145 | | DataType::Utf8View => { |
3146 | | let array = StringViewArray::from_iter_values([value]); |
3147 | | vec![ |
3148 | | (Box::new(array.clone()), DatumType::Array), |
3149 | | (Box::new(Scalar::new(array)), DatumType::Scalar), |
3150 | | ] |
3151 | | } |
3152 | | _ => unimplemented!(), |
3153 | | } |
3154 | | } |
3155 | | |
3156 | | fn make_array( |
3157 | | values: impl IntoIterator<Item: AsRef<str>>, |
3158 | | data_type: &DataType, |
3159 | | ) -> Box<dyn Array> { |
3160 | | match data_type { |
3161 | | DataType::Utf8 => Box::new(StringArray::from_iter_values(values)), |
3162 | | DataType::LargeUtf8 => Box::new(LargeStringArray::from_iter_values(values)), |
3163 | | DataType::Utf8View => Box::new(StringViewArray::from_iter_values(values)), |
3164 | | _ => unimplemented!(), |
3165 | | } |
3166 | | } |
3167 | | |
/// Tag recording which form a test input built by `make_datums` takes.
#[derive(Debug)]
enum DatumType {
    /// The input is the plain one-element array.
    Array,
    /// The input is the one-element array wrapped in a `Scalar`.
    Scalar,
}
3173 | | } |