Skip to content

Commit 5737c20

Browse files
committed
feat: support upper and lower for stringview
1 parent 8fd9d69 commit 5737c20

File tree

4 files changed

+54 additions
-9 deletions

datafusion/functions/src/string/common.rs

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ use std::sync::Arc;
2323
use arrow::array::{
2424
new_null_array, Array, ArrayAccessor, ArrayDataBuilder, ArrayIter, ArrayRef,
2525
GenericStringArray, GenericStringBuilder, OffsetSizeTrait, StringArray,
26-
StringViewArray,
26+
StringBuilder, StringViewArray,
2727
};
2828
use arrow::buffer::{Buffer, MutableBuffer, NullBuffer};
2929
use arrow::datatypes::DataType;
@@ -214,6 +214,23 @@ where
214214
i64,
215215
_,
216216
>(array, op)?)),
217+
DataType::Utf8View => {
218+
let string_array = as_string_view_array(array)?;
219+
let mut string_builder = StringBuilder::with_capacity(
220+
string_array.len(),
221+
string_array.get_array_memory_size(),
222+
);
223+
224+
for str in string_array.iter() {
225+
if let Some(str) = str {
226+
string_builder.append_value(op(str));
227+
} else {
228+
string_builder.append_null();
229+
}
230+
}
231+
232+
Ok(ColumnarValue::Array(Arc::new(string_builder.finish())))
233+
}
217234
other => exec_err!("Unsupported data type {other:?} for function {name}"),
218235
},
219236
ColumnarValue::Scalar(scalar) => match scalar {
@@ -225,6 +242,10 @@ where
225242
let result = a.as_ref().map(|x| op(x));
226243
Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(result)))
227244
}
245+
ScalarValue::Utf8View(a) => {
246+
let result = a.as_ref().map(|x| op(x));
247+
Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(result)))
248+
}
228249
other => exec_err!("Unsupported data type {other:?} for function {name}"),
229250
},
230251
}

datafusion/functions/src/string/lower.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ impl LowerFunc {
4343
Self {
4444
signature: Signature::uniform(
4545
1,
46-
vec![Utf8, LargeUtf8],
46+
vec![Utf8, LargeUtf8, Utf8View],
4747
Volatility::Immutable,
4848
),
4949
}

datafusion/functions/src/string/upper.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ impl UpperFunc {
4040
Self {
4141
signature: Signature::uniform(
4242
1,
43-
vec![Utf8, LargeUtf8],
43+
vec![Utf8, LargeUtf8, Utf8View],
4444
Volatility::Immutable,
4545
),
4646
}

datafusion/sqllogictest/test_files/string_view.slt

Lines changed: 30 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -460,8 +460,6 @@ Xiangpeng
460460
Raphael
461461
NULL
462462

463-
464-
465463
### Initcap
466464

467465
query TT
@@ -478,7 +476,7 @@ statement ok
478476
CREATE TABLE test_lowercase AS SELECT
479477
lower(column1_utf8) as column1_utf8_lower,
480478
lower(column1_large_utf8) as column1_large_utf8_lower,
481-
lower(column1_utf8view) as column1_utf8view_lower
479+
arrow_cast(lower(column1_utf8), 'Utf8View') as column1_utf8view_lower
482480
FROM test;
483481

484482
# Test INITCAP with utf8view, utf8, and largeutf8
@@ -501,7 +499,7 @@ SELECT
501499
INITCAP(column1_large_utf8_lower) as c3
502500
FROM test_lowercase;
503501
----
504-
Andrew Andrew Andrew
502+
Andrew Andrew Andrew
505503
Xiangpeng Xiangpeng Xiangpeng
506504
Raphael Raphael Raphael
507505
NULL NULL NULL
@@ -828,16 +826,42 @@ logical_plan
828826
02)--TableScan: test projection=[column1_utf8view, column2_utf8view]
829827

830828
## Ensure no casts for LOWER
831-
## TODO https://github.com/apache/datafusion/issues/11855
832829
query TT
833830
EXPLAIN SELECT
834831
LOWER(column1_utf8view) as c1
835832
FROM test;
836833
----
837834
logical_plan
838-
01)Projection: lower(CAST(test.column1_utf8view AS Utf8)) AS c1
835+
01)Projection: lower(test.column1_utf8view) AS c1
836+
02)--TableScan: test projection=[column1_utf8view]
837+
838+
query T
839+
SELECT LOWER(column1_utf8view) as c1
840+
FROM test;
841+
----
842+
andrew
843+
xiangpeng
844+
raphael
845+
NULL
846+
847+
## Ensure no casts for UPPER
848+
query TT
849+
EXPLAIN SELECT
850+
UPPER(column1_utf8view) as c1
851+
FROM test;
852+
----
853+
logical_plan
854+
01)Projection: upper(test.column1_utf8view) AS c1
839855
02)--TableScan: test projection=[column1_utf8view]
840856

857+
query T
858+
SELECT UPPER(column1_utf8view) as c1
859+
FROM test;
860+
----
861+
ANDREW
862+
XIANGPENG
863+
RAPHAEL
864+
NULL
841865

842866
## Ensure no casts for LPAD
843867
query TT

0 commit comments

Comments
 (0)