Skip to content

Commit 66cb65a

Browse files
author
Arttu Voutilainen
committed
use IntervalCompound instead of interval-month-day-nano UDT
1 parent 146f16a commit 66cb65a

File tree

4 files changed

+79
-118
lines changed

4 files changed

+79
-118
lines changed

datafusion/substrait/src/logical_plan/consumer.rs

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,15 @@ use crate::variation_const::{
4242
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
4343
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
4444
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
45-
INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF,
46-
UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF,
45+
LARGE_CONTAINER_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
46+
VIEW_CONTAINER_TYPE_VARIATION_REF,
4747
};
4848
#[allow(deprecated)]
4949
use crate::variation_const::{
50-
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_REF,
51-
INTERVAL_YEAR_MONTH_TYPE_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF,
52-
TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF,
53-
TIMESTAMP_SECOND_TYPE_VARIATION_REF,
50+
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME,
51+
INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF,
52+
TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF,
53+
TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF,
5454
};
5555
use datafusion::arrow::array::{new_empty_array, AsArray};
5656
use datafusion::common::scalar::ScalarStructBuilder;
@@ -71,10 +71,10 @@ use datafusion::{
7171
use std::collections::HashSet;
7272
use std::sync::Arc;
7373
use substrait::proto::exchange_rel::ExchangeKind;
74-
use substrait::proto::expression::literal::interval_day_to_second::PrecisionMode;
7574
use substrait::proto::expression::literal::user_defined::Val;
7675
use substrait::proto::expression::literal::{
77-
IntervalDayToSecond, IntervalYearToMonth, UserDefined,
76+
interval_day_to_second, IntervalCompound, IntervalDayToSecond, IntervalYearToMonth,
77+
UserDefined,
7878
};
7979
use substrait::proto::expression::subquery::SubqueryType;
8080
use substrait::proto::expression::{self, FieldReference, Literal, ScalarFunction};
@@ -1831,8 +1831,13 @@ fn from_substrait_type(
18311831
Ok(DataType::Interval(IntervalUnit::YearMonth))
18321832
}
18331833
r#type::Kind::IntervalDay(_) => Ok(DataType::Interval(IntervalUnit::DayTime)),
1834+
r#type::Kind::IntervalCompound(_) => {
1835+
Ok(DataType::Interval(IntervalUnit::MonthDayNano))
1836+
}
18341837
r#type::Kind::UserDefined(u) => {
1838+
// Kept for backwards compatibility, use IntervalCompound instead
18351839
if let Some(name) = extensions.types.get(&u.type_reference) {
1840+
#[allow(deprecated)]
18361841
match name.as_ref() {
18371842
INTERVAL_MONTH_DAY_NANO_TYPE_NAME => Ok(DataType::Interval(IntervalUnit::MonthDayNano)),
18381843
_ => not_impl_err!(
@@ -1842,7 +1847,7 @@ fn from_substrait_type(
18421847
),
18431848
}
18441849
} else {
1845-
// Kept for backwards compatibility, new plans should include the extension instead
1850+
// Kept for backwards compatibility, use IntervalCompound instead
18461851
#[allow(deprecated)]
18471852
match u.type_reference {
18481853
// Kept for backwards compatibility, use IntervalYear instead
@@ -2275,6 +2280,7 @@ fn from_substrait_literal(
22752280
subseconds,
22762281
precision_mode,
22772282
})) => {
2283+
use interval_day_to_second::PrecisionMode;
22782284
// DF only supports millisecond precision, so for any more granular type we lose precision
22792285
let milliseconds = match precision_mode {
22802286
Some(PrecisionMode::Microseconds(ms)) => ms / 1000,
@@ -2299,6 +2305,39 @@ fn from_substrait_literal(
22992305
Some(LiteralType::IntervalYearToMonth(IntervalYearToMonth { years, months })) => {
23002306
ScalarValue::new_interval_ym(*years, *months)
23012307
}
2308+
Some(LiteralType::IntervalCompound(IntervalCompound {
2309+
interval_year_to_month,
2310+
interval_day_to_second,
2311+
})) => match (interval_year_to_month, interval_day_to_second) {
2312+
(
2313+
Some(IntervalYearToMonth { years, months }),
2314+
Some(IntervalDayToSecond {
2315+
days,
2316+
seconds,
2317+
subseconds,
2318+
precision_mode:
2319+
Some(interval_day_to_second::PrecisionMode::Precision(p)),
2320+
}),
2321+
) => {
2322+
let nanos = match p {
2323+
0 => *subseconds * 1_000_000_000,
2324+
3 => *subseconds * 1_000_000,
2325+
6 => *subseconds * 1_000,
2326+
9 => *subseconds,
2327+
_ => {
2328+
return not_impl_err!(
2329+
"Unsupported Substrait interval day to second precision mode"
2330+
)
2331+
}
2332+
};
2333+
ScalarValue::new_interval_mdn(
2334+
*years * 12 + months,
2335+
*days,
2336+
*seconds as i64 * 1_000_000_000 + nanos,
2337+
)
2338+
}
2339+
_ => return not_impl_err!("Unsupported Substrait compound interval literal"),
2340+
},
23022341
Some(LiteralType::FixedChar(c)) => ScalarValue::Utf8(Some(c.clone())),
23032342
Some(LiteralType::UserDefined(user_defined)) => {
23042343
// Helper function to prevent duplicating this code - can be inlined once the non-extension path is removed
@@ -2329,6 +2368,8 @@ fn from_substrait_literal(
23292368

23302369
if let Some(name) = extensions.types.get(&user_defined.type_reference) {
23312370
match name.as_ref() {
2371+
// Kept for backwards compatibility - new plans should use IntervalCompound instead
2372+
#[allow(deprecated)]
23322373
INTERVAL_MONTH_DAY_NANO_TYPE_NAME => {
23332374
interval_month_day_nano(user_defined)?
23342375
}
@@ -2379,6 +2420,7 @@ fn from_substrait_literal(
23792420
milliseconds,
23802421
}))
23812422
}
2423+
// Kept for backwards compatibility, use IntervalCompound instead
23822424
INTERVAL_MONTH_DAY_NANO_TYPE_REF => {
23832425
interval_month_day_nano(user_defined)?
23842426
}

datafusion/substrait/src/logical_plan/producer.rs

Lines changed: 23 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ use substrait::proto::exchange_rel::{ExchangeKind, RoundRobin, ScatterFields};
5858
use substrait::proto::expression::literal::interval_day_to_second::PrecisionMode;
5959
use substrait::proto::expression::literal::map::KeyValue;
6060
use substrait::proto::expression::literal::{
61-
user_defined, IntervalDayToSecond, IntervalYearToMonth, List, Map,
62-
PrecisionTimestamp, Struct, UserDefined,
61+
IntervalCompound, IntervalDayToSecond, IntervalYearToMonth, List, Map,
62+
PrecisionTimestamp, Struct,
6363
};
6464
use substrait::proto::expression::subquery::InPredicate;
6565
use substrait::proto::expression::window_function::BoundsType;
@@ -1489,16 +1489,14 @@ fn to_substrait_type(
14891489
})),
14901490
}),
14911491
IntervalUnit::MonthDayNano => {
1492-
// Substrait doesn't currently support this type, so we represent it as a UDT
14931492
Ok(substrait::proto::Type {
1494-
kind: Some(r#type::Kind::UserDefined(r#type::UserDefined {
1495-
type_reference: extensions.register_type(
1496-
INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string(),
1497-
),
1498-
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1499-
nullability,
1500-
type_parameters: vec![],
1501-
})),
1493+
kind: Some(r#type::Kind::IntervalCompound(
1494+
r#type::IntervalCompound {
1495+
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1496+
nullability,
1497+
precision: 9, // nanos
1498+
},
1499+
)),
15021500
})
15031501
}
15041502
}
@@ -1892,23 +1890,21 @@ fn to_substrait_literal(
18921890
}),
18931891
DEFAULT_TYPE_VARIATION_REF,
18941892
),
1895-
ScalarValue::IntervalMonthDayNano(Some(i)) => {
1896-
// IntervalMonthDayNano is internally represented as a 128-bit integer, containing
1897-
// months (32bit), days (32bit), and nanoseconds (64bit)
1898-
let bytes = i.to_byte_slice();
1899-
(
1900-
LiteralType::UserDefined(UserDefined {
1901-
type_reference: extensions
1902-
.register_type(INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string()),
1903-
type_parameters: vec![],
1904-
val: Some(user_defined::Val::Value(ProtoAny {
1905-
type_url: INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string(),
1906-
value: bytes.to_vec().into(),
1907-
})),
1893+
ScalarValue::IntervalMonthDayNano(Some(i)) => (
1894+
LiteralType::IntervalCompound(IntervalCompound {
1895+
interval_year_to_month: Some(IntervalYearToMonth {
1896+
years: 0,
1897+
months: i.months,
19081898
}),
1909-
DEFAULT_TYPE_VARIATION_REF,
1910-
)
1911-
}
1899+
interval_day_to_second: Some(IntervalDayToSecond {
1900+
days: i.days,
1901+
seconds: 0,
1902+
subseconds: i.nanoseconds,
1903+
precision_mode: Some(PrecisionMode::Precision(9)), // nanoseconds
1904+
}),
1905+
}),
1906+
DEFAULT_TYPE_VARIATION_REF,
1907+
),
19121908
ScalarValue::IntervalDayTime(Some(i)) => (
19131909
LiteralType::IntervalDayToSecond(IntervalDayToSecond {
19141910
days: i.days,
@@ -2310,39 +2306,6 @@ mod test {
23102306
Ok(())
23112307
}
23122308

2313-
#[test]
2314-
fn custom_type_literal_extensions() -> Result<()> {
2315-
let mut extensions = Extensions::default();
2316-
// IntervalMonthDayNano is represented as a custom type in Substrait
2317-
let scalar = ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::new(
2318-
17, 25, 1234567890,
2319-
)));
2320-
let substrait_literal = to_substrait_literal(&scalar, &mut extensions)?;
2321-
let roundtrip_scalar =
2322-
from_substrait_literal_without_names(&substrait_literal, &extensions)?;
2323-
assert_eq!(scalar, roundtrip_scalar);
2324-
2325-
assert_eq!(
2326-
extensions,
2327-
Extensions {
2328-
functions: HashMap::new(),
2329-
types: HashMap::from([(
2330-
0,
2331-
INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string()
2332-
)]),
2333-
type_variations: HashMap::new(),
2334-
}
2335-
);
2336-
2337-
// Check we fail if we don't propagate extensions
2338-
assert!(from_substrait_literal_without_names(
2339-
&substrait_literal,
2340-
&Extensions::default()
2341-
)
2342-
.is_err());
2343-
Ok(())
2344-
}
2345-
23462309
#[test]
23472310
fn round_trip_types() -> Result<()> {
23482311
round_trip_type(DataType::Boolean)?;
@@ -2424,37 +2387,6 @@ mod test {
24242387
Ok(())
24252388
}
24262389

2427-
#[test]
2428-
fn custom_type_extensions() -> Result<()> {
2429-
let mut extensions = Extensions::default();
2430-
// IntervalMonthDayNano is represented as a custom type in Substrait
2431-
let dt = DataType::Interval(IntervalUnit::MonthDayNano);
2432-
2433-
let substrait = to_substrait_type(&dt, true, &mut extensions)?;
2434-
let roundtrip_dt = from_substrait_type_without_names(&substrait, &extensions)?;
2435-
assert_eq!(dt, roundtrip_dt);
2436-
2437-
assert_eq!(
2438-
extensions,
2439-
Extensions {
2440-
functions: HashMap::new(),
2441-
types: HashMap::from([(
2442-
0,
2443-
INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string()
2444-
)]),
2445-
type_variations: HashMap::new(),
2446-
}
2447-
);
2448-
2449-
// Check we fail if we don't propagate extensions
2450-
assert!(
2451-
from_substrait_type_without_names(&substrait, &Extensions::default())
2452-
.is_err()
2453-
);
2454-
2455-
Ok(())
2456-
}
2457-
24582390
#[test]
24592391
fn named_struct_names() -> Result<()> {
24602392
let mut extensions = Extensions::default();

datafusion/substrait/src/variation_const.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,16 @@ pub const INTERVAL_DAY_TIME_TYPE_REF: u32 = 2;
9696
/// [`ScalarValue::IntervalMonthDayNano`]: datafusion::common::ScalarValue::IntervalMonthDayNano
9797
#[deprecated(
9898
since = "41.0.0",
99-
note = "Use Substrait `UserDefinedType` with name `INTERVAL_MONTH_DAY_NANO_TYPE_NAME` instead"
99+
note = "Use Substrait `IntervalCompund` type instead"
100100
)]
101101
pub const INTERVAL_MONTH_DAY_NANO_TYPE_REF: u32 = 3;
102102

103103
/// For [`DataType::Interval`] with [`IntervalUnit::MonthDayNano`].
104104
///
105105
/// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval
106106
/// [`IntervalUnit::MonthDayNano`]: datafusion::arrow::datatypes::IntervalUnit::MonthDayNano
107+
#[deprecated(
108+
since = "42.1.0",
109+
note = "Use Substrait `IntervalCompund` type instead"
110+
)]
107111
pub const INTERVAL_MONTH_DAY_NANO_TYPE_NAME: &str = "interval-month-day-nano";

datafusion/substrait/tests/cases/roundtrip_logical_plan.rs

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -230,23 +230,6 @@ async fn select_with_reused_functions() -> Result<()> {
230230
Ok(())
231231
}
232232

233-
#[tokio::test]
234-
async fn roundtrip_udt_extensions() -> Result<()> {
235-
let ctx = create_context().await?;
236-
let proto =
237-
roundtrip_with_ctx("SELECT INTERVAL '1 YEAR 1 DAY 1 SECOND' FROM data", ctx)
238-
.await?;
239-
let expected_type = SimpleExtensionDeclaration {
240-
mapping_type: Some(MappingType::ExtensionType(ExtensionType {
241-
extension_uri_reference: u32::MAX,
242-
type_anchor: 0,
243-
name: "interval-month-day-nano".to_string(),
244-
})),
245-
};
246-
assert_eq!(proto.extensions, vec![expected_type]);
247-
Ok(())
248-
}
249-
250233
#[tokio::test]
251234
async fn select_with_filter_date() -> Result<()> {
252235
roundtrip("SELECT * FROM data WHERE c > CAST('2020-01-01' AS DATE)").await

0 commit comments

Comments
 (0)