Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -467,9 +467,8 @@ config_namespace! {

/// The default time zone
///
/// Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime
/// according to this time zone, and then extract the hour
pub time_zone: String, default = "+00:00".into()
/// Some functions, e.g. `now` return timestamps in this time zone
pub time_zone: Option<String>, default = None

/// Parquet options
pub parquet: ParquetOptions, default = Default::default()
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/expr_api/simplification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ fn multiple_now() -> Result<()> {
// expect the same timestamp appears in both exprs
let actual = get_optimized_plan_formatted(plan, &time);
let expected = format!(
"Projection: TimestampNanosecond({}, Some(\"+00:00\")) AS now(), TimestampNanosecond({}, Some(\"+00:00\")) AS t2\n TableScan: test",
"Projection: TimestampNanosecond({}, None) AS now(), TimestampNanosecond({}, None) AS t2\n TableScan: test",
time.timestamp_nanos_opt().unwrap(),
time.timestamp_nanos_opt().unwrap()
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1812,7 +1812,7 @@ async fn test_config_options_work_for_scalar_func() -> Result<()> {
});

let mut config = SessionConfig::new();
config.options_mut().execution.time_zone = "AEST".into();
config.options_mut().execution.time_zone = Some("AEST".into());

let ctx = SessionContext::new_with_config(config);

Expand Down
9 changes: 8 additions & 1 deletion datafusion/functions/src/datetime/current_date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,14 @@ impl ScalarUDFImpl for CurrentDateFunc {
let days = info
.execution_props()
.config_options()
.and_then(|config| config.execution.time_zone.parse::<Tz>().ok())
.and_then(|config| {
config
.execution
.time_zone
.as_ref()
.map(|tz| tz.parse::<Tz>().ok())
})
.flatten()
.map_or_else(
|| datetime_to_days(&now_ts),
|tz| {
Expand Down
15 changes: 13 additions & 2 deletions datafusion/functions/src/datetime/current_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,14 @@ impl ScalarUDFImpl for CurrentTimeFunc {
let nano = info
.execution_props()
.config_options()
.and_then(|config| config.execution.time_zone.parse::<Tz>().ok())
.and_then(|config| {
config
.execution
.time_zone
.as_ref()
.map(|tz| tz.parse::<Tz>().ok())
})
.flatten()
.map_or_else(
|| datetime_to_time_nanos(&now_ts),
|tz| {
Expand Down Expand Up @@ -167,7 +174,11 @@ mod tests {

fn set_session_timezone_env(tz: &str, start_time: DateTime<Utc>) -> MockSimplifyInfo {
let mut config = datafusion_common::config::ConfigOptions::default();
config.execution.time_zone = tz.to_string();
config.execution.time_zone = if tz.is_empty() {
None
} else {
Some(tz.to_string())
};
let mut execution_props =
ExecutionProps::new().with_query_execution_start_time(start_time);
execution_props.config_options = Some(Arc::new(config));
Expand Down
10 changes: 7 additions & 3 deletions datafusion/functions/src/datetime/now.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use datafusion_macros::user_doc;
#[user_doc(
doc_section(label = "Time and Date Functions"),
description = r#"
Returns the current UTC timestamp.
Returns the current timestamp in the system configured timezone (None by default).

The `now()` return value is determined at query time and will return the same timestamp, no matter when in the query plan the function executes.
"#,
Expand All @@ -58,15 +58,19 @@ impl NowFunc {
Self {
signature: Signature::nullary(Volatility::Stable),
aliases: vec!["current_timestamp".to_string()],
timezone: Some(Arc::from("+00")),
timezone: None,
}
}

pub fn new_with_config(config: &ConfigOptions) -> Self {
Self {
signature: Signature::nullary(Volatility::Stable),
aliases: vec!["current_timestamp".to_string()],
timezone: Some(Arc::from(config.execution.time_zone.as_str())),
timezone: config
.execution
.time_zone
.as_ref()
.map(|tz| Arc::from(tz.as_str())),
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -693,7 +693,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// Timestamp With Time Zone
// INPUT : [SQLDataType] TimestampTz + [Config] Time Zone
// OUTPUT: [ArrowDataType] Timestamp<TimeUnit, Some(Time Zone)>
Some(self.context_provider.options().execution.time_zone.clone())
self.context_provider.options().execution.time_zone.clone()
} else {
// Timestamp Without Time zone
None
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/arrow_typeof.slt
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ Timestamp(ns)
query T
SELECT arrow_typeof(now())
----
Timestamp(ns, "+00:00")
Timestamp(ns)

# arrow_typeof_timestamp_date32(
query T
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/dates.slt
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ where d3_date > now() + '5 days';
----
DataFusion error: type_coercion
caused by
Error during planning: Cannot coerce arithmetic expression Timestamp(ns, "+00:00") + Utf8 to valid types
Error during planning: Cannot coerce arithmetic expression Timestamp(ns) + Utf8 to valid types


# DATE minus DATE
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/ddl.slt
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,7 @@ query TTTTTT
show columns FROM table_with_pk;
----
datafusion public table_with_pk sn Int32 NO
datafusion public table_with_pk ts Timestamp(ns, "+00:00") NO
datafusion public table_with_pk ts Timestamp(ns) NO
datafusion public table_with_pk currency Utf8View NO
datafusion public table_with_pk amount Float32 YES

Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/group_by.slt
Original file line number Diff line number Diff line change
Expand Up @@ -5556,7 +5556,7 @@ SELECT
arrow_cast('2024-01-01T00:00:00Z'::timestamptz, 'Timestamp(Second, Some("+08:00"))') AS ts
GROUP BY ts, text
----
foo 2024-01-01T08:00:00+08:00
foo 2024-01-01T00:00:00+08:00

# Test multi group by int + Decimal128
statement ok
Expand Down
12 changes: 6 additions & 6 deletions datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ datafusion.execution.sort_spill_reservation_bytes 10485760
datafusion.execution.spill_compression uncompressed
datafusion.execution.split_file_groups_by_statistics false
datafusion.execution.target_partitions 7
datafusion.execution.time_zone +00:00
datafusion.execution.time_zone NULL
datafusion.execution.use_row_number_estimates_to_optimize_partitioning false
datafusion.explain.analyze_level dev
datafusion.explain.format indent
Expand Down Expand Up @@ -387,7 +387,7 @@ datafusion.execution.sort_spill_reservation_bytes 10485760 Specifies the reserve
datafusion.execution.spill_compression uncompressed Sets the compression codec used when spilling data to disk. Since datafusion writes spill files using the Arrow IPC Stream format, only codecs supported by the Arrow IPC Stream Writer are allowed. Valid values are: uncompressed, lz4_frame, zstd. Note: lz4_frame offers faster (de)compression, but typically results in larger spill files. In contrast, zstd achieves higher compression ratios at the cost of slower (de)compression speed.
datafusion.execution.split_file_groups_by_statistics false Attempt to eliminate sorts by packing & sorting files with non-overlapping statistics into the same file groups. Currently experimental
datafusion.execution.target_partitions 7 Number of partitions for query execution. Increasing partitions can increase concurrency. Defaults to the number of CPU cores on the system
datafusion.execution.time_zone +00:00 The default time zone Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour
datafusion.execution.time_zone NULL The default time zone Some functions, e.g. `now` return timestamps in this time zone
datafusion.execution.use_row_number_estimates_to_optimize_partitioning false Should DataFusion use row number estimates at the input to decide whether increasing parallelism is beneficial or not. By default, only exact row numbers (not estimates) are used for this decision. Setting this flag to `true` will likely produce better plans. if the source of statistics is accurate. We plan to make this the default in the future.
datafusion.explain.analyze_level dev Verbosity level for "EXPLAIN ANALYZE". Default is "dev" "summary" shows common metrics for high-level insights. "dev" provides deep operator-level introspection for developers.
datafusion.explain.format indent Display format of explain. Default is "indent". When set to "tree", it will print the plan in a tree-rendered format.
Expand Down Expand Up @@ -459,29 +459,29 @@ datafusion.execution.batch_size 8192 Default batch size while creating new batch
query TT
SHOW TIME ZONE
----
datafusion.execution.time_zone +00:00
datafusion.execution.time_zone NULL

# show_timezone_default_none
# https://github.com/apache/datafusion/issues/3255
query TT
SHOW TIMEZONE
----
datafusion.execution.time_zone +00:00
datafusion.execution.time_zone NULL


# show_time_zone_default_none_verbose
# https://github.com/apache/datafusion/issues/3255
query TTT
SHOW TIME ZONE VERBOSE
----
datafusion.execution.time_zone +00:00 The default time zone Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour
datafusion.execution.time_zone NULL The default time zone Some functions, e.g. `now` return timestamps in this time zone

# show_timezone_default_none_verbose
# https://github.com/apache/datafusion/issues/3255
query TTT
SHOW TIMEZONE VERBOSE
----
datafusion.execution.time_zone +00:00 The default time zone Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour
datafusion.execution.time_zone NULL The default time zone Some functions, e.g. `now` return timestamps in this time zone


# show empty verbose
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/metadata.slt
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ GROUP BY ts
ORDER BY ts
LIMIT 1;
----
2020-09-08T13:42:29.190855123Z
2020-09-08T13:42:29.190855123



Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -583,4 +583,4 @@ WHERE trace_id = '00000000000000000000000000000002' AND deployment_environment =
ORDER BY start_timestamp, trace_id
LIMIT 1;
----
2024-10-01T00:00:00Z
2024-10-01T00:00:00
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/table_functions.slt
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ SELECT * FROM range(TIMESTAMP '2023-01-01T00:00:00', TIMESTAMP '2023-01-03T00:00
query P
SELECT * FROM range(TIMESTAMPTZ '2023-02-01T00:00:00-07:00', TIMESTAMPTZ '2023-02-01T09:00:00+01:00', INTERVAL '1' HOUR);
----
2023-02-01T07:00:00Z
2023-02-01T07:00:00

# Basic date range with hour interval
query P
Expand Down
70 changes: 35 additions & 35 deletions datafusion/sqllogictest/test_files/window.slt
Original file line number Diff line number Diff line change
Expand Up @@ -942,22 +942,22 @@ CREATE TABLE table1 (

statement ok
INSERT INTO table1 (bar, foo, time) VALUES
(200.0, 'me', '1970-01-01T00:00:00.000000010Z'),
(1.0, 'me', '1970-01-01T00:00:00.000000030Z'),
(1.0, 'me', '1970-01-01T00:00:00.000000040Z'),
(2.0, 'you', '1970-01-01T00:00:00.000000020Z');
(200.0, 'me', '1970-01-01T00:00:00.000000010'),
(1.0, 'me', '1970-01-01T00:00:00.000000030'),
(1.0, 'me', '1970-01-01T00:00:00.000000040'),
(2.0, 'you', '1970-01-01T00:00:00.000000020');

query TP
SELECT foo, first_value(time ORDER BY time DESC NULLS LAST) AS time FROM table1 GROUP BY foo ORDER BY foo;
----
me 1970-01-01T00:00:00.000000040Z
you 1970-01-01T00:00:00.000000020Z
me 1970-01-01T00:00:00.000000040
you 1970-01-01T00:00:00.000000020

query TP
SELECT foo, last_value(time ORDER BY time DESC NULLS LAST) AS time FROM table1 GROUP BY foo ORDER BY foo;
----
me 1970-01-01T00:00:00.000000010Z
you 1970-01-01T00:00:00.000000020Z
me 1970-01-01T00:00:00.000000010
you 1970-01-01T00:00:00.000000020

statement ok
drop table table1;
Expand Down Expand Up @@ -5766,15 +5766,15 @@ CREATE TABLE table_test_distinct_count (

statement ok
INSERT INTO table_test_distinct_count (k, v, time) VALUES
('a', 1, '1970-01-01T00:01:00.00Z'),
('a', 1, '1970-01-01T00:02:00.00Z'),
('a', 1, '1970-01-01T00:03:00.00Z'),
('a', 2, '1970-01-01T00:03:00.00Z'),
('a', 1, '1970-01-01T00:04:00.00Z'),
('b', 3, '1970-01-01T00:01:00.00Z'),
('b', 3, '1970-01-01T00:02:00.00Z'),
('b', 4, '1970-01-01T00:03:00.00Z'),
('b', 4, '1970-01-01T00:03:00.00Z');
('a', 1, '1970-01-01T00:01:00.00'),
('a', 1, '1970-01-01T00:02:00.00'),
('a', 1, '1970-01-01T00:03:00.00'),
('a', 2, '1970-01-01T00:03:00.00'),
('a', 1, '1970-01-01T00:04:00.00'),
('b', 3, '1970-01-01T00:01:00.00'),
('b', 3, '1970-01-01T00:02:00.00'),
('b', 4, '1970-01-01T00:03:00.00'),
('b', 4, '1970-01-01T00:03:00.00');

query TPII
SELECT
Expand All @@ -5793,15 +5793,15 @@ SELECT
FROM table_test_distinct_count
ORDER BY k, time;
----
a 1970-01-01T00:01:00Z 1 1
a 1970-01-01T00:02:00Z 2 1
a 1970-01-01T00:03:00Z 4 2
a 1970-01-01T00:03:00Z 4 2
a 1970-01-01T00:04:00Z 4 2
b 1970-01-01T00:01:00Z 1 1
b 1970-01-01T00:02:00Z 2 1
b 1970-01-01T00:03:00Z 4 2
b 1970-01-01T00:03:00Z 4 2
a 1970-01-01T00:01:00 1 1
a 1970-01-01T00:02:00 2 1
a 1970-01-01T00:03:00 4 2
a 1970-01-01T00:03:00 4 2
a 1970-01-01T00:04:00 4 2
b 1970-01-01T00:01:00 1 1
b 1970-01-01T00:02:00 2 1
b 1970-01-01T00:03:00 4 2
b 1970-01-01T00:03:00 4 2


query TT
Expand Down Expand Up @@ -5854,15 +5854,15 @@ SELECT
FROM table_test_distinct_count
ORDER BY k, time;
----
a 1970-01-01T00:01:00Z 1 1
a 1970-01-01T00:02:00Z 2 1
a 1970-01-01T00:03:00Z 5 3
a 1970-01-01T00:03:00Z 5 3
a 1970-01-01T00:04:00Z 5 3
b 1970-01-01T00:01:00Z 3 3
b 1970-01-01T00:02:00Z 6 3
b 1970-01-01T00:03:00Z 14 7
b 1970-01-01T00:03:00Z 14 7
a 1970-01-01T00:01:00 1 1
a 1970-01-01T00:02:00 2 1
a 1970-01-01T00:03:00 5 3
a 1970-01-01T00:03:00 5 3
a 1970-01-01T00:04:00 5 3
b 1970-01-01T00:01:00 3 3
b 1970-01-01T00:02:00 6 3
b 1970-01-01T00:03:00 14 7
b 1970-01-01T00:03:00 14 7



Expand Down
14 changes: 14 additions & 0 deletions docs/source/library-user-guide/upgrading.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,20 @@ let indices = projection_exprs.column_indices();
_execution plan_ of the query. With this release, `DESCRIBE query` now outputs
the computed _schema_ of the query, consistent with the behavior of `DESCRIBE table_name`.

### `datafusion.execution.time_zone` default configuration changed

The default value of `datafusion.execution.time_zone` was previously the string `+00:00` (UTC).
The option is now an `Option<String>` with a default of `None`, so functions such as `now()` return
timestamps without a time zone (for example `Timestamp(ns)` instead of `Timestamp(ns, "+00:00")`)
unless one is configured. To restore the previous behavior, run the following SQL:

```sql
SET TIMEZONE = '+00:00';
```

This change was made to better support using the default time zone in scalar UDFs such as
`now`, `current_date`, `current_time`, and `to_timestamp`.

## DataFusion `50.0.0`

### ListingTable automatically detects Hive Partitioned tables
Expand Down
Loading