Skip to content

Commit 7bd4b53

Browse files
authored
Implement a dialect-specific rule for unparsing an identifier with or without quotes (#10573)
* add ident needs quote check * implement the check for default dialect and fix tests * add test for need-quoted cases * update cargo lock * fomrat cargo toml * fix the example test * move regex to top level * add comments for new_ident_quoted_if_needs func * fix typo and add test for space * fix example test * fix example test * fix the test fail * remove unused example and modified comments * fix typo * follow the latest Dialect trait in sqlparser * fix the parameter name
1 parent 0355713 commit 7bd4b53

File tree

11 files changed

+99
-83
lines changed

11 files changed

+99
-83
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ object_store = { version = "0.9.1", default-features = false }
105105
parking_lot = "0.12"
106106
parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
107107
rand = "0.8"
108+
regex = "1.8"
108109
rstest = "0.19.0"
109110
serde_json = "1"
110111
sqlparser = { version = "0.45.0", features = ["visitor"] }

datafusion-cli/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion-examples/examples/plan_to_sql.rs

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ use datafusion_sql::unparser::{plan_to_sql, Unparser};
4949
async fn main() -> Result<()> {
5050
// See how to evaluate expressions
5151
simple_expr_to_sql_demo()?;
52-
simple_expr_to_sql_demo_no_escape()?;
5352
simple_expr_to_sql_demo_escape_mysql_style()?;
5453
simple_plan_to_sql_demo().await?;
5554
round_trip_plan_to_sql_demo().await?;
@@ -61,17 +60,6 @@ async fn main() -> Result<()> {
6160
fn simple_expr_to_sql_demo() -> Result<()> {
6261
let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8)));
6362
let sql = expr_to_sql(&expr)?.to_string();
64-
assert_eq!(sql, r#"(("a" < 5) OR ("a" = 8))"#);
65-
Ok(())
66-
}
67-
68-
/// DataFusion can convert expressions to SQL without escaping column names using
69-
/// using a custom dialect and an explicit unparser
70-
fn simple_expr_to_sql_demo_no_escape() -> Result<()> {
71-
let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8)));
72-
let dialect = CustomDialect::new(None);
73-
let unparser = Unparser::new(&dialect);
74-
let sql = unparser.expr_to_sql(&expr)?.to_string();
7563
assert_eq!(sql, r#"((a < 5) OR (a = 8))"#);
7664
Ok(())
7765
}
@@ -106,7 +94,7 @@ async fn simple_plan_to_sql_demo() -> Result<()> {
10694

10795
assert_eq!(
10896
sql,
109-
r#"SELECT "?table?"."id", "?table?"."int_col", "?table?"."double_col", "?table?"."date_string_col" FROM "?table?""#
97+
r#"SELECT "?table?".id, "?table?".int_col, "?table?".double_col, "?table?".date_string_col FROM "?table?""#
11098
);
11199

112100
Ok(())
@@ -145,7 +133,7 @@ async fn round_trip_plan_to_sql_demo() -> Result<()> {
145133
let sql = plan_to_sql(df.logical_plan())?.to_string();
146134
assert_eq!(
147135
sql,
148-
r#"SELECT "alltypes_plain"."int_col", "alltypes_plain"."double_col", CAST("alltypes_plain"."date_string_col" AS VARCHAR) FROM "alltypes_plain" WHERE (("alltypes_plain"."id" > 1) AND ("alltypes_plain"."tinyint_col" < "alltypes_plain"."double_col"))"#
136+
r#"SELECT alltypes_plain.int_col, alltypes_plain.double_col, CAST(alltypes_plain.date_string_col AS VARCHAR) FROM alltypes_plain WHERE ((alltypes_plain.id > 1) AND (alltypes_plain.tinyint_col < alltypes_plain.double_col))"#
149137
);
150138

151139
Ok(())

datafusion/core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ postgres-protocol = "0.6.4"
145145
postgres-types = { version = "0.2.4", features = ["derive", "with-chrono-0_4"] }
146146
rand = { workspace = true, features = ["small_rng"] }
147147
rand_distr = "0.4.3"
148-
regex = "1.5.4"
148+
regex = { workspace = true }
149149
rstest = { workspace = true }
150150
rust_decimal = { version = "1.27.0", features = ["tokio-pg"] }
151151
serde_json = { workspace = true }

datafusion/functions/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ itertools = { workspace = true }
8080
log = { workspace = true }
8181
md-5 = { version = "^0.10.0", optional = true }
8282
rand = { workspace = true }
83-
regex = { version = "1.8", optional = true }
83+
regex = { worksapce = true, optional = true }
8484
sha2 = { version = "^0.10.1", optional = true }
8585
unicode-segmentation = { version = "^1.7.1", optional = true }
8686
uuid = { version = "1.7", features = ["v4"], optional = true }

datafusion/physical-expr/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ itertools = { workspace = true, features = ["use_std"] }
6666
log = { workspace = true }
6767
paste = "^1.0"
6868
petgraph = "0.6.2"
69-
regex = { version = "1.8", optional = true }
69+
regex = { workspace = true, optional = true }
7070

7171
[dev-dependencies]
7272
arrow = { workspace = true, features = ["test_utils"] }

datafusion/sql/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ arrow-schema = { workspace = true }
4747
datafusion-common = { workspace = true, default-features = true }
4848
datafusion-expr = { workspace = true }
4949
log = { workspace = true }
50+
regex = { workspace = true }
5051
sqlparser = { workspace = true }
5152
strum = { version = "0.26.1", features = ["derive"] }
5253

datafusion/sql/src/unparser/dialect.rs

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,41 +15,54 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
/// Dialect is used to capture dialect specific syntax.
18+
use regex::Regex;
19+
use sqlparser::keywords::ALL_KEYWORDS;
20+
21+
/// `Dialect` to use for Unparsing
22+
///
23+
/// The default dialect tries to avoid quoting identifiers unless necessary (e.g. `a` instead of `"a"`)
24+
/// but this behavior can be overridden as needed
1925
/// Note: this trait will eventually be replaced by the Dialect in the SQLparser package
2026
///
2127
/// See <https://github.com/sqlparser-rs/sqlparser-rs/pull/1170>
2228
pub trait Dialect {
23-
fn identifier_quote_style(&self) -> Option<char>;
29+
fn identifier_quote_style(&self, _identifier: &str) -> Option<char>;
2430
}
2531
pub struct DefaultDialect {}
2632

2733
impl Dialect for DefaultDialect {
28-
fn identifier_quote_style(&self) -> Option<char> {
29-
Some('"')
34+
fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
35+
let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();
36+
if ALL_KEYWORDS.contains(&identifier.to_uppercase().as_str())
37+
|| !identifier_regex.is_match(identifier)
38+
{
39+
Some('"')
40+
} else {
41+
None
42+
}
3043
}
3144
}
3245

3346
pub struct PostgreSqlDialect {}
3447

3548
impl Dialect for PostgreSqlDialect {
36-
fn identifier_quote_style(&self) -> Option<char> {
49+
fn identifier_quote_style(&self, _: &str) -> Option<char> {
3750
Some('"')
3851
}
3952
}
4053

4154
pub struct MySqlDialect {}
4255

4356
impl Dialect for MySqlDialect {
44-
fn identifier_quote_style(&self) -> Option<char> {
57+
fn identifier_quote_style(&self, _: &str) -> Option<char> {
4558
Some('`')
4659
}
4760
}
4861

4962
pub struct SqliteDialect {}
5063

5164
impl Dialect for SqliteDialect {
52-
fn identifier_quote_style(&self) -> Option<char> {
65+
fn identifier_quote_style(&self, _: &str) -> Option<char> {
5366
Some('`')
5467
}
5568
}
@@ -67,7 +80,7 @@ impl CustomDialect {
6780
}
6881

6982
impl Dialect for CustomDialect {
70-
fn identifier_quote_style(&self) -> Option<char> {
83+
fn identifier_quote_style(&self, _: &str) -> Option<char> {
7184
self.identifier_quote_style
7285
}
7386
}

0 commit comments

Comments
 (0)