From 059aab1776fa4ed0b2383b85e201e9ef191cdedb Mon Sep 17 00:00:00 2001 From: Etgar Perets Date: Sun, 17 Aug 2025 11:08:03 +0300 Subject: [PATCH 1/2] SGA-3801 Added support for concatenating string literals separated by space in MySQL --- src/dialect/mod.rs | 6 ++++++ src/dialect/mysql.rs | 5 +++++ src/parser/mod.rs | 43 ++++++++++++++++++++++++++++++++++++++-- tests/sqlparser_mysql.rs | 9 +++++++++ 4 files changed, 61 insertions(+), 2 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index f91209722..e9d8628b4 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -476,6 +476,12 @@ pub trait Dialect: Debug + Any { false } + // Does the Dialect support concatenating of string literal + // Example: SELECT 'Hello ' "world" => SELECT 'Hello world' + fn supports_concat_quoted_identifiers(&self) -> bool { + false + } + /// Does the dialect support trailing commas in the projection list? fn supports_projection_trailing_commas(&self) -> bool { self.supports_trailing_commas() diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 6cf24e14e..96bb6dd7c 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -71,6 +71,11 @@ impl Dialect for MySqlDialect { true } + // see + fn supports_concat_quoted_identifiers(&self) -> bool { + true + } + fn ignores_wildcard_escapes(&self) -> bool { true } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6c559eed4..6e4e13cbc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9808,8 +9808,18 @@ impl<'a> Parser<'a> { // bigdecimal feature is enabled, and is otherwise a no-op // (i.e., it returns the input string). 
Token::Number(n, l) => ok_value(Value::Number(Self::parse(n, span.start)?, l)), - Token::SingleQuotedString(ref s) => ok_value(Value::SingleQuotedString(s.to_string())), - Token::DoubleQuotedString(ref s) => ok_value(Value::DoubleQuotedString(s.to_string())), + Token::SingleQuotedString(ref s) => { + if self.dialect.supports_concat_quoted_identifiers() { + return ok_value(Value::SingleQuotedString(self.combine_quoted(next_token))); + } + ok_value(Value::SingleQuotedString(s.to_string())) + } + Token::DoubleQuotedString(ref s) => { + if self.dialect.supports_concat_quoted_identifiers() { + return ok_value(Value::DoubleQuotedString(self.combine_quoted(next_token))); + } + ok_value(Value::DoubleQuotedString(s.to_string())) + } Token::TripleSingleQuotedString(ref s) => { ok_value(Value::TripleSingleQuotedString(s.to_string())) } @@ -9879,6 +9889,35 @@ impl<'a> Parser<'a> { } } + fn is_quoted_string(&self, token: &Token) -> bool { + matches!( + token, + Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) + ) + } + + fn get_quoted_string(&self, token: &Token) -> String { + match token { + Token::SingleQuotedString(s) => s.clone(), + Token::DoubleQuotedString(s) => s.clone(), + _ => String::new(), + } + } + + fn combine_quoted(&mut self, token: TokenWithSpan) -> String { + let mut combined_string = self.get_quoted_string(&token.token); + loop { + let next_token = self.next_token(); + if !self.is_quoted_string(&next_token.token) { + self.prev_token(); + break; + } + let s = self.get_quoted_string(&next_token.token); + combined_string.push_str(&s); + } + combined_string + } + /// Parse an unsigned numeric literal pub fn parse_number_value(&mut self) -> Result { let value_wrapper = self.parse_value()?; diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 184532035..2de35899c 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -4247,3 +4247,12 @@ fn test_create_index_options() { "CREATE INDEX idx_name ON t(c1, c2) USING BTREE LOCK 
= EXCLUSIVE ALGORITHM = DEFAULT", ); } + +#[test] +fn parse_adjacent_string_literal_concatenation() { + let sql = r#"SELECT 'M' "y" 'S' "q" 'l'"#; + mysql().one_statement_parses_to(sql, r"SELECT 'MySql'"); + + let sql = "SELECT * FROM t WHERE col = 'Hello' \n ' ' \t 'World!'"; + mysql().one_statement_parses_to(sql, r"SELECT * FROM t WHERE col = 'Hello World!'"); +} From ecdee101977427d23415cc1c2b287c4783bbad07 Mon Sep 17 00:00:00 2001 From: Etgar Perets Date: Tue, 26 Aug 2025 10:52:49 +0300 Subject: [PATCH 2/2] SGA-3801 Simplified concat logic, refactor names, moved test to common --- src/dialect/mod.rs | 2 +- src/dialect/mysql.rs | 4 +-- src/parser/mod.rs | 51 +++++++++++---------------------------- tests/sqlparser_common.rs | 10 ++++++++ tests/sqlparser_mysql.rs | 9 ------- 5 files changed, 27 insertions(+), 49 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index e9d8628b4..6eeb1e622 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -478,7 +478,7 @@ pub trait Dialect: Debug + Any { // Does the Dialect support concatenating of string literal // Example: SELECT 'Hello ' "world" => SELECT 'Hello world' - fn supports_concat_quoted_identifiers(&self) -> bool { + fn supports_string_literal_concatenation(&self) -> bool { false } diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 96bb6dd7c..8c63bfda5 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -71,8 +71,8 @@ impl Dialect for MySqlDialect { true } - // see - fn supports_concat_quoted_identifiers(&self) -> bool { + /// see + fn supports_string_literal_concatenation(&self) -> bool { true } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6e4e13cbc..860595364 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9808,18 +9808,12 @@ impl<'a> Parser<'a> { // bigdecimal feature is enabled, and is otherwise a no-op // (i.e., it returns the input string). 
Token::Number(n, l) => ok_value(Value::Number(Self::parse(n, span.start)?, l)), - Token::SingleQuotedString(ref s) => { - if self.dialect.supports_concat_quoted_identifiers() { - return ok_value(Value::SingleQuotedString(self.combine_quoted(next_token))); - } - ok_value(Value::SingleQuotedString(s.to_string())) - } - Token::DoubleQuotedString(ref s) => { - if self.dialect.supports_concat_quoted_identifiers() { - return ok_value(Value::DoubleQuotedString(self.combine_quoted(next_token))); - } - ok_value(Value::DoubleQuotedString(s.to_string())) - } + Token::SingleQuotedString(ref s) => ok_value(Value::SingleQuotedString( + self.maybe_concat_string_literal(s.to_string()), + )), + Token::DoubleQuotedString(ref s) => ok_value(Value::DoubleQuotedString( + self.maybe_concat_string_literal(s.to_string()), + )), Token::TripleSingleQuotedString(ref s) => { ok_value(Value::TripleSingleQuotedString(s.to_string())) } @@ -9889,33 +9883,16 @@ impl<'a> Parser<'a> { } } - fn is_quoted_string(&self, token: &Token) -> bool { - matches!( - token, - Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) - ) - } - - fn get_quoted_string(&self, token: &Token) -> String { - match token { - Token::SingleQuotedString(s) => s.clone(), - Token::DoubleQuotedString(s) => s.clone(), - _ => String::new(), - } - } - - fn combine_quoted(&mut self, token: TokenWithSpan) -> String { - let mut combined_string = self.get_quoted_string(&token.token); - loop { - let next_token = self.next_token(); - if !self.is_quoted_string(&next_token.token) { - self.prev_token(); - break; + fn maybe_concat_string_literal(&mut self, mut str: String) -> String { + if self.dialect.supports_string_literal_concatenation() { + while let Token::SingleQuotedString(ref s) | Token::DoubleQuotedString(ref s) = + self.peek_token_ref().token + { + str.push_str(s.clone().as_str()); + self.advance_token(); } - let s = self.get_quoted_string(&next_token.token); - combined_string.push_str(&s); } - combined_string + str } /// 
Parse an unsigned numeric literal diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 8b99bb1dc..6d53d1fad 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -17027,3 +17027,13 @@ fn test_parse_semantic_view_table_factor() { _ => panic!("Expected Query statement"), } } + +#[test] +fn parse_adjacent_string_literal_concatenation() { + let sql = r#"SELECT 'M' "y" 'S' "q" 'l'"#; + let dialects = all_dialects_where(|d| d.supports_string_literal_concatenation()); + dialects.one_statement_parses_to(sql, r"SELECT 'MySql'"); + + let sql = "SELECT * FROM t WHERE col = 'Hello' \n ' ' \t 'World!'"; + dialects.one_statement_parses_to(sql, r"SELECT * FROM t WHERE col = 'Hello World!'"); +} diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 2de35899c..184532035 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -4247,12 +4247,3 @@ fn test_create_index_options() { "CREATE INDEX idx_name ON t(c1, c2) USING BTREE LOCK = EXCLUSIVE ALGORITHM = DEFAULT", ); } - -#[test] -fn parse_adjacent_string_literal_concatenation() { - let sql = r#"SELECT 'M' "y" 'S' "q" 'l'"#; - mysql().one_statement_parses_to(sql, r"SELECT 'MySql'"); - - let sql = "SELECT * FROM t WHERE col = 'Hello' \n ' ' \t 'World!'"; - mysql().one_statement_parses_to(sql, r"SELECT * FROM t WHERE col = 'Hello World!'"); -}