From 8fecdc87a81ee159d16c9486c89ca440791ef66e Mon Sep 17 00:00:00 2001 From: Koudai Aono Date: Sat, 24 May 2025 12:12:11 +0900 Subject: [PATCH] feat: add PEP 750 template string support Co-authored-by: Amaan Qureshi --- src/scanner.c | 17 +++++----- test/corpus/literals.txt | 73 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 8 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 7e55ff78..1fc77cdb 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -85,7 +85,7 @@ static inline void set_end_character(Delimiter *delimiter, int32_t character) { typedef struct { Array(uint16_t) indents; Array(Delimiter) delimiters; - bool inside_f_string; + bool inside_interpolated_string; } Scanner; static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } @@ -177,7 +177,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con lexer->mark_end(lexer); array_pop(&scanner->delimiters); lexer->result_symbol = STRING_END; - scanner->inside_f_string = false; + scanner->inside_interpolated_string = false; } return true; } @@ -195,7 +195,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con advance(lexer); array_pop(&scanner->delimiters); lexer->result_symbol = STRING_END; - scanner->inside_f_string = false; + scanner->inside_interpolated_string = false; } lexer->mark_end(lexer); return true; @@ -280,7 +280,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con if ((valid_symbols[DEDENT] || (!valid_symbols[NEWLINE] && !(valid_symbols[STRING_START] && next_tok_is_string_start) && !within_brackets)) && - indent_length < current_indent_length && !scanner->inside_f_string && + indent_length < current_indent_length && !scanner->inside_interpolated_string && // Wait to create a dedent token until we've consumed any // comments @@ -303,7 +303,8 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con bool has_flags = false; while (lexer->lookahead) { - if (lexer->lookahead == 'f' || lexer->lookahead == 'F') { + if (lexer->lookahead == 'f' || lexer->lookahead == 'F' || lexer->lookahead == 't' || + lexer->lookahead == 'T') { set_format(&delimiter); } else if (lexer->lookahead == 'r' || lexer->lookahead == 'R') { set_raw(&delimiter); @@ -349,7 +350,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con if (end_character(&delimiter)) { array_push(&scanner->delimiters, delimiter); lexer->result_symbol = STRING_START; - scanner->inside_f_string = is_format(&delimiter); + scanner->inside_interpolated_string = is_format(&delimiter); return true; } if (has_flags) { @@ -365,7 +366,7 @@ unsigned tree_sitter_python_external_scanner_serialize(void *payload, char *buff size_t size = 0; - buffer[size++] = (char)scanner->inside_f_string; + buffer[size++] = (char)scanner->inside_interpolated_string; size_t delimiter_count = scanner->delimiters.size; if (delimiter_count > UINT8_MAX) { @@ -398,7 +399,7 @@ void tree_sitter_python_external_scanner_deserialize(void *payload, const char * if (length > 0) { size_t size = 0; - scanner->inside_f_string = (bool)buffer[size++]; + scanner->inside_interpolated_string = (bool)buffer[size++]; size_t delimiter_count = (uint8_t)buffer[size++]; if (delimiter_count > 0) { diff --git a/test/corpus/literals.txt b/test/corpus/literals.txt index ce5f7208..4ff69e0a 100644 --- a/test/corpus/literals.txt +++ b/test/corpus/literals.txt @@ -553,6 +553,79 @@ f"{value:{width + padding!r}.{precision}}" (identifier)))) (string_end)))) +================================================================================ +Template Strings +================================================================================ + +t"Hello, {first_name} {last_name}!" +t"a {b:2} {c:34.5}" +tr"Path: {path}\n" +t""" +Multi-line template +with {variable} +""" +t"Use {{braces}} to escape" +t"Result: {a + b}" + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (identifier)) + (string_content) + (interpolation + (identifier)) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (identifier) + (format_specifier)) + (string_content) + (interpolation + (identifier) + (format_specifier)) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (identifier)) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (identifier)) + (string_content) + (string_end))) + (expression_statement + (string + (string_start) + (string_content + (escape_interpolation) + (escape_interpolation)) + (string_end))) + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (binary_operator + (identifier) + (identifier))) + (string_end)))) + ================================================================================ Unicode escape sequences ================================================================================