diff --git a/parser/internal/Cel.g4 b/parser/internal/Cel.g4 index 57ae7e097..9b2c73954 100644 --- a/parser/internal/Cel.g4 +++ b/parser/internal/Cel.g4 @@ -4,7 +4,7 @@ // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, @@ -52,16 +52,17 @@ unary member : primary # PrimaryExpr - | member op='.' (opt='?')? id=IDENTIFIER # Select + | member op='.' (opt='?')? id=escapeIdent # Select | member op='.' id=IDENTIFIER open='(' args=exprList? ')' # MemberCall | member op='[' (opt='?')? index=expr ']' # Index ; primary - : leadingDot='.'? id=IDENTIFIER (op='(' args=exprList? ')')? # IdentOrGlobalCall + : leadingDot='.'? id=IDENTIFIER # Ident + | leadingDot='.'? id=IDENTIFIER (op='(' args=exprList? ')') # GlobalCall | '(' e=expr ')' # Nested | op='[' elems=listInit? ','? ']' # CreateList - | op='{' entries=mapInitializerList? ','? '}' # CreateStruct + | op='{' entries=mapInitializerList? ','? '}' # CreateMap | leadingDot='.'? ids+=IDENTIFIER (ops+='.' ids+=IDENTIFIER)* op='{' entries=fieldInitializerList? ','? '}' # CreateMessage | literal # ConstantLiteral @@ -80,13 +81,18 @@ fieldInitializerList ; optField - : (opt='?')? id=IDENTIFIER + : (opt='?')? escapeIdent ; mapInitializerList : keys+=optExpr cols+=':' values+=expr (',' keys+=optExpr cols+=':' values+=expr)* ; +escapeIdent + : id=IDENTIFIER # SimpleIdentifier + | id=ESC_IDENTIFIER # EscapedIdentifier + ; + optExpr : (opt='?')? e=expr ; @@ -198,3 +204,4 @@ STRING BYTES : ('b' | 'B') STRING; IDENTIFIER : (LETTER | '_') ( LETTER | DIGIT | '_')*; +ESC_IDENTIFIER : '`' (LETTER | DIGIT | '_' | '.' | '-' | '/' | ' ')+ '`'; diff --git a/parser/options.h b/parser/options.h index 50f02a4bb..892a406fd 100644 --- a/parser/options.h +++ b/parser/options.h @@ -53,6 +53,12 @@ struct ParserOptions final { // Enable hidden accumulator variable '@result' for builtin comprehensions. bool enable_hidden_accumulator_var = false; + + // Enables support for identifier quoting syntax: + // "message.`skewer-case-field`" + // + // Limited to field specifiers in select and message creation. + bool enable_quoted_identifiers = false; }; } // namespace cel diff --git a/parser/parser.cc b/parser/parser.cc index 50d72b7e5..6c6527b46 100644 --- a/parser/parser.cc +++ b/parser/parser.cc @@ -608,8 +608,18 @@ class ParserVisitor final : public CelBaseVisitor, absl::string_view accu_var, const cel::MacroRegistry& macro_registry, bool add_macro_calls = false, - bool enable_optional_syntax = false); - ~ParserVisitor() override; + bool enable_optional_syntax = false, + bool enable_quoted_identifiers = false) + : source_(source), + factory_(source_, accu_var), + macro_registry_(macro_registry), + recursion_depth_(0), + max_recursion_depth_(max_recursion_depth), + add_macro_calls_(add_macro_calls), + enable_optional_syntax_(enable_optional_syntax), + enable_quoted_identifiers_(enable_quoted_identifiers) {} + + ~ParserVisitor() override = default; std::any visit(antlr4::tree::ParseTree* tree) override; @@ -630,13 +640,13 @@ class ParserVisitor final : public CelBaseVisitor, CelParser::FieldInitializerListContext* ctx) override; std::vector visitFields( CelParser::FieldInitializerListContext* ctx); - std::any visitIdentOrGlobalCall( - CelParser::IdentOrGlobalCallContext* ctx) override; + std::any visitGlobalCall(CelParser::GlobalCallContext* ctx) override; + std::any visitIdent(CelParser::IdentContext* ctx) override; std::any visitNested(CelParser::NestedContext* ctx) override; std::any visitCreateList(CelParser::CreateListContext* ctx) override; std::vector visitList(CelParser::ListInitContext* ctx); std::vector visitList(CelParser::ExprListContext* ctx); - std::any visitCreateStruct(CelParser::CreateStructContext* ctx) override; + std::any visitCreateMap(CelParser::CreateMapContext* ctx) override; std::any visitConstantLiteral( CelParser::ConstantLiteralContext* ctx) override; std::any visitPrimaryExpr(CelParser::PrimaryExprContext* ctx) override; @@ -691,6 +701,8 @@ class ParserVisitor final : public CelBaseVisitor, Expr target, std::vector args); std::string ExtractQualifiedName(antlr4::ParserRuleContext* ctx, const Expr& e); + + std::string NormalizeIdentifier(CelParser::EscapeIdentContext* ctx); // Attempt to unnest parse context. // // Walk the parse tree to the first complex term to reduce recursive depth in @@ -705,24 +717,9 @@ class ParserVisitor final : public CelBaseVisitor, const int max_recursion_depth_; const bool add_macro_calls_; const bool enable_optional_syntax_; + const bool enable_quoted_identifiers_; }; -ParserVisitor::ParserVisitor(const cel::Source& source, - const int max_recursion_depth, - absl::string_view accu_var, - const cel::MacroRegistry& macro_registry, - const bool add_macro_calls, - bool enable_optional_syntax) - : source_(source), - factory_(source_, accu_var), - macro_registry_(macro_registry), - recursion_depth_(0), - max_recursion_depth_(max_recursion_depth), - add_macro_calls_(add_macro_calls), - enable_optional_syntax_(enable_optional_syntax) {} - -ParserVisitor::~ParserVisitor() {} - template ::value>> T* tree_as(antlr4::tree::ParseTree* tree) { @@ -771,8 +768,8 @@ std::any ParserVisitor::visit(antlr4::tree::ParseTree* tree) { return visitCreateList(ctx); } else if (auto* ctx = tree_as(tree)) { return visitCreateMessage(ctx); - } else if (auto* ctx = tree_as(tree)) { - return visitCreateStruct(ctx); + } else if (auto* ctx = tree_as(tree)) { + return visitCreateMap(ctx); } if (tree) { @@ -788,13 +785,14 @@ std::any ParserVisitor::visitPrimaryExpr(CelParser::PrimaryExprContext* pctx) { CelParser::PrimaryContext* primary = pctx->primary(); if (auto* ctx = tree_as(primary)) { return visitNested(ctx); - } else if (auto* ctx = - tree_as(primary)) { - return visitIdentOrGlobalCall(ctx); + } else if (auto* ctx = tree_as(primary)) { + return visitIdent(ctx); + } else if (auto* ctx = tree_as(primary)) { + return visitGlobalCall(ctx); } else if (auto* ctx = tree_as(primary)) { return visitCreateList(ctx); - } else if (auto* ctx = tree_as(primary)) { - return visitCreateStruct(ctx); + } else if (auto* ctx = tree_as(primary)) { + return visitCreateMap(ctx); } else if (auto* ctx = tree_as(primary)) { return visitCreateMessage(ctx); } else if (auto* ctx = tree_as(primary)) { @@ -1013,6 +1011,25 @@ std::any ParserVisitor::visitNegate(CelParser::NegateContext* ctx) { GlobalCallOrMacro(op_id, CelOperator::NEGATE, std::move(target))); } +std::string ParserVisitor::NormalizeIdentifier( + CelParser::EscapeIdentContext* ctx) { + if (auto* raw_id = tree_as(ctx); raw_id) { + return raw_id->id->getText(); + } + if (auto* escaped_id = tree_as(ctx); + escaped_id) { + if (!enable_quoted_identifiers_) { + factory_.ReportError(SourceRangeFromParserRuleContext(ctx), + "unsupported syntax '`'"); + } + auto escaped_id_text = escaped_id->id->getText(); + return escaped_id_text.substr(1, escaped_id_text.size() - 2); + } + + // Fallthrough might occur if the parser is in an error state. + return ""; +} + std::any ParserVisitor::visitSelect(CelParser::SelectContext* ctx) { auto operand = ExprFromAny(visit(ctx->member())); // Handle the error case where no valid identifier is specified. @@ -1020,7 +1037,7 @@ std::any ParserVisitor::visitSelect(CelParser::SelectContext* ctx) { return ExprToAny(factory_.NewUnspecified( factory_.NextId(SourceRangeFromParserRuleContext(ctx)))); } - auto id = ctx->id->getText(); + auto id = NormalizeIdentifier(ctx->id); if (ctx->opt != nullptr) { if (!enable_optional_syntax_) { return ExprToAny(factory_.ReportError( @@ -1108,12 +1125,15 @@ std::vector ParserVisitor::visitFields( // This is the result of a syntax error detected elsewhere. return res; } - const auto* f = ctx->fields[i]; - if (f->id == nullptr) { + auto* f = ctx->fields[i]; + if (!f->escapeIdent()) { ABSL_DCHECK(HasErrored()); // This is the result of a syntax error detected elsewhere. return res; } + + std::string id = NormalizeIdentifier(f->escapeIdent()); + int64_t init_id = factory_.NextId(SourceRangeFromToken(ctx->cols[i])); if (!enable_optional_syntax_ && f->opt) { factory_.ReportError(SourceRangeFromParserRuleContext(ctx), @@ -1121,15 +1141,14 @@ std::vector ParserVisitor::visitFields( continue; } auto value = ExprFromAny(visit(ctx->values[i])); - res.push_back(factory_.NewStructField(init_id, f->id->getText(), + res.push_back(factory_.NewStructField(init_id, std::move(id), std::move(value), f->opt != nullptr)); } return res; } -std::any ParserVisitor::visitIdentOrGlobalCall( - CelParser::IdentOrGlobalCallContext* ctx) { +std::any ParserVisitor::visitIdent(CelParser::IdentContext* ctx) { std::string ident_name; if (ctx->leadingDot) { ident_name = "."; @@ -1138,23 +1157,43 @@ std::any ParserVisitor::visitIdentOrGlobalCall( return ExprToAny(factory_.NewUnspecified( factory_.NextId(SourceRangeFromParserRuleContext(ctx)))); } + // check if ID is in reserved identifiers if (cel::internal::LexisIsReserved(ctx->id->getText())) { return ExprToAny(factory_.ReportError( SourceRangeFromParserRuleContext(ctx), absl::StrFormat("reserved identifier: %s", ctx->id->getText()))); } - // check if ID is in reserved identifiers + ident_name += ctx->id->getText(); - if (ctx->op) { - int64_t op_id = factory_.NextId(SourceRangeFromToken(ctx->op)); - auto args = visitList(ctx->args); - return ExprToAny( - GlobalCallOrMacroImpl(op_id, std::move(ident_name), std::move(args))); - } + return ExprToAny(factory_.NewIdent( factory_.NextId(SourceRangeFromToken(ctx->id)), std::move(ident_name))); } +std::any ParserVisitor::visitGlobalCall(CelParser::GlobalCallContext* ctx) { + std::string ident_name; + if (ctx->leadingDot) { + ident_name = "."; + } + if (!ctx->id || !ctx->op) { + return ExprToAny(factory_.NewUnspecified( + factory_.NextId(SourceRangeFromParserRuleContext(ctx)))); + } + // check if ID is in reserved identifiers + if (cel::internal::LexisIsReserved(ctx->id->getText())) { + return ExprToAny(factory_.ReportError( + SourceRangeFromParserRuleContext(ctx), + absl::StrFormat("reserved identifier: %s", ctx->id->getText()))); + } + + ident_name += ctx->id->getText(); + + int64_t op_id = factory_.NextId(SourceRangeFromToken(ctx->op)); + auto args = visitList(ctx->args); + return ExprToAny( + GlobalCallOrMacroImpl(op_id, std::move(ident_name), std::move(args))); +} + std::any ParserVisitor::visitNested(CelParser::NestedContext* ctx) { return visit(ctx->e); } @@ -1197,7 +1236,7 @@ std::vector ParserVisitor::visitList(CelParser::ExprListContext* ctx) { return rv; } -std::any ParserVisitor::visitCreateStruct(CelParser::CreateStructContext* ctx) { +std::any ParserVisitor::visitCreateMap(CelParser::CreateMapContext* ctx) { int64_t struct_id = factory_.NextId(SourceRangeFromToken(ctx->op)); std::vector entries; if (ctx->entries) { @@ -1629,7 +1668,8 @@ absl::StatusOr ParseImpl(const cel::Source& source, } ParserVisitor visitor(source, options.max_recursion_depth, accu_var, registry, options.add_macro_calls, - options.enable_optional_syntax); + options.enable_optional_syntax, + options.enable_quoted_identifiers); lexer.removeErrorListeners(); parser.removeErrorListeners(); diff --git a/parser/parser_test.cc b/parser/parser_test.cc index 1b5ff197a..0149538e9 100644 --- a/parser/parser_test.cc +++ b/parser/parser_test.cc @@ -441,7 +441,8 @@ std::vector test_cases = { "ERROR: :4294967295:0: <> parsetree"}, {"t{>C}", "", "ERROR: :1:3: Syntax error: extraneous input '>' expecting {'}', " - "',', '\\u003F', IDENTIFIER}\n | t{>C}\n | ..^\nERROR: :1:5: " + "',', '\\u003F', IDENTIFIER, ESC_IDENTIFIER}\n | t{>C}\n | ..^\nERROR: " + ":1:5: " "Syntax error: " "mismatched input '}' expecting ':'\n | t{>C}\n | ....^"}, @@ -923,6 +924,84 @@ std::vector test_cases = { " | ..^", }, + // Identifier quoting syntax tests. + {"a.`b`", "a^#1:Expr.Ident#.b^#2:Expr.Select#"}, + {"a.`b-c`", "a^#1:Expr.Ident#.b-c^#2:Expr.Select#"}, + {"a.`b c`", "a^#1:Expr.Ident#.b c^#2:Expr.Select#"}, + {"a.`b/c`", "a^#1:Expr.Ident#.b/c^#2:Expr.Select#"}, + {"a.`b.c`", "a^#1:Expr.Ident#.b.c^#2:Expr.Select#"}, + {"a.`in`", "a^#1:Expr.Ident#.in^#2:Expr.Select#"}, + {"A{`b`: 1}", + "A{\n" + " b:1^#3:int64#^#2:Expr.CreateStruct.Entry#\n" + "}^#1:Expr.CreateStruct#"}, + {"A{`b-c`: 1}", + "A{\n" + " b-c:1^#3:int64#^#2:Expr.CreateStruct.Entry#\n" + "}^#1:Expr.CreateStruct#"}, + {"A{`b c`: 1}", + "A{\n" + " b c:1^#3:int64#^#2:Expr.CreateStruct.Entry#\n" + "}^#1:Expr.CreateStruct#"}, + {"A{`b/c`: 1}", + "A{\n" + " b/c:1^#3:int64#^#2:Expr.CreateStruct.Entry#\n" + "}^#1:Expr.CreateStruct#"}, + {"A{`b.c`: 1}", + "A{\n" + " b.c:1^#3:int64#^#2:Expr.CreateStruct.Entry#\n" + "}^#1:Expr.CreateStruct#"}, + {"A{`in`: 1}", + "A{\n" + " in:1^#3:int64#^#2:Expr.CreateStruct.Entry#\n" + "}^#1:Expr.CreateStruct#"}, + {"has(a.`b/c`)", "a^#2:Expr.Ident#.b/c~test-only~^#4:Expr.Select#"}, + // Unsupported quoted identifiers. + {"a.`b\tc`", "", + "ERROR: :1:3: Syntax error: token recognition error at: '`b\\t'\n" + " | a.`b c`\n" + " | ..^\n" + "ERROR: :1:7: Syntax error: token recognition error at: '`'\n" + " | a.`b c`\n" + " | ......^"}, + {"a.`@foo`", "", + "ERROR: :1:3: Syntax error: token recognition error at: '`@'\n" + " | a.`@foo`\n" + " | ..^\n" + "ERROR: :1:8: Syntax error: token recognition error at: '`'\n" + " | a.`@foo`\n" + " | .......^"}, + {"a.`$foo`", "", + "ERROR: :1:3: Syntax error: token recognition error at: '`$'\n" + " | a.`$foo`\n" + " | ..^\n" + "ERROR: :1:8: Syntax error: token recognition error at: '`'\n" + " | a.`$foo`\n" + " | .......^"}, + {"`a.b`", "", + "ERROR: :1:1: Syntax error: mismatched input '`a.b`' expecting " + "{'[', '{', " + "'(', '.', '-', '!', 'true', 'false', 'null', NUM_FLOAT, NUM_INT, " + "NUM_UINT, STRING, " + "BYTES, IDENTIFIER}\n" + " | `a.b`\n" + " | ^"}, + {"`a.b`()", "", + "ERROR: :1:1: Syntax error: extraneous input '`a.b`' expecting " + "{'[', '{', '(', '.', '-', '!', 'true', 'false', 'null', NUM_FLOAT, " + "NUM_INT, NUM_UINT, STRING, BYTES, IDENTIFIER}\n" + " | `a.b`()\n" + " | ^\n" + "ERROR: :1:7: Syntax error: mismatched input ')' expecting {'[', " + "'{', '(', '.', '-', '!', 'true', 'false', 'null', NUM_FLOAT, NUM" + "_INT, NUM_UINT, STRING, BYTES, IDENTIFIER}\n" + " | `a.b`()\n" + " | ......^"}, + {"foo.`a.b`()", "", + "ERROR: :1:10: Syntax error: mismatched input '(' expecting \n" + " | foo.`a.b`()\n" + " | .........^"}, + // Macro calls tests {"x.filter(y, y.filter(z, z > 0))", "__comprehension__(\n" @@ -1406,6 +1485,7 @@ TEST_P(ExpressionTest, Parse) { options.add_macro_calls = true; } options.enable_optional_syntax = true; + options.enable_quoted_identifiers = true; std::vector macros = Macro::AllMacros(); macros.push_back(cel::OptMapMacro()); @@ -1516,6 +1596,18 @@ TEST(ExpressionTest, RecursionDepthExceeded) { HasSubstr("Exceeded max recursion depth of 6 when parsing.")); } +TEST(ExpressionTest, DisableQuotedIdentifiers) { + ParserOptions options; + options.enable_quoted_identifiers = false; + auto result = Parse("foo.`bar`", "", options); + + EXPECT_THAT(result, Not(IsOk())); + EXPECT_THAT(result.status().message(), + HasSubstr("ERROR: :1:5: unsupported syntax '`'\n" + " | foo.`bar`\n" + " | ....^")); +} + TEST(ExpressionTest, DisableStandardMacros) { ParserOptions options; options.disable_standard_macros = true;