diff --git a/parser/ast.go b/parser/ast.go index e5a906a..cd2eb11 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -4169,11 +4169,18 @@ func (j *JSONPath) String() string { return builder.String() } +type JSONTypeHint struct { + Path *JSONPath + Type ColumnType +} + type JSONOption struct { SkipPath *JSONPath SkipRegex *StringLiteral MaxDynamicPaths *NumberLiteral MaxDynamicTypes *NumberLiteral + // Type hint for specific JSON subcolumn path, e.g., "message String" or "a.b UInt64" + Column *JSONTypeHint } func (j *JSONOption) String() string { @@ -4196,6 +4203,16 @@ func (j *JSONOption) String() string { builder.WriteByte('=') builder.WriteString(j.MaxDynamicTypes.String()) } + if j.Column != nil && j.Column.Path != nil && j.Column.Type != nil { + // add a leading space if there is already content + if builder.Len() > 0 { + builder.WriteByte(' ') + } + builder.WriteString(j.Column.Path.String()) + builder.WriteByte(' ') + builder.WriteString(j.Column.Type.String()) + } + return builder.String() } @@ -4216,12 +4233,41 @@ func (j *JSONOptions) End() Pos { func (j *JSONOptions) String() string { var builder strings.Builder builder.WriteByte('(') - for i, item := range j.Items { - if i > 0 { - builder.WriteString(", ") + // Ensure stable, readable ordering: + // 1) numeric options (max_dynamic_*), 2) type-hint items, 3) skip options (SKIP, SKIP REGEXP) + // Preserve original relative order within each group. + numericOptionItems := make([]*JSONOption, 0, len(j.Items)) + columnItems := make([]*JSONOption, 0, len(j.Items)) + skipOptionItems := make([]*JSONOption, 0, len(j.Items)) + for _, item := range j.Items { + if item.MaxDynamicPaths != nil || item.MaxDynamicTypes != nil { + numericOptionItems = append(numericOptionItems, item) + continue + } + if item.Column != nil { + columnItems = append(columnItems, item) + continue + } + if item.SkipPath != nil || item.SkipRegex != nil { + skipOptionItems = append(skipOptionItems, item) + continue + } + // Fallback: treat as numeric option to avoid dropping unknown future fields + numericOptionItems = append(numericOptionItems, item) + } + + writeItems := func(items []*JSONOption) { + for _, item := range items { + if builder.Len() > 1 { // account for the initial '(' + builder.WriteString(", ") + } + builder.WriteString(item.String()) } - builder.WriteString(item.String()) } + + writeItems(numericOptionItems) + writeItems(columnItems) + writeItems(skipOptionItems) builder.WriteByte(')') return builder.String() } diff --git a/parser/parser_column.go b/parser/parser_column.go index 119658b..5312319 100644 --- a/parser/parser_column.go +++ b/parser/parser_column.go @@ -1018,7 +1018,51 @@ func (p *Parser) parseJSONOption() (*JSONOption, error) { SkipPath: jsonPath, }, nil case p.matchTokenKind(TokenKindIdent): - return p.parseJSONMaxDynamicOptions(p.Pos()) + // Could be max_dynamic_* option OR a type hint like: a.b String + // Lookahead to see if there's an '=' following the identifier path (max_dynamic_*) + // or if it's a path followed by a ColumnType. + // We'll parse a JSONPath first, then decide. + // Save lexer state by consuming as path greedily using existing helpers. + // Try: if single ident and next is '=' -> max_dynamic_*; else treat as path + type + + // Peek next token after current ident without consuming type; we need to + // attempt to parse as max_dynamic_* first as it's existing behavior for a single ident. + // To support dotted paths, we need to capture path, then if '=' exists, it's option; otherwise parse type. + path, err := p.parseJSONPath() + if err != nil { + return nil, err + } + if p.tryConsumeTokenKind(TokenKindSingleEQ) != nil { + // This is a max_dynamic_* option; only valid when path is a single ident of that name + // Reconstruct handling similar to parseJSONMaxDynamicOptions but we already consumed ident and '=' + // Determine which option based on the first ident name + if len(path.Idents) != 1 { + return nil, fmt.Errorf("unexpected token kind: %s", p.lastTokenKind()) + } + name := path.Idents[0].Name + switch name { + case "max_dynamic_types": + number, err := p.parseNumber(p.Pos()) + if err != nil { + return nil, err + } + return &JSONOption{MaxDynamicTypes: number}, nil + case "max_dynamic_paths": + number, err := p.parseNumber(p.Pos()) + if err != nil { + return nil, err + } + return &JSONOption{MaxDynamicPaths: number}, nil + default: + return nil, fmt.Errorf("unexpected token kind: %s", p.lastTokenKind()) + } + } + // Otherwise, expect a ColumnType as a type hint for the JSON subpath + colType, err := p.parseColumnType(p.Pos()) + if err != nil { + return nil, err + } + return &JSONOption{Column: &JSONTypeHint{Path: path, Type: colType}}, nil default: return nil, fmt.Errorf("unexpected token kind: %s", p.lastTokenKind()) } diff --git a/parser/testdata/ddl/create_table_json_typehints.sql b/parser/testdata/ddl/create_table_json_typehints.sql new file mode 100644 index 0000000..68c605e --- /dev/null +++ b/parser/testdata/ddl/create_table_json_typehints.sql @@ -0,0 +1,6 @@ +CREATE TABLE t ( + j JSON(message String, a.b UInt64, max_dynamic_paths=0, SKIP x, SKIP REGEXP 're') +) ENGINE = MergeTree +ORDER BY tuple(); + + diff --git a/parser/testdata/ddl/format/create_table_json_typehints.sql b/parser/testdata/ddl/format/create_table_json_typehints.sql new file mode 100644 index 0000000..8b014f1 --- /dev/null +++ b/parser/testdata/ddl/format/create_table_json_typehints.sql @@ -0,0 +1,11 @@ +-- Origin SQL: +CREATE TABLE t ( + j JSON(message String, a.b UInt64, max_dynamic_paths=0, SKIP x, SKIP REGEXP 're') +) ENGINE = MergeTree +ORDER BY tuple(); + + + + +-- Format SQL: +CREATE TABLE t (j JSON(max_dynamic_paths=0, message String, a.b UInt64, SKIP x, SKIP REGEXP 're')) ENGINE = MergeTree ORDER BY tuple(); diff --git a/parser/testdata/ddl/output/create_table_basic.sql.golden.json b/parser/testdata/ddl/output/create_table_basic.sql.golden.json index 5cb87b3..fc79f87 100644 --- a/parser/testdata/ddl/output/create_table_basic.sql.golden.json +++ b/parser/testdata/ddl/output/create_table_basic.sql.golden.json @@ -685,22 +685,24 @@ "SkipRegex": null, "MaxDynamicPaths": null, "MaxDynamicTypes": { - "NumPos": 571, + "NumPos": 589, "NumEnd": 591, "Literal": "10", "Base": 10 - } + }, + "Column": null }, { "SkipPath": null, "SkipRegex": null, "MaxDynamicPaths": { - "NumPos": 593, + "NumPos": 611, "NumEnd": 612, "Literal": "3", "Base": 10 }, - "MaxDynamicTypes": null + "MaxDynamicTypes": null, + "Column": null }, { "SkipPath": { @@ -715,7 +717,8 @@ }, "SkipRegex": null, "MaxDynamicPaths": null, - "MaxDynamicTypes": null + "MaxDynamicTypes": null, + "Column": null }, { "SkipPath": { @@ -742,7 +745,8 @@ }, "SkipRegex": null, "MaxDynamicPaths": null, - "MaxDynamicTypes": null + "MaxDynamicTypes": null, + "Column": null }, { "SkipPath": null, @@ -752,7 +756,8 @@ "Literal": "hello" }, "MaxDynamicPaths": null, - "MaxDynamicTypes": null + "MaxDynamicTypes": null, + "Column": null } ] } diff --git a/parser/testdata/ddl/output/create_table_json_typehints.sql.golden.json b/parser/testdata/ddl/output/create_table_json_typehints.sql.golden.json new file mode 100644 index 0000000..9d2644c --- /dev/null +++ b/parser/testdata/ddl/output/create_table_json_typehints.sql.golden.json @@ -0,0 +1,204 @@ +[ + { + "CreatePos": 0, + "StatementEnd": 139, + "OrReplace": false, + "Name": { + "Database": null, + "Table": { + "Name": "t", + "QuoteType": 1, + "NamePos": 13, + "NameEnd": 14 + } + }, + "IfNotExists": false, + "UUID": null, + "OnCluster": null, + "TableSchema": { + "SchemaPos": 15, + "SchemaEnd": 103, + "Columns": [ + { + "NamePos": 21, + "ColumnEnd": 101, + "Name": { + "Ident": { + "Name": "j", + "QuoteType": 1, + "NamePos": 21, + "NameEnd": 22 + }, + "DotIdent": null + }, + "Type": { + "Name": { + "Name": "JSON", + "QuoteType": 1, + "NamePos": 23, + "NameEnd": 27 + }, + "Options": { + "LParen": 28, + "RParen": 101, + "Items": [ + { + "SkipPath": null, + "SkipRegex": null, + "MaxDynamicPaths": null, + "MaxDynamicTypes": null, + "Column": { + "Path": { + "Idents": [ + { + "Name": "message", + "QuoteType": 1, + "NamePos": 28, + "NameEnd": 35 + } + ] + }, + "Type": { + "Name": { + "Name": "String", + "QuoteType": 1, + "NamePos": 36, + "NameEnd": 42 + } + } + } + }, + { + "SkipPath": null, + "SkipRegex": null, + "MaxDynamicPaths": null, + "MaxDynamicTypes": null, + "Column": { + "Path": { + "Idents": [ + { + "Name": "a", + "QuoteType": 1, + "NamePos": 44, + "NameEnd": 45 + }, + { + "Name": "b", + "QuoteType": 1, + "NamePos": 46, + "NameEnd": 47 + } + ] + }, + "Type": { + "Name": { + "Name": "UInt64", + "QuoteType": 1, + "NamePos": 48, + "NameEnd": 54 + } + } + } + }, + { + "SkipPath": null, + "SkipRegex": null, + "MaxDynamicPaths": { + "NumPos": 74, + "NumEnd": 75, + "Literal": "0", + "Base": 10 + }, + "MaxDynamicTypes": null, + "Column": null + }, + { + "SkipPath": { + "Idents": [ + { + "Name": "x", + "QuoteType": 1, + "NamePos": 82, + "NameEnd": 83 + } + ] + }, + "SkipRegex": null, + "MaxDynamicPaths": null, + "MaxDynamicTypes": null, + "Column": null + }, + { + "SkipPath": null, + "SkipRegex": { + "LiteralPos": 98, + "LiteralEnd": 100, + "Literal": "re" + }, + "MaxDynamicPaths": null, + "MaxDynamicTypes": null, + "Column": null + } + ] + } + }, + "NotNull": null, + "Nullable": null, + "DefaultExpr": null, + "MaterializedExpr": null, + "AliasExpr": null, + "Codec": null, + "TTL": null, + "Comment": null, + "CompressionCodec": null + } + ], + "AliasTable": null, + "TableFunction": null + }, + "Engine": { + "EnginePos": 105, + "EngineEnd": 139, + "Name": "MergeTree", + "Params": null, + "PrimaryKey": null, + "PartitionBy": null, + "SampleBy": null, + "TTL": null, + "Settings": null, + "OrderBy": { + "OrderPos": 124, + "ListEnd": 139, + "Items": [ + { + "OrderPos": 124, + "Expr": { + "Name": { + "Name": "tuple", + "QuoteType": 1, + "NamePos": 133, + "NameEnd": 138 + }, + "Params": { + "LeftParenPos": 138, + "RightParenPos": 139, + "Items": { + "ListPos": 139, + "ListEnd": 139, + "HasDistinct": false, + "Items": [] + }, + "ColumnArgList": null + } + }, + "Alias": null, + "Direction": "" + } + ] + } + }, + "SubQuery": null, + "HasTemporary": false, + "Comment": null + } +] \ No newline at end of file