From 8c24641d9e604d69d1590927d79057ac52756ec2 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Tue, 10 Jun 2025 22:21:16 +0200 Subject: [PATCH 1/3] chore(jsonparser): Add test case for mutating original log line Signed-off-by: Christian Haudum --- pkg/logql/log/parser_test.go | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/pkg/logql/log/parser_test.go b/pkg/logql/log/parser_test.go index 2a855389aaddd..3ccd379160287 100644 --- a/pkg/logql/log/parser_test.go +++ b/pkg/logql/log/parser_test.go @@ -36,6 +36,27 @@ func Test_jsonParser_Parse(t *testing.T) { }, NoParserHints(), }, + { + "multi depth with duplicates", + []byte(`{"app":"bar","namespace":"prod","pod":{"uuid":"bar","deployment":{"ref":"foobar"}}}`), + labels.FromStrings("app", "foo", + "pod_uuid", "foo", + ), + labels.FromStrings("app", "foo", + "app_extracted", "bar", + "namespace", "prod", + "pod_uuid", "foo", + "pod_uuid_extracted", "bar", + "pod_deployment_ref", "foobar", + ), + map[string][]string{ + "app_extracted": {"app"}, + "namespace": {"namespace"}, + "pod_uuid_extracted": {"pod", "uuid"}, + "pod_deployment_ref": {"pod", "deployment", "ref"}, + }, + NoParserHints(), + }, {"numeric", []byte(`{"counter":1, "price": {"_net_":5.56909}}`), labels.EmptyLabels(), @@ -193,10 +214,14 @@ func Test_jsonParser_Parse(t *testing.T) { for _, tt := range tests { j := NewJSONParser(true) t.Run(tt.name, func(t *testing.T) { + origLine := string(tt.line) + b := NewBaseLabelsBuilderWithGrouping(nil, tt.hints, false, false).ForLabels(tt.lbs, tt.lbs.Hash()) b.Reset() _, _ = j.Process(0, tt.line, b) - require.Equal(t, tt.want, b.LabelsResult().Labels()) + labels := b.LabelsResult().Labels() + require.Equal(t, origLine, string(tt.line), "original log line was modified") + require.Equal(t, tt.want, labels) // Check JSON paths if provided if len(tt.wantJSONPath) > 0 { From 45537a9da3d19908017cb6c7941d3fa679e800a9 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Wed, 11 Jun 2025 09:07:56 +0200 Subject: [PATCH 2/3] fix(jsonparser): Fix bug parser that could cause mutation of original log line The json parser mutated the extracted unsafe string which can cause an in-memory log entry on the ingester to be modified leading to a malformed JSON string. Signed-off-by: Christian Haudum --- pkg/logql/log/parser.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/logql/log/parser.go b/pkg/logql/log/parser.go index 2aafafcd784de..179918b712c2b 100644 --- a/pkg/logql/log/parser.go +++ b/pkg/logql/log/parser.go @@ -170,7 +170,9 @@ func (j *JSONParser) parseLabelValue(key, value []byte, dataType jsonparser.Valu sanitized := j.buildSanitizedPrefixFromBuffer() keyString, ok := j.keys.Get(sanitized, func() (string, bool) { if j.lbs.BaseHas(string(sanitized)) { - j.prefixBuffer[prefixLen] = append(key, duplicateSuffix...) + j.prefixBuffer[prefixLen] = make([]byte, 0, len(key)+len(duplicateSuffix)) + j.prefixBuffer[prefixLen] = append(j.prefixBuffer[prefixLen], key...) + j.prefixBuffer[prefixLen] = append(j.prefixBuffer[prefixLen], duplicateSuffix...) } keyPrefix := j.buildSanitizedPrefixFromBuffer() From 3246924e168e9d6f3d0ed0b3ab56a7d10e23f2f3 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Wed, 11 Jun 2025 09:22:53 +0200 Subject: [PATCH 3/3] fix(jsonparser): Fix jsonpath of extracted "duplicate" nested fields The jsonpath for nested field that would result in duplicate labels and therefore have the `_extracted` suffix, was incorrectly adding the suffix to the last element of the json path. Signed-off-by: Christian Haudum --- pkg/logql/log/parser.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pkg/logql/log/parser.go b/pkg/logql/log/parser.go index 179918b712c2b..f6ebc1de29f62 100644 --- a/pkg/logql/log/parser.go +++ b/pkg/logql/log/parser.go @@ -184,8 +184,9 @@ func (j *JSONParser) parseLabelValue(key, value []byte, dataType jsonparser.Valu }) if j.captureJSONPath { - jsonPath := j.buildJSONPathFromPrefixBuffer() - j.lbs.SetJSONPath(keyString, jsonPath) + if jsonPath := j.buildJSONPathFromPrefixBuffer(); len(jsonPath) > 0 { + j.lbs.SetJSONPath(keyString, jsonPath) + } } // reset the prefix position @@ -219,10 +220,20 @@ func (j *JSONParser) buildSanitizedPrefixFromBuffer() []byte { return j.sanitizedPrefixBuffer } +// buildJSONPathFromPrefixBuffer constructs the JSON path from the accumulated prefix buffer. +// It returns a slice of strings representing each segment of the JSON path. +// If the prefix buffer is empty, it returns nil. +// The function also removes any "_extracted" suffix from "duplicate" fields. func (j *JSONParser) buildJSONPathFromPrefixBuffer() []string { + if len(j.prefixBuffer) == 0 { + return nil + } + jsonPath := make([]string, 0, len(j.prefixBuffer)) for _, part := range j.prefixBuffer { partStr := unsafe.String(unsafe.SliceData(part), len(part)) // #nosec G103 -- we know the string is not mutated + // Trim _extracted suffix if the extracted field was a duplicate field + partStr = strings.TrimSuffix(partStr, duplicateSuffix) jsonPath = append(jsonPath, partStr) }