-
Notifications
You must be signed in to change notification settings - Fork 25.6k
New Histogram field mapper that supports percentiles aggregations. #48580
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
c4bfdb7
550394c
9d4f9c4
4e3eed7
a168d32
038d429
edc2faf
c527aec
bd59238
71886a8
793a257
579c05c
af1249f
1cb8f53
93229e5
996f8fc
edec448
adf12a4
3c5892e
19f15a2
1f6383d
fe039ee
40f679d
79f7fd9
fbabf1c
f1a1ead
c8a1f12
0045a8b
f8cf1a7
2e8649a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ | |
| import org.elasticsearch.common.settings.Settings; | ||
| import org.elasticsearch.common.xcontent.XContentBuilder; | ||
| import org.elasticsearch.common.xcontent.XContentParser; | ||
| import org.elasticsearch.common.xcontent.XContentSubParser; | ||
| import org.elasticsearch.common.xcontent.support.XContentMapValues; | ||
| import org.elasticsearch.index.IndexSettings; | ||
| import org.elasticsearch.index.fielddata.AtomicHistogramFieldData; | ||
|
|
@@ -298,8 +299,8 @@ private HistogramValue getHistogramValue(final BytesRef bytesRef) throws IOExcep | |
| @Override | ||
| public boolean next() throws IOException { | ||
| if (streamInput.available() > 0) { | ||
| value = streamInput.readDouble(); | ||
| count = streamInput.readVInt(); | ||
| value = streamInput.readDouble(); | ||
| return true; | ||
| } | ||
| isExhausted = true; | ||
|
|
@@ -352,7 +353,7 @@ public void parse(ParseContext context) throws IOException { | |
| } | ||
| context.path().add(simpleName()); | ||
| XContentParser.Token token = null; | ||
| int level = 0; | ||
| XContentSubParser subParser = null; | ||
| try { | ||
| token = context.parser().currentToken(); | ||
| if (token == XContentParser.Token.VALUE_NULL) { | ||
|
|
@@ -363,22 +364,23 @@ public void parse(ParseContext context) throws IOException { | |
| IntArrayList counts = null; | ||
| // should be an object | ||
| ensureExpectedToken(XContentParser.Token.START_OBJECT, token, context.parser()::getTokenLocation); | ||
| token = context.parser().nextToken(); | ||
| subParser = new XContentSubParser(context.parser()); | ||
| token = subParser.nextToken(); | ||
| while (token != XContentParser.Token.END_OBJECT) { | ||
| // should be an field | ||
| ensureExpectedToken(XContentParser.Token.FIELD_NAME, token, context.parser()::getTokenLocation); | ||
| String fieldName = context.parser().currentName(); | ||
| ensureExpectedToken(XContentParser.Token.FIELD_NAME, token, subParser::getTokenLocation); | ||
| String fieldName = subParser.currentName(); | ||
| if (fieldName.equals(VALUES_FIELD.getPreferredName())) { | ||
| token = context.parser().nextToken(); | ||
| token = subParser.nextToken(); | ||
| // should be an array | ||
| ensureExpectedToken(XContentParser.Token.START_ARRAY, token, context.parser()::getTokenLocation); | ||
| ensureExpectedToken(XContentParser.Token.START_ARRAY, token, subParser::getTokenLocation); | ||
| values = new DoubleArrayList(); | ||
| token = context.parser().nextToken(); | ||
| token = subParser.nextToken(); | ||
| double previousVal = -Double.MAX_VALUE; | ||
| while (token != XContentParser.Token.END_ARRAY) { | ||
| // should be a number | ||
| ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, context.parser()::getTokenLocation); | ||
| double val = context.parser().doubleValue(); | ||
| ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, subParser::getTokenLocation); | ||
| double val = subParser.doubleValue(); | ||
| if (val < previousVal) { | ||
| // values must be in increasing order | ||
| throw new MapperParsingException("error parsing field [" | ||
|
|
@@ -387,28 +389,26 @@ public void parse(ParseContext context) throws IOException { | |
| } | ||
| values.add(val); | ||
| previousVal = val; | ||
| token = context.parser().nextToken(); | ||
| token = subParser.nextToken(); | ||
| } | ||
| } else if (fieldName.equals(COUNTS_FIELD.getPreferredName())) { | ||
| token = context.parser().nextToken(); | ||
| token = subParser.nextToken(); | ||
| // should be an array | ||
| ensureExpectedToken(XContentParser.Token.START_ARRAY, token, context.parser()::getTokenLocation); | ||
| ensureExpectedToken(XContentParser.Token.START_ARRAY, token, subParser::getTokenLocation); | ||
| counts = new IntArrayList(); | ||
| token = context.parser().nextToken(); | ||
| token = subParser.nextToken(); | ||
| while (token != XContentParser.Token.END_ARRAY) { | ||
| // should be a number | ||
| ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, context.parser()::getTokenLocation); | ||
| counts.add(context.parser().intValue()); | ||
| token = context.parser().nextToken(); | ||
| ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, subParser::getTokenLocation); | ||
| counts.add(subParser.intValue()); | ||
| token = subParser.nextToken(); | ||
| } | ||
| } else { | ||
| throw new MapperParsingException("error parsing field [" + | ||
| name() + "], with unknown parameter [" + fieldName + "]"); | ||
| } | ||
| token = context.parser().nextToken(); | ||
| level = maybeAddOrRemoveLevel(token, level); | ||
| token = subParser.nextToken(); | ||
| } | ||
| level = 0; | ||
| if (values == null) { | ||
| throw new MapperParsingException("error parsing field [" | ||
| + name() + "], expected field called [" + VALUES_FIELD.getPreferredName() + "]"); | ||
|
|
@@ -431,8 +431,8 @@ public void parse(ParseContext context) throws IOException { | |
| + name() + "], ["+ COUNTS_FIELD + "] elements must be >= 0 but got " + counts.get(i)); | ||
| } else if (count > 0) { | ||
| // we do not add elements with count == 0 | ||
| streamOutput.writeDouble(values.get(i)); | ||
| streamOutput.writeVInt(count); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd suggest writing the count before the value; that might make it easier to compress better in the future by stealing bits from the count. |
||
| streamOutput.writeDouble(values.get(i)); | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we skip values that have a count of 0 when writing doc values?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Makes sense; I added code to skip values whose count is zero. |
||
| } | ||
|
|
||
|
|
@@ -451,27 +451,16 @@ public void parse(ParseContext context) throws IOException { | |
| ex, fieldType().name(), fieldType().typeName()); | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When ignoreMalformed is true, I think we should also try to move to the end of the histogram object so that other fields can be parsed successfully. See how the geo_shape field does it for instance.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I added my own logic to skip to the end of the field. I am not sure whether there is an existing utility that does what I am doing here by hand.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| // we need to advance until the end of the field | ||
| if (token != null) { | ||
| while (level > 0 || token != XContentParser.Token.END_OBJECT) { | ||
| level = maybeAddOrRemoveLevel(token, level); | ||
| token = context.parser().nextToken(); | ||
| if (subParser != null) { | ||
| while (token != null) { | ||
| token = subParser.nextToken(); | ||
| } | ||
|
||
| } | ||
| context.addIgnoredField(fieldType().name()); | ||
| } | ||
| context.path().remove(); | ||
| } | ||
|
|
||
| private int maybeAddOrRemoveLevel(XContentParser.Token token, int level) { | ||
| if (token == XContentParser.Token.START_OBJECT) { | ||
| return ++level; | ||
| } | ||
| if (token == XContentParser.Token.END_OBJECT) { | ||
| return --level; | ||
| } | ||
| return level; | ||
| } | ||
|
|
||
| @Override | ||
| protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { | ||
| super.doXContentBody(builder, includeDefaults, params); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps another sentence/paragraph at the end?
Or something similar... The point is to convey to the user that how they index the data is important and that they should choose up front.