-
Notifications
You must be signed in to change notification settings - Fork 7
Hand rolled parser for SlsVersionType.RELEASE (aka x.y.z) #463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| type: improvement | ||
| improvement: | ||
| description: Parsing the simplest `x.y.z` type of `OrderableSlsVersion` no longer | ||
| uses regexes. Allocation is reduced by about 75% | ||
| links: | ||
| - https://github.com/palantir/sls-version-java/pull/463 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,75 @@ | ||
| /* | ||
| * (c) Copyright 2021 Palantir Technologies Inc. All rights reserved. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.palantir.sls.versions; | ||
|
|
||
| import java.util.regex.Matcher; | ||
|
|
||
| interface MatchResult { | ||
|
|
||
| /** 1-indexed, to match java regexes. */ | ||
| int groupAsInt(int group); | ||
|
|
||
| int groupCount(); | ||
|
|
||
| final class RegexMatchResult implements MatchResult { | ||
| private final Matcher matcher; | ||
|
|
||
| RegexMatchResult(Matcher matcher) { | ||
| this.matcher = matcher; | ||
| } | ||
|
|
||
| @Override | ||
| public int groupAsInt(int group) { | ||
| return Integer.parseInt(matcher.group(group)); | ||
| } | ||
|
|
||
| @Override | ||
| public int groupCount() { | ||
| return matcher.groupCount(); | ||
| } | ||
| } | ||
|
|
||
| final class Int3MatchResult implements MatchResult { | ||
| private final int first; | ||
| private final int second; | ||
| private final int third; | ||
|
|
||
| Int3MatchResult(int first, int second, int third) { | ||
| this.first = first; | ||
| this.second = second; | ||
| this.third = third; | ||
| } | ||
|
|
||
| @Override | ||
| public int groupAsInt(int group) { | ||
| switch (group) { | ||
| case 1: | ||
| return first; | ||
| case 2: | ||
| return second; | ||
| case 3: | ||
| return third; | ||
| } | ||
| throw new IndexOutOfBoundsException(); | ||
| } | ||
|
|
||
| @Override | ||
| public int groupCount() { | ||
| return 3; | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| /* | ||
| * (c) Copyright 2021 Palantir Technologies Inc. All rights reserved. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.palantir.sls.versions; | ||
|
|
||
| import com.google.errorprone.annotations.Immutable; | ||
| import java.util.regex.Pattern; | ||
| import javax.annotation.Nullable; | ||
|
|
||
| @Immutable | ||
| interface Parser { | ||
|
|
||
| /** Returns a {@link MatchResult} if the provided string matches the pattern, or null otherwise. */ | ||
| @Nullable | ||
| MatchResult tryParse(String string); | ||
|
|
||
| /** An equivalent java regex. */ | ||
| Pattern getPattern(); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,125 @@ | ||
| /* | ||
| * (c) Copyright 2021 Palantir Technologies Inc. All rights reserved. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.palantir.sls.versions; | ||
|
|
||
| /** | ||
| * We're using parser combinator-style ideas here, where each parser function {@link Parsers#numberOrX}, | ||
| * {@link Parsers#number}, {@link Parsers#literalX} accepts an index into our source string and returns a two values: | ||
| * | ||
| * - an updated index into the string representing how many characters were parsed | ||
| * - a value that was actually parsed out of the string (if the parser was able to parse the string), otherwise a | ||
| * clear signal that the parser failed (we use {@link Integer#MIN_VALUE} for this. | ||
| * | ||
| * We bit-pack these two integer values into a single long using {@link Parsers#ok} and {@link Parsers#fail} functions | ||
| * because primitive longs live on the stack and don't impact GC. | ||
| */ | ||
| final class Parsers { | ||
|
|
||
| static final int MAGIC_X_NUMBER = -1; | ||
| private static final long INT_MASK = (1L << 32) - 1; | ||
| private static final int PARSE_FAILED = Integer.MIN_VALUE; | ||
|
|
||
| private Parsers() {} | ||
|
|
||
| // "x" is signified by the magic negative number -1, which is distinct from Integer.MIN_VALUE which is a failure | ||
| static long numberOrX(String string, int startIndex) { | ||
| long xResult = literalX(string, startIndex); | ||
| if (isOk(xResult)) { | ||
| return ok(getIndex(xResult), MAGIC_X_NUMBER); | ||
| } | ||
|
|
||
| long numberResult = number(string, startIndex); | ||
| if (isOk(numberResult)) { | ||
| return numberResult; | ||
| } | ||
|
|
||
| return fail(startIndex); | ||
| } | ||
|
|
||
| static long number(String string, int startIndex) { | ||
| int next = startIndex; | ||
| int len = string.length(); | ||
| while (next < len) { | ||
| int codepoint = string.codePointAt(next); | ||
| if (Character.isDigit(codepoint)) { | ||
| next += 1; | ||
| } else { | ||
| break; | ||
| } | ||
| } | ||
| if (next == startIndex) { | ||
| return fail(startIndex); | ||
| } else if (next == startIndex + 1) { | ||
| return ok(next, Character.digit(string.codePointAt(startIndex), 10)); | ||
| } else { | ||
| try { | ||
| return ok(next, Integer.parseUnsignedInt(string.substring(startIndex, next))); | ||
| } catch (NumberFormatException e) { | ||
| if (e.getMessage() != null && e.getMessage().endsWith("exceeds range of unsigned int.")) { | ||
| return fail(startIndex); | ||
| } else { | ||
| throw e; | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // 0 signifies success | ||
| private static long literalX(String string, int startIndex) { | ||
| if (startIndex < string.length() && string.codePointAt(startIndex) == 'x') { | ||
| return ok(startIndex + 1, 0); | ||
| } else { | ||
| return fail(startIndex); | ||
| } | ||
| } | ||
|
|
||
| static long literalDot(String string, int startIndex) { | ||
| if (startIndex < string.length() && string.codePointAt(startIndex) == '.') { | ||
| return ok(startIndex + 1, 0); | ||
| } else { | ||
| return fail(startIndex); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * We are bit-packing two integers into a single long. The 'index' occupies half of the bits and the 'result' | ||
| * occupies the other half. | ||
| */ | ||
| static long ok(int index, int result) { | ||
| return ((long) index) << 32 | (result & INT_MASK); | ||
| } | ||
|
|
||
| static long fail(int index) { | ||
| return ((long) index) << 32 | (PARSE_FAILED & INT_MASK); | ||
| } | ||
|
|
||
| static boolean isOk(long state) { | ||
| return getResult(state) != PARSE_FAILED; | ||
| } | ||
|
|
||
| static boolean failed(long state) { | ||
| return !isOk(state); | ||
| } | ||
|
|
||
| static int getResult(long state) { | ||
| return (int) (state & INT_MASK); | ||
| } | ||
|
|
||
| static int getIndex(long state) { | ||
| return (int) (state >>> 32); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| /* | ||
| * (c) Copyright 2021 Palantir Technologies Inc. All rights reserved. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.palantir.sls.versions; | ||
|
|
||
| import com.google.errorprone.annotations.Immutable; | ||
| import com.palantir.sls.versions.MatchResult.Int3MatchResult; | ||
| import java.util.regex.Pattern; | ||
| import javax.annotation.Nullable; | ||
|
|
||
| @Immutable | ||
| enum ReleaseVersionParser implements Parser { | ||
| INSTANCE; | ||
|
|
||
| @Nullable | ||
| @Override | ||
| public MatchResult tryParse(String string) { | ||
| long state = Parsers.number(string, 0); | ||
| if (Parsers.failed(state)) { | ||
| return null; | ||
| } | ||
| int first = Parsers.getResult(state); | ||
|
|
||
| state = Parsers.literalDot(string, Parsers.getIndex(state)); | ||
| if (Parsers.failed(state)) { | ||
| return null; | ||
| } | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is just begging for some kind of monadic bind abstraction... but obviously that would require a lot of allocation in Java. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. totally agree |
||
|
|
||
| state = Parsers.number(string, Parsers.getIndex(state)); | ||
| if (Parsers.failed(state)) { | ||
| return null; | ||
| } | ||
| int second = Parsers.getResult(state); | ||
|
|
||
| state = Parsers.literalDot(string, Parsers.getIndex(state)); | ||
| if (Parsers.failed(state)) { | ||
| return null; | ||
| } | ||
|
|
||
| state = Parsers.number(string, Parsers.getIndex(state)); | ||
| if (Parsers.failed(state)) { | ||
| return null; | ||
| } | ||
| int third = Parsers.getResult(state); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was thinking maybe you could make this less duplicated by having a for loop - but then you'd need to either make an an array or have You could also group |
||
|
|
||
| if (Parsers.getIndex(state) < string.length()) { | ||
| return null; // reject due to trailing stuff | ||
| } | ||
|
|
||
| return new Int3MatchResult(first, second, third); | ||
| } | ||
|
|
||
| @Override | ||
| public Pattern getPattern() { | ||
| return Pattern.compile("^([0-9]+)\\.([0-9]+)\\.([0-9]+)$"); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
all these functions just got moved from
SlsVersionMatcherParser.java- no changes made here.