-
Notifications
You must be signed in to change notification settings - Fork 5.2k
Add Match and isMatch ReadOnlySpan<char> overloads as well as Regex protected methods which expose ReadOnlySpan<char> to avoid allocations. #62509
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
c1c0fa3
2d3aef3
2960e99
d16b8e9
444c017
cc92623
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -222,7 +222,15 @@ private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter wri | |
| DescribeExpression(writer, rm.Code.Tree.Root.Child(0), " // ", rm.Code); // skip implicit root capture | ||
| writer.WriteLine(); | ||
|
|
||
| writer.WriteLine($" protected override bool FindFirstChar()"); | ||
| writer.WriteLine($" protected override void Scan(global::System.Text.RegularExpressions.Regex regex, global::System.ReadOnlySpan<char> text, int textstart, int prevlen, bool quick)"); | ||
| writer.WriteLine($" {{"); | ||
| writer.Indent += 4; | ||
| EmitScan(writer, rm, id); | ||
| writer.Indent -= 4; | ||
| writer.WriteLine($" }}"); | ||
| writer.WriteLine(); | ||
|
|
||
| writer.WriteLine($" private bool FindFirstChar(global::System.ReadOnlySpan<char> inputSpan)"); | ||
| writer.WriteLine($" {{"); | ||
| writer.Indent += 4; | ||
| RequiredHelperFunctions requiredHelpers = EmitFindFirstChar(writer, rm, id); | ||
|
|
@@ -233,7 +241,7 @@ private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter wri | |
| { | ||
| writer.WriteLine($" [global::System.Runtime.CompilerServices.SkipLocalsInit]"); | ||
| } | ||
| writer.WriteLine($" protected override void Go()"); | ||
| writer.WriteLine($" private bool Go(global::System.ReadOnlySpan<char> inputSpan)"); | ||
| writer.WriteLine($" {{"); | ||
| writer.Indent += 4; | ||
| requiredHelpers |= EmitGo(writer, rm, id); | ||
|
|
@@ -299,6 +307,115 @@ static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht) | |
| } | ||
| } | ||
|
|
||
| private static void EmitScan(IndentedTextWriter writer, RegexMethod rm, string id) | ||
| { | ||
| RegexOptions options = (RegexOptions)rm.Options; | ||
|
|
||
| // Emit locals initialization | ||
| //writer.WriteLine("this.quick = quick;"); | ||
| //writer.WriteLine("base.runtextpos = textstart;"); | ||
| //writer.WriteLine("base.runregex = regex;"); | ||
| //writer.WriteLine("base.runtextstart = textstart;"); | ||
| //writer.WriteLine("base.runtextbeg = textbeg;"); | ||
| //writer.WriteLine("base.runtextend = textend;"); | ||
| //writer.WriteLine("bool initialized = false;"); | ||
| //writer.WriteLine(); | ||
|
|
||
| //EmitTimeoutHandling(); | ||
| //writer.WriteLine(); | ||
|
|
||
| // Source generator doesn't support Right-To-Left so there is no need to add the sepcial bump logic. | ||
| // If Right-to-left is ever added to the source generator, then we would need to the logic to define | ||
| // bump, as well as stoppos | ||
| Debug.Assert((options & RegexOptions.RightToLeft) == 0); | ||
|
|
||
| EmitPrevLenCheck(); | ||
| writer.WriteLine(); | ||
|
|
||
| EmitMainSearchLoop(); | ||
| writer.WriteLine(); | ||
|
|
||
| return; | ||
|
|
||
| #pragma warning disable CS8321 // Local function is declared but never used | ||
| void EmitTimeoutHandling() | ||
| { | ||
| writer.WriteLine("// Handle timeout argument"); | ||
| writer.WriteLine("_timeout = -1;"); | ||
| writer.WriteLine("bool ignoreTimeout = global::System.Text.RegularExpressions.Regex.InfiniteMatchTimeout == timeout;"); | ||
| using (EmitBlock(writer, "if (!ignoreTimeout)")) | ||
| { | ||
| writer.WriteLine("// We are using Environment.TickCount and not Stopwatch for performance reasons."); | ||
| writer.WriteLine("// Environment.TickCount is an int that cycles. We intentionally let timeoutOccursAt"); | ||
| writer.WriteLine("// overflow it will still stay ahead of Environment.TickCount for comparisons made"); | ||
| writer.WriteLine("// in DoCheckTimeout()."); | ||
| writer.WriteLine("global::System.Text.RegularExpressions.Regex.ValidateMatchTimeout(timeout); // validate timeout as this could be called from user code due to being protected"); | ||
joperezr marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| writer.WriteLine("_timeout = (int)(timeout.TotalMilliseconds + 0.5); // Round;"); | ||
| writer.WriteLine("_timeoutOccursAt = global::System.Environment.TickCount + _timeout;"); | ||
| writer.WriteLine("_timeoutChecksToSkip = TimeoutCheckFrequency;"); | ||
|
||
| } | ||
| } | ||
| #pragma warning restore CS8321 // Local function is declared but never used | ||
|
|
||
| void EmitPrevLenCheck() | ||
| { | ||
| writer.WriteLine("// If previous match was empty or failed, advance by one before matching."); | ||
| using (EmitBlock(writer, "if (prevlen == 0)")) | ||
| { | ||
| using (EmitBlock(writer, "if (textstart == text.Length)")) | ||
| { | ||
| writer.WriteLine("base.runmatch = global::System.Text.RegularExpressions.Match.Empty;"); | ||
| writer.WriteLine("return;"); | ||
| } | ||
| writer.WriteLine(); | ||
| writer.WriteLine("base.runtextpos++;"); | ||
| } | ||
| } | ||
|
|
||
| void EmitMainSearchLoop() | ||
| { | ||
| using (EmitBlock(writer, "while (true)")) | ||
| { | ||
| using (EmitBlock(writer, "if (FindFirstChar(text))")) | ||
| { | ||
| writer.WriteLine("base.CheckTimeout();"); | ||
stephentoub marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| writer.WriteLine(); | ||
|
|
||
| writer.WriteLine("// If we got a match, we're done."); | ||
| using (EmitBlock(writer, "if (Go(text))")) | ||
|
||
| { | ||
| using (EmitBlock(writer, "if (quick)")) | ||
| { | ||
| writer.WriteLine("base.runmatch = null;"); | ||
|
||
| writer.WriteLine("return;"); | ||
| } | ||
| writer.WriteLine(); | ||
|
|
||
| writer.WriteLine("// base.runmatch!.Tidy(base.runtextpos);"); | ||
| writer.WriteLine("return;"); | ||
| } | ||
| writer.WriteLine(); | ||
|
|
||
| writer.WriteLine("// Reset state for another iteration."); | ||
| writer.WriteLine("base.runtrackpos = base.runtrack!.Length;"); | ||
| writer.WriteLine("base.runstackpos = base.runstack!.Length;"); | ||
| writer.WriteLine("base.runcrawlpos = base.runcrawl!.Length;"); | ||
|
||
| } | ||
| writer.WriteLine(); | ||
|
|
||
| writer.WriteLine("// We failed to find a match. If we're at the end of the input, then we are done."); | ||
| using (EmitBlock(writer, "if (base.runtextpos == text.Length)")) | ||
| { | ||
| writer.WriteLine("base.runmatch = global::System.Text.RegularExpressions.Match.Empty;"); | ||
|
||
| writer.WriteLine("return;"); | ||
| } | ||
| writer.WriteLine(); | ||
|
|
||
| writer.WriteLine("base.runtextpos++;"); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// <summary>Emits the body of the FindFirstChar override.</summary> | ||
| private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, string id) | ||
| { | ||
|
|
@@ -347,7 +464,6 @@ private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writ | |
| { | ||
| case FindNextStartingPositionMode.LeadingPrefix_LeftToRight_CaseSensitive: | ||
| Debug.Assert(!string.IsNullOrEmpty(code.FindOptimizations.LeadingCaseSensitivePrefix)); | ||
| additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;"); | ||
| EmitIndexOf(code.FindOptimizations.LeadingCaseSensitivePrefix); | ||
| break; | ||
|
|
||
|
|
@@ -356,13 +472,11 @@ private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writ | |
| case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive: | ||
| case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive: | ||
| Debug.Assert(code.FindOptimizations.FixedDistanceSets is { Count: > 0 }); | ||
| additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;"); | ||
| EmitFixedSet(); | ||
| break; | ||
|
|
||
| case FindNextStartingPositionMode.LiteralAfterLoop_LeftToRight_CaseSensitive: | ||
| Debug.Assert(code.FindOptimizations.LiteralAfterLoop is not null); | ||
| additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;"); | ||
| EmitLiteralAfterAtomicLoop(); | ||
| break; | ||
|
|
||
|
|
@@ -463,7 +577,6 @@ bool EmitAnchors() | |
| // the other anchors, which all skip all subsequent processing if found, with BOL we just use it | ||
| // to boost our position to the next line, and then continue normally with any searches. | ||
| writer.WriteLine("// Beginning-of-line anchor"); | ||
| additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;"); | ||
| additionalDeclarations.Add("int beginning = base.runtextbeg;"); | ||
| using (EmitBlock(writer, "if (pos > beginning && inputSpan[pos - 1] != '\\n')")) | ||
| { | ||
|
|
@@ -763,13 +876,15 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe | |
| writer.WriteLine($"int end = start + {(node.Kind == RegexNodeKind.Multi ? node.Str!.Length : 1)};"); | ||
| writer.WriteLine("base.Capture(0, start, end);"); | ||
| writer.WriteLine("base.runtextpos = end;"); | ||
| writer.WriteLine("return true;"); | ||
| return requiredHelpers; | ||
|
|
||
| case RegexNodeKind.Empty: | ||
| // This case isn't common in production, but it's very common when first getting started with the | ||
| // source generator and seeing what happens as you add more to expressions. When approaching | ||
| // it from a learning perspective, this is very common, as it's the empty string you start with. | ||
| writer.WriteLine("base.Capture(0, base.runtextpos, base.runtextpos);"); | ||
| writer.WriteLine("return true;"); | ||
| return requiredHelpers; | ||
| } | ||
|
|
||
|
|
@@ -781,7 +896,6 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe | |
|
|
||
| // Declare some locals. | ||
| string sliceSpan = "slice"; | ||
| writer.WriteLine("global::System.ReadOnlySpan<char> inputSpan = base.runtext;"); | ||
| writer.WriteLine("int pos = base.runtextpos, end = base.runtextend;"); | ||
| writer.WriteLine($"int original_pos = pos;"); | ||
| bool hasTimeout = EmitLoopTimeoutCounterIfNeeded(writer, rm); | ||
|
|
@@ -826,7 +940,7 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe | |
| } | ||
| writer.WriteLine("base.runtextpos = pos;"); | ||
| writer.WriteLine("base.Capture(0, original_pos, pos);"); | ||
| writer.WriteLine("return;"); | ||
| writer.WriteLine("return true;"); | ||
| writer.WriteLine(); | ||
|
|
||
| // We only get here in the code if the whole expression fails to match and jumps to | ||
|
|
@@ -837,6 +951,7 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe | |
| { | ||
| EmitUncaptureUntil("0"); | ||
| } | ||
| writer.WriteLine("return false;"); | ||
|
|
||
| // We're done with the match. | ||
|
|
||
|
|
@@ -846,8 +961,6 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe | |
| // And emit any required helpers. | ||
| if (additionalLocalFunctions.Count != 0) | ||
| { | ||
| writer.WriteLine("return;"); // not strictly necessary, just for readability | ||
|
|
||
| foreach (KeyValuePair<string, string[]> localFunctions in additionalLocalFunctions.OrderBy(k => k.Key)) | ||
| { | ||
| writer.WriteLine(); | ||
|
|
@@ -2148,7 +2261,7 @@ void EmitBoundary(RegexNode node) | |
| _ => "base.IsECMABoundary", | ||
| }; | ||
|
|
||
| using (EmitBlock(writer, $"if ({call}(pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}, base.runtextbeg, end))")) | ||
| using (EmitBlock(writer, $"if ({call}(inputSpan, pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}, base.runtextbeg, end))")) | ||
| { | ||
| writer.WriteLine($"goto {doneLabel};"); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -171,6 +171,10 @@ public static void CompileToAssembly(System.Text.RegularExpressions.RegexCompila | |
| public string GroupNameFromNumber(int i) { throw null; } | ||
| public int GroupNumberFromName(string name) { throw null; } | ||
| protected void InitializeReferences() { } | ||
| public bool IsMatch(System.ReadOnlySpan<char> input) { throw null; } | ||
| public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex)] string pattern) { throw null; } | ||
| public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options) { throw null; } | ||
| public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; } | ||
| public bool IsMatch(string input) { throw null; } | ||
| public bool IsMatch(string input, int startat) { throw null; } | ||
| public static bool IsMatch(string input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex)] string pattern) { throw null; } | ||
|
|
@@ -330,17 +334,20 @@ protected void DoubleCrawl() { } | |
| protected void DoubleStack() { } | ||
| protected void DoubleTrack() { } | ||
| protected void EnsureStorage() { } | ||
| protected abstract bool FindFirstChar(); | ||
| protected abstract void Go(); | ||
| protected abstract void InitTrackCount(); | ||
| protected virtual bool FindFirstChar() { throw null; } | ||
| protected virtual void Go() { throw null; } | ||
| protected virtual void InitTrackCount() { throw null; } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we delete our generated override of InitTrackCount in the source generator? What breaks if runtrackcount remains 0? I think that'll just mean that we end up with the default minimums enforced by InitializeForGo (or whatever it's now called). We should probably delete the override and then confirm the defaults make sense, and/or determine whether there's a different number we could be computing that would help presize this better. If memory serves, right now it's yielding the number of opcodes produced by RegexWriter that are backtracking... but that's no longer particularly relevant to how we use runtrack, and we don't use runstack at all... so there's a fair bit of waste here to be cleaned up. |
||
| protected bool IsBoundary(int index, int startpos, int endpos) { throw null; } | ||
| protected bool IsBoundary(System.ReadOnlySpan<char> inputSpan, int index, int startpos, int endpos) { throw null; } | ||
| protected bool IsECMABoundary(int index, int startpos, int endpos) { throw null; } | ||
| protected bool IsECMABoundary(System.ReadOnlySpan<char> inputSpan, int index, int startpos, int endpos) { throw null; } | ||
| protected bool IsMatched(int cap) { throw null; } | ||
| protected int MatchIndex(int cap) { throw null; } | ||
| protected int MatchLength(int cap) { throw null; } | ||
| protected int Popcrawl() { throw null; } | ||
| protected internal System.Text.RegularExpressions.Match? Scan(System.Text.RegularExpressions.Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick) { throw null; } | ||
| protected internal System.Text.RegularExpressions.Match? Scan(System.Text.RegularExpressions.Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick, System.TimeSpan timeout) { throw null; } | ||
| protected internal virtual void Scan(System.Text.RegularExpressions.Regex regex, System.ReadOnlySpan<char> text, int textstart, int prevlen, bool quick) { throw null; } | ||
|
||
| protected void TransferCapture(int capnum, int uncapnum, int start, int end) { } | ||
| protected void Uncapture() { } | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.