From dd903b00bd58e055b1d89dc15f67421900ee9200 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Thu, 9 Sep 2021 12:04:01 -0400 Subject: [PATCH 01/16] Add RegexGeneratorAttribute Adds the new RegexGenerator attribute that's a signal to the regex generator to generate code for the specified regex. --- .../ref/System.Text.RegularExpressions.cs | 10 +++++ .../src/System.Text.RegularExpressions.csproj | 5 ++- .../RegexGeneratorAttribute.cs | 44 +++++++++++++++++++ .../tests/RegexGeneratorAttributeTests.cs | 41 +++++++++++++++++ ...ystem.Text.RegularExpressions.Tests.csproj | 1 + 5 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexGeneratorAttribute.cs create mode 100644 src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorAttributeTests.cs diff --git a/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs b/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs index 2da9d90bb42a36..68e58c45b53d96 100644 --- a/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs +++ b/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs @@ -216,6 +216,16 @@ public RegexCompilationInfo(string pattern, System.Text.RegularExpressions.Regex public System.Text.RegularExpressions.RegexOptions Options { get { throw null; } set { } } public string Pattern { get { throw null; } set { } } } + [System.AttributeUsageAttribute(AttributeTargets.Method, AllowMultiple = false, Inherited = false)] + public sealed partial class RegexGeneratorAttribute : System.Attribute + { + public RegexGeneratorAttribute(string pattern) { } + public RegexGeneratorAttribute(string pattern, System.Text.RegularExpressions.RegexOptions options) { } + public RegexGeneratorAttribute(string pattern, System.Text.RegularExpressions.RegexOptions options, int matchTimeout) { } + public string Pattern { 
get; } + public System.Text.RegularExpressions.RegexOptions Options { get; } + public int MatchTimeout { get; } + } public partial class RegexMatchTimeoutException : System.TimeoutException, System.Runtime.Serialization.ISerializable { public RegexMatchTimeoutException() { } diff --git a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj index 77351ca146f7ae..0294128f3eb62a 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj +++ b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj @@ -5,10 +5,9 @@ enable + - - @@ -25,8 +24,10 @@ + + diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexGeneratorAttribute.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexGeneratorAttribute.cs new file mode 100644 index 00000000000000..1e6e1e81d4917d --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexGeneratorAttribute.cs @@ -0,0 +1,44 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Threading; + +namespace System.Text.RegularExpressions; + +/// Instructs the System.Text.RegularExpressions source generator to generate an implementation of the specified regular expression. +[AttributeUsage(AttributeTargets.Method, AllowMultiple = false, Inherited = false)] +public sealed class RegexGeneratorAttribute : Attribute +{ + /// Initializes a new instance of the with the specified pattern. + /// The regular expression pattern to match. + public RegexGeneratorAttribute(string pattern) : this (pattern, RegexOptions.None) + { + } + + /// Initializes a new instance of the with the specified pattern and options. 
+ /// The regular expression pattern to match. + /// A bitwise combination of the enumeration values that modify the regular expression. + public RegexGeneratorAttribute(string pattern, RegexOptions options) : this (pattern, options, Timeout.Infinite) + { + } + + /// Initializes a new instance of the <see cref="RegexGeneratorAttribute"/> with the specified pattern, options, and timeout. + /// The regular expression pattern to match. + /// A bitwise combination of the enumeration values that modify the regular expression. + /// A time-out interval (milliseconds), or <see cref="Timeout.Infinite"/> to indicate that the method should not time out. + public RegexGeneratorAttribute(string pattern, RegexOptions options, int matchTimeout) + { + Pattern = pattern; + Options = options; + MatchTimeout = matchTimeout; + } + + /// Gets the regular expression pattern to match. + public string Pattern { get; } + + /// Gets a bitwise combination of the enumeration values that modify the regular expression. + public RegexOptions Options { get; } + + /// Gets a time-out interval (milliseconds), or <see cref="Timeout.Infinite"/> to indicate that the method should not time out. + public int MatchTimeout { get; } +} diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorAttributeTests.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorAttributeTests.cs new file mode 100644 index 00000000000000..aab33600f80dc4 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorAttributeTests.cs @@ -0,0 +1,41 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Threading; +using Xunit; + +namespace System.Text.RegularExpressions.Tests +{ + public class RegexGeneratorAttributeTests + { + [Theory] + [InlineData(null, RegexOptions.None, Timeout.Infinite)] + [InlineData("", (RegexOptions)12345, -2)] + [InlineData("a.*b", RegexOptions.Compiled | RegexOptions.CultureInvariant, 1)] + public void Ctor_Roundtrips(string pattern, RegexOptions options, int matchTimeout) + { + RegexGeneratorAttribute a; + + if (matchTimeout == -1) + { + if (options == RegexOptions.None) + { + a = new RegexGeneratorAttribute(pattern); + Assert.Equal(pattern, a.Pattern); + Assert.Equal(RegexOptions.None, a.Options); + Assert.Equal(Timeout.Infinite, a.MatchTimeout); + } + + a = new RegexGeneratorAttribute(pattern, options); + Assert.Equal(pattern, a.Pattern); + Assert.Equal(options, a.Options); + Assert.Equal(Timeout.Infinite, a.MatchTimeout); + } + + a = new RegexGeneratorAttribute(pattern, options, matchTimeout); + Assert.Equal(pattern, a.Pattern); + Assert.Equal(options, a.Options); + Assert.Equal(matchTimeout, a.MatchTimeout); + } + } +} diff --git a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj index 74f7ea6d9255c1..d4c93aa8f8998d 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj +++ b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj @@ -47,6 +47,7 @@ + From 111a50b74cf7ec3c7e03949ff50e13391c32f3f4 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Thu, 9 Sep 2021 20:29:11 -0400 Subject: [PATCH 02/16] Implement RegexGenerator Add a source generator for generating C# code for Regex. This is primarily a port of RegexCompiler.cs, generating C# code instead of MSIL. 
--- .../gen/DiagnosticDescriptors.cs | 85 + .../gen/Polyfills.cs | 84 + .../gen/RegexGenerator.Emitter.cs | 3133 +++++++++++++++++ .../gen/RegexGenerator.Parser.cs | 307 ++ .../gen/RegexGenerator.cs | 68 + .../gen/Resources/Strings.resx | 318 ++ .../gen/Resources/xlf/Strings.cs.xlf | 337 ++ .../gen/Resources/xlf/Strings.de.xlf | 337 ++ .../gen/Resources/xlf/Strings.es.xlf | 337 ++ .../gen/Resources/xlf/Strings.fr.xlf | 337 ++ .../gen/Resources/xlf/Strings.it.xlf | 337 ++ .../gen/Resources/xlf/Strings.ja.xlf | 337 ++ .../gen/Resources/xlf/Strings.ko.xlf | 337 ++ .../gen/Resources/xlf/Strings.pl.xlf | 337 ++ .../gen/Resources/xlf/Strings.pt-BR.xlf | 337 ++ .../gen/Resources/xlf/Strings.ru.xlf | 337 ++ .../gen/Resources/xlf/Strings.tr.xlf | 337 ++ .../gen/Resources/xlf/Strings.zh-Hans.xlf | 337 ++ .../gen/Resources/xlf/Strings.zh-Hant.xlf | 337 ++ ...m.Text.RegularExpressions.Generator.csproj | 43 + .../RegularExpressions/RegexBoyerMoore.cs | 4 +- .../Text/RegularExpressions/RegexCharClass.cs | 76 +- .../Text/RegularExpressions/RegexCode.cs | 57 +- .../Text/RegularExpressions/RegexCompiler.cs | 259 +- .../Text/RegularExpressions/RegexNode.cs | 183 +- .../RegularExpressions/RegexParseError.cs | 7 +- .../Text/RegularExpressions/RegexParser.cs | 7 +- .../RegularExpressions/RegexPrefixAnalyzer.cs | 2 +- .../Text/RegularExpressions/RegexTree.cs | 6 +- 29 files changed, 8728 insertions(+), 292 deletions(-) create mode 100644 src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs create mode 100644 src/libraries/System.Text.RegularExpressions/gen/Polyfills.cs create mode 100644 src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs create mode 100644 src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs create mode 100644 src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs create mode 100644 src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx create mode 100644 
src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf create mode 100644 src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf create mode 100644 src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf create mode 100644 src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf create mode 100644 src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf create mode 100644 src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf create mode 100644 src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf create mode 100644 src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf create mode 100644 src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf create mode 100644 src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf create mode 100644 src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf create mode 100644 src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf create mode 100644 src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf create mode 100644 src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj diff --git a/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs b/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs new file mode 100644 index 00000000000000..fc9484d5aa609c --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs @@ -0,0 +1,85 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using Microsoft.CodeAnalysis; + +namespace System.Text.RegularExpressions.Generator +{ + internal static class DiagnosticDescriptors + { + // TODO: Assign valid IDs + + public static DiagnosticDescriptor InvalidRegexGeneratorAttribute { get; } = new DiagnosticDescriptor( + id: "SYSLIB1100", + title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + messageFormat: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + category: "RegexGenerator", + DiagnosticSeverity.Error, + isEnabledByDefault: true); + + public static DiagnosticDescriptor MultipleRegexGeneratorAttributes { get; } = new DiagnosticDescriptor( + id: "SYSLIB1101", + title: new LocalizableResourceString(nameof(SR.MultipleRegexGeneratorAttributesMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + messageFormat: new LocalizableResourceString(nameof(SR.MultipleRegexGeneratorAttributesMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + category: "RegexGenerator", + DiagnosticSeverity.Error, + isEnabledByDefault: true); + + public static DiagnosticDescriptor InvalidRegexArguments { get; } = new DiagnosticDescriptor( + id: "SYSLIB1102", + title: new LocalizableResourceString(nameof(SR.InvalidRegexArgumentsMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + messageFormat: new LocalizableResourceString(nameof(SR.InvalidRegexArgumentsMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + category: "RegexGenerator", + DiagnosticSeverity.Error, + isEnabledByDefault: true); + + public static DiagnosticDescriptor RegexMethodMustReturnRegex { get; } = new DiagnosticDescriptor( + id: 
"SYSLIB1103", + title: new LocalizableResourceString(nameof(SR.RegexMethodMustReturnRegexMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustReturnRegexMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + category: "RegexGenerator", + DiagnosticSeverity.Error, + isEnabledByDefault: true); + + public static DiagnosticDescriptor RegexMethodMustBeParameterless { get; } = new DiagnosticDescriptor( + id: "SYSLIB1104", + title: new LocalizableResourceString(nameof(SR.RegexMethodMustBeParameterlessMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustBeParameterlessMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + category: "RegexGenerator", + DiagnosticSeverity.Error, + isEnabledByDefault: true); + + public static DiagnosticDescriptor RegexMethodMustNotBeGeneric { get; } = new DiagnosticDescriptor( + id: "SYSLIB1105", + title: new LocalizableResourceString(nameof(SR.RegexMethodMustNotBeGenericMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustNotBeGenericMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + category: "RegexGenerator", + DiagnosticSeverity.Error, + isEnabledByDefault: true); + + public static DiagnosticDescriptor RegexMethodShouldBePartial { get; } = new DiagnosticDescriptor( + id: "SYSLIB1106", + title: new LocalizableResourceString(nameof(SR.RegexMethodMustBePartialMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustBePartialMessage), SR.ResourceManager, 
typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + category: "RegexGenerator", + DiagnosticSeverity.Error, + isEnabledByDefault: true); + + public static DiagnosticDescriptor RegexMethodMustBeStatic { get; } = new DiagnosticDescriptor( + id: "SYSLIB1107", + title: new LocalizableResourceString(nameof(SR.RegexMethodMustBeStaticMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustBeStaticMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + category: "RegexGenerator", + DiagnosticSeverity.Error, + isEnabledByDefault: true); + + public static DiagnosticDescriptor InvalidLangVersion { get; } = new DiagnosticDescriptor( + id: "SYSLIB1108", + title: new LocalizableResourceString(nameof(SR.InvalidLangVersionMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + messageFormat: new LocalizableResourceString(nameof(SR.InvalidLangVersionMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + category: "RegexGenerator", + DiagnosticSeverity.Error, + isEnabledByDefault: true); + } +} diff --git a/src/libraries/System.Text.RegularExpressions/gen/Polyfills.cs b/src/libraries/System.Text.RegularExpressions/gen/Polyfills.cs new file mode 100644 index 00000000000000..692271fb1a5e43 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Polyfills.cs @@ -0,0 +1,84 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Buffers; +using System.Collections; +using System.Collections.Generic; +using System.Text; + +// This file provides helpers used to help compile some Regex source code (e.g. RegexParser) as part of the netstandard2.0 generator assembly. 
+ +namespace System.Text +{ + internal static class StringBuilderExtensions + { + public static StringBuilder Append(this StringBuilder stringBuilder, ReadOnlySpan span) => + stringBuilder.Append(span.ToString()); + + public static ReadOnlyMemory[] GetChunks(this StringBuilder stringBuilder) + { + var chars = new char[stringBuilder.Length]; + stringBuilder.CopyTo(0, chars, 0, chars.Length); + return new[] { new ReadOnlyMemory(chars) }; + } + } +} + +namespace System +{ + internal static class StringExtensions + { + public static string Create(int length, TState state, SpanAction action) + { + var array = new char[length]; + action(array, state); + return new string(array); + } + } +} + +namespace System.Buffers +{ + internal delegate void SpanAction(Span span, TArg arg); +} + +namespace System.Threading +{ + internal static class InterlockedExtensions + { + public static int Or(ref int location1, int value) + { + int current = location1; + while (true) + { + int newValue = current | value; + int oldValue = Interlocked.CompareExchange(ref location1, newValue, current); + if (oldValue == current) + { + return oldValue; + } + current = oldValue; + } + } + } +} + +namespace System.Text.RegularExpressions +{ + internal class RegexReplacement + { + public RegexReplacement(string rep, RegexNode concat, Hashtable caps) { } + + private const int Specials = 4; + public const int LeftPortion = -1; + public const int RightPortion = -2; + public const int LastGroup = -3; + public const int WholeString = -4; + } +} + +namespace System.Runtime.CompilerServices +{ + internal static class IsExternalInit { } +} diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs new file mode 100644 index 00000000000000..d71c44db9e0ae1 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -0,0 +1,3133 @@ +// Licensed to the .NET Foundation under 
one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Buffers.Binary; +using System.CodeDom.Compiler; +using System.Collections; +using System.Collections.Generic; +using System.Diagnostics; +using System.Globalization; +using System.IO; +using System.Runtime.InteropServices; +using System.Security.Cryptography; +using System.Text; +using System.Threading; +using Microsoft.CodeAnalysis.CSharp; + +// NOTE: The logic in this file is largely a copy of logic in RegexCompiler, emitting C# instead of MSIL. +// Most changes made to this file should be kept in sync, so far as bug fixes and relevant optimizations +// are concerned. + +namespace System.Text.RegularExpressions.Generator +{ + public partial class RegexGenerator + { + /// Code for a [GeneratedCode] attribute to put on the top-level generated members. + private static readonly string s_generatedCodeAttribute = $"[GeneratedCodeAttribute(\"{typeof(RegexGenerator).Assembly.GetName().Name}\", \"{typeof(RegexGenerator).Assembly.GetName().Version}\")]"; + + /// Emits the code for the specified regular expression classes to a string. + /// A list of classes containing a regex method to be generated. + /// CancellationToken used to request cancellation of the code generation. + /// A string containing the emitted code. 
+ private static string Emit(IReadOnlyList regexClasses, CancellationToken cancellationToken) + { + var sb = new StringBuilder(4096); + + var writer = new IndentedTextWriter(new StringWriter(sb)); + + writer.WriteLine("// "); + writer.WriteLine("#nullable enable"); + writer.WriteLine("#pragma warning disable CS0168 // Variable declared but never used"); + writer.WriteLine("#pragma warning disable CS0164 // Unreferenced label"); + writer.WriteLine(); + writer.WriteLine("using System;"); + writer.WriteLine("using System.Diagnostics;"); + writer.WriteLine("using System.Collections;"); + writer.WriteLine("using System.CodeDom.Compiler;"); + writer.WriteLine("using System.Globalization;"); + writer.WriteLine("using System.Runtime.CompilerServices;"); + writer.WriteLine("using System.Text.RegularExpressions;"); + writer.WriteLine("using System.Threading;"); + + int counter = 0; + foreach (RegexClass rc in regexClasses) + { + cancellationToken.ThrowIfCancellationRequested(); + writer.WriteLine(); + GenerateRegexClass(writer, rc, ref counter); + } + + return sb.ToString(); + } + + /// Generates the code for one regular expression class. 
+ private static void GenerateRegexClass(IndentedTextWriter writer, RegexClass regexClass, ref int counter) + { + // Emit the namespace + if (!string.IsNullOrWhiteSpace(regexClass.Namespace)) + { + writer.WriteLine($"namespace {regexClass.Namespace}"); + writer.WriteLine("{"); + writer.Indent++; + } + + // Emit containing types + RegexClass parent = regexClass.ParentClass; + var parentClasses = new Stack(); + while (parent != null) + { + parentClasses.Push($"partial {parent.Keyword} {parent.Name} {parent.Constraints}"); + parent = parent.ParentClass; + } + while (parentClasses.Count != 0) + { + writer.WriteLine($"{parentClasses.Pop()}"); + writer.WriteLine("{"); + writer.Indent++; + } + + // Emit the direct parent type + writer.WriteLine($"partial {regexClass.Keyword} {regexClass.Name} {regexClass.Constraints}"); + writer.WriteLine("{"); + writer.Indent++; + + // Generate a unique name to describe the regex instance. This includes both + // the method name the user provided and a unique counter value, plus a simple + // non-randomized (for determinism) hash of the previous content to try to make + // the name that much harder to predict. + counter++; + string generatedName = $"GeneratedRegex_{regexClass.Method.MethodName}_{counter:X}_"; + generatedName += ComputeStringHash(generatedName).ToString("X"); + + // Generate the regex type + GenerateRegex(writer, regexClass.Method, generatedName); + + while (writer.Indent != 0) + { + writer.Indent--; + writer.WriteLine("}"); + } + + // FNV-1a hash function. The actual algorithm used doesn't matter; just something simple + // to create a pseudo-random value based on input text. + static uint ComputeStringHash(string s) + { + uint hashCode = 2166136261; + foreach (char c in s) + { + hashCode = (c ^ hashCode) * 16777619; + } + return hashCode; + } + } + + /// Gets whether a given regular expression method is supported by the code generator. 
+ private static bool SupportsCustomCodeGeneration(RegexMethod rm) + { + const RegexOptions SupportedOptions = + RegexOptions.IgnoreCase | + RegexOptions.Multiline | + RegexOptions.ExplicitCapture | + RegexOptions.Compiled | + RegexOptions.Singleline | + RegexOptions.IgnorePatternWhitespace | + RegexOptions.RightToLeft | + RegexOptions.ECMAScript | + RegexOptions.CultureInvariant; + + // If we see an option we're not aware of (but that was allowed through), don't emit custom regex code. + return (rm.Options & ~(int)SupportedOptions) == 0; + } + + /// Generates the code for a regular expression method. + private static void GenerateRegex(IndentedTextWriter writer, RegexMethod rm, string id) + { + string patternExpression = Literal(rm.Pattern); + string optionsExpression = $"(RegexOptions)({rm.Options})"; + string timeoutExpression = rm.MatchTimeout == Timeout.Infinite ? + "Timeout.InfiniteTimeSpan" : + $"TimeSpan.FromMilliseconds({rm.MatchTimeout.Value.ToString(CultureInfo.InvariantCulture)})"; + + writer.WriteLine(s_generatedCodeAttribute); + writer.WriteLine($"{rm.Modifiers} Regex {rm.MethodName}() => {id}.Instance;"); + writer.WriteLine(); + writer.WriteLine(s_generatedCodeAttribute); + writer.WriteLine($"{(writer.Indent != 0 ? "private" : "internal")} sealed class {id} : Regex"); + writer.WriteLine("{"); + writer.Write($" public static Regex Instance {{ get; }} = "); + + // If we can't support custom generation for this regex, spit out a Regex constructor call. 
+ if (!SupportsCustomCodeGeneration(rm)) + { + writer.WriteLine($"new Regex({patternExpression}, {optionsExpression}, {timeoutExpression});"); + writer.WriteLine("}"); + return; + } + + writer.WriteLine($"new {id}();"); + writer.WriteLine(); + writer.WriteLine($" private {id}()"); + writer.WriteLine($" {{"); + writer.WriteLine($" pattern = {patternExpression};"); + writer.WriteLine($" roptions = {optionsExpression};"); + writer.WriteLine($" internalMatchTimeout = {timeoutExpression};"); + writer.WriteLine($" factory = new RunnerFactory();"); + if (rm.Code.Caps is not null) + { + writer.Write($" Caps = new Hashtable {{"); + AppendHashtableContents(writer, rm.Code.Caps); + writer.WriteLine(" };"); + } + if (rm.Tree.CapNames is not null) + { + writer.Write($" CapNames = new Hashtable {{"); + AppendHashtableContents(writer, rm.Tree.CapNames); + writer.WriteLine(" };"); + } + if (rm.Tree.CapsList is not null) + { + writer.Write($" capslist = new string[] {{"); + string separator = ""; + foreach (string s in rm.Tree.CapsList) + { + writer.Write(separator); + writer.Write(Literal(s)); + separator = ", "; + } + writer.WriteLine(" };"); + } + writer.WriteLine($" capsize = {rm.Code.CapSize};"); + writer.WriteLine($" InitializeReferences();"); + writer.WriteLine($" }}"); + writer.WriteLine(" "); + writer.WriteLine($" private sealed class RunnerFactory : RegexRunnerFactory"); + writer.WriteLine($" {{"); + writer.WriteLine($" protected override RegexRunner CreateInstance() => new Runner();"); + writer.WriteLine(); + writer.WriteLine($" private sealed class Runner : RegexRunner"); + writer.WriteLine($" {{"); + + // Main implementation methods + writer.WriteLine($" protected override void InitTrackCount() => runtrackcount = {rm.Code.TrackCount};"); // TODO: Make this a nop + writer.WriteLine(); + writer.WriteLine($" protected override bool FindFirstChar()"); + writer.WriteLine($" {{"); + writer.Indent += 4; + GenerateFindFirstChar(writer, rm, id); + writer.Indent -= 4; + 
writer.WriteLine($" }}"); + writer.WriteLine(); + writer.WriteLine($" protected override void Go()"); + writer.WriteLine($" {{"); + writer.Indent += 4; + GenerateGo(writer, rm, id); + writer.Indent -= 4; + writer.WriteLine($" }}"); + writer.WriteLine($" }}"); + writer.WriteLine($" }}"); + writer.WriteLine("}"); + + static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht) + { + IDictionaryEnumerator en = ht.GetEnumerator(); + string separator = ""; + while (en.MoveNext()) + { + writer.Write(separator); + separator = ", "; + + writer.Write($" {{ "); + if (en.Key is int key) + { + writer.Write(key); + } + else + { + writer.Write($"\"{en.Key}\""); + } + writer.Write($", {en.Value} }} "); + } + } + } + + /// Emits the body of the FindFirstChar override. + private static void GenerateFindFirstChar(IndentedTextWriter writer, RegexMethod rm, string id) + { + RegexOptions options = (RegexOptions)rm.Options; + var code = rm.Code; + var lcc = code.LeadingCharClasses; + bool rtl = code.RightToLeft; + bool hasTextInfo = false; + bool textInfoEmitted = false; + + // Emit locals initialization + writer.WriteLine("string runtext = base.runtext!;"); + writer.WriteLine("int runtextpos = base.runtextpos;"); + writer.WriteLine("int runtextend = base.runtextend;"); + if (rtl) + { + writer.WriteLine("int runtextbeg = base.runtextbeg;"); + } + writer.WriteLine($"int ch;"); + writer.WriteLine(); + + // Generate length check. If the input isn't long enough to possibly match, fail quickly. + // It's rare for min required length to be 0, so we don't bother special-casing the check, + // especially since we want the "return false" code regardless. + writer.WriteLine($"// Minimum required length check"); + int minRequiredLength = rm.Tree.MinRequiredLength; + string minRequiredLengthOffset = rm.Tree.MinRequiredLength > 0 ? $" - {rm.Tree.MinRequiredLength}" : ""; + Debug.Assert(minRequiredLength >= 0); + using (EmitBlock(writer, !rtl ? 
+ $"if (runtextpos <= runtextend{minRequiredLengthOffset})" : + $"if (runtextpos{minRequiredLengthOffset} >= runtextbeg)")) + { + GenerateAnchorAndLeadingChecks(); + } + writer.WriteLine(); + + writer.WriteLine("// No match"); + writer.WriteLine("ReturnFalse:"); + writer.WriteLine(!rm.Code.RightToLeft ? "base.runtextpos = runtextend;" : "base.runtextpos = runtextbeg;"); + writer.WriteLine("return false;"); + + void GenerateAnchorAndLeadingChecks() + { + // Generate anchor checks. + if ((code.LeadingAnchor & (RegexPrefixAnalyzer.Beginning | RegexPrefixAnalyzer.Start | RegexPrefixAnalyzer.EndZ | RegexPrefixAnalyzer.End | RegexPrefixAnalyzer.Bol)) != 0) + { + // TODO: RegexInterpreter also factors in a Boyer-Moore prefix check in places Compiled just returns true. + // Determine if we should do so here and in Compiled as well, and potentially update RegexInterpreter. + // Interpreted and Compiled also differ in various places as to whether they update positions, as do LTR vs RTL. Determine why. + switch (code.LeadingAnchor) + { + case RegexPrefixAnalyzer.Beginning: + writer.WriteLine($"// Beginning \\A anchor"); + if (!rtl) + { + using (EmitBlock(writer, "if (runtextpos > runtextbeg)")) + { + writer.WriteLine("goto ReturnFalse;"); + } + writer.WriteLine("return true;"); + } + else + { + // TODO: RegexOptions.Compiled doesn't ever return false here. Instead it updates the position. Why? + using (EmitBlock(writer, "if (runtextpos > runtextbeg)")) + { + writer.WriteLine("base.runtextpos = runtextbeg;"); + } + writer.WriteLine("return true;"); + } + writer.WriteLine(); + return; + + case RegexPrefixAnalyzer.Start: + writer.WriteLine($"// Start \\G anchor"); + if (!rtl) + { + using (EmitBlock(writer, "if (runtextpos > runtextstart)")) + { + writer.WriteLine("goto ReturnFalse;"); + } + writer.WriteLine("return true;"); + } + else + { + // TODO: RegexOptions.Compiled doesn't ever return false here. Instead it updates the position. Why? 
+ using (EmitBlock(writer, "if (runtextpos < runtextstart)")) + { + writer.WriteLine("goto ReturnFalse;"); + } + writer.WriteLine("return true;"); + } + writer.WriteLine(); + return; + + case RegexPrefixAnalyzer.EndZ: + // TODO: Why are the LTR and RTL cases inconsistent here with RegexOptions.Compiled? + writer.WriteLine($"// End \\Z anchor"); + if (!rtl) + { + using (EmitBlock(writer, "if (runtextpos < runtextend - 1)")) + { + writer.WriteLine("base.runtextpos = runtextend - 1;"); + } + writer.WriteLine("return true;"); + } + else + { + // TODO: This differs subtly between interpreted and compiled. Why? + using (EmitBlock(writer, "if (runtextpos < runtextend - 1 || (runtextpos == runtextend - 1 && runtext[runtextpos] != '\\n'))")) + { + writer.WriteLine("goto ReturnFalse;"); + } + writer.WriteLine("return true;"); + } + writer.WriteLine(); + return; + + case RegexPrefixAnalyzer.End when minRequiredLength == 0: // if it's > 0, we already output a more stringent check + writer.WriteLine($"// End \\z anchor"); + if (!rtl) + { + using (EmitBlock(writer, "if (runtextpos < runtextend)")) + { + writer.WriteLine("base.runtextpos = runtextend;"); + } + writer.WriteLine("return true;"); + } + else + { + using (EmitBlock(writer, "if (runtextpos < runtextend)")) + { + writer.WriteLine("goto ReturnFalse;"); + } + writer.WriteLine("return true;"); + } + writer.WriteLine(); + return; + + case RegexPrefixAnalyzer.Bol when !rtl: // Don't bother optimizing for the niche case of RegexOptions.RightToLeft | RegexOptions.Multiline + // Optimize the handling of a Beginning-Of-Line (BOL) anchor. BOL is special, in that unlike + // other anchors like Beginning, there are potentially multiple places a BOL can match. So unlike + // the other anchors, which all skip all subsequent processing if found, with BOL we just use it + // to boost our position to the next line, and then continue normally with any Boyer-Moore or + // leading char class searches. 
+ writer.WriteLine("// Beginning-of-line anchor"); + using (EmitBlock(writer, "if (runtextpos > runtextbeg && runtext[runtextpos - 1] != '\\n')")) + { + writer.WriteLine("int newlinePos = runtext.IndexOf('\\n', runtextpos);"); + using (EmitBlock(writer, "if (newlinePos == -1 || newlinePos + 1 > runtextend)")) + { + writer.WriteLine("goto ReturnFalse;"); + } + writer.WriteLine("runtextpos = newlinePos + 1;"); + } + writer.WriteLine(); + break; + } + } + + if (code.BoyerMoorePrefix is RegexBoyerMoore rbm && rbm.NegativeUnicode is null) + { + // Compiled Boyer-Moore string matching + writer.WriteLine("// Boyer-Moore prefix matching"); + + EmitTextInfoIfRequired(writer, ref textInfoEmitted, ref hasTextInfo, rm); + + int beforefirst; + int last; + if (!rtl) + { + //limitLocal = "runtextend"; + beforefirst = -1; + last = rbm.Pattern.Length - 1; + } + else + { + //limitLocal = "runtextbeg"; + beforefirst = rbm.Pattern.Length; + last = 0; + } + + int chLast = rbm.Pattern[last]; + + writer.WriteLine(!rtl ? + $"runtextpos += {rbm.Pattern.Length - 1};" : + $"runtextpos -= {rbm.Pattern.Length};"); + writer.WriteLine("int offset = 0;"); + + writer.WriteLine("goto Start;"); + writer.WriteLine(); + writer.WriteLine("DefaultAdvance:"); + writer.WriteLine($"offset = {(!rtl ? rbm.Pattern.Length : -rbm.Pattern.Length)};"); + writer.WriteLine(); + writer.WriteLine("Advance:"); + writer.WriteLine("runtextpos += offset;"); + writer.WriteLine(); + writer.WriteLine("Start:"); + using (EmitBlock(writer, !rtl ? 
"if (runtextpos >= runtextend)" : "if (runtextpos < runtextbeg)")) + { + writer.WriteLine("goto ReturnFalse;"); + } + writer.WriteLine(); + writer.WriteLine($"ch = {ToLowerIfNeeded(hasTextInfo, options, "runtext[runtextpos]", rbm.CaseInsensitive)};"); + + using (EmitBlock(writer, $"if (ch == {Literal((char)chLast)})")) + { + writer.WriteLine("goto PartialMatch;"); + } + writer.WriteLine($"ch -= {rbm.LowASCII};"); + using (EmitBlock(writer, $"if ((uint)ch > {rbm.HighASCII - rbm.LowASCII})")) + { + writer.WriteLine("goto DefaultAdvance;"); + } + + writer.Write($"offset = "); + int negativeRange = rbm.HighASCII - rbm.LowASCII + 1; + if (negativeRange > 1) + { + // Create a string to store the lookup table we use to find the offset. + // Store the offsets into the string. RightToLeft has negative offsets, so to support it with chars (unsigned), we negate + // the values to be stored in the string, and then at run time after looking up the offset in the string, negate it again. + Debug.Assert(rbm.Pattern.Length <= char.MaxValue, "RegexBoyerMoore should have limited the size allowed."); + Span span = new char[negativeRange]; + for (int i = 0; i < span.Length; i++) + { + int offset = rbm.NegativeASCII[i + rbm.LowASCII]; + if (offset == beforefirst) + { + offset = rbm.Pattern.Length; + } + else if (rtl) + { + offset = -offset; + } + Debug.Assert(offset >= 0 && offset <= char.MaxValue); + span[i] = (char)offset; + } + + if (rtl) + { + writer.Write('-'); + } + writer.WriteLine($"{Literal(span.ToString())}[ch];"); + } + else + { + Debug.Assert(negativeRange == 1); + int offset = rbm.NegativeASCII[rbm.LowASCII]; + if (offset == beforefirst) + { + offset = rtl ? 
-rbm.Pattern.Length : rbm.Pattern.Length; + } + writer.WriteLine($"{offset.ToString(CultureInfo.InvariantCulture)};"); + } + writer.WriteLine($"goto Advance;"); + writer.WriteLine(); + + writer.WriteLine($"PartialMatch:"); + writer.WriteLine($"int test = runtextpos;"); + + int nextAvailableLabelId = 0; + int prevLabelOffset = int.MaxValue; + int prevLabel = 0; + for (int i = rbm.Pattern.Length - 2; i >= 0; i--) + { + int charIndex = !rtl ? i : rbm.Pattern.Length - 1 - i; + + string nextCharExpr = ToLowerIfNeeded(hasTextInfo, options, (!rtl ? "runtext[--test]" : "runtext[++test]"), rbm.CaseInsensitive && RegexCharClass.ParticipatesInCaseConversion(rbm.Pattern[charIndex])); + string matchExpr = Literal(rbm.Pattern[charIndex]); + if (prevLabelOffset == rbm.Positive[charIndex]) + { + using (EmitBlock(writer, $"if ({nextCharExpr} != {matchExpr})")) + { + writer.WriteLine($"goto L{prevLabel};"); + } + } + else + { + int lNext = nextAvailableLabelId++; + using (EmitBlock(writer, $"if ({nextCharExpr} == {matchExpr})")) + { + writer.WriteLine($"goto L{lNext};"); + } + prevLabel = nextAvailableLabelId++; + prevLabelOffset = rbm.Positive[charIndex]; + writer.WriteLine(); + + writer.WriteLine($"L{prevLabel}:"); + writer.WriteLine($"offset = {prevLabelOffset.ToString(CultureInfo.InvariantCulture)};"); + writer.WriteLine($"goto Advance;"); + writer.WriteLine(); + + writer.WriteLine($"L{lNext}:"); + } + } + + writer.WriteLine(); + writer.WriteLine(!rtl ? 
+ $"base.runtextpos = test;" : + $"base.runtextpos = test + 1;"); + writer.WriteLine($"return true;"); + } + else if (code.LeadingCharClasses is null) + { + writer.WriteLine("return true;"); + } + else if (rtl) + { + EmitTextInfoIfRequired(writer, ref textInfoEmitted, ref hasTextInfo, rm); + + Debug.Assert(lcc.Length == 1, "Only the FirstChars and not MultiFirstChars computation is supported for RightToLeft"); + string set = lcc[0].CharClass; + if (RegexCharClass.IsSingleton(set)) + { + char ch = RegexCharClass.SingletonChar(set); + using (EmitBlock(writer, "for (int i = runtextpos - 1; i >= runtextbeg; i--)")) + { + using (EmitBlock(writer, $"if (runtext[i] == {ToLowerIfNeeded(hasTextInfo, options, Literal(ch), lcc[0].CaseInsensitive)})")) + { + writer.WriteLine("base.runtextpos = i + 1;"); + writer.WriteLine("return true;"); + } + } + } + else + { + using (EmitBlock(writer, "for (int i = runtextpos - 1; i >= runtextbeg; i--)")) + { + using (EmitBlock(writer, $"if ({MatchCharacterClass(hasTextInfo, options, "runtext[i]", set, lcc[0].CaseInsensitive)})")) + { + writer.WriteLine("runtextpos = i + 1;"); + writer.WriteLine("return true;"); + } + } + } + } + else + { + Debug.Assert(lcc is not null && lcc.Length > 0); + + // If minRequiredLength > 0, we already output a more stringent check. In the rare case + // where we were unable to get an accurate enough min required length to ensure it's larger + // than the prefixes we calculated, we also need to ensure we have enough space for those, + // as they also represent a min required length. + if (minRequiredLength < lcc.Length) + { + writer.WriteLine($"// Validate at least {lcc.Length} characters are available to match"); + string endExpr = lcc.Length > 1 ? 
$"runtextend - {lcc.Length - 1}" : "runtextend"; + using (EmitBlock(writer, $"if (runtextpos >= {endExpr})")) + { + writer.WriteLine("goto ReturnFalse;"); + } + writer.WriteLine(); + } + + writer.WriteLine("ReadOnlySpan span = runtext.AsSpan(runtextpos, runtextend - runtextpos);"); + + // If we can use IndexOf{Any}, try to accelerate the skip loop via vectorization to match the first prefix. + // We can use it if this is a case-sensitive class with a small number of characters in the class. + Span setChars = stackalloc char[3]; // up to 3 characters handled by IndexOf{Any} below + int setCharsCount = 0, charClassIndex = 0; + bool canUseIndexOf = + !lcc[0].CaseInsensitive && + (setCharsCount = RegexCharClass.GetSetChars(lcc[0].CharClass, setChars)) > 0 && + !RegexCharClass.IsNegated(lcc[0].CharClass); + bool needLoop = !canUseIndexOf || lcc.Length > 1; + + FinishEmitScope loopBlock = default; + if (needLoop) + { + EmitTextInfoIfRequired(writer, ref textInfoEmitted, ref hasTextInfo, rm); + writer.WriteLine(); + string upperBound = lcc.Length > 1 ? $"span.Length - {lcc.Length - 1}" : "span.Length"; + loopBlock = EmitBlock(writer, $"for (int i = 0; i < {upperBound}; i++)"); + } + + if (canUseIndexOf) + { + charClassIndex = 1; + + string span = needLoop ? 
"span.Slice(i)" : "span"; + string indexOf = setCharsCount switch + { + 1 => $"{span}.IndexOf({Literal(setChars[0])})", + 2 => $"{span}.IndexOfAny({Literal(setChars[0])}, {Literal(setChars[1])})", + _ => $"{span}.IndexOfAny({Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])})", + }; + + if (needLoop) + { + writer.WriteLine($"int indexOfPos = {indexOf};"); + using (EmitBlock(writer, "if (indexOfPos < 0)")) + { + writer.WriteLine("goto ReturnFalse;"); + } + writer.WriteLine(); + writer.WriteLine("i += indexOfPos;"); + + if (lcc.Length > 1) + { + using (EmitBlock(writer, $"if (i >= span.Length - {lcc.Length - 1})")) + { + writer.WriteLine("goto ReturnFalse;"); + } + } + } + else + { + writer.WriteLine($"int i = {indexOf};"); + using (EmitBlock(writer, $"if (i < 0)")) + { + writer.WriteLine("goto ReturnFalse;"); + } + } + } + + Debug.Assert(charClassIndex == 0 || charClassIndex == 1); + for (; charClassIndex < lcc.Length; charClassIndex++) + { + // if (!CharInClass(textSpan[i + charClassIndex], prefix[0], "...")) continue; + Debug.Assert(needLoop); + string spanIndex = charClassIndex > 0 ? $"span[i + {charClassIndex}]" : "span[i]"; + string charInClassExpr = MatchCharacterClass(hasTextInfo, options, spanIndex, lcc[charClassIndex].CharClass, lcc[charClassIndex].CaseInsensitive); + writer.WriteLine($"if (!{charInClassExpr}) continue;"); + } + + writer.WriteLine("base.runtextpos = runtextpos + i;"); + writer.WriteLine("return true;"); + + loopBlock.Dispose(); + } + } + } + + /// Emits the body of the Go override. 
+        private static void GenerateGo(IndentedTextWriter writer, RegexMethod rm, string id)
+        {
+            // The root of the tree is expected to be the implicit capture node; the expression
+            // that actually needs code generated for it is that capture's first child.
+            RegexNode root = rm.Tree.Root;
+            Debug.Assert(root.Type == RegexNode.Capture);
+
+            // Fall back to the complete implementation when the expression isn't supported by the
+            // simplified code generator or when it was constructed with RegexOptions.RightToLeft.
+            if (!RegexNode.NodeSupportsSimplifiedCodeGenerationImplementation(root.Child(0), RegexNode.DefaultMaxRecursionDepth) ||
+                (((RegexOptions)root.Options) & RegexOptions.RightToLeft) != 0)
+            {
+                GenerateCompleteGo(writer, rm, id);
+                return;
+            }
+
+            GenerateSimplifiedGo(writer, rm, id);
+        }
+
+        /// Emits the body of a simplified Go implementation that's possible when there's minimal backtracking required by the expression.
+        private static void GenerateSimplifiedGo(IndentedTextWriter writer, RegexMethod rm, string id)
+        {
+            RegexOptions options = (RegexOptions)rm.Options;
+            var code = rm.Code;
+            var lcc = code.LeadingCharClasses;
+            bool rtl = code.RightToLeft;
+            bool hasTimeout = false;
+
+            int nextLocalId = 0;
+            string GetNextLocalId() => $"i{nextLocalId++}"; ;
+
+            RegexNode node = rm.Tree.Root;
+            Debug.Assert(node.Type == RegexNode.Capture, "Every generated tree should begin with a capture node");
+            Debug.Assert(node.ChildCount() == 1, "Capture nodes should have one child");
+
+            // Skip the Capture node. We handle the implicit root capture specially.
+            node = node.Child(0);
+
+            // Declare some locals.
+ string textSpanLocal = "textSpan"; + writer.WriteLine($"string runtext = base.runtext!;"); + writer.WriteLine($"int runtextpos = base.runtextpos;"); + writer.WriteLine($"int runtextend = base.runtextend;"); + writer.WriteLine($"int originalruntextpos = runtextpos;"); + writer.WriteLine($"char ch;"); + hasTimeout = EmitLoopTimeoutCounterIfNeeded(writer, rm); + + // TextInfo textInfo = CultureInfo.CurrentCulture.TextInfo; // only if the whole expression or any subportion is ignoring case, and we're not using invariant + bool hasTextInfo = EmitInitializeCultureForGoIfNecessary(writer, rm); + + // The implementation tries to use const indexes into the span wherever possible, which we can do + // in all places except for variable-length loops. For everything else, we know at any point in + // the regex exactly how far into it we are, and we can use that to index into the span created + // at the beginning of the routine to begin at exactly where we're starting in the input. For + // variable-length loops, we index at this textSpanPos + i, and then after the loop we slice the input + // by i so that this position is still accurate for everything after it. + int textSpanPos = 0; + LoadTextSpanLocal(writer, defineLocal: true); + writer.WriteLine(); + + int localCounter = 0; + string NextLocalName(string prefix) => $"{prefix}{localCounter++}"; + + int labelCounter = 0; + string DefineLabel() => $"L{labelCounter++}"; + void MarkLabel(string label) => writer.WriteLine($"{label}:"); + void Goto(string label) => writer.WriteLine($"goto {label};"); + string doneLabel = "NoMatch"; + + // Emit the code for all nodes in the tree. 
+ EmitNode(node); + + // Emit success + writer.WriteLine("// Match"); + if (textSpanPos > 0) + { + writer.WriteLine($"runtextpos += {textSpanPos};"); + } + writer.WriteLine("base.runtextpos = runtextpos;"); + writer.WriteLine("Capture(0, originalruntextpos, runtextpos);"); + writer.WriteLine("return;"); + writer.WriteLine(); + + // Emit failure + writer.WriteLine("// No match"); + MarkLabel(doneLabel); + if ((node.Options & RegexNode.HasCapturesFlag) != 0) + { + writer.WriteLine("while (Crawlpos() != 0) Uncapture();"); + } + else + { + // We can't have a label at the end of the method, so explicitly + // add a "return;" if the End label would otherwise be an issue. + writer.WriteLine("return;"); + } + return; + + static bool IsCaseInsensitive(RegexNode node) => (node.Options & RegexOptions.IgnoreCase) != 0; + + // Creates a span for runtext starting at runtextpos until base.runtextend. + void LoadTextSpanLocal(IndentedTextWriter writer, bool defineLocal = false) + { + if (defineLocal) + { + writer.Write("ReadOnlySpan "); + } + writer.WriteLine($"{textSpanLocal} = runtext.AsSpan(runtextpos, runtextend - runtextpos);"); + } + + // Emits the sum of a constant and a value from a local. + string Sum(int constant, string? local = null) => + local is null ? constant.ToString() : + constant == 0 ? local : + $"{constant} + {local}"; + + // Emits a check that the span is large enough at the currently known static position to handle the required additional length. + void EmitSpanLengthCheck(int requiredLength, string? dynamicRequiredLength = null) + { + Debug.Assert(requiredLength > 0); + using (EmitBlock(writer, $"if ({SpanLengthCheck(requiredLength, dynamicRequiredLength)})")) + { + writer.WriteLine($"goto {doneLabel};"); + } + } + + string SpanLengthCheck(int requiredLength, string? 
dynamicRequiredLength = null) =>
+                $"{Sum(textSpanPos + requiredLength, dynamicRequiredLength)} > (uint){textSpanLocal}.Length";
+
+            // Adds the value of textSpanPos into the runtextpos local, slices textspan by the corresponding amount,
+            // and zeros out textSpanPos.
+            void TransferTextSpanPosToRunTextPos()
+            {
+                if (textSpanPos > 0)
+                {
+                    writer.WriteLine($"runtextpos += {textSpanPos};");
+                    writer.WriteLine($"{textSpanLocal} = {textSpanLocal}.Slice({textSpanPos});");
+                    textSpanPos = 0;
+                }
+            }
+
+            // Returns " + N" for the current static text span position N, or an empty string when that position is 0.
+            string AddTextSpanPos() => textSpanPos > 0 ? $" + {textSpanPos}" : "";
+
+            // Emits the code for an atomic alternate, one that once a branch successfully matches is non-backtracking into it.
+            // This amounts to generating the code for each branch, with failures in a branch resetting state to what it was initially
+            // and then jumping to the next branch. If the alternation contains captures, a failed branch also unwinds any captures
+            // it made, back to the crawl position recorded when the alternation was entered.
+            void EmitAtomicAlternate(RegexNode node)
+            {
+                // Label to jump to when any branch completes successfully.
+                string doneAlternateLabel = DefineLabel();
+
+                // Save off runtextpos. We'll need to reset this each time a branch fails.
+                string startingRunTextPosName = NextLocalName("startingRunTextPos");
+                writer.WriteLine($"int {startingRunTextPosName} = runtextpos;");
+                int startingTextSpanPos = textSpanPos;
+
+                // If the alternation's branches contain captures, save off the relevant
+                // state. Note that this is only about subexpressions within the alternation,
+                // as the alternation is atomic, so we're not concerned about captures after
+                // the alternation. The generated local gets a unique name, just like the
+                // starting runtextpos local above, so that an alternation nested inside one of
+                // this alternation's branches doesn't redeclare a local with the same name.
+                string? startingCrawlposName = null;
+                if ((node.Options & RegexNode.HasCapturesFlag) != 0)
+                {
+                    startingCrawlposName = NextLocalName("startingCrawlpos");
+                    writer.WriteLine($"int {startingCrawlposName} = Crawlpos();");
+                }
+                writer.WriteLine();
+
+                // A failure in a branch other than the last should jump to the next
+                // branch, not to the final done.
+                string postAlternateDoneLabel = doneLabel;
+
+                int childCount = node.ChildCount();
+                for (int i = 0; i < childCount - 1; i++)
+                {
+                    using var __ = EmitScope(writer, $"Branch {i}");
+
+                    string nextBranch = DefineLabel();
+                    doneLabel = nextBranch;
+
+                    // Emit the code for each branch.
+                    EmitNode(node.Child(i));
+
+                    // If we get here in the generated code, the branch completed successfully.
+                    // Before jumping to the end, we need to zero out textSpanPos, so that no
+                    // matter what the value is after the branch, whatever follows the alternate
+                    // will see the same textSpanPos.
+                    TransferTextSpanPosToRunTextPos();
+                    writer.WriteLine($"goto {doneAlternateLabel};");
+
+                    // Reset state for next branch and loop around to generate it. This includes
+                    // setting runtextpos back to what it was at the beginning of the alternation,
+                    // updating textSpan to be the full length it was, and if there's a capture that
+                    // needs to be reset, uncapturing it.
+                    MarkLabel(nextBranch);
+                    writer.WriteLine($"runtextpos = {startingRunTextPosName};");
+                    LoadTextSpanLocal(writer);
+                    textSpanPos = startingTextSpanPos;
+                    if (startingCrawlposName is not null)
+                    {
+                        EmitUncaptureUntil(startingCrawlposName);
+                    }
+                }
+
+                // If the final branch fails, that's like any other failure, and we jump to
+                // done (unless we have captures we need to unwind first, in which case we uncapture
+                // them and then jump to done).
+                using (EmitScope(writer, $"Branch {childCount - 1}"))
+                {
+                    if (startingCrawlposName is not null)
+                    {
+                        string uncapture = DefineLabel();
+                        doneLabel = uncapture;
+                        EmitNode(node.Child(childCount - 1));
+                        doneLabel = postAlternateDoneLabel;
+                        TransferTextSpanPosToRunTextPos();
+                        writer.WriteLine($"goto {doneAlternateLabel};");
+                        MarkLabel(uncapture);
+                        EmitUncaptureUntil(startingCrawlposName);
+                        writer.WriteLine($"goto {doneLabel};");
+                    }
+                    else
+                    {
+                        doneLabel = postAlternateDoneLabel;
+                        EmitNode(node.Child(childCount - 1));
+                        TransferTextSpanPosToRunTextPos();
+                    }
+                }
+
+                // Successfully completed the alternate.
+                MarkLabel(doneAlternateLabel);
+                writer.WriteLine(";");
+                Debug.Assert(textSpanPos == 0);
+            }
+
+            // Emits the code for a Capture node.
+            void EmitCapture(RegexNode node)
+            {
+                // Get the capture number. This needs to be kept in sync with MapCapNum in RegexWriter.
+                Debug.Assert(node.Type == RegexNode.Capture);
+                Debug.Assert(node.N == -1, "Currently only support capnum, not uncapnum");
+                int capnum = node.M;
+                if (capnum != -1 && rm.Code.Caps != null)
+                {
+                    capnum = (int)rm.Code.Caps[capnum]!;
+                }
+
+                // Remember where the capture starts in terms of runtextpos.
+                TransferTextSpanPosToRunTextPos();
+                string startingRunTextPosName = NextLocalName("startingRunTextPos");
+                writer.WriteLine($"int {startingRunTextPosName} = runtextpos;");
+
+                // Emit child node.
+                EmitNode(node.Child(0));
+
+                // Record the capture from the saved starting position to the current position.
+                TransferTextSpanPosToRunTextPos();
+                writer.WriteLine($"Capture({capnum}, {startingRunTextPosName}, runtextpos);");
+            }
+
+            // Emits code to unwind the capture stack until the crawl position specified in the provided local.
+            // crawlposLocal is the name of the generated local holding the crawl position to unwind to; it
+            // defaults to "startingCrawlpos" so that callers that don't pass a name emit the same code as before.
+            void EmitUncaptureUntil(string crawlposLocal = "startingCrawlpos")
+            {
+                writer.WriteLine($"while (Crawlpos() != {crawlposLocal}) Uncapture();");
+            }
+
+            // Emits the code to handle a positive lookahead assertion.
+            void EmitPositiveLookaheadAssertion(RegexNode node)
+            {
+                // Save off runtextpos. We'll need to reset this upon successful completion of the lookahead.
+                string startingRunTextPosName = NextLocalName("startingRunTextPos");
+                writer.WriteLine($"int {startingRunTextPosName} = runtextpos;");
+                int startingTextSpanPos = textSpanPos;
+
+                // Emit the child.
+                EmitNode(node.Child(0));
+
+                // After the child completes successfully, reset the text positions.
+                // Do not reset captures, which persist beyond the lookahead.
+                writer.WriteLine($"runtextpos = {startingRunTextPosName};");
+                LoadTextSpanLocal(writer);
+                textSpanPos = startingTextSpanPos;
+            }
+
+            // Emits the code to handle a negative lookahead assertion.
+            void EmitNegativeLookaheadAssertion(RegexNode node)
+            {
+                // Save off runtextpos. 
We'll need to reset this upon successful completion of the lookahead. + string startingRunTextPosName = NextLocalName("startingRunTextPos"); + writer.WriteLine($"int {startingRunTextPosName} = runtextpos;"); + int startingTextSpanPos = textSpanPos; + + string originalDoneLabel = doneLabel; + doneLabel = DefineLabel(); + + // Emit the child. + EmitNode(node.Child(0)); + + // If the generated code ends up here, it matched the lookahead, which actually + // means failure for a _negative_ lookahead, so we need to jump to the original done. + Goto(originalDoneLabel); + + // Failures (success for a negative lookahead) jump here. + MarkLabel(doneLabel); + doneLabel = originalDoneLabel; + + // After the child completes in failure (success for negative lookahead), reset the text positions. + writer.WriteLine($"runtextpos = {startingRunTextPosName};"); + LoadTextSpanLocal(writer); + textSpanPos = startingTextSpanPos; + } + + // Emits the code for the node. + void EmitNode(RegexNode node) + { + using var _ = EmitScope(writer, SymbolDisplay.FormatLiteral(node.Description(), quote: false)); + switch (node.Type) + { + case RegexNode.One: + case RegexNode.Notone: + case RegexNode.Set: + EmitSingleChar(node); + break; + + case RegexNode.Boundary: + case RegexNode.NonBoundary: + case RegexNode.ECMABoundary: + case RegexNode.NonECMABoundary: + EmitBoundary(node); + break; + + case RegexNode.Beginning: + case RegexNode.Start: + case RegexNode.Bol: + case RegexNode.Eol: + case RegexNode.End: + case RegexNode.EndZ: + EmitAnchors(node); + break; + + case RegexNode.Multi: + EmitMultiChar(node); + break; + + case RegexNode.Oneloopatomic: + case RegexNode.Notoneloopatomic: + case RegexNode.Setloopatomic: + EmitSingleCharAtomicLoop(node); + break; + + case RegexNode.Loop: + EmitAtomicNodeLoop(node); + break; + + case RegexNode.Lazyloop: + // An atomic lazy loop amounts to doing the minimum amount of work possible. 
+ // That means iterating as little as is required, which means a repeater + // for the min, and if min is 0, doing nothing. + Debug.Assert(node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic)); + if (node.M > 0) + { + EmitNodeRepeater(node); + } + break; + + case RegexNode.Atomic: + EmitNode(node.Child(0)); + break; + + case RegexNode.Alternate: + EmitAtomicAlternate(node); + break; + + case RegexNode.Oneloop: + case RegexNode.Onelazy: + case RegexNode.Notoneloop: + case RegexNode.Notonelazy: + case RegexNode.Setloop: + case RegexNode.Setlazy: + EmitSingleCharRepeater(node); + break; + + case RegexNode.Concatenate: + int childCount = node.ChildCount(); + for (int i = 0; i < childCount; i++) + { + EmitNode(node.Child(i)); + } + break; + + case RegexNode.Capture: + EmitCapture(node); + break; + + case RegexNode.Require: + EmitPositiveLookaheadAssertion(node); + break; + + case RegexNode.Prevent: + EmitNegativeLookaheadAssertion(node); + break; + + case RegexNode.Nothing: + writer.WriteLine($"goto {doneLabel};"); + break; + + case RegexNode.Empty: + // Emit nothing. + break; + + case RegexNode.UpdateBumpalong: + EmitUpdateBumpalong(); + break; + + default: + Debug.Fail($"Unexpected node type: {node.Type}"); + break; + } + } + + // Emits the code to handle updating base.runtextpos to runtextpos in response to + // an UpdateBumpalong node. This is used when we want to inform the scan loop that + // it should bump from this location rather than from the original location. + void EmitUpdateBumpalong() + { + TransferTextSpanPosToRunTextPos(); + writer.WriteLine("base.runtextpos = runtextpos;"); + } + + // Emits the code to handle a single-character match. 
+            // Emits a check for one character of the input against the node's character or set, jumping to the
+            // current done label on a mismatch, and then advances the static text span position by one.
+            //   node:            the node supplying the character (node.Ch) or set (node.Str) to compare against.
+            //   emitLengthCheck: when true, the emitted condition also verifies the span has a character at the position.
+            //   offset:          optional name of a generated local that is added to the static position when indexing.
+            void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, string? offset = null)
+            {
+                // Start with the expression that reads the character; each case below turns it into
+                // the condition under which the match FAILS.
+                string expr = $"{textSpanLocal}[{Sum(textSpanPos, offset)}]";
+                switch (node.Type)
+                {
+                    // This only emits a single check, but it's called from the looping constructs in a loop
+                    // to generate the code for a single check, so we map those looping constructs to the
+                    // appropriate single check.
+
+                    case RegexNode.Set:
+                    case RegexNode.Setlazy:
+                    case RegexNode.Setloop:
+                    case RegexNode.Setloopatomic:
+                        expr = $"!{MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node))}";
+                        break;
+
+                    case RegexNode.One:
+                    case RegexNode.Onelazy:
+                    case RegexNode.Oneloop:
+                    case RegexNode.Oneloopatomic:
+                        expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch));
+                        expr = $"{expr} != {Literal(node.Ch)}";
+                        break;
+
+                    default:
+                        Debug.Assert(node.Type == RegexNode.Notone || node.Type == RegexNode.Notonelazy || node.Type == RegexNode.Notoneloop || node.Type == RegexNode.Notoneloopatomic);
+                        expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch));
+                        expr = $"{expr} == {Literal(node.Ch)}";
+                        break;
+                }
+
+                using (EmitBlock(writer, emitLengthCheck ? $"if ({SpanLengthCheck(1, offset)} || {expr})" : $"if ({expr})"))
+                {
+                    writer.WriteLine($"goto {doneLabel};");
+                }
+
+                // One more character has been consumed at a statically known position.
+                textSpanPos++;
+            }
+
+            // Emits the code to handle a boundary check on a character. 
+ void EmitBoundary(RegexNode node) + { + string call; + switch (node.Type) + { + case RegexNode.Boundary: + call = "!IsBoundary"; + break; + + case RegexNode.NonBoundary: + call = "IsBoundary"; + break; + + case RegexNode.ECMABoundary: + call = "!IsECMABoundary"; + break; + + default: + Debug.Assert(node.Type == RegexNode.NonECMABoundary); + call = "IsECMABoundary"; + break; + } + + using (EmitBlock(writer, $"if ({call}(runtextpos{AddTextSpanPos()}, base.runtextbeg, runtextend))")) + { + writer.WriteLine($"goto {doneLabel};"); + } + } + + // Emits the code to handle various anchors. + void EmitAnchors(RegexNode node) + { + Debug.Assert(textSpanPos >= 0); + switch (node.Type) + { + case RegexNode.Beginning: + case RegexNode.Start: + if (textSpanPos > 0) + { + // If we statically know we've already matched part of the regex, there's no way we're at the + // beginning or start, as we've already progressed past it. + Goto(doneLabel); + } + else + { + using (EmitBlock(writer, node.Type == RegexNode.Beginning ? 
"if (runtextpos != runtextbeg)" : "if (runtextpos != runtextstart)")) + { + writer.WriteLine($"goto {doneLabel};"); + } + } + break; + + case RegexNode.Bol: + if (textSpanPos > 0) + { + using (EmitBlock(writer, $"if ({textSpanLocal}[{textSpanPos - 1}] != '\\n')")) + { + writer.WriteLine($"goto {doneLabel};"); + } + } + else + { + // We can't use our textSpan in this case, because we'd need to access textSpan[-1], so we access the runtext field directly: + using (EmitBlock(writer, $"if (runtextpos > runtextbeg && runtext[runtextpos - 1] != '\\n')")) + { + writer.WriteLine($"goto {doneLabel};"); + } + } + break; + + case RegexNode.End: + using (EmitBlock(writer, $"if ({textSpanPos} < {textSpanLocal}.Length)")) + { + writer.WriteLine($"goto {doneLabel};"); + } + break; + + case RegexNode.EndZ: + writer.WriteLine($"if ({textSpanPos} < {textSpanLocal}.Length - 1 || ({textSpanPos} < {textSpanLocal}.Length && {textSpanLocal}[{textSpanPos}] != '\\n'))"); + using (EmitBlock(writer, null)) + { + writer.WriteLine($"goto {doneLabel};"); + } + break; + + case RegexNode.Eol: + using (EmitBlock(writer, $"if ({textSpanPos} < {textSpanLocal}.Length && {textSpanLocal}[{textSpanPos}] != '\\n')")) + { + writer.WriteLine($"goto {doneLabel};"); + } + break; + } + } + + // Emits the code to handle a multiple-character match. + void EmitMultiChar(RegexNode node) + { + string str = node.Str!; + Debug.Assert(str.Length != 0); + + // TODO: RegexOptions.Compiled has a more complicated unrolling here, but it knows the code is being compiled on the same + // endianness and bitness machine as it'll be executed on. Determine if we want to do something more here. 
+ + bool caseInsensitive = IsCaseInsensitive(node); + + const int MaxUnrollLength = 8; // TODO: Tune this + if (str.Length <= MaxUnrollLength) + { + writer.WriteLine($"if ((uint){textSpanLocal}.Length < {textSpanPos + str.Length} ||"); + for (int i = 0; i < str.Length; i++) + { + writer.Write($" {ToLowerIfNeeded(hasTextInfo, options, $"{textSpanLocal}[{textSpanPos}]", caseInsensitive)} != {Literal(str[i])}"); + textSpanPos++; + writer.WriteLine(i < str.Length - 1 ? " ||" : ")"); + } + using (EmitBlock(writer, null)) + { + writer.WriteLine($"goto {doneLabel};"); + } + } + else if (!caseInsensitive) + { + using (EmitBlock(writer, $"if (!{textSpanLocal}.Slice({textSpanPos}).StartsWith({Literal(node.Str)}))")) + { + writer.WriteLine($"goto {doneLabel};"); + } + textSpanPos += node.Str.Length; + } + else + { + EmitSpanLengthCheck(str.Length); + string i = GetNextLocalId(); + using (EmitBlock(writer, $"for (int {i} = 0; {i} < {Literal(str)}.Length; {i}++)")) + { + using (EmitBlock(writer, $"if ({ToLower(hasTextInfo, options, $"{textSpanLocal}[{textSpanPos} + {i}]")} != {ToLower(hasTextInfo, options, $"{Literal(str)}[{i}]")})")) + { + writer.WriteLine($"goto {doneLabel};"); + } + } + textSpanPos += node.Str.Length; + } + } + + // Emits the code to handle a loop (repeater) with a fixed number of iterations. + // RegexNode.M is used for the number of iterations; RegexNode.N is ignored. + void EmitSingleCharRepeater(RegexNode node) + { + int iterations = node.M; + if (iterations == 0) + { + // No iterations, nothing to do. + return; + } + + // if ((uint)(textSpanPos + iterations - 1) >= (uint)textSpan.Length) goto doneLabel; + EmitSpanLengthCheck(iterations); + + // Arbitrary limit for unrolling vs creating a loop. We want to balance size in the generated + // code with other costs, like the (small) overhead of slicing to create the temp span to iterate. 
+ const int MaxUnrollSize = 16; + + if (iterations <= MaxUnrollSize) + { + // if (textSpan[textSpanPos] != c1 || + // textSpan[textSpanPos + 1] != c2 || + // ...) + // goto doneLabel; + for (int i = 0; i < iterations; i++) + { + EmitSingleChar(node, emitLengthCheck: false); + } + } + else + { + string spanLocal = "slice"; // As this repeater doesn't wrap arbitrary node emits, this shouldn't conflict with anything + writer.WriteLine($"ReadOnlySpan {spanLocal} = {textSpanLocal}.Slice({textSpanPos}, {iterations});"); + string i = GetNextLocalId(); + using (EmitBlock(writer, $"for (int {i} = 0; {i} < {spanLocal}.Length; {i}++)")) + { + EmitTimeoutCheck(writer, hasTimeout); + + string tmpTextSpanLocal = textSpanLocal; // we want EmitSingleChar to refer to this temporary + int tmpTextSpanPos = textSpanPos; + textSpanLocal = spanLocal; + textSpanPos = 0; + EmitSingleChar(node, emitLengthCheck: false, offset: i); + textSpanLocal = tmpTextSpanLocal; + textSpanPos = tmpTextSpanPos; + } + textSpanPos += iterations; + } + } + + // Emits the code to handle a loop (repeater) with a fixed number of iterations. + // This is used both to handle the case of A{5, 5} where the min and max are equal, + // and also to handle part of the case of A{3, 5}, where this method is called to + // handle the A{3, 3} portion, and then remaining A{0, 2} is handled separately. + void EmitNodeRepeater(RegexNode node) + { + int iterations = node.M; + Debug.Assert(iterations > 0); + Debug.Assert(node.ChildCount() == 1); + + if (iterations == 1) + { + EmitNode(node.Child(0)); + return; + } + + // Ensure textSpanPos is 0 prior to emitting the child. 
+ TransferTextSpanPosToRunTextPos(); + + string i = GetNextLocalId(); + using (EmitBlock(writer, $"for (int {i} = 0; {i} < {iterations}; {i}++)")) + { + EmitTimeoutCheck(writer, hasTimeout); + EmitNode(node.Child(0)); + TransferTextSpanPosToRunTextPos(); + Debug.Assert(textSpanPos == 0); + } + } + + // Emits the code to handle a non-backtracking, variable-length loop around a single character comparison. + void EmitSingleCharAtomicLoop(RegexNode node) + { + Debug.Assert( + node.Type == RegexNode.Oneloopatomic || + node.Type == RegexNode.Notoneloopatomic || + node.Type == RegexNode.Setloopatomic); + + // If this is actually a repeater, emit that instead. + if (node.M == node.N) + { + EmitSingleCharRepeater(node); + return; + } + + // If this is actually an optional single char, emit that instead. + if (node.M == 0 && node.N == 1) + { + EmitAtomicSingleCharZeroOrOne(node); + return; + } + + Debug.Assert(node.N > node.M); + int minIterations = node.M; + int maxIterations = node.N; + + string originalDoneLabel = doneLabel; + doneLabel = DefineLabel(); + + Span setChars = stackalloc char[3]; // 3 is max we can use with IndexOfAny + int numSetChars = 0; + + string iterationLocal = "i"; // No need for a dynamically named value, as no other 'i' can be in scope + if (node.Type == RegexNode.Notoneloopatomic && + maxIterations == int.MaxValue && + (!IsCaseInsensitive(node) || !RegexCharClass.ParticipatesInCaseConversion(node.Ch))) + { + // For Notoneloopatomic, we're looking for a specific character, as everything until we find + // it is consumed by the loop. If we're unbounded, such as with ".*" and if we're case-sensitive, + // we can use the vectorized IndexOf to do the search, rather than open-coding it. The unbounded + // restriction is purely for simplicity; it could be removed in the future with additional code to + // handle the unbounded case. 
+ + writer.Write($"int {iterationLocal} = {textSpanLocal}"); + if (textSpanPos > 0) + { + writer.Write($".Slice({textSpanPos})"); + } + writer.WriteLine($".IndexOf({Literal(node.Ch)});"); + + using (EmitBlock(writer, $"if ({iterationLocal} != -1)")) + { + writer.WriteLine($"goto {doneLabel};"); + } + + writer.WriteLine(textSpanPos > 0 ? + $"{iterationLocal} = {textSpanLocal}.Length - {textSpanPos};" : + $"{iterationLocal} = {textSpanLocal}.Length;"); + } + else if (node.Type == RegexNode.Setloopatomic && + maxIterations == int.MaxValue && + !IsCaseInsensitive(node) && + (numSetChars = RegexCharClass.GetSetChars(node.Str!, setChars)) > 1 && + RegexCharClass.IsNegated(node.Str!)) + { + // If the set is negated and contains only 2 or 3 characters (if it contained 1 and was negated, it would + // have been reduced to a Notoneloopatomic), we can use an IndexOfAny to find any of the target characters. + // As with the notoneloopatomic above, the unbounded constraint is purely for simplicity. + + writer.Write($"int {iterationLocal} = {textSpanLocal}"); + if (textSpanPos != 0) + { + writer.Write($".Slice({textSpanPos})"); + } + writer.WriteLine(numSetChars == 2 ? + $".IndexOfAny({Literal(setChars[0])}, {Literal(setChars[1])});" : + $".IndexOfAny({Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])});"); + using (EmitBlock(writer, $"if ({iterationLocal} != -1)")) + { + writer.WriteLine($"goto {doneLabel};"); + } + + writer.WriteLine(textSpanPos > 0 ? + $"{iterationLocal} = {textSpanLocal}.Length - {textSpanPos};" : + $"{iterationLocal} = {textSpanLocal}.Length;"); + } + else if (node.Type == RegexNode.Setloopatomic && maxIterations == int.MaxValue && node.Str == RegexCharClass.AnyClass) + { + // .* was used with RegexOptions.Singleline, which means it'll consume everything. Just jump to the end. + // The unbounded constraint is the same as in the Notoneloopatomic case above, done purely for simplicity. 
+ + // int i = runtextend - runtextpos; + TransferTextSpanPosToRunTextPos(); + writer.WriteLine($"int {iterationLocal} = runtextend - runtextpos;"); + } + else + { + // For everything else, do a normal loop. + + string expr = $"{textSpanLocal}[{iterationLocal}]"; + switch (node.Type) + { + case RegexNode.Oneloopatomic: + expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch)); + expr = $"{expr} != {Literal(node.Ch)}"; + break; + case RegexNode.Notoneloopatomic: + expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch)); + expr = $"{expr} == {Literal(node.Ch)}"; + break; + case RegexNode.Setloopatomic: + expr = MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node)); + expr = $"!{expr}"; + break; + } + + // Transfer text pos to runtextpos to help with bounds check elimination on the loop. + TransferTextSpanPosToRunTextPos(); + + writer.WriteLine($"int {iterationLocal} = 0;"); + using (EmitBlock(writer, $"while (true)")) + { + EmitTimeoutCheck(writer, hasTimeout); + string clause = "if ("; + if (maxIterations != int.MaxValue) + { + clause += $"{iterationLocal} >= {maxIterations} || "; + } + using (EmitBlock(writer, $"{clause}(uint){iterationLocal} >= (uint){textSpanLocal}.Length || {expr})")) + { + writer.WriteLine($"goto {doneLabel};"); + } + writer.WriteLine($"{iterationLocal}++;"); + } + } + + // Done: + MarkLabel(doneLabel); + doneLabel = originalDoneLabel; // Restore the original done label + + // Check to ensure we've found at least min iterations. + if (minIterations > 0) + { + using (EmitBlock(writer, $"if ({iterationLocal} < {minIterations})")) + { + writer.WriteLine($"goto {doneLabel};"); + } + } + + // Now that we've completed our optional iterations, advance the text span + // and runtextpos by the number of iterations completed. 
+ + writer.WriteLine($"{textSpanLocal} = {textSpanLocal}.Slice({iterationLocal});"); + writer.WriteLine($"runtextpos += {iterationLocal};"); + } + + // Emits the code to handle a non-backtracking optional zero-or-one loop. + void EmitAtomicSingleCharZeroOrOne(RegexNode node) + { + string skipUpdatesLabel = DefineLabel(); + + Debug.Assert( + node.Type == RegexNode.Oneloopatomic || + node.Type == RegexNode.Notoneloopatomic || + node.Type == RegexNode.Setloopatomic); + Debug.Assert(node.M == 0 && node.N == 1); + + string expr = $"{textSpanLocal}[{textSpanPos}]"; + switch (node.Type) + { + case RegexNode.Oneloopatomic: + expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch)); + expr = $"{expr} == {Literal(node.Ch)}"; + break; + case RegexNode.Notoneloopatomic: + expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch)); + expr = $"{expr} != {Literal(node.Ch)}"; + break; + case RegexNode.Setloopatomic: + expr = MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node)); + expr = $"{expr}"; + break; + } + + using (EmitBlock(writer, $"if ((uint){textSpanPos} < (uint){textSpanLocal}.Length && {expr})")) + { + writer.WriteLine($"{textSpanLocal} = {textSpanLocal}.Slice(1);"); + writer.WriteLine($"runtextpos++;"); + } + } + + // Emits the code to handle a non-backtracking, variable-length loop around another node. + void EmitAtomicNodeLoop(RegexNode node) + { + Debug.Assert(node.Type == RegexNode.Loop, $"Unexpected type: {node.Type}"); + Debug.Assert(node.M < int.MaxValue, $"Unexpected M={node.M}"); + Debug.Assert(node.N >= node.M, $"Unexpected M={node.M}, N={node.N}"); + int minIterations = node.M; + int maxIterations = node.N; + + // If this is actually a repeater, emit that instead. 
+ if (minIterations == maxIterations) + { + EmitNodeRepeater(node); + return; + } + + string originalDoneLabel = doneLabel; + doneLabel = DefineLabel(); + + // We might loop any number of times. In order to ensure this loop + // and subsequent code sees textSpanPos the same regardless, we always need it to contain + // the same value, and the easiest such value is 0. So, we transfer + // textSpanPos to runtextpos, and ensure that any path out of here has + // textSpanPos as 0. + TransferTextSpanPosToRunTextPos(); + + // int i = 0; + string iterationLocal = NextLocalName("iter"); + writer.WriteLine($"int {iterationLocal} = 0;"); + using (EmitBlock(writer, maxIterations == int.MaxValue ? "while (true)" : $"while ({iterationLocal} < {maxIterations})")) + { + EmitTimeoutCheck(writer, hasTimeout); + string successfulIterationLabel = DefineLabel(); + + // Iteration body + + string prevDone = doneLabel; + doneLabel = DefineLabel(); + + // Save off runtextpos. + string startingRunTextPosLocal = NextLocalName("startingRunTextPos"); + writer.WriteLine($"int {startingRunTextPosLocal} = runtextpos;"); + + // Emit the child. + Debug.Assert(textSpanPos == 0); + EmitNode(node.Child(0)); + TransferTextSpanPosToRunTextPos(); // ensure textSpanPos remains 0 + Goto(successfulIterationLabel); // iteration succeeded + + // If the generated code gets here, the iteration failed. + // Reset state, branch to done. + MarkLabel(doneLabel); + doneLabel = prevDone; // reset done label + writer.WriteLine($"runtextpos = {startingRunTextPosLocal};"); + Goto(doneLabel); + + // Successful iteration. + MarkLabel(successfulIterationLabel); + writer.WriteLine($"{iterationLocal}++;"); + } + + // Check to ensure we've found at least min iterations. 
+ if (minIterations > 0) + { + // Done: + MarkLabel(doneLabel); + doneLabel = originalDoneLabel; // Restore the original done label + using (EmitBlock(writer, $"if ({iterationLocal} < {minIterations})")) + { + writer.WriteLine($"goto {doneLabel};"); + } + } + + // We can't have a label in front of a closing brace, so if we didn't emit the label + // earlier, emit now that we've closed out the scope. + if (minIterations <= 0) + { + // Done: + MarkLabel(doneLabel); + writer.WriteLine(";"); + doneLabel = originalDoneLabel; // Restore the original done label + } + } + } + + /// Emits the body of a complete Go implementation that fully supports backtracking. + private static void GenerateCompleteGo(IndentedTextWriter writer, RegexMethod rm, string id) + { + const int Stackpop = 0; // pop one + const int Stackpop2 = 1; // pop two + const int Capback = 3; // uncapture + const int Capback2 = 4; // uncapture 2 + const int Branchmarkback2 = 5; // back2 part of branchmark + const int Lazybranchmarkback2 = 6; // back2 part of lazybranchmark + const int Branchcountback2 = 7; // back2 part of branchcount + const int Lazybranchcountback2 = 8; // back2 part of lazybranchcount + const int Forejumpback = 9; // back part of forejump + const int Uniquecount = 10; + const string Backtrack = "Backtrack"; // label for backtracking + + int[] codes = rm.Code.Codes; + RegexOptions options = (RegexOptions)rm.Options.Value; + + int labelCounter = 0; + string DefineLabel(string prefix = "L") => $"{prefix}{labelCounter++}"; + void MarkLabel(string label) => writer.WriteLine($"{label}:"); + + var labels = new string?[codes.Length]; // a label for every operation in _codes + BacktrackNote[]? 
notes = null; // a list of the backtracking states to be generated + int noteCount = 0; // true count of _notes (allocation grows exponentially) + + int currentOpcode = 0; // the current opcode being processed + int currentCodePos = 0; // the current code being translated + int currentBacktrackNote = 0; // the current backtrack-note being translated + + // special code fragments + var uniqueNote = new int[Uniquecount]; // notes indices for code that should be emitted <= once + var forwardJumpsThroughSwitch = new int[codes.Length]; // indices for forward-jumps-through-switch (for allocations) + + // Generates the forward logic corresponding directly to the regex codes. + // In the absence of backtracking, this is all we would need. + writer.WriteLine($"string runtext = base.runtext!;"); + writer.WriteLine($"int runtextbeg = base.runtextbeg;"); + writer.WriteLine($"int runtextend = base.runtextend;"); + writer.WriteLine($"int runtextpos = base.runtextpos;"); + writer.WriteLine($"int[] runtrack = base.runtrack!;"); + writer.WriteLine($"int runtrackpos = base.runtrackpos;"); + writer.WriteLine($"int[] runstack = base.runstack!;"); + writer.WriteLine($"int runstackpos = base.runstackpos;"); + writer.WriteLine("int tmp1, tmp2, ch;"); + bool hasTimeout = EmitLoopTimeoutCounterIfNeeded(writer, rm); + bool hasTextInfo = EmitInitializeCultureForGoIfNecessary(writer, rm); + writer.WriteLine(); + + uniqueNote.AsSpan().Fill(-1); + for (int codepos = 0; codepos < codes.Length; codepos += RegexCode.OpcodeSize(codes[codepos])) + { + forwardJumpsThroughSwitch[codepos] = -1; + labels[codepos] = DefineLabel(); + } + + currentBacktrackNote = -1; + for (int codepos = 0; codepos < codes.Length; codepos += RegexCode.OpcodeSize(codes[codepos])) + { + currentCodePos = codepos; + currentOpcode = codes[codepos]; + GenerateOneCode(labels[codepos]); + writer.WriteLine(); + } + + // Generate the backtracking switch jump table that allows us to simulate a stack of addresses, + // and contains 
the calls that expand the tracking and the grouping stack when they get too full. + MarkLabel(Backtrack); + + // (Equivalent of EnsureStorage, but written to avoid unnecessary local spilling.) + writer.WriteLine("int limit = runtrackcount * 4;"); + using (EmitBlock(writer, "if (runstackpos < limit)")) + { + writer.WriteLine("base.runstackpos = runstackpos;"); + writer.WriteLine("DoubleStack(); // might change runstackpos and runstack"); + writer.WriteLine("runstackpos = base.runstackpos;"); + writer.WriteLine("runstack = base.runstack!;"); + } + using (EmitBlock(writer, "if (runtrackpos < limit)")) + { + writer.WriteLine("base.runtrackpos = runtrackpos;"); + writer.WriteLine("DoubleTrack(); // might change runtrackpos and runtrack"); + writer.WriteLine("runtrackpos = base.runtrackpos;"); + writer.WriteLine("runtrack = base.runtrack!;"); + } + writer.WriteLine(); + using (EmitBlock(writer, $"switch (runtrack[runtrackpos++])")) + { + for (int i = 0; i < noteCount; i++) + { + using (EmitBlock(writer, $"case {i}:")) + { + BacktrackNote n = notes[i]; + if (n.flags != 0) + { + currentCodePos = n.codepos; + currentBacktrackNote = i; + currentOpcode = codes[n.codepos] | n.flags; + GenerateOneCode(null); // should always end in a goto + } + else + { + writer.WriteLine($"goto {n.label};"); + } + } + + writer.WriteLine(); + } + + using (EmitBlock(writer, "default:")) + { + writer.WriteLine("Debug.Fail($\"Unexpected backtracking state {runtrack[runtrackpos - 1]}\");"); + writer.WriteLine("break;"); + } + } + + return; + + /// + /// The main translation function. It translates the logic for a single opcode at + /// the current position. The structure of this function exactly mirrors + /// the structure of the inner loop of RegexInterpreter.Go(). + /// + /// + /// Note that since we're generating code, we can collapse many cases that are + /// dealt with one-at-a-time in RegexIntepreter. We can also unroll loops that + /// iterate over constant strings or sets. 
+ /// + void GenerateOneCode(string? label) + { + writer.WriteLine($"// {SymbolDisplay.FormatLiteral(RegexCode.OpcodeDescription(currentCodePos, rm.Code.Codes, rm.Code.Strings), quote: false)}"); + + if (label is not null) + { + MarkLabel(label); + } + + // Before executing any Regex code in the unrolled loop, + // we try checking for the match timeout: + EmitTimeoutCheck(writer, hasTimeout); + + // Now generate the code for the Regex code saved in _regexopcode. + switch (currentOpcode) + { + case RegexCode.Stop: + writer.WriteLine("base.runtextpos = runtextpos;"); + writer.WriteLine("return;"); + break; + + case RegexCode.Nothing: + writer.WriteLine($"goto {Backtrack};"); + break; + + case RegexCode.UpdateBumpalong: + // UpdateBumpalong should only exist in the code stream at such a point where the root + // of the backtracking stack contains the runtextpos from the start of this Go call. Replace + // that tracking value with the current runtextpos value. + writer.WriteLine("runtrack[^1] = runtextpos;"); + break; + + case RegexCode.Goto: + Goto(Operand(0)); + break; + + case RegexCode.Testref: + using (EmitBlock(writer, $"if (!IsMatched({Operand(0)}))")) + { + writer.WriteLine($"goto {Backtrack};"); + } + break; + + case RegexCode.Lazybranch: + PushTrack("runtextpos"); + Track(); + break; + + case RegexCode.Lazybranch | RegexCode.Back: + writer.WriteLine($"runtextpos = {PopTrack()};"); + Goto(Operand(0)); + break; + + case RegexCode.Nullmark: + PushStack(-1); + TrackUnique(Stackpop); + break; + + case RegexCode.Setmark: + PushStack("runtextpos"); + TrackUnique(Stackpop); + break; + + case RegexCode.Nullmark | RegexCode.Back: + case RegexCode.Setmark | RegexCode.Back: + PopDiscardStack(); + writer.WriteLine($"goto {Backtrack};"); + break; + + case RegexCode.Getmark: + writer.WriteLine($"runtextpos = {PopStack()};"); + PushTrack("runtextpos"); + Track(); + break; + + case RegexCode.Getmark | RegexCode.Back: + PushStack(PopTrack()); + writer.WriteLine($"goto 
{Backtrack};"); + break; + + case RegexCode.Capturemark: + { + if (Operand(1) != -1) + { + using (EmitBlock(writer, $"if (!IsMatched({Operand(1)}))")) + { + writer.WriteLine($"goto {Backtrack};"); + } + } + + const string Stacked = "tmp1"; + writer.WriteLine($"{Stacked} = {PopStack()};"); + writer.WriteLine(Operand(1) != -1 ? + $"TransferCapture({Operand(0)}, {Operand(1)}, {Stacked}, runtextpos);" : + $"Capture({Operand(0)}, {Stacked}, runtextpos);"); + PushTrack(Stacked); + TrackUnique(Operand(0) != -1 && Operand(1) != -1 ? Capback2 : Capback); + } + break; + + case RegexCode.Capturemark | RegexCode.Back: + PushStack(PopTrack()); + writer.WriteLine("Uncapture();"); + if (Operand(0) != -1 && Operand(1) != -1) + { + writer.WriteLine("Uncapture();"); + } + writer.WriteLine($"goto {Backtrack};"); + break; + + case RegexCode.Branchmark: + { + const string Mark = "tmp1"; + writer.WriteLine($"{Mark} = {PopStack()}; // mark"); + PushTrack(Mark); + using (EmitBlock(writer, $"if (runtextpos != {Mark})")) + { + PushTrack("runtextpos"); + PushStack("runtextpos"); + Track(); + Goto(Operand(0)); + } + using (EmitBlock(writer, "else")) + { + TrackUnique2(Branchmarkback2); + } + } + break; + + case RegexCode.Branchmark | RegexCode.Back: + writer.WriteLine($"runtextpos = {PopTrack()};"); + PopDiscardStack(); + TrackUnique2(Branchmarkback2); // track spot 0 is already in place + Advance(); + break; + + case RegexCode.Branchmark | RegexCode.Back2: + PushStack(PopTrack()); + writer.WriteLine($"goto {Backtrack};"); + break; + + case RegexCode.Lazybranchmark: + { + const string Mark = "tmp1"; + writer.WriteLine($"{Mark} = {PopStack()}; // mark"); + PushTrack($"{Mark} != -1 ? 
{Mark} : runtextpos"); + using (EmitBlock(writer, $"if (runtextpos != {Mark})")) + { + PushTrack("runtextpos"); + Track(); + Advance(); + } + PushStack(Mark); + TrackUnique2(Lazybranchmarkback2); + } + break; + + case RegexCode.Lazybranchmark | RegexCode.Back: + writer.WriteLine($"runtextpos = {PopTrack()};"); + PushStack("runtextpos"); + TrackUnique2(Lazybranchmarkback2); + Goto(Operand(0)); + break; + + case RegexCode.Lazybranchmark | RegexCode.Back2: + writer.WriteLine($"{ReadyReplaceStack(0)} = {PopTrack()};"); + writer.WriteLine($"goto {Backtrack};"); + break; + + case RegexCode.Nullcount: + PushStack(-1); + PushStack(Operand(0)); + TrackUnique(Stackpop2); + break; + + case RegexCode.Setcount: + PushStack("runtextpos"); + PushStack(Operand(0)); + TrackUnique(Stackpop2); + break; + + case RegexCode.Nullcount | RegexCode.Back: + case RegexCode.Setcount | RegexCode.Back: + PopDiscardStack(2); + writer.WriteLine($"goto {Backtrack};"); + break; + + case RegexCode.Branchcount: + { + const string Count = "tmp1"; + const string Mark = "tmp2"; + writer.WriteLine($"{Count} = {PopStack()}; // count"); + writer.WriteLine($"{Mark} = {PopStack()}; // mark"); + PushTrack(Mark); + using (EmitBlock(writer, $"if ({Count} < ({Mark} == runtextpos ? 
0 : {Operand(1)}))")) + { + PushStack("runtextpos"); + PushStack($"{Count} + 1"); + Track(); + Goto(Operand(0)); + } + PushTrack(Count); + TrackUnique2(Branchcountback2); + } + break; + + case RegexCode.Branchcount | RegexCode.Back: + { + const string Count = "tmp1"; + writer.WriteLine($"{Count} = {PopStack()} - 1; // count"); + using (EmitBlock(writer, $"if ({Count} >= 0)")) + { + writer.WriteLine($"runtextpos = {PopStack()};"); + PushTrack(Count); + TrackUnique2(Branchcountback2); + Advance(); + } + writer.WriteLine($"{ReadyReplaceStack(0)} = {PopTrack()};"); + PushStack(Count); + writer.WriteLine($"goto {Backtrack};"); + } + break; + + case RegexCode.Branchcount | RegexCode.Back2: + { + const string Mark = "tmp1"; + writer.WriteLine($"{Mark} = {PopTrack()}; // mark"); + PushStack(PopTrack()); + PushStack(Mark); + writer.WriteLine($"goto {Backtrack};"); + } + break; + + case RegexCode.Lazybranchcount: + { + const string Count = "tmp1"; + writer.WriteLine($"{Count} = {PopStack()}; // count"); + PushTrack(PopStack()); // mark + using (EmitBlock(writer, $"if ({Count} < 0)")) + { + PushStack("runtextpos"); + PushStack($"{Count} + 1"); + TrackUnique2(Lazybranchcountback2); + Goto(Operand(0)); + } + PushTrack(Count); + PushTrack("runtextpos"); + Track(); + } + break; + + case RegexCode.Lazybranchcount | RegexCode.Back: + { + const string C = "tmp1"; + writer.WriteLine($"runtextpos = {PopTrack()};"); + writer.WriteLine($"{C} = {PopTrack()}; // c"); + using (EmitBlock(writer, $"if ({C} < {Operand(1)} && runtextpos != {TopTrack()})")) + { + PushStack("runtextpos"); + PushStack($"{C} + 1"); + TrackUnique2(Lazybranchcountback2); + Goto(Operand(0)); + } + PushStack(PopTrack()); + PushStack(C); + writer.WriteLine($"goto {Backtrack};"); + } + break; + + case RegexCode.Lazybranchcount | RegexCode.Back2: + writer.WriteLine($"{ReadyReplaceStack(1)} = {PopTrack()};"); + writer.WriteLine($"{ReadyReplaceStack(0)} = {TopStack()} - 1;"); + ReadyReplaceStack(0); + 
writer.WriteLine($"goto {Backtrack};"); + break; + + case RegexCode.Setjump: + PushStack("runtrack.Length - runtrackpos"); + PushStack("Crawlpos()"); + TrackUnique(Stackpop2); + break; + + case RegexCode.Setjump | RegexCode.Back: + PopDiscardStack(2); + writer.WriteLine($"goto {Backtrack};"); + break; + + case RegexCode.Backjump: + { + const string Stacked = "tmp1"; + writer.WriteLine($"{Stacked} = {PopStack()}; // stacked"); + writer.WriteLine($"runtrackpos = runtrack.Length - {PopStack()};"); + writer.WriteLine($"while (Crawlpos() != {Stacked}) Uncapture();"); + writer.WriteLine($"goto {Backtrack};"); + } + break; + + case RegexCode.Forejump: + { + const string Stacked = "tmp1"; + writer.WriteLine($"{Stacked} = {PopStack()}; // stacked"); + writer.WriteLine($"runtrackpos = runtrack.Length - {PopStack()};"); + PushTrack(Stacked); + TrackUnique(Forejumpback); + } + break; + + case RegexCode.Forejump | RegexCode.Back: + { + const string TrackedCrawlpos = "tmp1"; + writer.WriteLine($"{TrackedCrawlpos} = {PopTrack()}; // tracked crawlpos"); + writer.WriteLine($"while (Crawlpos() != {TrackedCrawlpos}) Uncapture();"); + writer.WriteLine($"goto {Backtrack};"); + } + break; + + case RegexCode.Bol: + using (EmitBlock(writer, $"if (runtextpos <= runtextbeg)")) + { + writer.WriteLine($"goto {labels[NextCodepos()]};"); + } + using (EmitBlock(writer, $"if ({Leftchar()} != '\\n')")) + { + writer.WriteLine($"goto {Backtrack};"); + } + break; + + case RegexCode.Eol: + using (EmitBlock(writer, $"if (runtextpos >= runtextend)")) + { + writer.WriteLine($"goto {labels[NextCodepos()]};"); + } + using (EmitBlock(writer, $"if ({Rightchar()} != '\\n')")) + { + writer.WriteLine($"goto {Backtrack};"); + } + break; + + case RegexCode.Boundary: + case RegexCode.NonBoundary: + using (EmitBlock(writer, $"if ({(Code() == RegexCode.Boundary ? "!" 
: "")}IsBoundary(runtextpos, runtextbeg, runtextend))")) + { + writer.WriteLine($"goto {Backtrack};"); + } + break; + + case RegexCode.ECMABoundary: + case RegexCode.NonECMABoundary: + using (EmitBlock(writer, $"if ({(Code() == RegexCode.ECMABoundary ? "!" : "")}IsECMABoundary(runtextpos, runtextbeg, runtextend))")) + { + writer.WriteLine($"goto {Backtrack};"); + } + break; + + case RegexCode.Beginning: + using (EmitBlock(writer, $"if (runtextpos > runtextbeg)")) + { + writer.WriteLine($"goto {Backtrack};"); + } + break; + + case RegexCode.Start: + using (EmitBlock(writer, $"if (runtextpos != runtextstart)")) + { + writer.WriteLine($"goto {Backtrack};"); + } + break; + + case RegexCode.EndZ: + using (EmitBlock(writer, $"if (runtextpos < runtextend - 1)")) + { + writer.WriteLine($"goto {Backtrack};"); + } + using (EmitBlock(writer, $"if (runtextpos >= runtextend)")) + { + writer.WriteLine($"goto {labels[NextCodepos()]};"); + } + using (EmitBlock(writer, $"if ({Rightchar()} != '\\n')")) + { + writer.WriteLine($"goto {Backtrack};"); + } + break; + + case RegexCode.End: + using (EmitBlock(writer, $"if (runtextpos < runtextend)")) + { + writer.WriteLine($"goto {Backtrack};"); + } + break; + + case RegexCode.One: + case RegexCode.Notone: + case RegexCode.Set: + case RegexCode.One | RegexCode.Rtl: + case RegexCode.Notone | RegexCode.Rtl: + case RegexCode.Set | RegexCode.Rtl: + case RegexCode.One | RegexCode.Ci: + case RegexCode.Notone | RegexCode.Ci: + case RegexCode.Set | RegexCode.Ci: + case RegexCode.One | RegexCode.Ci | RegexCode.Rtl: + case RegexCode.Notone | RegexCode.Ci | RegexCode.Rtl: + case RegexCode.Set | RegexCode.Ci | RegexCode.Rtl: + { + string clause; + string expr; + if (!IsRightToLeft()) + { + clause = $"runtextpos >= runtextend || "; + expr = Rightcharnext(); + } + else + { + clause = $"runtextpos <= runtextbeg || "; + expr = Leftcharnext(); + } + + clause += Code() == RegexCode.Set ? 
+ $"!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive())}" : + $"{ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0)))} {(Code() == RegexCode.One ? "!=" : "==")} {Operand(0)}"; + + using (EmitBlock(writer, $"if ({clause})")) + { + writer.WriteLine($"goto {Backtrack};"); + } + } + break; + + case RegexCode.Multi: + case RegexCode.Multi | RegexCode.Ci: + { + string str = rm.Code.Strings[Operand(0)]; + Debug.Assert(str.Length != 0); + writer.WriteLine($"if (runtextend - runtextpos < {str.Length} ||"); + for (int i = 0; i < str.Length; i++) + { + writer.Write($" {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos{(i == 0 ? "" : $" + {i}")}]", IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(str[i]))} != {Literal(str[i])}"); + writer.WriteLine(i < str.Length - 1 ? " ||" : ")"); + } + using (EmitBlock(writer, null)) + { + writer.WriteLine($"goto {Backtrack};"); + } + writer.WriteLine($"runtextpos += {str.Length};"); + break; + } + + case RegexCode.Multi | RegexCode.Rtl: + case RegexCode.Multi | RegexCode.Ci | RegexCode.Rtl: + { + string str = rm.Code.Strings[Operand(0)]; + Debug.Assert(str.Length != 0); + writer.WriteLine($"if (runtextpos - runtextbeg < {str.Length} ||"); + for (int i = str.Length; i > 0;) + { + i--; + writer.Write($" {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos - {str.Length - i}]", IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(str[i]))} != {Literal(str[i])}"); + writer.WriteLine(i == 0 ? 
")" : " ||"); + } + using (EmitBlock(writer, null)) + { + writer.WriteLine($"goto {Backtrack};"); + } + writer.WriteLine($"runtextpos -= {str.Length};"); + break; + } + + case RegexCode.Ref: + case RegexCode.Ref | RegexCode.Ci: + case RegexCode.Ref | RegexCode.Rtl: + case RegexCode.Ref | RegexCode.Ci | RegexCode.Rtl: + { + const string Length = "tmp1"; + const string Index = "tmp2"; + + using (EmitBlock(writer, $"if (!IsMatched({Operand(0)}))")) + { + writer.WriteLine($"goto {((options & RegexOptions.ECMAScript) != 0 ? AdvanceLabel() : Backtrack)};"); + } + + writer.WriteLine($"{Length} = MatchLength({Operand(0)}); // length"); + + using (EmitBlock(writer, !IsRightToLeft() ? $"if (runtextend - runtextpos < {Length})" : $"if (runtextpos - runtextbeg < {Length})")) + { + writer.WriteLine($"goto {Backtrack};"); + } + + if (!IsRightToLeft()) + { + writer.WriteLine($"{Index} = MatchIndex({Operand(0)}) + {Length}; // index"); + writer.WriteLine($"runtextpos += {Length};"); + } + else + { + writer.WriteLine($"{Index} = MatchIndex({Operand(0)}); // index"); + writer.WriteLine($"runtextpos -= {Length};"); + } + + using (EmitBlock(writer, "while (true)")) + { + using (EmitBlock(writer, $"if ({Length} <= 0)")) + { + writer.WriteLine($"goto {AdvanceLabel()};"); + } + + using (EmitBlock(writer, !IsRightToLeft() ? 
+ $"if ({ToLowerIfNeeded(hasTextInfo, options, $"runtext[{Index} - {Length}]", IsCaseInsensitive())} != {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos - {Length}--]", IsCaseInsensitive())})" : + $"if ({ToLowerIfNeeded(hasTextInfo, options, $"runtext[{Index} + --{Length}]", IsCaseInsensitive())} != {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos + {Length}]", IsCaseInsensitive())})")) + { + writer.WriteLine($"break;"); + } + } + + writer.WriteLine($"goto {Backtrack};"); + break; + } + + case RegexCode.Onerep: + case RegexCode.Notonerep: + case RegexCode.Setrep: + case RegexCode.Onerep | RegexCode.Ci: + case RegexCode.Notonerep | RegexCode.Ci: + case RegexCode.Setrep | RegexCode.Ci: + { + int c = Operand(1); + if (c != 0) + { + using (EmitBlock(writer, $"if (runtextend - runtextpos < {c})")) + { + writer.WriteLine($"goto {Backtrack};"); + } + + using (EmitBlock(writer, $"for (int i = 0; i < {c}; i++)")) + { + string expr = "runtext[runtextpos + i]"; + if (Code() == RegexCode.Setrep) + { + EmitTimeoutCheck(writer, hasTimeout); + expr = $"!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive())}"; + } + else + { + expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0))); + expr = $"{expr} {(Code() == RegexCode.Onerep ? 
"!=" : "==")} {Literal((char)Operand(0))}"; + } + + using (EmitBlock(writer, $"if ({expr})")) + { + writer.WriteLine($"goto {Backtrack};"); + } + } + writer.WriteLine($"runtextpos += {c};"); + } + } + break; + + case RegexCode.Onerep | RegexCode.Rtl: + case RegexCode.Notonerep | RegexCode.Rtl: + case RegexCode.Setrep | RegexCode.Rtl: + case RegexCode.Onerep | RegexCode.Ci | RegexCode.Rtl: + case RegexCode.Notonerep | RegexCode.Ci | RegexCode.Rtl: + case RegexCode.Setrep | RegexCode.Ci | RegexCode.Rtl: + { + int c = Operand(1); + if (c != 0) + { + const string Length = "tmp1"; + + using (EmitBlock(writer, $"if (runtextpos - runtextbeg < {c})")) + { + writer.WriteLine($"goto {Backtrack};"); + } + writer.WriteLine($"runtextpos -= {c};"); + writer.WriteLine($"{Length} = {c}; // length"); + + string l1 = DefineLabel(); + MarkLabel(l1); + + string expr = $"runtext[runtextpos + --{Length}]"; + if (Code() == RegexCode.Setrep) + { + EmitTimeoutCheck(writer, hasTimeout); + using (EmitBlock(writer, $"if (!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive())})")) + { + writer.WriteLine($"goto {Backtrack};"); + } + } + else + { + expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0))); + string op = Code() == RegexCode.Onerep ? 
"!=" : "=="; + using (EmitBlock(writer, $"if ({expr} {op} {Literal((char)Operand(0))})")) + { + writer.WriteLine($"goto {Backtrack};"); + } + } + + using (EmitBlock(writer, $"if ({Length} > 0)")) + { + writer.WriteLine($"goto {l1};"); + } + } + break; + } + + case RegexCode.Oneloop: + case RegexCode.Notoneloop: + case RegexCode.Setloop: + case RegexCode.Oneloop | RegexCode.Rtl: + case RegexCode.Notoneloop | RegexCode.Rtl: + case RegexCode.Setloop | RegexCode.Rtl: + case RegexCode.Oneloop | RegexCode.Ci: + case RegexCode.Notoneloop | RegexCode.Ci: + case RegexCode.Setloop | RegexCode.Ci: + case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl: + case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl: + case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl: + case RegexCode.Oneloopatomic: + case RegexCode.Notoneloopatomic: + case RegexCode.Setloopatomic: + case RegexCode.Oneloopatomic | RegexCode.Rtl: + case RegexCode.Notoneloopatomic | RegexCode.Rtl: + case RegexCode.Setloopatomic | RegexCode.Rtl: + case RegexCode.Oneloopatomic | RegexCode.Ci: + case RegexCode.Notoneloopatomic | RegexCode.Ci: + case RegexCode.Setloopatomic | RegexCode.Ci: + case RegexCode.Oneloopatomic | RegexCode.Ci | RegexCode.Rtl: + case RegexCode.Notoneloopatomic | RegexCode.Ci | RegexCode.Rtl: + case RegexCode.Setloopatomic | RegexCode.Ci | RegexCode.Rtl: + { + int c = Operand(1); + if (c != 0) + { + const string Len = "tmp1"; + const string I = "tmp2"; + + if (c == int.MaxValue) + { + writer.WriteLine(!IsRightToLeft() ? + $"{Len} = runtextend - runtextpos; // length" : + $"{Len} = runtextpos - runtextbeg; // length"); + } + else + { + writer.WriteLine(!IsRightToLeft() ? + $"{Len} = Math.Min(runtextend - runtextpos, {c}); // length" : + $"{Len} = Math.Min(runtextpos - runtextbeg, {c}); // length"); + } + + string? set = Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic ? 
rm.Code.Strings[Operand(0)] : null; + Span setChars = stackalloc char[3]; + int numSetChars; + + // If this is a notoneloop{atomic} and we're left-to-right and case-sensitive, + // we can use the vectorized IndexOf to search for the target character. + if ((Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic) && + !IsRightToLeft() && + (!IsCaseInsensitive() || !RegexCharClass.ParticipatesInCaseConversion(Operand(0)))) + { + writer.WriteLine($"{I} = runtext.AsSpan(runtextpos, {Len}).IndexOf({Literal((char)Operand(0))}); // i"); + using (EmitBlock(writer, $"if ({I} == -1)")) + { + writer.WriteLine($"runtextpos += {Len};"); + writer.WriteLine($"{I} = 0;"); + } + using (EmitBlock(writer, "else")) + { + writer.WriteLine($"runtextpos += {I};"); + writer.WriteLine($"{I} = {Len} - {I};"); + } + } + else if ((Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) && + !IsRightToLeft() && + !IsCaseInsensitive() && + (numSetChars = RegexCharClass.GetSetChars(set!, setChars)) > 1 && + RegexCharClass.IsNegated(set!)) + { + // Similarly, if this is a setloop{atomic} and we're left-to-right and case-sensitive, + // and if the set contains only 2 or 3 negated chars, we can use the vectorized IndexOfAny + // to search for those chars. 
+ + Debug.Assert(numSetChars is 2 or 3); + writer.Write($"{I} = runtext.AsSpan(runtextpos, {Len}).IndexOfAny({Literal(setChars[0])}, {Literal(setChars[1])}"); + if (numSetChars == 3) + { + writer.Write($", {Literal(setChars[2])}"); + } + writer.WriteLine("); // i"); + using (EmitBlock(writer, $"if ({I} == -1)")) + { + writer.WriteLine($"runtextpos += {Len};"); + writer.WriteLine($"{I} = 0;"); + } + using (EmitBlock(writer, "else")) + { + writer.WriteLine($"runtextpos += {I};"); + writer.WriteLine($"{I} = {Len} - {I};"); + } + } + else if ((Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) && + !IsRightToLeft() && + set == RegexCharClass.AnyClass) + { + // If someone uses .* along with RegexOptions.Singleline, that becomes [anycharacter]*, which means it'll + // consume everything. As such, we can simply update our position to be the last allowed, without + // actually checking anything. + writer.WriteLine($"runtextpos += {Len};"); + writer.WriteLine($"{I} = 0;"); + } + else + { + // Otherwise, we emit the open-coded loop. + writer.WriteLine($"{I} = {Len} + 1;"); + using (EmitBlock(writer, $"while (--{I} > {0})")) + { + string expr = !IsRightToLeft() ? + Rightcharnext() : + Leftcharnext(); + + if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) + { + EmitTimeoutCheck(writer, hasTimeout); + expr = $"!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive())}"; + } + else + { + string op = Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopatomic ? "!=" : "=="; + expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0))); + expr = $"{expr} {op} {Literal((char)Operand(0))}"; + } + + using (EmitBlock(writer, $"if ({expr})")) + { + writer.WriteLine(!IsRightToLeft() ? 
+ "runtextpos--;" : + "runtextpos++;"); + writer.WriteLine("break;"); + } + } + } + + if (Code() != RegexCode.Oneloopatomic && Code() != RegexCode.Notoneloopatomic && Code() != RegexCode.Setloopatomic) + { + using (EmitBlock(writer, $"if ({I} >= {Len})")) + { + writer.WriteLine($"goto {AdvanceLabel()};"); + } + PushTrack($"{Len} - {I} - 1"); + PushTrack(!IsRightToLeft() ? + "runtextpos - 1" : + "runtextpos + 1"); + Track(); + } + } + break; + } + + case RegexCode.Oneloop | RegexCode.Back: + case RegexCode.Notoneloop | RegexCode.Back: + case RegexCode.Setloop | RegexCode.Back: + case RegexCode.Oneloop | RegexCode.Rtl | RegexCode.Back: + case RegexCode.Notoneloop | RegexCode.Rtl | RegexCode.Back: + case RegexCode.Setloop | RegexCode.Rtl | RegexCode.Back: + case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Back: + case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Back: + case RegexCode.Setloop | RegexCode.Ci | RegexCode.Back: + case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: + case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: + case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: + { + const string Position = "tmp1"; + writer.WriteLine($"runtextpos = {PopTrack()};"); + writer.WriteLine($"{Position} = {PopTrack()}; // position"); + using (EmitBlock(writer, $"if ({Position} > 0)")) + { + PushTrack($"{Position} - 1"); + PushTrack(!IsRightToLeft() ? 
+ "runtextpos - 1" : + "runtextpos + 1"); + Trackagain(); + } + Advance(); + } + break; + + case RegexCode.Onelazy: + case RegexCode.Notonelazy: + case RegexCode.Setlazy: + case RegexCode.Onelazy | RegexCode.Rtl: + case RegexCode.Notonelazy | RegexCode.Rtl: + case RegexCode.Setlazy | RegexCode.Rtl: + case RegexCode.Onelazy | RegexCode.Ci: + case RegexCode.Notonelazy | RegexCode.Ci: + case RegexCode.Setlazy | RegexCode.Ci: + case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Rtl: + case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Rtl: + case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Rtl: + { + int count = Operand(1); + if (count != 0) + { + const string C = "tmp1"; + if (count == int.MaxValue) + { + writer.WriteLine(!IsRightToLeft() ? + $"{C} = runtextend - runtextpos; // count" : + $"{C} = runtextpos - runtextbeg; // count"); + } + else + { + writer.WriteLine(!IsRightToLeft() ? + $"{C} = Math.Min(runtextend - runtextpos, {count}); // count" : + $"{C} = Math.Min(runtextpos - runtextbeg, {count}); // count"); + } + + using (EmitBlock(writer, $"if ({C} <= 0)")) + { + writer.WriteLine($"goto {AdvanceLabel()};"); + } + + PushTrack($"{C} - 1"); + PushTrack("runtextpos"); + Track(); + } + break; + } + + case RegexCode.Onelazy | RegexCode.Back: + case RegexCode.Notonelazy | RegexCode.Back: + case RegexCode.Setlazy | RegexCode.Back: + case RegexCode.Onelazy | RegexCode.Rtl | RegexCode.Back: + case RegexCode.Notonelazy | RegexCode.Rtl | RegexCode.Back: + case RegexCode.Setlazy | RegexCode.Rtl | RegexCode.Back: + case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Back: + case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Back: + case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Back: + case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: + case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: + case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: + { + const string I = "tmp1"; + + writer.WriteLine($"runtextpos 
= {PopTrack()};"); + writer.WriteLine($"{I} = {PopTrack()}; // i"); + + string expr = !IsRightToLeft() ? + Rightcharnext() : + Leftcharnext(); + + if (Code() == RegexCode.Setlazy) + { + EmitTimeoutCheck(writer, hasTimeout); + expr = $"!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive())}"; + } + else + { + expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0))); + expr = $"{expr} {(Code() == RegexCode.Onelazy ? "!=" : "==")} {Literal((char)Operand(0))}"; + } + + using (EmitBlock(writer, $"if ({expr})")) + { + writer.WriteLine($"goto {Backtrack};"); + } + + using (EmitBlock(writer, $"if ({I} > 0)")) + { + PushTrack($"{I} - 1"); + PushTrack("runtextpos"); + Trackagain(); + } + + Advance(); + } + break; + + default: + Debug.Fail($"Unimplemented state: {currentOpcode:X8}"); + break; + } + } + + + + /// + /// Branch to the MSIL corresponding to the regex code at i + /// + /// + /// A trick: since track and stack space is gobbled up unboundedly + /// only as a result of branching backwards, this is where we check + /// for sufficient space and trigger reallocations. + /// + /// If the "goto" is backwards, we generate code that checks + /// available space against the amount of space that would be needed + /// in the worst case by code that will only go forward; if there's + /// not enough, we push the destination on the tracking stack, then + /// we jump to the place where we invoke the allocator. + /// + /// Since forward gotos pose no threat, they just turn into a Br. + /// + void Goto(int i) + { + // When going backwards, ensure enough space. 
+ if (i < currentCodePos) + { + using (EmitBlock(writer, $"if (runtrackpos <= {rm.Code.TrackCount * 4} || runstackpos <= {rm.Code.TrackCount * 3})")) + { + writer.WriteLine($"{ReadyPushTrack()} = {AddGoto(i)};"); + writer.WriteLine($"goto {Backtrack};"); + } + } + + writer.WriteLine($"goto {labels[i]};"); + } + + string ReadyPushTrack() => "runtrack[--runtrackpos]"; + + void Track() => PushTrack(AddTrack()); + + /// + /// Pushes the current switch index on the tracking stack so the backtracking + /// logic will be repeated again next time we backtrack here. + /// + void Trackagain() => PushTrack(currentBacktrackNote); + + void PushTrack(T expr) => writer.WriteLine($"{ReadyPushTrack()} = {(expr is IFormattable ? ((IFormattable)expr).ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};"); + + /// Retrieves the top entry on the tracking stack without popping. + string TopTrack() => "runtrack[runtrackpos]"; + + int Operand(int i) => codes[currentCodePos + i + 1]; + + /// True if the current operation is marked for the leftward direction. + bool IsRightToLeft() => (currentOpcode & RegexCode.Rtl) != 0; + + /// True if the current operation is marked for case insensitive operation. + bool IsCaseInsensitive() => (currentOpcode & RegexCode.Ci) != 0; + + /// Returns the raw regex opcode (masking out Back and Rtl). + int Code() => currentOpcode & RegexCode.Mask; + + /// Saves the value of a local variable on the grouping stack. + void PushStack(T expr) => writer.WriteLine($"{ReadyPushStack()} = {(expr is IFormattable ? ((IFormattable)expr).ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};"); + + string ReadyPushStack() => "runstack[--runstackpos]"; + + /// Retrieves the top entry on the stack without popping. 
+ string TopStack() => "runstack[runstackpos]"; + + void TrackUnique(int i) => PushTrack(AddUniqueTrack(i)); + + void TrackUnique2(int i) => PushTrack(AddUniqueTrack(i, RegexCode.Back2)); + + int AddUniqueTrack(int i, int flags = RegexCode.Back) + { + if (uniqueNote[i] == -1) + { + uniqueNote[i] = AddTrack(flags); + } + + return uniqueNote[i]; + } + + /// + /// Returns the position of the next operation in the regex code, taking + /// into account the different numbers of arguments taken by operations + /// + int NextCodepos() => currentCodePos + RegexCode.OpcodeSize(codes[currentCodePos]); + + /// The label for the next (forward) operation. + string AdvanceLabel() => labels![NextCodepos()]; + + /// Goto the next (forward) operation. + void Advance() => writer.WriteLine($"goto {AdvanceLabel()};"); + + /// Loads the char to the left of the current position. + string Leftchar() => "runtext[runtextpos - 1]"; + + /// Loads the char to the left of the current position and advances (leftward). + string Leftcharnext() => "runtext[--runtextpos]"; + + /// Loads the char to the right of the current position. + string Rightchar() => "runtext[runtextpos]"; + + /// Loads the char to the right of the current position and advances the current position. + string Rightcharnext() => "runtext[runtextpos++]"; + + /// + /// Adds a backtrack note to the list of them, and returns the index of the new + /// note (which is also the index for the jump used by the switch table) + /// + int AddBacktrackNote(int flags, string l, int codepos) + { + if (notes == null || noteCount >= notes.Length) + { + var newnotes = new BacktrackNote[notes == null ? 
16 : notes.Length * 2]; + if (notes != null) + { + Array.Copy(notes, newnotes, noteCount); + } + notes = newnotes; + } + + notes[noteCount] = new BacktrackNote(flags, l, codepos); + return noteCount++; + } + + /// + /// Adds a backtrack note for the current operation; creates a new label for + /// where the code will be, and returns the switch index. + /// + int AddTrack(int flags = RegexCode.Back) => AddBacktrackNote(flags, DefineLabel(), currentCodePos); + + int AddGoto(int destpos) + { + if (forwardJumpsThroughSwitch![destpos] == -1) + { + forwardJumpsThroughSwitch[destpos] = AddBacktrackNote(0, labels![destpos], destpos); + } + + return forwardJumpsThroughSwitch[destpos]; + } + + /// Pops an element off the tracking stack. + string PopTrack() => "runtrack[runtrackpos++]"; + + /// Pops an element off the grouping stack (leave it on the operand stack). + string PopStack() => "runstack[runstackpos++]"; + + /// Pops i elements off the grouping stack and discards them. + void PopDiscardStack(int i = 1) => writer.WriteLine(i == 1 ? "runstackpos++;" : $"runstackpos += {i};"); + + /// Prologue to code that will replace the ith element on the grouping stack. + string ReadyReplaceStack(int i) => i == 0 ? "runstack[runstackpos]" : $"runstack[runstackpos + {i}]"; + } + + /// + /// Keeps track of an operation that needs to be referenced in the backtrack-jump + /// switch table, and that needs backtracking code to be emitted (if flags != 0) + /// + private record BacktrackNote(int flags, string label, int codepos); + + private static bool EmitLoopTimeoutCounterIfNeeded(IndentedTextWriter writer, RegexMethod rm) + { + if (rm.MatchTimeout.HasValue && rm.MatchTimeout.Value != Timeout.Infinite) + { + writer.WriteLine($"int loopTimeoutCounter = 0;"); + return true; + } + + return false; + } + + /// Emits a timeout check. 
+        private static void EmitTimeoutCheck(IndentedTextWriter writer, bool hasTimeout)
+        {
+            const int LoopTimeoutCheckCount = 2048; // A conservative value to guarantee the correct timeout handling.
+            if (hasTimeout)
+            {
+                // Increment counter for each loop iteration.
+                // Emit code to check the timeout every LoopTimeoutCheckCount-th iteration.
+                using (EmitBlock(writer, $"if (++loopTimeoutCounter == {LoopTimeoutCheckCount})"))
+                {
+                    writer.WriteLine("loopTimeoutCounter = 0;");
+                    writer.WriteLine("CheckTimeout();");
+                }
+                writer.WriteLine();
+            }
+        }
+
+        /// <summary>
+        /// Emits a <c>textInfo</c> local initialized from <see cref="CultureInfo.CurrentCulture"/> when the regex is
+        /// not <see cref="RegexOptions.CultureInvariant"/> and either <see cref="RegexOptions.IgnoreCase"/> is set
+        /// or at least one opcode in <c>rm.Code.Codes</c> carries the <c>RegexCode.Ci</c> flag.
+        /// </summary>
+        /// <returns><see langword="true"/> if the local was emitted; otherwise, <see langword="false"/>.</returns>
+        private static bool EmitInitializeCultureForGoIfNecessary(IndentedTextWriter writer, RegexMethod rm)
+        {
+            if (((RegexOptions)rm.Options & RegexOptions.CultureInvariant) == 0)
+            {
+                bool needsCulture = ((RegexOptions)rm.Options & RegexOptions.IgnoreCase) != 0;
+                if (!needsCulture)
+                {
+                    // IgnoreCase wasn't set globally; look for any individual opcode marked case-insensitive.
+                    int[] codes = rm.Code.Codes;
+                    for (int codepos = 0; codepos < codes.Length; codepos += RegexCode.OpcodeSize(codes[codepos]))
+                    {
+                        if ((codes[codepos] & RegexCode.Ci) == RegexCode.Ci)
+                        {
+                            needsCulture = true;
+                            break;
+                        }
+                    }
+                }
+
+                if (needsCulture)
+                {
+                    writer.WriteLine("TextInfo textInfo = CultureInfo.CurrentCulture.TextInfo;");
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        /// <summary>
+        /// Determines whether lowercasing must go through <c>char.ToLowerInvariant</c>: true when no <c>textInfo</c>
+        /// local is available or <see cref="RegexOptions.CultureInvariant"/> is set.
+        /// </summary>
+        private static bool UseToLowerInvariant(bool hasTextInfo, RegexOptions options) => !hasTextInfo || (options & RegexOptions.CultureInvariant) != 0;
+
+        /// <summary>Wraps <paramref name="expression"/> in a call to <c>char.ToLowerInvariant</c> or <c>textInfo.ToLower</c>.</summary>
+        private static string ToLower(bool hasTextInfo, RegexOptions options, string expression) => UseToLowerInvariant(hasTextInfo, options) ? $"char.ToLowerInvariant({expression})" : $"textInfo.ToLower({expression})";
+
+        /// <summary>Returns <paramref name="expression"/> lowercased via <see cref="ToLower"/> when <paramref name="toLower"/> is true; otherwise returns it unchanged.</summary>
+        private static string ToLowerIfNeeded(bool hasTextInfo, RegexOptions options, string expression, bool toLower) => toLower ? ToLower(hasTextInfo, options, expression) : expression;
+
+        /// <summary>
+        /// Emits the <c>textInfo</c> local at most once, using <paramref name="textInfoEmitted"/> to remember that the
+        /// decision has already been made. The local is written only when the regex is not
+        /// <see cref="RegexOptions.CultureInvariant"/> and <see cref="RegexOptions.IgnoreCase"/> is set, the
+        /// Boyer-Moore prefix is case-insensitive, or any leading char class is case-insensitive; in that case
+        /// <paramref name="hasTextInfo"/> is set to <see langword="true"/>.
+        /// </summary>
+        private static void EmitTextInfoIfRequired(IndentedTextWriter writer, ref bool textInfoEmitted, ref bool hasTextInfo, RegexMethod rm)
+        {
+            if (textInfoEmitted)
+            {
+                return;
+            }
+            textInfoEmitted = true;
+
+            // Emit local to store current culture if needed
+            if ((((RegexOptions)rm.Options) & RegexOptions.CultureInvariant) == 0)
+            {
+                bool needsCulture = (((RegexOptions)rm.Options) & RegexOptions.IgnoreCase) != 0 || rm.Code.BoyerMoorePrefix?.CaseInsensitive == true;
+                if (!needsCulture && rm.Code.LeadingCharClasses is not null)
+                {
+                    for (int i = 0; i < rm.Code.LeadingCharClasses.Length; i++)
+                    {
+                        if (rm.Code.LeadingCharClasses[i].CaseInsensitive)
+                        {
+                            needsCulture = true;
+                            break;
+                        }
+                    }
+                }
+
+                if (needsCulture)
+                {
+                    hasTextInfo = true;
+                    writer.WriteLine("// IgnoreCase with CultureInfo.CurrentCulture");
+                    writer.WriteLine("TextInfo textInfo = CultureInfo.CurrentCulture.TextInfo;");
+                    writer.WriteLine();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Builds the C# source of a boolean expression that tests whether the character produced by
+        /// <paramref name="chExpr"/> belongs to <paramref name="charClass"/>.
+        /// </summary>
+        /// <param name="hasTextInfo">Whether a <c>textInfo</c> local is available to the emitted code.</param>
+        /// <param name="options">The regex options; consulted for <see cref="RegexOptions.CultureInvariant"/>.</param>
+        /// <param name="chExpr">C# expression yielding the character. It is emitted exactly once because it may have side effects.</param>
+        /// <param name="charClass">The character class string to match against.</param>
+        /// <param name="caseInsensitive">Whether the comparison is case-insensitive.</param>
+        /// <returns>
+        /// The expression text. Several of the produced forms assign to a local named <c>ch</c> and call
+        /// <c>CharInClass</c>, so the emitted method must have both in scope.
+        /// </returns>
+        private static string MatchCharacterClass(bool hasTextInfo, RegexOptions options, string chExpr, string charClass, bool caseInsensitive)
+        {
+            // We need to perform the equivalent of calling RegexRunner.CharInClass(ch, charClass),
+            // but that call is relatively expensive. Before we fall back to it, we try to optimize
+            // some common cases for which we can do much better, such as known character classes
+            // for which we can call a dedicated method, or a fast-path for ASCII using a lookup table.
+
+            // First, see if the char class is a built-in one for which there's a better function
+            // we can just call directly. Everything in this section must work correctly for both
+            // case-sensitive and case-insensitive modes, regardless of culture.
+            switch (charClass)
+            {
+                case RegexCharClass.AnyClass:
+                    // ideally this could just be "return true;", but we need to evaluate the expression for its side effects
+                    return $"({chExpr} != -1)"; // a char is unsigned and thus won't ever be equal to -1, so this is equivalent to true
+
+                case RegexCharClass.DigitClass:
+                    return $"char.IsDigit({chExpr})";
+
+                case RegexCharClass.NotDigitClass:
+                    return $"!char.IsDigit({chExpr})";
+
+                case RegexCharClass.SpaceClass:
+                    return $"char.IsWhiteSpace({chExpr})";
+
+                case RegexCharClass.NotSpaceClass:
+                    return $"!char.IsWhiteSpace({chExpr})";
+            }
+
+            // If we're meant to be doing a case-insensitive lookup, and if we're not using the invariant culture,
+            // lowercase the input. If we're using the invariant culture, we may still end up calling ToLower later
+            // on, but we may also be able to avoid it, in particular in the case of our lookup table, where we can
+            // generate the lookup table already factoring in the invariant case sensitivity. There are multiple
+            // special-code paths between here and the lookup table, but we only take those if invariant is false;
+            // if it were true, they'd need to lowercase the input via ToLower() as well.
+            bool invariant = false;
+            if (caseInsensitive)
+            {
+                invariant = UseToLowerInvariant(hasTextInfo, options);
+                if (!invariant)
+                {
+                    chExpr = ToLower(hasTextInfo, options, chExpr);
+                }
+            }
+
+            // Next, handle simple sets of one range, e.g. [A-Z], [0-9], etc. This includes some built-in classes, like ECMADigitClass.
+            if (!invariant && RegexCharClass.TryGetSingleRange(charClass, out char lowInclusive, out char highInclusive))
+            {
+                bool invert = RegexCharClass.IsNegated(charClass);
+                if (lowInclusive == highInclusive)
+                {
+                    chExpr = $"({chExpr} {(invert ? "!=" : "==")} {Literal(lowInclusive)})";
+                }
+                else
+                {
+                    chExpr = $"(((uint){chExpr}) - {Literal(lowInclusive)} {(invert ? ">=" : "<")} (uint){highInclusive - lowInclusive + 1})";
+                }
+
+                return chExpr;
+            }
+
+            // Next, if the character class contains nothing but a single Unicode category, we can call char.GetUnicodeCategory and
+            // compare against it. It has a fast-lookup path for ASCII, so is as good or better than any lookup we'd generate (plus
+            // we get smaller code), and it's what we'd do for the fallback (which we get to avoid generating) as part of CharInClass.
+            if (!invariant && RegexCharClass.TryGetSingleUnicodeCategory(charClass, out UnicodeCategory category, out bool negated))
+            {
+                return $"(char.GetUnicodeCategory({chExpr}) {(negated ? "!=" : "==")} UnicodeCategory.{category})";
+            }
+
+            // Next, if there's only 2 or 3 chars in the set (fairly common due to the sets we create for prefixes),
+            // it's cheaper and smaller to compare against each than it is to use a lookup table.
+            if (!invariant)
+            {
+                Span<char> setChars = stackalloc char[3];
+                int numChars = RegexCharClass.GetSetChars(charClass, setChars);
+                if (!RegexCharClass.IsNegated(charClass))
+                {
+                    switch (numChars)
+                    {
+                        case 2:
+                            return $"({chExpr} is {Literal(setChars[0])} or {Literal(setChars[1])})";
+
+                        case 3:
+                            return $"({chExpr} is {Literal(setChars[0])} or {Literal(setChars[1])} or {Literal(setChars[2])})";
+                    }
+                }
+            }
+
+            // Analyze the character set more to determine what code to generate.
+            RegexCharClass.CharClassAnalysisResults analysis = RegexCharClass.Analyze(charClass);
+
+            if (!invariant) // if we're being asked to do a case insensitive, invariant comparison, use the lookup table
+            {
+                // NOTE: 'invariant' is false throughout this block, so the emitted expressions never need
+                // a char.ToLowerInvariant call; chExpr has already been lowercased above if that was required.
+
+                if (analysis.ContainsNoAscii)
+                {
+                    // We determined that the character class contains only non-ASCII,
+                    // for example if the class were [\p{IsGreek}\p{IsGreekExtended}], which is
+                    // the same as [\u0370-\u03FF\u1F00-\u1FFF]. (In the future, we could possibly
+                    // extend the analysis to produce a known lower-bound and compare against
+                    // that rather than always using 128 as the pivot point.)
+                    return $"((ch = {chExpr}) >= 128 && CharInClass((char)ch, {Literal(charClass)}))";
+                }
+
+                if (analysis.AllAsciiContained)
+                {
+                    // We determined that every ASCII character is in the class, for example
+                    // if the class were the negated example from case 1 above:
+                    // [^\p{IsGreek}\p{IsGreekExtended}].
+                    return $"((ch = {chExpr}) < 128 || CharInClass((char)ch, {Literal(charClass)}))";
+                }
+            }
+
+            // Now, our big hammer is to generate a lookup table that lets us quickly index by character into a yes/no
+            // answer as to whether the character is in the target character class. However, we don't want to store
+            // a lookup table for every possible character for every character class in the regular expression; at one
+            // bit for each of 65K characters, that would be an 8K bitmap per character class. Instead, we handle the
+            // common case of ASCII input via such a lookup table, which at one bit for each of 128 characters is only
+            // 16 bytes per character class. We of course still need to be able to handle inputs that aren't ASCII, so
+            // we check the input against 128, and have a fallback if the input is >= to it. Determining the right
+            // fallback could itself be expensive. For example, if it's possible that a value >= 128 could match the
+            // character class, we output a call to RegexRunner.CharInClass, but we don't want to have to enumerate the
+            // entire character class evaluating every character against it, just to determine whether it's a match.
+            // Instead, we employ some quick heuristics that will always ensure we provide a correct answer even if
+            // we could have sometimes generated better code to give that answer.
+
+            // Generate the lookup table to store 128 answers as bits. We use a string instead of a byte[] / static
+            // data property because the generated C# can then simply index into a string literal.
+            string bitVectorString = StringExtensions.Create(8, (charClass, invariant), static (dest, state) => // String length is 8 chars == 16 bytes == 128 bits.
+            {
+                for (int i = 0; i < 128; i++)
+                {
+                    char c = (char)i;
+                    bool isSet = state.invariant ?
+                        RegexCharClass.CharInClass(char.ToLowerInvariant(c), state.charClass) :
+                        RegexCharClass.CharInClass(c, state.charClass);
+                    if (isSet)
+                    {
+                        dest[i >> 4] |= (char)(1 << (i & 0xF));
+                    }
+                }
+            });
+
+            // We determined that the character class may contain ASCII, so we
+            // output the lookup against the lookup table.
+
+            if (analysis.ContainsOnlyAscii)
+            {
+                // We know that all inputs that could match are ASCII, for example if the
+                // character class were [A-Za-z0-9], so since the ch is now known to be >= 128, we
+                // can just fail the comparison.
+                return $"((ch = {chExpr}) < 128 && ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) != 0)";
+            }
+
+            if (analysis.AllNonAsciiContained)
+            {
+                // We know that all non-ASCII inputs match, for example if the character
+                // class were [^\r\n], so since we just determined the ch to be >= 128, we can just
+                // give back success.
+                return $"((ch = {chExpr}) >= 128 || ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) != 0)";
+            }
+
+            // We know that the whole class wasn't ASCII, and we don't know anything about the non-ASCII
+            // characters other than that some might be included, for example if the character class
+            // were [\w\d], so since ch >= 128, we need to fall back to calling CharInClass.
+            return invariant ?
+                $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) != 0 : CharInClass(char.ToLowerInvariant((char)ch), {Literal(charClass)}))" :
+                $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) != 0 : CharInClass((char)ch, {Literal(charClass)}))";
+        }
+
+        /// <summary>Formats <paramref name="c"/> as a quoted C# character literal.</summary>
+        private static string Literal(char c) => SymbolDisplay.FormatLiteral(c, quote: true);
+
+        /// <summary>Formats <paramref name="s"/> as a quoted C# string literal.</summary>
+        private static string Literal(string s) => SymbolDisplay.FormatLiteral(s, quote: true);
+
+        /// <summary>Opens a braced block preceded by a <c>// title</c> comment; a blank line is written after the block closes.</summary>
+        private static FinishEmitScope EmitScope(IndentedTextWriter writer, string title) => EmitBlock(writer, $"// {title}", appendBlankLine: true);
+
+        /// <summary>
+        /// Writes <paramref name="clause"/> (when non-null) followed by an opening brace and increases the indent.
+        /// Disposing the returned scope restores the indent and writes the closing brace.
+        /// </summary>
+        private static FinishEmitScope EmitBlock(IndentedTextWriter writer, string? clause, bool appendBlankLine = false)
+        {
+            if (clause is not null)
+            {
+                writer.WriteLine(clause);
+            }
+            writer.WriteLine("{");
+            writer.Indent++;
+            return new FinishEmitScope(writer, appendBlankLine);
+        }
+
+        /// <summary>Closes a block opened by <see cref="EmitBlock"/> when disposed.</summary>
+        private readonly struct FinishEmitScope : IDisposable
+        {
+            private readonly IndentedTextWriter _writer;
+            private readonly bool _appendBlankLine;
+
+            public FinishEmitScope(IndentedTextWriter writer, bool appendBlankLine)
+            {
+                _writer = writer;
+                _appendBlankLine = appendBlankLine;
+            }
+
+            public void Dispose()
+            {
+                // _writer is null for a default-initialized scope, in which case there is nothing to close.
+                if (_writer is not null)
+                {
+                    _writer.Indent--;
+                    _writer.WriteLine("}");
+                    if (_appendBlankLine)
+                    {
+                        _writer.WriteLine();
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs
new file mode 100644
index 00000000000000..d56af82c9fa8fe
--- /dev/null
+++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs
@@ -0,0 +1,307 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Microsoft.CodeAnalysis;
+using Microsoft.CodeAnalysis.CSharp;
+using Microsoft.CodeAnalysis.CSharp.Syntax;
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Diagnostics;
+using System.Globalization;
+using System.Linq;
+using System.Threading;
+
+namespace System.Text.RegularExpressions.Generator
+{
+    public partial class RegexGenerator
+    {
+        private const string RegexName = "System.Text.RegularExpressions.Regex";
+        private const string RegexGeneratorAttributeName = "System.Text.RegularExpressions.RegexGeneratorAttribute";
+
+        /// <summary>Cheap syntactic filter: accepts only method declarations that carry at least one attribute list.</summary>
+        private static bool IsSyntaxTargetForGeneration(SyntaxNode node) =>
+            node is MethodDeclarationSyntax { AttributeLists: { Count: > 0 } };
+
+        /// <summary>
+        /// Returns the class that directly contains the method when one of the method's attributes binds to
+        /// <c>RegexGeneratorAttribute</c>; otherwise returns <see langword="null"/>. A <see langword="null"/> is also
+        /// returned when the method's parent is not a class declaration.
+        /// </summary>
+        private static ClassDeclarationSyntax? GetSemanticTargetForGeneration(GeneratorSyntaxContext context)
+        {
+            var methodDeclarationSyntax = (MethodDeclarationSyntax)context.Node;
+
+            foreach (AttributeListSyntax attributeListSyntax in methodDeclarationSyntax.AttributeLists)
+            {
+                foreach (AttributeSyntax attributeSyntax in attributeListSyntax.Attributes)
+                {
+                    // An attribute application binds to the attribute's constructor, hence IMethodSymbol.
+                    if (context.SemanticModel.GetSymbolInfo(attributeSyntax).Symbol is IMethodSymbol attributeSymbol &&
+                        attributeSymbol.ContainingType.ToDisplayString() == RegexGeneratorAttributeName)
+                    {
+                        return methodDeclarationSyntax.Parent as ClassDeclarationSyntax;
+                    }
+                }
+            }
+
+            return null;
+        }
+
+        /// <summary>
+        /// Inspects every method declared directly in <paramref name="classes"/>, validates those annotated with
+        /// <c>RegexGeneratorAttribute</c>, parses their patterns, and produces one <see cref="RegexClass"/> (linked to
+        /// its chain of containing types) per valid method.
+        /// </summary>
+        /// <param name="compilation">The compilation used to resolve symbols and semantic models.</param>
+        /// <param name="reportDiagnostic">Callback that receives a diagnostic for each rejected method.</param>
+        /// <param name="classes">The candidate class declarations.</param>
+        /// <param name="cancellationToken">Checked once per member visited.</param>
+        /// <returns>
+        /// The classes to emit; empty when <c>Regex</c> or <c>RegexGeneratorAttribute</c> cannot be resolved in
+        /// <paramref name="compilation"/>.
+        /// </returns>
+        private static IReadOnlyList<RegexClass> GetRegexClassesToEmit(Compilation compilation, Action<Diagnostic> reportDiagnostic, IEnumerable<ClassDeclarationSyntax> classes, CancellationToken cancellationToken)
+        {
+            // TODO: Use https://github.com/dotnet/runtime/pull/59092
+            INamedTypeSymbol? regexSymbol = compilation.GetTypeByMetadataName(RegexName);
+            INamedTypeSymbol? regexGeneratorAttributeSymbol = compilation.GetTypeByMetadataName(RegexGeneratorAttributeName);
+            if (regexSymbol is null || regexGeneratorAttributeSymbol is null)
+            {
+                // Required types aren't available
+                return Array.Empty<RegexClass>();
+            }
+
+            var results = new List<RegexClass>();
+
+            // Enumerate by SyntaxTree to minimize the need to instantiate semantic models (since they're expensive)
+            foreach (var group in classes.GroupBy(x => x.SyntaxTree))
+            {
+                SemanticModel? sm = null;
+                foreach (ClassDeclarationSyntax classDec in group)
+                {
+                    foreach (MemberDeclarationSyntax member in classDec.Members)
+                    {
+                        cancellationToken.ThrowIfCancellationRequested();
+
+                        // Scope to just methods
+                        if (member is not MethodDeclarationSyntax methodSyntax)
+                        {
+                            continue;
+                        }
+
+                        sm ??= compilation.GetSemanticModel(classDec.SyntaxTree);
+
+                        if (sm.GetDeclaredSymbol(methodSyntax, cancellationToken) is not IMethodSymbol regexMethodSymbol)
+                        {
+                            continue;
+                        }
+
+                        ImmutableArray<AttributeData> boundAttributes = regexMethodSymbol.GetAttributes();
+                        if (boundAttributes.IsDefaultOrEmpty)
+                        {
+                            continue;
+                        }
+
+                        DiagnosticDescriptor? errorDescriptor = null;
+                        RegexMethod? regexMethod = null;
+                        foreach (AttributeData attributeData in boundAttributes)
+                        {
+                            // If we already encountered an error, stop looking at this method's attributes.
+                            if (errorDescriptor is not null)
+                            {
+                                break;
+                            }
+
+                            // If this isn't the RegexGenerator attribute, skip it. The comparer tolerates a null
+                            // AttributeClass, which a direct instance Equals call would not.
+                            if (!SymbolEqualityComparer.Default.Equals(attributeData.AttributeClass, regexGeneratorAttributeSymbol))
+                            {
+                                continue;
+                            }
+
+                            if (attributeData.ConstructorArguments.Any(ca => ca.Kind == TypedConstantKind.Error))
+                            {
+                                errorDescriptor = DiagnosticDescriptors.InvalidRegexGeneratorAttribute;
+                                break;
+                            }
+
+                            // Constructor shapes handled: (pattern), (pattern, options), (pattern, options, matchTimeout).
+                            ImmutableArray<TypedConstant> items = attributeData.ConstructorArguments;
+                            if (items.Length is > 0 and <= 3 && items[0].Value is string pattern)
+                            {
+                                switch (items.Length)
+                                {
+                                    case 1:
+                                        regexMethod = new RegexMethod { Pattern = pattern };
+                                        break;
+
+                                    case 2:
+                                        regexMethod = new RegexMethod { Pattern = pattern, Options = items[1].Value as int?, };
+                                        break;
+
+                                    case 3:
+                                        regexMethod = new RegexMethod { Pattern = pattern, Options = items[1].Value as int?, MatchTimeout = items[2].Value as int?, };
+                                        break;
+                                }
+                            }
+                            else
+                            {
+                                errorDescriptor = DiagnosticDescriptors.InvalidRegexGeneratorAttribute;
+                            }
+                        }
+
+                        if (errorDescriptor is not null)
+                        {
+                            Diag(reportDiagnostic, errorDescriptor, methodSyntax.GetLocation());
+                            continue;
+                        }
+
+                        if (regexMethod is null)
+                        {
+                            continue;
+                        }
+
+                        if (regexMethod.Pattern is null)
+                        {
+                            Diag(reportDiagnostic, DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "(null)");
+                            continue;
+                        }
+
+                        if (!regexMethodSymbol.IsPartialDefinition)
+                        {
+                            Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodShouldBePartial, methodSyntax.GetLocation());
+                            continue;
+                        }
+
+                        if (!regexMethodSymbol.IsStatic)
+                        {
+                            Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustBeStatic, methodSyntax.GetLocation());
+                            continue;
+                        }
+
+                        // From here on the method is known to be a partial definition, which has no body.
+                        // Diagnostics therefore use the location of the whole declaration; dereferencing
+                        // methodSyntax.Body would throw a NullReferenceException.
+                        if (regexMethodSymbol.Parameters.Length != 0)
+                        {
+                            Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustBeParameterless, methodSyntax.GetLocation());
+                            continue;
+                        }
+
+                        if (regexMethodSymbol.Arity != 0)
+                        {
+                            Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustNotBeGeneric, methodSyntax.GetLocation());
+                            continue;
+                        }
+
+                        if (!SymbolEqualityComparer.Default.Equals(regexMethodSymbol.ReturnType, regexSymbol))
+                        {
+                            Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustReturnRegex, methodSyntax.GetLocation());
+                            continue;
+                        }
+
+                        if (classDec.SyntaxTree.Options is CSharpParseOptions { LanguageVersion: < LanguageVersion.CSharp10 })
+                        {
+                            Diag(reportDiagnostic, DiagnosticDescriptors.InvalidLangVersion, methodSyntax.GetLocation());
+                            continue;
+                        }
+
+                        regexMethod.MethodName = regexMethodSymbol.Name;
+                        regexMethod.Modifiers = methodSyntax.Modifiers.ToString();
+                        regexMethod.MatchTimeout ??= Timeout.Infinite;
+                        RegexOptions options = regexMethod.Options.HasValue ? (RegexOptions)regexMethod.Options.Value : RegexOptions.None;
+                        regexMethod.Options = (int)RegexOptions.Compiled | (int)options;
+
+                        // TODO: This is going to include the culture that's current at the time of compilation.
+                        // What should we do about that? We could:
+                        // - say not specifying CultureInvariant is invalid if anything about options or the expression will look at culture
+                        // - fall back to not generating source if it's not specified
+                        // - just use whatever culture is present at build time
+                        // - devise a new way of not using the culture present at build time
+                        // - ...
+                        CultureInfo culture = (options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
+
+                        // Parse the input
+                        try
+                        {
+                            regexMethod.Tree = RegexParser.Parse(regexMethod.Pattern, (RegexOptions)regexMethod.Options, culture);
+                            regexMethod.Code = RegexWriter.Write(regexMethod.Tree);
+                        }
+                        catch (Exception e)
+                        {
+                            // Any failure to parse or write the regex is surfaced to the user as a diagnostic.
+                            Diag(reportDiagnostic, DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), e.Message);
+                            continue;
+                        }
+
+                        // Determine the namespace the class is declared in, if any
+                        string? nameSpace = null;
+                        SyntaxNode? potentialNamespaceParent = classDec.Parent;
+                        while (potentialNamespaceParent is not null &&
+                               potentialNamespaceParent is not NamespaceDeclarationSyntax &&
+                               potentialNamespaceParent is not FileScopedNamespaceDeclarationSyntax)
+                        {
+                            potentialNamespaceParent = potentialNamespaceParent.Parent;
+                        }
+
+                        if (potentialNamespaceParent is BaseNamespaceDeclarationSyntax namespaceParent)
+                        {
+                            // Walk outward through nested namespace declarations, prepending each name.
+                            nameSpace = namespaceParent.Name.ToString();
+                            while (true)
+                            {
+                                namespaceParent = namespaceParent.Parent as NamespaceDeclarationSyntax;
+                                if (namespaceParent is null)
+                                {
+                                    break;
+                                }
+
+                                nameSpace = $"{namespaceParent.Name}.{nameSpace}";
+                            }
+                        }
+
+                        var rc = new RegexClass
+                        {
+                            Keyword = classDec.Keyword.ValueText,
+                            Namespace = nameSpace,
+                            Name = $"{classDec.Identifier}{classDec.TypeParameterList}",
+                            Constraints = classDec.ConstraintClauses.ToString(),
+                            ParentClass = null,
+                            Method = regexMethod,
+                        };
+
+                        // Record each containing type so nested declarations can be described from the inside out.
+                        RegexClass current = rc;
+                        var parent = classDec.Parent as TypeDeclarationSyntax;
+
+                        while (parent is not null && IsAllowedKind(parent.Kind()))
+                        {
+                            current.ParentClass = new RegexClass
+                            {
+                                Keyword = parent.Keyword.ValueText,
+                                Namespace = nameSpace,
+                                Name = $"{parent.Identifier}{parent.TypeParameterList}",
+                                Constraints = parent.ConstraintClauses.ToString(),
+                                ParentClass = null,
+                            };
+
+                            current = current.ParentClass;
+                            parent = parent.Parent as TypeDeclarationSyntax;
+                        }
+
+                        results.Add(rc);
+
+                        bool IsAllowedKind(SyntaxKind kind) =>
+                            kind == SyntaxKind.ClassDeclaration ||
+                            kind == SyntaxKind.StructDeclaration ||
+                            kind == SyntaxKind.RecordDeclaration ||
+                            kind == SyntaxKind.RecordStructDeclaration;
+                    }
+                }
+            }
+
+            return results;
+
+            static void Diag(Action<Diagnostic> reportDiagnostic, DiagnosticDescriptor desc, Location? location, params object?[]? messageArgs) =>
+                reportDiagnostic(Diagnostic.Create(desc, location, messageArgs));
+        }
+
+        /// <summary>A class holding a regex method.</summary>
+        internal sealed class RegexClass
+        {
+            public RegexMethod Method;
+            public string Keyword = string.Empty;
+            public string Namespace = string.Empty;
+            public string Name = string.Empty;
+            public string Constraints = string.Empty;
+            public RegexClass? ParentClass;
+        }
+
+        /// <summary>A regex method.</summary>
+        internal sealed class RegexMethod
+        {
+            public string MethodName = string.Empty;
+            public string Pattern = string.Empty;
+            public int? Options;
+            public int? MatchTimeout;
+            public string Modifiers = string.Empty;
+            public RegexTree Tree;
+            public RegexCode Code;
+        }
+    }
+}
diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs
new file mode 100644
index 00000000000000..e5c9f4bdc13ffe
--- /dev/null
+++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs
@@ -0,0 +1,68 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.Diagnostics.Tracing;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text;
+using Microsoft.CodeAnalysis;
+using Microsoft.CodeAnalysis.CSharp;
+using Microsoft.CodeAnalysis.CSharp.Syntax;
+using Microsoft.CodeAnalysis.Text;
+
+[assembly: System.Resources.NeutralResourcesLanguage("en-us")]
+
+namespace System.Text.RegularExpressions.Generator
+{
+    /// <summary>Generates C# source code to implement regular expressions.</summary>
+ [Generator] + public partial class RegexGenerator : IIncrementalGenerator + { + public void Initialize(IncrementalGeneratorInitializationContext context) + { + IncrementalValuesProvider classDeclarations = context.SyntaxProvider + .CreateSyntaxProvider( + static (s, _) => IsSyntaxTargetForGeneration(s), + static (ctx, _) => GetSemanticTargetForGeneration(ctx)) + .Where(static m => m is not null); + + IncrementalValueProvider<(Compilation, ImmutableArray)> compilationAndClasses = + context.CompilationProvider.Combine(classDeclarations.Collect()); + + context.RegisterImplementationSourceOutput(compilationAndClasses, static (context, source) => + { + ImmutableArray classes = source.Item2; + if (classes.IsDefaultOrEmpty) + { + return; + } + + string result = ""; + try + { + Compilation compilation = source.Item1; + IReadOnlyList regexClasses = GetRegexClassesToEmit(compilation, context.ReportDiagnostic, classes.Distinct(), context.CancellationToken); + if (regexClasses.Count != 0) + { + result = Emit(regexClasses, context.CancellationToken); + } + } + catch (Exception e) when (!(e is OperationCanceledException)) + { + result = "// ERROR:" + Environment.NewLine + string.Join(Environment.NewLine, + e.ToString().Split(new[] { "\r\n", "\n" }, StringSplitOptions.None).Select(s => $"// {SymbolDisplay.FormatLiteral(s, quote: true)}")); + } + + if (result.Length > 0) + { + context.AddSource("RegexGenerator.g.cs", result); + } + }); + } + } +} diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx b/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx new file mode 100644 index 00000000000000..8190800add9337 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx @@ -0,0 +1,318 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, 
Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + The RegexGeneratorAttribute is malformed. + + + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + + + The specified regex is invalid. '{0}' + + + Regex method cannot have a body. + + + Regex method must be static. + + + Regex method must be partial + + + Regex method must not be generic. + + + Regex method must be parameterless. + + + Regex method must return Regex. + + + C# LangVersion of 10 or greater is required. + + + Regular expression parser error '{0}' at offset {1}. + + + Alternation conditions do not capture and cannot be named. + + + Alternation conditions cannot be comments. + + + Destination array is not long enough to copy all the items in the collection. Check array index and length. + + + The array cannot contain null elements. + + + Cannot include class \\{0} in character range. + + + Cannot include class in character range. + + + Start index cannot be less than 0 or greater than input length. + + + Capture group numbers must be less than or equal to Int32.MaxValue. + + + Capture number cannot be zero. + + + Count cannot be less than -1. + + + Enumeration has either not started or has already finished. + + + Illegal conditional (?(...)) expression. + + + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + + + Illegal \\ at end of pattern. + + + Illegal {x,y} with x > y. + + + Incomplete \\p{X} character escape. + + + Internal error in ScanRegex. + + + Invalid group name: Group names must begin with a word character. + + + Argument {0} cannot be zero-length. + + + Length cannot be less than 0 or exceed input length. + + + Malformed \\k<...> named back reference. + + + (?({0}) ) malformed. + + + Alternation has malformed reference. 
+ + + Malformed \\p{X} character escape. + + + Invalid pattern '{0}' at offset {1}. {2} + + + Missing control character. + + + Nested quantifier '{0}'. + + + Nested quantifier no parenthesized. + + + Result cannot be called on a failed Match. + + + Not enough )'s. + + + Collection is read-only. + + + This operation is only allowed once per object. + + + This platform does not support writing compiled regular expressions to an assembly. + + + Quantifier {x,y} following nothing. + + + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + + + Replacement pattern error. + + + [x-y] range in reverse order. + + + A subtraction must be the last element in a character class. + + + Insufficient hexadecimal digits. + + + Too many | in (?()|). + + + Too many )'s. + + + Reference to undefined group number {0}. + + + Reference to undefined group number. + + + Reference to undefined group name '{0}'. + + + Reference to undefined group name. + + + (?({0}) ) reference to undefined group. + + + Alternation has a reference to undefined group. + + + Unexpected opcode in regular expression generation: {0}. + + + Unknown property '{0}'. + + + Unknown property Unicode property. + + + Unrecognized control character. + + + Unrecognized escape sequence \\{0}. + + + Unrecognized grouping construct. + + + Unterminated [] set. + + + Unterminated (?#...) comment. + + diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf new file mode 100644 index 00000000000000..50bb8a0bfa387e --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf @@ -0,0 +1,337 @@ + + + + + + Alternation conditions cannot be comments. + Alternation conditions cannot be comments. 
+ + + + Illegal conditional (?(...)) expression. + Illegal conditional (?(...)) expression. + + + + (?({0}) ) malformed. + (?({0}) ) malformed. + + + + Alternation has malformed reference. + Alternation has malformed reference. + + + + Alternation conditions do not capture and cannot be named. + Alternation conditions do not capture and cannot be named. + + + + Too many | in (?()|). + Too many | in (?()|). + + + + (?({0}) ) reference to undefined group. + (?({0}) ) reference to undefined group. + + + + Alternation has a reference to undefined group. + Alternation has a reference to undefined group. + + + + Destination array is not long enough to copy all the items in the collection. Check array index and length. + Destination array is not long enough to copy all the items in the collection. Check array index and length. + + + + The array cannot contain null elements. + The array cannot contain null elements. + + + + Start index cannot be less than 0 or greater than input length. + Start index cannot be less than 0 or greater than input length. + + + + Invalid group name: Group names must begin with a word character. + Invalid group name: Group names must begin with a word character. + + + + Capture number cannot be zero. + Capture number cannot be zero. + + + + Count cannot be less than -1. + Count cannot be less than -1. + + + + Enumeration has either not started or has already finished. + Enumeration has either not started or has already finished. + + + + A subtraction must be the last element in a character class. + A subtraction must be the last element in a character class. + + + + Regular expression parser error '{0}' at offset {1}. + Regular expression parser error '{0}' at offset {1}. + + + + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. 
+ AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + + + + Not enough )'s. + Not enough )'s. + + + + Too many )'s. + Too many )'s. + + + + Insufficient hexadecimal digits. + Insufficient hexadecimal digits. + + + + Internal error in ScanRegex. + Internal error in ScanRegex. + + + + Argument {0} cannot be zero-length. + Argument {0} cannot be zero-length. + + + + Unrecognized grouping construct. + Unrecognized grouping construct. + + + + C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required. + + + + The specified regex is invalid. '{0}' + The specified regex is invalid. '{0}' + + + + The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed. + + + + Incomplete \\p{X} character escape. + Incomplete \\p{X} character escape. + + + + Length cannot be less than 0 or exceed input length. + Length cannot be less than 0 or exceed input length. + + + + Invalid pattern '{0}' at offset {1}. {2} + Invalid pattern '{0}' at offset {1}. {2} + + + + Malformed \\k<...> named back reference. + Malformed \\k<...> named back reference. + + + + Malformed \\p{X} character escape. + Malformed \\p{X} character escape. + + + + Missing control character. + Missing control character. + + + + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + + + + Nested quantifier '{0}'. + Nested quantifier '{0}'. + + + + Nested quantifier no parenthesized. + Nested quantifier no parenthesized. + + + + Result cannot be called on a failed Match. + Result cannot be called on a failed Match. + + + + Collection is read-only. + Collection is read-only. + + + + This operation is only allowed once per object. + This operation is only allowed once per object. 
+ + + + This platform does not support writing compiled regular expressions to an assembly. + This platform does not support writing compiled regular expressions to an assembly. + + + + Quantifier {x,y} following nothing. + Quantifier {x,y} following nothing. + + + + Capture group numbers must be less than or equal to Int32.MaxValue. + Capture group numbers must be less than or equal to Int32.MaxValue. + + + + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + + + + Regex method cannot have a body. + Regex method cannot have a body. + + + + Regex method must be parameterless. + Regex method must be parameterless. + + + + Regex method must be partial + Regex method must be partial + + + + Regex method must be static. + Regex method must be static. + + + + Regex method must not be generic. + Regex method must not be generic. + + + + Regex method must return Regex. + Regex method must return Regex. + + + + Replacement pattern error. + Replacement pattern error. + + + + [x-y] range in reverse order. + [x-y] range in reverse order. + + + + Illegal {x,y} with x > y. + Illegal {x,y} with x > y. + + + + Cannot include class \\{0} in character range. + Cannot include class \\{0} in character range. + + + + Cannot include class in character range. + Cannot include class in character range. + + + + Reference to undefined group name '{0}'. + Reference to undefined group name '{0}'. + + + + Reference to undefined group name. + Reference to undefined group name. + + + + Reference to undefined group number {0}. + Reference to undefined group number {0}. 
+ + + + Reference to undefined group number. + Reference to undefined group number. + + + + Illegal \\ at end of pattern. + Illegal \\ at end of pattern. + + + + Unexpected opcode in regular expression generation: {0}. + Unexpected opcode in regular expression generation: {0}. + + + + Unrecognized control character. + Unrecognized control character. + + + + Unrecognized escape sequence \\{0}. + Unrecognized escape sequence \\{0}. + + + + Unknown property '{0}'. + Unknown property '{0}'. + + + + Unknown property Unicode property. + Unknown property Unicode property. + + + + Unterminated [] set. + Unterminated [] set. + + + + Unterminated (?#...) comment. + Unterminated (?#...) comment. + + + + + \ No newline at end of file diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf new file mode 100644 index 00000000000000..5723d6e86984ef --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf @@ -0,0 +1,337 @@ + + + + + + Alternation conditions cannot be comments. + Alternation conditions cannot be comments. + + + + Illegal conditional (?(...)) expression. + Illegal conditional (?(...)) expression. + + + + (?({0}) ) malformed. + (?({0}) ) malformed. + + + + Alternation has malformed reference. + Alternation has malformed reference. + + + + Alternation conditions do not capture and cannot be named. + Alternation conditions do not capture and cannot be named. + + + + Too many | in (?()|). + Too many | in (?()|). + + + + (?({0}) ) reference to undefined group. + (?({0}) ) reference to undefined group. + + + + Alternation has a reference to undefined group. + Alternation has a reference to undefined group. + + + + Destination array is not long enough to copy all the items in the collection. Check array index and length. + Destination array is not long enough to copy all the items in the collection. 
Check array index and length. + + + + The array cannot contain null elements. + The array cannot contain null elements. + + + + Start index cannot be less than 0 or greater than input length. + Start index cannot be less than 0 or greater than input length. + + + + Invalid group name: Group names must begin with a word character. + Invalid group name: Group names must begin with a word character. + + + + Capture number cannot be zero. + Capture number cannot be zero. + + + + Count cannot be less than -1. + Count cannot be less than -1. + + + + Enumeration has either not started or has already finished. + Enumeration has either not started or has already finished. + + + + A subtraction must be the last element in a character class. + A subtraction must be the last element in a character class. + + + + Regular expression parser error '{0}' at offset {1}. + Regular expression parser error '{0}' at offset {1}. + + + + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + + + + Not enough )'s. + Not enough )'s. + + + + Too many )'s. + Too many )'s. + + + + Insufficient hexadecimal digits. + Insufficient hexadecimal digits. + + + + Internal error in ScanRegex. + Internal error in ScanRegex. + + + + Argument {0} cannot be zero-length. + Argument {0} cannot be zero-length. + + + + Unrecognized grouping construct. + Unrecognized grouping construct. + + + + C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required. + + + + The specified regex is invalid. '{0}' + The specified regex is invalid. '{0}' + + + + The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed. + + + + Incomplete \\p{X} character escape. + Incomplete \\p{X} character escape. 
+ + + + Length cannot be less than 0 or exceed input length. + Length cannot be less than 0 or exceed input length. + + + + Invalid pattern '{0}' at offset {1}. {2} + Invalid pattern '{0}' at offset {1}. {2} + + + + Malformed \\k<...> named back reference. + Malformed \\k<...> named back reference. + + + + Malformed \\p{X} character escape. + Malformed \\p{X} character escape. + + + + Missing control character. + Missing control character. + + + + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + + + + Nested quantifier '{0}'. + Nested quantifier '{0}'. + + + + Nested quantifier no parenthesized. + Nested quantifier no parenthesized. + + + + Result cannot be called on a failed Match. + Result cannot be called on a failed Match. + + + + Collection is read-only. + Collection is read-only. + + + + This operation is only allowed once per object. + This operation is only allowed once per object. + + + + This platform does not support writing compiled regular expressions to an assembly. + This platform does not support writing compiled regular expressions to an assembly. + + + + Quantifier {x,y} following nothing. + Quantifier {x,y} following nothing. + + + + Capture group numbers must be less than or equal to Int32.MaxValue. + Capture group numbers must be less than or equal to Int32.MaxValue. + + + + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + + + + Regex method cannot have a body. + Regex method cannot have a body. 
+ + + + Regex method must be parameterless. + Regex method must be parameterless. + + + + Regex method must be partial + Regex method must be partial + + + + Regex method must be static. + Regex method must be static. + + + + Regex method must not be generic. + Regex method must not be generic. + + + + Regex method must return Regex. + Regex method must return Regex. + + + + Replacement pattern error. + Replacement pattern error. + + + + [x-y] range in reverse order. + [x-y] range in reverse order. + + + + Illegal {x,y} with x > y. + Illegal {x,y} with x > y. + + + + Cannot include class \\{0} in character range. + Cannot include class \\{0} in character range. + + + + Cannot include class in character range. + Cannot include class in character range. + + + + Reference to undefined group name '{0}'. + Reference to undefined group name '{0}'. + + + + Reference to undefined group name. + Reference to undefined group name. + + + + Reference to undefined group number {0}. + Reference to undefined group number {0}. + + + + Reference to undefined group number. + Reference to undefined group number. + + + + Illegal \\ at end of pattern. + Illegal \\ at end of pattern. + + + + Unexpected opcode in regular expression generation: {0}. + Unexpected opcode in regular expression generation: {0}. + + + + Unrecognized control character. + Unrecognized control character. + + + + Unrecognized escape sequence \\{0}. + Unrecognized escape sequence \\{0}. + + + + Unknown property '{0}'. + Unknown property '{0}'. + + + + Unknown property Unicode property. + Unknown property Unicode property. + + + + Unterminated [] set. + Unterminated [] set. + + + + Unterminated (?#...) comment. + Unterminated (?#...) comment. 
+ + + + + \ No newline at end of file diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf new file mode 100644 index 00000000000000..68627df92ca80b --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf @@ -0,0 +1,337 @@ + + + + + + Alternation conditions cannot be comments. + Alternation conditions cannot be comments. + + + + Illegal conditional (?(...)) expression. + Illegal conditional (?(...)) expression. + + + + (?({0}) ) malformed. + (?({0}) ) malformed. + + + + Alternation has malformed reference. + Alternation has malformed reference. + + + + Alternation conditions do not capture and cannot be named. + Alternation conditions do not capture and cannot be named. + + + + Too many | in (?()|). + Too many | in (?()|). + + + + (?({0}) ) reference to undefined group. + (?({0}) ) reference to undefined group. + + + + Alternation has a reference to undefined group. + Alternation has a reference to undefined group. + + + + Destination array is not long enough to copy all the items in the collection. Check array index and length. + Destination array is not long enough to copy all the items in the collection. Check array index and length. + + + + The array cannot contain null elements. + The array cannot contain null elements. + + + + Start index cannot be less than 0 or greater than input length. + Start index cannot be less than 0 or greater than input length. + + + + Invalid group name: Group names must begin with a word character. + Invalid group name: Group names must begin with a word character. + + + + Capture number cannot be zero. + Capture number cannot be zero. + + + + Count cannot be less than -1. + Count cannot be less than -1. + + + + Enumeration has either not started or has already finished. + Enumeration has either not started or has already finished. 
+ + + + A subtraction must be the last element in a character class. + A subtraction must be the last element in a character class. + + + + Regular expression parser error '{0}' at offset {1}. + Regular expression parser error '{0}' at offset {1}. + + + + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + + + + Not enough )'s. + Not enough )'s. + + + + Too many )'s. + Too many )'s. + + + + Insufficient hexadecimal digits. + Insufficient hexadecimal digits. + + + + Internal error in ScanRegex. + Internal error in ScanRegex. + + + + Argument {0} cannot be zero-length. + Argument {0} cannot be zero-length. + + + + Unrecognized grouping construct. + Unrecognized grouping construct. + + + + C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required. + + + + The specified regex is invalid. '{0}' + The specified regex is invalid. '{0}' + + + + The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed. + + + + Incomplete \\p{X} character escape. + Incomplete \\p{X} character escape. + + + + Length cannot be less than 0 or exceed input length. + Length cannot be less than 0 or exceed input length. + + + + Invalid pattern '{0}' at offset {1}. {2} + Invalid pattern '{0}' at offset {1}. {2} + + + + Malformed \\k<...> named back reference. + Malformed \\k<...> named back reference. + + + + Malformed \\p{X} character escape. + Malformed \\p{X} character escape. + + + + Missing control character. + Missing control character. + + + + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + + + + Nested quantifier '{0}'. + Nested quantifier '{0}'. 
+ + + + Nested quantifier no parenthesized. + Nested quantifier no parenthesized. + + + + Result cannot be called on a failed Match. + Result cannot be called on a failed Match. + + + + Collection is read-only. + Collection is read-only. + + + + This operation is only allowed once per object. + This operation is only allowed once per object. + + + + This platform does not support writing compiled regular expressions to an assembly. + This platform does not support writing compiled regular expressions to an assembly. + + + + Quantifier {x,y} following nothing. + Quantifier {x,y} following nothing. + + + + Capture group numbers must be less than or equal to Int32.MaxValue. + Capture group numbers must be less than or equal to Int32.MaxValue. + + + + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + + + + Regex method cannot have a body. + Regex method cannot have a body. + + + + Regex method must be parameterless. + Regex method must be parameterless. + + + + Regex method must be partial + Regex method must be partial + + + + Regex method must be static. + Regex method must be static. + + + + Regex method must not be generic. + Regex method must not be generic. + + + + Regex method must return Regex. + Regex method must return Regex. + + + + Replacement pattern error. + Replacement pattern error. + + + + [x-y] range in reverse order. + [x-y] range in reverse order. + + + + Illegal {x,y} with x > y. + Illegal {x,y} with x > y. + + + + Cannot include class \\{0} in character range. + Cannot include class \\{0} in character range. 
+ + + + Cannot include class in character range. + Cannot include class in character range. + + + + Reference to undefined group name '{0}'. + Reference to undefined group name '{0}'. + + + + Reference to undefined group name. + Reference to undefined group name. + + + + Reference to undefined group number {0}. + Reference to undefined group number {0}. + + + + Reference to undefined group number. + Reference to undefined group number. + + + + Illegal \\ at end of pattern. + Illegal \\ at end of pattern. + + + + Unexpected opcode in regular expression generation: {0}. + Unexpected opcode in regular expression generation: {0}. + + + + Unrecognized control character. + Unrecognized control character. + + + + Unrecognized escape sequence \\{0}. + Unrecognized escape sequence \\{0}. + + + + Unknown property '{0}'. + Unknown property '{0}'. + + + + Unknown property Unicode property. + Unknown property Unicode property. + + + + Unterminated [] set. + Unterminated [] set. + + + + Unterminated (?#...) comment. + Unterminated (?#...) comment. + + + + + \ No newline at end of file diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf new file mode 100644 index 00000000000000..6b8cd2e3b94cf0 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf @@ -0,0 +1,337 @@ + + + + + + Alternation conditions cannot be comments. + Alternation conditions cannot be comments. + + + + Illegal conditional (?(...)) expression. + Illegal conditional (?(...)) expression. + + + + (?({0}) ) malformed. + (?({0}) ) malformed. + + + + Alternation has malformed reference. + Alternation has malformed reference. + + + + Alternation conditions do not capture and cannot be named. + Alternation conditions do not capture and cannot be named. + + + + Too many | in (?()|). + Too many | in (?()|). + + + + (?({0}) ) reference to undefined group. 
+ (?({0}) ) reference to undefined group. + + + + Alternation has a reference to undefined group. + Alternation has a reference to undefined group. + + + + Destination array is not long enough to copy all the items in the collection. Check array index and length. + Destination array is not long enough to copy all the items in the collection. Check array index and length. + + + + The array cannot contain null elements. + The array cannot contain null elements. + + + + Start index cannot be less than 0 or greater than input length. + Start index cannot be less than 0 or greater than input length. + + + + Invalid group name: Group names must begin with a word character. + Invalid group name: Group names must begin with a word character. + + + + Capture number cannot be zero. + Capture number cannot be zero. + + + + Count cannot be less than -1. + Count cannot be less than -1. + + + + Enumeration has either not started or has already finished. + Enumeration has either not started or has already finished. + + + + A subtraction must be the last element in a character class. + A subtraction must be the last element in a character class. + + + + Regular expression parser error '{0}' at offset {1}. + Regular expression parser error '{0}' at offset {1}. + + + + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + + + + Not enough )'s. + Not enough )'s. + + + + Too many )'s. + Too many )'s. + + + + Insufficient hexadecimal digits. + Insufficient hexadecimal digits. + + + + Internal error in ScanRegex. + Internal error in ScanRegex. + + + + Argument {0} cannot be zero-length. + Argument {0} cannot be zero-length. + + + + Unrecognized grouping construct. + Unrecognized grouping construct. 
+ + + + C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required. + + + + The specified regex is invalid. '{0}' + The specified regex is invalid. '{0}' + + + + The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed. + + + + Incomplete \\p{X} character escape. + Incomplete \\p{X} character escape. + + + + Length cannot be less than 0 or exceed input length. + Length cannot be less than 0 or exceed input length. + + + + Invalid pattern '{0}' at offset {1}. {2} + Invalid pattern '{0}' at offset {1}. {2} + + + + Malformed \\k<...> named back reference. + Malformed \\k<...> named back reference. + + + + Malformed \\p{X} character escape. + Malformed \\p{X} character escape. + + + + Missing control character. + Missing control character. + + + + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + + + + Nested quantifier '{0}'. + Nested quantifier '{0}'. + + + + Nested quantifier no parenthesized. + Nested quantifier no parenthesized. + + + + Result cannot be called on a failed Match. + Result cannot be called on a failed Match. + + + + Collection is read-only. + Collection is read-only. + + + + This operation is only allowed once per object. + This operation is only allowed once per object. + + + + This platform does not support writing compiled regular expressions to an assembly. + This platform does not support writing compiled regular expressions to an assembly. + + + + Quantifier {x,y} following nothing. + Quantifier {x,y} following nothing. + + + + Capture group numbers must be less than or equal to Int32.MaxValue. + Capture group numbers must be less than or equal to Int32.MaxValue. + + + + The RegEx engine has timed out while trying to match a pattern to an input string. 
This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + + + + Regex method cannot have a body. + Regex method cannot have a body. + + + + Regex method must be parameterless. + Regex method must be parameterless. + + + + Regex method must be partial + Regex method must be partial + + + + Regex method must be static. + Regex method must be static. + + + + Regex method must not be generic. + Regex method must not be generic. + + + + Regex method must return Regex. + Regex method must return Regex. + + + + Replacement pattern error. + Replacement pattern error. + + + + [x-y] range in reverse order. + [x-y] range in reverse order. + + + + Illegal {x,y} with x > y. + Illegal {x,y} with x > y. + + + + Cannot include class \\{0} in character range. + Cannot include class \\{0} in character range. + + + + Cannot include class in character range. + Cannot include class in character range. + + + + Reference to undefined group name '{0}'. + Reference to undefined group name '{0}'. + + + + Reference to undefined group name. + Reference to undefined group name. + + + + Reference to undefined group number {0}. + Reference to undefined group number {0}. + + + + Reference to undefined group number. + Reference to undefined group number. + + + + Illegal \\ at end of pattern. + Illegal \\ at end of pattern. + + + + Unexpected opcode in regular expression generation: {0}. + Unexpected opcode in regular expression generation: {0}. + + + + Unrecognized control character. + Unrecognized control character. + + + + Unrecognized escape sequence \\{0}. + Unrecognized escape sequence \\{0}. + + + + Unknown property '{0}'. + Unknown property '{0}'. 
+ + + + Unknown property Unicode property. + Unknown property Unicode property. + + + + Unterminated [] set. + Unterminated [] set. + + + + Unterminated (?#...) comment. + Unterminated (?#...) comment. + + + + + \ No newline at end of file diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf new file mode 100644 index 00000000000000..9a7bbbc73c9f32 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf @@ -0,0 +1,337 @@ + + + + + + Alternation conditions cannot be comments. + Alternation conditions cannot be comments. + + + + Illegal conditional (?(...)) expression. + Illegal conditional (?(...)) expression. + + + + (?({0}) ) malformed. + (?({0}) ) malformed. + + + + Alternation has malformed reference. + Alternation has malformed reference. + + + + Alternation conditions do not capture and cannot be named. + Alternation conditions do not capture and cannot be named. + + + + Too many | in (?()|). + Too many | in (?()|). + + + + (?({0}) ) reference to undefined group. + (?({0}) ) reference to undefined group. + + + + Alternation has a reference to undefined group. + Alternation has a reference to undefined group. + + + + Destination array is not long enough to copy all the items in the collection. Check array index and length. + Destination array is not long enough to copy all the items in the collection. Check array index and length. + + + + The array cannot contain null elements. + The array cannot contain null elements. + + + + Start index cannot be less than 0 or greater than input length. + Start index cannot be less than 0 or greater than input length. + + + + Invalid group name: Group names must begin with a word character. + Invalid group name: Group names must begin with a word character. + + + + Capture number cannot be zero. + Capture number cannot be zero. + + + + Count cannot be less than -1. 
+ Count cannot be less than -1. + + + + Enumeration has either not started or has already finished. + Enumeration has either not started or has already finished. + + + + A subtraction must be the last element in a character class. + A subtraction must be the last element in a character class. + + + + Regular expression parser error '{0}' at offset {1}. + Regular expression parser error '{0}' at offset {1}. + + + + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + + + + Not enough )'s. + Not enough )'s. + + + + Too many )'s. + Too many )'s. + + + + Insufficient hexadecimal digits. + Insufficient hexadecimal digits. + + + + Internal error in ScanRegex. + Internal error in ScanRegex. + + + + Argument {0} cannot be zero-length. + Argument {0} cannot be zero-length. + + + + Unrecognized grouping construct. + Unrecognized grouping construct. + + + + C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required. + + + + The specified regex is invalid. '{0}' + The specified regex is invalid. '{0}' + + + + The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed. + + + + Incomplete \\p{X} character escape. + Incomplete \\p{X} character escape. + + + + Length cannot be less than 0 or exceed input length. + Length cannot be less than 0 or exceed input length. + + + + Invalid pattern '{0}' at offset {1}. {2} + Invalid pattern '{0}' at offset {1}. {2} + + + + Malformed \\k<...> named back reference. + Malformed \\k<...> named back reference. + + + + Malformed \\p{X} character escape. + Malformed \\p{X} character escape. + + + + Missing control character. + Missing control character. + + + + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. 
+ Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + + + + Nested quantifier '{0}'. + Nested quantifier '{0}'. + + + + Nested quantifier no parenthesized. + Nested quantifier no parenthesized. + + + + Result cannot be called on a failed Match. + Result cannot be called on a failed Match. + + + + Collection is read-only. + Collection is read-only. + + + + This operation is only allowed once per object. + This operation is only allowed once per object. + + + + This platform does not support writing compiled regular expressions to an assembly. + This platform does not support writing compiled regular expressions to an assembly. + + + + Quantifier {x,y} following nothing. + Quantifier {x,y} following nothing. + + + + Capture group numbers must be less than or equal to Int32.MaxValue. + Capture group numbers must be less than or equal to Int32.MaxValue. + + + + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + + + + Regex method cannot have a body. + Regex method cannot have a body. + + + + Regex method must be parameterless. + Regex method must be parameterless. + + + + Regex method must be partial + Regex method must be partial + + + + Regex method must be static. + Regex method must be static. + + + + Regex method must not be generic. + Regex method must not be generic. + + + + Regex method must return Regex. + Regex method must return Regex. + + + + Replacement pattern error. + Replacement pattern error. + + + + [x-y] range in reverse order. + [x-y] range in reverse order. + + + + Illegal {x,y} with x > y. 
+ Illegal {x,y} with x > y. + + + + Cannot include class \\{0} in character range. + Cannot include class \\{0} in character range. + + + + Cannot include class in character range. + Cannot include class in character range. + + + + Reference to undefined group name '{0}'. + Reference to undefined group name '{0}'. + + + + Reference to undefined group name. + Reference to undefined group name. + + + + Reference to undefined group number {0}. + Reference to undefined group number {0}. + + + + Reference to undefined group number. + Reference to undefined group number. + + + + Illegal \\ at end of pattern. + Illegal \\ at end of pattern. + + + + Unexpected opcode in regular expression generation: {0}. + Unexpected opcode in regular expression generation: {0}. + + + + Unrecognized control character. + Unrecognized control character. + + + + Unrecognized escape sequence \\{0}. + Unrecognized escape sequence \\{0}. + + + + Unknown property '{0}'. + Unknown property '{0}'. + + + + Unknown property Unicode property. + Unknown property Unicode property. + + + + Unterminated [] set. + Unterminated [] set. + + + + Unterminated (?#...) comment. + Unterminated (?#...) comment. + + + + + \ No newline at end of file diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf new file mode 100644 index 00000000000000..a7139acb2e2fdc --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf @@ -0,0 +1,337 @@ + + + + + + Alternation conditions cannot be comments. + Alternation conditions cannot be comments. + + + + Illegal conditional (?(...)) expression. + Illegal conditional (?(...)) expression. + + + + (?({0}) ) malformed. + (?({0}) ) malformed. + + + + Alternation has malformed reference. + Alternation has malformed reference. + + + + Alternation conditions do not capture and cannot be named. 
+ Alternation conditions do not capture and cannot be named. + + + + Too many | in (?()|). + Too many | in (?()|). + + + + (?({0}) ) reference to undefined group. + (?({0}) ) reference to undefined group. + + + + Alternation has a reference to undefined group. + Alternation has a reference to undefined group. + + + + Destination array is not long enough to copy all the items in the collection. Check array index and length. + Destination array is not long enough to copy all the items in the collection. Check array index and length. + + + + The array cannot contain null elements. + The array cannot contain null elements. + + + + Start index cannot be less than 0 or greater than input length. + Start index cannot be less than 0 or greater than input length. + + + + Invalid group name: Group names must begin with a word character. + Invalid group name: Group names must begin with a word character. + + + + Capture number cannot be zero. + Capture number cannot be zero. + + + + Count cannot be less than -1. + Count cannot be less than -1. + + + + Enumeration has either not started or has already finished. + Enumeration has either not started or has already finished. + + + + A subtraction must be the last element in a character class. + A subtraction must be the last element in a character class. + + + + Regular expression parser error '{0}' at offset {1}. + Regular expression parser error '{0}' at offset {1}. + + + + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + + + + Not enough )'s. + Not enough )'s. + + + + Too many )'s. + Too many )'s. + + + + Insufficient hexadecimal digits. + Insufficient hexadecimal digits. + + + + Internal error in ScanRegex. + Internal error in ScanRegex. 
+ + + + Argument {0} cannot be zero-length. + Argument {0} cannot be zero-length. + + + + Unrecognized grouping construct. + Unrecognized grouping construct. + + + + C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required. + + + + The specified regex is invalid. '{0}' + The specified regex is invalid. '{0}' + + + + The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed. + + + + Incomplete \\p{X} character escape. + Incomplete \\p{X} character escape. + + + + Length cannot be less than 0 or exceed input length. + Length cannot be less than 0 or exceed input length. + + + + Invalid pattern '{0}' at offset {1}. {2} + Invalid pattern '{0}' at offset {1}. {2} + + + + Malformed \\k<...> named back reference. + Malformed \\k<...> named back reference. + + + + Malformed \\p{X} character escape. + Malformed \\p{X} character escape. + + + + Missing control character. + Missing control character. + + + + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + + + + Nested quantifier '{0}'. + Nested quantifier '{0}'. + + + + Nested quantifier no parenthesized. + Nested quantifier no parenthesized. + + + + Result cannot be called on a failed Match. + Result cannot be called on a failed Match. + + + + Collection is read-only. + Collection is read-only. + + + + This operation is only allowed once per object. + This operation is only allowed once per object. + + + + This platform does not support writing compiled regular expressions to an assembly. + This platform does not support writing compiled regular expressions to an assembly. + + + + Quantifier {x,y} following nothing. + Quantifier {x,y} following nothing. + + + + Capture group numbers must be less than or equal to Int32.MaxValue. + Capture group numbers must be less than or equal to Int32.MaxValue. 
+ + + + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + + + + Regex method cannot have a body. + Regex method cannot have a body. + + + + Regex method must be parameterless. + Regex method must be parameterless. + + + + Regex method must be partial + Regex method must be partial + + + + Regex method must be static. + Regex method must be static. + + + + Regex method must not be generic. + Regex method must not be generic. + + + + Regex method must return Regex. + Regex method must return Regex. + + + + Replacement pattern error. + Replacement pattern error. + + + + [x-y] range in reverse order. + [x-y] range in reverse order. + + + + Illegal {x,y} with x > y. + Illegal {x,y} with x > y. + + + + Cannot include class \\{0} in character range. + Cannot include class \\{0} in character range. + + + + Cannot include class in character range. + Cannot include class in character range. + + + + Reference to undefined group name '{0}'. + Reference to undefined group name '{0}'. + + + + Reference to undefined group name. + Reference to undefined group name. + + + + Reference to undefined group number {0}. + Reference to undefined group number {0}. + + + + Reference to undefined group number. + Reference to undefined group number. + + + + Illegal \\ at end of pattern. + Illegal \\ at end of pattern. + + + + Unexpected opcode in regular expression generation: {0}. + Unexpected opcode in regular expression generation: {0}. + + + + Unrecognized control character. + Unrecognized control character. + + + + Unrecognized escape sequence \\{0}. 
+ Unrecognized escape sequence \\{0}. + + + + Unknown property '{0}'. + Unknown property '{0}'. + + + + Unknown property Unicode property. + Unknown property Unicode property. + + + + Unterminated [] set. + Unterminated [] set. + + + + Unterminated (?#...) comment. + Unterminated (?#...) comment. + + + + + \ No newline at end of file diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf new file mode 100644 index 00000000000000..5727824c263dd0 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf @@ -0,0 +1,337 @@ + + + + + + Alternation conditions cannot be comments. + Alternation conditions cannot be comments. + + + + Illegal conditional (?(...)) expression. + Illegal conditional (?(...)) expression. + + + + (?({0}) ) malformed. + (?({0}) ) malformed. + + + + Alternation has malformed reference. + Alternation has malformed reference. + + + + Alternation conditions do not capture and cannot be named. + Alternation conditions do not capture and cannot be named. + + + + Too many | in (?()|). + Too many | in (?()|). + + + + (?({0}) ) reference to undefined group. + (?({0}) ) reference to undefined group. + + + + Alternation has a reference to undefined group. + Alternation has a reference to undefined group. + + + + Destination array is not long enough to copy all the items in the collection. Check array index and length. + Destination array is not long enough to copy all the items in the collection. Check array index and length. + + + + The array cannot contain null elements. + The array cannot contain null elements. + + + + Start index cannot be less than 0 or greater than input length. + Start index cannot be less than 0 or greater than input length. + + + + Invalid group name: Group names must begin with a word character. + Invalid group name: Group names must begin with a word character. 
+ + + + Capture number cannot be zero. + Capture number cannot be zero. + + + + Count cannot be less than -1. + Count cannot be less than -1. + + + + Enumeration has either not started or has already finished. + Enumeration has either not started or has already finished. + + + + A subtraction must be the last element in a character class. + A subtraction must be the last element in a character class. + + + + Regular expression parser error '{0}' at offset {1}. + Regular expression parser error '{0}' at offset {1}. + + + + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + + + + Not enough )'s. + Not enough )'s. + + + + Too many )'s. + Too many )'s. + + + + Insufficient hexadecimal digits. + Insufficient hexadecimal digits. + + + + Internal error in ScanRegex. + Internal error in ScanRegex. + + + + Argument {0} cannot be zero-length. + Argument {0} cannot be zero-length. + + + + Unrecognized grouping construct. + Unrecognized grouping construct. + + + + C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required. + + + + The specified regex is invalid. '{0}' + The specified regex is invalid. '{0}' + + + + The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed. + + + + Incomplete \\p{X} character escape. + Incomplete \\p{X} character escape. + + + + Length cannot be less than 0 or exceed input length. + Length cannot be less than 0 or exceed input length. + + + + Invalid pattern '{0}' at offset {1}. {2} + Invalid pattern '{0}' at offset {1}. {2} + + + + Malformed \\k<...> named back reference. + Malformed \\k<...> named back reference. + + + + Malformed \\p{X} character escape. + Malformed \\p{X} character escape. + + + + Missing control character. 
+ Missing control character. + + + + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + + + + Nested quantifier '{0}'. + Nested quantifier '{0}'. + + + + Nested quantifier no parenthesized. + Nested quantifier no parenthesized. + + + + Result cannot be called on a failed Match. + Result cannot be called on a failed Match. + + + + Collection is read-only. + Collection is read-only. + + + + This operation is only allowed once per object. + This operation is only allowed once per object. + + + + This platform does not support writing compiled regular expressions to an assembly. + This platform does not support writing compiled regular expressions to an assembly. + + + + Quantifier {x,y} following nothing. + Quantifier {x,y} following nothing. + + + + Capture group numbers must be less than or equal to Int32.MaxValue. + Capture group numbers must be less than or equal to Int32.MaxValue. + + + + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + + + + Regex method cannot have a body. + Regex method cannot have a body. + + + + Regex method must be parameterless. + Regex method must be parameterless. + + + + Regex method must be partial + Regex method must be partial + + + + Regex method must be static. + Regex method must be static. + + + + Regex method must not be generic. + Regex method must not be generic. + + + + Regex method must return Regex. + Regex method must return Regex. + + + + Replacement pattern error. 
+ Replacement pattern error. + + + + [x-y] range in reverse order. + [x-y] range in reverse order. + + + + Illegal {x,y} with x > y. + Illegal {x,y} with x > y. + + + + Cannot include class \\{0} in character range. + Cannot include class \\{0} in character range. + + + + Cannot include class in character range. + Cannot include class in character range. + + + + Reference to undefined group name '{0}'. + Reference to undefined group name '{0}'. + + + + Reference to undefined group name. + Reference to undefined group name. + + + + Reference to undefined group number {0}. + Reference to undefined group number {0}. + + + + Reference to undefined group number. + Reference to undefined group number. + + + + Illegal \\ at end of pattern. + Illegal \\ at end of pattern. + + + + Unexpected opcode in regular expression generation: {0}. + Unexpected opcode in regular expression generation: {0}. + + + + Unrecognized control character. + Unrecognized control character. + + + + Unrecognized escape sequence \\{0}. + Unrecognized escape sequence \\{0}. + + + + Unknown property '{0}'. + Unknown property '{0}'. + + + + Unknown property Unicode property. + Unknown property Unicode property. + + + + Unterminated [] set. + Unterminated [] set. + + + + Unterminated (?#...) comment. + Unterminated (?#...) comment. + + + + + \ No newline at end of file diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf new file mode 100644 index 00000000000000..ede3705af5d083 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf @@ -0,0 +1,337 @@ + + + + + + Alternation conditions cannot be comments. + Alternation conditions cannot be comments. + + + + Illegal conditional (?(...)) expression. + Illegal conditional (?(...)) expression. + + + + (?({0}) ) malformed. + (?({0}) ) malformed. + + + + Alternation has malformed reference. 
+ Alternation has malformed reference. + + + + Alternation conditions do not capture and cannot be named. + Alternation conditions do not capture and cannot be named. + + + + Too many | in (?()|). + Too many | in (?()|). + + + + (?({0}) ) reference to undefined group. + (?({0}) ) reference to undefined group. + + + + Alternation has a reference to undefined group. + Alternation has a reference to undefined group. + + + + Destination array is not long enough to copy all the items in the collection. Check array index and length. + Destination array is not long enough to copy all the items in the collection. Check array index and length. + + + + The array cannot contain null elements. + The array cannot contain null elements. + + + + Start index cannot be less than 0 or greater than input length. + Start index cannot be less than 0 or greater than input length. + + + + Invalid group name: Group names must begin with a word character. + Invalid group name: Group names must begin with a word character. + + + + Capture number cannot be zero. + Capture number cannot be zero. + + + + Count cannot be less than -1. + Count cannot be less than -1. + + + + Enumeration has either not started or has already finished. + Enumeration has either not started or has already finished. + + + + A subtraction must be the last element in a character class. + A subtraction must be the last element in a character class. + + + + Regular expression parser error '{0}' at offset {1}. + Regular expression parser error '{0}' at offset {1}. + + + + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + + + + Not enough )'s. + Not enough )'s. + + + + Too many )'s. + Too many )'s. + + + + Insufficient hexadecimal digits. + Insufficient hexadecimal digits. 
+ + + + Internal error in ScanRegex. + Internal error in ScanRegex. + + + + Argument {0} cannot be zero-length. + Argument {0} cannot be zero-length. + + + + Unrecognized grouping construct. + Unrecognized grouping construct. + + + + C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required. + + + + The specified regex is invalid. '{0}' + The specified regex is invalid. '{0}' + + + + The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed. + + + + Incomplete \\p{X} character escape. + Incomplete \\p{X} character escape. + + + + Length cannot be less than 0 or exceed input length. + Length cannot be less than 0 or exceed input length. + + + + Invalid pattern '{0}' at offset {1}. {2} + Invalid pattern '{0}' at offset {1}. {2} + + + + Malformed \\k<...> named back reference. + Malformed \\k<...> named back reference. + + + + Malformed \\p{X} character escape. + Malformed \\p{X} character escape. + + + + Missing control character. + Missing control character. + + + + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + + + + Nested quantifier '{0}'. + Nested quantifier '{0}'. + + + + Nested quantifier no parenthesized. + Nested quantifier no parenthesized. + + + + Result cannot be called on a failed Match. + Result cannot be called on a failed Match. + + + + Collection is read-only. + Collection is read-only. + + + + This operation is only allowed once per object. + This operation is only allowed once per object. + + + + This platform does not support writing compiled regular expressions to an assembly. + This platform does not support writing compiled regular expressions to an assembly. + + + + Quantifier {x,y} following nothing. + Quantifier {x,y} following nothing. + + + + Capture group numbers must be less than or equal to Int32.MaxValue. 
+ Capture group numbers must be less than or equal to Int32.MaxValue. + + + + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + + + + Regex method cannot have a body. + Regex method cannot have a body. + + + + Regex method must be parameterless. + Regex method must be parameterless. + + + + Regex method must be partial + Regex method must be partial + + + + Regex method must be static. + Regex method must be static. + + + + Regex method must not be generic. + Regex method must not be generic. + + + + Regex method must return Regex. + Regex method must return Regex. + + + + Replacement pattern error. + Replacement pattern error. + + + + [x-y] range in reverse order. + [x-y] range in reverse order. + + + + Illegal {x,y} with x > y. + Illegal {x,y} with x > y. + + + + Cannot include class \\{0} in character range. + Cannot include class \\{0} in character range. + + + + Cannot include class in character range. + Cannot include class in character range. + + + + Reference to undefined group name '{0}'. + Reference to undefined group name '{0}'. + + + + Reference to undefined group name. + Reference to undefined group name. + + + + Reference to undefined group number {0}. + Reference to undefined group number {0}. + + + + Reference to undefined group number. + Reference to undefined group number. + + + + Illegal \\ at end of pattern. + Illegal \\ at end of pattern. + + + + Unexpected opcode in regular expression generation: {0}. + Unexpected opcode in regular expression generation: {0}. + + + + Unrecognized control character. 
+ Unrecognized control character. + + + + Unrecognized escape sequence \\{0}. + Unrecognized escape sequence \\{0}. + + + + Unknown property '{0}'. + Unknown property '{0}'. + + + + Unknown property Unicode property. + Unknown property Unicode property. + + + + Unterminated [] set. + Unterminated [] set. + + + + Unterminated (?#...) comment. + Unterminated (?#...) comment. + + + + + \ No newline at end of file diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf new file mode 100644 index 00000000000000..59972b955b3739 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf @@ -0,0 +1,337 @@ + + + + + + Alternation conditions cannot be comments. + Alternation conditions cannot be comments. + + + + Illegal conditional (?(...)) expression. + Illegal conditional (?(...)) expression. + + + + (?({0}) ) malformed. + (?({0}) ) malformed. + + + + Alternation has malformed reference. + Alternation has malformed reference. + + + + Alternation conditions do not capture and cannot be named. + Alternation conditions do not capture and cannot be named. + + + + Too many | in (?()|). + Too many | in (?()|). + + + + (?({0}) ) reference to undefined group. + (?({0}) ) reference to undefined group. + + + + Alternation has a reference to undefined group. + Alternation has a reference to undefined group. + + + + Destination array is not long enough to copy all the items in the collection. Check array index and length. + Destination array is not long enough to copy all the items in the collection. Check array index and length. + + + + The array cannot contain null elements. + The array cannot contain null elements. + + + + Start index cannot be less than 0 or greater than input length. + Start index cannot be less than 0 or greater than input length. 
+ + + + Invalid group name: Group names must begin with a word character. + Invalid group name: Group names must begin with a word character. + + + + Capture number cannot be zero. + Capture number cannot be zero. + + + + Count cannot be less than -1. + Count cannot be less than -1. + + + + Enumeration has either not started or has already finished. + Enumeration has either not started or has already finished. + + + + A subtraction must be the last element in a character class. + A subtraction must be the last element in a character class. + + + + Regular expression parser error '{0}' at offset {1}. + Regular expression parser error '{0}' at offset {1}. + + + + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + + + + Not enough )'s. + Not enough )'s. + + + + Too many )'s. + Too many )'s. + + + + Insufficient hexadecimal digits. + Insufficient hexadecimal digits. + + + + Internal error in ScanRegex. + Internal error in ScanRegex. + + + + Argument {0} cannot be zero-length. + Argument {0} cannot be zero-length. + + + + Unrecognized grouping construct. + Unrecognized grouping construct. + + + + C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required. + + + + The specified regex is invalid. '{0}' + The specified regex is invalid. '{0}' + + + + The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed. + + + + Incomplete \\p{X} character escape. + Incomplete \\p{X} character escape. + + + + Length cannot be less than 0 or exceed input length. + Length cannot be less than 0 or exceed input length. + + + + Invalid pattern '{0}' at offset {1}. {2} + Invalid pattern '{0}' at offset {1}. {2} + + + + Malformed \\k<...> named back reference. 
+ Malformed \\k<...> named back reference. + + + + Malformed \\p{X} character escape. + Malformed \\p{X} character escape. + + + + Missing control character. + Missing control character. + + + + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + + + + Nested quantifier '{0}'. + Nested quantifier '{0}'. + + + + Nested quantifier no parenthesized. + Nested quantifier no parenthesized. + + + + Result cannot be called on a failed Match. + Result cannot be called on a failed Match. + + + + Collection is read-only. + Collection is read-only. + + + + This operation is only allowed once per object. + This operation is only allowed once per object. + + + + This platform does not support writing compiled regular expressions to an assembly. + This platform does not support writing compiled regular expressions to an assembly. + + + + Quantifier {x,y} following nothing. + Quantifier {x,y} following nothing. + + + + Capture group numbers must be less than or equal to Int32.MaxValue. + Capture group numbers must be less than or equal to Int32.MaxValue. + + + + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + + + + Regex method cannot have a body. + Regex method cannot have a body. + + + + Regex method must be parameterless. + Regex method must be parameterless. + + + + Regex method must be partial + Regex method must be partial + + + + Regex method must be static. + Regex method must be static. + + + + Regex method must not be generic. 
+ Regex method must not be generic. + + + + Regex method must return Regex. + Regex method must return Regex. + + + + Replacement pattern error. + Replacement pattern error. + + + + [x-y] range in reverse order. + [x-y] range in reverse order. + + + + Illegal {x,y} with x > y. + Illegal {x,y} with x > y. + + + + Cannot include class \\{0} in character range. + Cannot include class \\{0} in character range. + + + + Cannot include class in character range. + Cannot include class in character range. + + + + Reference to undefined group name '{0}'. + Reference to undefined group name '{0}'. + + + + Reference to undefined group name. + Reference to undefined group name. + + + + Reference to undefined group number {0}. + Reference to undefined group number {0}. + + + + Reference to undefined group number. + Reference to undefined group number. + + + + Illegal \\ at end of pattern. + Illegal \\ at end of pattern. + + + + Unexpected opcode in regular expression generation: {0}. + Unexpected opcode in regular expression generation: {0}. + + + + Unrecognized control character. + Unrecognized control character. + + + + Unrecognized escape sequence \\{0}. + Unrecognized escape sequence \\{0}. + + + + Unknown property '{0}'. + Unknown property '{0}'. + + + + Unknown property Unicode property. + Unknown property Unicode property. + + + + Unterminated [] set. + Unterminated [] set. + + + + Unterminated (?#...) comment. + Unterminated (?#...) comment. + + + + + \ No newline at end of file diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf new file mode 100644 index 00000000000000..a9f4fa76856711 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf @@ -0,0 +1,337 @@ + + + + + + Alternation conditions cannot be comments. + Alternation conditions cannot be comments. + + + + Illegal conditional (?(...)) expression. 
+ Illegal conditional (?(...)) expression. + + + + (?({0}) ) malformed. + (?({0}) ) malformed. + + + + Alternation has malformed reference. + Alternation has malformed reference. + + + + Alternation conditions do not capture and cannot be named. + Alternation conditions do not capture and cannot be named. + + + + Too many | in (?()|). + Too many | in (?()|). + + + + (?({0}) ) reference to undefined group. + (?({0}) ) reference to undefined group. + + + + Alternation has a reference to undefined group. + Alternation has a reference to undefined group. + + + + Destination array is not long enough to copy all the items in the collection. Check array index and length. + Destination array is not long enough to copy all the items in the collection. Check array index and length. + + + + The array cannot contain null elements. + The array cannot contain null elements. + + + + Start index cannot be less than 0 or greater than input length. + Start index cannot be less than 0 or greater than input length. + + + + Invalid group name: Group names must begin with a word character. + Invalid group name: Group names must begin with a word character. + + + + Capture number cannot be zero. + Capture number cannot be zero. + + + + Count cannot be less than -1. + Count cannot be less than -1. + + + + Enumeration has either not started or has already finished. + Enumeration has either not started or has already finished. + + + + A subtraction must be the last element in a character class. + A subtraction must be the last element in a character class. + + + + Regular expression parser error '{0}' at offset {1}. + Regular expression parser error '{0}' at offset {1}. + + + + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. 
+ + + + Not enough )'s. + Not enough )'s. + + + + Too many )'s. + Too many )'s. + + + + Insufficient hexadecimal digits. + Insufficient hexadecimal digits. + + + + Internal error in ScanRegex. + Internal error in ScanRegex. + + + + Argument {0} cannot be zero-length. + Argument {0} cannot be zero-length. + + + + Unrecognized grouping construct. + Unrecognized grouping construct. + + + + C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required. + + + + The specified regex is invalid. '{0}' + The specified regex is invalid. '{0}' + + + + The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed. + + + + Incomplete \\p{X} character escape. + Incomplete \\p{X} character escape. + + + + Length cannot be less than 0 or exceed input length. + Length cannot be less than 0 or exceed input length. + + + + Invalid pattern '{0}' at offset {1}. {2} + Invalid pattern '{0}' at offset {1}. {2} + + + + Malformed \\k<...> named back reference. + Malformed \\k<...> named back reference. + + + + Malformed \\p{X} character escape. + Malformed \\p{X} character escape. + + + + Missing control character. + Missing control character. + + + + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + + + + Nested quantifier '{0}'. + Nested quantifier '{0}'. + + + + Nested quantifier no parenthesized. + Nested quantifier no parenthesized. + + + + Result cannot be called on a failed Match. + Result cannot be called on a failed Match. + + + + Collection is read-only. + Collection is read-only. + + + + This operation is only allowed once per object. + This operation is only allowed once per object. + + + + This platform does not support writing compiled regular expressions to an assembly. + This platform does not support writing compiled regular expressions to an assembly. + + + + Quantifier {x,y} following nothing. 
+ Quantifier {x,y} following nothing. + + + + Capture group numbers must be less than or equal to Int32.MaxValue. + Capture group numbers must be less than or equal to Int32.MaxValue. + + + + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + + + + Regex method cannot have a body. + Regex method cannot have a body. + + + + Regex method must be parameterless. + Regex method must be parameterless. + + + + Regex method must be partial + Regex method must be partial + + + + Regex method must be static. + Regex method must be static. + + + + Regex method must not be generic. + Regex method must not be generic. + + + + Regex method must return Regex. + Regex method must return Regex. + + + + Replacement pattern error. + Replacement pattern error. + + + + [x-y] range in reverse order. + [x-y] range in reverse order. + + + + Illegal {x,y} with x > y. + Illegal {x,y} with x > y. + + + + Cannot include class \\{0} in character range. + Cannot include class \\{0} in character range. + + + + Cannot include class in character range. + Cannot include class in character range. + + + + Reference to undefined group name '{0}'. + Reference to undefined group name '{0}'. + + + + Reference to undefined group name. + Reference to undefined group name. + + + + Reference to undefined group number {0}. + Reference to undefined group number {0}. + + + + Reference to undefined group number. + Reference to undefined group number. + + + + Illegal \\ at end of pattern. + Illegal \\ at end of pattern. + + + + Unexpected opcode in regular expression generation: {0}. 
+ Unexpected opcode in regular expression generation: {0}. + + + + Unrecognized control character. + Unrecognized control character. + + + + Unrecognized escape sequence \\{0}. + Unrecognized escape sequence \\{0}. + + + + Unknown property '{0}'. + Unknown property '{0}'. + + + + Unknown property Unicode property. + Unknown property Unicode property. + + + + Unterminated [] set. + Unterminated [] set. + + + + Unterminated (?#...) comment. + Unterminated (?#...) comment. + + + + + \ No newline at end of file diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf new file mode 100644 index 00000000000000..9644be4868a323 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf @@ -0,0 +1,337 @@ + + + + + + Alternation conditions cannot be comments. + Alternation conditions cannot be comments. + + + + Illegal conditional (?(...)) expression. + Illegal conditional (?(...)) expression. + + + + (?({0}) ) malformed. + (?({0}) ) malformed. + + + + Alternation has malformed reference. + Alternation has malformed reference. + + + + Alternation conditions do not capture and cannot be named. + Alternation conditions do not capture and cannot be named. + + + + Too many | in (?()|). + Too many | in (?()|). + + + + (?({0}) ) reference to undefined group. + (?({0}) ) reference to undefined group. + + + + Alternation has a reference to undefined group. + Alternation has a reference to undefined group. + + + + Destination array is not long enough to copy all the items in the collection. Check array index and length. + Destination array is not long enough to copy all the items in the collection. Check array index and length. + + + + The array cannot contain null elements. + The array cannot contain null elements. + + + + Start index cannot be less than 0 or greater than input length. 
+ Start index cannot be less than 0 or greater than input length. + + + + Invalid group name: Group names must begin with a word character. + Invalid group name: Group names must begin with a word character. + + + + Capture number cannot be zero. + Capture number cannot be zero. + + + + Count cannot be less than -1. + Count cannot be less than -1. + + + + Enumeration has either not started or has already finished. + Enumeration has either not started or has already finished. + + + + A subtraction must be the last element in a character class. + A subtraction must be the last element in a character class. + + + + Regular expression parser error '{0}' at offset {1}. + Regular expression parser error '{0}' at offset {1}. + + + + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + + + + Not enough )'s. + Not enough )'s. + + + + Too many )'s. + Too many )'s. + + + + Insufficient hexadecimal digits. + Insufficient hexadecimal digits. + + + + Internal error in ScanRegex. + Internal error in ScanRegex. + + + + Argument {0} cannot be zero-length. + Argument {0} cannot be zero-length. + + + + Unrecognized grouping construct. + Unrecognized grouping construct. + + + + C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required. + + + + The specified regex is invalid. '{0}' + The specified regex is invalid. '{0}' + + + + The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed. + + + + Incomplete \\p{X} character escape. + Incomplete \\p{X} character escape. + + + + Length cannot be less than 0 or exceed input length. + Length cannot be less than 0 or exceed input length. + + + + Invalid pattern '{0}' at offset {1}. {2} + Invalid pattern '{0}' at offset {1}. 
{2} + + + + Malformed \\k<...> named back reference. + Malformed \\k<...> named back reference. + + + + Malformed \\p{X} character escape. + Malformed \\p{X} character escape. + + + + Missing control character. + Missing control character. + + + + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + + + + Nested quantifier '{0}'. + Nested quantifier '{0}'. + + + + Nested quantifier no parenthesized. + Nested quantifier no parenthesized. + + + + Result cannot be called on a failed Match. + Result cannot be called on a failed Match. + + + + Collection is read-only. + Collection is read-only. + + + + This operation is only allowed once per object. + This operation is only allowed once per object. + + + + This platform does not support writing compiled regular expressions to an assembly. + This platform does not support writing compiled regular expressions to an assembly. + + + + Quantifier {x,y} following nothing. + Quantifier {x,y} following nothing. + + + + Capture group numbers must be less than or equal to Int32.MaxValue. + Capture group numbers must be less than or equal to Int32.MaxValue. + + + + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + + + + Regex method cannot have a body. + Regex method cannot have a body. + + + + Regex method must be parameterless. + Regex method must be parameterless. + + + + Regex method must be partial + Regex method must be partial + + + + Regex method must be static. 
+ Regex method must be static. + + + + Regex method must not be generic. + Regex method must not be generic. + + + + Regex method must return Regex. + Regex method must return Regex. + + + + Replacement pattern error. + Replacement pattern error. + + + + [x-y] range in reverse order. + [x-y] range in reverse order. + + + + Illegal {x,y} with x > y. + Illegal {x,y} with x > y. + + + + Cannot include class \\{0} in character range. + Cannot include class \\{0} in character range. + + + + Cannot include class in character range. + Cannot include class in character range. + + + + Reference to undefined group name '{0}'. + Reference to undefined group name '{0}'. + + + + Reference to undefined group name. + Reference to undefined group name. + + + + Reference to undefined group number {0}. + Reference to undefined group number {0}. + + + + Reference to undefined group number. + Reference to undefined group number. + + + + Illegal \\ at end of pattern. + Illegal \\ at end of pattern. + + + + Unexpected opcode in regular expression generation: {0}. + Unexpected opcode in regular expression generation: {0}. + + + + Unrecognized control character. + Unrecognized control character. + + + + Unrecognized escape sequence \\{0}. + Unrecognized escape sequence \\{0}. + + + + Unknown property '{0}'. + Unknown property '{0}'. + + + + Unknown property Unicode property. + Unknown property Unicode property. + + + + Unterminated [] set. + Unterminated [] set. + + + + Unterminated (?#...) comment. + Unterminated (?#...) comment. + + + + + \ No newline at end of file diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf new file mode 100644 index 00000000000000..362d1b7d1f7252 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf @@ -0,0 +1,337 @@ + + + + + + Alternation conditions cannot be comments. 
+ Alternation conditions cannot be comments. + + + + Illegal conditional (?(...)) expression. + Illegal conditional (?(...)) expression. + + + + (?({0}) ) malformed. + (?({0}) ) malformed. + + + + Alternation has malformed reference. + Alternation has malformed reference. + + + + Alternation conditions do not capture and cannot be named. + Alternation conditions do not capture and cannot be named. + + + + Too many | in (?()|). + Too many | in (?()|). + + + + (?({0}) ) reference to undefined group. + (?({0}) ) reference to undefined group. + + + + Alternation has a reference to undefined group. + Alternation has a reference to undefined group. + + + + Destination array is not long enough to copy all the items in the collection. Check array index and length. + Destination array is not long enough to copy all the items in the collection. Check array index and length. + + + + The array cannot contain null elements. + The array cannot contain null elements. + + + + Start index cannot be less than 0 or greater than input length. + Start index cannot be less than 0 or greater than input length. + + + + Invalid group name: Group names must begin with a word character. + Invalid group name: Group names must begin with a word character. + + + + Capture number cannot be zero. + Capture number cannot be zero. + + + + Count cannot be less than -1. + Count cannot be less than -1. + + + + Enumeration has either not started or has already finished. + Enumeration has either not started or has already finished. + + + + A subtraction must be the last element in a character class. + A subtraction must be the last element in a character class. + + + + Regular expression parser error '{0}' at offset {1}. + Regular expression parser error '{0}' at offset {1}. + + + + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. 
+ AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + + + + Not enough )'s. + Not enough )'s. + + + + Too many )'s. + Too many )'s. + + + + Insufficient hexadecimal digits. + Insufficient hexadecimal digits. + + + + Internal error in ScanRegex. + Internal error in ScanRegex. + + + + Argument {0} cannot be zero-length. + Argument {0} cannot be zero-length. + + + + Unrecognized grouping construct. + Unrecognized grouping construct. + + + + C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required. + + + + The specified regex is invalid. '{0}' + The specified regex is invalid. '{0}' + + + + The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed. + + + + Incomplete \\p{X} character escape. + Incomplete \\p{X} character escape. + + + + Length cannot be less than 0 or exceed input length. + Length cannot be less than 0 or exceed input length. + + + + Invalid pattern '{0}' at offset {1}. {2} + Invalid pattern '{0}' at offset {1}. {2} + + + + Malformed \\k<...> named back reference. + Malformed \\k<...> named back reference. + + + + Malformed \\p{X} character escape. + Malformed \\p{X} character escape. + + + + Missing control character. + Missing control character. + + + + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + + + + Nested quantifier '{0}'. + Nested quantifier '{0}'. + + + + Nested quantifier no parenthesized. + Nested quantifier no parenthesized. + + + + Result cannot be called on a failed Match. + Result cannot be called on a failed Match. + + + + Collection is read-only. + Collection is read-only. + + + + This operation is only allowed once per object. + This operation is only allowed once per object. 
+ + + + This platform does not support writing compiled regular expressions to an assembly. + This platform does not support writing compiled regular expressions to an assembly. + + + + Quantifier {x,y} following nothing. + Quantifier {x,y} following nothing. + + + + Capture group numbers must be less than or equal to Int32.MaxValue. + Capture group numbers must be less than or equal to Int32.MaxValue. + + + + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + + + + Regex method cannot have a body. + Regex method cannot have a body. + + + + Regex method must be parameterless. + Regex method must be parameterless. + + + + Regex method must be partial + Regex method must be partial + + + + Regex method must be static. + Regex method must be static. + + + + Regex method must not be generic. + Regex method must not be generic. + + + + Regex method must return Regex. + Regex method must return Regex. + + + + Replacement pattern error. + Replacement pattern error. + + + + [x-y] range in reverse order. + [x-y] range in reverse order. + + + + Illegal {x,y} with x > y. + Illegal {x,y} with x > y. + + + + Cannot include class \\{0} in character range. + Cannot include class \\{0} in character range. + + + + Cannot include class in character range. + Cannot include class in character range. + + + + Reference to undefined group name '{0}'. + Reference to undefined group name '{0}'. + + + + Reference to undefined group name. + Reference to undefined group name. + + + + Reference to undefined group number {0}. + Reference to undefined group number {0}. 
+ + + + Reference to undefined group number. + Reference to undefined group number. + + + + Illegal \\ at end of pattern. + Illegal \\ at end of pattern. + + + + Unexpected opcode in regular expression generation: {0}. + Unexpected opcode in regular expression generation: {0}. + + + + Unrecognized control character. + Unrecognized control character. + + + + Unrecognized escape sequence \\{0}. + Unrecognized escape sequence \\{0}. + + + + Unknown property '{0}'. + Unknown property '{0}'. + + + + Unknown property Unicode property. + Unknown property Unicode property. + + + + Unterminated [] set. + Unterminated [] set. + + + + Unterminated (?#...) comment. + Unterminated (?#...) comment. + + + + + \ No newline at end of file diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf new file mode 100644 index 00000000000000..5d9ecad1efbc22 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf @@ -0,0 +1,337 @@ + + + + + + Alternation conditions cannot be comments. + Alternation conditions cannot be comments. + + + + Illegal conditional (?(...)) expression. + Illegal conditional (?(...)) expression. + + + + (?({0}) ) malformed. + (?({0}) ) malformed. + + + + Alternation has malformed reference. + Alternation has malformed reference. + + + + Alternation conditions do not capture and cannot be named. + Alternation conditions do not capture and cannot be named. + + + + Too many | in (?()|). + Too many | in (?()|). + + + + (?({0}) ) reference to undefined group. + (?({0}) ) reference to undefined group. + + + + Alternation has a reference to undefined group. + Alternation has a reference to undefined group. + + + + Destination array is not long enough to copy all the items in the collection. Check array index and length. + Destination array is not long enough to copy all the items in the collection. 
Check array index and length. + + + + The array cannot contain null elements. + The array cannot contain null elements. + + + + Start index cannot be less than 0 or greater than input length. + Start index cannot be less than 0 or greater than input length. + + + + Invalid group name: Group names must begin with a word character. + Invalid group name: Group names must begin with a word character. + + + + Capture number cannot be zero. + Capture number cannot be zero. + + + + Count cannot be less than -1. + Count cannot be less than -1. + + + + Enumeration has either not started or has already finished. + Enumeration has either not started or has already finished. + + + + A subtraction must be the last element in a character class. + A subtraction must be the last element in a character class. + + + + Regular expression parser error '{0}' at offset {1}. + Regular expression parser error '{0}' at offset {1}. + + + + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + AppDomain data '{0}' contains the invalid value or object '{1}' for specifying a default matching timeout for System.Text.RegularExpressions.Regex. + + + + Not enough )'s. + Not enough )'s. + + + + Too many )'s. + Too many )'s. + + + + Insufficient hexadecimal digits. + Insufficient hexadecimal digits. + + + + Internal error in ScanRegex. + Internal error in ScanRegex. + + + + Argument {0} cannot be zero-length. + Argument {0} cannot be zero-length. + + + + Unrecognized grouping construct. + Unrecognized grouping construct. + + + + C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required. + + + + The specified regex is invalid. '{0}' + The specified regex is invalid. '{0}' + + + + The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed. + + + + Incomplete \\p{X} character escape. + Incomplete \\p{X} character escape. 
+ + + + Length cannot be less than 0 or exceed input length. + Length cannot be less than 0 or exceed input length. + + + + Invalid pattern '{0}' at offset {1}. {2} + Invalid pattern '{0}' at offset {1}. {2} + + + + Malformed \\k<...> named back reference. + Malformed \\k<...> named back reference. + + + + Malformed \\p{X} character escape. + Malformed \\p{X} character escape. + + + + Missing control character. + Missing control character. + + + + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + + + + Nested quantifier '{0}'. + Nested quantifier '{0}'. + + + + Nested quantifier no parenthesized. + Nested quantifier no parenthesized. + + + + Result cannot be called on a failed Match. + Result cannot be called on a failed Match. + + + + Collection is read-only. + Collection is read-only. + + + + This operation is only allowed once per object. + This operation is only allowed once per object. + + + + This platform does not support writing compiled regular expressions to an assembly. + This platform does not support writing compiled regular expressions to an assembly. + + + + Quantifier {x,y} following nothing. + Quantifier {x,y} following nothing. + + + + Capture group numbers must be less than or equal to Int32.MaxValue. + Capture group numbers must be less than or equal to Int32.MaxValue. + + + + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. + + + + Regex method cannot have a body. + Regex method cannot have a body. 
+ + + + Regex method must be parameterless. + Regex method must be parameterless. + + + + Regex method must be partial + Regex method must be partial + + + + Regex method must be static. + Regex method must be static. + + + + Regex method must not be generic. + Regex method must not be generic. + + + + Regex method must return Regex. + Regex method must return Regex. + + + + Replacement pattern error. + Replacement pattern error. + + + + [x-y] range in reverse order. + [x-y] range in reverse order. + + + + Illegal {x,y} with x > y. + Illegal {x,y} with x > y. + + + + Cannot include class \\{0} in character range. + Cannot include class \\{0} in character range. + + + + Cannot include class in character range. + Cannot include class in character range. + + + + Reference to undefined group name '{0}'. + Reference to undefined group name '{0}'. + + + + Reference to undefined group name. + Reference to undefined group name. + + + + Reference to undefined group number {0}. + Reference to undefined group number {0}. + + + + Reference to undefined group number. + Reference to undefined group number. + + + + Illegal \\ at end of pattern. + Illegal \\ at end of pattern. + + + + Unexpected opcode in regular expression generation: {0}. + Unexpected opcode in regular expression generation: {0}. + + + + Unrecognized control character. + Unrecognized control character. + + + + Unrecognized escape sequence \\{0}. + Unrecognized escape sequence \\{0}. + + + + Unknown property '{0}'. + Unknown property '{0}'. + + + + Unknown property Unicode property. + Unknown property Unicode property. + + + + Unterminated [] set. + Unterminated [] set. + + + + Unterminated (?#...) comment. + Unterminated (?#...) comment. 
+ + + + + \ No newline at end of file diff --git a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj new file mode 100644 index 00000000000000..ccd6510cb346f7 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj @@ -0,0 +1,43 @@ + + + + netstandard2.0 + enable + true + false + true + false + false + cs + $(NoWarn);CS0436;CS0649 + true + $(DefineConstants);REGEXGENERATOR + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs index bf12098b1ec29a..4c033c0b4a475d 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs @@ -359,10 +359,10 @@ public int Scan(string text, int index, int beglimit, int endlimit) #if DEBUG /// Used when dumping for debugging. 
- [ExcludeFromCodeCoverage(Justification = "Debug only")] + [ExcludeFromCodeCoverage] public override string ToString() => Dump(string.Empty); - [ExcludeFromCodeCoverage(Justification = "Debug only")] + [ExcludeFromCodeCoverage] public string Dump(string indent) { var sb = new StringBuilder(); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index f0565bbffe51c7..0821069cf561fa 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -518,7 +518,7 @@ public void AddWord(bool ecma, bool negate) { if (ecma) { - AddSet(negate ? NotECMAWordSet : ECMAWordSet); + AddSet((negate ? NotECMAWordSet : ECMAWordSet).AsSpan()); } else { @@ -530,7 +530,7 @@ public void AddSpace(bool ecma, bool negate) { if (ecma) { - AddSet(negate ? NotECMASpaceSet : ECMASpaceSet); + AddSet((negate ? NotECMASpaceSet : ECMASpaceSet).AsSpan()); } else { @@ -542,7 +542,7 @@ public void AddDigit(bool ecma, bool negate, string pattern, int currentPos) { if (ecma) { - AddSet(negate ? NotECMADigitSet : ECMADigitSet); + AddSet((negate ? 
NotECMADigitSet : ECMADigitSet).AsSpan()); } else { @@ -559,7 +559,12 @@ public static string ConvertOldStringsToClass(string set, string category) strLength -= 2; } - return string.Create(strLength, (set, category, startsWithNulls), static (span, state) => +#if REGEXGENERATOR + return StringExtensions.Create +#else + return string.Create +#endif + (strLength, (set, category, startsWithNulls), static (span, state) => { int index; @@ -576,11 +581,11 @@ public static string ConvertOldStringsToClass(string set, string category) span[FlagsIndex] = '\0'; span[SetLengthIndex] = (char)state.set.Length; span[CategoryLengthIndex] = (char)state.category.Length; - state.set.CopyTo(span.Slice(SetStartIndex)); + state.set.AsSpan().CopyTo(span.Slice(SetStartIndex)); index = SetStartIndex + state.set.Length; } - state.category.CopyTo(span.Slice(index)); + state.category.AsSpan().CopyTo(span.Slice(index)); }); } @@ -832,6 +837,43 @@ static bool MayOverlapByEnumeration(string set1, string set2) } } + /// Gets whether the specified character participates in case conversion. + /// + /// This method is used to perform operations as if they were case-sensitive even if they're + /// specified as being case-insensitive. Such a reduction can be applied when the only character + /// that would lower-case to the one being searched for / compared against is that character itself. 
+ /// + public static bool ParticipatesInCaseConversion(int comparison) + { + Debug.Assert((uint)comparison <= char.MaxValue); + + switch (char.GetUnicodeCategory((char)comparison)) + { + case UnicodeCategory.ClosePunctuation: + case UnicodeCategory.ConnectorPunctuation: + case UnicodeCategory.Control: + case UnicodeCategory.DashPunctuation: + case UnicodeCategory.DecimalDigitNumber: + case UnicodeCategory.FinalQuotePunctuation: + case UnicodeCategory.InitialQuotePunctuation: + case UnicodeCategory.LineSeparator: + case UnicodeCategory.OpenPunctuation: + case UnicodeCategory.OtherNumber: + case UnicodeCategory.OtherPunctuation: + case UnicodeCategory.ParagraphSeparator: + case UnicodeCategory.SpaceSeparator: + // All chars in these categories meet the criteria that the only way + // `char.ToLower(toTest, AnyCulture) == charInAboveCategory` is when + // toTest == charInAboveCategory. + return false; + + default: + // We don't know (without testing the character against every other + // character), so assume it does. + return true; + } + } + /// Gets whether we can iterate through the set list pairs in order to completely enumerate the set's contents. private static bool CanEasilyEnumerateSetContents(string set) => set.Length > SetStartIndex && @@ -877,7 +919,7 @@ internal static CharClassAnalysisResults Analyze(string set) // everything ASCII is included. return new CharClassAnalysisResults { - AllNonAsciiContained = set[^1] < 128, + AllNonAsciiContained = set[set.Length - 1] < 128, AllAsciiContained = set[SetStartIndex] >= 128, ContainsNoAscii = false, ContainsOnlyAscii = false @@ -890,7 +932,7 @@ internal static CharClassAnalysisResults Analyze(string set) { AllNonAsciiContained = false, AllAsciiContained = false, - ContainsOnlyAscii = set[^1] <= 128, + ContainsOnlyAscii = set[set.Length - 1] <= 128, ContainsNoAscii = set[SetStartIndex] >= 128, }; } @@ -997,7 +1039,11 @@ public static bool CharInClass(char ch, string set, ref int[]? 
asciiResultCache) { bitsToSet |= valueBit; } +#if REGEXGENERATOR + InterlockedExtensions.Or(ref slot, bitsToSet); +#else Interlocked.Or(ref slot, bitsToSet); +#endif // Return the computed value. return isInClass; @@ -1391,9 +1437,9 @@ private static ReadOnlySpan SetFromProperty(string capname, bool invert, s string set = s_propTable[mid][1]; Debug.Assert(!string.IsNullOrEmpty(set), "Found a null/empty element in RegexCharClass prop table"); return - !invert ? set : + !invert ? set.AsSpan() : set[0] == NullChar ? set.AsSpan(1) : - NullCharString + set; + (NullCharString + set).AsSpan(); } } @@ -1401,7 +1447,6 @@ private static ReadOnlySpan SetFromProperty(string capname, bool invert, s SR.Format(SR.MakeException, pattern, currentPos, SR.Format(SR.UnrecognizedUnicodeProperty, capname))); } -#if DEBUG public static readonly string[] CategoryIdToName = PopulateCategoryIdToName(); private static string[] PopulateCategoryIdToName() @@ -1417,7 +1462,7 @@ private static string[] PopulateCategoryIdToName() /// /// Produces a human-readable description for a set string. /// - [ExcludeFromCodeCoverage(Justification = "Debug only")] + [ExcludeFromCodeCoverage] public static string SetDescription(string set) { int setLength = set[SetLengthIndex]; @@ -1491,7 +1536,7 @@ public static string SetDescription(string set) } else { - Debug.Fail($"Couldn't find a group to match '{group}'"); + // TODO: The code is incorrectly handling pretty-printing groups like \P{P}. } } @@ -1516,7 +1561,7 @@ public static string SetDescription(string set) /// /// Produces a human-readable description for a single character. 
/// - [ExcludeFromCodeCoverage(Justification = "Debug only")] + [ExcludeFromCodeCoverage] public static string CharDescription(char ch) { if (ch == '\\') @@ -1552,7 +1597,7 @@ public static string CharDescription(char ch) return sb.ToString(); } - [ExcludeFromCodeCoverage(Justification = "Debug only")] + [ExcludeFromCodeCoverage] private static string CategoryDescription(char ch) { if (ch == SpaceConst) @@ -1572,7 +1617,6 @@ private static string CategoryDescription(char ch) return "\\p{" + CategoryIdToName[(ch - 1)] + "}"; } -#endif /// /// A first/last pair representing a single range of characters. diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs index ffcc7a6e07450d..47f8179d13c702 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs @@ -223,8 +223,7 @@ public static int OpcodeSize(int opcode) } } -#if DEBUG - [ExcludeFromCodeCoverage(Justification = "Debug only")] + [ExcludeFromCodeCoverage] private static string OperatorDescription(int Opcode) { string codeStr = (Opcode & Mask) switch @@ -286,16 +285,18 @@ private static string OperatorDescription(int Opcode) ((Opcode & Back2) != 0 ? "-Back2" : ""); } - [ExcludeFromCodeCoverage(Justification = "Debug only")] - public string OpcodeDescription(int offset) + [ExcludeFromCodeCoverage] + private string OpcodeDescription(int offset) => OpcodeDescription(offset, Codes, Strings); + + [ExcludeFromCodeCoverage] + internal static string OpcodeDescription(int offset, int[] codes, string[] strings) { var sb = new StringBuilder(); - int opcode = Codes[offset]; + int opcode = codes[offset]; sb.Append($"{offset:D6} "); sb.Append(OpcodeBacktracks(opcode & Mask) ? 
'*' : ' '); sb.Append(OperatorDescription(opcode)); - sb.Append(Indent()); opcode &= Mask; @@ -311,7 +312,7 @@ public string OpcodeDescription(int offset) case Notoneloopatomic: case Onelazy: case Notonelazy: - sb.Append('\'').Append(RegexCharClass.CharDescription((char)Codes[offset + 1])).Append('\''); + sb.Append(Indent()).Append('\'').Append(RegexCharClass.CharDescription((char)codes[offset + 1])).Append('\''); break; case Set: @@ -319,33 +320,29 @@ public string OpcodeDescription(int offset) case Setloop: case Setloopatomic: case Setlazy: - sb.Append(RegexCharClass.SetDescription(Strings[Codes[offset + 1]])); + sb.Append(Indent()).Append(RegexCharClass.SetDescription(strings[codes[offset + 1]])); break; case Multi: - sb.Append('"').Append(Strings[Codes[offset + 1]]).Append('"'); + sb.Append(Indent()).Append('"').Append(strings[codes[offset + 1]]).Append('"'); break; case Ref: case Testref: - sb.Append("index = "); - sb.Append(Codes[offset + 1]); + sb.Append(Indent()).Append("index = ").Append(codes[offset + 1]); break; case Capturemark: - sb.Append("index = "); - sb.Append(Codes[offset + 1]); - if (Codes[offset + 2] != -1) + sb.Append(Indent()).Append("index = ").Append(codes[offset + 1]); + if (codes[offset + 2] != -1) { - sb.Append(", unindex = "); - sb.Append(Codes[offset + 2]); + sb.Append(", unindex = ").Append(codes[offset + 2]); } break; case Nullcount: case Setcount: - sb.Append("value = "); - sb.Append(Codes[offset + 1]); + sb.Append(Indent()).Append("value = ").Append(codes[offset + 1]); break; case Goto: @@ -354,8 +351,7 @@ public string OpcodeDescription(int offset) case Lazybranchmark: case Branchcount: case Lazybranchcount: - sb.Append("addr = "); - sb.Append(Codes[offset + 1]); + sb.Append(Indent()).Append("addr = ").Append(codes[offset + 1]); break; } @@ -374,19 +370,27 @@ public string OpcodeDescription(int offset) case Setloopatomic: case Setlazy: sb.Append(", rep = "); - if (Codes[offset + 2] == int.MaxValue) + if (codes[offset + 2] == 
int.MaxValue) + { sb.Append("inf"); + } else - sb.Append(Codes[offset + 2]); + { + sb.Append(codes[offset + 2]); + } break; case Branchcount: case Lazybranchcount: sb.Append(", limit = "); - if (Codes[offset + 2] == int.MaxValue) + if (codes[offset + 2] == int.MaxValue) + { sb.Append("inf"); + } else - sb.Append(Codes[offset + 2]); + { + sb.Append(codes[offset + 2]); + } break; } @@ -395,10 +399,11 @@ public string OpcodeDescription(int offset) return sb.ToString(); } - [ExcludeFromCodeCoverage(Justification = "Debug only")] +#if DEBUG + [ExcludeFromCodeCoverage] public void Dump() => Debug.WriteLine(ToString()); - [ExcludeFromCodeCoverage(Justification = "Debug only")] + [ExcludeFromCodeCoverage] public override string ToString() { var sb = new StringBuilder(); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index 81f5ca065f0e1c..01728dbaec23ba 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -69,12 +69,6 @@ internal abstract class RegexCompiler private static readonly MethodInfo s_stringIndexOfCharInt = typeof(string).GetMethod("IndexOf", new Type[] { typeof(char), typeof(int) })!; private static readonly MethodInfo s_textInfoToLowerMethod = typeof(TextInfo).GetMethod("ToLower", new Type[] { typeof(char) })!; - /// - /// The max recursion depth used for computations that can recover for not walking the entire node tree. - /// This is used to avoid stack overflows on degenerate expressions. - /// - private const int MaxRecursionDepth = 20; - protected ILGenerator? _ilg; /// true if the compiled code is saved for later use, potentially on a different machine. 
private readonly bool _persistsAssembly; @@ -824,43 +818,6 @@ private void CallToLower() } } - /// Gets whether the specified character participates in case conversion. - /// - /// This method is used to perform operations as if they were case-sensitive even if they're - /// specified as being case-insensitive. Such a reduction can be applied when the only character - /// that would lower-case to the one being searched for / compared against is that character itself. - /// - private static bool ParticipatesInCaseConversion(int comparison) - { - Debug.Assert((uint)comparison <= char.MaxValue); - - switch (char.GetUnicodeCategory((char)comparison)) - { - case UnicodeCategory.ClosePunctuation: - case UnicodeCategory.ConnectorPunctuation: - case UnicodeCategory.Control: - case UnicodeCategory.DashPunctuation: - case UnicodeCategory.DecimalDigitNumber: - case UnicodeCategory.FinalQuotePunctuation: - case UnicodeCategory.InitialQuotePunctuation: - case UnicodeCategory.LineSeparator: - case UnicodeCategory.OpenPunctuation: - case UnicodeCategory.OtherNumber: - case UnicodeCategory.OtherPunctuation: - case UnicodeCategory.ParagraphSeparator: - case UnicodeCategory.SpaceSeparator: - // All chars in these categories meet the criteria that the only way - // `char.ToLower(toTest, AnyCulture) == charInAboveCategory` is when - // toTest == charInAboveCategory. - return false; - - default: - // We don't know (without testing the character against every other - // character), so assume it does. - return true; - } - } - /// /// Generates the first section of the MSIL. This section contains all /// the forward logic, and corresponds directly to the regex codes. 
@@ -1435,7 +1392,7 @@ protected void GenerateFindFirstChar() Stloc(testLocal); Ldloc(testLocal); Call(s_stringGetCharsMethod); - if (_boyerMoorePrefix.CaseInsensitive && ParticipatesInCaseConversion(_boyerMoorePrefix.Pattern[charindex])) + if (_boyerMoorePrefix.CaseInsensitive && RegexCharClass.ParticipatesInCaseConversion(_boyerMoorePrefix.Pattern[charindex])) { CallToLower(); } @@ -1700,9 +1657,9 @@ protected void GenerateFindFirstChar() } } - // if (!CharInClass(textSpan[i], prefix[0], "...")) goto returnFalse; - // if (!CharInClass(textSpan[i + 1], prefix[1], "...")) goto returnFalse; - // if (!CharInClass(textSpan[i + 2], prefix[2], "...")) goto returnFalse; + // if (!CharInClass(textSpan[i], prefix[0], "...")) continue; + // if (!CharInClass(textSpan[i + 1], prefix[1], "...")) continue; + // if (!CharInClass(textSpan[i + 2], prefix[2], "...")) continue; // ... Debug.Assert(charClassIndex == 0 || charClassIndex == 1); for ( ; charClassIndex < _leadingCharClasses.Length; charClassIndex++) @@ -1771,13 +1728,9 @@ private bool TryGenerateNonBacktrackingGo(RegexNode node) return false; } - // We use an empty bit from the node's options to store data on whether a node contains captures. - Debug.Assert(Regex.MaxOptionShift == 10); - const RegexOptions HasCapturesFlag = (RegexOptions)(1 << 31); - // Skip the Capture node. We handle the implicit root capture specially. node = node.Child(0); - if (!NodeSupportsNonBacktrackingImplementation(node, maxDepth: MaxRecursionDepth)) + if (!RegexNode.NodeSupportsSimplifiedCodeGenerationImplementation(node, maxDepth: RegexNode.DefaultMaxRecursionDepth)) { return false; } @@ -1852,7 +1805,7 @@ private bool TryGenerateNonBacktrackingGo(RegexNode node) Call(s_captureMethod); // If the graph contained captures, undo any remaining to handle failed matches. 
- if ((node.Options & HasCapturesFlag) != 0) + if ((node.Options & RegexNode.HasCapturesFlag) != 0) { // while (Crawlpos() != 0) Uncapture(); @@ -1886,170 +1839,6 @@ private bool TryGenerateNonBacktrackingGo(RegexNode node) // Generated code successfully with non-backtracking implementation. return true; - // Determines whether the node supports an optimized implementation that doesn't allow for backtracking. - static bool NodeSupportsNonBacktrackingImplementation(RegexNode node, int maxDepth) - { - bool supported = false; - - // We only support the default left-to-right, not right-to-left, which requires more complication in the gerated code. - // (Right-to-left is only employed when explicitly asked for by the developer or by lookbehind assertions.) - // We also limit the recursion involved to prevent stack dives; this limitation can be removed by switching - // away from a recursive implementation (done for convenience) to an iterative one that's more complicated - // but within the same problems. - if ((node.Options & RegexOptions.RightToLeft) == 0 && maxDepth > 0) - { - int childCount = node.ChildCount(); - Debug.Assert((node.Options & HasCapturesFlag) == 0); - - switch (node.Type) - { - // One/Notone/Set/Multi don't involve any repetition and are easily supported. - case RegexNode.One: - case RegexNode.Notone: - case RegexNode.Set: - case RegexNode.Multi: - // Boundaries are like set checks and don't involve repetition, either. - case RegexNode.Boundary: - case RegexNode.NonBoundary: - case RegexNode.ECMABoundary: - case RegexNode.NonECMABoundary: - // Anchors are also trivial. - case RegexNode.Beginning: - case RegexNode.Start: - case RegexNode.Bol: - case RegexNode.Eol: - case RegexNode.End: - case RegexNode.EndZ: - // {Set/One/Notone}loopatomic are optimized nodes that represent non-backtracking variable-length loops. 
- // These consume their {Set/One} inputs as long as they match, and don't give up anything they - // matched, which means we can support them without backtracking. - case RegexNode.Oneloopatomic: - case RegexNode.Notoneloopatomic: - case RegexNode.Setloopatomic: - // "Empty" is easy: nothing is emitted for it. - // "Nothing" is also easy: it doesn't match anything. - // "UpdateBumpalong" doesn't match anything, it's just an optional directive to the engine. - case RegexNode.Empty: - case RegexNode.Nothing: - case RegexNode.UpdateBumpalong: - supported = true; - break; - - // Repeaters don't require backtracking as long as their min and max are equal. - // At that point they're just a shorthand for writing out the One/Notone/Set - // that number of times. - case RegexNode.Oneloop: - case RegexNode.Notoneloop: - case RegexNode.Setloop: - Debug.Assert(node.Next == null || node.Next.Type != RegexNode.Atomic, "Loop should have been transformed into an atomic type."); - goto case RegexNode.Onelazy; - case RegexNode.Onelazy: - case RegexNode.Notonelazy: - case RegexNode.Setlazy: - supported = node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic); - break; - - // {Lazy}Loop repeaters are the same, except their child also needs to be supported. - // We also support such loops being atomic. - case RegexNode.Loop: - case RegexNode.Lazyloop: - supported = - (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic)) && - NodeSupportsNonBacktrackingImplementation(node.Child(0), maxDepth - 1); - break; - - // We can handle atomic as long as we can handle making its child atomic, or - // its child doesn't have that concept. - case RegexNode.Atomic: - // Lookahead assertions also only require that the child node be supported. - // The RightToLeft check earlier is important to differentiate lookbehind, - // which is not supported. 
- case RegexNode.Require: - case RegexNode.Prevent: - supported = NodeSupportsNonBacktrackingImplementation(node.Child(0), maxDepth - 1); - break; - - // We can handle alternates as long as they're atomic (a root / global alternate is - // effectively atomic, as nothing will try to backtrack into it as it's the last thing). - // Its children must all also be supported. - case RegexNode.Alternate: - if (node.Next != null && - (node.IsAtomicByParent() || // atomic alternate - (node.Next.Type == RegexNode.Capture && node.Next.Next is null))) // root alternate - { - goto case RegexNode.Concatenate; - } - break; - - // Concatenation doesn't require backtracking as long as its children don't. - case RegexNode.Concatenate: - supported = true; - for (int i = 0; i < childCount; i++) - { - if (supported && !NodeSupportsNonBacktrackingImplementation(node.Child(i), maxDepth - 1)) - { - supported = false; - break; - } - } - break; - - case RegexNode.Capture: - // Currently we only support capnums without uncapnums (for balancing groups) - supported = node.N == -1; - if (supported) - { - // And we only support them in certain places in the tree. - RegexNode? parent = node.Next; - while (parent != null) - { - switch (parent.Type) - { - case RegexNode.Alternate: - case RegexNode.Atomic: - case RegexNode.Capture: - case RegexNode.Concatenate: - case RegexNode.Require: - parent = parent.Next; - break; - - default: - parent = null; - supported = false; - break; - } - } - - if (supported) - { - // And we only support them if their children are supported. - supported = NodeSupportsNonBacktrackingImplementation(node.Child(0), maxDepth - 1); - - // If we've found a supported capture, mark all of the nodes in its parent - // hierarchy as containing a capture. 
- if (supported) - { - parent = node; - while (parent != null && ((parent.Options & HasCapturesFlag) == 0)) - { - parent.Options |= HasCapturesFlag; - parent = parent.Next; - } - } - } - } - break; - } - } -#if DEBUG - if (!supported && (node.Options & RegexOptions.Debug) != 0) - { - Debug.WriteLine($"Unable to use non-backtracking code gen: node {node.Description()} isn't supported."); - } -#endif - return supported; - } - static bool IsCaseInsensitive(RegexNode node) => (node.Options & RegexOptions.IgnoreCase) != 0; // Creates a span for runtext starting at runtextpos until this.runtextend. @@ -2167,7 +1956,7 @@ void EmitAtomicAlternate(RegexNode node) // as the alternation is atomic, so we're not concerned about captures after // the alternation. RentedLocalBuilder? startingCrawlpos = null; - if ((node.Options & HasCapturesFlag) != 0) + if ((node.Options & RegexNode.HasCapturesFlag) != 0) { startingCrawlpos = RentInt32Local(); Ldthis(); @@ -2499,7 +2288,7 @@ void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, LocalBuilder? o case RegexNode.Onelazy: case RegexNode.Oneloop: case RegexNode.Oneloopatomic: - if (IsCaseInsensitive(node) && ParticipatesInCaseConversion(node.Ch)) + if (IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch)) { CallToLower(); } @@ -2509,7 +2298,7 @@ void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, LocalBuilder? 
o default: Debug.Assert(node.Type == RegexNode.Notone || node.Type == RegexNode.Notonelazy || node.Type == RegexNode.Notoneloop || node.Type == RegexNode.Notoneloopatomic); - if (IsCaseInsensitive(node) && ParticipatesInCaseConversion(node.Ch)) + if (IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch)) { CallToLower(); } @@ -2733,7 +2522,7 @@ void EmitMultiChar(RegexNode node) EmitTextSpanOffset(); textSpanPos++; LdindU2(); - if (caseInsensitive && ParticipatesInCaseConversion(s[i])) + if (caseInsensitive && RegexCharClass.ParticipatesInCaseConversion(s[i])) { CallToLower(); } @@ -2911,7 +2700,7 @@ void EmitSingleCharAtomicLoop(RegexNode node) if (node.Type == RegexNode.Notoneloopatomic && maxIterations == int.MaxValue && - (!IsCaseInsensitive(node) || !ParticipatesInCaseConversion(node.Ch))) + (!IsCaseInsensitive(node) || !RegexCharClass.ParticipatesInCaseConversion(node.Ch))) { // For Notoneloopatomic, we're looking for a specific character, as everything until we find // it is consumed by the loop. 
If we're unbounded, such as with ".*" and if we're case-sensitive, @@ -3045,7 +2834,7 @@ void EmitSingleCharAtomicLoop(RegexNode node) switch (node.Type) { case RegexNode.Oneloopatomic: - if (IsCaseInsensitive(node) && ParticipatesInCaseConversion(node.Ch)) + if (IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch)) { CallToLower(); } @@ -3053,7 +2842,7 @@ void EmitSingleCharAtomicLoop(RegexNode node) BneFar(doneLabel); break; case RegexNode.Notoneloopatomic: - if (IsCaseInsensitive(node) && ParticipatesInCaseConversion(node.Ch)) + if (IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch)) { CallToLower(); } @@ -3139,7 +2928,7 @@ void EmitAtomicSingleCharZeroOrOne(RegexNode node) switch (node.Type) { case RegexNode.Oneloopatomic: - if (IsCaseInsensitive(node) && ParticipatesInCaseConversion(node.Ch)) + if (IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch)) { CallToLower(); } @@ -3147,7 +2936,7 @@ void EmitAtomicSingleCharZeroOrOne(RegexNode node) BneFar(skipUpdatesLabel); break; case RegexNode.Notoneloopatomic: - if (IsCaseInsensitive(node) && ParticipatesInCaseConversion(node.Ch)) + if (IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch)) { CallToLower(); } @@ -3179,7 +2968,7 @@ void EmitAtomicSingleCharZeroOrOne(RegexNode node) void EmitAtomicNodeLoop(RegexNode node) { Debug.Assert(node.Type == RegexNode.Loop); - Debug.Assert(node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic)); + Debug.Assert(node.M == node.N || (node.Next != null && (node.Next.Type is RegexNode.Atomic or RegexNode.Capture))); Debug.Assert(node.M < int.MaxValue); // If this is actually a repeater, emit that instead. 
@@ -4230,7 +4019,7 @@ private void GenerateOneCode() } else { - if (IsCaseInsensitive() && ParticipatesInCaseConversion(Operand(0))) + if (IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0))) { CallToLower(); } @@ -4276,7 +4065,7 @@ private void GenerateOneCode() Add(); } Call(s_stringGetCharsMethod); - if (IsCaseInsensitive() && ParticipatesInCaseConversion(str[i])) + if (IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(str[i])) { CallToLower(); } @@ -4319,7 +4108,7 @@ private void GenerateOneCode() Ldc(str.Length - i); Sub(); Call(s_stringGetCharsMethod); - if (IsCaseInsensitive() && ParticipatesInCaseConversion(str[i])) + if (IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(str[i])) { CallToLower(); } @@ -4522,7 +4311,7 @@ private void GenerateOneCode() } else { - if (IsCaseInsensitive() && ParticipatesInCaseConversion(Operand(0))) + if (IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0))) { CallToLower(); } @@ -4631,7 +4420,7 @@ private void GenerateOneCode() // we can use the vectorized IndexOf to search for the target character. 
if ((Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic) && !IsRightToLeft() && - (!IsCaseInsensitive() || !ParticipatesInCaseConversion(Operand(0)))) + (!IsCaseInsensitive() || !RegexCharClass.ParticipatesInCaseConversion(Operand(0)))) { // i = runtext.AsSpan(runtextpos, len).IndexOf(ch); Ldloc(_runtextLocal!); @@ -4799,7 +4588,7 @@ private void GenerateOneCode() } else { - if (IsCaseInsensitive() && ParticipatesInCaseConversion(Operand(0))) + if (IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0))) { CallToLower(); } @@ -5000,7 +4789,7 @@ private void GenerateOneCode() } else { - if (IsCaseInsensitive() && ParticipatesInCaseConversion(Operand(0))) + if (IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0))) { CallToLower(); } @@ -5130,7 +4919,7 @@ private void EmitMatchCharacterClass(string charClass, bool caseInsensitive) // we get smaller code), and it's what we'd do for the fallback (which we get to avoid generating) as part of CharInClass. if (!invariant && RegexCharClass.TryGetSingleUnicodeCategory(charClass, out UnicodeCategory category, out bool negated)) { - // char.GetUnicodeInfo(ch) == category + // char.GetUnicodeCategory(ch) == category Call(s_charGetUnicodeInfo); Ldc((int)category); Ceq(); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index 25c3a787f9ca2c..c046447e088fe2 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -103,7 +103,10 @@ internal sealed class RegexNode public const int Testref = 33; // (?(n) | ) - alternation, reference public const int Testgroup = 34; // (?(...) 
| )- alternation, expression - private const uint DefaultMaxRecursionDepth = 20; // arbitrary cut-off to avoid unbounded recursion + internal const byte DefaultMaxRecursionDepth = 20; // arbitrary cut-off to avoid unbounded recursion + + /// empty bit from the node's options to store data on whether a node contains captures + internal const RegexOptions HasCapturesFlag = (RegexOptions)(1 << 31); private object? Children; public int Type { get; private set; } @@ -200,8 +203,10 @@ private void ValidateFinalTreeInvariants() { var toExamine = new Stack(); toExamine.Push(this); - while (toExamine.TryPop(out RegexNode? node)) + while (toExamine.Count > 0) { + RegexNode node = toExamine.Pop(); + // Validate that we never see certain node types. Debug.Assert(Type != Group, "All Group nodes should have been removed."); @@ -904,7 +909,7 @@ RegexNode ExtractCommonPrefix() RegexOptions startingNodeOptions = startingNode.Options; string? originalStartingString = startingNode.Str; - ReadOnlySpan startingSpan = startingNode.Type == One ? stackalloc char[1] { startingNode.Ch } : (ReadOnlySpan)originalStartingString; + ReadOnlySpan startingSpan = startingNode.Type == One ? stackalloc char[1] { startingNode.Ch } : originalStartingString.AsSpan(); Debug.Assert(startingSpan.Length > 0); // Now compare the rest of the branches against it. @@ -988,7 +993,7 @@ static void ProcessOneOrMulti(RegexNode node, ReadOnlySpan startingSpan) else if (node.Str.Length - 1 == startingSpan.Length) { node.Type = One; - node.Ch = node.Str[^1]; + node.Ch = node.Str[node.Str.Length - 1]; node.Str = null; } else @@ -1754,7 +1759,170 @@ public int ChildCount() return 1; } + // Determines whether the node supports an optimized implementation that doesn't allow for backtracking. 
+ internal static bool NodeSupportsSimplifiedCodeGenerationImplementation(RegexNode node, int maxDepth) + { + bool supported = false; + + // We only support the default left-to-right, not right-to-left, which requires more complication in the generated code. + // (Right-to-left is only employed when explicitly asked for by the developer or by lookbehind assertions.) + // We also limit the recursion involved to prevent stack dives; this limitation can be removed by switching + // away from a recursive implementation (done for convenience) to an iterative one that's more complicated + // but without the same problems. + if ((node.Options & RegexOptions.RightToLeft) == 0 && maxDepth > 0) + { + int childCount = node.ChildCount(); + Debug.Assert((node.Options & HasCapturesFlag) == 0); + + switch (node.Type) + { + // One/Notone/Set/Multi don't involve any repetition and are easily supported. + case RegexNode.One: + case RegexNode.Notone: + case RegexNode.Set: + case RegexNode.Multi: + // Boundaries are like set checks and don't involve repetition, either. + case RegexNode.Boundary: + case RegexNode.NonBoundary: + case RegexNode.ECMABoundary: + case RegexNode.NonECMABoundary: + // Anchors are also trivial. + case RegexNode.Beginning: + case RegexNode.Start: + case RegexNode.Bol: + case RegexNode.Eol: + case RegexNode.End: + case RegexNode.EndZ: + // {Set/One/Notone}loopatomic are optimized nodes that represent non-backtracking variable-length loops. + // These consume their {Set/One} inputs as long as they match, and don't give up anything they + // matched, which means we can support them without backtracking. + case RegexNode.Oneloopatomic: + case RegexNode.Notoneloopatomic: + case RegexNode.Setloopatomic: + // "Empty" is easy: nothing is emitted for it. + // "Nothing" is also easy: it doesn't match anything. + // "UpdateBumpalong" doesn't match anything, it's just an optional directive to the engine. 
+ case RegexNode.Empty: + case RegexNode.Nothing: + case RegexNode.UpdateBumpalong: + supported = true; + break; + + // Repeaters don't require backtracking as long as their min and max are equal. + // At that point they're just a shorthand for writing out the One/Notone/Set + // that number of times. + case RegexNode.Oneloop: + case RegexNode.Notoneloop: + case RegexNode.Setloop: + Debug.Assert(node.Next == null || node.Next.Type != RegexNode.Atomic, "Loop should have been transformed into an atomic type."); + goto case RegexNode.Onelazy; + case RegexNode.Onelazy: + case RegexNode.Notonelazy: + case RegexNode.Setlazy: + supported = node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic); + break; + + // {Lazy}Loop repeaters are the same, except their child also needs to be supported. + // We also support such loops being atomic. + case RegexNode.Loop: + case RegexNode.Lazyloop: + supported = + (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic)) && + NodeSupportsSimplifiedCodeGenerationImplementation(node.Child(0), maxDepth - 1); + break; + + // We can handle atomic as long as we can handle making its child atomic, or + // its child doesn't have that concept. + case RegexNode.Atomic: + // Lookahead assertions also only require that the child node be supported. + // The RightToLeft check earlier is important to differentiate lookbehind, + // which is not supported. + case RegexNode.Require: + case RegexNode.Prevent: + supported = NodeSupportsSimplifiedCodeGenerationImplementation(node.Child(0), maxDepth - 1); + break; + + // We can handle alternates as long as they're atomic (a root / global alternate is + // effectively atomic, as nothing will try to backtrack into it as it's the last thing). + // Its children must all also be supported. 
+ case RegexNode.Alternate: + if (node.Next != null && + (node.IsAtomicByParent() || // atomic alternate + (node.Next.Type == RegexNode.Capture && node.Next.Next is null))) // root alternate + { + goto case RegexNode.Concatenate; + } + break; + + // Concatenation doesn't require backtracking as long as its children don't. + case RegexNode.Concatenate: + supported = true; + for (int i = 0; i < childCount; i++) + { + if (supported && !NodeSupportsSimplifiedCodeGenerationImplementation(node.Child(i), maxDepth - 1)) + { + supported = false; + break; + } + } + break; + + case RegexNode.Capture: + // Currently we only support capnums without uncapnums (for balancing groups) + supported = node.N == -1; + if (supported) + { + // And we only support them in certain places in the tree. + RegexNode? parent = node.Next; + while (parent != null) + { + switch (parent.Type) + { + case RegexNode.Alternate: + case RegexNode.Atomic: + case RegexNode.Capture: + case RegexNode.Concatenate: + case RegexNode.Require: + parent = parent.Next; + break; + + default: + parent = null; + supported = false; + break; + } + } + + if (supported) + { + // And we only support them if their children are supported. + supported = NodeSupportsSimplifiedCodeGenerationImplementation(node.Child(0), maxDepth - 1); + + // If we've found a supported capture, mark all of the nodes in its parent + // hierarchy as containing a capture. 
+ if (supported) + { + parent = node; + while (parent != null && ((parent.Options & HasCapturesFlag) == 0)) + { + parent.Options |= HasCapturesFlag; + parent = parent.Next; + } + } + } + } + break; + } + } #if DEBUG + if (!supported && (node.Options & RegexOptions.Debug) != 0) + { + Debug.WriteLine($"Unable to use non-backtracking code gen: node {node.Description()} isn't supported."); + } +#endif + return supported; + } + private string TypeName => Type switch { @@ -1799,7 +1967,7 @@ public int ChildCount() _ => $"(unknown {Type})" }; - [ExcludeFromCodeCoverage(Justification = "Debug only")] + [ExcludeFromCodeCoverage] public string Description() { var sb = new StringBuilder(TypeName); @@ -1871,10 +2039,11 @@ public string Description() return sb.ToString(); } - [ExcludeFromCodeCoverage(Justification = "Debug only")] +#if DEBUG + [ExcludeFromCodeCoverage] public void Dump() => Debug.WriteLine(ToString()); - [ExcludeFromCodeCoverage(Justification = "Debug only")] + [ExcludeFromCodeCoverage] public override string ToString() { RegexNode? curNode = this; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParseError.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParseError.cs index 108d1cbd4861de..c3493d685193e4 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParseError.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParseError.cs @@ -10,7 +10,12 @@ namespace System.Text.RegularExpressions /// /// This information is made available through . /// - public enum RegexParseError +#if REGEXGENERATOR + internal +#else + public +#endif + enum RegexParseError { /// /// An unknown regular expression parse error. 
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs index 8885ae485f4957..0f7bdbef0bc075 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs @@ -2134,7 +2134,12 @@ private void AddConcatenate(int pos, int cch, bool isReplacement) if (cch > 1) { string str = UseOptionI() && !isReplacement ? - string.Create(cch, (_pattern, _culture, pos, cch), static (span, state) => state._pattern.AsSpan(state.pos, state.cch).ToLower(span, state._culture)) : +#if REGEXGENERATOR + StringExtensions.Create +#else + string.Create +#endif + (cch, (_pattern, _culture, pos, cch), static (span, state) => state._pattern.AsSpan(state.pos, state.cch).ToLower(span, state._culture)) : _pattern.Substring(pos, cch); node = new RegexNode(RegexNode.Multi, _options, str); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs index ef20212565a33d..96a709b2338d43 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs @@ -360,7 +360,7 @@ public static int FindLeadingAnchor(RegexTree tree) } #if DEBUG - [ExcludeFromCodeCoverage(Justification = "Debug only")] + [ExcludeFromCodeCoverage] public static string AnchorDescription(int anchors) { var sb = new StringBuilder(); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs 
index 82dfd368d82a6f..c86d8f54c03370 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTree.cs @@ -33,13 +33,13 @@ internal RegexTree(RegexNode root, Hashtable caps, int[] capNumList, int capTop, } #if DEBUG - [ExcludeFromCodeCoverage(Justification = "Debug only")] + [ExcludeFromCodeCoverage] public void Dump() => Root.Dump(); - [ExcludeFromCodeCoverage(Justification = "Debug only")] + [ExcludeFromCodeCoverage] public override string ToString() => Root.ToString(); - [ExcludeFromCodeCoverage(Justification = "Debug only")] + [ExcludeFromCodeCoverage] public bool Debug => (Options & RegexOptions.Debug) != 0; #endif } From 2380294747ae62c12f10a1994c2f8d25d7980827 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 13 Sep 2021 18:07:54 -0400 Subject: [PATCH 03/16] Add generator test project Adds tests dedicated to the mechanics of the source generator, e.g. that appropriate diagnostics are issued for improper use of RegexGenerator. 
--- .../System.Text.RegularExpressions.sln | 26 +- .../gen/DiagnosticDescriptors.cs | 4 +- .../gen/RegexGenerator.Parser.cs | 34 +- .../Text/RegularExpressions/RegexCode.cs | 2 +- .../Text/RegularExpressions/RegexOptions.cs | 7 +- .../RegularExpressions/RegexParseException.cs | 7 +- .../RegexGeneratorParserTests.cs | 432 ++++++++++++++++++ ...RegularExpressions.Generators.Tests.csproj | 18 + ...xt.RegularExpressions.Generators.Tests.sln | 31 ++ 9 files changed, 548 insertions(+), 13 deletions(-) create mode 100644 src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs create mode 100644 src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/System.Text.RegularExpressions.Generators.Tests.csproj create mode 100644 src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/System.Text.RegularExpressions.Generators.Tests.sln diff --git a/src/libraries/System.Text.RegularExpressions/System.Text.RegularExpressions.sln b/src/libraries/System.Text.RegularExpressions/System.Text.RegularExpressions.sln index f72697bbd83ac9..38bb3a48ce6ace 100644 --- a/src/libraries/System.Text.RegularExpressions/System.Text.RegularExpressions.sln +++ b/src/libraries/System.Text.RegularExpressions/System.Text.RegularExpressions.sln @@ -1,4 +1,8 @@ -Microsoft Visual Studio Solution File, Format Version 12.00 + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31709.452 +MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TestUtilities", "..\Common\tests\TestUtilities\TestUtilities.csproj", "{63551298-BFD4-43FC-8465-AC454228B83C}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Runtime.CompilerServices.Unsafe", "..\System.Runtime.CompilerServices.Unsafe\ref\System.Runtime.CompilerServices.Unsafe.csproj", 
"{84AABEC1-5CDA-4AB8-819E-9CA508DB6F39}" @@ -17,6 +21,12 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ref", "ref", "{15319A22-BC9 EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{D8FD137E-6961-4629-A71A-53394897FE6B}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gen", "gen", "{7A5AF59C-5114-4788-B5AA-80C977766060}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.Generator", "gen\System.Text.RegularExpressions.Generator.csproj", "{3699C8E2-C354-4AED-81DC-ECBAC3EFEB4B}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.Generators.Tests", "tests\System.Text.RegularExpressions.Generators.Tests\System.Text.RegularExpressions.Generators.Tests.csproj", "{32ABFCDA-10FD-4A98-A429-145C28021EBE}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -47,17 +57,27 @@ Global {8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Debug|Any CPU.Build.0 = Debug|Any CPU {8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Release|Any CPU.ActiveCfg = Release|Any CPU {8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Release|Any CPU.Build.0 = Release|Any CPU + {3699C8E2-C354-4AED-81DC-ECBAC3EFEB4B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3699C8E2-C354-4AED-81DC-ECBAC3EFEB4B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {3699C8E2-C354-4AED-81DC-ECBAC3EFEB4B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3699C8E2-C354-4AED-81DC-ECBAC3EFEB4B}.Release|Any CPU.Build.0 = Release|Any CPU + {32ABFCDA-10FD-4A98-A429-145C28021EBE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {32ABFCDA-10FD-4A98-A429-145C28021EBE}.Debug|Any CPU.Build.0 = Debug|Any CPU + {32ABFCDA-10FD-4A98-A429-145C28021EBE}.Release|Any CPU.ActiveCfg = Release|Any CPU + {32ABFCDA-10FD-4A98-A429-145C28021EBE}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection 
GlobalSection(NestedProjects) = preSolution {63551298-BFD4-43FC-8465-AC454228B83C} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78} - {8EE1A7C4-3630-4900-8976-9B3ADAFF10DC} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78} {84AABEC1-5CDA-4AB8-819E-9CA508DB6F39} = {15319A22-BC91-407B-A795-334DD05C82A0} - {C043B00D-8662-43E4-9E87-8BB317059111} = {15319A22-BC91-407B-A795-334DD05C82A0} {B7E3B087-583F-49B0-8820-787CD98E54C7} = {D8FD137E-6961-4629-A71A-53394897FE6B} + {C043B00D-8662-43E4-9E87-8BB317059111} = {15319A22-BC91-407B-A795-334DD05C82A0} {0409C086-D7CC-43F8-9762-C94FB1E47F5B} = {D8FD137E-6961-4629-A71A-53394897FE6B} + {8EE1A7C4-3630-4900-8976-9B3ADAFF10DC} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78} + {3699C8E2-C354-4AED-81DC-ECBAC3EFEB4B} = {7A5AF59C-5114-4788-B5AA-80C977766060} + {32ABFCDA-10FD-4A98-A429-145C28021EBE} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {1ED4AB32-B7AA-478F-A96B-F725ACD0AABB} diff --git a/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs b/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs index fc9484d5aa609c..a07d93331f48a9 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs @@ -6,7 +6,7 @@ namespace System.Text.RegularExpressions.Generator { - internal static class DiagnosticDescriptors + public static class DiagnosticDescriptors { // TODO: Assign valid IDs @@ -58,7 +58,7 @@ internal static class DiagnosticDescriptors DiagnosticSeverity.Error, isEnabledByDefault: true); - public static DiagnosticDescriptor RegexMethodShouldBePartial { get; } = new DiagnosticDescriptor( + public static DiagnosticDescriptor RegexMethodMustBePartial { get; } = new DiagnosticDescriptor( id: "SYSLIB1106", title: new LocalizableResourceString(nameof(SR.RegexMethodMustBePartialMessage), SR.ResourceManager, 
typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustBePartialMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs index d56af82c9fa8fe..d8617ad4106c72 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs @@ -150,7 +150,7 @@ private static IReadOnlyList GetRegexClassesToEmit(Compilation compi if (!regexMethodSymbol.IsPartialDefinition) { - Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodShouldBePartial, methodSyntax.GetLocation()); + Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustBePartial, methodSyntax.GetLocation()); continue; } @@ -162,19 +162,19 @@ private static IReadOnlyList GetRegexClassesToEmit(Compilation compi if (regexMethodSymbol.Parameters.Length != 0) { - Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustBeParameterless, methodSyntax.Body.GetLocation()); + Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustBeParameterless, methodSyntax.GetLocation()); continue; } if (regexMethodSymbol.Arity != 0) { - Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustNotBeGeneric, methodSyntax.Body.GetLocation()); + Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustNotBeGeneric, methodSyntax.GetLocation()); continue; } if (!regexMethodSymbol.ReturnType.Equals(regexSymbol)) { - Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustReturnRegex, methodSyntax.Body.GetLocation()); + Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustReturnRegex, methodSyntax.GetLocation()); continue; } @@ -199,7 +199,31 @@ private static IReadOnlyList GetRegexClassesToEmit(Compilation compi // - ... 
CultureInfo culture = (options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture; - // Parse the input + // Validate the options + const RegexOptions SupportedOptions = + RegexOptions.IgnoreCase | + RegexOptions.Multiline | + RegexOptions.ExplicitCapture | + RegexOptions.Compiled | + RegexOptions.Singleline | + RegexOptions.IgnorePatternWhitespace | + RegexOptions.RightToLeft | + RegexOptions.ECMAScript | + RegexOptions.CultureInvariant; + if ((regexMethod.Options.Value & ~(int)SupportedOptions) != 0) + { + Diag(reportDiagnostic, DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "options"); + continue; + } + + // Validate the timeout + if (regexMethod.MatchTimeout.Value is 0 or < -1) + { + Diag(reportDiagnostic, DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "matchTimeout"); + continue; + } + + // Parse the input pattern try { regexMethod.Tree = RegexParser.Parse(regexMethod.Pattern, (RegexOptions)regexMethod.Options, culture); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs index 47f8179d13c702..d8700ebb9bd287 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs @@ -286,7 +286,7 @@ private static string OperatorDescription(int Opcode) } [ExcludeFromCodeCoverage] - private string OpcodeDescription(int offset) => OpcodeDescription(offset, Codes, Strings); + internal string OpcodeDescription(int offset) => OpcodeDescription(offset, Codes, Strings); [ExcludeFromCodeCoverage] internal static string OpcodeDescription(int offset, int[] codes, string[] strings) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOptions.cs 
b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOptions.cs index 15b08e1062fb03..d39b2d557ab0e8 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOptions.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOptions.cs @@ -4,7 +4,12 @@ namespace System.Text.RegularExpressions { [Flags] - public enum RegexOptions +#if REGEXGENERATOR + internal +#else + public +#endif + enum RegexOptions { None = 0x0000, IgnoreCase = 0x0001, // "i" diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParseException.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParseException.cs index a0138053b33262..e1c64876c80797 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParseException.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParseException.cs @@ -10,7 +10,12 @@ namespace System.Text.RegularExpressions /// detailed information in the and properties. /// [Serializable] - public sealed class RegexParseException : ArgumentException +#if REGEXGENERATOR + internal +#else + public +#endif + sealed class RegexParseException : ArgumentException { /// Gets the error that happened during parsing. public RegexParseError Error { get; } diff --git a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs new file mode 100644 index 00000000000000..420c007d89c274 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs @@ -0,0 +1,432 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CSharp; +using Microsoft.CodeAnalysis.Emit; +using Microsoft.CodeAnalysis.Text; +using System.Collections.Generic; +using System.Diagnostics; +using System.Globalization; +using System.IO; +using System.Linq; +using System.Reflection; +using System.Threading; +using System.Threading.Tasks; +using Xunit; + +namespace System.Text.RegularExpressions.Generator.Tests +{ + public class RegexGeneratorParserTests + { + [Theory] + [InlineData("ab[]")] + public async Task Diagnostic_InvalidRegexPattern(string pattern) + { + IReadOnlyList diagnostics = await RunGenerator($@" + using System.Text.RegularExpressions; + partial class C + {{ + [RegexGenerator(""{pattern}"")] + private static partial Regex InvalidPattern(); + }} + "); + + Assert.Equal(DiagnosticDescriptors.InvalidRegexArguments.Id, Assert.Single(diagnostics).Id); + } + + [Theory] + [InlineData(128)] + public async Task Diagnostic_InvalidRegexOptions(int options) + { + IReadOnlyList diagnostics = await RunGenerator(@$" + using System.Text.RegularExpressions; + partial class C + {{ + [RegexGenerator(""ab"", (RegexOptions){options})] + private static partial Regex InvalidPattern(); + }} + "); + + Assert.Equal(DiagnosticDescriptors.InvalidRegexArguments.Id, Assert.Single(diagnostics).Id); + } + + [Theory] + [InlineData(-2)] + [InlineData(0)] + public async Task Diagnostic_InvalidRegexTimeout(int matchTimeout) + { + IReadOnlyList diagnostics = await RunGenerator(@$" + using System.Text.RegularExpressions; + partial class C + {{ + [RegexGenerator(""ab"", RegexOptions.None, {matchTimeout.ToString(CultureInfo.InvariantCulture)})] + private static partial Regex InvalidPattern(); + }} + "); + + Assert.Equal(DiagnosticDescriptors.InvalidRegexArguments.Id, Assert.Single(diagnostics).Id); + } + + [Fact] + public async Task Diagnostic_MethodMustReturnRegex() + { + IReadOnlyList diagnostics = 
await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""ab"")] + private static partial int MethodMustReturnRegex(); + } + "); + + Assert.Equal(DiagnosticDescriptors.RegexMethodMustReturnRegex.Id, Assert.Single(diagnostics).Id); + } + + [Fact] + public async Task Diagnostic_MethodMustBeStatic() + { + IReadOnlyList diagnostics = await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""ab"")] + private partial Regex MethodMustBeStatic(); + } + "); + + Assert.Equal(DiagnosticDescriptors.RegexMethodMustBeStatic.Id, Assert.Single(diagnostics).Id); + } + + [Fact] + public async Task Diagnostic_MethodMustNotBeGeneric() + { + IReadOnlyList diagnostics = await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""ab"")] + private static partial Regex MethodMustNotBeGeneric(); + } + "); + + Assert.Equal(DiagnosticDescriptors.RegexMethodMustNotBeGeneric.Id, Assert.Single(diagnostics).Id); + } + + [Fact] + public async Task Diagnostic_MethodMustBeParameterless() + { + IReadOnlyList diagnostics = await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""ab"")] + private static partial Regex MethodMustBeParameterless(int i); + } + "); + + Assert.Equal(DiagnosticDescriptors.RegexMethodMustBeParameterless.Id, Assert.Single(diagnostics).Id); + } + + [Fact] + public async Task Diagnostic_MethodMustBePartial() + { + IReadOnlyList diagnostics = await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""ab"")] + private static Regex MethodMustBePartial() => null; + } + "); + + Assert.Equal(DiagnosticDescriptors.RegexMethodMustBePartial.Id, Assert.Single(diagnostics).Id); + } + + [ActiveIssue("https://github.com/dotnet/roslyn/pull/55866")] + [Fact] + public async Task Diagnostic_InvalidLangVersion() + { + IReadOnlyList diagnostics = await RunGenerator(@" + 
using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""ab"")] + private static partial Regex InvalidLangVersion(); + } + ", langVersion: LanguageVersion.CSharp9); + + Assert.Equal(DiagnosticDescriptors.InvalidLangVersion.Id, Assert.Single(diagnostics).Id); + } + + [Fact] + public async Task Valid_ClassWithoutNamespace() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""ab"")] + private static partial Regex Valid(); + } + ", compile: true)); + } + + [Fact] + public async Task Valid_ClassWithNamespace() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + namespace A + { + partial class C + { + [RegexGenerator(""ab"")] + private static partial Regex Valid(); + } + } + ", compile: true)); + } + + [Fact] + public async Task Valid_ClassWithFileScopedNamespace() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + namespace A; + partial class C + { + [RegexGenerator(""ab"")] + private static partial Regex Valid(); + } + ", compile: true)); + } + + [Fact] + public async Task Valid_NestedClassWithoutNamespace() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + partial class B + { + partial class C + { + [RegexGenerator(""ab"")] + private static partial Regex Valid(); + } + } + ", compile: true)); + } + + [Fact] + public async Task Valid_NestedClassWithNamespace() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + namespace A + { + partial class B + { + partial class C + { + [RegexGenerator(""ab"")] + private static partial Regex Valid(); + } + } + } + ", compile: true)); + } + + [Fact] + public async Task Valid_NestedClassWithFileScopedNamespace() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + namespace A; + partial class B + { + partial class C + { + [RegexGenerator(""ab"")] + private static partial Regex Valid(); + } 
+ } + ", compile: true)); + } + + [Fact] + public async Task Valid_NestedClassesWithNamespace() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + namespace A + { + public partial class B + { + internal partial class C + { + protected internal partial class D + { + private protected partial class E + { + private partial class F + { + [RegexGenerator(""ab"")] + private static partial Regex Valid(); + } + } + } + } + } + } + ", compile: true)); + } + + [Fact] + public async Task Valid_NullableRegex() + { + Assert.Empty(await RunGenerator(@" + #nullable enable + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""ab"")] + private static partial Regex? Valid(); + } + ", compile: true)); + } + + [Fact] + public async Task Valid_InternalRegex() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""ab"")] + internal static partial Regex Valid(); + } + ", compile: true)); + } + + [Fact] + public async Task Valid_PublicRegex() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""ab"")] + public static partial Regex Valid(); + } + ", compile: true)); + } + + [Fact] + public async Task Valid_PrivateProtectedRegex() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""ab"")] + private protected static partial Regex Valid(); + } + ", compile: true)); + } + + [Fact] + public async Task Valid_PublicSealedClass() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + public sealed partial class C + { + [RegexGenerator(""ab"")] + private static partial Regex Valid(); + } + ", compile: true)); + } + + [Fact] + public async Task Valid_InternalAbstractClass() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + internal abstract partial class C + { + 
[RegexGenerator(""ab"")] + private static partial Regex Valid(); + } + ", compile: true)); + } + + [Fact] + public async Task Valid_MultiplRegexMethodsPerClass() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C1 + { + [RegexGenerator(""a"")] + private static partial Regex A(); + + [RegexGenerator(""b"")] + public static partial Regex B(); + + [RegexGenerator(""b"")] + public static partial Regex C(); + } + partial class C2 + { + [RegexGenerator(""d"")] + public static partial Regex D(); + + [RegexGenerator(""d"")] + public static partial Regex E(); + } + ", compile: true)); + } + + private async Task> RunGenerator( + string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, CancellationToken cancellationToken = default) + { + string corelib = Assembly.GetAssembly(typeof(object))!.Location; + string runtimeDir = Path.GetDirectoryName(corelib)!; + var refs = new List() + { + MetadataReference.CreateFromFile(corelib), + MetadataReference.CreateFromFile(Path.Combine(runtimeDir, "System.Runtime.dll")), + MetadataReference.CreateFromFile(Path.Combine(runtimeDir, "System.Text.RegularExpressions.dll")) + }; + + var proj = new AdhocWorkspace() + .AddSolution(SolutionInfo.Create(SolutionId.CreateNewId(), VersionStamp.Create())) + .AddProject("RegexGeneratorTest", "RegexGeneratorTest.dll", "C#") + .WithMetadataReferences(refs) + .WithCompilationOptions(new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary) + .WithNullableContextOptions(NullableContextOptions.Enable)) + .WithParseOptions(new CSharpParseOptions(langVersion)) + .AddDocument("RegexGenerator.g.cs", SourceText.From(code, Encoding.UTF8)).Project; + + Assert.True(proj.Solution.Workspace.TryApplyChanges(proj.Solution)); + + Compilation? 
comp = await proj!.GetCompilationAsync(CancellationToken.None).ConfigureAwait(false); + Debug.Assert(comp is not null); + + var generator = new RegexGenerator(); + CSharpGeneratorDriver cgd = CSharpGeneratorDriver.Create(new[] { generator.AsSourceGenerator() }, parseOptions: CSharpParseOptions.Default.WithLanguageVersion(langVersion)); + GeneratorDriver gd = cgd.RunGenerators(comp!, cancellationToken); + GeneratorDriverRunResult generatorResults = gd.GetRunResult(); + if (!compile) + { + return generatorResults.Diagnostics; + } + + comp = comp.AddSyntaxTrees(generatorResults.GeneratedTrees.ToArray()); + EmitResult results = comp.Emit(Stream.Null, cancellationToken: cancellationToken); + + return generatorResults.Diagnostics.Concat(results.Diagnostics).Where(d => d.Severity != DiagnosticSeverity.Hidden).ToArray(); + } + } +} diff --git a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/System.Text.RegularExpressions.Generators.Tests.csproj b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/System.Text.RegularExpressions.Generators.Tests.csproj new file mode 100644 index 00000000000000..82dfb654c2e4ab --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/System.Text.RegularExpressions.Generators.Tests.csproj @@ -0,0 +1,18 @@ + + + + $(NetCoreAppCurrent) + true + true + enable + + $(NoWarn);xUnit2008 + + + + + + + + + diff --git a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/System.Text.RegularExpressions.Generators.Tests.sln b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/System.Text.RegularExpressions.Generators.Tests.sln new file mode 100644 index 00000000000000..91fab691d5ff0d --- /dev/null +++ 
b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/System.Text.RegularExpressions.Generators.Tests.sln @@ -0,0 +1,31 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31709.452 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.Generators.Tests", "System.Text.RegularExpressions.Generators.Tests.csproj", "{66CC1D41-5724-4E9A-A427-51B592D49453}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.Generator", "..\..\gen\System.Text.RegularExpressions.Generator.csproj", "{3047EBB3-BF7B-4019-81D5-88E86D9F89EA}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {66CC1D41-5724-4E9A-A427-51B592D49453}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {66CC1D41-5724-4E9A-A427-51B592D49453}.Debug|Any CPU.Build.0 = Debug|Any CPU + {66CC1D41-5724-4E9A-A427-51B592D49453}.Release|Any CPU.ActiveCfg = Release|Any CPU + {66CC1D41-5724-4E9A-A427-51B592D49453}.Release|Any CPU.Build.0 = Release|Any CPU + {3047EBB3-BF7B-4019-81D5-88E86D9F89EA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3047EBB3-BF7B-4019-81D5-88E86D9F89EA}.Debug|Any CPU.Build.0 = Debug|Any CPU + {3047EBB3-BF7B-4019-81D5-88E86D9F89EA}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3047EBB3-BF7B-4019-81D5-88E86D9F89EA}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {CAB7B6EA-E3DE-4731-9CFB-22F32D326700} + EndGlobalSection +EndGlobal From 68fea064293aa25a86fe297b1139a1e7b2e87c20 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Wed, 
15 Sep 2021 17:35:36 -0400 Subject: [PATCH 04/16] Integrate source generator into many regex tests Start integrating the source generator into the regex test suite, so that many existing tests also validate the generated code. --- .../tests/AttRegexTests.cs | 62 +-- .../tests/MonoRegexTests.cs | 16 +- .../tests/Regex.Groups.Tests.cs | 14 +- .../tests/Regex.KnownPattern.Tests.cs | 434 ++++++++++-------- .../tests/Regex.Match.Tests.cs | 348 ++++++-------- .../tests/Regex.MultipleMatches.Tests.cs | 29 +- .../tests/Regex.Replace.Tests.cs | 78 ++-- .../tests/Regex.Split.Tests.cs | 40 +- .../tests/Regex.Tests.Common.cs | 64 +++ .../tests/RegexCharacterSetTests.cs | 132 +++--- .../tests/RegexCompilationHelper.cs | 44 -- .../tests/RegexCultureTests.cs | 2 + .../tests/RegexGeneratorHelper.netcoreapp.cs | 154 +++++++ .../tests/RegexGeneratorHelper.netfx.cs | 14 + .../RegexGeneratorParserTests.cs | 29 +- ...ystem.Text.RegularExpressions.Tests.csproj | 6 +- 16 files changed, 817 insertions(+), 649 deletions(-) delete mode 100644 src/libraries/System.Text.RegularExpressions/tests/RegexCompilationHelper.cs create mode 100644 src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs create mode 100644 src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netfx.cs diff --git a/src/libraries/System.Text.RegularExpressions/tests/AttRegexTests.cs b/src/libraries/System.Text.RegularExpressions/tests/AttRegexTests.cs index d594bfb836e196..239a94a6d45c42 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/AttRegexTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/AttRegexTests.cs @@ -66,6 +66,7 @@ using System.Collections.Generic; using System.Linq; +using System.Threading.Tasks; using Xunit; namespace System.Text.RegularExpressions.Tests @@ -366,50 +367,53 @@ public class AttRegexTests [InlineData("(a*){2}(x)", "x", "(0,1)(0,0)(0,1)")] [InlineData("(a*){2}(x)", "ax", "(0,2)(1,1)(1,2)")] [InlineData("(a*){2}(x)", 
"axa", "(0,2)(1,1)(1,2)")] - public void Test(string pattern, string input, string captures) + public async Task Test(string pattern, string input, string captures) { if (input == "NULL") { input = ""; } - foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.Compiled }) + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { if (captures == "BADBR") { - Assert.ThrowsAny(() => Regex.IsMatch(input, pattern, options)); + await Assert.ThrowsAnyAsync(async () => (await RegexHelpers.GetRegex(engine, pattern)).IsMatch(input)); + return; } - else if (captures == "NOMATCH") + + Regex r = await RegexHelpers.GetRegex(engine, pattern); + + if (captures == "NOMATCH") { - Assert.False(Regex.IsMatch(input, pattern, options)); + Assert.False(r.IsMatch(input)); + return; } - else - { - Match match = Regex.Match(input, pattern, options); - Assert.True(match.Success); - var expected = new HashSet<(int start, int end)>( - captures - .Split(new[] { '(', ')' }, StringSplitOptions.RemoveEmptyEntries) - .Select(s => s.Split(',')) - .Select(s => (start: int.Parse(s[0]), end: int.Parse(s[1]))) - .Distinct() - .OrderBy(c => c.start) - .ThenBy(c => c.end)); + Match match = r.Match(input); + Assert.True(match.Success); - var actual = new HashSet<(int start, int end)>( - match.Groups - .Cast() - .Select(g => (start: g.Index, end: g.Index + g.Length)) - .Distinct() - .OrderBy(g => g.start) - .ThenBy(g => g.end)); + var expected = new HashSet<(int start, int end)>( + captures + .Split(new[] { '(', ')' }, StringSplitOptions.RemoveEmptyEntries) + .Select(s => s.Split(',')) + .Select(s => (start: int.Parse(s[0]), end: int.Parse(s[1]))) + .Distinct() + .OrderBy(c => c.start) + .ThenBy(c => c.end)); - // The .NET implementation sometimes has extra captures beyond what the original data specifies, so we assert a subset. 
- if (!expected.IsSubsetOf(actual)) - { - throw new Xunit.Sdk.XunitException($"Actual: {string.Join(", ", actual)}{Environment.NewLine}Expected: {string.Join(", ", expected)}"); - } + var actual = new HashSet<(int start, int end)>( + match.Groups + .Cast() + .Select(g => (start: g.Index, end: g.Index + g.Length)) + .Distinct() + .OrderBy(g => g.start) + .ThenBy(g => g.end)); + + // The .NET implementation sometimes has extra captures beyond what the original data specifies, so we assert a subset. + if (!expected.IsSubsetOf(actual)) + { + throw new Xunit.Sdk.XunitException($"Actual: {string.Join(", ", actual)}{Environment.NewLine}Expected: {string.Join(", ", expected)}"); } } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/MonoRegexTests.cs b/src/libraries/System.Text.RegularExpressions/tests/MonoRegexTests.cs index c5c74e1f16d5b3..63720f66f1811a 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/MonoRegexTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/MonoRegexTests.cs @@ -9,6 +9,7 @@ // (c) 2002 using System.Collections.Generic; +using System.Threading.Tasks; using Xunit; namespace System.Text.RegularExpressions.Tests @@ -20,12 +21,12 @@ public class MonoTests [Theory] [MemberData(nameof(RegexTestCasesWithOptions))] - public void ValidateRegex(string pattern, RegexOptions options, string input, string expected) + public async Task ValidateRegex(RegexEngine engine, string pattern, RegexOptions options, string input, string expected) { string result = "Fail."; try { - var re = new Regex(pattern, options); + Regex re = await RegexHelpers.GetRegex(engine, pattern, options); Match m = re.Match(input); if (m.Success) @@ -55,12 +56,13 @@ public void ValidateRegex(string pattern, RegexOptions options, string input, st public static IEnumerable RegexTestCasesWithOptions() { - foreach (object[] obj in RegexTestCases()) + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - yield return new object[] { obj[0], 
obj[1], obj[2], obj[3] }; - yield return new object[] { obj[0], RegexOptions.CultureInvariant | (RegexOptions)obj[1], obj[2], obj[3] }; - yield return new object[] { obj[0], RegexOptions.Compiled | (RegexOptions)obj[1], obj[2], obj[3] }; - yield return new object[] { obj[0], RegexOptions.Compiled | RegexOptions.CultureInvariant | (RegexOptions)obj[1], obj[2], obj[3] }; + foreach (object[] obj in RegexTestCases()) + { + yield return new object[] { engine, obj[0], obj[1], obj[2], obj[3] }; + yield return new object[] { engine, obj[0], RegexOptions.CultureInvariant | (RegexOptions)obj[1], obj[2], obj[3] }; + } } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs index 0149fbe16f2062..a668331ce14774 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.Globalization; using System.Tests; +using System.Threading.Tasks; using Xunit; namespace System.Text.RegularExpressions.Tests @@ -905,7 +906,7 @@ public static IEnumerable Groups_CustomCulture_TestData_AzeriLatin() [MemberData(nameof(Groups_CustomCulture_TestData_AzeriLatin))] [ActiveIssue("https://github.com/dotnet/runtime/issues/56407", TestPlatforms.Android)] [ActiveIssue("https://github.com/dotnet/runtime/issues/36900", TestPlatforms.iOS | TestPlatforms.tvOS | TestPlatforms.MacCatalyst)] - public void Groups(string cultureName, string pattern, string input, RegexOptions options, string[] expectedGroups) + public async Task Groups(string cultureName, string pattern, string input, RegexOptions options, string[] expectedGroups) { if (cultureName is null) { @@ -915,13 +916,16 @@ public void Groups(string cultureName, string pattern, string input, RegexOption using (new ThreadCultureChange(cultureName)) { - Groups(pattern, input, options, 
expectedGroups); - Groups(pattern, input, RegexOptions.Compiled | options, expectedGroups); + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) + { + await GroupsAsync(engine, pattern, input, options, expectedGroups); + } } - static void Groups(string pattern, string input, RegexOptions options, string[] expectedGroups) + static async Task GroupsAsync(RegexEngine engine, string pattern, string input, RegexOptions options, string[] expectedGroups) { - Regex regex = new Regex(pattern, options); + Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options); + Match match = regex.Match(input); Assert.True(match.Success); diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs index ff4903ad3a4277..c78c90c6af3200 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs @@ -3,6 +3,7 @@ using System.Globalization; using System.Linq; +using System.Threading.Tasks; using Xunit; namespace System.Text.RegularExpressions.Tests @@ -15,9 +16,8 @@ public class RegexKnownPatternTests // https://docs.microsoft.com/en-us/dotnet/standard/base-types/regular-expression-example-scanning-for-hrefs [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_Examples_ScanningHrefs(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_Examples_ScanningHrefs(RegexEngine engine) { const string HrefPattern = @"href\s*=\s*(?:[""'](?<1>[^""']*)[""']|(?<1>\S+))"; @@ -31,7 +31,9 @@ public void Docs_Examples_ScanningHrefs(RegexOptions options) "" + ".NET Base Class Library blog

"; - Match m = Regex.Match(InputString, HrefPattern, options | RegexOptions.IgnoreCase); + Regex r = await RegexHelpers.GetRegex(engine, HrefPattern, RegexOptions.IgnoreCase); + + Match m = r.Match(InputString); Assert.True(m.Success); Assert.Equal("http://msdn2.microsoft.com", m.Groups[1].ToString()); Assert.Equal(43, m.Groups[1].Index); @@ -52,24 +54,22 @@ public void Docs_Examples_ScanningHrefs(RegexOptions options) // https://docs.microsoft.com/en-us/dotnet/standard/base-types/regular-expression-example-changing-date-formats [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_Examples_MDYtoDMY(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_Examples_MDYtoDMY(RegexEngine engine) { + Regex r = await RegexHelpers.GetRegex(engine, @"\b(?\d{1,2})/(?\d{1,2})/(?\d{2,4})\b"); + string dt = new DateTime(2020, 1, 8, 0, 0, 0, DateTimeKind.Utc).ToString("d", DateTimeFormatInfo.InvariantInfo); - string result = Regex.Replace(dt, @"\b(?\d{1,2})/(?\d{1,2})/(?\d{2,4})\b", "${day}-${month}-${year}", options); - Assert.Equal("08-01-2020", result); + Assert.Equal("08-01-2020", r.Replace(dt, "${day}-${month}-${year}")); } // https://docs.microsoft.com/en-us/dotnet/standard/base-types/how-to-extract-a-protocol-and-port-number-from-a-url [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_Examples_ExtractProtocolPort(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_Examples_ExtractProtocolPort(RegexEngine engine) { - string url = "http://www.contoso.com:8080/letters/readme.html"; - Regex r = new Regex(@"^(?\w+)://[^/]+?(?:\d+)?/", options); - Match m = r.Match(url); + Regex r = await RegexHelpers.GetRegex(engine, @"^(?\w+)://[^/]+?(?:\d+)?/"); + Match m = 
r.Match("http://www.contoso.com:8080/letters/readme.html"); Assert.True(m.Success); Assert.Equal("http:8080", m.Result("${proto}${port}")); } @@ -90,12 +90,14 @@ public void Docs_Examples_ExtractProtocolPort(RegexOptions options) [InlineData("j.s@server1.proseware.com", true)] [InlineData("\"j\\\"s\\\"\"@proseware.com", true)] [InlineData("js@contoso.\u4E2D\u56FD", true)] - public void Docs_Examples_ValidateEmail(string email, bool expectedIsValid) + public async Task Docs_Examples_ValidateEmail(string email, bool expectedIsValid) { - Assert.Equal(expectedIsValid, IsValidEmail(email, RegexOptions.None)); - Assert.Equal(expectedIsValid, IsValidEmail(email, RegexOptions.Compiled)); + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) + { + Assert.Equal(expectedIsValid, await IsValidEmailAsync(email, engine)); + } - bool IsValidEmail(string email, RegexOptions options) + async Task IsValidEmailAsync(string email, RegexEngine engine) { if (string.IsNullOrWhiteSpace(email)) { @@ -104,11 +106,10 @@ bool IsValidEmail(string email, RegexOptions options) try { - // Normalize the domain - email = Regex.Replace(email, @"(@)(.+)$", DomainMapper, options, TimeSpan.FromMilliseconds(200)); + Regex r = await RegexHelpers.GetRegex(engine, @"(@)(.+)$", matchTimeout: 200); - // Examines the domain part of the email and normalizes it. - string DomainMapper(Match match) + // Normalize the domain part of the email + email = r.Replace(email, match => { // Use IdnMapping class to convert Unicode domain names. 
var idn = new IdnMapping(); @@ -117,7 +118,7 @@ string DomainMapper(Match match) string domainName = idn.GetAscii(match.Groups[2].Value); return match.Groups[1].Value + domainName; - } + }); } catch (RegexMatchTimeoutException) { @@ -130,10 +131,14 @@ string DomainMapper(Match match) try { - return Regex.IsMatch(email, + Regex r = await RegexHelpers.GetRegex( + engine, @"^(?("")("".+?(?\w+)\s\k\W(?\w+)"; const string Input = "He said that that was the the correct answer."; - Match match = Regex.Match(Input, Pattern, RegexOptions.IgnoreCase | options); + Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + + Match match = r.Match(Input); Assert.True(match.Success); Assert.Equal("that", match.Groups["duplicateWord"].Value); @@ -194,17 +201,18 @@ public void Docs_GroupingConstructs_NamedMatchedSubexpression1(RegexOptions opti // https://docs.microsoft.com/en-us/dotnet/standard/base-types/grouping-constructs-in-regular-expressions#named-matched-subexpressions [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_GroupingConstructs_NamedMatchedSubexpression2(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_GroupingConstructs_NamedMatchedSubexpression2(RegexEngine engine) { const string Pattern = @"\D+(?\d+)\D+(?\d+)?"; string[] inputs = { "abc123def456", "abc123def" }; + Regex r = await RegexHelpers.GetRegex(engine, Pattern); + var actual = new StringBuilder(); foreach (string input in inputs) { - Match m = Regex.Match(input, Pattern, options); + Match m = r.Match(input); if (m.Success) { actual.AppendLine($"Match: {m.Value}"); @@ -234,9 +242,8 @@ public void Docs_GroupingConstructs_NamedMatchedSubexpression2(RegexOptions opti // https://docs.microsoft.com/en-us/dotnet/standard/base-types/grouping-constructs-in-regular-expressions#balancing-group-definitions [Theory] - 
[InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_GroupingConstructs_BalancingGroups(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_GroupingConstructs_BalancingGroups(RegexEngine engine) { const string Pattern = "^[^<>]*" + @@ -247,8 +254,10 @@ public void Docs_GroupingConstructs_BalancingGroups(RegexOptions options) "(?(Open)(?!))$"; const string Input = ">"; + Regex r = await RegexHelpers.GetRegex(engine, Pattern); + var actual = new StringBuilder(); - Match m = Regex.Match(Input, Pattern, options); + Match m = r.Match(Input); if (m.Success) { actual.AppendLine($"Input: \"{Input}\""); @@ -294,14 +303,15 @@ public void Docs_GroupingConstructs_BalancingGroups(RegexOptions options) // https://docs.microsoft.com/en-us/dotnet/standard/base-types/grouping-constructs-in-regular-expressions#noncapturing-groups [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_GroupingConstructs_NoncapturingGroups(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_GroupingConstructs_NoncapturingGroups(RegexEngine engine) { const string Pattern = @"(?:\b(?:\w+)\W*)+\."; const string Input = "This is a short sentence."; - Match match = Regex.Match(Input, Pattern, options); + Regex r = await RegexHelpers.GetRegex(engine, Pattern); + + Match match = r.Match(Input); Assert.True(match.Success); Assert.Equal("This is a short sentence.", match.Value); Assert.Equal(1, match.Groups.Count); @@ -309,14 +319,15 @@ public void Docs_GroupingConstructs_NoncapturingGroups(RegexOptions options) // https://docs.microsoft.com/en-us/dotnet/standard/base-types/grouping-constructs-in-regular-expressions#group-options [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void 
Docs_GroupingConstructs_GroupOptions(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_GroupingConstructs_GroupOptions(RegexEngine engine) { const string Pattern = @"\b(?ix: d \w+)\s"; const string Input = "Dogs are decidedly good pets."; - Match match = Regex.Match(Input, Pattern, options); + Regex r = await RegexHelpers.GetRegex(engine, Pattern); + + Match match = r.Match(Input); Assert.True(match.Success); Assert.Equal("Dogs ", match.Value); Assert.Equal(0, match.Index); @@ -331,38 +342,40 @@ public void Docs_GroupingConstructs_GroupOptions(RegexOptions options) // https://docs.microsoft.com/en-us/dotnet/standard/base-types/grouping-constructs-in-regular-expressions#zero-width-positive-lookahead-assertions [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_GroupingConstructs_ZeroWidthPositiveLookaheadAssertions(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_GroupingConstructs_ZeroWidthPositiveLookaheadAssertions(RegexEngine engine) { const string Pattern = @"\b\w+(?=\sis\b)"; Match match; - match = Regex.Match("The dog is a Malamute.", Pattern, options); + Regex r = await RegexHelpers.GetRegex(engine, Pattern); + + match = r.Match("The dog is a Malamute."); Assert.True(match.Success); Assert.Equal("dog", match.Value); - match = Regex.Match("The island has beautiful birds.", Pattern, options); + match = r.Match("The island has beautiful birds."); Assert.False(match.Success); - match = Regex.Match("The pitch missed home plate.", Pattern, options); + match = r.Match("The pitch missed home plate."); Assert.False(match.Success); - match = Regex.Match("Sunday is a weekend day.", Pattern, options); + match = r.Match("Sunday is a weekend day."); Assert.True(match.Success); Assert.Equal("Sunday", match.Value); } // 
https://docs.microsoft.com/en-us/dotnet/standard/base-types/grouping-constructs-in-regular-expressions#zero-width-negative-lookahead-assertions [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_GroupingConstructs_ZeroWidthNegativeLookaheadAssertions(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_GroupingConstructs_ZeroWidthNegativeLookaheadAssertions(RegexEngine engine) { const string Pattern = @"\b(?!un)\w+\b"; const string Input = "unite one unethical ethics use untie ultimate"; - MatchCollection matches = Regex.Matches(Input, Pattern, RegexOptions.IgnoreCase | options); + Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + + MatchCollection matches = r.Matches(Input); Assert.Equal("one", matches[0].Value); Assert.Equal("ethics", matches[1].Value); Assert.Equal("use", matches[2].Value); @@ -371,61 +384,62 @@ public void Docs_GroupingConstructs_ZeroWidthNegativeLookaheadAssertions(RegexOp // https://docs.microsoft.com/en-us/dotnet/standard/base-types/grouping-constructs-in-regular-expressions#zero-width-positive-lookbehind-assertions [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_GroupingConstructs_ZeroWidthPositiveLookbehindAssertions(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_GroupingConstructs_ZeroWidthPositiveLookbehindAssertions(RegexEngine engine) { const string Pattern = @"(?<=\b20)\d{2}\b"; const string Input = "2010 1999 1861 2140 2009"; - MatchCollection matches = Regex.Matches(Input, Pattern, RegexOptions.IgnoreCase | options); + Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + + MatchCollection matches = r.Matches(Input); Assert.Equal("10", matches[0].Value); Assert.Equal("09", 
matches[1].Value); } // https://docs.microsoft.com/en-us/dotnet/standard/base-types/grouping-constructs-in-regular-expressions#zero-width-negative-lookbehind-assertions [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_GroupingConstructs_ZeroWidthNegativeLookbehindAssertions(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_GroupingConstructs_ZeroWidthNegativeLookbehindAssertions(RegexEngine engine) { const string Pattern = @"(?(\w)\1+).\b"; + Regex rBack = await RegexHelpers.GetRegex(engine, @"(\w)\1+.\b"); + Regex rNoBack = await RegexHelpers.GetRegex(engine, @"(?>(\w)\1+).\b"); string[] inputs = { "aaad", "aaaa" }; Match back, noback; - back = Regex.Match("cccd.", Back, options); - noback = Regex.Match("cccd.", NoBack, options); + back = rBack.Match("cccd."); + noback = rNoBack.Match("cccd."); Assert.True(back.Success); Assert.True(noback.Success); Assert.Equal("cccd", back.Value); Assert.Equal("cccd", noback.Value); - back = Regex.Match("aaad", Back, options); - noback = Regex.Match("aaad", NoBack, options); + back = rBack.Match("aaad"); + noback = rNoBack.Match("aaad"); Assert.True(back.Success); Assert.True(noback.Success); Assert.Equal("aaad", back.Value); Assert.Equal("aaad", noback.Value); - back = Regex.Match("aaaa", Back, options); - noback = Regex.Match("aaaa", NoBack, options); + back = rBack.Match("aaaa"); + noback = rNoBack.Match("aaaa"); Assert.True(back.Success); Assert.False(noback.Success); Assert.Equal("aaaa", back.Value); @@ -433,13 +447,15 @@ public void Docs_GroupingConstructs_NonbacktrackingSubexpressions(RegexOptions o // https://docs.microsoft.com/en-us/dotnet/standard/base-types/grouping-constructs-in-regular-expressions#grouping-constructs-and-regular-expression-objects [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void 
Docs_GroupingConstructs_GroupCaptureRelationship(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_GroupingConstructs_GroupCaptureRelationship(RegexEngine engine) { const string Pattern = @"(\b(\w+)\W+)+"; const string Input = "This is a short sentence."; - Match match = Regex.Match(Input, Pattern, options); + + Regex r = await RegexHelpers.GetRegex(engine, Pattern); + + Match match = r.Match(Input); var actual = new StringBuilder(); actual.AppendLine($"Match: '{match.Value}'"); @@ -474,15 +490,16 @@ public void Docs_GroupingConstructs_GroupCaptureRelationship(RegexOptions option // https://docs.microsoft.com/en-us/dotnet/api/system.text.regularexpressions.capture?view=netcore-3.1#examples [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_Capture_Sentences(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_Capture_Sentences(RegexEngine engine) { const string Pattern = @"((\w+)[\s.])+"; const string Input = "Yes. 
This dog is very friendly."; + Regex r = await RegexHelpers.GetRegex(engine, Pattern); + var actual = new StringBuilder(); - foreach (Match match in Regex.Matches(Input, Pattern, options)) + foreach (Match match in r.Matches(Input)) { actual.AppendLine($"Match: {match.Value}"); for (int groupCtr = 0; groupCtr < match.Groups.Count; groupCtr++) @@ -525,17 +542,18 @@ public void Docs_Capture_Sentences(RegexOptions options) // https://docs.microsoft.com/en-us/dotnet/api/system.text.regularexpressions.capture.value?view=netcore-3.1 [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_Capture_ProductNumber(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_Capture_ProductNumber(RegexEngine engine) { const string Pattern = @"^([a-z]+)(\d+)?\.([a-z]+(\d)*)$"; string[] values = { "AC10", "Za203.CYM", "XYZ.CoA", "ABC.x170" }; + Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + var actual = new StringBuilder(); foreach (var value in values) { - Match m = Regex.Match(value, Pattern, RegexOptions.IgnoreCase | options); + Match m = r.Match(value); if (m.Success) { actual.AppendLine($"Match: '{m.Value}'"); @@ -614,14 +632,15 @@ public void Docs_Capture_ProductNumber(RegexOptions options) // https://docs.microsoft.com/en-us/dotnet/standard/base-types/backtracking-in-regular-expressions#linear-comparison-without-backtracking [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_Backtracking_LinearComparisonWithoutBacktracking(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_Backtracking_LinearComparisonWithoutBacktracking(RegexEngine engine) { const string Pattern = @"e{2}\w\b"; const string Input = "needing a reed"; - MatchCollection matches = Regex.Matches(Input, 
Pattern, options); + Regex r = await RegexHelpers.GetRegex(engine, Pattern); + + MatchCollection matches = r.Matches(Input); Assert.Equal(1, matches.Count); Assert.Equal("eed", matches[0].Value); Assert.Equal(11, matches[0].Index); @@ -629,14 +648,15 @@ public void Docs_Backtracking_LinearComparisonWithoutBacktracking(RegexOptions o // https://docs.microsoft.com/en-us/dotnet/standard/base-types/backtracking-in-regular-expressions#backtracking-with-optional-quantifiers-or-alternation-constructs [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_Backtracking_WithOptionalQuantifiersOrAlternationConstructs(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_Backtracking_WithOptionalQuantifiersOrAlternationConstructs(RegexEngine engine) { const string Pattern = ".*(es)"; const string Input = "Essential services are provided by regular expressions."; - Match m = Regex.Match(Input, Pattern, RegexOptions.IgnoreCase | options); + Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + + Match m = r.Match(Input); Assert.True(m.Success); Assert.Equal("Essential services are provided by regular expres", m.Value); Assert.Equal(0, m.Index); @@ -647,73 +667,85 @@ public void Docs_Backtracking_WithOptionalQuantifiersOrAlternationConstructs(Reg // https://docs.microsoft.com/en-us/dotnet/standard/base-types/backtracking-in-regular-expressions#nonbacktracking-subexpression [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_Backtracking_WithNestedOptionalQuantifiers(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + [ActiveIssue("https://github.com/dotnet/runtime/issues/57891")] // takes too long due to backtracking + public async Task 
Docs_Backtracking_WithNestedOptionalQuantifiers_ExcessiveBacktracking(RegexEngine engine) { - const string Input = "b51:4:1DB:9EE1:5:27d60:f44:D4:cd:E:5:0A5:4a:D24:41Ad:"; - // Assert.False(Regex.IsMatch(Input, "^(([0-9a-fA-F]{1,4}:)*([0-9a-fA-F]{1,4}))*(::)$")); // takes too long due to backtracking - Assert.False(Regex.IsMatch(Input, "^((?>[0-9a-fA-F]{1,4}:)*(?>[0-9a-fA-F]{1,4}))*(::)$", options)); // non-backtracking + Regex r = await RegexHelpers.GetRegex(engine, "^(([0-9a-fA-F]{1,4}:)*([0-9a-fA-F]{1,4}))*(::)$"); + Assert.False(r.IsMatch("b51:4:1DB:9EE1:5:27d60:f44:D4:cd:E:5:0A5:4a:D24:41Ad:")); + } + + // https://docs.microsoft.com/en-us/dotnet/standard/base-types/backtracking-in-regular-expressions#nonbacktracking-subexpression + [Theory] + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_Backtracking_WithNestedOptionalQuantifiers_BacktrackingEliminated(RegexEngine engine) + { + Regex r = await RegexHelpers.GetRegex(engine, "^((?>[0-9a-fA-F]{1,4}:)*(?>[0-9a-fA-F]{1,4}))*(::)$"); + Assert.False(r.IsMatch("b51:4:1DB:9EE1:5:27d60:f44:D4:cd:E:5:0A5:4a:D24:41Ad:")); } // https://docs.microsoft.com/en-us/dotnet/standard/base-types/backtracking-in-regular-expressions#lookbehind-assertions [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_Backtracking_LookbehindAssertions(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_Backtracking_LookbehindAssertions(RegexEngine engine) { const string Input = "test@contoso.com"; - const string Pattern = @"^[0-9A-Z]([-.\w]*[0-9A-Z])?@"; - Assert.True(Regex.IsMatch(Input, Pattern, RegexOptions.IgnoreCase | options)); + Regex rPattern = await RegexHelpers.GetRegex(engine, @"^[0-9A-Z]([-.\w]*[0-9A-Z])?@", RegexOptions.IgnoreCase); + Assert.True(rPattern.IsMatch(Input)); - const string BehindPattern = 
@"^[0-9A-Z][-.\w]*(?<=[0-9A-Z])@"; - Assert.True(Regex.IsMatch(Input, BehindPattern, RegexOptions.IgnoreCase | options)); + Regex rBehindPattern = await RegexHelpers.GetRegex(engine, @"^[0-9A-Z][-.\w]*(?<=[0-9A-Z])@", RegexOptions.IgnoreCase); + Assert.True(rBehindPattern.IsMatch(Input)); } // https://docs.microsoft.com/en-us/dotnet/standard/base-types/backtracking-in-regular-expressions#lookahead-assertions [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_Backtracking_LookaheadAssertions(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + [ActiveIssue("https://github.com/dotnet/runtime/issues/57891")] // takes too long due to backtracking + public async Task Docs_Backtracking_LookaheadAssertions_ExcessiveBacktracking(RegexEngine engine) { - const string Input = "aaaaaaaaaaaaaaaaaaaaaa."; - - //const string Pattern = @"^(([A-Z]\w*)+\.)*[A-Z]\w*$"; - //Assert.False(Regex.IsMatch(Input, Pattern, RegexOptions.IgnoreCase)); // takes too long due to backtracking + Regex r = await RegexHelpers.GetRegex(engine, @"^(([A-Z]\w*)+\.)*[A-Z]\w*$", RegexOptions.IgnoreCase); + Assert.False(r.IsMatch("aaaaaaaaaaaaaaaaaaaaaa.")); + } - const string AheadPattern = @"^((?=[A-Z])\w+\.)*[A-Z]\w*$"; - Assert.False(Regex.IsMatch(Input, AheadPattern, RegexOptions.IgnoreCase | options)); + // https://docs.microsoft.com/en-us/dotnet/standard/base-types/backtracking-in-regular-expressions#lookahead-assertions + [Theory] + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_Backtracking_LookaheadAssertions_BacktrackingEliminated(RegexEngine engine) + { + Regex r = await RegexHelpers.GetRegex(engine, @"^((?=[A-Z])\w+\.)*[A-Z]\w*$", RegexOptions.IgnoreCase); + Assert.False(r.IsMatch("aaaaaaaaaaaaaaaaaaaaaa.")); } // 
https://docs.microsoft.com/en-us/dotnet/standard/base-types/details-of-regular-expression-behavior#net-framework-engine-capabilities [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_EngineCapabilities_LazyQuantifiers(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_EngineCapabilities_LazyQuantifiers(RegexEngine engine) { const string Input = "This sentence ends with the number 107325."; - const string GreedyPattern = @".+(\d+)\."; - Match match = Regex.Match(Input, GreedyPattern, options); + Regex rGreedy = await RegexHelpers.GetRegex(engine, @".+(\d+)\."); + Match match = rGreedy.Match(Input); Assert.True(match.Success); Assert.Equal("5", match.Groups[1].Value); - const string LazyPattern = @".+?(\d+)\."; - match = Regex.Match(Input, LazyPattern, options); + Regex rLazy = await RegexHelpers.GetRegex(engine, @".+?(\d+)\."); + match = rLazy.Match(Input); Assert.True(match.Success); Assert.Equal("107325", match.Groups[1].Value); } // https://docs.microsoft.com/en-us/dotnet/standard/base-types/details-of-regular-expression-behavior#net-framework-engine-capabilities [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_EngineCapabilities_PositiveLookahead(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_EngineCapabilities_PositiveLookahead(RegexEngine engine) { const string Pattern = @"\b[A-Z]+\b(?=\P{P})"; const string Input = "If so, what comes next?"; - MatchCollection matches = Regex.Matches(Input, Pattern, RegexOptions.IgnoreCase | options); + + Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + + MatchCollection matches = r.Matches(Input); Assert.Equal(3, matches.Count); Assert.Equal("If", matches[0].Value); Assert.Equal("what", 
matches[1].Value); @@ -722,13 +754,15 @@ public void Docs_EngineCapabilities_PositiveLookahead(RegexOptions options) // https://docs.microsoft.com/en-us/dotnet/standard/base-types/details-of-regular-expression-behavior#net-framework-engine-capabilities [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_EngineCapabilities_NegativeLookahead(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_EngineCapabilities_NegativeLookahead(RegexEngine engine) { const string Pattern = @"\b(?!non)\w+\b"; const string Input = "Nonsense is not always non-functional."; - MatchCollection matches = Regex.Matches(Input, Pattern, RegexOptions.IgnoreCase | options); + + Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + + MatchCollection matches = r.Matches(Input); Assert.Equal(4, matches.Count); Assert.Equal("is", matches[0].Value); Assert.Equal("not", matches[1].Value); @@ -738,14 +772,15 @@ public void Docs_EngineCapabilities_NegativeLookahead(RegexOptions options) // https://docs.microsoft.com/en-us/dotnet/standard/base-types/alternation-constructs-in-regular-expressions#conditional-matching-with-an-expression [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_EngineCapabilities_ConditionalEvaluation(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_EngineCapabilities_ConditionalEvaluation(RegexEngine engine) { const string Pattern = @"\b(?(\d{2}-)\d{2}-\d{7}|\d{3}-\d{2}-\d{4})\b"; const string Input = "01-9999999 020-333333 777-88-9999"; - MatchCollection matches = Regex.Matches(Input, Pattern, options); + Regex r = await RegexHelpers.GetRegex(engine, Pattern); + + MatchCollection matches = r.Matches(Input); Assert.Equal(2, matches.Count); Assert.Equal("01-9999999", 
matches[0].Value); @@ -757,50 +792,52 @@ public void Docs_EngineCapabilities_ConditionalEvaluation(RegexOptions options) // https://docs.microsoft.com/en-us/dotnet/standard/base-types/details-of-regular-expression-behavior#net-framework-engine-capabilities [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_EngineCapabilities_RightToLeftMatching(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_EngineCapabilities_RightToLeftMatching(RegexEngine engine) { const string GreedyPattern = @".+(\d+)\."; const string Input = "This sentence ends with the number 107325."; + Regex rLTR = await RegexHelpers.GetRegex(engine, GreedyPattern); + Regex rRTL = await RegexHelpers.GetRegex(engine, GreedyPattern, RegexOptions.RightToLeft); + // Match from left-to-right using lazy quantifier .+?. - Match match = Regex.Match(Input, GreedyPattern); + Match match = rLTR.Match(Input); Assert.True(match.Success); Assert.Equal("5", match.Groups[1].Value); // Match from right-to-left using greedy quantifier .+. 
- match = Regex.Match(Input, GreedyPattern, RegexOptions.RightToLeft | options); + match = rRTL.Match(Input); Assert.True(match.Success); Assert.Equal("107325", match.Groups[1].Value); } // https://docs.microsoft.com/en-us/dotnet/standard/base-types/details-of-regular-expression-behavior#net-framework-engine-capabilities [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_EngineCapabilities_PositiveNegativeLookbehind(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_EngineCapabilities_PositiveNegativeLookbehind(RegexEngine engine) { const string Pattern = @"^[A-Z0-9]([-!#$%&'.*+/=?^`{}|~\w])*(?<=[A-Z0-9])$"; - Assert.True(Regex.IsMatch("jack.sprat", Pattern, RegexOptions.IgnoreCase | options)); - Assert.False(Regex.IsMatch("dog#", Pattern, RegexOptions.IgnoreCase | options)); - Assert.True(Regex.IsMatch("dog#1", Pattern, RegexOptions.IgnoreCase | options)); - Assert.True(Regex.IsMatch("me.myself", Pattern, RegexOptions.IgnoreCase | options)); - Assert.False(Regex.IsMatch("me.myself!", Pattern, RegexOptions.IgnoreCase | options)); + Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + + Assert.True(r.IsMatch("jack.sprat")); + Assert.False(r.IsMatch("dog#")); + Assert.True(r.IsMatch("dog#1")); + Assert.True(r.IsMatch("me.myself")); + Assert.False(r.IsMatch("me.myself!")); } // https://docs.microsoft.com/en-us/dotnet/standard/base-types/miscellaneous-constructs-in-regular-expressions#inline-options [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_InlineOptions(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_InlineOptions(RegexEngine engine) { const string Input = "double dare double Double a Drooling dog The Dreaded Deep"; var actual = new 
StringBuilder(); - foreach (Match match in Regex.Matches(Input, @"\b(D\w+)\s(d\w+)\b", options)) + foreach (Match match in (await RegexHelpers.GetRegex(engine, @"\b(D\w+)\s(d\w+)\b")).Matches(Input)) { actual.AppendLine(match.Value); if (match.Groups.Count > 1) @@ -813,7 +850,7 @@ public void Docs_InlineOptions(RegexOptions options) } actual.AppendLine(); - foreach (Match match in Regex.Matches(Input, @"\b(D\w+)(?ixn) \s (d\w+) \b", options)) + foreach (Match match in (await RegexHelpers.GetRegex(engine, @"\b(D\w+)(?ixn) \s (d\w+) \b")).Matches(Input)) { actual.AppendLine(match.Value); if (match.Groups.Count > 1) @@ -840,14 +877,15 @@ public void Docs_InlineOptions(RegexOptions options) // https://docs.microsoft.com/en-us/dotnet/standard/base-types/miscellaneous-constructs-in-regular-expressions#inline-comment [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_InlineComment(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_InlineComment(RegexEngine engine) { const string Pattern = @"\b((?# case-sensitive comparison)D\w+)\s(?ixn)((?#case-insensitive comparison)d\w+)\b"; const string Input = "double dare double Double a Drooling dog The Dreaded Deep"; - Match match = Regex.Match(Input, Pattern, options); + Regex r = await RegexHelpers.GetRegex(engine, Pattern); + + Match match = r.Match(Input); Assert.True(match.Success); Assert.Equal("Drooling dog", match.Value); Assert.Equal(2, match.Groups.Count); @@ -864,27 +902,29 @@ public void Docs_InlineComment(RegexOptions options) // https://docs.microsoft.com/en-us/dotnet/standard/base-types/miscellaneous-constructs-in-regular-expressions#end-of-line-comment [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_EndOfLineComment(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = 
typeof(RegexHelpers))] + public async Task Docs_EndOfLineComment(RegexEngine engine) { const string Pattern = @"\{\d+(,-*\d+)*(\:\w{1,4}?)*\}(?x) # Looks for a composite format item."; const string Input = "{0,-3:F}"; - Assert.True(Regex.IsMatch(Input, Pattern, options)); - } + Regex r = await RegexHelpers.GetRegex(engine, Pattern); + + Assert.True(r.IsMatch(Input)); + } // https://docs.microsoft.com/en-us/dotnet/standard/base-types/anchors-in-regular-expressions#contiguous-matches-g [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Docs_Anchors_ContiguousMatches(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Docs_Anchors_ContiguousMatches(RegexEngine engine) { const string Input = "capybara,squirrel,chipmunk,porcupine"; const string Pattern = @"\G(\w+\s?\w*),?"; string[] expected = new[] { "capybara", "squirrel", "chipmunk", "porcupine" }; - Match m = Regex.Match(Input, Pattern, options); + Regex r = await RegexHelpers.GetRegex(engine, Pattern); + + Match m = r.Match(Input); string[] actual = new string[4]; for (int i = 0; i < actual.Length; i++) @@ -910,11 +950,11 @@ public void Docs_Anchors_ContiguousMatches(RegexOptions options) [InlineData("https://foo.com:443/bar/17/groups/0ad1/providers/Network/public/4e-ip?version=16", "Network/public/4e-ip")] [InlineData("ftp://443/notproviders/17/groups/0ad1/providers/Network/public/4e-ip?version=16", "Network/public/4e-ip")] [InlineData("ftp://443/providersnot/17/groups/0ad1/providers/Network/public/4e-ip?version=16", "Network/public/4e-ip")] - public void RealWorld_ExtractResourceUri(string url, string expected) + public async Task RealWorld_ExtractResourceUri(string url, string expected) { - foreach (RegexOptions options in new[] { RegexOptions.Compiled, RegexOptions.None }) + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - Regex r = new 
Regex(@"/providers/(.+?)\?", options); + Regex r = await RegexHelpers.GetRegex(engine, @"/providers/(.+?)\?"); Match m = r.Match(url); Assert.True(m.Success); Assert.Equal(2, m.Groups.Count); @@ -936,16 +976,16 @@ public void RealWorld_ExtractResourceUri(string url, string expected) [InlineData("david.jones@proseware.com", false)] [InlineData("~david", false)] [InlineData("david~", false)] - public void RealWorld_IsValidCSharpName(string value, bool isExpectedMatch) + public async Task RealWorld_IsValidCSharpName(string value, bool isExpectedMatch) { const string StartCharacterRegex = @"_|[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]"; const string PartCharactersRegex = @"[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\p{Cf}]"; const string IdentifierRegex = @"^(" + StartCharacterRegex + ")(" + PartCharactersRegex + ")*$"; - foreach (RegexOptions options in new[] { RegexOptions.Compiled, RegexOptions.None }) + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - Regex r = new Regex(IdentifierRegex, options); + Regex r = await RegexHelpers.GetRegex(engine, IdentifierRegex); Assert.Equal(isExpectedMatch, r.IsMatch(value)); } } @@ -957,13 +997,13 @@ public void RealWorld_IsValidCSharpName(string value, bool isExpectedMatch) [InlineData(";", true)] [InlineData(";comment\nNotThisBecauseOfNewLine", false)] [InlineData("-;not a comment", false)] - public void RealWorld_IsCommentLine(string value, bool isExpectedMatch) + public async Task RealWorld_IsCommentLine(string value, bool isExpectedMatch) { const string CommentLineRegex = @"^\s*;\s*(.*?)\s*$"; - foreach (RegexOptions options in new[] { RegexOptions.Compiled, RegexOptions.None }) + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - Regex r = new Regex(CommentLineRegex, options); + Regex r = await RegexHelpers.GetRegex(engine, CommentLineRegex); Assert.Equal(isExpectedMatch, r.IsMatch(value)); } } @@ -976,13 +1016,13 @@ public void RealWorld_IsCommentLine(string value, bool 
isExpectedMatch) [InlineData("[This Is Not]", false)] [InlineData("This is not[]", false)] [InlineData("[Nor This]/", false)] - public void RealWorld_IsSectionLine(string value, bool isExpectedMatch) + public async Task RealWorld_IsSectionLine(string value, bool isExpectedMatch) { const string SectionLineRegex = @"^\s*\[([\w\.\-\+:\/\(\)\\]+)\]\s*$"; - foreach (RegexOptions options in new[] { RegexOptions.Compiled, RegexOptions.None }) + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - Regex r = new Regex(SectionLineRegex, options); + Regex r = await RegexHelpers.GetRegex(engine, SectionLineRegex); Assert.Equal(isExpectedMatch, r.IsMatch(value)); } } @@ -998,11 +1038,11 @@ public void RealWorld_IsSectionLine(string value, bool isExpectedMatch) [InlineData("fds:-4", "-4")] [InlineData("dsa:-20.04", "-20.04")] [InlineData("dsa:15.a", "15")] - public void RealWorld_ValueParse(string value, string expected) + public async Task RealWorld_ValueParse(string value, string expected) { - foreach (RegexOptions options in new[] { RegexOptions.Compiled, RegexOptions.None }) + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - Regex r = new Regex(@"(?-?\d+(\.\d+)?)", options); + Regex r = await RegexHelpers.GetRegex(engine, @"(?-?\d+(\.\d+)?)"); Match m = r.Match(value); Assert.True(m.Success); Assert.Equal(expected, m.Groups["value"].Value); @@ -1012,11 +1052,11 @@ public void RealWorld_ValueParse(string value, string expected) [Theory] [InlineData("WI-T4.0.0.1963 Firebird 4.0 Beta 2", "4.0.0.1963")] [InlineData("WI-V3.0.5.33220 Firebird 3.0", "3.0.5.33220")] - public void RealWorld_FirebirdVersionString(string value, string expected) + public async Task RealWorld_FirebirdVersionString(string value, string expected) { - foreach (RegexOptions options in new[] { RegexOptions.Compiled, RegexOptions.None }) + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - Regex r = new Regex(@"\w{2}-\w(\d+\.\d+\.\d+\.\d+)", options); + Regex r = 
await RegexHelpers.GetRegex(engine, @"\w{2}-\w(\d+\.\d+\.\d+\.\d+)"); Match m = r.Match(value); Assert.True(m.Success); Assert.Equal(expected, m.Groups[1].Value); @@ -1028,11 +1068,11 @@ public void RealWorld_FirebirdVersionString(string value, string expected) [InlineData("Foo!Bar.A.B.C", "Foo", "Bar.A.B", "C")] [InlineData("Foo1.Foo2.Foo!Bar.A.B.C", "Foo1.Foo2.Foo", "Bar.A.B", "C")] [InlineData(@"Foo1\Foo2.Foo!Bar.A.B.C", @"Foo1\Foo2.Foo", "Bar.A.B", "C")] - public void RealWorld_ExternalEntryPoint(string value, string a, string b, string c) + public async Task RealWorld_ExternalEntryPoint(string value, string a, string b, string c) { - foreach (RegexOptions options in new[] { RegexOptions.Compiled, RegexOptions.None }) + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - Regex r = new Regex(@"^(.+)!(.+)\.([^.]+)$", options); + Regex r = await RegexHelpers.GetRegex(engine, @"^(.+)!(.+)\.([^.]+)$"); Match m = r.Match(value); Assert.True(m.Success); Assert.Equal(a, m.Groups[1].Value); diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index b5a298ce878d7e..64d5b3d348634e 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -393,60 +393,48 @@ public static IEnumerable Match_Basic_TestData() } // .* : Case sensitive - foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.Compiled }) - { - yield return new object[] { @".*\nfoo", "This shouldn't match", options, 0, 20, false, "" }; - yield return new object[] { @"a.*\nfoo", "This shouldn't match", options, 0, 20, false, "" }; - yield return new object[] { @".*\nFoo", $"\nFooThis should match", options, 0, 21, true, "\nFoo" }; - yield return new object[] { @".*\nfoo", "\nfooThis should match", options, 4, 17, false, "" }; - - yield return new object[] { @".*\dfoo", 
"This shouldn't match", options, 0, 20, false, "" }; - yield return new object[] { @".*\dFoo", "This1Foo should match", options, 0, 21, true, "This1Foo" }; - yield return new object[] { @".*\dFoo", "This1foo should 2Foo match", options, 0, 26, true, "This1foo should 2Foo" }; - yield return new object[] { @".*\dFoo", "This1foo shouldn't 2foo match", options, 0, 29, false, "" }; - yield return new object[] { @".*\dfoo", "This1foo shouldn't 2foo match", options, 24, 5, false, "" }; - - yield return new object[] { @".*\dfoo", "1fooThis1foo should 1foo match", options, 4, 9, true, "This1foo" }; - yield return new object[] { @".*\dfoo", "This shouldn't match 1foo", options, 0, 20, false, "" }; - } + yield return new object[] { @".*\nfoo", "This shouldn't match", RegexOptions.None, 0, 20, false, "" }; + yield return new object[] { @"a.*\nfoo", "This shouldn't match", RegexOptions.None, 0, 20, false, "" }; + yield return new object[] { @".*\nFoo", $"\nFooThis should match", RegexOptions.None, 0, 21, true, "\nFoo" }; + yield return new object[] { @".*\nfoo", "\nfooThis should match", RegexOptions.None, 4, 17, false, "" }; + + yield return new object[] { @".*\dfoo", "This shouldn't match", RegexOptions.None, 0, 20, false, "" }; + yield return new object[] { @".*\dFoo", "This1Foo should match", RegexOptions.None, 0, 21, true, "This1Foo" }; + yield return new object[] { @".*\dFoo", "This1foo should 2Foo match", RegexOptions.None, 0, 26, true, "This1foo should 2Foo" }; + yield return new object[] { @".*\dFoo", "This1foo shouldn't 2foo match", RegexOptions.None, 0, 29, false, "" }; + yield return new object[] { @".*\dfoo", "This1foo shouldn't 2foo match", RegexOptions.None, 24, 5, false, "" }; + + yield return new object[] { @".*\dfoo", "1fooThis1foo should 1foo match", RegexOptions.None, 4, 9, true, "This1foo" }; + yield return new object[] { @".*\dfoo", "This shouldn't match 1foo", RegexOptions.None, 0, 20, false, "" }; // .* : Case insensitive - foreach (RegexOptions options 
in new[] { RegexOptions.IgnoreCase, RegexOptions.IgnoreCase | RegexOptions.Compiled }) - { - yield return new object[] { @".*\nFoo", "\nfooThis should match", options, 0, 21, true, "\nfoo" }; - yield return new object[] { @".*\dFoo", "This1foo should match", options, 0, 21, true, "This1foo" }; - yield return new object[] { @".*\dFoo", "This1foo should 2FoO match", options, 0, 26, true, "This1foo should 2FoO" }; - yield return new object[] { @".*\dFoo", "This1Foo should 2fOo match", options, 0, 26, true, "This1Foo should 2fOo" }; - yield return new object[] { @".*\dfoo", "1fooThis1FOO should 1foo match", options, 4, 9, true, "This1FOO" }; - } + yield return new object[] { @".*\nFoo", "\nfooThis should match", RegexOptions.IgnoreCase, 0, 21, true, "\nfoo" }; + yield return new object[] { @".*\dFoo", "This1foo should match", RegexOptions.IgnoreCase, 0, 21, true, "This1foo" }; + yield return new object[] { @".*\dFoo", "This1foo should 2FoO match", RegexOptions.IgnoreCase, 0, 26, true, "This1foo should 2FoO" }; + yield return new object[] { @".*\dFoo", "This1Foo should 2fOo match", RegexOptions.IgnoreCase, 0, 26, true, "This1Foo should 2fOo" }; + yield return new object[] { @".*\dfoo", "1fooThis1FOO should 1foo match", RegexOptions.IgnoreCase, 4, 9, true, "This1FOO" }; // .* : RTL, Case-sensitive - foreach (RegexOptions options in new[] { RegexOptions.None | RegexOptions.RightToLeft, RegexOptions.Compiled | RegexOptions.RightToLeft }) - { - yield return new object[] { @".*\nfoo", "This shouldn't match", options, 0, 20, false, "" }; - yield return new object[] { @"a.*\nfoo", "This shouldn't match", options, 0, 20, false, "" }; - yield return new object[] { @".*\nFoo", $"This should match\nFoo", options, 0, 21, true, "This should match\nFoo" }; - yield return new object[] { @".*\nfoo", "This should matchfoo\n", options, 4, 13, false, "" }; - - yield return new object[] { @".*\dfoo", "This shouldn't match", options, 0, 20, false, "" }; - yield return new object[] { 
@".*\dFoo", "This1Foo should match", options, 0, 21, true, "This1Foo" }; - yield return new object[] { @".*\dFoo", "This1foo should 2Foo match", options, 0, 26, true, "This1foo should 2Foo" }; - yield return new object[] { @".*\dFoo", "This1foo shouldn't 2foo match", options, 0, 29, false, "" }; - yield return new object[] { @".*\dfoo", "This1foo shouldn't 2foo match", options, 19, 0, false, "" }; - - yield return new object[] { @".*\dfoo", "1fooThis2foo should 1foo match", options, 8, 4, true, "2foo" }; - yield return new object[] { @".*\dfoo", "This shouldn't match 1foo", options, 0, 20, false, "" }; - } + yield return new object[] { @".*\nfoo", "This shouldn't match", RegexOptions.None | RegexOptions.RightToLeft, 0, 20, false, "" }; + yield return new object[] { @"a.*\nfoo", "This shouldn't match", RegexOptions.None | RegexOptions.RightToLeft, 0, 20, false, "" }; + yield return new object[] { @".*\nFoo", $"This should match\nFoo", RegexOptions.None | RegexOptions.RightToLeft, 0, 21, true, "This should match\nFoo" }; + yield return new object[] { @".*\nfoo", "This should matchfoo\n", RegexOptions.None | RegexOptions.RightToLeft, 4, 13, false, "" }; + + yield return new object[] { @".*\dfoo", "This shouldn't match", RegexOptions.None | RegexOptions.RightToLeft, 0, 20, false, "" }; + yield return new object[] { @".*\dFoo", "This1Foo should match", RegexOptions.None | RegexOptions.RightToLeft, 0, 21, true, "This1Foo" }; + yield return new object[] { @".*\dFoo", "This1foo should 2Foo match", RegexOptions.None | RegexOptions.RightToLeft, 0, 26, true, "This1foo should 2Foo" }; + yield return new object[] { @".*\dFoo", "This1foo shouldn't 2foo match", RegexOptions.None | RegexOptions.RightToLeft, 0, 29, false, "" }; + yield return new object[] { @".*\dfoo", "This1foo shouldn't 2foo match", RegexOptions.None | RegexOptions.RightToLeft, 19, 0, false, "" }; + + yield return new object[] { @".*\dfoo", "1fooThis2foo should 1foo match", RegexOptions.None | 
RegexOptions.RightToLeft, 8, 4, true, "2foo" }; + yield return new object[] { @".*\dfoo", "This shouldn't match 1foo", RegexOptions.None | RegexOptions.RightToLeft, 0, 20, false, "" }; // .* : RTL, case insensitive - foreach (RegexOptions options in new[] { RegexOptions.IgnoreCase | RegexOptions.RightToLeft, RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.RightToLeft }) - { - yield return new object[] { @".*\nFoo", "\nfooThis should match", options, 0, 21, true, "\nfoo" }; - yield return new object[] { @".*\dFoo", "This1foo should match", options, 0, 21, true, "This1foo" }; - yield return new object[] { @".*\dFoo", "This1foo should 2FoO match", options, 0, 26, true, "This1foo should 2FoO" }; - yield return new object[] { @".*\dFoo", "This1Foo should 2fOo match", options, 0, 26, true, "This1Foo should 2fOo" }; - yield return new object[] { @".*\dfoo", "1fooThis2FOO should 1foo match", options, 8, 4, true, "2FOO" }; - } + yield return new object[] { @".*\nFoo", "\nfooThis should match", RegexOptions.IgnoreCase | RegexOptions.RightToLeft, 0, 21, true, "\nfoo" }; + yield return new object[] { @".*\dFoo", "This1foo should match", RegexOptions.IgnoreCase | RegexOptions.RightToLeft, 0, 21, true, "This1foo" }; + yield return new object[] { @".*\dFoo", "This1foo should 2FoO match", RegexOptions.IgnoreCase | RegexOptions.RightToLeft, 0, 26, true, "This1foo should 2FoO" }; + yield return new object[] { @".*\dFoo", "This1Foo should 2fOo match", RegexOptions.IgnoreCase | RegexOptions.RightToLeft, 0, 26, true, "This1Foo should 2fOo" }; + yield return new object[] { @".*\dfoo", "1fooThis2FOO should 1foo match", RegexOptions.IgnoreCase | RegexOptions.RightToLeft, 8, 4, true, "2FOO" }; } public static IEnumerable Match_Basic_TestData_NetCore() @@ -467,49 +455,28 @@ public static IEnumerable Match_Basic_TestData_NetCore() yield return new object[] { @"^(?i:[\u24B6-\u24D0])$", ((char)('\u24CF' + 26)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 
0, 1, true, ((char)('\u24CF' + 26)).ToString() }; } + public static IEnumerable Match_Basic_TestData_WithEngine() => + RegexHelpers.PrependEngines(Match_Basic_TestData()); + + public static IEnumerable Match_Basic_TestData_NetCore_WithEngine() => + RegexHelpers.PrependEngines(Match_Basic_TestData()); + [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)] [Theory] - [MemberData(nameof(Match_Basic_TestData_NetCore))] - public void Match_NetCore(string pattern, string input, RegexOptions options, int beginning, int length, bool expectedSuccess, string expectedValue) + [MemberData(nameof(Match_Basic_TestData_NetCore_WithEngine))] + public async Task Match_NetCore(RegexEngine engine, string pattern, string input, RegexOptions options, int beginning, int length, bool expectedSuccess, string expectedValue) { - Match(pattern, input, options, beginning, length, expectedSuccess, expectedValue); + await Match(engine, pattern, input, options, beginning, length, expectedSuccess, expectedValue); } [Theory] - [MemberData(nameof(Match_Basic_TestData))] - [MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Match_Basic_TestData), 2, MemberType = typeof(RegexCompilationHelper))] - public void Match(string pattern, string input, RegexOptions options, int beginning, int length, bool expectedSuccess, string expectedValue) + [MemberData(nameof(Match_Basic_TestData_WithEngine))] + public async Task Match(RegexEngine engine, string pattern, string input, RegexOptions options, int beginning, int length, bool expectedSuccess, string expectedValue) { - Regex r; - bool isDefaultStart = RegexHelpers.IsDefaultStart(input, options, beginning); bool isDefaultCount = RegexHelpers.IsDefaultCount(input, options, length); - if (options == RegexOptions.None) - { - r = new Regex(pattern); - - if (isDefaultStart && isDefaultCount) - { - // Use Match(string) or Match(string, string) - VerifyMatch(r.Match(input), expectedSuccess, expectedValue); - 
VerifyMatch(Regex.Match(input, pattern), expectedSuccess, expectedValue); - - Assert.Equal(expectedSuccess, r.IsMatch(input)); - Assert.Equal(expectedSuccess, Regex.IsMatch(input, pattern)); - } - if (beginning + length == input.Length) - { - // Use Match(string, int) - VerifyMatch(r.Match(input, beginning), expectedSuccess, expectedValue); - - Assert.Equal(expectedSuccess, r.IsMatch(input, beginning)); - } - // Use Match(string, int, int) - VerifyMatch(r.Match(input, beginning, length), expectedSuccess, expectedValue); - } - - r = new Regex(pattern, options); + Regex r = await RegexHelpers.GetRegex(engine, pattern, options); if (isDefaultStart && isDefaultCount) { @@ -531,13 +498,20 @@ public void Match(string pattern, string input, RegexOptions options, int beginn VerifyMatch(r.Match(input, beginning, length), expectedSuccess, expectedValue); } + public static IEnumerable Match_VaryingLengthStrings_MemberData() + { + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) + { + yield return new object[] { engine, RegexOptions.None }; + yield return new object[] { engine, RegexOptions.IgnoreCase }; + yield return new object[] { engine, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant }; + } + } + [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Takes several minutes on .NET Framework")] [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - [InlineData(RegexOptions.Compiled | RegexOptions.IgnoreCase)] - [InlineData(RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)] - public void Match_VaryingLengthStrings(RegexOptions options) + [MemberData(nameof(Match_VaryingLengthStrings_MemberData))] + public async Task Match_VaryingLengthStrings(RegexEngine engine, RegexOptions options) { var lengths = new List() { 2, 3, 4, 5, 6, 7, 8, 9, 31, 32, 33, 63, 64, 65 }; if ((options & RegexOptions.IgnoreCase) == 0) @@ -550,7 +524,7 @@ public void Match_VaryingLengthStrings(RegexOptions options) { 
string pattern = "[123]" + string.Concat(Enumerable.Range(0, length).Select(i => (char)('A' + (i % 26)))); string input = "2" + string.Concat(Enumerable.Range(0, length).Select(i => (char)((caseInsensitive ? 'a' : 'A') + (i % 26)))); - Match(pattern, input, options, 0, input.Length, expectedSuccess: true, expectedValue: input); + await Match(engine, pattern, input, options, 0, input.Length, expectedSuccess: true, expectedValue: input); } } @@ -565,14 +539,19 @@ private static void VerifyMatch(Match match, bool expectedSuccess, string expect RegexAssert.Equal(expectedValue, match.Groups[0]); } + public static IEnumerable Match_DeepNesting_MemberData() + { + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) + { + yield return new object[] { engine, 1 }; + yield return new object[] { engine, 10 }; + yield return new object[] { engine, 100 }; + } + } + [Theory] - [InlineData(RegexOptions.None, 1)] - [InlineData(RegexOptions.None, 10)] - [InlineData(RegexOptions.None, 100)] - [InlineData(RegexOptions.Compiled, 1)] - [InlineData(RegexOptions.Compiled, 10)] - [InlineData(RegexOptions.Compiled, 100)] - public void Match_DeepNesting(RegexOptions options, int count) + [MemberData(nameof(Match_DeepNesting_MemberData))] + public async void Match_DeepNesting(RegexEngine engine, int count) { const string Start = @"((?>abc|(?:def[ghi]", End = @")))"; const string Match = "defg"; @@ -580,7 +559,7 @@ public void Match_DeepNesting(RegexOptions options, int count) string pattern = string.Concat(Enumerable.Repeat(Start, count)) + string.Concat(Enumerable.Repeat(End, count)); string input = string.Concat(Enumerable.Repeat(Match, count)); - var r = new Regex(pattern, options); + Regex r = await RegexHelpers.GetRegex(engine, pattern); Match m = r.Match(input); Assert.True(m.Success); @@ -588,28 +567,28 @@ public void Match_DeepNesting(RegexOptions options, int count) Assert.Equal(count + 1, m.Groups.Count); } - [Fact] - public void Match_Timeout() + [Theory] + 
[MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Match_Timeout(RegexEngine engine) { - Regex regex = new Regex(@"\p{Lu}", RegexOptions.IgnoreCase, TimeSpan.FromHours(1)); + Regex regex = await RegexHelpers.GetRegex(engine, @"\p{Lu}", RegexOptions.IgnoreCase, TimeSpan.FromHours(1)); Match match = regex.Match("abc"); Assert.True(match.Success); RegexAssert.Equal("a", match); } [Theory] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.None | (RegexOptions)0x80 /* Debug */)] - [InlineData(RegexOptions.Compiled)] - [InlineData(RegexOptions.Compiled | (RegexOptions)0x80 /* Debug */)] - public void Match_Timeout_Throws(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Match_Timeout_Throws(RegexEngine engine) { const string Pattern = @"^([0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*@(([0-9a-zA-Z])+([-\w]*[0-9a-zA-Z])*\.)+[a-zA-Z]{2,9})$"; string input = new string('a', 50) + "@a.a"; - Assert.Throws(() => new Regex(Pattern, options, TimeSpan.FromMilliseconds(100)).Match(input)); + Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.None, TimeSpan.FromMilliseconds(100)); + Assert.Throws(() => r.Match(input)); } + // TODO: Figure out what to do with default timeouts for source generated regexes [ConditionalTheory(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))] [InlineData(RegexOptions.None)] [InlineData(RegexOptions.None | (RegexOptions)0x80 /* Debug */)] @@ -645,6 +624,7 @@ public void Match_DefaultTimeout_Throws(RegexOptions options) }, ((int)options).ToString(CultureInfo.InvariantCulture)).Dispose(); } + // TODO: Figure out what to do with default timeouts for source generated regexes [Theory] [InlineData(RegexOptions.None)] [InlineData(RegexOptions.None | (RegexOptions)0x80 /* Debug */)] @@ -663,13 +643,10 @@ public void Match_CachedPattern_NewTimeoutApplies(RegexOptions options) // On 
Linux, we may get killed by the OOM Killer; on Windows, it will swap instead [OuterLoop("Can take several seconds")] [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess), nameof(PlatformDetection.IsWindows))] - [InlineData(@"a\s+", RegexOptions.None)] - [InlineData(@"a\s+", RegexOptions.Compiled)] - [InlineData(@"a\s+ ", RegexOptions.None)] - [InlineData(@"a\s+ ", RegexOptions.Compiled)] - public void Match_Timeout_Loop_Throws(string pattern, RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Match_Timeout_Loop_Throws(RegexEngine engine) { - var regex = new Regex(pattern, options, TimeSpan.FromSeconds(1)); + Regex regex = await RegexHelpers.GetRegex(engine, @"a\s+", RegexOptions.None, TimeSpan.FromSeconds(1)); string input = "a" + new string(' ', 800_000_000) + " "; Assert.Throws(() => regex.Match(input)); } @@ -678,12 +655,11 @@ public void Match_Timeout_Loop_Throws(string pattern, RegexOptions options) // On Linux, we may get killed by the OOM Killer; on Windows, it will swap instead [OuterLoop("Can take several seconds")] [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess), nameof(PlatformDetection.IsWindows))] - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] - public void Match_Timeout_Repetition_Throws(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Match_Timeout_Repetition_Throws(RegexEngine engine) { int repetitionCount = 800_000_000; - var regex = new Regex(@"a\s{" + repetitionCount + "}", options, TimeSpan.FromSeconds(1)); + Regex regex = await RegexHelpers.GetRegex(engine, @"a\s{" + repetitionCount + "}", RegexOptions.None, TimeSpan.FromSeconds(1)); string input = @"a" + new string(' ', repetitionCount) + @"b"; Assert.Throws(() => regex.Match(input)); } @@ -966,48 +942,17 @@ public 
static IEnumerable Match_Advanced_TestData() }; } + public static IEnumerable Match_Advanced_TestData_WithEngine() => + RegexHelpers.PrependEngines(Match_Advanced_TestData()); + [Theory] - [MemberData(nameof(Match_Advanced_TestData))] - [MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Match_Advanced_TestData), 2, MemberType = typeof(RegexCompilationHelper))] - public void Match_Advanced(string pattern, string input, RegexOptions options, int beginning, int length, CaptureData[] expected) + [MemberData(nameof(Match_Advanced_TestData_WithEngine))] + public async Task Match_Advanced(RegexEngine engine, string pattern, string input, RegexOptions options, int beginning, int length, CaptureData[] expected) { - Regex r; - bool isDefaultStart = RegexHelpers.IsDefaultStart(input, options, beginning); bool isDefaultCount = RegexHelpers.IsDefaultStart(input, options, length); - if (options == RegexOptions.None) - { - r = new Regex(pattern); - - if (isDefaultStart && isDefaultCount) - { - // Use Match(string) or Match(string, string) - VerifyMatch(r.Match(input), true, expected); - VerifyMatch(Regex.Match(input, pattern), true, expected); - - Assert.True(r.IsMatch(input)); - Assert.True(Regex.IsMatch(input, pattern)); - } - - // Note: this block will fail if any inputs attempt to look for anchors or lookbehinds at the initial position, - // as there is a difference between Match(input, beginning) and Match(input, beginning, input.Length - beginning) - // in that the former doesn't modify from 0 what the engine sees as the beginning of the input whereas the latter - // is equivalent to taking a substring and then matching on that. However, as we currently don't have any such inputs, - // it's currently a viable way to test the additional overload. Same goes for the similar case below with options. 
- if (beginning + length == input.Length) - { - // Use Match(string, int) - VerifyMatch(r.Match(input, beginning), true, expected); - - Assert.True(r.IsMatch(input, beginning)); - } - - // Use Match(string, int, int) - VerifyMatch(r.Match(input, beginning, length), true, expected); - } - - r = new Regex(pattern, options); + Regex r = await RegexHelpers.GetRegex(engine, pattern, options); if (isDefaultStart && isDefaultCount) { @@ -1033,26 +978,28 @@ public void Match_Advanced(string pattern, string input, RegexOptions options, i public static IEnumerable Match_StartatDiffersFromBeginning_MemberData() { - foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.Singleline, RegexOptions.Multiline }) + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - // Anchors - yield return new object[] { @"^.*", "abc", options, 0, true, true }; - yield return new object[] { @"^.*", "abc", options, 1, false, true }; + foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.Singleline, RegexOptions.Multiline }) + { + // Anchors + yield return new object[] { engine, @"^.*", "abc", options, 0, true, true }; + yield return new object[] { engine, @"^.*", "abc", options, 1, false, true }; - // Positive Lookbehinds - yield return new object[] { @"(?<=abc)def", "abcdef", options, 3, true, false }; + // Positive Lookbehinds + yield return new object[] { engine, @"(?<=abc)def", "abcdef", options, 3, true, false }; - // Negative Lookbehinds - yield return new object[] { @"(?(() => RegularExpressions.Match.Empty.Result("any")); } - [Fact] - public void Match_SpecialUnicodeCharacters_enUS() + [Theory] + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Match_SpecialUnicodeCharacters_enUS(RegexEngine engine) { using (new ThreadCultureChange("en-US")) { - Match("\u0131", "\u0049", RegexOptions.IgnoreCase, 0, 1, false, string.Empty); - Match("\u0131", "\u0069", 
RegexOptions.IgnoreCase, 0, 1, false, string.Empty); + await Match(engine, "\u0131", "\u0049", RegexOptions.IgnoreCase, 0, 1, false, string.Empty); + await Match(engine, "\u0131", "\u0069", RegexOptions.IgnoreCase, 0, 1, false, string.Empty); } } - [Fact] - public void Match_SpecialUnicodeCharacters_Invariant() + [Theory] + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Match_SpecialUnicodeCharacters_Invariant(RegexEngine engine) { using (new ThreadCultureChange(CultureInfo.InvariantCulture)) { - Match("\u0131", "\u0049", RegexOptions.IgnoreCase, 0, 1, false, string.Empty); - Match("\u0131", "\u0069", RegexOptions.IgnoreCase, 0, 1, false, string.Empty); - Match("\u0130", "\u0049", RegexOptions.IgnoreCase, 0, 1, false, string.Empty); - Match("\u0130", "\u0069", RegexOptions.IgnoreCase, 0, 1, false, string.Empty); + await Match(engine, "\u0131", "\u0049", RegexOptions.IgnoreCase, 0, 1, false, string.Empty); + await Match(engine, "\u0131", "\u0069", RegexOptions.IgnoreCase, 0, 1, false, string.Empty); + await Match(engine, "\u0130", "\u0049", RegexOptions.IgnoreCase, 0, 1, false, string.Empty); + await Match(engine, "\u0130", "\u0069", RegexOptions.IgnoreCase, 0, 1, false, string.Empty); } } private static bool IsNotArmProcessAndRemoteExecutorSupported => PlatformDetection.IsNotArmProcess && RemoteExecutor.IsSupported; [ConditionalTheory(nameof(IsNotArmProcessAndRemoteExecutorSupported))] // times out on ARM - [InlineData(RegexOptions.None)] - [InlineData(RegexOptions.Compiled)] [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, ".NET Framework does not have fix for https://github.com/dotnet/runtime/issues/24749")] [SkipOnCoreClr("Long running tests: https://github.com/dotnet/runtime/issues/10680", RuntimeConfiguration.Checked, RuntimeTestModes.JitMinOpts)] - public void Match_ExcessPrefix(RegexOptions options) + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = 
typeof(RegexHelpers))] + public void Match_ExcessPrefix(RegexEngine engine) { - RemoteExecutor.Invoke(optionsString => + RemoteExecutor.Invoke(async engineString => { - var options = (RegexOptions)Enum.Parse(typeof(RegexOptions), optionsString); + var engine = (RegexEngine)Enum.Parse(typeof(RegexEngine), engineString); // Should not throw out of memory // Repeaters - Assert.False(Regex.IsMatch("a", @"a{2147483647,}", options)); - Assert.False(Regex.IsMatch("a", @"a{50,}", options)); // cutoff for Boyer-Moore prefix in debug - Assert.False(Regex.IsMatch("a", @"a{51,}", options)); - Assert.False(Regex.IsMatch("a", @"a{50_000,}", options)); // cutoff for Boyer-Moore prefix in release - Assert.False(Regex.IsMatch("a", @"a{50_001,}", options)); + Assert.False((await RegexHelpers.GetRegex(engine, @"a{2147483647,}")).IsMatch("a")); + Assert.False((await RegexHelpers.GetRegex(engine, @"a{50,}")).IsMatch("a")); // cutoff for Boyer-Moore prefix in debug + Assert.False((await RegexHelpers.GetRegex(engine, @"a{51,}")).IsMatch("a")); + Assert.False((await RegexHelpers.GetRegex(engine, @"a{50_000,}")).IsMatch("a")); // cutoff for Boyer-Moore prefix in release + Assert.False((await RegexHelpers.GetRegex(engine, @"a{50_001,}")).IsMatch("a")); // Multis foreach (int length in new[] { 50, 51, 50_000, 50_001, char.MaxValue + 1 }) // based on knowledge of cut-offs used in Boyer-Moore { string s = "bcd" + new string('a', length) + "efg"; - Assert.True(Regex.IsMatch(s, @$"a{{{length}}}", options)); + Assert.True((await RegexHelpers.GetRegex(engine, @$"a{{{length}}}")).IsMatch(s)); } - }, options.ToString()).Dispose(); + }, engine.ToString()).Dispose(); } [Fact] @@ -1219,15 +1167,23 @@ public void IsMatch_Invalid() Assert.Throws(() => r.IsMatch("input", 6)); } + public static IEnumerable IsMatch_SucceedQuicklyDueToLoopReduction_MemberData() + { + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) + { + yield return new object[] { engine, @"(?:\w*)+\.", 
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", false }; + yield return new object[] { engine, @"(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", false }; + yield return new object[] { engine, @"(?:x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", false }; + } + } + [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)] // take too long due to backtracking [Theory] - [InlineData(@"(?:\w*)+\.", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", false)] - [InlineData(@"(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", false)] - [InlineData(@"(?:x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", false)] - public void IsMatch_SucceedQuicklyDueToLoopReduction(string regex, string input, bool expected) + [MemberData(nameof(IsMatch_SucceedQuicklyDueToLoopReduction_MemberData))] + public async Task IsMatch_SucceedQuicklyDueToLoopReduction(RegexEngine engine, string pattern, string input, bool expected) { - Assert.Equal(expected, Regex.IsMatch(input, regex, RegexOptions.None)); - Assert.Equal(expected, Regex.IsMatch(input, regex, RegexOptions.Compiled)); + Regex r = await RegexHelpers.GetRegex(engine, pattern); + Assert.Equal(expected, r.IsMatch(input)); } [Fact] @@ -1247,17 +1203,17 @@ public void Synchronized() public static IEnumerable UseRegexConcurrently_ThreadSafe_Success_MemberData() { - foreach (TimeSpan timeout in new[] { Timeout.InfiniteTimeSpan, TimeSpan.FromMinutes(1) }) + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - yield return new object[] { RegexOptions.None, timeout }; - yield return new object[] { RegexOptions.Compiled, timeout }; + yield return new object[] { engine, 
Timeout.InfiniteTimeSpan }; + yield return new object[] { engine, TimeSpan.FromMinutes(1) }; } } [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))] - [MemberData(nameof(UseRegexConcurrently_ThreadSafe_Success_MemberData))] [OuterLoop("Takes several seconds")] - public void UseRegexConcurrently_ThreadSafe_Success(RegexOptions options, TimeSpan timeout) + [MemberData(nameof(UseRegexConcurrently_ThreadSafe_Success_MemberData))] + public async Task UseRegexConcurrently_ThreadSafe_Success(RegexEngine engine, TimeSpan timeout) { const string Input = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas porttitor congue massa. Fusce posuere, magna sed pulvinar ultricies, purus lectus malesuada libero, sit amet commodo magna eros quis urna. Nunc viverra imperdiet enim. Fusce est. Vivamus a tellus. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Proin pharetra nonummy pede. Mauris et orci. Aenean nec lorem. In porttitor. abcdefghijklmnx Donec laoreet nonummy augue. Suspendisse dui purus, scelerisque at, vulputate vitae, pretium mattis, nunc. Mauris eget neque at sem venenatis eleifend. Ut nonummy. Fusce aliquet pede non pede. Suspendisse dapibus lorem pellentesque magna. Integer nulla. Donec blandit feugiat ligula. Donec hendrerit, felis et imperdiet euismod, purus ipsum pretium metus, in lacinia nulla nisl eget sapien. Donec ut est in lectus consequat consequat. Etiam eget dui. Aliquam erat volutpat. Sed at lorem in nunc porta tristique. Proin nec augue. Quisque aliquam tempor magna. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Nunc ac magna. Maecenas odio dolor, vulputate vel, auctor ac, accumsan id, felis. Pellentesque cursus sagittis felis. 
Pellentesque porttitor, velit lacinia egestas auctor, diam eros tempus arcu, nec vulputate augue magna vel risus.nmlkjihgfedcbax"; const int Trials = 100; @@ -1267,7 +1223,7 @@ public void UseRegexConcurrently_ThreadSafe_Success(RegexOptions options, TimeSp for (int trial = 0; trial < Trials; trial++) { - var r = new Regex("[a-q][^u-z]{13}x", options, timeout); + Regex r = await RegexHelpers.GetRegex(engine, "[a-q][^u-z]{13}x", RegexOptions.None, timeout); Task.WaitAll(Enumerable.Range(0, b.ParticipantCount).Select(_ => Task.Factory.StartNew(() => { b.SignalAndWait(); diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs index aa172a0d868c23..83f329900a6d17 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs @@ -2,21 +2,22 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using System.Collections.Generic; +using System.Threading.Tasks; using Xunit; namespace System.Text.RegularExpressions.Tests { public class RegexMultipleMatchTests { - - [Fact] - public void Matches_MultipleCapturingGroups() + [Theory] + [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] + public async Task Matches_MultipleCapturingGroups(RegexEngine engine) { string[] expectedGroupValues = { "abracadabra", "abra", "cad" }; string[] expectedGroupCaptureValues = { "abracad", "abra" }; // Another example - given by Brad Merril in an article on RegularExpressions - Regex regex = new Regex(@"(abra(cad)?)+"); + Regex regex = await RegexHelpers.GetRegexAsync(engine, @"(abra(cad)?)+"); string input = "abracadabra1abracadabra2abracadabra3"; Match match = regex.Match(input); while (match.Success) @@ -291,22 +292,14 @@ public static IEnumerable Matches_TestData() } } + public static IEnumerable Matches_TestData_WithEngine => + RegexHelpers.PrependEngines(Matches_TestData()); + [Theory] - [MemberData(nameof(Matches_TestData))] - [MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Matches_TestData), 2, MemberType = typeof(RegexCompilationHelper))] - public void Matches(string pattern, string input, RegexOptions options, CaptureData[] expected) + [MemberData(nameof(Matches_TestData_WithEngine))] + public async Task Matches(RegexEngine engine, string pattern, string input, RegexOptions options, CaptureData[] expected) { - if (options == RegexOptions.None) - { - Regex regexBasic = new Regex(pattern); - VerifyMatches(regexBasic.Matches(input), expected); - VerifyMatches(regexBasic.Match(input), expected); - - VerifyMatches(Regex.Matches(input, pattern), expected); - VerifyMatches(Regex.Match(input, pattern), expected); - } - - Regex regexAdvanced = new Regex(pattern, options); + Regex regexAdvanced = await RegexHelpers.GetRegexAsync(engine, pattern, options); VerifyMatches(regexAdvanced.Matches(input), expected); 
VerifyMatches(regexAdvanced.Match(input), expected); diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs index 32fa6763a703f1..51a047d567ce50 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Linq; +using System.Threading.Tasks; using Xunit; namespace System.Text.RegularExpressions.Tests @@ -105,42 +106,30 @@ public static IEnumerable Replace_String_TestData() yield return new object[] { @"\Ga", "aaaaa", "b", RegexOptions.None, 5, 0, "bbbbb" }; } + public static IEnumerable Replace_String_TestData_WithEngine => + RegexHelpers.PrependEngines(Replace_String_TestData()); + [Theory] - [MemberData(nameof(Replace_String_TestData))] - [MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Replace_String_TestData), 3, MemberType = typeof(RegexCompilationHelper))] - public void Replace(string pattern, string input, string replacement, RegexOptions options, int count, int start, string expected) + [MemberData(nameof(Replace_String_TestData_WithEngine))] + public async Task Replace(RegexEngine engine, string pattern, string input, string replacement, RegexOptions options, int count, int start, string expected) { bool isDefaultStart = RegexHelpers.IsDefaultStart(input, options, start); bool isDefaultCount = RegexHelpers.IsDefaultCount(input, options, count); - if (options == RegexOptions.None) - { - if (isDefaultStart && isDefaultCount) - { - // Use Replace(string, string) or Replace(string, string, string) - Assert.Equal(expected, new Regex(pattern).Replace(input, replacement)); - Assert.Equal(expected, Regex.Replace(input, pattern, replacement)); - } - if (isDefaultStart) - { - // Use Replace(string, string, string, int) - Assert.Equal(expected, new 
Regex(pattern).Replace(input, replacement, count)); - } - // Use Replace(string, string, int, int) - Assert.Equal(expected, new Regex(pattern).Replace(input, replacement, count, start)); - } + + Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options); + if (isDefaultStart && isDefaultCount) { - // Use Replace(string, string) or Replace(string, string, string, RegexOptions) - Assert.Equal(expected, new Regex(pattern, options).Replace(input, replacement)); + Assert.Equal(expected, r.Replace(input, replacement)); Assert.Equal(expected, Regex.Replace(input, pattern, replacement, options)); } + if (isDefaultStart) { - // Use Replace(string, string, string, int) - Assert.Equal(expected, new Regex(pattern, options).Replace(input, replacement, count)); + Assert.Equal(expected, r.Replace(input, replacement, count)); } - // Use Replace(string, string, int, int) - Assert.Equal(expected, new Regex(pattern, options).Replace(input, replacement, count, start)); + + Assert.Equal(expected, r.Replace(input, replacement, count, start)); } public static IEnumerable Replace_MatchEvaluator_TestData() @@ -177,42 +166,29 @@ public static IEnumerable Replace_MatchEvaluator_TestData() yield return new object[] { @"\d", "0123456789foo4567890foo ", new MatchEvaluator(MatchEvaluatorPoundSign), RegexOptions.RightToLeft, -1, 32, "##########foo#######foo " }; } + public static IEnumerable Replace_MatchEvaluator_TestData_WithEngine => + RegexHelpers.PrependEngines(Replace_MatchEvaluator_TestData()); + [Theory] - [MemberData(nameof(Replace_MatchEvaluator_TestData))] - [MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Replace_MatchEvaluator_TestData), 3, MemberType = typeof(RegexCompilationHelper))] - public void Replace_MatchEvaluator_Test(string pattern, string input, MatchEvaluator evaluator, RegexOptions options, int count, int start, string expected) + [MemberData(nameof(Replace_MatchEvaluator_TestData_WithEngine))] + public async Task 
Replace_MatchEvaluator_Test(RegexEngine engine, string pattern, string input, MatchEvaluator evaluator, RegexOptions options, int count, int start, string expected) { bool isDefaultStart = RegexHelpers.IsDefaultStart(input, options, start); bool isDefaultCount = RegexHelpers.IsDefaultCount(input, options, count); - if (options == RegexOptions.None) - { - if (isDefaultStart && isDefaultCount) - { - // Use Replace(string, MatchEvaluator) or Replace(string, string, MatchEvaluator) - Assert.Equal(expected, new Regex(pattern).Replace(input, evaluator)); - Assert.Equal(expected, Regex.Replace(input, pattern, evaluator)); - } - if (isDefaultStart) - { - // Use Replace(string, MatchEvaluator, string, int) - Assert.Equal(expected, new Regex(pattern).Replace(input, evaluator, count)); - } - // Use Replace(string, MatchEvaluator, int, int) - Assert.Equal(expected, new Regex(pattern).Replace(input, evaluator, count, start)); - } + + Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options); + if (isDefaultStart && isDefaultCount) { - // Use Replace(string, MatchEvaluator) or Replace(string, MatchEvaluator, RegexOptions) - Assert.Equal(expected, new Regex(pattern, options).Replace(input, evaluator)); - Assert.Equal(expected, Regex.Replace(input, pattern, evaluator, options)); + Assert.Equal(expected, r.Replace(input, evaluator)); } + if (isDefaultStart) { - // Use Replace(string, MatchEvaluator, string, int) - Assert.Equal(expected, new Regex(pattern, options).Replace(input, evaluator, count)); + Assert.Equal(expected, r.Replace(input, evaluator, count)); } - // Use Replace(string, MatchEvaluator, int, int) - Assert.Equal(expected, new Regex(pattern, options).Replace(input, evaluator, count, start)); + + Assert.Equal(expected, r.Replace(input, evaluator, count, start)); } [Fact] diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Split.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Split.Tests.cs index 
dc539fe0da8fa3..122353d8c6f851 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Split.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Split.Tests.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Collections.Generic; +using System.Threading.Tasks; using Xunit; namespace System.Text.RegularExpressions.Tests @@ -57,42 +58,29 @@ public static IEnumerable Split_TestData() yield return new object[] { @"(?<=\G..)(?=..)", "aabbccdd", RegexOptions.None, 8, 0, new string[] { "aa", "bb", "cc", "dd" } }; } + public static IEnumerable Split_TestData_WithEngine => + RegexHelpers.PrependEngines(Split_TestData()); + [Theory] - [MemberData(nameof(Split_TestData))] - [MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Split_TestData), 2, MemberType = typeof(RegexCompilationHelper))] - public void Split(string pattern, string input, RegexOptions options, int count, int start, string[] expected) + [MemberData(nameof(Split_TestData_WithEngine))] + public async Task Split(RegexEngine engine, string pattern, string input, RegexOptions options, int count, int start, string[] expected) { bool isDefaultStart = RegexHelpers.IsDefaultStart(input, options, start); bool isDefaultCount = RegexHelpers.IsDefaultCount(input, options, count); - if (options == RegexOptions.None) - { - // Use Split(string), Split(string, string), Split(string, int) or Split(string, int, int) - if (isDefaultStart && isDefaultCount) - { - // Use Split(string) or Split(string, string) - Assert.Equal(expected, new Regex(pattern).Split(input)); - Assert.Equal(expected, Regex.Split(input, pattern)); - } - if (isDefaultStart) - { - // Use Split(string, int) - Assert.Equal(expected, new Regex(pattern).Split(input, count)); - } - // Use Split(string, int, int) - Assert.Equal(expected, new Regex(pattern).Split(input, count, start)); - } + + Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options); 
+ if (isDefaultStart && isDefaultCount) { - // Use Split(string, string, RegexOptions) - Assert.Equal(expected, Regex.Split(input, pattern, options)); + Assert.Equal(expected, r.Split(input)); } + if (isDefaultStart) { - // Use Split(string, int) - Assert.Equal(expected, new Regex(pattern, options).Split(input, count)); + Assert.Equal(expected, r.Split(input, count)); } - // Use Split(string, int, int, int) - Assert.Equal(expected, new Regex(pattern, options).Split(input, count, start)); + + Assert.Equal(expected, r.Split(input, count, start)); } [Fact] diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs index 1b8ed9cb283db4..b2923d0a13950c 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs @@ -1,6 +1,10 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; + namespace System.Text.RegularExpressions.Tests { public static class RegexHelpers @@ -24,6 +28,66 @@ public static bool IsDefaultStart(string input, RegexOptions options, int start) } return start == 0; } + + public static IEnumerable AvailableEngines_MemberData => + from engine in AvailableEngines + select new object[] { engine }; + + public static IEnumerable PrependEngines(IEnumerable cases) + { + foreach (RegexEngine engine in AvailableEngines) + { + foreach (object[] additionalParameters in cases) + { + var parameters = new object[additionalParameters.Length + 1]; + additionalParameters.CopyTo(parameters, 1); + parameters[0] = engine; + yield return parameters; + } + } + } + + public static IEnumerable AvailableEngines + { + get + { + yield return RegexEngine.Interpreter; + yield return RegexEngine.Compiled; + if (PlatformDetection.IsNetCore && + PlatformDetection.IsReflectionEmitSupported && // the source generator doesn't use reflection emit, but it does use Roslyn for the equivalent + PlatformDetection.IsNotBrowser) + { + yield return RegexEngine.SourceGenerated; + } + } + } + + public static async Task GetRegex(RegexEngine engine, string pattern, RegexOptions options = RegexOptions.None, int matchTimeout = -1) + { + switch (engine) + { + case RegexEngine.Interpreter: + return new Regex(pattern, options, TimeSpan.FromMilliseconds(matchTimeout)); + + case RegexEngine.Compiled: + return new Regex(pattern, options | RegexOptions.Compiled, TimeSpan.FromMilliseconds(matchTimeout)); + + case RegexEngine.SourceGenerated: + return await RegexGeneratorHelper.SourceGenRegex(pattern, options, matchTimeout); + } + + throw new ArgumentException($"Unknown engine: {engine}"); + } + + public static Task GetRegex(RegexEngine engine, string pattern, RegexOptions options, TimeSpan timeout) => + GetRegex(engine, pattern, options, (int)timeout.TotalMilliseconds); + } + + public enum 
RegexEngine + { + Interpreter, + Compiled, + SourceGenerated } public class CaptureData diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexCharacterSetTests.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexCharacterSetTests.cs index a029d679c8ba08..d1d578ff73862f 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexCharacterSetTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexCharacterSetTests.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.Globalization; using System.Linq; +using System.Threading.Tasks; using Xunit; using Xunit.Sdk; @@ -51,25 +52,25 @@ public class RegexCharacterSetTests [InlineData(@"[a-z-[d-w-[m-o]]]", RegexOptions.None, new[] { 'a', 'b', 'c', 'm', 'n', 'n', 'o', 'x', 'y', 'z' })] [InlineData(@"\p{IsBasicLatin}-[\x00-\x7F]", RegexOptions.None, new char[0])] [InlineData(@"[0-9-[2468]]", RegexOptions.None, new[] { '0', '1', '3', '5', '7', '9' })] - public void SetInclusionsExpected(string set, RegexOptions options, char[] expectedIncluded) + public async Task SetInclusionsExpected(string set, RegexOptions options, char[] expectedIncluded) { bool hasBracket = set.Contains("["); if (hasBracket) { - ValidateSet(set, options, new HashSet(expectedIncluded), null, validateEveryChar: true); + await ValidateSetAsync(set, options, new HashSet(expectedIncluded), null, validateEveryChar: true); } else { - ValidateSet($"[{set}]", options, new HashSet(expectedIncluded), null); - ValidateSet($"[^{set}]", options, null, new HashSet(expectedIncluded)); + await ValidateSetAsync($"[{set}]", options, new HashSet(expectedIncluded), null); + await ValidateSetAsync($"[^{set}]", options, null, new HashSet(expectedIncluded)); } } [Theory] [InlineData(@"[^1234-[3456]]", RegexOptions.None, new[] { '1', '2', '3', '4', '5', '6' })] - public void SetExclusionsExpected(string set, RegexOptions options, char[] expectedExcluded) + public async Task SetExclusionsExpected(string set, RegexOptions 
options, char[] expectedExcluded) { - ValidateSet(set, options, null, new HashSet(expectedExcluded), validateEveryChar: true); + await ValidateSetAsync(set, options, null, new HashSet(expectedExcluded), validateEveryChar: true); } [Theory] @@ -80,86 +81,86 @@ public void SetExclusionsExpected(string set, RegexOptions options, char[] expec [InlineData('\u00FF')] [InlineData('\u0080')] [InlineData('\u0100')] - public void SingleExpected(char c) + public async Task SingleExpected(char c) { string s = $@"\u{(int)c:X4}"; var set = new HashSet() { c }; // One - ValidateSet($"{s}", RegexOptions.None, set, null); - ValidateSet($"[{s}]", RegexOptions.None, set, null); - ValidateSet($"[^{s}]", RegexOptions.None, null, set); + await ValidateSetAsync($"{s}", RegexOptions.None, set, null); + await ValidateSetAsync($"[{s}]", RegexOptions.None, set, null); + await ValidateSetAsync($"[^{s}]", RegexOptions.None, null, set); // Positive lookahead - ValidateSet($"(?={s}){s}", RegexOptions.None, set, null); - ValidateSet($"(?=[^{s}])[^{s}]", RegexOptions.None, null, set); + await ValidateSetAsync($"(?={s}){s}", RegexOptions.None, set, null); + await ValidateSetAsync($"(?=[^{s}])[^{s}]", RegexOptions.None, null, set); // Negative lookahead - ValidateSet($"(?![^{s}]){s}", RegexOptions.None, set, null); - ValidateSet($"(?![{s}])[^{s}]", RegexOptions.None, null, set); + await ValidateSetAsync($"(?![^{s}]){s}", RegexOptions.None, set, null); + await ValidateSetAsync($"(?![{s}])[^{s}]", RegexOptions.None, null, set); // Concatenation - ValidateSet($"[{s}{s}]", RegexOptions.None, set, null); - ValidateSet($"[^{s}{s}{s}]", RegexOptions.None, null, set); + await ValidateSetAsync($"[{s}{s}]", RegexOptions.None, set, null); + await ValidateSetAsync($"[^{s}{s}{s}]", RegexOptions.None, null, set); // Alternation - ValidateSet($"{s}|{s}", RegexOptions.None, set, null); - ValidateSet($"[^{s}]|[^{s}]|[^{s}]", RegexOptions.None, null, set); - ValidateSet($"{s}|[^{s}]", RegexOptions.None, null, new 
HashSet()); + await ValidateSetAsync($"{s}|{s}", RegexOptions.None, set, null); + await ValidateSetAsync($"[^{s}]|[^{s}]|[^{s}]", RegexOptions.None, null, set); + await ValidateSetAsync($"{s}|[^{s}]", RegexOptions.None, null, new HashSet()); } [Fact] - public void AllEmptySets() + public async Task AllEmptySets() { var set = new HashSet(); - ValidateSet(@"[\u0000-\uFFFF]", RegexOptions.None, null, set); - ValidateSet(@"[\u0000-\uFFFFa-z]", RegexOptions.None, null, set); - ValidateSet(@"[\u0000-\u1000\u1001-\u2002\u2003-\uFFFF]", RegexOptions.None, null, set); - ValidateSet(@"[\u0000-\uFFFE\u0001-\uFFFF]", RegexOptions.None, null, set, validateEveryChar: true); + await ValidateSetAsync(@"[\u0000-\uFFFF]", RegexOptions.None, null, set); + await ValidateSetAsync(@"[\u0000-\uFFFFa-z]", RegexOptions.None, null, set); + await ValidateSetAsync(@"[\u0000-\u1000\u1001-\u2002\u2003-\uFFFF]", RegexOptions.None, null, set); + await ValidateSetAsync(@"[\u0000-\uFFFE\u0001-\uFFFF]", RegexOptions.None, null, set, validateEveryChar: true); - ValidateSet(@"[^\u0000-\uFFFF]", RegexOptions.None, set, null); - ValidateSet(@"[^\u0000-\uFFFFa-z]", RegexOptions.None, set, null); - ValidateSet(@"[^\u0000-\uFFFE\u0001-\uFFFF]", RegexOptions.None, set, null); - ValidateSet(@"[^\u0000-\u1000\u1001-\u2002\u2003-\uFFFF]", RegexOptions.None, set, null, validateEveryChar: true); + await ValidateSetAsync(@"[^\u0000-\uFFFF]", RegexOptions.None, set, null); + await ValidateSetAsync(@"[^\u0000-\uFFFFa-z]", RegexOptions.None, set, null); + await ValidateSetAsync(@"[^\u0000-\uFFFE\u0001-\uFFFF]", RegexOptions.None, set, null); + await ValidateSetAsync(@"[^\u0000-\u1000\u1001-\u2002\u2003-\uFFFF]", RegexOptions.None, set, null, validateEveryChar: true); } [Fact] - public void AllButOneSets() + public async Task AllButOneSets() { - ValidateSet(@"[\u0000-\uFFFE]", RegexOptions.None, null, new HashSet() { '\uFFFF' }); - ValidateSet(@"[\u0001-\uFFFF]", RegexOptions.None, null, new HashSet() { '\u0000' }); 
- ValidateSet(@"[\u0000-ac-\uFFFF]", RegexOptions.None, null, new HashSet() { 'b' }, validateEveryChar: true); + await ValidateSetAsync(@"[\u0000-\uFFFE]", RegexOptions.None, null, new HashSet() { '\uFFFF' }); + await ValidateSetAsync(@"[\u0001-\uFFFF]", RegexOptions.None, null, new HashSet() { '\u0000' }); + await ValidateSetAsync(@"[\u0000-ac-\uFFFF]", RegexOptions.None, null, new HashSet() { 'b' }, validateEveryChar: true); } [Fact] - public void DotInclusionsExpected() + public async Task DotInclusionsExpected() { - ValidateSet(".", RegexOptions.None, null, new HashSet() { '\n' }); - ValidateSet(".", RegexOptions.IgnoreCase, null, new HashSet() { '\n' }); - ValidateSet(".", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, null, new HashSet() { '\n' }, validateEveryChar: true); + await ValidateSetAsync(".", RegexOptions.None, null, new HashSet() { '\n' }); + await ValidateSetAsync(".", RegexOptions.IgnoreCase, null, new HashSet() { '\n' }); + await ValidateSetAsync(".", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, null, new HashSet() { '\n' }, validateEveryChar: true); - ValidateSet(".", RegexOptions.Singleline, null, new HashSet()); - ValidateSet(".", RegexOptions.Singleline | RegexOptions.IgnoreCase, null, new HashSet()); - ValidateSet(".", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, null, new HashSet(), validateEveryChar: true); + await ValidateSetAsync(".", RegexOptions.Singleline, null, new HashSet()); + await ValidateSetAsync(".", RegexOptions.Singleline | RegexOptions.IgnoreCase, null, new HashSet()); + await ValidateSetAsync(".", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, null, new HashSet(), validateEveryChar: true); } [Fact] - public void WhitespaceInclusionsExpected() + public async Task WhitespaceInclusionsExpected() { var whitespaceInclusions = ComputeIncludedSet(char.IsWhiteSpace); - ValidateSet(@"[\s]", RegexOptions.None, whitespaceInclusions, null); 
- ValidateSet(@"[^\s]", RegexOptions.None, null, whitespaceInclusions); - ValidateSet(@"[\S]", RegexOptions.None, null, whitespaceInclusions); + await ValidateSetAsync(@"[\s]", RegexOptions.None, whitespaceInclusions, null); + await ValidateSetAsync(@"[^\s]", RegexOptions.None, null, whitespaceInclusions); + await ValidateSetAsync(@"[\S]", RegexOptions.None, null, whitespaceInclusions); } [Fact] - public void DigitInclusionsExpected() + public async Task DigitInclusionsExpected() { var digitInclusions = ComputeIncludedSet(char.IsDigit); - ValidateSet(@"[\d]", RegexOptions.None, digitInclusions, null); - ValidateSet(@"[^\d]", RegexOptions.None, null, digitInclusions); - ValidateSet(@"[\D]", RegexOptions.None, null, digitInclusions); + await ValidateSetAsync(@"[\d]", RegexOptions.None, digitInclusions, null); + await ValidateSetAsync(@"[^\d]", RegexOptions.None, null, digitInclusions); + await ValidateSetAsync(@"[\D]", RegexOptions.None, null, digitInclusions); } [Theory] @@ -167,11 +168,11 @@ public void DigitInclusionsExpected() [InlineData(@"\p{S}", new[] { UnicodeCategory.CurrencySymbol, UnicodeCategory.MathSymbol, UnicodeCategory.ModifierSymbol, UnicodeCategory.OtherSymbol })] [InlineData(@"\p{Lu}\p{Zl}", new[] { UnicodeCategory.UppercaseLetter, UnicodeCategory.LineSeparator })] [InlineData(@"\w", new[] { UnicodeCategory.LowercaseLetter, UnicodeCategory.UppercaseLetter, UnicodeCategory.TitlecaseLetter, UnicodeCategory.OtherLetter, UnicodeCategory.ModifierLetter, UnicodeCategory.NonSpacingMark, UnicodeCategory.DecimalDigitNumber, UnicodeCategory.ConnectorPunctuation })] - public void UnicodeCategoryInclusionsExpected(string set, UnicodeCategory[] categories) + public async Task UnicodeCategoryInclusionsExpected(string set, UnicodeCategory[] categories) { var categoryInclusions = ComputeIncludedSet(c => Array.IndexOf(categories, char.GetUnicodeCategory(c)) >= 0); - ValidateSet($"[{set}]", RegexOptions.None, categoryInclusions, null); - ValidateSet($"[^{set}]", 
RegexOptions.None, null, categoryInclusions); + await ValidateSetAsync($"[{set}]", RegexOptions.None, categoryInclusions, null); + await ValidateSetAsync($"[^{set}]", RegexOptions.None, null, categoryInclusions); } [Theory] @@ -282,7 +283,7 @@ public void UnicodeCategoryInclusionsExpected(string set, UnicodeCategory[] cate [InlineData(@"\p{IsSpecials}", new[] { 0xFFF0, 0xFFFF })] [InlineData(@"\p{IsRunic}\p{IsHebrew}", new[] { 0x0590, 0x05FF, 0x16A0, 0x16FF })] [InlineData(@"abx-z\p{IsRunic}\p{IsHebrew}", new[] { 0x0590, 0x05FF, 0x16A0, 0x16FF, 'a', 'a', 'b', 'b', 'x', 'x', 'y', 'z' })] - public void NamedBlocksInclusionsExpected(string set, int[] ranges) + public async Task NamedBlocksInclusionsExpected(string set, int[] ranges) { var included = new HashSet(); for (int i = 0; i < ranges.Length - 1; i += 2) @@ -290,8 +291,8 @@ public void NamedBlocksInclusionsExpected(string set, int[] ranges) ComputeIncludedSet(c => c >= ranges[i] && c <= ranges[i + 1], included); } - ValidateSet($"[{set}]", RegexOptions.None, included, null); - ValidateSet($"[^{set}]", RegexOptions.None, null, included); + await ValidateSetAsync($"[{set}]", RegexOptions.None, included, null); + await ValidateSetAsync($"[^{set}]", RegexOptions.None, null, included); } [Theory] @@ -325,19 +326,20 @@ public void NamedBlocksInclusionsExpected(string set, int[] ranges) [InlineData("Zl", UnicodeCategory.LineSeparator)] [InlineData("Zp", UnicodeCategory.ParagraphSeparator)] [InlineData("Zs", UnicodeCategory.SpaceSeparator)] - public void UnicodeCategoriesInclusionsExpected(string generalCategory, UnicodeCategory unicodeCategory) + public async Task UnicodeCategoriesInclusionsExpected(string generalCategory, UnicodeCategory unicodeCategory) { - foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.Compiled }) + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { Regex r; + char[] allChars = Enumerable.Range(0, char.MaxValue + 1).Select(i => (char)i).ToArray(); int 
expectedInCategory = allChars.Count(c => char.GetUnicodeCategory(c) == unicodeCategory); int expectedNotInCategory = allChars.Length - expectedInCategory; - r = new Regex(@$"\p{{{generalCategory}}}"); + r = await RegexHelpers.GetRegexAsync(engine, @$"\p{{{generalCategory}}}"); Assert.Equal(expectedInCategory, r.Matches(string.Concat(allChars)).Count); - r = new Regex(@$"\P{{{generalCategory}}}"); + r = await RegexHelpers.GetRegexAsync(engine, (@$"\P{{{generalCategory}}}")); Assert.Equal(expectedNotInCategory, r.Matches(string.Concat(allChars)).Count); } } @@ -375,13 +377,13 @@ private static void ComputeIncludedSet(Func func, HashSet incl } [Fact] - public void ValidateValidateSet() + public async Task ValidateValidateSet() { - Assert.Throws(() => ValidateSet("[a]", RegexOptions.None, new HashSet() { 'b' }, null)); - Assert.Throws(() => ValidateSet("[a]", RegexOptions.None, new HashSet() { 'b' }, null, validateEveryChar: true)); + await Assert.ThrowsAsync(() => ValidateSetAsync("[a]", RegexOptions.None, new HashSet() { 'b' }, null)); + await Assert.ThrowsAsync(() => ValidateSetAsync("[a]", RegexOptions.None, new HashSet() { 'b' }, null, validateEveryChar: true)); - Assert.Throws(() => ValidateSet("[b]", RegexOptions.None, null, new HashSet() { 'b' })); - Assert.Throws(() => ValidateSet("[b]", RegexOptions.None, null, new HashSet() { 'b' }, validateEveryChar: true)); + await Assert.ThrowsAsync(() => ValidateSetAsync("[b]", RegexOptions.None, null, new HashSet() { 'b' })); + await Assert.ThrowsAsync(() => ValidateSetAsync("[b]", RegexOptions.None, null, new HashSet() { 'b' }, validateEveryChar: true)); } [Fact] @@ -423,13 +425,13 @@ public DerivedRunner(string text) protected override void InitTrackCount() => throw new NotImplementedException(); } - private static void ValidateSet(string regex, RegexOptions options, HashSet included, HashSet excluded, bool validateEveryChar = false) + private static async Task ValidateSetAsync(string regex, RegexOptions options, 
HashSet included, HashSet excluded, bool validateEveryChar = false) { Assert.True((included != null) ^ (excluded != null)); - foreach (RegexOptions compiled in new[] { RegexOptions.None, RegexOptions.Compiled }) + foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - var r = new Regex(regex, options | compiled); + Regex r = await RegexHelpers.GetRegexAsync(engine, regex, options); if (validateEveryChar) { diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexCompilationHelper.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexCompilationHelper.cs deleted file mode 100644 index aa07f8e4b2d3c8..00000000000000 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexCompilationHelper.cs +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Collections.Generic; -using System.Linq; -using System.Reflection; - -namespace System.Text.RegularExpressions.Tests -{ - public class RegexCompilationHelper - { - /// - /// Adds RegexOptions.Compiled to the RegexOptions of each item in a given array of test data. - /// - /// The index in the object array of the CompilationOptions enum. 
- /// - public static IEnumerable TransformRegexOptions(string testDataMethodName, int regexOptionsArrayIndex) - { - IEnumerable types = Assembly.GetExecutingAssembly().GetTypes().Where(t => t.Namespace == typeof(RegexCompilationHelper).Namespace); - foreach (Type type in types) - { - IEnumerable result = InvokeTransform(type, testDataMethodName, regexOptionsArrayIndex); - if (result != null) - { - return result; - } - } - - throw new Exception($"Test method '{testDataMethodName}' not found"); - } - - private static IEnumerable InvokeTransform(Type type, string methodName, int regexOptionsArrayIndex) - { - MethodInfo methodInfo = type.GetMethod(methodName, BindingFlags.Public | BindingFlags.Static); - var data = methodInfo?.Invoke(null, null) as IEnumerable; - - return data?.Select(obj => - { - obj[regexOptionsArrayIndex] = (RegexOptions)obj[regexOptionsArrayIndex] | RegexOptions.Compiled; - return obj; - }); - } - } -} diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexCultureTests.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexCultureTests.cs index 7b5196616046cf..98187e6647da89 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexCultureTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexCultureTests.cs @@ -10,6 +10,8 @@ namespace System.Text.RegularExpressions.Tests { public class RegexCultureTests { + // TODO: Validate source generator after figuring out what to do with culture + [Theory] [InlineData("^aa$", "aA", "da-DK", RegexOptions.None, false)] [InlineData("^aA$", "aA", "da-DK", RegexOptions.None, true)] diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs new file mode 100644 index 00000000000000..7b544769b9e845 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs @@ -0,0 +1,154 @@ +// Licensed to the 
.NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics; +using System.Globalization; +using System.IO; +using System.Linq; +using System.Reflection; +using System.Runtime.Loader; +using System.Text.RegularExpressions.Generator; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CSharp; +using Microsoft.CodeAnalysis.Emit; +using Microsoft.CodeAnalysis.Text; +using Xunit; + +namespace System.Text.RegularExpressions.Tests +{ + public static class RegexGeneratorHelper + { + private static readonly CSharpParseOptions s_previewParseOptions = CSharpParseOptions.Default.WithLanguageVersion(LanguageVersion.Preview); + private static readonly MetadataReference[] s_refs = CreateReferences(); + private static readonly EmitOptions s_emitOptions = new EmitOptions(debugInformationFormat: DebugInformationFormat.Embedded); + private static readonly CSharpGeneratorDriver s_generatorDriver = CSharpGeneratorDriver.Create(new[] { new RegexGenerator().AsSourceGenerator() }, parseOptions: s_previewParseOptions); + private static Compilation? s_compilation; + + private static MetadataReference[] CreateReferences() + { + // Typically we'd want to use the right reference assemblies, but as we're not persisting any + // assets and only using this for testing purposes, referencing implementation assemblies is sufficient. 
+ + string corelib = Assembly.GetAssembly(typeof(object))!.Location; + string runtimeDir = Path.GetDirectoryName(corelib)!; + return new[] + { + MetadataReference.CreateFromFile(corelib), + MetadataReference.CreateFromFile(Path.Combine(runtimeDir, "System.Runtime.dll")), + MetadataReference.CreateFromFile(Path.Combine(runtimeDir, "System.Text.RegularExpressions.dll")) + }; + } + + internal static async Task SourceGenRegex( + string pattern, RegexOptions options = RegexOptions.None, int matchTimeout = -1, CancellationToken cancellationToken = default) + { + // Create the source boilerplate for the pattern + string code = $@" + using System.Text.RegularExpressions; + + public partial class C + {{ + [RegexGenerator( + {SymbolDisplay.FormatLiteral(pattern, quote: true)}, + {string.Join(" | ", options.ToString().Split(',').Select(o => $"RegexOptions.{o.Trim()}"))}, + {matchTimeout.ToString(CultureInfo.InvariantCulture)})] + public static partial Regex Get(); + }}"; + + + // Use a cached compilation to save a little time. Rather than creating an entirely new workspace + // for each test, just create a single compilation, cache it, and then replace its syntax tree + // on each test. + if (s_compilation is not Compilation comp) + { + // Create the project containing the source. 
+ var proj = new AdhocWorkspace() + .AddSolution(SolutionInfo.Create(SolutionId.CreateNewId(), VersionStamp.Create())) + .AddProject("Test", "test.dll", "C#") + .WithMetadataReferences(s_refs) + .WithCompilationOptions(new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary) + .WithNullableContextOptions(NullableContextOptions.Enable)) + .WithParseOptions(new CSharpParseOptions(LanguageVersion.Preview)) + .AddDocument("RegexGenerator.g.cs", SourceText.From("// Empty", Encoding.UTF8)).Project; + Assert.True(proj.Solution.Workspace.TryApplyChanges(proj.Solution)); + + s_compilation = comp = await proj!.GetCompilationAsync(CancellationToken.None).ConfigureAwait(false); + Debug.Assert(comp is not null); + } + + comp = comp.ReplaceSyntaxTree(comp.SyntaxTrees.First(), CSharpSyntaxTree.ParseText(SourceText.From(code, Encoding.UTF8), s_previewParseOptions)); + + // Run the generator + GeneratorDriverRunResult generatorResults = s_generatorDriver.RunGenerators(comp!, cancellationToken).GetRunResult(); + if (generatorResults.Diagnostics.Length != 0) + { + throw new ArgumentException( + string.Join(Environment.NewLine, generatorResults.Diagnostics) + Environment.NewLine + + string.Join(Environment.NewLine, generatorResults.GeneratedTrees.Select(t => NumberLines(t.ToString())))); + } + + // Compile the assembly to a stream + var dll = new MemoryStream(); + comp = comp.AddSyntaxTrees(generatorResults.GeneratedTrees.ToArray()); + EmitResult results = comp.Emit(dll, options: s_emitOptions, cancellationToken: cancellationToken); + if (!results.Success) + { + throw new ArgumentException( + string.Join(Environment.NewLine, results.Diagnostics.Concat(generatorResults.Diagnostics)) + Environment.NewLine + + string.Join(Environment.NewLine, generatorResults.GeneratedTrees.Select(t => NumberLines(t.ToString())))); + } + dll.Position = 0; + + // Load the assembly into its own AssemblyLoadContext. 
+ var alc = new RegexLoadContext(Environment.CurrentDirectory); + Assembly a = alc.LoadFromStream(dll); + + // Instantiate a regex using the newly created static Get method that was source generated. + Regex r = (Regex)a.GetType("C")!.GetMethod("Get")!.Invoke(null, null)!; + + // Issue an unload on the ALC, so it'll be collected once the Regex instance is collected + alc.Unload(); + + return r; + } + + /// Number the lines in the source file. + private static string NumberLines(string source) => + string.Join(Environment.NewLine, source.Split(Environment.NewLine).Select((line, lineNumber) => $"{lineNumber,6}: {line}")); + + /// Simple AssemblyLoadContext used to load source generated regex assemblies so they can be unloaded. + private sealed class RegexLoadContext : AssemblyLoadContext + { + private readonly AssemblyDependencyResolver _resolver; + + public RegexLoadContext(string pluginPath) : base(isCollectible: true) + { + _resolver = new AssemblyDependencyResolver(pluginPath); + } + + protected override Assembly? Load(AssemblyName assemblyName) + { + string? assemblyPath = _resolver.ResolveAssemblyToPath(assemblyName); + if (assemblyPath != null) + { + return LoadFromAssemblyPath(assemblyPath); + } + + return null; + } + + protected override IntPtr LoadUnmanagedDll(string unmanagedDllName) + { + string? libraryPath = _resolver.ResolveUnmanagedDllToPath(unmanagedDllName); + if (libraryPath != null) + { + return LoadUnmanagedDllFromPath(libraryPath); + } + + return IntPtr.Zero; + } + } + } +} diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netfx.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netfx.cs new file mode 100644 index 00000000000000..212bc666f46995 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netfx.cs @@ -0,0 +1,14 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +using System.Threading; +using System.Threading.Tasks; + +namespace System.Text.RegularExpressions.Tests +{ + public sealed class RegexGeneratorHelper + { + internal static Task SourceGenRegex(string pattern, RegexOptions options = RegexOptions.None, int matchTimeout = -1, CancellationToken cancellationToken = default) => + throw new NotSupportedException(); + } +} diff --git a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs index 420c007d89c274..e6f56f81992704 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs @@ -17,6 +17,10 @@ namespace System.Text.RegularExpressions.Generator.Tests { + // Tests don't actually use reflection emit, but they do generate assembly via Roslyn in-memory at run time and expect it to be JIT'd. + // The tests also use typeof(object).Assembly.Location, which returns an empty string on wasm. 
+ [ConditionalClass(typeof(PlatformDetection), nameof(PlatformDetection.IsReflectionEmitSupported))] + [PlatformSpecific(~TestPlatforms.Browser)] public class RegexGeneratorParserTests { [Theory] @@ -391,19 +395,10 @@ partial class C2 private async Task> RunGenerator( string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, CancellationToken cancellationToken = default) { - string corelib = Assembly.GetAssembly(typeof(object))!.Location; - string runtimeDir = Path.GetDirectoryName(corelib)!; - var refs = new List() - { - MetadataReference.CreateFromFile(corelib), - MetadataReference.CreateFromFile(Path.Combine(runtimeDir, "System.Runtime.dll")), - MetadataReference.CreateFromFile(Path.Combine(runtimeDir, "System.Text.RegularExpressions.dll")) - }; - var proj = new AdhocWorkspace() .AddSolution(SolutionInfo.Create(SolutionId.CreateNewId(), VersionStamp.Create())) .AddProject("RegexGeneratorTest", "RegexGeneratorTest.dll", "C#") - .WithMetadataReferences(refs) + .WithMetadataReferences(s_refs) .WithCompilationOptions(new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary) .WithNullableContextOptions(NullableContextOptions.Enable)) .WithParseOptions(new CSharpParseOptions(langVersion)) @@ -428,5 +423,19 @@ private async Task> RunGenerator( return generatorResults.Diagnostics.Concat(results.Diagnostics).Where(d => d.Severity != DiagnosticSeverity.Hidden).ToArray(); } + + private static readonly MetadataReference[] s_refs = CreateReferences(); + + private static MetadataReference[] CreateReferences() + { + string corelib = Assembly.GetAssembly(typeof(object))!.Location; + string runtimeDir = Path.GetDirectoryName(corelib)!; + return new[] + { + MetadataReference.CreateFromFile(corelib), + MetadataReference.CreateFromFile(Path.Combine(runtimeDir, "System.Runtime.dll")), + MetadataReference.CreateFromFile(Path.Combine(runtimeDir, "System.Text.RegularExpressions.dll")) + }; + } } } diff --git 
a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj index d4c93aa8f8998d..03688374d1f723 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj +++ b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj @@ -5,6 +5,7 @@ $(NoWarn);xUnit2008 $(NetCoreAppCurrent);net48 true + true @@ -26,7 +27,6 @@ - @@ -36,6 +36,7 @@ + @@ -49,7 +50,10 @@ + + + From e18adbeaa65e6165cc10410d68da3e32d06783e0 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Wed, 15 Sep 2021 21:00:32 -0400 Subject: [PATCH 05/16] Address PR feedback --- docs/project/list-of-diagnostics.md | 10 ++ .../gen/DiagnosticDescriptors.cs | 67 ++++--------- .../gen/RegexGenerator.Emitter.cs | 20 ++-- .../gen/RegexGenerator.Parser.cs | 84 +++++++--------- .../gen/RegexGenerator.cs | 21 ++-- .../gen/Resources/Strings.resx | 19 +--- .../gen/Resources/xlf/Strings.cs.xlf | 31 +----- .../gen/Resources/xlf/Strings.de.xlf | 31 +----- .../gen/Resources/xlf/Strings.es.xlf | 31 +----- .../gen/Resources/xlf/Strings.fr.xlf | 31 +----- .../gen/Resources/xlf/Strings.it.xlf | 31 +----- .../gen/Resources/xlf/Strings.ja.xlf | 31 +----- .../gen/Resources/xlf/Strings.ko.xlf | 31 +----- .../gen/Resources/xlf/Strings.pl.xlf | 31 +----- .../gen/Resources/xlf/Strings.pt-BR.xlf | 31 +----- .../gen/Resources/xlf/Strings.ru.xlf | 31 +----- .../gen/Resources/xlf/Strings.tr.xlf | 31 +----- .../gen/Resources/xlf/Strings.zh-Hans.xlf | 31 +----- .../gen/Resources/xlf/Strings.zh-Hant.xlf | 31 +----- ...m.Text.RegularExpressions.Generator.csproj | 8 +- .../ref/System.Text.RegularExpressions.cs | 4 +- .../Text/RegularExpressions/RegexCharClass.cs | 52 +++------- .../RegexGeneratorAttribute.cs | 9 +- .../Text/RegularExpressions/RegexNode.cs | 2 +- .../tests/AttRegexTests.cs | 4 +- .../tests/MonoRegexTests.cs | 2 +- 
.../tests/Regex.KnownPattern.Tests.cs | 96 +++++++++---------- .../tests/Regex.Match.Tests.cs | 32 +++---- .../tests/Regex.Tests.Common.cs | 8 +- .../tests/RegexGeneratorAttributeTests.cs | 12 +-- .../tests/RegexGeneratorHelper.netcoreapp.cs | 8 +- .../tests/RegexGeneratorHelper.netfx.cs | 2 +- .../RegexGeneratorParserTests.cs | 88 +++++++++++++++-- 33 files changed, 315 insertions(+), 636 deletions(-) diff --git a/docs/project/list-of-diagnostics.md b/docs/project/list-of-diagnostics.md index f6224ce0631d85..6aefa3a3890d3c 100644 --- a/docs/project/list-of-diagnostics.md +++ b/docs/project/list-of-diagnostics.md @@ -138,3 +138,13 @@ The diagnostic id values reserved for .NET Libraries analyzer warnings are `SYSL | __`SYSLIB1037`__ | *_`SYSLIB1032`-`SYSLIB1039` reserved for System.Text.Json.SourceGeneration._* | | __`SYSLIB1038`__ | *_`SYSLIB1032`-`SYSLIB1039` reserved for System.Text.Json.SourceGeneration._* | | __`SYSLIB1039`__ | *_`SYSLIB1032`-`SYSLIB1039` reserved for System.Text.Json.SourceGeneration._* | +| __`SYSLIB1040`__ | Invalid RegexGenerator attribute | +| __`SYSLIB1041`__ | Multiple RegexGenerator attribute | +| __`SYSLIB1042`__ | Invalid RegexGenerator arguments | +| __`SYSLIB1043`__ | RegexGenerator method must have a valid signature | +| __`SYSLIB1044`__ | RegexGenerator only supports C# 10 and newer | +| __`SYSLIB1045`__ | *_`SYSLIB1045`-`SYSLIB1049` reserved for System.Text.RegularExpressions.Generator._* | +| __`SYSLIB1046`__ | *_`SYSLIB1045`-`SYSLIB1049` reserved for System.Text.RegularExpressions.Generator._* | +| __`SYSLIB1047`__ | *_`SYSLIB1045`-`SYSLIB1049` reserved for System.Text.RegularExpressions.Generator._* | +| __`SYSLIB1048`__ | *_`SYSLIB1045`-`SYSLIB1049` reserved for System.Text.RegularExpressions.Generator._* | +| __`SYSLIB1049`__ | *_`SYSLIB1045`-`SYSLIB1049` reserved for System.Text.RegularExpressions.Generator._* | diff --git a/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs 
b/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs index a07d93331f48a9..67edabeacd0fae 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs @@ -6,80 +6,51 @@ namespace System.Text.RegularExpressions.Generator { - public static class DiagnosticDescriptors + internal static class DiagnosticDescriptors { - // TODO: Assign valid IDs - public static DiagnosticDescriptor InvalidRegexGeneratorAttribute { get; } = new DiagnosticDescriptor( - id: "SYSLIB1100", + id: "SYSLIB1040", title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), category: "RegexGenerator", DiagnosticSeverity.Error, - isEnabledByDefault: true); + isEnabledByDefault: true, + customTags: WellKnownDiagnosticTags.NotConfigurable); public static DiagnosticDescriptor MultipleRegexGeneratorAttributes { get; } = new DiagnosticDescriptor( - id: "SYSLIB1101", + id: "SYSLIB1041", title: new LocalizableResourceString(nameof(SR.MultipleRegexGeneratorAttributesMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.MultipleRegexGeneratorAttributesMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), category: "RegexGenerator", DiagnosticSeverity.Error, - isEnabledByDefault: true); + isEnabledByDefault: true, + customTags: WellKnownDiagnosticTags.NotConfigurable); public static DiagnosticDescriptor InvalidRegexArguments { get; } = new DiagnosticDescriptor( - id: "SYSLIB1102", + id: "SYSLIB1042", title: new 
LocalizableResourceString(nameof(SR.InvalidRegexArgumentsMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.InvalidRegexArgumentsMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), category: "RegexGenerator", DiagnosticSeverity.Error, - isEnabledByDefault: true); - - public static DiagnosticDescriptor RegexMethodMustReturnRegex { get; } = new DiagnosticDescriptor( - id: "SYSLIB1103", - title: new LocalizableResourceString(nameof(SR.RegexMethodMustReturnRegexMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), - messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustReturnRegexMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), - category: "RegexGenerator", - DiagnosticSeverity.Error, - isEnabledByDefault: true); - - public static DiagnosticDescriptor RegexMethodMustBeParameterless { get; } = new DiagnosticDescriptor( - id: "SYSLIB1104", - title: new LocalizableResourceString(nameof(SR.RegexMethodMustBeParameterlessMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), - messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustBeParameterlessMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), - category: "RegexGenerator", - DiagnosticSeverity.Error, - isEnabledByDefault: true); - - public static DiagnosticDescriptor RegexMethodMustNotBeGeneric { get; } = new DiagnosticDescriptor( - id: "SYSLIB1105", - title: new LocalizableResourceString(nameof(SR.RegexMethodMustNotBeGenericMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), - messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustNotBeGenericMessage), SR.ResourceManager, 
typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), - category: "RegexGenerator", - DiagnosticSeverity.Error, - isEnabledByDefault: true); - - public static DiagnosticDescriptor RegexMethodMustBePartial { get; } = new DiagnosticDescriptor( - id: "SYSLIB1106", - title: new LocalizableResourceString(nameof(SR.RegexMethodMustBePartialMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), - messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustBePartialMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), - category: "RegexGenerator", - DiagnosticSeverity.Error, - isEnabledByDefault: true); + isEnabledByDefault: true, + customTags: WellKnownDiagnosticTags.NotConfigurable); - public static DiagnosticDescriptor RegexMethodMustBeStatic { get; } = new DiagnosticDescriptor( - id: "SYSLIB1107", - title: new LocalizableResourceString(nameof(SR.RegexMethodMustBeStaticMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), - messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustBeStaticMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + public static DiagnosticDescriptor RegexMethodMustHaveValidSignature { get; } = new DiagnosticDescriptor( + id: "SYSLIB1043", + title: new LocalizableResourceString(nameof(SR.RegexMethodMustHaveValidSignatureMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustHaveValidSignatureMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), category: "RegexGenerator", DiagnosticSeverity.Error, - isEnabledByDefault: true); + isEnabledByDefault: true, + customTags: WellKnownDiagnosticTags.NotConfigurable); public static DiagnosticDescriptor InvalidLangVersion { get; } = new 
DiagnosticDescriptor( - id: "SYSLIB1108", + id: "SYSLIB1044", title: new LocalizableResourceString(nameof(SR.InvalidLangVersionMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.InvalidLangVersionMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), category: "RegexGenerator", DiagnosticSeverity.Error, - isEnabledByDefault: true); + isEnabledByDefault: true, + customTags: WellKnownDiagnosticTags.NotConfigurable); } } diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index d71c44db9e0ae1..d0807e27f24244 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -30,7 +30,7 @@ public partial class RegexGenerator /// A list of classes containing a regex method to be generated. /// CancellationToken used to request cancellation of the code generation. /// A string containing the emitted code. 
- private static string Emit(IReadOnlyList regexClasses, CancellationToken cancellationToken) + private static string Emit(IReadOnlyList regexClasses, CancellationToken cancellationToken) { var sb = new StringBuilder(4096); @@ -42,16 +42,17 @@ private static string Emit(IReadOnlyList regexClasses, CancellationT writer.WriteLine("#pragma warning disable CS0164 // Unreferenced label"); writer.WriteLine(); writer.WriteLine("using System;"); - writer.WriteLine("using System.Diagnostics;"); - writer.WriteLine("using System.Collections;"); writer.WriteLine("using System.CodeDom.Compiler;"); + writer.WriteLine("using System.Collections;"); + writer.WriteLine("using System.ComponentModel;"); + writer.WriteLine("using System.Diagnostics;"); writer.WriteLine("using System.Globalization;"); writer.WriteLine("using System.Runtime.CompilerServices;"); writer.WriteLine("using System.Text.RegularExpressions;"); writer.WriteLine("using System.Threading;"); int counter = 0; - foreach (RegexClass rc in regexClasses) + foreach (RegexType rc in regexClasses) { cancellationToken.ThrowIfCancellationRequested(); writer.WriteLine(); @@ -62,7 +63,7 @@ private static string Emit(IReadOnlyList regexClasses, CancellationT } /// Generates the code for one regular expression class. 
- private static void GenerateRegexClass(IndentedTextWriter writer, RegexClass regexClass, ref int counter) + private static void GenerateRegexClass(IndentedTextWriter writer, RegexType regexClass, ref int counter) { // Emit the namespace if (!string.IsNullOrWhiteSpace(regexClass.Namespace)) @@ -73,7 +74,7 @@ private static void GenerateRegexClass(IndentedTextWriter writer, RegexClass reg } // Emit containing types - RegexClass parent = regexClass.ParentClass; + RegexType parent = regexClass.ParentClass; var parentClasses = new Stack(); while (parent != null) { @@ -150,6 +151,7 @@ private static void GenerateRegex(IndentedTextWriter writer, RegexMethod rm, str $"TimeSpan.FromMilliseconds({rm.MatchTimeout.Value.ToString(CultureInfo.InvariantCulture)})"; writer.WriteLine(s_generatedCodeAttribute); + writer.WriteLine("[EditorBrowsable(EditorBrowsableState.Never)]"); writer.WriteLine($"{rm.Modifiers} Regex {rm.MethodName}() => {id}.Instance;"); writer.WriteLine(); writer.WriteLine(s_generatedCodeAttribute); @@ -209,7 +211,7 @@ private static void GenerateRegex(IndentedTextWriter writer, RegexMethod rm, str writer.WriteLine($" {{"); // Main implementation methods - writer.WriteLine($" protected override void InitTrackCount() => runtrackcount = {rm.Code.TrackCount};"); // TODO: Make this a nop + writer.WriteLine($" protected override void InitTrackCount() => runtrackcount = {rm.Code.TrackCount};"); writer.WriteLine(); writer.WriteLine($" protected override bool FindFirstChar()"); writer.WriteLine($" {{"); @@ -359,7 +361,7 @@ void GenerateAnchorAndLeadingChecks() } else { - // TODO: This differs subtely between interpreted and compiled. Why? + // TODO: This differs subtly between interpreted and compiled. Why? using (EmitBlock(writer, "if (runtextpos < runtextend - 1 || (runtextpos == runtextend - 1 && runtext[runtextpos] != '\\n'))")) { writer.WriteLine("goto ReturnFalse;"); @@ -2662,7 +2664,7 @@ void GenerateOneCode(string? 
label) /// - /// Branch to the MSIL corresponding to the regex code at i + /// Branch to the label corresponding to the regex code at i /// /// /// A trick: since track and stack space is gobbled up unboundedly diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs index d8617ad4106c72..49e34f00062e54 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs @@ -23,7 +23,7 @@ public partial class RegexGenerator private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => node is MethodDeclarationSyntax { AttributeLists: { Count: > 0 } }; - private static ClassDeclarationSyntax? GetSemanticTargetForGeneration(GeneratorSyntaxContext context) + private static TypeDeclarationSyntax? GetSemanticTargetForGeneration(GeneratorSyntaxContext context) { var methodDeclarationSyntax = (MethodDeclarationSyntax)context.Node; @@ -34,7 +34,7 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => if (context.SemanticModel.GetSymbolInfo(attributeSyntax).Symbol is IMethodSymbol attributeSymbol && attributeSymbol.ContainingType.ToDisplayString() == RegexGeneratorAttributeName) { - return methodDeclarationSyntax.Parent as ClassDeclarationSyntax; + return methodDeclarationSyntax.Parent as TypeDeclarationSyntax; } } } @@ -42,7 +42,7 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => return null; } - private static IReadOnlyList GetRegexClassesToEmit(Compilation compilation, Action reportDiagnostic, IEnumerable classes, CancellationToken cancellationToken) + private static IReadOnlyList GetRegexTypesToEmit(Compilation compilation, Action reportDiagnostic, IEnumerable classes, CancellationToken cancellationToken) { // TODO: Use https://github.com/dotnet/runtime/pull/59092 INamedTypeSymbol? 
regexSymbol = compilation.GetTypeByMetadataName(RegexName); @@ -50,18 +50,18 @@ private static IReadOnlyList GetRegexClassesToEmit(Compilation compi if (regexSymbol is null || regexGeneratorAttributeSymbol is null) { // Required types aren't available - return Array.Empty(); + return Array.Empty(); } - var results = new List(); + var results = new List(); // Enumerate by SyntaxTree to minimize the need to instantiate semantic models (since they're expensive) foreach (var group in classes.GroupBy(x => x.SyntaxTree)) { SemanticModel? sm = null; - foreach (ClassDeclarationSyntax classDec in group) + foreach (TypeDeclarationSyntax typeDec in group) { - foreach (MemberDeclarationSyntax member in classDec.Members) + foreach (MemberDeclarationSyntax member in typeDec.Members) { cancellationToken.ThrowIfCancellationRequested(); @@ -71,7 +71,7 @@ private static IReadOnlyList GetRegexClassesToEmit(Compilation compi continue; } - sm ??= compilation.GetSemanticModel(classDec.SyntaxTree); + sm ??= compilation.GetSemanticModel(typeDec.SyntaxTree); IMethodSymbol regexMethodSymbol = sm.GetDeclaredSymbol(methodSyntax, cancellationToken) as IMethodSymbol; if (regexMethodSymbol is null) @@ -148,37 +148,17 @@ private static IReadOnlyList GetRegexClassesToEmit(Compilation compi continue; } - if (!regexMethodSymbol.IsPartialDefinition) + if (!regexMethodSymbol.IsPartialDefinition || + !regexMethodSymbol.IsStatic || + regexMethodSymbol.Parameters.Length != 0 || + regexMethodSymbol.Arity != 0 || + !regexMethodSymbol.ReturnType.Equals(regexSymbol)) { - Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustBePartial, methodSyntax.GetLocation()); + Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustHaveValidSignature, methodSyntax.GetLocation()); continue; } - if (!regexMethodSymbol.IsStatic) - { - Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustBeStatic, methodSyntax.GetLocation()); - continue; - } - - if (regexMethodSymbol.Parameters.Length != 0) - { - 
Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustBeParameterless, methodSyntax.GetLocation()); - continue; - } - - if (regexMethodSymbol.Arity != 0) - { - Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustNotBeGeneric, methodSyntax.GetLocation()); - continue; - } - - if (!regexMethodSymbol.ReturnType.Equals(regexSymbol)) - { - Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustReturnRegex, methodSyntax.GetLocation()); - continue; - } - - if (classDec.SyntaxTree.Options is CSharpParseOptions { LanguageVersion: < LanguageVersion.CSharp10 }) + if (typeDec.SyntaxTree.Options is CSharpParseOptions { LanguageVersion: < LanguageVersion.CSharp10 }) { Diag(reportDiagnostic, DiagnosticDescriptors.InvalidLangVersion, methodSyntax.GetLocation()); continue; @@ -236,8 +216,8 @@ private static IReadOnlyList GetRegexClassesToEmit(Compilation compi } // Determine the namespace the class is declared in, if any - string? nameSpace = null; - SyntaxNode? potentialNamespaceParent = classDec.Parent; + string? ns = null; + SyntaxNode? 
potentialNamespaceParent = typeDec.Parent; while (potentialNamespaceParent is not null && potentialNamespaceParent is not NamespaceDeclarationSyntax && potentialNamespaceParent is not FileScopedNamespaceDeclarationSyntax) @@ -247,7 +227,7 @@ potentialNamespaceParent is not NamespaceDeclarationSyntax && if (potentialNamespaceParent is BaseNamespaceDeclarationSyntax namespaceParent) { - nameSpace = namespaceParent.Name.ToString(); + ns = namespaceParent.Name.ToString(); while (true) { namespaceParent = namespaceParent.Parent as NamespaceDeclarationSyntax; @@ -256,29 +236,29 @@ potentialNamespaceParent is not NamespaceDeclarationSyntax && break; } - nameSpace = $"{namespaceParent.Name}.{nameSpace}"; + ns = $"{namespaceParent.Name}.{ns}"; } } - var rc = new RegexClass + var rc = new RegexType { - Keyword = classDec.Keyword.ValueText, - Namespace = nameSpace, - Name = $"{classDec.Identifier}{classDec.TypeParameterList}", - Constraints = classDec.ConstraintClauses.ToString(), + Keyword = typeDec is RecordDeclarationSyntax rds ? $"{typeDec.Keyword.ValueText} {rds.ClassOrStructKeyword}" : typeDec.Keyword.ValueText, + Namespace = ns, + Name = $"{typeDec.Identifier}{typeDec.TypeParameterList}", + Constraints = typeDec.ConstraintClauses.ToString(), ParentClass = null, Method = regexMethod, }; - RegexClass current = rc; - var parent = classDec.Parent as TypeDeclarationSyntax; + RegexType current = rc; + var parent = typeDec.Parent as TypeDeclarationSyntax; while (parent is not null && IsAllowedKind(parent.Kind())) { - current.ParentClass = new RegexClass + current.ParentClass = new RegexType { - Keyword = parent.Keyword.ValueText, - Namespace = nameSpace, + Keyword = parent is RecordDeclarationSyntax rds2 ? 
$"{parent.Keyword.ValueText} {rds2.ClassOrStructKeyword}" : parent.Keyword.ValueText, + Namespace = ns, Name = $"{parent.Identifier}{parent.TypeParameterList}", Constraints = parent.ConstraintClauses.ToString(), ParentClass = null, @@ -305,15 +285,15 @@ static void Diag(Action reportDiagnostic, DiagnosticDescriptor desc, reportDiagnostic(Diagnostic.Create(desc, location, messageArgs)); } - /// A class holding a regex method. - internal sealed class RegexClass + /// A type holding a regex method. + internal sealed class RegexType { public RegexMethod Method; public string Keyword = string.Empty; public string Namespace = string.Empty; public string Name = string.Empty; public string Constraints = string.Empty; - public RegexClass? ParentClass; + public RegexType? ParentClass; } /// A regex method. diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs index e5c9f4bdc13ffe..7f2590f3ccc3f7 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs @@ -20,24 +20,24 @@ namespace System.Text.RegularExpressions.Generator { /// Generates C# source code to implement regular expressions. 
- [Generator] + [Generator(LanguageNames.CSharp)] public partial class RegexGenerator : IIncrementalGenerator { public void Initialize(IncrementalGeneratorInitializationContext context) { - IncrementalValuesProvider classDeclarations = context.SyntaxProvider + IncrementalValuesProvider classDeclarations = context.SyntaxProvider .CreateSyntaxProvider( static (s, _) => IsSyntaxTargetForGeneration(s), static (ctx, _) => GetSemanticTargetForGeneration(ctx)) .Where(static m => m is not null); - IncrementalValueProvider<(Compilation, ImmutableArray)> compilationAndClasses = + IncrementalValueProvider<(Compilation, ImmutableArray)> compilationAndClasses = context.CompilationProvider.Combine(classDeclarations.Collect()); context.RegisterImplementationSourceOutput(compilationAndClasses, static (context, source) => { - ImmutableArray classes = source.Item2; - if (classes.IsDefaultOrEmpty) + ImmutableArray types = source.Item2; + if (types.IsDefaultOrEmpty) { return; } @@ -46,16 +46,17 @@ public void Initialize(IncrementalGeneratorInitializationContext context) try { Compilation compilation = source.Item1; - IReadOnlyList regexClasses = GetRegexClassesToEmit(compilation, context.ReportDiagnostic, classes.Distinct(), context.CancellationToken); - if (regexClasses.Count != 0) + IReadOnlyList regexTypes = GetRegexTypesToEmit(compilation, context.ReportDiagnostic, types.Distinct(), context.CancellationToken); + if (regexTypes.Count != 0) { - result = Emit(regexClasses, context.CancellationToken); + result = Emit(regexTypes, context.CancellationToken); } } catch (Exception e) when (!(e is OperationCanceledException)) { - result = "// ERROR:" + Environment.NewLine + string.Join(Environment.NewLine, - e.ToString().Split(new[] { "\r\n", "\n" }, StringSplitOptions.None).Select(s => $"// {SymbolDisplay.FormatLiteral(s, quote: true)}")); + result = + "// ERROR:" + Environment.NewLine + + string.Join(Environment.NewLine, e.ToString().Split(new[] { "\r\n", "\n" }, 
StringSplitOptions.None).Select(s => $"// {SymbolDisplay.FormatLiteral(s, quote: false)}")); } if (result.Length > 0) diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx b/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx index 8190800add9337..bf025f3d9e52cc 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx @@ -126,23 +126,8 @@ The specified regex is invalid. '{0}' - - Regex method cannot have a body. - - - Regex method must be static. - - - Regex method must be partial - - - Regex method must not be generic. - - - Regex method must be parameterless. - - - Regex method must return Regex. + + Regex method must be static, partial, parameterless, non-generic, and return Regex. C# LangVersion of 10 or greater is required. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf index 50bb8a0bfa387e..0863caad887f41 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf @@ -217,34 +217,9 @@ The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. - - Regex method cannot have a body. - Regex method cannot have a body. - - - - Regex method must be parameterless. - Regex method must be parameterless. - - - - Regex method must be partial - Regex method must be partial - - - - Regex method must be static. - Regex method must be static. - - - - Regex method must not be generic. - Regex method must not be generic. - - - - Regex method must return Regex. - Regex method must return Regex. 
+ + Regex method must be static, partial, parameterless, non-generic, and return Regex. + Regex method must be static, partial, parameterless, non-generic, and return Regex. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf index 5723d6e86984ef..260a9d0521ff27 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf @@ -217,34 +217,9 @@ The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. - - Regex method cannot have a body. - Regex method cannot have a body. - - - - Regex method must be parameterless. - Regex method must be parameterless. - - - - Regex method must be partial - Regex method must be partial - - - - Regex method must be static. - Regex method must be static. - - - - Regex method must not be generic. - Regex method must not be generic. - - - - Regex method must return Regex. - Regex method must return Regex. + + Regex method must be static, partial, parameterless, non-generic, and return Regex. + Regex method must be static, partial, parameterless, non-generic, and return Regex. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf index 68627df92ca80b..e5deb9da8d595a 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf @@ -217,34 +217,9 @@ The RegEx engine has timed out while trying to match a pattern to an input string. 
This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. - - Regex method cannot have a body. - Regex method cannot have a body. - - - - Regex method must be parameterless. - Regex method must be parameterless. - - - - Regex method must be partial - Regex method must be partial - - - - Regex method must be static. - Regex method must be static. - - - - Regex method must not be generic. - Regex method must not be generic. - - - - Regex method must return Regex. - Regex method must return Regex. + + Regex method must be static, partial, parameterless, non-generic, and return Regex. + Regex method must be static, partial, parameterless, non-generic, and return Regex. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf index 6b8cd2e3b94cf0..aaaddf05064f82 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf @@ -217,34 +217,9 @@ The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. - - Regex method cannot have a body. - Regex method cannot have a body. - - - - Regex method must be parameterless. - Regex method must be parameterless. - - - - Regex method must be partial - Regex method must be partial - - - - Regex method must be static. - Regex method must be static. - - - - Regex method must not be generic. - Regex method must not be generic. - - - - Regex method must return Regex. - Regex method must return Regex. + + Regex method must be static, partial, parameterless, non-generic, and return Regex. 
+ Regex method must be static, partial, parameterless, non-generic, and return Regex. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf index 9a7bbbc73c9f32..6768025cdf942d 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf @@ -217,34 +217,9 @@ The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. - - Regex method cannot have a body. - Regex method cannot have a body. - - - - Regex method must be parameterless. - Regex method must be parameterless. - - - - Regex method must be partial - Regex method must be partial - - - - Regex method must be static. - Regex method must be static. - - - - Regex method must not be generic. - Regex method must not be generic. - - - - Regex method must return Regex. - Regex method must return Regex. + + Regex method must be static, partial, parameterless, non-generic, and return Regex. + Regex method must be static, partial, parameterless, non-generic, and return Regex. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf index a7139acb2e2fdc..8716f388addfff 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf @@ -217,34 +217,9 @@ The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. 
- - Regex method cannot have a body. - Regex method cannot have a body. - - - - Regex method must be parameterless. - Regex method must be parameterless. - - - - Regex method must be partial - Regex method must be partial - - - - Regex method must be static. - Regex method must be static. - - - - Regex method must not be generic. - Regex method must not be generic. - - - - Regex method must return Regex. - Regex method must return Regex. + + Regex method must be static, partial, parameterless, non-generic, and return Regex. + Regex method must be static, partial, parameterless, non-generic, and return Regex. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf index 5727824c263dd0..1554de6b120f36 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf @@ -217,34 +217,9 @@ The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. - - Regex method cannot have a body. - Regex method cannot have a body. - - - - Regex method must be parameterless. - Regex method must be parameterless. - - - - Regex method must be partial - Regex method must be partial - - - - Regex method must be static. - Regex method must be static. - - - - Regex method must not be generic. - Regex method must not be generic. - - - - Regex method must return Regex. - Regex method must return Regex. + + Regex method must be static, partial, parameterless, non-generic, and return Regex. + Regex method must be static, partial, parameterless, non-generic, and return Regex. 
diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf index ede3705af5d083..5d181760aea692 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf @@ -217,34 +217,9 @@ The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. - - Regex method cannot have a body. - Regex method cannot have a body. - - - - Regex method must be parameterless. - Regex method must be parameterless. - - - - Regex method must be partial - Regex method must be partial - - - - Regex method must be static. - Regex method must be static. - - - - Regex method must not be generic. - Regex method must not be generic. - - - - Regex method must return Regex. - Regex method must return Regex. + + Regex method must be static, partial, parameterless, non-generic, and return Regex. + Regex method must be static, partial, parameterless, non-generic, and return Regex. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf index 59972b955b3739..201efb3e195039 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf @@ -217,34 +217,9 @@ The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. - - Regex method cannot have a body. - Regex method cannot have a body. 
- - - - Regex method must be parameterless. - Regex method must be parameterless. - - - - Regex method must be partial - Regex method must be partial - - - - Regex method must be static. - Regex method must be static. - - - - Regex method must not be generic. - Regex method must not be generic. - - - - Regex method must return Regex. - Regex method must return Regex. + + Regex method must be static, partial, parameterless, non-generic, and return Regex. + Regex method must be static, partial, parameterless, non-generic, and return Regex. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf index a9f4fa76856711..fb2bba07f8c221 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf @@ -217,34 +217,9 @@ The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. - - Regex method cannot have a body. - Regex method cannot have a body. - - - - Regex method must be parameterless. - Regex method must be parameterless. - - - - Regex method must be partial - Regex method must be partial - - - - Regex method must be static. - Regex method must be static. - - - - Regex method must not be generic. - Regex method must not be generic. - - - - Regex method must return Regex. - Regex method must return Regex. + + Regex method must be static, partial, parameterless, non-generic, and return Regex. + Regex method must be static, partial, parameterless, non-generic, and return Regex. 
diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf index 9644be4868a323..16acd32087f3f6 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf @@ -217,34 +217,9 @@ The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. - - Regex method cannot have a body. - Regex method cannot have a body. - - - - Regex method must be parameterless. - Regex method must be parameterless. - - - - Regex method must be partial - Regex method must be partial - - - - Regex method must be static. - Regex method must be static. - - - - Regex method must not be generic. - Regex method must not be generic. - - - - Regex method must return Regex. - Regex method must return Regex. + + Regex method must be static, partial, parameterless, non-generic, and return Regex. + Regex method must be static, partial, parameterless, non-generic, and return Regex. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf index 362d1b7d1f7252..eea3de1550a253 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf @@ -217,34 +217,9 @@ The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. - - Regex method cannot have a body. - Regex method cannot have a body. 
- - - - Regex method must be parameterless. - Regex method must be parameterless. - - - - Regex method must be partial - Regex method must be partial - - - - Regex method must be static. - Regex method must be static. - - - - Regex method must not be generic. - Regex method must not be generic. - - - - Regex method must return Regex. - Regex method must return Regex. + + Regex method must be static, partial, parameterless, non-generic, and return Regex. + Regex method must be static, partial, parameterless, non-generic, and return Regex. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf index 5d9ecad1efbc22..db94fb362d19e9 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf @@ -217,34 +217,9 @@ The RegEx engine has timed out while trying to match a pattern to an input string. This can occur for many reasons, including very large inputs or excessive backtracking caused by nested quantifiers, back-references and other factors. - - Regex method cannot have a body. - Regex method cannot have a body. - - - - Regex method must be parameterless. - Regex method must be parameterless. - - - - Regex method must be partial - Regex method must be partial - - - - Regex method must be static. - Regex method must be static. - - - - Regex method must not be generic. - Regex method must not be generic. - - - - Regex method must return Regex. - Regex method must return Regex. + + Regex method must be static, partial, parameterless, non-generic, and return Regex. + Regex method must be static, partial, parameterless, non-generic, and return Regex. 
diff --git a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj index ccd6510cb346f7..3bb6655e92ad2b 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj +++ b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj @@ -2,22 +2,22 @@ netstandard2.0 - enable true false true false - false - cs + enable $(NoWarn);CS0436;CS0649 true $(DefineConstants);REGEXGENERATOR + true + cs + false - diff --git a/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs b/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs index 68e58c45b53d96..16c9a9d7acf1d5 100644 --- a/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs +++ b/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs @@ -221,10 +221,10 @@ public sealed partial class RegexGeneratorAttribute : System.Attribute { public RegexGeneratorAttribute(string pattern) { } public RegexGeneratorAttribute(string pattern, System.Text.RegularExpressions.RegexOptions options) { } - public RegexGeneratorAttribute(string pattern, System.Text.RegularExpressions.RegexOptions options, int matchTimeout) { } + public RegexGeneratorAttribute(string pattern, System.Text.RegularExpressions.RegexOptions options, int matchTimeoutMilliseconds) { } public string Pattern { get; } public System.Text.RegularExpressions.RegexOptions Options { get; } - public int MatchTimeout { get; } + public int MatchTimeoutMilliseconds { get; } } public partial class RegexMatchTimeoutException : System.TimeoutException, System.Runtime.Serialization.ISerializable { diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs 
b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 0821069cf561fa..2a96fc1623da0b 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -1558,44 +1558,22 @@ public static string SetDescription(string set) return desc.Append(']').ToString(); } - /// <summary> - /// Produces a human-readable description for a single character. - /// </summary> + /// <summary>Produces a human-readable description for a single character.</summary> [ExcludeFromCodeCoverage] - public static string CharDescription(char ch) - { - if (ch == '\\') - { - return "\\\\"; - } - - if (ch >= ' ' && ch <= '~') - { - return ch.ToString(); - } - - var sb = new StringBuilder(); - int shift; - - if (ch < 256) - { - sb.Append("\\x"); - shift = 8; - } - else - { - sb.Append("\\u"); - shift = 16; - } - - while (shift > 0) - { - shift -= 4; - sb.Append(HexConverter.ToCharLower(ch >> shift)); - } - - return sb.ToString(); - } + public static string CharDescription(char ch) => + ch switch + { + '\a' => "\\a", + '\b' => "\\b", + '\t' => "\\t", + '\r' => "\\r", + '\v' => "\\v", + '\f' => "\\f", + '\n' => "\\n", + '\\' => "\\\\", + >= ' ' and <= '~' => ch.ToString(), + _ => $"\\u{(uint)ch:X4}" + }; [ExcludeFromCodeCoverage] private static string CategoryDescription(char ch) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexGeneratorAttribute.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexGeneratorAttribute.cs index 1e6e1e81d4917d..5b55a41c4cad8a 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexGeneratorAttribute.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexGeneratorAttribute.cs @@ -6,6 +6,7 @@ namespace System.Text.RegularExpressions; /// Instructs 
the System.Text.RegularExpressions source generator to generate an implementation of the specified regular expression. +/// <remarks>The generator associated with this attribute only supports C#. It only supplies an implementation when applied to static, partial, parameterless, non-generic methods that are typed to return <see cref="Regex"/>.</remarks> [AttributeUsage(AttributeTargets.Method, AllowMultiple = false, Inherited = false)] public sealed class RegexGeneratorAttribute : Attribute { @@ -25,12 +26,12 @@ public RegexGeneratorAttribute(string pattern, RegexOptions options) : this (pat /// <summary>Initializes a new instance of the <see cref="RegexGeneratorAttribute"/> with the specified pattern, options, and timeout.</summary> /// <param name="pattern">The regular expression pattern to match.</param> /// <param name="options">A bitwise combination of the enumeration values that modify the regular expression.</param> - /// <param name="matchTimeout">A time-out interval (milliseconds), or <see cref="Timeout.Infinite"/> to indicate that the method should not time out.</param> - public RegexGeneratorAttribute(string pattern, RegexOptions options, int matchTimeout) + /// <param name="matchTimeoutMilliseconds">A time-out interval (milliseconds), or <see cref="Timeout.Infinite"/> to indicate that the method should not time out.</param> + public RegexGeneratorAttribute(string pattern, RegexOptions options, int matchTimeoutMilliseconds) { Pattern = pattern; Options = options; - MatchTimeout = matchTimeout; + MatchTimeoutMilliseconds = matchTimeoutMilliseconds; } /// <summary>Gets the regular expression pattern to match.</summary> @@ -40,5 +41,5 @@ public RegexGeneratorAttribute(string pattern, RegexOptions options, int matchTi public RegexOptions Options { get; } /// <summary>Gets a time-out interval (milliseconds), or <see cref="Timeout.Infinite"/> to indicate that the method should not time out.</summary> 
- public int MatchTimeout { get; } + public int MatchTimeoutMilliseconds { get; } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index c046447e088fe2..d2a5d7c365f695 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -1764,7 +1764,7 @@ internal static bool NodeSupportsSimplifiedCodeGenerationImplementation(RegexNod { bool supported = false; - // We only support the default left-to-right, not right-to-left, which requires more complication in the gerated code. + // We only support the default left-to-right, not right-to-left, which requires more complication in the generated code. // (Right-to-left is only employed when explicitly asked for by the developer or by lookbehind assertions.) 
// We also limit the recursion involved to prevent stack dives; this limitation can be removed by switching // away from a recursive implementation (done for convenience) to an iterative one that's more complicated diff --git a/src/libraries/System.Text.RegularExpressions/tests/AttRegexTests.cs b/src/libraries/System.Text.RegularExpressions/tests/AttRegexTests.cs index 239a94a6d45c42..24d27c049641cb 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/AttRegexTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/AttRegexTests.cs @@ -378,11 +378,11 @@ public async Task Test(string pattern, string input, string captures) { if (captures == "BADBR") { - await Assert.ThrowsAnyAsync(async () => (await RegexHelpers.GetRegex(engine, pattern)).IsMatch(input)); + await Assert.ThrowsAnyAsync(async () => (await RegexHelpers.GetRegexAsync(engine, pattern)).IsMatch(input)); return; } - Regex r = await RegexHelpers.GetRegex(engine, pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, pattern); if (captures == "NOMATCH") { diff --git a/src/libraries/System.Text.RegularExpressions/tests/MonoRegexTests.cs b/src/libraries/System.Text.RegularExpressions/tests/MonoRegexTests.cs index 63720f66f1811a..dcfdbe7d7b40c0 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/MonoRegexTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/MonoRegexTests.cs @@ -26,7 +26,7 @@ public async Task ValidateRegex(RegexEngine engine, string pattern, RegexOptions string result = "Fail."; try { - Regex re = await RegexHelpers.GetRegex(engine, pattern, options); + Regex re = await RegexHelpers.GetRegexAsync(engine, pattern, options); Match m = re.Match(input); if (m.Success) diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs index c78c90c6af3200..41ba4f79372a8d 100644 --- 
a/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs @@ -31,7 +31,7 @@ public async Task Docs_Examples_ScanningHrefs(RegexEngine engine) "" + ".NET Base Class Library blog

"; - Regex r = await RegexHelpers.GetRegex(engine, HrefPattern, RegexOptions.IgnoreCase); + Regex r = await RegexHelpers.GetRegexAsync(engine, HrefPattern, RegexOptions.IgnoreCase); Match m = r.Match(InputString); Assert.True(m.Success); @@ -57,7 +57,7 @@ public async Task Docs_Examples_ScanningHrefs(RegexEngine engine) [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] public async Task Docs_Examples_MDYtoDMY(RegexEngine engine) { - Regex r = await RegexHelpers.GetRegex(engine, @"\b(?\d{1,2})/(?\d{1,2})/(?\d{2,4})\b"); + Regex r = await RegexHelpers.GetRegexAsync(engine, @"\b(?\d{1,2})/(?\d{1,2})/(?\d{2,4})\b"); string dt = new DateTime(2020, 1, 8, 0, 0, 0, DateTimeKind.Utc).ToString("d", DateTimeFormatInfo.InvariantInfo); Assert.Equal("08-01-2020", r.Replace(dt, "${day}-${month}-${year}")); @@ -68,7 +68,7 @@ public async Task Docs_Examples_MDYtoDMY(RegexEngine engine) [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] public async Task Docs_Examples_ExtractProtocolPort(RegexEngine engine) { - Regex r = await RegexHelpers.GetRegex(engine, @"^(?\w+)://[^/]+?(?:\d+)?/"); + Regex r = await RegexHelpers.GetRegexAsync(engine, @"^(?\w+)://[^/]+?(?:\d+)?/"); Match m = r.Match("http://www.contoso.com:8080/letters/readme.html"); Assert.True(m.Success); Assert.Equal("http:8080", m.Result("${proto}${port}")); @@ -106,7 +106,7 @@ async Task IsValidEmailAsync(string email, RegexEngine engine) try { - Regex r = await RegexHelpers.GetRegex(engine, @"(@)(.+)$", matchTimeout: 200); + Regex r = await RegexHelpers.GetRegexAsync(engine, @"(@)(.+)$", matchTimeout: 200); // Normalize the domain part of the email email = r.Replace(email, match => @@ -131,7 +131,7 @@ async Task IsValidEmailAsync(string email, RegexEngine engine) try { - Regex r = await RegexHelpers.GetRegex( + Regex r = await RegexHelpers.GetRegexAsync( engine, @"^(?("")("".+?(?\w+)\s\k\W(?\w+)"; const string Input = "He said 
that that was the the correct answer."; - Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase); Match match = r.Match(Input); @@ -207,7 +207,7 @@ public async Task Docs_GroupingConstructs_NamedMatchedSubexpression2(RegexEngine const string Pattern = @"\D+(?\d+)\D+(?\d+)?"; string[] inputs = { "abc123def456", "abc123def" }; - Regex r = await RegexHelpers.GetRegex(engine, Pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern); var actual = new StringBuilder(); foreach (string input in inputs) @@ -254,7 +254,7 @@ public async Task Docs_GroupingConstructs_BalancingGroups(RegexEngine engine) "(?(Open)(?!))$"; const string Input = ">"; - Regex r = await RegexHelpers.GetRegex(engine, Pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern); var actual = new StringBuilder(); Match m = r.Match(Input); @@ -309,7 +309,7 @@ public async Task Docs_GroupingConstructs_NoncapturingGroups(RegexEngine engine) const string Pattern = @"(?:\b(?:\w+)\W*)+\."; const string Input = "This is a short sentence."; - Regex r = await RegexHelpers.GetRegex(engine, Pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern); Match match = r.Match(Input); Assert.True(match.Success); @@ -325,7 +325,7 @@ public async Task Docs_GroupingConstructs_GroupOptions(RegexEngine engine) const string Pattern = @"\b(?ix: d \w+)\s"; const string Input = "Dogs are decidedly good pets."; - Regex r = await RegexHelpers.GetRegex(engine, Pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern); Match match = r.Match(Input); Assert.True(match.Success); @@ -348,7 +348,7 @@ public async Task Docs_GroupingConstructs_ZeroWidthPositiveLookaheadAssertions(R const string Pattern = @"\b\w+(?=\sis\b)"; Match match; - Regex r = await RegexHelpers.GetRegex(engine, Pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern); match = r.Match("The 
dog is a Malamute."); Assert.True(match.Success); @@ -373,7 +373,7 @@ public async Task Docs_GroupingConstructs_ZeroWidthNegativeLookaheadAssertions(R const string Pattern = @"\b(?!un)\w+\b"; const string Input = "unite one unethical ethics use untie ultimate"; - Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase); MatchCollection matches = r.Matches(Input); Assert.Equal("one", matches[0].Value); @@ -390,7 +390,7 @@ public async Task Docs_GroupingConstructs_ZeroWidthPositiveLookbehindAssertions( const string Pattern = @"(?<=\b20)\d{2}\b"; const string Input = "2010 1999 1861 2140 2009"; - Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase); MatchCollection matches = r.Matches(Input); Assert.Equal("10", matches[0].Value); @@ -404,7 +404,7 @@ public async Task Docs_GroupingConstructs_ZeroWidthNegativeLookbehindAssertions( { const string Pattern = @"(?(\w)\1+).\b"); + Regex rBack = await RegexHelpers.GetRegexAsync(engine, @"(\w)\1+.\b"); + Regex rNoBack = await RegexHelpers.GetRegexAsync(engine, @"(?>(\w)\1+).\b"); string[] inputs = { "aaad", "aaaa" }; Match back, noback; @@ -453,7 +453,7 @@ public async Task Docs_GroupingConstructs_GroupCaptureRelationship(RegexEngine e const string Pattern = @"(\b(\w+)\W+)+"; const string Input = "This is a short sentence."; - Regex r = await RegexHelpers.GetRegex(engine, Pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern); Match match = r.Match(Input); @@ -496,7 +496,7 @@ public async Task Docs_Capture_Sentences(RegexEngine engine) const string Pattern = @"((\w+)[\s.])+"; const string Input = "Yes. 
This dog is very friendly."; - Regex r = await RegexHelpers.GetRegex(engine, Pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern); var actual = new StringBuilder(); foreach (Match match in r.Matches(Input)) @@ -548,7 +548,7 @@ public async Task Docs_Capture_ProductNumber(RegexEngine engine) const string Pattern = @"^([a-z]+)(\d+)?\.([a-z]+(\d)*)$"; string[] values = { "AC10", "Za203.CYM", "XYZ.CoA", "ABC.x170" }; - Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase); var actual = new StringBuilder(); foreach (var value in values) @@ -638,7 +638,7 @@ public async Task Docs_Backtracking_LinearComparisonWithoutBacktracking(RegexEng const string Pattern = @"e{2}\w\b"; const string Input = "needing a reed"; - Regex r = await RegexHelpers.GetRegex(engine, Pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern); MatchCollection matches = r.Matches(Input); Assert.Equal(1, matches.Count); @@ -654,7 +654,7 @@ public async Task Docs_Backtracking_WithOptionalQuantifiersOrAlternationConstruc const string Pattern = ".*(es)"; const string Input = "Essential services are provided by regular expressions."; - Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase); Match m = r.Match(Input); Assert.True(m.Success); @@ -671,7 +671,7 @@ public async Task Docs_Backtracking_WithOptionalQuantifiersOrAlternationConstruc [ActiveIssue("https://github.com/dotnet/runtime/issues/57891")] // takes too long due to backtracking public async Task Docs_Backtracking_WithNestedOptionalQuantifiers_ExcessiveBacktracking(RegexEngine engine) { - Regex r = await RegexHelpers.GetRegex(engine, "^(([0-9a-fA-F]{1,4}:)*([0-9a-fA-F]{1,4}))*(::)$"); + Regex r = await RegexHelpers.GetRegexAsync(engine, "^(([0-9a-fA-F]{1,4}:)*([0-9a-fA-F]{1,4}))*(::)$"); 
Assert.False(r.IsMatch("b51:4:1DB:9EE1:5:27d60:f44:D4:cd:E:5:0A5:4a:D24:41Ad:")); } @@ -680,7 +680,7 @@ public async Task Docs_Backtracking_WithNestedOptionalQuantifiers_ExcessiveBackt [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] public async Task Docs_Backtracking_WithNestedOptionalQuantifiers_BacktrackingEliminated(RegexEngine engine) { - Regex r = await RegexHelpers.GetRegex(engine, "^((?>[0-9a-fA-F]{1,4}:)*(?>[0-9a-fA-F]{1,4}))*(::)$"); + Regex r = await RegexHelpers.GetRegexAsync(engine, "^((?>[0-9a-fA-F]{1,4}:)*(?>[0-9a-fA-F]{1,4}))*(::)$"); Assert.False(r.IsMatch("b51:4:1DB:9EE1:5:27d60:f44:D4:cd:E:5:0A5:4a:D24:41Ad:")); } @@ -691,10 +691,10 @@ public async Task Docs_Backtracking_LookbehindAssertions(RegexEngine engine) { const string Input = "test@contoso.com"; - Regex rPattern = await RegexHelpers.GetRegex(engine, @"^[0-9A-Z]([-.\w]*[0-9A-Z])?@", RegexOptions.IgnoreCase); + Regex rPattern = await RegexHelpers.GetRegexAsync(engine, @"^[0-9A-Z]([-.\w]*[0-9A-Z])?@", RegexOptions.IgnoreCase); Assert.True(rPattern.IsMatch(Input)); - Regex rBehindPattern = await RegexHelpers.GetRegex(engine, @"^[0-9A-Z][-.\w]*(?<=[0-9A-Z])@", RegexOptions.IgnoreCase); + Regex rBehindPattern = await RegexHelpers.GetRegexAsync(engine, @"^[0-9A-Z][-.\w]*(?<=[0-9A-Z])@", RegexOptions.IgnoreCase); Assert.True(rBehindPattern.IsMatch(Input)); } @@ -704,7 +704,7 @@ public async Task Docs_Backtracking_LookbehindAssertions(RegexEngine engine) [ActiveIssue("https://github.com/dotnet/runtime/issues/57891")] // takes too long due to backtracking public async Task Docs_Backtracking_LookaheadAssertions_ExcessiveBacktracking(RegexEngine engine) { - Regex r = await RegexHelpers.GetRegex(engine, @"^(([A-Z]\w*)+\.)*[A-Z]\w*$", RegexOptions.IgnoreCase); + Regex r = await RegexHelpers.GetRegexAsync(engine, @"^(([A-Z]\w*)+\.)*[A-Z]\w*$", RegexOptions.IgnoreCase); Assert.False(r.IsMatch("aaaaaaaaaaaaaaaaaaaaaa.")); } @@ -713,7 +713,7 @@ public async 
Task Docs_Backtracking_LookaheadAssertions_ExcessiveBacktracking(Re [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] public async Task Docs_Backtracking_LookaheadAssertions_BacktrackingEliminated(RegexEngine engine) { - Regex r = await RegexHelpers.GetRegex(engine, @"^((?=[A-Z])\w+\.)*[A-Z]\w*$", RegexOptions.IgnoreCase); + Regex r = await RegexHelpers.GetRegexAsync(engine, @"^((?=[A-Z])\w+\.)*[A-Z]\w*$", RegexOptions.IgnoreCase); Assert.False(r.IsMatch("aaaaaaaaaaaaaaaaaaaaaa.")); } @@ -724,12 +724,12 @@ public async Task Docs_EngineCapabilities_LazyQuantifiers(RegexEngine engine) { const string Input = "This sentence ends with the number 107325."; - Regex rGreedy = await RegexHelpers.GetRegex(engine, @".+(\d+)\."); + Regex rGreedy = await RegexHelpers.GetRegexAsync(engine, @".+(\d+)\."); Match match = rGreedy.Match(Input); Assert.True(match.Success); Assert.Equal("5", match.Groups[1].Value); - Regex rLazy = await RegexHelpers.GetRegex(engine, @".+?(\d+)\."); + Regex rLazy = await RegexHelpers.GetRegexAsync(engine, @".+?(\d+)\."); match = rLazy.Match(Input); Assert.True(match.Success); Assert.Equal("107325", match.Groups[1].Value); @@ -743,7 +743,7 @@ public async Task Docs_EngineCapabilities_PositiveLookahead(RegexEngine engine) const string Pattern = @"\b[A-Z]+\b(?=\P{P})"; const string Input = "If so, what comes next?"; - Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase); MatchCollection matches = r.Matches(Input); Assert.Equal(3, matches.Count); @@ -760,7 +760,7 @@ public async Task Docs_EngineCapabilities_NegativeLookahead(RegexEngine engine) const string Pattern = @"\b(?!non)\w+\b"; const string Input = "Nonsense is not always non-functional."; - Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern, 
RegexOptions.IgnoreCase); MatchCollection matches = r.Matches(Input); Assert.Equal(4, matches.Count); @@ -778,7 +778,7 @@ public async Task Docs_EngineCapabilities_ConditionalEvaluation(RegexEngine engi const string Pattern = @"\b(?(\d{2}-)\d{2}-\d{7}|\d{3}-\d{2}-\d{4})\b"; const string Input = "01-9999999 020-333333 777-88-9999"; - Regex r = await RegexHelpers.GetRegex(engine, Pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern); MatchCollection matches = r.Matches(Input); Assert.Equal(2, matches.Count); @@ -798,8 +798,8 @@ public async Task Docs_EngineCapabilities_RightToLeftMatching(RegexEngine engine const string GreedyPattern = @".+(\d+)\."; const string Input = "This sentence ends with the number 107325."; - Regex rLTR = await RegexHelpers.GetRegex(engine, GreedyPattern); - Regex rRTL = await RegexHelpers.GetRegex(engine, GreedyPattern, RegexOptions.RightToLeft); + Regex rLTR = await RegexHelpers.GetRegexAsync(engine, GreedyPattern); + Regex rRTL = await RegexHelpers.GetRegexAsync(engine, GreedyPattern, RegexOptions.RightToLeft); // Match from left-to-right using lazy quantifier .+?. 
Match match = rLTR.Match(Input); @@ -819,7 +819,7 @@ public async Task Docs_EngineCapabilities_PositiveNegativeLookbehind(RegexEngine { const string Pattern = @"^[A-Z0-9]([-!#$%&'.*+/=?^`{}|~\w])*(?<=[A-Z0-9])$"; - Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.IgnoreCase); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase); Assert.True(r.IsMatch("jack.sprat")); Assert.False(r.IsMatch("dog#")); @@ -837,7 +837,7 @@ public async Task Docs_InlineOptions(RegexEngine engine) var actual = new StringBuilder(); - foreach (Match match in (await RegexHelpers.GetRegex(engine, @"\b(D\w+)\s(d\w+)\b")).Matches(Input)) + foreach (Match match in (await RegexHelpers.GetRegexAsync(engine, @"\b(D\w+)\s(d\w+)\b")).Matches(Input)) { actual.AppendLine(match.Value); if (match.Groups.Count > 1) @@ -850,7 +850,7 @@ public async Task Docs_InlineOptions(RegexEngine engine) } actual.AppendLine(); - foreach (Match match in (await RegexHelpers.GetRegex(engine, @"\b(D\w+)(?ixn) \s (d\w+) \b")).Matches(Input)) + foreach (Match match in (await RegexHelpers.GetRegexAsync(engine, @"\b(D\w+)(?ixn) \s (d\w+) \b")).Matches(Input)) { actual.AppendLine(match.Value); if (match.Groups.Count > 1) @@ -883,7 +883,7 @@ public async Task Docs_InlineComment(RegexEngine engine) const string Pattern = @"\b((?# case-sensitive comparison)D\w+)\s(?ixn)((?#case-insensitive comparison)d\w+)\b"; const string Input = "double dare double Double a Drooling dog The Dreaded Deep"; - Regex r = await RegexHelpers.GetRegex(engine, Pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern); Match match = r.Match(Input); Assert.True(match.Success); @@ -908,7 +908,7 @@ public async Task Docs_EndOfLineComment(RegexEngine engine) const string Pattern = @"\{\d+(,-*\d+)*(\:\w{1,4}?)*\}(?x) # Looks for a composite format item."; const string Input = "{0,-3:F}"; - Regex r = await RegexHelpers.GetRegex(engine, Pattern); + Regex r = await 
RegexHelpers.GetRegexAsync(engine, Pattern); Assert.True(r.IsMatch(Input)); } @@ -922,7 +922,7 @@ public async Task Docs_Anchors_ContiguousMatches(RegexEngine engine) const string Pattern = @"\G(\w+\s?\w*),?"; string[] expected = new[] { "capybara", "squirrel", "chipmunk", "porcupine" }; - Regex r = await RegexHelpers.GetRegex(engine, Pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern); Match m = r.Match(Input); @@ -954,7 +954,7 @@ public async Task RealWorld_ExtractResourceUri(string url, string expected) { foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - Regex r = await RegexHelpers.GetRegex(engine, @"/providers/(.+?)\?"); + Regex r = await RegexHelpers.GetRegexAsync(engine, @"/providers/(.+?)\?"); Match m = r.Match(url); Assert.True(m.Success); Assert.Equal(2, m.Groups.Count); @@ -985,7 +985,7 @@ public async Task RealWorld_IsValidCSharpName(string value, bool isExpectedMatch foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - Regex r = await RegexHelpers.GetRegex(engine, IdentifierRegex); + Regex r = await RegexHelpers.GetRegexAsync(engine, IdentifierRegex); Assert.Equal(isExpectedMatch, r.IsMatch(value)); } } @@ -1003,7 +1003,7 @@ public async Task RealWorld_IsCommentLine(string value, bool isExpectedMatch) foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - Regex r = await RegexHelpers.GetRegex(engine, CommentLineRegex); + Regex r = await RegexHelpers.GetRegexAsync(engine, CommentLineRegex); Assert.Equal(isExpectedMatch, r.IsMatch(value)); } } @@ -1022,7 +1022,7 @@ public async Task RealWorld_IsSectionLine(string value, bool isExpectedMatch) foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - Regex r = await RegexHelpers.GetRegex(engine, SectionLineRegex); + Regex r = await RegexHelpers.GetRegexAsync(engine, SectionLineRegex); Assert.Equal(isExpectedMatch, r.IsMatch(value)); } } @@ -1042,7 +1042,7 @@ public async Task RealWorld_ValueParse(string value, string expected) { foreach 
(RegexEngine engine in RegexHelpers.AvailableEngines) { - Regex r = await RegexHelpers.GetRegex(engine, @"(?-?\d+(\.\d+)?)"); + Regex r = await RegexHelpers.GetRegexAsync(engine, @"(?-?\d+(\.\d+)?)"); Match m = r.Match(value); Assert.True(m.Success); Assert.Equal(expected, m.Groups["value"].Value); @@ -1056,7 +1056,7 @@ public async Task RealWorld_FirebirdVersionString(string value, string expected) { foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - Regex r = await RegexHelpers.GetRegex(engine, @"\w{2}-\w(\d+\.\d+\.\d+\.\d+)"); + Regex r = await RegexHelpers.GetRegexAsync(engine, @"\w{2}-\w(\d+\.\d+\.\d+\.\d+)"); Match m = r.Match(value); Assert.True(m.Success); Assert.Equal(expected, m.Groups[1].Value); @@ -1072,7 +1072,7 @@ public async Task RealWorld_ExternalEntryPoint(string value, string a, string b, { foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { - Regex r = await RegexHelpers.GetRegex(engine, @"^(.+)!(.+)\.([^.]+)$"); + Regex r = await RegexHelpers.GetRegexAsync(engine, @"^(.+)!(.+)\.([^.]+)$"); Match m = r.Match(value); Assert.True(m.Success); Assert.Equal(a, m.Groups[1].Value); diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index 64d5b3d348634e..8d064362e084c0 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -476,7 +476,7 @@ public async Task Match(RegexEngine engine, string pattern, string input, RegexO bool isDefaultStart = RegexHelpers.IsDefaultStart(input, options, beginning); bool isDefaultCount = RegexHelpers.IsDefaultCount(input, options, length); - Regex r = await RegexHelpers.GetRegex(engine, pattern, options); + Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options); if (isDefaultStart && isDefaultCount) { @@ -559,7 +559,7 @@ public async void Match_DeepNesting(RegexEngine 
engine, int count) string pattern = string.Concat(Enumerable.Repeat(Start, count)) + string.Concat(Enumerable.Repeat(End, count)); string input = string.Concat(Enumerable.Repeat(Match, count)); - Regex r = await RegexHelpers.GetRegex(engine, pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, pattern); Match m = r.Match(input); Assert.True(m.Success); @@ -571,7 +571,7 @@ public async void Match_DeepNesting(RegexEngine engine, int count) [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] public async Task Match_Timeout(RegexEngine engine) { - Regex regex = await RegexHelpers.GetRegex(engine, @"\p{Lu}", RegexOptions.IgnoreCase, TimeSpan.FromHours(1)); + Regex regex = await RegexHelpers.GetRegexAsync(engine, @"\p{Lu}", RegexOptions.IgnoreCase, TimeSpan.FromHours(1)); Match match = regex.Match("abc"); Assert.True(match.Success); RegexAssert.Equal("a", match); @@ -584,7 +584,7 @@ public async Task Match_Timeout_Throws(RegexEngine engine) const string Pattern = @"^([0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*@(([0-9a-zA-Z])+([-\w]*[0-9a-zA-Z])*\.)+[a-zA-Z]{2,9})$"; string input = new string('a', 50) + "@a.a"; - Regex r = await RegexHelpers.GetRegex(engine, Pattern, RegexOptions.None, TimeSpan.FromMilliseconds(100)); + Regex r = await RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.None, TimeSpan.FromMilliseconds(100)); Assert.Throws(() => r.Match(input)); } @@ -646,7 +646,7 @@ public void Match_CachedPattern_NewTimeoutApplies(RegexOptions options) [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))] public async Task Match_Timeout_Loop_Throws(RegexEngine engine) { - Regex regex = await RegexHelpers.GetRegex(engine, @"a\s+", RegexOptions.None, TimeSpan.FromSeconds(1)); + Regex regex = await RegexHelpers.GetRegexAsync(engine, @"a\s+", RegexOptions.None, TimeSpan.FromSeconds(1)); string input = "a" + new string(' ', 800_000_000) + " "; Assert.Throws(() => regex.Match(input)); 
} @@ -659,7 +659,7 @@ public async Task Match_Timeout_Loop_Throws(RegexEngine engine) public async Task Match_Timeout_Repetition_Throws(RegexEngine engine) { int repetitionCount = 800_000_000; - Regex regex = await RegexHelpers.GetRegex(engine, @"a\s{" + repetitionCount + "}", RegexOptions.None, TimeSpan.FromSeconds(1)); + Regex regex = await RegexHelpers.GetRegexAsync(engine, @"a\s{" + repetitionCount + "}", RegexOptions.None, TimeSpan.FromSeconds(1)); string input = @"a" + new string(' ', repetitionCount) + @"b"; Assert.Throws(() => regex.Match(input)); } @@ -952,7 +952,7 @@ public async Task Match_Advanced(RegexEngine engine, string pattern, string inpu bool isDefaultStart = RegexHelpers.IsDefaultStart(input, options, beginning); bool isDefaultCount = RegexHelpers.IsDefaultStart(input, options, length); - Regex r = await RegexHelpers.GetRegex(engine, pattern, options); + Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options); if (isDefaultStart && isDefaultCount) { @@ -999,7 +999,7 @@ public static IEnumerable Match_StartatDiffersFromBeginning_MemberData [MemberData(nameof(Match_StartatDiffersFromBeginning_MemberData))] public async Task Match_StartatDiffersFromBeginning(RegexEngine engine, string pattern, string input, RegexOptions options, int startat, bool expectedSuccessStartAt, bool expectedSuccessBeginning) { - Regex r = await RegexHelpers.GetRegex(engine, pattern, options); + Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options); Assert.Equal(expectedSuccessStartAt, r.IsMatch(input, startat)); Assert.Equal(expectedSuccessStartAt, r.Match(input, startat).Success); @@ -1099,17 +1099,17 @@ public void Match_ExcessPrefix(RegexEngine engine) // Should not throw out of memory // Repeaters - Assert.False((await RegexHelpers.GetRegex(engine, @"a{2147483647,}")).IsMatch("a")); - Assert.False((await RegexHelpers.GetRegex(engine, @"a{50,}")).IsMatch("a")); // cutoff for Boyer-Moore prefix in debug - Assert.False((await 
RegexHelpers.GetRegex(engine, @"a{51,}")).IsMatch("a")); - Assert.False((await RegexHelpers.GetRegex(engine, @"a{50_000,}")).IsMatch("a")); // cutoff for Boyer-Moore prefix in release - Assert.False((await RegexHelpers.GetRegex(engine, @"a{50_001,}")).IsMatch("a")); + Assert.False((await RegexHelpers.GetRegexAsync(engine, @"a{2147483647,}")).IsMatch("a")); + Assert.False((await RegexHelpers.GetRegexAsync(engine, @"a{50,}")).IsMatch("a")); // cutoff for Boyer-Moore prefix in debug + Assert.False((await RegexHelpers.GetRegexAsync(engine, @"a{51,}")).IsMatch("a")); + Assert.False((await RegexHelpers.GetRegexAsync(engine, @"a{50_000,}")).IsMatch("a")); // cutoff for Boyer-Moore prefix in release + Assert.False((await RegexHelpers.GetRegexAsync(engine, @"a{50_001,}")).IsMatch("a")); // Multis foreach (int length in new[] { 50, 51, 50_000, 50_001, char.MaxValue + 1 }) // based on knowledge of cut-offs used in Boyer-Moore { string s = "bcd" + new string('a', length) + "efg"; - Assert.True((await RegexHelpers.GetRegex(engine, @$"a{{{length}}}")).IsMatch(s)); + Assert.True((await RegexHelpers.GetRegexAsync(engine, @$"a{{{length}}}")).IsMatch(s)); } }, engine.ToString()).Dispose(); } @@ -1182,7 +1182,7 @@ public static IEnumerable IsMatch_SucceedQuicklyDueToLoopReduction_Mem [MemberData(nameof(IsMatch_SucceedQuicklyDueToLoopReduction_MemberData))] public async Task IsMatch_SucceedQuicklyDueToLoopReduction(RegexEngine engine, string pattern, string input, bool expected) { - Regex r = await RegexHelpers.GetRegex(engine, pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, pattern); Assert.Equal(expected, r.IsMatch(input)); } @@ -1223,7 +1223,7 @@ public async Task UseRegexConcurrently_ThreadSafe_Success(RegexEngine engine, Ti for (int trial = 0; trial < Trials; trial++) { - Regex r = await RegexHelpers.GetRegex(engine, "[a-q][^u-z]{13}x", RegexOptions.None, timeout); + Regex r = await RegexHelpers.GetRegexAsync(engine, "[a-q][^u-z]{13}x", RegexOptions.None, timeout); 
Task.WaitAll(Enumerable.Range(0, b.ParticipantCount).Select(_ => Task.Factory.StartNew(() => { b.SignalAndWait(); diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs index b2923d0a13950c..3b4f3cdf466647 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs @@ -62,7 +62,7 @@ public static IEnumerable AvailableEngines } } - public static async Task GetRegex(RegexEngine engine, string pattern, RegexOptions options = RegexOptions.None, int matchTimeout = -1) + public static async Task GetRegexAsync(RegexEngine engine, string pattern, RegexOptions options = RegexOptions.None, int matchTimeout = -1) { switch (engine) { @@ -73,14 +73,14 @@ public static async Task GetRegex(RegexEngine engine, string pattern, Reg return new Regex(pattern, options | RegexOptions.Compiled, TimeSpan.FromMilliseconds(matchTimeout)); case RegexEngine.SourceGenerated: - return await RegexGeneratorHelper.SourceGenRegex(pattern, options, matchTimeout); + return await RegexGeneratorHelper.SourceGenRegexAsync(pattern, options, matchTimeout); } throw new ArgumentException($"Unknown engine: {engine}"); } - public static Task GetRegex(RegexEngine engine, string pattern, RegexOptions options, TimeSpan timeout) => - GetRegex(engine, pattern, options, (int)timeout.TotalMilliseconds); + public static Task GetRegexAsync(RegexEngine engine, string pattern, RegexOptions options, TimeSpan timeout) => + GetRegexAsync(engine, pattern, options, (int)timeout.TotalMilliseconds); } public enum RegexEngine diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorAttributeTests.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorAttributeTests.cs index aab33600f80dc4..f6792c564678dc 100644 --- 
a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorAttributeTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorAttributeTests.cs @@ -12,30 +12,30 @@ public class RegexGeneratorAttributeTests [InlineData(null, RegexOptions.None, Timeout.Infinite)] [InlineData("", (RegexOptions)12345, -2)] [InlineData("a.*b", RegexOptions.Compiled | RegexOptions.CultureInvariant, 1)] - public void Ctor_Roundtrips(string pattern, RegexOptions options, int matchTimeout) + public void Ctor_Roundtrips(string pattern, RegexOptions options, int matchTimeoutMilliseconds) { RegexGeneratorAttribute a; - if (matchTimeout == -1) + if (matchTimeoutMilliseconds == -1) { if (options == RegexOptions.None) { a = new RegexGeneratorAttribute(pattern); Assert.Equal(pattern, a.Pattern); Assert.Equal(RegexOptions.None, a.Options); - Assert.Equal(Timeout.Infinite, a.MatchTimeout); + Assert.Equal(Timeout.Infinite, a.MatchTimeoutMilliseconds); } a = new RegexGeneratorAttribute(pattern, options); Assert.Equal(pattern, a.Pattern); Assert.Equal(options, a.Options); - Assert.Equal(Timeout.Infinite, a.MatchTimeout); + Assert.Equal(Timeout.Infinite, a.MatchTimeoutMilliseconds); } - a = new RegexGeneratorAttribute(pattern, options, matchTimeout); + a = new RegexGeneratorAttribute(pattern, options, matchTimeoutMilliseconds); Assert.Equal(pattern, a.Pattern); Assert.Equal(options, a.Options); - Assert.Equal(matchTimeout, a.MatchTimeout); + Assert.Equal(matchTimeoutMilliseconds, a.MatchTimeoutMilliseconds); } } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs index 7b544769b9e845..ed0512af6a9fd7 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs @@ -28,6 +28,12 @@ public static class 
RegexGeneratorHelper private static MetadataReference[] CreateReferences() { + if (PlatformDetection.IsBrowser) + { + // These tests that use Roslyn don't work well on browser wasm today + return new MetadataReference[0]; + } + // Typically we'd want to use the right reference assemblies, but as we're not persisting any // assets and only using this for testing purposes, referencing implementation assemblies is sufficient. @@ -41,7 +47,7 @@ private static MetadataReference[] CreateReferences() }; } - internal static async Task SourceGenRegex( + internal static async Task SourceGenRegexAsync( string pattern, RegexOptions options = RegexOptions.None, int matchTimeout = -1, CancellationToken cancellationToken = default) { // Create the source boilerplate for the pattern diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netfx.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netfx.cs index 212bc666f46995..f286bd3a380076 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netfx.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netfx.cs @@ -8,7 +8,7 @@ namespace System.Text.RegularExpressions.Tests { public sealed class RegexGeneratorHelper { - internal static Task SourceGenRegex(string pattern, RegexOptions options = RegexOptions.None, int matchTimeout = -1, CancellationToken cancellationToken = default) => + internal static Task SourceGenRegexAsync(string pattern, RegexOptions options = RegexOptions.None, int matchTimeout = -1, CancellationToken cancellationToken = default) => throw new NotSupportedException(); } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs index e6f56f81992704..a58c07fe35a2c9 100644 --- 
a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs @@ -36,7 +36,7 @@ partial class C }} "); - Assert.Equal(DiagnosticDescriptors.InvalidRegexArguments.Id, Assert.Single(diagnostics).Id); + Assert.Equal("SYSLIB1042", Assert.Single(diagnostics).Id); } [Theory] @@ -52,7 +52,7 @@ partial class C }} "); - Assert.Equal(DiagnosticDescriptors.InvalidRegexArguments.Id, Assert.Single(diagnostics).Id); + Assert.Equal("SYSLIB1042", Assert.Single(diagnostics).Id); } [Theory] @@ -69,7 +69,7 @@ partial class C }} "); - Assert.Equal(DiagnosticDescriptors.InvalidRegexArguments.Id, Assert.Single(diagnostics).Id); + Assert.Equal("SYSLIB1042", Assert.Single(diagnostics).Id); } [Fact] @@ -84,7 +84,7 @@ partial class C } "); - Assert.Equal(DiagnosticDescriptors.RegexMethodMustReturnRegex.Id, Assert.Single(diagnostics).Id); + Assert.Equal("SYSLIB1043", Assert.Single(diagnostics).Id); } [Fact] @@ -99,7 +99,7 @@ partial class C } "); - Assert.Equal(DiagnosticDescriptors.RegexMethodMustBeStatic.Id, Assert.Single(diagnostics).Id); + Assert.Equal("SYSLIB1043", Assert.Single(diagnostics).Id); } [Fact] @@ -114,7 +114,7 @@ partial class C } "); - Assert.Equal(DiagnosticDescriptors.RegexMethodMustNotBeGeneric.Id, Assert.Single(diagnostics).Id); + Assert.Equal("SYSLIB1043", Assert.Single(diagnostics).Id); } [Fact] @@ -129,7 +129,7 @@ partial class C } "); - Assert.Equal(DiagnosticDescriptors.RegexMethodMustBeParameterless.Id, Assert.Single(diagnostics).Id); + Assert.Equal("SYSLIB1043", Assert.Single(diagnostics).Id); } [Fact] @@ -144,7 +144,7 @@ partial class C } "); - Assert.Equal(DiagnosticDescriptors.RegexMethodMustBePartial.Id, Assert.Single(diagnostics).Id); + Assert.Equal("SYSLIB1043", Assert.Single(diagnostics).Id); } [ActiveIssue("https://github.com/dotnet/roslyn/pull/55866")] @@ 
-160,7 +160,7 @@ partial class C } ", langVersion: LanguageVersion.CSharp9); - Assert.Equal(DiagnosticDescriptors.InvalidLangVersion.Id, Assert.Single(diagnostics).Id); + Assert.Equal("SYSLIB1044", Assert.Single(diagnostics).Id); } [Fact] @@ -392,6 +392,70 @@ partial class C2 ", compile: true)); } + [Fact] + public async Task Valid_OnStruct() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + internal partial struct C + { + [RegexGenerator(""ab"")] + private static partial Regex Valid(); + } + ", compile: true)); + } + + [Fact] + public async Task Valid_OnRecord() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + internal partial record C + { + [RegexGenerator(""ab"")] + private static partial Regex Valid(); + } + ", compile: true)); + } + + [Fact] + public async Task Valid_OnRecordStruct() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + internal partial record struct C + { + [RegexGenerator(""ab"")] + private static partial Regex Valid(); + } + ", compile: true)); + } + + [Fact] + public async Task Valid_NestedVaryingTypes() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + public partial class A + { + public partial record class B + { + public partial record struct C + { + public partial record D + { + public partial struct E + { + [RegexGenerator(""ab"")] + public static partial Regex Valid(); + } + } + } + } + } + ", compile: true)); + } + private async Task> RunGenerator( string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, CancellationToken cancellationToken = default) { @@ -420,6 +484,12 @@ private async Task> RunGenerator( comp = comp.AddSyntaxTrees(generatorResults.GeneratedTrees.ToArray()); EmitResult results = comp.Emit(Stream.Null, cancellationToken: cancellationToken); + if (!results.Success) + { + throw new ArgumentException( + string.Join(Environment.NewLine, 
results.Diagnostics.Concat(generatorResults.Diagnostics)) + Environment.NewLine + + string.Join(Environment.NewLine, generatorResults.GeneratedTrees.Select(t => t.ToString()))); + } return generatorResults.Diagnostics.Concat(results.Diagnostics).Where(d => d.Severity != DiagnosticSeverity.Hidden).ToArray(); } From 857caed1c4ca444d855fba0781220e0c5a4a249c Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Thu, 16 Sep 2021 21:38:31 -0400 Subject: [PATCH 06/16] Improve cachability of source generator Changing the generator to not collect all regexes together means we don't need to reprocess/regenerate all regexes every time any one of them is changed. --- .../gen/RegexGenerator.Emitter.cs | 96 ++-- .../gen/RegexGenerator.Parser.cs | 425 +++++++++--------- .../gen/RegexGenerator.cs | 81 ++-- 3 files changed, 294 insertions(+), 308 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index d0807e27f24244..5b39621e53722d 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -25,46 +25,32 @@ public partial class RegexGenerator { /// Code for a [GeneratedCode] attribute to put on the top-level generated members. private static readonly string s_generatedCodeAttribute = $"[GeneratedCodeAttribute(\"{typeof(RegexGenerator).Assembly.GetName().Name}\", \"{typeof(RegexGenerator).Assembly.GetName().Version}\")]"; - - /// Emits the code for the specified regular expression classes to a string. - /// A list of classes containing a regex method to be generated. - /// CancellationToken used to request cancellation of the code generation. - /// A string containing the emitted code. 
- private static string Emit(IReadOnlyList regexClasses, CancellationToken cancellationToken) + /// Header comments and usings to include at the top of every generated file. + private static readonly string[] s_headersAndUsings = new string[] { - var sb = new StringBuilder(4096); - - var writer = new IndentedTextWriter(new StringWriter(sb)); - - writer.WriteLine("// "); - writer.WriteLine("#nullable enable"); - writer.WriteLine("#pragma warning disable CS0168 // Variable declared but never used"); - writer.WriteLine("#pragma warning disable CS0164 // Unreferenced label"); - writer.WriteLine(); - writer.WriteLine("using System;"); - writer.WriteLine("using System.CodeDom.Compiler;"); - writer.WriteLine("using System.Collections;"); - writer.WriteLine("using System.ComponentModel;"); - writer.WriteLine("using System.Diagnostics;"); - writer.WriteLine("using System.Globalization;"); - writer.WriteLine("using System.Runtime.CompilerServices;"); - writer.WriteLine("using System.Text.RegularExpressions;"); - writer.WriteLine("using System.Threading;"); - - int counter = 0; - foreach (RegexType rc in regexClasses) - { - cancellationToken.ThrowIfCancellationRequested(); - writer.WriteLine(); - GenerateRegexClass(writer, rc, ref counter); - } - - return sb.ToString(); - } + "// ", + "#nullable enable", + "#pragma warning disable CS0168 // Variable declared but never used", + "#pragma warning disable CS0164 // Unreferenced label", + "", + "using System;", + "using System.CodeDom.Compiler;", + "using System.Collections;", + "using System.ComponentModel;", + "using System.Diagnostics;", + "using System.Globalization;", + "using System.Runtime.CompilerServices;", + "using System.Text.RegularExpressions;", + "using System.Threading;", + "", + }; /// Generates the code for one regular expression class. 
- private static void GenerateRegexClass(IndentedTextWriter writer, RegexType regexClass, ref int counter) + private static string EmitRegexType(RegexType regexClass) { + var sb = new StringBuilder(1024); + var writer = new IndentedTextWriter(new StringWriter(sb)); + // Emit the namespace if (!string.IsNullOrWhiteSpace(regexClass.Namespace)) { @@ -97,12 +83,11 @@ private static void GenerateRegexClass(IndentedTextWriter writer, RegexType rege // the method name the user provided and a unique counter value, plus a simple // non-randomized (for determinism) hash of the previous content to try to make // the name that much harder to predict. - counter++; - string generatedName = $"GeneratedRegex_{regexClass.Method.MethodName}_{counter:X}_"; + string generatedName = $"GeneratedRegex_{regexClass.Method.MethodName}_"; generatedName += ComputeStringHash(generatedName).ToString("X"); // Generate the regex type - GenerateRegex(writer, regexClass.Method, generatedName); + EmitRegexMethod(writer, regexClass.Method, generatedName); while (writer.Indent != 0) { @@ -110,6 +95,9 @@ private static void GenerateRegexClass(IndentedTextWriter writer, RegexType rege writer.WriteLine("}"); } + writer.Flush(); + return sb.ToString(); + // FNV-1a hash function. The actual algorithm used doesn't matter; just something simple // to create a pseudo-random value based on input text. static uint ComputeStringHash(string s) @@ -142,7 +130,7 @@ private static bool SupportsCustomCodeGeneration(RegexMethod rm) } /// Generates the code for a regular expression method. 
- private static void GenerateRegex(IndentedTextWriter writer, RegexMethod rm, string id) + private static void EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, string id) { string patternExpression = Literal(rm.Pattern); string optionsExpression = $"(RegexOptions)({rm.Options})"; @@ -151,10 +139,10 @@ private static void GenerateRegex(IndentedTextWriter writer, RegexMethod rm, str $"TimeSpan.FromMilliseconds({rm.MatchTimeout.Value.ToString(CultureInfo.InvariantCulture)})"; writer.WriteLine(s_generatedCodeAttribute); - writer.WriteLine("[EditorBrowsable(EditorBrowsableState.Never)]"); writer.WriteLine($"{rm.Modifiers} Regex {rm.MethodName}() => {id}.Instance;"); writer.WriteLine(); writer.WriteLine(s_generatedCodeAttribute); + writer.WriteLine("[EditorBrowsable(EditorBrowsableState.Never)]"); writer.WriteLine($"{(writer.Indent != 0 ? "private" : "internal")} sealed class {id} : Regex"); writer.WriteLine("{"); writer.Write($" public static Regex Instance {{ get; }} = "); @@ -216,14 +204,14 @@ private static void GenerateRegex(IndentedTextWriter writer, RegexMethod rm, str writer.WriteLine($" protected override bool FindFirstChar()"); writer.WriteLine($" {{"); writer.Indent += 4; - GenerateFindFirstChar(writer, rm, id); + EmitFindFirstChar(writer, rm, id); writer.Indent -= 4; writer.WriteLine($" }}"); writer.WriteLine(); writer.WriteLine($" protected override void Go()"); writer.WriteLine($" {{"); writer.Indent += 4; - GenerateGo(writer, rm, id); + EmitGo(writer, rm, id); writer.Indent -= 4; writer.WriteLine($" }}"); writer.WriteLine($" }}"); @@ -254,7 +242,7 @@ static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht) } /// Emits the body of the FindFirstChar override. 
- private static void GenerateFindFirstChar(IndentedTextWriter writer, RegexMethod rm, string id) + private static void EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, string id) { RegexOptions options = (RegexOptions)rm.Options; var code = rm.Code; @@ -285,7 +273,7 @@ private static void GenerateFindFirstChar(IndentedTextWriter writer, RegexMethod $"if (runtextpos <= runtextend{minRequiredLengthOffset})" : $"if (runtextpos{minRequiredLengthOffset} >= runtextbeg)")) { - GenerateAnchorAndLeadingChecks(); + EmitAnchorAndLeadingChecks(); } writer.WriteLine(); @@ -294,7 +282,7 @@ private static void GenerateFindFirstChar(IndentedTextWriter writer, RegexMethod writer.WriteLine(!rm.Code.RightToLeft ? "base.runtextpos = runtextend;" : "base.runtextpos = runtextbeg;"); writer.WriteLine("return false;"); - void GenerateAnchorAndLeadingChecks() + void EmitAnchorAndLeadingChecks() { // Generate anchor checks. if ((code.LeadingAnchor & (RegexPrefixAnalyzer.Beginning | RegexPrefixAnalyzer.Start | RegexPrefixAnalyzer.EndZ | RegexPrefixAnalyzer.End | RegexPrefixAnalyzer.Bol)) != 0) @@ -689,22 +677,22 @@ void GenerateAnchorAndLeadingChecks() } /// Emits the body of the Go override. - private static void GenerateGo(IndentedTextWriter writer, RegexMethod rm, string id) + private static void EmitGo(IndentedTextWriter writer, RegexMethod rm, string id) { Debug.Assert(rm.Tree.Root.Type == RegexNode.Capture); if (RegexNode.NodeSupportsSimplifiedCodeGenerationImplementation(rm.Tree.Root.Child(0), RegexNode.DefaultMaxRecursionDepth) && (((RegexOptions)rm.Tree.Root.Options) & RegexOptions.RightToLeft) == 0) { - GenerateSimplifiedGo(writer, rm, id); + EmitSimplifiedGo(writer, rm, id); } else { - GenerateCompleteGo(writer, rm, id); + EmitCompleteGo(writer, rm, id); } } /// Emits the body of a simplified Go implementation that's possible when there's minimal backtracking required by the expression. 
- private static void GenerateSimplifiedGo(IndentedTextWriter writer, RegexMethod rm, string id) + private static void EmitSimplifiedGo(IndentedTextWriter writer, RegexMethod rm, string id) { RegexOptions options = (RegexOptions)rm.Options; var code = rm.Code; @@ -1653,7 +1641,7 @@ void EmitAtomicNodeLoop(RegexNode node) } /// Emits the body of a complete Go implementation that fully supports backtracking. - private static void GenerateCompleteGo(IndentedTextWriter writer, RegexMethod rm, string id) + private static void EmitCompleteGo(IndentedTextWriter writer, RegexMethod rm, string id) { const int Stackpop = 0; // pop one const int Stackpop2 = 1; // pop two @@ -1713,7 +1701,7 @@ private static void GenerateCompleteGo(IndentedTextWriter writer, RegexMethod rm { currentCodePos = codepos; currentOpcode = codes[codepos]; - GenerateOneCode(labels[codepos]); + EmitOneCode(labels[codepos]); writer.WriteLine(); } @@ -1750,7 +1738,7 @@ private static void GenerateCompleteGo(IndentedTextWriter writer, RegexMethod rm currentCodePos = n.codepos; currentBacktrackNote = i; currentOpcode = codes[n.codepos] | n.flags; - GenerateOneCode(null); // should always end in a goto + EmitOneCode(null); // should always end in a goto } else { @@ -1780,7 +1768,7 @@ private static void GenerateCompleteGo(IndentedTextWriter writer, RegexMethod rm /// dealt with one-at-a-time in RegexIntepreter. We can also unroll loops that /// iterate over constant strings or sets. ///
- void GenerateOneCode(string? label) + void EmitOneCode(string? label) { writer.WriteLine($"// {SymbolDisplay.FormatLiteral(RegexCode.OpcodeDescription(currentCodePos, rm.Code.Codes, rm.Code.Strings), quote: false)}"); diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs index 49e34f00062e54..c64e0aa388cdf7 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs @@ -21,9 +21,10 @@ public partial class RegexGenerator private const string RegexGeneratorAttributeName = "System.Text.RegularExpressions.RegexGeneratorAttribute"; private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => + // We don't have a semantic model here, so the best we can do is say whether there are any attributes. node is MethodDeclarationSyntax { AttributeLists: { Count: > 0 } }; - private static TypeDeclarationSyntax? GetSemanticTargetForGeneration(GeneratorSyntaxContext context) + private static MethodDeclarationSyntax? 
GetSemanticTargetForGeneration(GeneratorSyntaxContext context) { var methodDeclarationSyntax = (MethodDeclarationSyntax)context.Node; @@ -34,7 +35,7 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => if (context.SemanticModel.GetSymbolInfo(attributeSyntax).Symbol is IMethodSymbol attributeSymbol && attributeSymbol.ContainingType.ToDisplayString() == RegexGeneratorAttributeName) { - return methodDeclarationSyntax.Parent as TypeDeclarationSyntax; + return methodDeclarationSyntax; } } } @@ -42,7 +43,8 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => return null; } - private static IReadOnlyList GetRegexTypesToEmit(Compilation compilation, Action reportDiagnostic, IEnumerable classes, CancellationToken cancellationToken) + // Returns null if nothing to do, Diagnostic if there's an error to report, or RegexType if the type was analyzed successfully. + private static object? GetRegexTypeToEmit(Compilation compilation, MethodDeclarationSyntax methodSyntax, CancellationToken cancellationToken) { // TODO: Use https://github.com/dotnet/runtime/pull/59092 INamedTypeSymbol? regexSymbol = compilation.GetTypeByMetadataName(RegexName); @@ -50,239 +52,212 @@ private static IReadOnlyList GetRegexTypesToEmit(Compilation compilat if (regexSymbol is null || regexGeneratorAttributeSymbol is null) { // Required types aren't available - return Array.Empty(); + return null; } - var results = new List(); + TypeDeclarationSyntax typeDec = methodSyntax.Parent as TypeDeclarationSyntax; + if (typeDec is null) + { + return null; + } + + SemanticModel sm = compilation.GetSemanticModel(methodSyntax.SyntaxTree); + + IMethodSymbol regexMethodSymbol = sm.GetDeclaredSymbol(methodSyntax, cancellationToken) as IMethodSymbol; + if (regexMethodSymbol is null) + { + return null; + } + + ImmutableArray? 
boundAttributes = regexMethodSymbol.GetAttributes(); + if (boundAttributes is null || boundAttributes.Value.Length == 0) + { + return null; + } + + DiagnosticDescriptor? errorDescriptor = null; + RegexMethod? regexMethod = null; + foreach (AttributeData attributeData in boundAttributes) + { + // If we already encountered an error, stop looking at this method's attributes. + if (errorDescriptor is not null) + { + break; + } + + // If this isn't + if (!attributeData.AttributeClass.Equals(regexGeneratorAttributeSymbol)) + { + continue; + } + + if (attributeData.ConstructorArguments.Any(ca => ca.Kind == TypedConstantKind.Error)) + { + errorDescriptor = DiagnosticDescriptors.InvalidRegexGeneratorAttribute; + break; + } + + ImmutableArray items = attributeData.ConstructorArguments; + if (items.Length is > 0 and <= 3 && items[0].Value is string pattern) + { + switch (items.Length) + { + case 1: + regexMethod = new RegexMethod { Pattern = pattern }; + break; + + case 2: + regexMethod = new RegexMethod { Pattern = pattern, Options = items[1].Value as int?, }; + break; + + case 3: + regexMethod = new RegexMethod { Pattern = pattern, Options = items[1].Value as int?, MatchTimeout = items[2].Value as int?, }; + break; + } + } + else + { + errorDescriptor = DiagnosticDescriptors.InvalidRegexGeneratorAttribute; + } + } + + if (errorDescriptor is not null) + { + return Diagnostic.Create(errorDescriptor, methodSyntax.GetLocation()); + } - // Enumerate by SyntaxTree to minimize the need to instantiate semantic models (since they're expensive) - foreach (var group in classes.GroupBy(x => x.SyntaxTree)) + if (regexMethod is null) { - SemanticModel? 
sm = null; - foreach (TypeDeclarationSyntax typeDec in group) + return null; + } + + if (regexMethod.Pattern is null) + { + return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "(null)"); + } + + if (!regexMethodSymbol.IsPartialDefinition || + !regexMethodSymbol.IsStatic || + regexMethodSymbol.Parameters.Length != 0 || + regexMethodSymbol.Arity != 0 || + !regexMethodSymbol.ReturnType.Equals(regexSymbol)) + { + return Diagnostic.Create(DiagnosticDescriptors.RegexMethodMustHaveValidSignature, methodSyntax.GetLocation()); + } + + if (typeDec.SyntaxTree.Options is CSharpParseOptions { LanguageVersion: < LanguageVersion.CSharp10 }) + { + return Diagnostic.Create(DiagnosticDescriptors.InvalidLangVersion, methodSyntax.GetLocation()); + } + + regexMethod.MethodName = regexMethodSymbol.Name; + regexMethod.Modifiers = methodSyntax.Modifiers.ToString(); + regexMethod.MatchTimeout ??= Timeout.Infinite; + RegexOptions options = regexMethod.Options.HasValue ? (RegexOptions)regexMethod.Options.Value : RegexOptions.None; + regexMethod.Options = (int)RegexOptions.Compiled | (int)options; + + // TODO: This is going to include the culture that's current at the time of compilation. + // What should we do about that? We could: + // - say not specifying CultureInvariant is invalid if anything about options or the expression will look at culture + // - fall back to not generating source if it's not specified + // - just use whatever culture is present at build time + // - devise a new way of not using the culture present at build time + // - ... + CultureInfo culture = (options & RegexOptions.CultureInvariant) != 0 ? 
CultureInfo.InvariantCulture : CultureInfo.CurrentCulture; + + // Validate the options + const RegexOptions SupportedOptions = + RegexOptions.IgnoreCase | + RegexOptions.Multiline | + RegexOptions.ExplicitCapture | + RegexOptions.Compiled | + RegexOptions.Singleline | + RegexOptions.IgnorePatternWhitespace | + RegexOptions.RightToLeft | + RegexOptions.ECMAScript | + RegexOptions.CultureInvariant; + if ((regexMethod.Options.Value & ~(int)SupportedOptions) != 0) + { + return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "options"); + } + + // Validate the timeout + if (regexMethod.MatchTimeout.Value is 0 or < -1) + { + return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "matchTimeout"); + } + + // Parse the input pattern + try + { + regexMethod.Tree = RegexParser.Parse(regexMethod.Pattern, (RegexOptions)regexMethod.Options, culture); + regexMethod.Code = RegexWriter.Write(regexMethod.Tree); + } + catch (Exception e) + { + return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), e.Message); + } + + // Determine the namespace the class is declared in, if any + string? ns = null; + SyntaxNode? 
potentialNamespaceParent = typeDec.Parent; + while (potentialNamespaceParent is not null && + potentialNamespaceParent is not NamespaceDeclarationSyntax && + potentialNamespaceParent is not FileScopedNamespaceDeclarationSyntax) + { + potentialNamespaceParent = potentialNamespaceParent.Parent; + } + + if (potentialNamespaceParent is BaseNamespaceDeclarationSyntax namespaceParent) + { + ns = namespaceParent.Name.ToString(); + while (true) { - foreach (MemberDeclarationSyntax member in typeDec.Members) + namespaceParent = namespaceParent.Parent as NamespaceDeclarationSyntax; + if (namespaceParent is null) { - cancellationToken.ThrowIfCancellationRequested(); - - // Scope to just methods - if (member is not MethodDeclarationSyntax methodSyntax) - { - continue; - } - - sm ??= compilation.GetSemanticModel(typeDec.SyntaxTree); - - IMethodSymbol regexMethodSymbol = sm.GetDeclaredSymbol(methodSyntax, cancellationToken) as IMethodSymbol; - if (regexMethodSymbol is null) - { - continue; - } - - ImmutableArray? boundAttributes = regexMethodSymbol.GetAttributes(); - if (boundAttributes is null || boundAttributes.Value.Length == 0) - { - continue; - } - - DiagnosticDescriptor? errorDescriptor = null; - RegexMethod? regexMethod = null; - foreach (AttributeData attributeData in boundAttributes) - { - // If we already encountered an error, stop looking at this method's attributes. 
- if (errorDescriptor is not null) - { - break; - } - - // If this isn't - if (!attributeData.AttributeClass.Equals(regexGeneratorAttributeSymbol)) - { - continue; - } - - if (attributeData.ConstructorArguments.Any(ca => ca.Kind == TypedConstantKind.Error)) - { - errorDescriptor = DiagnosticDescriptors.InvalidRegexGeneratorAttribute; - break; - } - - ImmutableArray items = attributeData.ConstructorArguments; - if (items.Length is > 0 and <= 3 && items[0].Value is string pattern) - { - switch (items.Length) - { - case 1: - regexMethod = new RegexMethod { Pattern = pattern }; - break; - - case 2: - regexMethod = new RegexMethod { Pattern = pattern, Options = items[1].Value as int?, }; - break; - - case 3: - regexMethod = new RegexMethod { Pattern = pattern, Options = items[1].Value as int?, MatchTimeout = items[2].Value as int?, }; - break; - } - } - else - { - errorDescriptor = DiagnosticDescriptors.InvalidRegexGeneratorAttribute; - } - } - - if (errorDescriptor is not null) - { - Diag(reportDiagnostic, errorDescriptor, methodSyntax.GetLocation()); - continue; - } - - if (regexMethod is null) - { - continue; - } - - if (regexMethod.Pattern is null) - { - Diag(reportDiagnostic, DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "(null)"); - continue; - } - - if (!regexMethodSymbol.IsPartialDefinition || - !regexMethodSymbol.IsStatic || - regexMethodSymbol.Parameters.Length != 0 || - regexMethodSymbol.Arity != 0 || - !regexMethodSymbol.ReturnType.Equals(regexSymbol)) - { - Diag(reportDiagnostic, DiagnosticDescriptors.RegexMethodMustHaveValidSignature, methodSyntax.GetLocation()); - continue; - } - - if (typeDec.SyntaxTree.Options is CSharpParseOptions { LanguageVersion: < LanguageVersion.CSharp10 }) - { - Diag(reportDiagnostic, DiagnosticDescriptors.InvalidLangVersion, methodSyntax.GetLocation()); - continue; - } - - regexMethod.MethodName = regexMethodSymbol.Name; - regexMethod.Modifiers = methodSyntax.Modifiers.ToString(); - 
regexMethod.MatchTimeout ??= Timeout.Infinite; - RegexOptions options = regexMethod.Options.HasValue ? (RegexOptions)regexMethod.Options.Value : RegexOptions.None; - regexMethod.Options = (int)RegexOptions.Compiled | (int)options; - - // TODO: This is going to include the culture that's current at the time of compilation. - // What should we do about that? We could: - // - say not specifying CultureInvariant is invalid if anything about options or the expression will look at culture - // - fall back to not generating source if it's not specified - // - just use whatever culture is present at build time - // - devise a new way of not using the culture present at build time - // - ... - CultureInfo culture = (options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture; - - // Validate the options - const RegexOptions SupportedOptions = - RegexOptions.IgnoreCase | - RegexOptions.Multiline | - RegexOptions.ExplicitCapture | - RegexOptions.Compiled | - RegexOptions.Singleline | - RegexOptions.IgnorePatternWhitespace | - RegexOptions.RightToLeft | - RegexOptions.ECMAScript | - RegexOptions.CultureInvariant; - if ((regexMethod.Options.Value & ~(int)SupportedOptions) != 0) - { - Diag(reportDiagnostic, DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "options"); - continue; - } - - // Validate the timeout - if (regexMethod.MatchTimeout.Value is 0 or < -1) - { - Diag(reportDiagnostic, DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), "matchTimeout"); - continue; - } - - // Parse the input pattern - try - { - regexMethod.Tree = RegexParser.Parse(regexMethod.Pattern, (RegexOptions)regexMethod.Options, culture); - regexMethod.Code = RegexWriter.Write(regexMethod.Tree); - } - catch (Exception e) - { - Diag(reportDiagnostic, DiagnosticDescriptors.InvalidRegexArguments, methodSyntax.GetLocation(), e.Message); - continue; - } - - // Determine the namespace the class is declared in, if any - 
string? ns = null; - SyntaxNode? potentialNamespaceParent = typeDec.Parent; - while (potentialNamespaceParent is not null && - potentialNamespaceParent is not NamespaceDeclarationSyntax && - potentialNamespaceParent is not FileScopedNamespaceDeclarationSyntax) - { - potentialNamespaceParent = potentialNamespaceParent.Parent; - } - - if (potentialNamespaceParent is BaseNamespaceDeclarationSyntax namespaceParent) - { - ns = namespaceParent.Name.ToString(); - while (true) - { - namespaceParent = namespaceParent.Parent as NamespaceDeclarationSyntax; - if (namespaceParent is null) - { - break; - } - - ns = $"{namespaceParent.Name}.{ns}"; - } - } - - var rc = new RegexType - { - Keyword = typeDec is RecordDeclarationSyntax rds ? $"{typeDec.Keyword.ValueText} {rds.ClassOrStructKeyword}" : typeDec.Keyword.ValueText, - Namespace = ns, - Name = $"{typeDec.Identifier}{typeDec.TypeParameterList}", - Constraints = typeDec.ConstraintClauses.ToString(), - ParentClass = null, - Method = regexMethod, - }; - - RegexType current = rc; - var parent = typeDec.Parent as TypeDeclarationSyntax; - - while (parent is not null && IsAllowedKind(parent.Kind())) - { - current.ParentClass = new RegexType - { - Keyword = parent is RecordDeclarationSyntax rds2 ? $"{parent.Keyword.ValueText} {rds2.ClassOrStructKeyword}" : parent.Keyword.ValueText, - Namespace = ns, - Name = $"{parent.Identifier}{parent.TypeParameterList}", - Constraints = parent.ConstraintClauses.ToString(), - ParentClass = null, - }; - - current = current.ParentClass; - parent = parent.Parent as TypeDeclarationSyntax; - } - - results.Add(rc); - - bool IsAllowedKind(SyntaxKind kind) => - kind == SyntaxKind.ClassDeclaration || - kind == SyntaxKind.StructDeclaration || - kind == SyntaxKind.RecordDeclaration || - kind == SyntaxKind.RecordStructDeclaration; + break; } + + ns = $"{namespaceParent.Name}.{ns}"; } } - return results; + var rc = new RegexType + { + Keyword = typeDec is RecordDeclarationSyntax rds ? 
$"{typeDec.Keyword.ValueText} {rds.ClassOrStructKeyword}" : typeDec.Keyword.ValueText, + Namespace = ns, + Name = $"{typeDec.Identifier}{typeDec.TypeParameterList}", + Constraints = typeDec.ConstraintClauses.ToString(), + ParentClass = null, + Method = regexMethod, + }; + + RegexType current = rc; + var parent = typeDec.Parent as TypeDeclarationSyntax; + + while (parent is not null && IsAllowedKind(parent.Kind())) + { + current.ParentClass = new RegexType + { + Keyword = parent is RecordDeclarationSyntax rds2 ? $"{parent.Keyword.ValueText} {rds2.ClassOrStructKeyword}" : parent.Keyword.ValueText, + Namespace = ns, + Name = $"{parent.Identifier}{parent.TypeParameterList}", + Constraints = parent.ConstraintClauses.ToString(), + ParentClass = null, + }; + + current = current.ParentClass; + parent = parent.Parent as TypeDeclarationSyntax; + } + + return rc; - static void Diag(Action reportDiagnostic, DiagnosticDescriptor desc, Location? location, params object?[]? messageArgs) => - reportDiagnostic(Diagnostic.Create(desc, location, messageArgs)); + static bool IsAllowedKind(SyntaxKind kind) => + kind == SyntaxKind.ClassDeclaration || + kind == SyntaxKind.StructDeclaration || + kind == SyntaxKind.RecordDeclaration || + kind == SyntaxKind.RecordStructDeclaration; } /// A type holding a regex method. 
diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs index 7f2590f3ccc3f7..4be6df47f45dac 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs @@ -25,45 +25,68 @@ public partial class RegexGenerator : IIncrementalGenerator { public void Initialize(IncrementalGeneratorInitializationContext context) { - IncrementalValuesProvider classDeclarations = context.SyntaxProvider - .CreateSyntaxProvider( - static (s, _) => IsSyntaxTargetForGeneration(s), - static (ctx, _) => GetSemanticTargetForGeneration(ctx)) - .Where(static m => m is not null); + IncrementalValueProvider> typesAndCompilation = + context.SyntaxProvider - IncrementalValueProvider<(Compilation, ImmutableArray)> compilationAndClasses = - context.CompilationProvider.Combine(classDeclarations.Collect()); + // Find all MethodDeclarationSyntax nodes attributed with RegexGenerator + .CreateSyntaxProvider(static (s, _) => IsSyntaxTargetForGeneration(s), static (ctx, _) => GetSemanticTargetForGeneration(ctx)) + .Where(static m => m is not null) - context.RegisterImplementationSourceOutput(compilationAndClasses, static (context, source) => - { - ImmutableArray types = source.Item2; - if (types.IsDefaultOrEmpty) + // Pair each with the compilation + .Combine(context.CompilationProvider) + + // Use a custom comparer that ignores the compilation so that it doesn't interfere with the generator's caching of results based on MethodDeclarationSyntax + .WithComparer(new LambdaComparer<(MethodDeclarationSyntax, Compilation)>(static (left, right) => left.Item1.Equals(right.Item1), static o => o.Item1.GetHashCode())) + + // Get the resulting code string or error Diagnostic for each MethodDeclarationSyntax/Compilation pair + .Select((state, cancellationToken) => { - return; - } + object? 
result = GetRegexTypeToEmit(state.Item2, state.Item1, cancellationToken); + return result is RegexType regexType ? EmitRegexType(regexType) : result; + }) + .Collect(); + + // When there's something to output, take all the generated strings and concatenate them to output, + // and raise all of the created diagnostics. + context.RegisterSourceOutput(typesAndCompilation, static (context, results) => + { + var code = new List(s_headersAndUsings.Length + results.Length); + + // Add file header and required usings + code.AddRange(s_headersAndUsings); - string result = ""; - try + foreach (object? result in results) { - Compilation compilation = source.Item1; - IReadOnlyList regexTypes = GetRegexTypesToEmit(compilation, context.ReportDiagnostic, types.Distinct(), context.CancellationToken); - if (regexTypes.Count != 0) + switch (result) { - result = Emit(regexTypes, context.CancellationToken); + case Diagnostic d: + context.ReportDiagnostic(d); + break; + + case string s: + code.Add(s); + break; } } - catch (Exception e) when (!(e is OperationCanceledException)) - { - result = - "// ERROR:" + Environment.NewLine + - string.Join(Environment.NewLine, e.ToString().Split(new[] { "\r\n", "\n" }, StringSplitOptions.None).Select(s => $"// {SymbolDisplay.FormatLiteral(s, quote: false)}")); - } - if (result.Length > 0) - { - context.AddSource("RegexGenerator.g.cs", result); - } + context.AddSource("RegexGenerator.g.cs", string.Join(Environment.NewLine, code)); }); } + + private sealed class LambdaComparer : IEqualityComparer + { + private readonly Func _equal; + private readonly Func _getHashCode; + + public LambdaComparer(Func equal, Func getHashCode) + { + _equal = equal; + _getHashCode = getHashCode; + } + + public bool Equals(T x, T y) => _equal(x, y); + + public int GetHashCode(T obj) => _getHashCode(obj); + } } } From 2d4f159cb47d4d2772812c74da17fdafa8be87c4 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sun, 19 Sep 2021 20:15:11 -0400 Subject: [PATCH 07/16] Use closest
matching ctor from GetRegexAsync To better test the appropriate ctor usage. --- .../tests/Regex.KnownPattern.Tests.cs | 4 ++-- .../tests/Regex.Tests.Common.cs | 21 +++++++++++++------ .../tests/RegexGeneratorHelper.netcoreapp.cs | 20 ++++++++++++------ .../tests/RegexGeneratorHelper.netfx.cs | 2 +- 4 files changed, 32 insertions(+), 15 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs index 41ba4f79372a8d..d43a29f774b516 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.KnownPattern.Tests.cs @@ -106,7 +106,7 @@ async Task IsValidEmailAsync(string email, RegexEngine engine) try { - Regex r = await RegexHelpers.GetRegexAsync(engine, @"(@)(.+)$", matchTimeout: 200); + Regex r = await RegexHelpers.GetRegexAsync(engine, @"(@)(.+)$", RegexOptions.None, TimeSpan.FromMilliseconds(200)); // Normalize the domain part of the email email = r.Replace(email, match => @@ -136,7 +136,7 @@ async Task IsValidEmailAsync(string email, RegexEngine engine) @"^(?("")("".+?(? AvailableEngines } } - public static async Task GetRegexAsync(RegexEngine engine, string pattern, RegexOptions options = RegexOptions.None, int matchTimeout = -1) + public static async Task GetRegexAsync(RegexEngine engine, string pattern, RegexOptions? options = null, TimeSpan? matchTimeout = null) { + if (options is null) + { + Assert.Null(matchTimeout); + } + switch (engine) { case RegexEngine.Interpreter: - return new Regex(pattern, options, TimeSpan.FromMilliseconds(matchTimeout)); + return + options is null ? new Regex(pattern) : + matchTimeout is null ? 
new Regex(pattern, options.Value) : + new Regex(pattern, options.Value, matchTimeout.Value); case RegexEngine.Compiled: - return new Regex(pattern, options | RegexOptions.Compiled, TimeSpan.FromMilliseconds(matchTimeout)); + return + options is null ? new Regex(pattern, RegexOptions.Compiled) : + matchTimeout is null ? new Regex(pattern, options.Value | RegexOptions.Compiled) : + new Regex(pattern, options.Value | RegexOptions.Compiled, matchTimeout.Value); case RegexEngine.SourceGenerated: return await RegexGeneratorHelper.SourceGenRegexAsync(pattern, options, matchTimeout); @@ -78,9 +90,6 @@ public static async Task GetRegexAsync(RegexEngine engine, string pattern throw new ArgumentException($"Unknown engine: {engine}"); } - - public static Task GetRegexAsync(RegexEngine engine, string pattern, RegexOptions options, TimeSpan timeout) => - GetRegexAsync(engine, pattern, options, (int)timeout.TotalMilliseconds); } public enum RegexEngine diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs index ed0512af6a9fd7..a711af0d8f6839 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs @@ -48,18 +48,26 @@ private static MetadataReference[] CreateReferences() } internal static async Task SourceGenRegexAsync( - string pattern, RegexOptions options = RegexOptions.None, int matchTimeout = -1, CancellationToken cancellationToken = default) + string pattern, RegexOptions? options = null, TimeSpan? 
matchTimeout = null, CancellationToken cancellationToken = default) { + Assert.True(options is not null || matchTimeout is null); + string attr = $"[RegexGenerator({SymbolDisplay.FormatLiteral(pattern, quote: true)}"; + if (options is not null) + { + attr += $", {string.Join(" | ", options.ToString().Split(',').Select(o => $"RegexOptions.{o.Trim()}"))}"; + if (matchTimeout is not null) + { + attr += string.Create(CultureInfo.InvariantCulture, $", {(int)matchTimeout.Value.TotalMilliseconds}"); + } + } + attr += ")]"; + // Create the source boilerplate for the pattern string code = $@" using System.Text.RegularExpressions; - public partial class C {{ - [RegexGenerator( - {SymbolDisplay.FormatLiteral(pattern, quote: true)}, - {string.Join(" | ", options.ToString().Split(',').Select(o => $"RegexOptions.{o.Trim()}"))}, - {matchTimeout.ToString(CultureInfo.InvariantCulture)})] + {attr} public static partial Regex Get(); }}"; diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netfx.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netfx.cs index f286bd3a380076..2a834bb7edcf8d 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netfx.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netfx.cs @@ -8,7 +8,7 @@ namespace System.Text.RegularExpressions.Tests { public sealed class RegexGeneratorHelper { - internal static Task SourceGenRegexAsync(string pattern, RegexOptions options = RegexOptions.None, int matchTimeout = -1, CancellationToken cancellationToken = default) => + internal static Task SourceGenRegexAsync(string pattern, RegexOptions? options = null, TimeSpan? 
matchTimeout = null, CancellationToken cancellationToken = default) => throw new NotSupportedException(); } } From 8abd7fb7d644364a753b785e0c0db76f4fb2e7d6 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sun, 19 Sep 2021 20:27:07 -0400 Subject: [PATCH 08/16] Improve a few of the polyfills --- .../gen/Polyfills.cs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/Polyfills.cs b/src/libraries/System.Text.RegularExpressions/gen/Polyfills.cs index 692271fb1a5e43..aefab1dc1b0576 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Polyfills.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/Polyfills.cs @@ -5,6 +5,8 @@ using System.Buffers; using System.Collections; using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Text; // This file provides helpers used to help compile some Regex source code (e.g. RegexParser) as part of the netstandard2.0 generator assembly. @@ -13,8 +15,13 @@ namespace System.Text { internal static class StringBuilderExtensions { - public static StringBuilder Append(this StringBuilder stringBuilder, ReadOnlySpan span) => - stringBuilder.Append(span.ToString()); + public static unsafe StringBuilder Append(this StringBuilder stringBuilder, ReadOnlySpan span) + { + fixed (char* ptr = &MemoryMarshal.GetReference(span)) + { + return stringBuilder.Append(ptr, span.Length); + } + } public static ReadOnlyMemory[] GetChunks(this StringBuilder stringBuilder) { @@ -31,9 +38,9 @@ internal static class StringExtensions { public static string Create(int length, TState state, SpanAction action) { - var array = new char[length]; - action(array, state); - return new string(array); + Span span = length <= 256 ? 
stackalloc char[length] : new char[length]; + action(span, state); + return span.ToString(); } } } From fba00eecb63609f77822755b42a5707535e4d549 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sun, 19 Sep 2021 20:39:42 -0400 Subject: [PATCH 09/16] Address PR feedback --- .../gen/RegexGenerator.Parser.cs | 27 +++---------------- .../gen/RegexGenerator.cs | 10 ++++--- .../gen/{Polyfills.cs => Stubs.cs} | 0 .../RegexGeneratorParserTests.cs | 19 +++++++++++++ 4 files changed, 29 insertions(+), 27 deletions(-) rename src/libraries/System.Text.RegularExpressions/gen/{Polyfills.cs => Stubs.cs} (100%) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs index c64e0aa388cdf7..077c9a6619e29b 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs @@ -85,7 +85,7 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => break; } - // If this isn't + // If this isn't a RegexGeneratorAttribute, skip it. if (!attributeData.AttributeClass.Equals(regexGeneratorAttributeSymbol)) { continue; @@ -199,29 +199,8 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => } // Determine the namespace the class is declared in, if any - string? ns = null; - SyntaxNode? 
potentialNamespaceParent = typeDec.Parent; - while (potentialNamespaceParent is not null && - potentialNamespaceParent is not NamespaceDeclarationSyntax && - potentialNamespaceParent is not FileScopedNamespaceDeclarationSyntax) - { - potentialNamespaceParent = potentialNamespaceParent.Parent; - } - - if (potentialNamespaceParent is BaseNamespaceDeclarationSyntax namespaceParent) - { - ns = namespaceParent.Name.ToString(); - while (true) - { - namespaceParent = namespaceParent.Parent as NamespaceDeclarationSyntax; - if (namespaceParent is null) - { - break; - } - - ns = $"{namespaceParent.Name}.{ns}"; - } - } + string? ns = regexMethodSymbol?.ContainingType?.ContainingNamespace?.ToDisplayString( + SymbolDisplayFormat.FullyQualifiedFormat.WithGlobalNamespaceStyle(SymbolDisplayGlobalNamespaceStyle.Omitted)); var rc = new RegexType { diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs index 4be6df47f45dac..661546ef6e04fd 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs @@ -25,7 +25,9 @@ public partial class RegexGenerator : IIncrementalGenerator { public void Initialize(IncrementalGeneratorInitializationContext context) { - IncrementalValueProvider> typesAndCompilation = + // Contains one entry per regex method, either the generated code for that regex method, + // a diagnostic to fail with, or null if no action should be taken for that regex. 
+ IncrementalValueProvider> codeOrDiagnostics = context.SyntaxProvider // Find all MethodDeclarationSyntax nodes attributed with RegexGenerator @@ -35,7 +37,9 @@ public void Initialize(IncrementalGeneratorInitializationContext context) // Pair each with the compilation .Combine(context.CompilationProvider) - // Use a custom comparer that ignores the compilation so that it doesn't interface with the generators caching of results based on MethodDeclarationSyntax + // Use a custom comparer that ignores the compilation. We want to avoid regenerating for regex methods + // that haven't been changed, but any change to a regex method will change the Compilation, so we ignore + // the Compilation for purposes of caching. .WithComparer(new LambdaComparer<(MethodDeclarationSyntax, Compilation)>(static (left, right) => left.Item1.Equals(left.Item2), static o => o.Item1.GetHashCode())) // Get the resulting code string or error Diagnostic for each MethodDeclarationSyntax/Compilation pair @@ -48,7 +52,7 @@ public void Initialize(IncrementalGeneratorInitializationContext context) // When there something to output, take all the generated strings and concatenate them to output, // and raise all of the created diagnostics. 
- context.RegisterSourceOutput(typesAndCompilation, static (context, results) => + context.RegisterSourceOutput(codeOrDiagnostics, static (context, results) => { var code = new List(s_headersAndUsings.Length + results.Length); diff --git a/src/libraries/System.Text.RegularExpressions/gen/Polyfills.cs b/src/libraries/System.Text.RegularExpressions/gen/Stubs.cs similarity index 100% rename from src/libraries/System.Text.RegularExpressions/gen/Polyfills.cs rename to src/libraries/System.Text.RegularExpressions/gen/Stubs.cs diff --git a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs index a58c07fe35a2c9..789f6b4687b377 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs @@ -206,6 +206,25 @@ partial class C ", compile: true)); } + [Fact] + public async Task Valid_ClassWithNestedNamespaces() + { + Assert.Empty(await RunGenerator(@" + using System.Text.RegularExpressions; + namespace A + { + namespace B + { + partial class C + { + [RegexGenerator(""ab"")] + private static partial Regex Valid(); + } + } + } + ", compile: true)); + } + [Fact] public async Task Valid_NestedClassWithoutNamespace() { From 3963f7f5aed06fbe9cfef45ab5317e4a2d56077c Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 20 Sep 2021 20:26:47 -0400 Subject: [PATCH 10/16] Fully qualify types and remove unnecessary $s Also fixed one place where the IL we were generating wasn't as good as the reflection emit code. 
--- .../gen/RegexGenerator.Emitter.cs | 222 +++++++++--------- .../gen/RegexGenerator.cs | 4 +- 2 files changed, 108 insertions(+), 118 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 5b39621e53722d..bcb50f35ecf655 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -24,25 +24,15 @@ namespace System.Text.RegularExpressions.Generator public partial class RegexGenerator { /// Code for a [GeneratedCode] attribute to put on the top-level generated members. - private static readonly string s_generatedCodeAttribute = $"[GeneratedCodeAttribute(\"{typeof(RegexGenerator).Assembly.GetName().Name}\", \"{typeof(RegexGenerator).Assembly.GetName().Version}\")]"; + private static readonly string s_generatedCodeAttribute = $"[global::System.CodeDom.Compiler.GeneratedCodeAttribute(\"{typeof(RegexGenerator).Assembly.GetName().Name}\", \"{typeof(RegexGenerator).Assembly.GetName().Version}\")]"; /// Header comments and usings to include at the top of every generated file. - private static readonly string[] s_headersAndUsings = new string[] + private static readonly string[] s_headers = new string[] { "// ", "#nullable enable", "#pragma warning disable CS0168 // Variable declared but never used", "#pragma warning disable CS0164 // Unreferenced label", "", - "using System;", - "using System.CodeDom.Compiler;", - "using System.Collections;", - "using System.ComponentModel;", - "using System.Diagnostics;", - "using System.Globalization;", - "using System.Runtime.CompilerServices;", - "using System.Text.RegularExpressions;", - "using System.Threading;", - "", }; /// Generates the code for one regular expression class. 
@@ -133,24 +123,24 @@ private static bool SupportsCustomCodeGeneration(RegexMethod rm) private static void EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, string id) { string patternExpression = Literal(rm.Pattern); - string optionsExpression = $"(RegexOptions)({rm.Options})"; + string optionsExpression = $"(global::System.Text.RegularExpressions.RegexOptions)({rm.Options})"; string timeoutExpression = rm.MatchTimeout == Timeout.Infinite ? - "Timeout.InfiniteTimeSpan" : - $"TimeSpan.FromMilliseconds({rm.MatchTimeout.Value.ToString(CultureInfo.InvariantCulture)})"; + "global::System.Threading.Timeout.InfiniteTimeSpan" : + $"global::System.TimeSpan.FromMilliseconds({rm.MatchTimeout.Value.ToString(CultureInfo.InvariantCulture)})"; writer.WriteLine(s_generatedCodeAttribute); - writer.WriteLine($"{rm.Modifiers} Regex {rm.MethodName}() => {id}.Instance;"); + writer.WriteLine($"{rm.Modifiers} global::System.Text.RegularExpressions.Regex {rm.MethodName}() => {id}.Instance;"); writer.WriteLine(); writer.WriteLine(s_generatedCodeAttribute); - writer.WriteLine("[EditorBrowsable(EditorBrowsableState.Never)]"); - writer.WriteLine($"{(writer.Indent != 0 ? "private" : "internal")} sealed class {id} : Regex"); + writer.WriteLine("[global::System.ComponentModel.EditorBrowsable(global::System.ComponentModel.EditorBrowsableState.Never)]"); + writer.WriteLine($"{(writer.Indent != 0 ? "private" : "internal")} sealed class {id} : global::System.Text.RegularExpressions.Regex"); writer.WriteLine("{"); - writer.Write($" public static Regex Instance {{ get; }} = "); + writer.Write(" public static global::System.Text.RegularExpressions.Regex Instance { get; } = "); // If we can't support custom generation for this regex, spit out a Regex constructor call. 
if (!SupportsCustomCodeGeneration(rm)) { - writer.WriteLine($"new Regex({patternExpression}, {optionsExpression}, {timeoutExpression});"); + writer.WriteLine($"new global::System.Text.RegularExpressions.Regex({patternExpression}, {optionsExpression}, {timeoutExpression});"); writer.WriteLine("}"); return; } @@ -165,19 +155,19 @@ private static void EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, s writer.WriteLine($" factory = new RunnerFactory();"); if (rm.Code.Caps is not null) { - writer.Write($" Caps = new Hashtable {{"); + writer.Write(" Caps = new global::System.Collections.Hashtable {"); AppendHashtableContents(writer, rm.Code.Caps); writer.WriteLine(" };"); } if (rm.Tree.CapNames is not null) { - writer.Write($" CapNames = new Hashtable {{"); + writer.Write(" CapNames = new global::System.Collections.Hashtable {"); AppendHashtableContents(writer, rm.Tree.CapNames); writer.WriteLine(" };"); } if (rm.Tree.CapsList is not null) { - writer.Write($" capslist = new string[] {{"); + writer.Write(" capslist = new string[] {"); string separator = ""; foreach (string s in rm.Tree.CapsList) { @@ -188,18 +178,18 @@ private static void EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, s writer.WriteLine(" };"); } writer.WriteLine($" capsize = {rm.Code.CapSize};"); - writer.WriteLine($" InitializeReferences();"); + writer.WriteLine($" base.InitializeReferences();"); writer.WriteLine($" }}"); writer.WriteLine(" "); - writer.WriteLine($" private sealed class RunnerFactory : RegexRunnerFactory"); + writer.WriteLine($" private sealed class RunnerFactory : global::System.Text.RegularExpressions.RegexRunnerFactory"); writer.WriteLine($" {{"); - writer.WriteLine($" protected override RegexRunner CreateInstance() => new Runner();"); + writer.WriteLine($" protected override global::System.Text.RegularExpressions.RegexRunner CreateInstance() => new Runner();"); writer.WriteLine(); - writer.WriteLine($" private sealed class Runner : RegexRunner"); + 
writer.WriteLine($" private sealed class Runner : global::System.Text.RegularExpressions.RegexRunner"); writer.WriteLine($" {{"); // Main implementation methods - writer.WriteLine($" protected override void InitTrackCount() => runtrackcount = {rm.Code.TrackCount};"); + writer.WriteLine($" protected override void InitTrackCount() => base.runtrackcount = {rm.Code.TrackCount};"); writer.WriteLine(); writer.WriteLine($" protected override bool FindFirstChar()"); writer.WriteLine($" {{"); @@ -227,7 +217,7 @@ static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht) writer.Write(separator); separator = ", "; - writer.Write($" {{ "); + writer.Write(" { "); if (en.Key is int key) { writer.Write(key); @@ -259,13 +249,13 @@ private static void EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, { writer.WriteLine("int runtextbeg = base.runtextbeg;"); } - writer.WriteLine($"int ch;"); + writer.WriteLine("int ch;"); writer.WriteLine(); // Generate length check. If the input isn't long enough to possibly match, fail quickly. // It's rare for min required length to be 0, so we don't bother special-casing the check, // especially since we want the "return false" code regardless. - writer.WriteLine($"// Minimum required length check"); + writer.WriteLine("// Minimum required length check"); int minRequiredLength = rm.Tree.MinRequiredLength; string minRequiredLengthOffset = rm.Tree.MinRequiredLength > 0 ? 
$" - {rm.Tree.MinRequiredLength}" : ""; Debug.Assert(minRequiredLength >= 0); @@ -293,7 +283,7 @@ void EmitAnchorAndLeadingChecks() switch (code.LeadingAnchor) { case RegexPrefixAnalyzer.Beginning: - writer.WriteLine($"// Beginning \\A anchor"); + writer.WriteLine("// Beginning \\A anchor"); if (!rtl) { using (EmitBlock(writer, "if (runtextpos > runtextbeg)")) @@ -315,7 +305,7 @@ void EmitAnchorAndLeadingChecks() return; case RegexPrefixAnalyzer.Start: - writer.WriteLine($"// Start \\G anchor"); + writer.WriteLine("// Start \\G anchor"); if (!rtl) { using (EmitBlock(writer, "if (runtextpos > runtextstart)")) @@ -338,7 +328,7 @@ void EmitAnchorAndLeadingChecks() case RegexPrefixAnalyzer.EndZ: // TODO: Why are the LTR and RTL cases inconsistent here with RegexOptions.Compiled? - writer.WriteLine($"// End \\Z anchor"); + writer.WriteLine("// End \\Z anchor"); if (!rtl) { using (EmitBlock(writer, "if (runtextpos < runtextend - 1)")) @@ -360,7 +350,7 @@ void EmitAnchorAndLeadingChecks() return; case RegexPrefixAnalyzer.End when minRequiredLength == 0: // if it's > 0, we already output a more stringent check - writer.WriteLine($"// End \\z anchor"); + writer.WriteLine("// End \\z anchor"); if (!rtl) { using (EmitBlock(writer, "if (runtextpos < runtextend)")) @@ -456,7 +446,7 @@ void EmitAnchorAndLeadingChecks() writer.WriteLine("goto DefaultAdvance;"); } - writer.Write($"offset = "); + writer.Write("offset = "); int negativeRange = rbm.HighASCII - rbm.LowASCII + 1; if (negativeRange > 1) { @@ -496,11 +486,11 @@ void EmitAnchorAndLeadingChecks() } writer.WriteLine($"{offset.ToString(CultureInfo.InvariantCulture)};"); } - writer.WriteLine($"goto Advance;"); + writer.WriteLine("goto Advance;"); writer.WriteLine(); - writer.WriteLine($"PartialMatch:"); - writer.WriteLine($"int test = runtextpos;"); + writer.WriteLine("PartialMatch:"); + writer.WriteLine("int test = runtextpos;"); int nextAvailableLabelId = 0; int prevLabelOffset = int.MaxValue; @@ -531,7 +521,7 @@ void 
EmitAnchorAndLeadingChecks() writer.WriteLine($"L{prevLabel}:"); writer.WriteLine($"offset = {prevLabelOffset.ToString(CultureInfo.InvariantCulture)};"); - writer.WriteLine($"goto Advance;"); + writer.WriteLine("goto Advance;"); writer.WriteLine(); writer.WriteLine($"L{lNext}:"); @@ -540,9 +530,9 @@ void EmitAnchorAndLeadingChecks() writer.WriteLine(); writer.WriteLine(!rtl ? - $"base.runtextpos = test;" : - $"base.runtextpos = test + 1;"); - writer.WriteLine($"return true;"); + "base.runtextpos = test;" : + "base.runtextpos = test + 1;"); + writer.WriteLine("return true;"); } else if (code.LeadingCharClasses is null) { @@ -597,7 +587,7 @@ void EmitAnchorAndLeadingChecks() writer.WriteLine(); } - writer.WriteLine("ReadOnlySpan span = runtext.AsSpan(runtextpos, runtextend - runtextpos);"); + writer.WriteLine("global::System.ReadOnlySpan span = global::System.MemoryExtensions.AsSpan(runtext, runtextpos, runtextend - runtextpos);"); // If we can use IndexOf{Any}, try to accelerate the skip loop via vectorization to match the first prefix. // We can use it if this is a case-sensitive class with a small number of characters in the class. @@ -625,9 +615,9 @@ void EmitAnchorAndLeadingChecks() string span = needLoop ? 
"span.Slice(i)" : "span"; string indexOf = setCharsCount switch { - 1 => $"{span}.IndexOf({Literal(setChars[0])})", - 2 => $"{span}.IndexOfAny({Literal(setChars[0])}, {Literal(setChars[1])})", - _ => $"{span}.IndexOfAny({Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])})", + 1 => $"global::System.MemoryExtensions.IndexOf({span}, {Literal(setChars[0])})", + 2 => $"global::System.MemoryExtensions.IndexOfAny({span}, {Literal(setChars[0])}, {Literal(setChars[1])})", + _ => $"global::System.MemoryExtensions.IndexOfAny({span}, {Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])})", }; if (needLoop) @@ -651,7 +641,7 @@ void EmitAnchorAndLeadingChecks() else { writer.WriteLine($"int i = {indexOf};"); - using (EmitBlock(writer, $"if (i < 0)")) + using (EmitBlock(writer, "if (i < 0)")) { writer.WriteLine("goto ReturnFalse;"); } @@ -712,11 +702,11 @@ private static void EmitSimplifiedGo(IndentedTextWriter writer, RegexMethod rm, // Declare some locals. string textSpanLocal = "textSpan"; - writer.WriteLine($"string runtext = base.runtext!;"); - writer.WriteLine($"int runtextpos = base.runtextpos;"); - writer.WriteLine($"int runtextend = base.runtextend;"); - writer.WriteLine($"int originalruntextpos = runtextpos;"); - writer.WriteLine($"char ch;"); + writer.WriteLine("string runtext = base.runtext!;"); + writer.WriteLine("int runtextpos = base.runtextpos;"); + writer.WriteLine("int runtextend = base.runtextend;"); + writer.WriteLine("int originalruntextpos = runtextpos;"); + writer.WriteLine("char ch;"); hasTimeout = EmitLoopTimeoutCounterIfNeeded(writer, rm); // TextInfo textInfo = CultureInfo.CurrentCulture.TextInfo; // only if the whole expression or any subportion is ignoring case, and we're not using invariant @@ -751,7 +741,7 @@ private static void EmitSimplifiedGo(IndentedTextWriter writer, RegexMethod rm, writer.WriteLine($"runtextpos += {textSpanPos};"); } writer.WriteLine("base.runtextpos = runtextpos;"); - 
writer.WriteLine("Capture(0, originalruntextpos, runtextpos);"); + writer.WriteLine("base.Capture(0, originalruntextpos, runtextpos);"); writer.WriteLine("return;"); writer.WriteLine(); @@ -760,7 +750,7 @@ private static void EmitSimplifiedGo(IndentedTextWriter writer, RegexMethod rm, MarkLabel(doneLabel); if ((node.Options & RegexNode.HasCapturesFlag) != 0) { - writer.WriteLine("while (Crawlpos() != 0) Uncapture();"); + writer.WriteLine("while (base.Crawlpos() != 0) base.Uncapture();"); } else { @@ -777,9 +767,9 @@ void LoadTextSpanLocal(IndentedTextWriter writer, bool defineLocal = false) { if (defineLocal) { - writer.Write("ReadOnlySpan "); + writer.Write("global::System.ReadOnlySpan "); } - writer.WriteLine($"{textSpanLocal} = runtext.AsSpan(runtextpos, runtextend - runtextpos);"); + writer.WriteLine($"{textSpanLocal} = global::System.MemoryExtensions.AsSpan(runtext, runtextpos, runtextend - runtextpos);"); } // Emits the sum of a constant and a value from a local. @@ -836,7 +826,7 @@ void EmitAtomicAlternate(RegexNode node) bool hasStartingCrawlpos = (node.Options & RegexNode.HasCapturesFlag) != 0; if (hasStartingCrawlpos) { - writer.WriteLine("int startingCrawlpos = Crawlpos();"); + writer.WriteLine("int startingCrawlpos = base.Crawlpos();"); } writer.WriteLine(); @@ -929,13 +919,13 @@ void EmitCapture(RegexNode node) EmitNode(node.Child(0)); TransferTextSpanPosToRunTextPos(); - writer.WriteLine($"Capture({capnum}, {startingRunTextPosName}, runtextpos);"); + writer.WriteLine($"base.Capture({capnum}, {startingRunTextPosName}, runtextpos);"); } // Emits code to unwind the capture stack until the crawl position specified in the provided local. void EmitUncaptureUntil() { - writer.WriteLine("while (Crawlpos() != startingCrawlpos) Uncapture();"); + writer.WriteLine("while (base.Crawlpos() != startingCrawlpos) base.Uncapture();"); } // Emits the code to handle a positive lookahead assertion. 
@@ -1148,20 +1138,20 @@ void EmitBoundary(RegexNode node) switch (node.Type) { case RegexNode.Boundary: - call = "!IsBoundary"; + call = "!base.IsBoundary"; break; case RegexNode.NonBoundary: - call = "IsBoundary"; + call = "base.IsBoundary"; break; case RegexNode.ECMABoundary: - call = "!IsECMABoundary"; + call = "!base.IsECMABoundary"; break; default: Debug.Assert(node.Type == RegexNode.NonECMABoundary); - call = "IsECMABoundary"; + call = "base.IsECMABoundary"; break; } @@ -1264,7 +1254,7 @@ void EmitMultiChar(RegexNode node) } else if (!caseInsensitive) { - using (EmitBlock(writer, $"if (!{textSpanLocal}.Slice({textSpanPos}).StartsWith({Literal(node.Str)}))")) + using (EmitBlock(writer, $"if (!global::System.MemoryExtensions.StartsWith({textSpanLocal}.Slice({textSpanPos}), {Literal(node.Str)}))")) { writer.WriteLine($"goto {doneLabel};"); } @@ -1317,7 +1307,7 @@ void EmitSingleCharRepeater(RegexNode node) else { string spanLocal = "slice"; // As this repeater doesn't wrap arbitrary node emits, this shouldn't conflict with anything - writer.WriteLine($"ReadOnlySpan {spanLocal} = {textSpanLocal}.Slice({textSpanPos}, {iterations});"); + writer.WriteLine($"global::System.ReadOnlySpan {spanLocal} = {textSpanLocal}.Slice({textSpanPos}, {iterations});"); string i = GetNextLocalId(); using (EmitBlock(writer, $"for (int {i} = 0; {i} < {spanLocal}.Length; {i}++)")) { @@ -1407,12 +1397,12 @@ void EmitSingleCharAtomicLoop(RegexNode node) // restriction is purely for simplicity; it could be removed in the future with additional code to // handle the unbounded case. 
- writer.Write($"int {iterationLocal} = {textSpanLocal}"); + writer.Write($"int {iterationLocal} = global::System.MemoryExtensions.IndexOf({textSpanLocal}"); if (textSpanPos > 0) { writer.Write($".Slice({textSpanPos})"); } - writer.WriteLine($".IndexOf({Literal(node.Ch)});"); + writer.WriteLine($", {Literal(node.Ch)});"); using (EmitBlock(writer, $"if ({iterationLocal} != -1)")) { @@ -1433,14 +1423,14 @@ void EmitSingleCharAtomicLoop(RegexNode node) // have been reduced to a Notoneloopatomic), we can use an IndexOfAny to find any of the target characters. // As with the notoneloopatomic above, the unbounded constraint is purely for simplicity. - writer.Write($"int {iterationLocal} = {textSpanLocal}"); + writer.Write($"int {iterationLocal} = global::System.MemoryExtensions.IndexOfAny({textSpanLocal}"); if (textSpanPos != 0) { writer.Write($".Slice({textSpanPos})"); } writer.WriteLine(numSetChars == 2 ? - $".IndexOfAny({Literal(setChars[0])}, {Literal(setChars[1])});" : - $".IndexOfAny({Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])});"); + $", {Literal(setChars[0])}, {Literal(setChars[1])});" : + $", {Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])});"); using (EmitBlock(writer, $"if ({iterationLocal} != -1)")) { writer.WriteLine($"goto {doneLabel};"); @@ -1676,14 +1666,14 @@ private static void EmitCompleteGo(IndentedTextWriter writer, RegexMethod rm, st // Generates the forward logic corresponding directly to the regex codes. // In the absence of backtracking, this is all we would need. 
- writer.WriteLine($"string runtext = base.runtext!;"); - writer.WriteLine($"int runtextbeg = base.runtextbeg;"); - writer.WriteLine($"int runtextend = base.runtextend;"); - writer.WriteLine($"int runtextpos = base.runtextpos;"); - writer.WriteLine($"int[] runtrack = base.runtrack!;"); - writer.WriteLine($"int runtrackpos = base.runtrackpos;"); - writer.WriteLine($"int[] runstack = base.runstack!;"); - writer.WriteLine($"int runstackpos = base.runstackpos;"); + writer.WriteLine("string runtext = base.runtext!;"); + writer.WriteLine("int runtextbeg = base.runtextbeg;"); + writer.WriteLine("int runtextend = base.runtextend;"); + writer.WriteLine("int runtextpos = base.runtextpos;"); + writer.WriteLine("int[] runtrack = base.runtrack!;"); + writer.WriteLine("int runtrackpos = base.runtrackpos;"); + writer.WriteLine("int[] runstack = base.runstack!;"); + writer.WriteLine("int runstackpos = base.runstackpos;"); writer.WriteLine("int tmp1, tmp2, ch;"); bool hasTimeout = EmitLoopTimeoutCounterIfNeeded(writer, rm); bool hasTextInfo = EmitInitializeCultureForGoIfNecessary(writer, rm); @@ -1710,23 +1700,23 @@ private static void EmitCompleteGo(IndentedTextWriter writer, RegexMethod rm, st MarkLabel(Backtrack); // (Equivalent of EnsureStorage, but written to avoid unnecessary local spilling.) 
- writer.WriteLine("int limit = runtrackcount * 4;"); + writer.WriteLine("int limit = base.runtrackcount * 4;"); using (EmitBlock(writer, "if (runstackpos < limit)")) { writer.WriteLine("base.runstackpos = runstackpos;"); - writer.WriteLine("DoubleStack(); // might change runstackpos and runstack"); + writer.WriteLine("base.DoubleStack(); // might change runstackpos and runstack"); writer.WriteLine("runstackpos = base.runstackpos;"); writer.WriteLine("runstack = base.runstack!;"); } using (EmitBlock(writer, "if (runtrackpos < limit)")) { writer.WriteLine("base.runtrackpos = runtrackpos;"); - writer.WriteLine("DoubleTrack(); // might change runtrackpos and runtrack"); + writer.WriteLine("base.DoubleTrack(); // might change runtrackpos and runtrack"); writer.WriteLine("runtrackpos = base.runtrackpos;"); writer.WriteLine("runtrack = base.runtrack!;"); } writer.WriteLine(); - using (EmitBlock(writer, $"switch (runtrack[runtrackpos++])")) + using (EmitBlock(writer, "switch (runtrack[runtrackpos++])")) { for (int i = 0; i < noteCount; i++) { @@ -1751,7 +1741,7 @@ private static void EmitCompleteGo(IndentedTextWriter writer, RegexMethod rm, st using (EmitBlock(writer, "default:")) { - writer.WriteLine("Debug.Fail($\"Unexpected backtracking state {runtrack[runtrackpos - 1]}\");"); + writer.WriteLine("global::System.Diagnostics.Debug.Fail($\"Unexpected backtracking state {runtrack[runtrackpos - 1]}\");"); writer.WriteLine("break;"); } } @@ -1805,7 +1795,7 @@ void EmitOneCode(string? label) break; case RegexCode.Testref: - using (EmitBlock(writer, $"if (!IsMatched({Operand(0)}))")) + using (EmitBlock(writer, $"if (!base.IsMatched({Operand(0)}))")) { writer.WriteLine($"goto {Backtrack};"); } @@ -1852,7 +1842,7 @@ void EmitOneCode(string? 
label) { if (Operand(1) != -1) { - using (EmitBlock(writer, $"if (!IsMatched({Operand(1)}))")) + using (EmitBlock(writer, $"if (!base.IsMatched({Operand(1)}))")) { writer.WriteLine($"goto {Backtrack};"); } @@ -1861,8 +1851,8 @@ void EmitOneCode(string? label) const string Stacked = "tmp1"; writer.WriteLine($"{Stacked} = {PopStack()};"); writer.WriteLine(Operand(1) != -1 ? - $"TransferCapture({Operand(0)}, {Operand(1)}, {Stacked}, runtextpos);" : - $"Capture({Operand(0)}, {Stacked}, runtextpos);"); + $"base.TransferCapture({Operand(0)}, {Operand(1)}, {Stacked}, runtextpos);" : + $"base.Capture({Operand(0)}, {Stacked}, runtextpos);"); PushTrack(Stacked); TrackUnique(Operand(0) != -1 && Operand(1) != -1 ? Capback2 : Capback); } @@ -1870,10 +1860,10 @@ void EmitOneCode(string? label) case RegexCode.Capturemark | RegexCode.Back: PushStack(PopTrack()); - writer.WriteLine("Uncapture();"); + writer.WriteLine("base.Uncapture();"); if (Operand(0) != -1 && Operand(1) != -1) { - writer.WriteLine("Uncapture();"); + writer.WriteLine("base.Uncapture();"); } writer.WriteLine($"goto {Backtrack};"); break; @@ -2046,7 +2036,7 @@ void EmitOneCode(string? label) case RegexCode.Setjump: PushStack("runtrack.Length - runtrackpos"); - PushStack("Crawlpos()"); + PushStack("base.Crawlpos()"); TrackUnique(Stackpop2); break; @@ -2060,7 +2050,7 @@ void EmitOneCode(string? label) const string Stacked = "tmp1"; writer.WriteLine($"{Stacked} = {PopStack()}; // stacked"); writer.WriteLine($"runtrackpos = runtrack.Length - {PopStack()};"); - writer.WriteLine($"while (Crawlpos() != {Stacked}) Uncapture();"); + writer.WriteLine($"while (base.Crawlpos() != {Stacked}) base.Uncapture();"); writer.WriteLine($"goto {Backtrack};"); } break; @@ -2079,7 +2069,7 @@ void EmitOneCode(string? 
label) { const string TrackedCrawlpos = "tmp1"; writer.WriteLine($"{TrackedCrawlpos} = {PopTrack()}; // tracked crawlpos"); - writer.WriteLine($"while (Crawlpos() != {TrackedCrawlpos}) Uncapture();"); + writer.WriteLine($"while (base.Crawlpos() != {TrackedCrawlpos}) base.Uncapture();"); writer.WriteLine($"goto {Backtrack};"); } break; @@ -2108,7 +2098,7 @@ void EmitOneCode(string? label) case RegexCode.Boundary: case RegexCode.NonBoundary: - using (EmitBlock(writer, $"if ({(Code() == RegexCode.Boundary ? "!" : "")}IsBoundary(runtextpos, runtextbeg, runtextend))")) + using (EmitBlock(writer, $"if ({(Code() == RegexCode.Boundary ? "!" : "")}base.IsBoundary(runtextpos, runtextbeg, runtextend))")) { writer.WriteLine($"goto {Backtrack};"); } @@ -2116,7 +2106,7 @@ void EmitOneCode(string? label) case RegexCode.ECMABoundary: case RegexCode.NonECMABoundary: - using (EmitBlock(writer, $"if ({(Code() == RegexCode.ECMABoundary ? "!" : "")}IsECMABoundary(runtextpos, runtextbeg, runtextend))")) + using (EmitBlock(writer, $"if ({(Code() == RegexCode.ECMABoundary ? "!" : "")}base.IsECMABoundary(runtextpos, runtextbeg, runtextend))")) { writer.WriteLine($"goto {Backtrack};"); } @@ -2242,12 +2232,12 @@ void EmitOneCode(string? label) const string Length = "tmp1"; const string Index = "tmp2"; - using (EmitBlock(writer, $"if (!IsMatched({Operand(0)}))")) + using (EmitBlock(writer, $"if (!base.IsMatched({Operand(0)}))")) { writer.WriteLine($"goto {((options & RegexOptions.ECMAScript) != 0 ? AdvanceLabel() : Backtrack)};"); } - writer.WriteLine($"{Length} = MatchLength({Operand(0)}); // length"); + writer.WriteLine($"{Length} = base.MatchLength({Operand(0)}); // length"); using (EmitBlock(writer, !IsRightToLeft() ? $"if (runtextend - runtextpos < {Length})" : $"if (runtextpos - runtextbeg < {Length})")) { @@ -2256,12 +2246,12 @@ void EmitOneCode(string? 
label) if (!IsRightToLeft()) { - writer.WriteLine($"{Index} = MatchIndex({Operand(0)}) + {Length}; // index"); + writer.WriteLine($"{Index} = base.MatchIndex({Operand(0)}) + {Length}; // index"); writer.WriteLine($"runtextpos += {Length};"); } else { - writer.WriteLine($"{Index} = MatchIndex({Operand(0)}); // index"); + writer.WriteLine($"{Index} = base.MatchIndex({Operand(0)}); // index"); writer.WriteLine($"runtextpos -= {Length};"); } @@ -2412,8 +2402,8 @@ void EmitOneCode(string? label) else { writer.WriteLine(!IsRightToLeft() ? - $"{Len} = Math.Min(runtextend - runtextpos, {c}); // length" : - $"{Len} = Math.Min(runtextpos - runtextbeg, {c}); // length"); + $"{Len} = global::System.Math.Min(runtextend - runtextpos, {c}); // length" : + $"{Len} = global::System.Math.Min(runtextpos - runtextbeg, {c}); // length"); } string? set = Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic ? rm.Code.Strings[Operand(0)] : null; @@ -2426,7 +2416,7 @@ void EmitOneCode(string? label) !IsRightToLeft() && (!IsCaseInsensitive() || !RegexCharClass.ParticipatesInCaseConversion(Operand(0)))) { - writer.WriteLine($"{I} = runtext.AsSpan(runtextpos, {Len}).IndexOf({Literal((char)Operand(0))}); // i"); + writer.WriteLine($"{I} = global::System.MemoryExtensions.IndexOf(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal((char)Operand(0))}); // i"); using (EmitBlock(writer, $"if ({I} == -1)")) { writer.WriteLine($"runtextpos += {Len};"); @@ -2449,7 +2439,7 @@ void EmitOneCode(string? label) // to search for those chars. 
Debug.Assert(numSetChars is 2 or 3); - writer.Write($"{I} = runtext.AsSpan(runtextpos, {Len}).IndexOfAny({Literal(setChars[0])}, {Literal(setChars[1])}"); + writer.Write($"{I} = global::System.MemoryExtensions.IndexOfAny(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal(setChars[0])}, {Literal(setChars[1])}"); if (numSetChars == 3) { writer.Write($", {Literal(setChars[2])}"); @@ -2578,8 +2568,8 @@ void EmitOneCode(string? label) else { writer.WriteLine(!IsRightToLeft() ? - $"{C} = Math.Min(runtextend - runtextpos, {count}); // count" : - $"{C} = Math.Min(runtextpos - runtextbeg, {count}); // count"); + $"{C} = global::System.Math.Min(runtextend - runtextpos, {count}); // count" : + $"{C} = global::System.Math.Min(runtextpos - runtextbeg, {count}); // count"); } using (EmitBlock(writer, $"if ({C} <= 0)")) @@ -2813,7 +2803,7 @@ private static bool EmitLoopTimeoutCounterIfNeeded(IndentedTextWriter writer, Re { if (rm.MatchTimeout.HasValue && rm.MatchTimeout.Value != Timeout.Infinite) { - writer.WriteLine($"int loopTimeoutCounter = 0;"); + writer.WriteLine("int loopTimeoutCounter = 0;"); return true; } @@ -2831,7 +2821,7 @@ private static void EmitTimeoutCheck(IndentedTextWriter writer, bool hasTimeout) using (EmitBlock(writer, $"if (++loopTimeoutCounter == {LoopTimeoutCheckCount})")) { writer.WriteLine("loopTimeoutCounter = 0;"); - writer.WriteLine("CheckTimeout();"); + writer.WriteLine("base.CheckTimeout();"); } writer.WriteLine(); } @@ -2857,7 +2847,7 @@ private static bool EmitInitializeCultureForGoIfNecessary(IndentedTextWriter wri if (needsCulture) { - writer.WriteLine("TextInfo textInfo = CultureInfo.CurrentCulture.TextInfo;"); + writer.WriteLine("global::System.Globalization.TextInfo textInfo = global::System.Globalization.CultureInfo.CurrentCulture.TextInfo;"); return true; } } @@ -2899,7 +2889,7 @@ private static void EmitTextInfoIfRequired(IndentedTextWriter writer, ref bool t { hasTextInfo = true; writer.WriteLine("// IgnoreCase 
with CultureInfo.CurrentCulture"); - writer.WriteLine("TextInfo textInfo = CultureInfo.CurrentCulture.TextInfo;"); + writer.WriteLine("global::System.Globalization.TextInfo textInfo = global::System.Globalization.CultureInfo.CurrentCulture.TextInfo;"); writer.WriteLine(); } } @@ -2971,7 +2961,7 @@ private static string MatchCharacterClass(bool hasTextInfo, RegexOptions options // we get smaller code), and it's what we'd do for the fallback (which we get to avoid generating) as part of CharInClass. if (!invariant && RegexCharClass.TryGetSingleUnicodeCategory(charClass, out UnicodeCategory category, out bool negated)) { - return $"(char.GetUnicodeCategory({chExpr}) {(negated ? "!=" : "==")} UnicodeCategory.{category})"; + return $"(char.GetUnicodeCategory({chExpr}) {(negated ? "!=" : "==")} global::System.Globalization.UnicodeCategory.{category})"; } // Next, if there's only 2 or 3 chars in the set (fairly common due to the sets we create for prefixes), @@ -2985,10 +2975,10 @@ private static string MatchCharacterClass(bool hasTextInfo, RegexOptions options switch (numChars) { case 2: - return $"({chExpr} is {Literal(setChars[0])} or {Literal(setChars[1])})"; + return $"(((ch = {chExpr}) == {Literal(setChars[0])}) | (ch == {Literal(setChars[1])}))"; case 3: - return $"({chExpr} is {Literal(setChars[0])} or {Literal(setChars[1])} or {Literal(setChars[2])})"; + return $"(((ch = {chExpr}) == {Literal(setChars[0])}) | (ch == {Literal(setChars[1])}) | (ch == {Literal(setChars[2])}))"; } } } @@ -3006,8 +2996,8 @@ private static string MatchCharacterClass(bool hasTextInfo, RegexOptions options // extend the analysis to produce a known lower-bound and compare against // that rather than always using 128 as the pivot point.) return invariant ? 
- $"((ch = {chExpr}) >= 128 && CharInClass(char.ToLowerInvariant((char)ch), {Literal(charClass)}))" : - $"((ch = {chExpr}) >= 128 && CharInClass((char)ch, {Literal(charClass)}))"; + $"((ch = {chExpr}) >= 128 && global::System.Text.RegularExpressions.RegexRunner.CharInClass(char.ToLowerInvariant((char)ch), {Literal(charClass)}))" : + $"((ch = {chExpr}) >= 128 && global::System.Text.RegularExpressions.RegexRunner.CharInClass((char)ch, {Literal(charClass)}))"; } if (analysis.AllAsciiContained) @@ -3016,8 +3006,8 @@ private static string MatchCharacterClass(bool hasTextInfo, RegexOptions options // if the class were the negated example from case 1 above: // [^\p{IsGreek}\p{IsGreekExtended}]. return invariant ? - $"((ch = {chExpr}) < 128 || CharInClass(char.ToLowerInvariant((char)ch), {Literal(charClass)}))" : - $"((ch = {chExpr}) < 128 || CharInClass((char)ch, {Literal(charClass)}))"; + $"((ch = {chExpr}) < 128 || global::System.Text.RegularExpressions.RegexRunner.CharInClass(char.ToLowerInvariant((char)ch), {Literal(charClass)}))" : + $"((ch = {chExpr}) < 128 || global::System.Text.RegularExpressions.RegexRunner.CharInClass((char)ch, {Literal(charClass)}))"; } } diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs index 661546ef6e04fd..dfed01439d81f0 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs @@ -54,10 +54,10 @@ public void Initialize(IncrementalGeneratorInitializationContext context) // and raise all of the created diagnostics. context.RegisterSourceOutput(codeOrDiagnostics, static (context, results) => { - var code = new List(s_headersAndUsings.Length + results.Length); + var code = new List(s_headers.Length + results.Length); // Add file header and required usings - code.AddRange(s_headersAndUsings); + code.AddRange(s_headers); foreach (object? 
result in results) { From 1c1c1f9519e800d4ba32f29fe0ea32f78bcf113f Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 20 Sep 2021 20:39:15 -0400 Subject: [PATCH 11/16] Suppress a couple more warnings --- .../gen/RegexGenerator.Emitter.cs | 4 +++- .../tests/RegexGeneratorHelper.netcoreapp.cs | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index bcb50f35ecf655..52c1e60838c52c 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -30,8 +30,10 @@ public partial class RegexGenerator { "// ", "#nullable enable", - "#pragma warning disable CS0168 // Variable declared but never used", + "#pragma warning disable CS0162 // Unreachable code", "#pragma warning disable CS0164 // Unreferenced label", + "#pragma warning disable CS0168 // Variable declared but never used", + "#pragma warning disable CS0219 // Variable assigned but never used", "", }; diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs index a711af0d8f6839..5344bf8888fcd4 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs @@ -107,7 +107,7 @@ public partial class C var dll = new MemoryStream(); comp = comp.AddSyntaxTrees(generatorResults.GeneratedTrees.ToArray()); EmitResult results = comp.Emit(dll, options: s_emitOptions, cancellationToken: cancellationToken); - if (!results.Success) + if (!results.Success || results.Diagnostics.Length != 0) { throw new ArgumentException( string.Join(Environment.NewLine, 
results.Diagnostics.Concat(generatorResults.Diagnostics)) + Environment.NewLine + From 818f8448f8974e5ab2d8e3bed65ff214791490c7 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 20 Sep 2021 20:44:24 -0400 Subject: [PATCH 12/16] Fix stray ! --- .../gen/RegexGenerator.Emitter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 52c1e60838c52c..2744bd7309dee2 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -2774,7 +2774,7 @@ int AddBacktrackNote(int flags, string l, int codepos) int AddGoto(int destpos) { - if (forwardJumpsThroughSwitch![destpos] == -1) + if (forwardJumpsThroughSwitch[destpos] == -1) { forwardJumpsThroughSwitch[destpos] = AddBacktrackNote(0, labels![destpos], destpos); } From 2435db44ac84492e873708564f504bdae2567001 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Tue, 21 Sep 2021 11:03:32 -0400 Subject: [PATCH 13/16] Fix TODO about unrolling multi comparisons Also clean up generated code in a few places to make it more readable / concise. 
--- .../gen/RegexGenerator.Emitter.cs | 132 ++++++++++++++---- .../tests/RegexGeneratorHelper.netcoreapp.cs | 12 +- .../RegexGeneratorParserTests.cs | 12 +- 3 files changed, 114 insertions(+), 42 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 2744bd7309dee2..6ed0d8d36ba113 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -9,6 +9,7 @@ using System.Diagnostics; using System.Globalization; using System.IO; +using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Security.Cryptography; using System.Text; @@ -629,8 +630,8 @@ void EmitAnchorAndLeadingChecks() { writer.WriteLine("goto ReturnFalse;"); } - writer.WriteLine(); writer.WriteLine("i += indexOfPos;"); + writer.WriteLine(); if (lcc.Length > 1) { @@ -648,16 +649,40 @@ void EmitAnchorAndLeadingChecks() writer.WriteLine("goto ReturnFalse;"); } } + writer.WriteLine(); } Debug.Assert(charClassIndex == 0 || charClassIndex == 1); - for (; charClassIndex < lcc.Length; charClassIndex++) + if (charClassIndex < lcc.Length) { - // if (!CharInClass(textSpan[i + charClassIndex], prefix[0], "...")) continue; + // if (!CharInClass(textSpan[i + charClassIndex], prefix[0], "...") || + // ...) + // { + // continue; + // } Debug.Assert(needLoop); - string spanIndex = charClassIndex > 0 ? $"span[i + {charClassIndex}]" : "span[i]"; - string charInClassExpr = MatchCharacterClass(hasTextInfo, options, spanIndex, lcc[charClassIndex].CharClass, lcc[charClassIndex].CaseInsensitive); - writer.WriteLine($"if (!{charInClassExpr}) continue;"); + int start = charClassIndex; + for (; charClassIndex < lcc.Length; charClassIndex++) + { + string spanIndex = charClassIndex > 0 ? 
$"span[i + {charClassIndex}]" : "span[i]"; + string charInClassExpr = MatchCharacterClass(hasTextInfo, options, spanIndex, lcc[charClassIndex].CharClass, lcc[charClassIndex].CaseInsensitive); + + if (charClassIndex == start) + { + writer.Write($"if (!{charInClassExpr}"); + } + else + { + writer.WriteLine(" ||"); + writer.Write($" !{charInClassExpr}"); + } + } + writer.WriteLine(")"); + using (EmitBlock(writer, null)) + { + writer.WriteLine("continue;"); + } + writer.WriteLine(); } writer.WriteLine("base.runtextpos = runtextpos + i;"); @@ -791,7 +816,7 @@ void EmitSpanLengthCheck(int requiredLength, string? dynamicRequiredLength = nul } string SpanLengthCheck(int requiredLength, string? dynamicRequiredLength = null) => - $"{Sum(textSpanPos + requiredLength, dynamicRequiredLength)} > (uint){textSpanLocal}.Length"; + $"(uint){textSpanLocal}.Length < {Sum(textSpanPos + requiredLength, dynamicRequiredLength)}"; // Adds the value of textSpanPos into the runtextpos local, slices textspan by the corresponding amount, // and zeros out textSpanPos. @@ -1205,7 +1230,7 @@ void EmitAnchors(RegexNode node) break; case RegexNode.End: - using (EmitBlock(writer, $"if ({textSpanPos} < {textSpanLocal}.Length)")) + using (EmitBlock(writer, $"if ({textSpanLocal}.Length > {textSpanPos})")) { writer.WriteLine($"goto {doneLabel};"); } @@ -1231,49 +1256,96 @@ void EmitAnchors(RegexNode node) // Emits the code to handle a multiple-character match. void EmitMultiChar(RegexNode node) { + bool caseInsensitive = IsCaseInsensitive(node); + string str = node.Str!; Debug.Assert(str.Length != 0); - // TODO: RegexOptions.Compiled has a more complicated unrolling here, but it knows the code is being compiled on the same - // endianness and bitness machine as it'll be executed on. Determine if we want to do something more here. 
- - bool caseInsensitive = IsCaseInsensitive(node); - - const int MaxUnrollLength = 8; // TODO: Tune this + const int MaxUnrollLength = 64; if (str.Length <= MaxUnrollLength) { - writer.WriteLine($"if ((uint){textSpanLocal}.Length < {textSpanPos + str.Length} ||"); - for (int i = 0; i < str.Length; i++) + // Unroll shorter strings. + + // TODO: This might employ 64-bit operations on a 32-bit machine. Decide if avoiding that + // is worth adding further complexity for (RegexOptions.Compiled doesn't have to deal with + // this, as the machine generating the code in-memory is the same one running it.) + + // For strings more than two characters and when performing case-sensitive searches, we try to do fewer comparisons + // by comparing 2 or 4 characters at a time. Because we might be compiling on one endianness and running on another, + // both little and big endian values are emitted and which is used is selected at run-time. + ReadOnlySpan byteStr = MemoryMarshal.AsBytes(str.AsSpan()); + bool useMultiCharReads = !caseInsensitive && byteStr.Length > sizeof(uint); + if (useMultiCharReads) + { + writer.WriteLine($"ref byte byteStr = ref global::System.Runtime.InteropServices.MemoryMarshal.GetReference(global::System.Runtime.InteropServices.MemoryMarshal.AsBytes({textSpanLocal}));"); + } + + writer.Write($"if ((uint){textSpanLocal}.Length < {textSpanPos + str.Length}"); + + if (useMultiCharReads) { + while (byteStr.Length >= sizeof(ulong)) + { + writer.WriteLine(" ||"); + ulong little = BinaryPrimitives.ReadUInt64LittleEndian(byteStr); + ulong big = BinaryPrimitives.ReadUInt64BigEndian(byteStr); + writer.Write($" global::System.Runtime.CompilerServices.Unsafe.ReadUnaligned(ref global::System.Runtime.CompilerServices.Unsafe.Add(ref byteStr, {textSpanPos * 2})) != (global::System.BitConverter.IsLittleEndian ? 
0x{little:X}ul : 0x{big:X}ul)"); + textSpanPos += sizeof(ulong) / 2; + byteStr = byteStr.Slice(sizeof(ulong)); + } + + while (byteStr.Length >= sizeof(uint)) + { + writer.WriteLine(" ||"); + uint little = BinaryPrimitives.ReadUInt32LittleEndian(byteStr); + uint big = BinaryPrimitives.ReadUInt32BigEndian(byteStr); + writer.Write($" global::System.Runtime.CompilerServices.Unsafe.ReadUnaligned(ref global::System.Runtime.CompilerServices.Unsafe.Add(ref byteStr, {textSpanPos * 2})) != (global::System.BitConverter.IsLittleEndian ? 0x{little:X}u : 0x{big:X}u)"); + textSpanPos += sizeof(uint) / 2; + byteStr = byteStr.Slice(sizeof(uint)); + } + } + + // Emit remaining comparisons character by character. + for (int i = (str.Length * 2 - byteStr.Length) / 2; i < str.Length; i++) + { + writer.WriteLine(" ||"); writer.Write($" {ToLowerIfNeeded(hasTextInfo, options, $"{textSpanLocal}[{textSpanPos}]", caseInsensitive)} != {Literal(str[i])}"); textSpanPos++; - writer.WriteLine(i < str.Length - 1 ? " ||" : ")"); } + + writer.WriteLine(")"); using (EmitBlock(writer, null)) { writer.WriteLine($"goto {doneLabel};"); } } - else if (!caseInsensitive) - { - using (EmitBlock(writer, $"if (!global::System.MemoryExtensions.StartsWith({textSpanLocal}.Slice({textSpanPos}), {Literal(node.Str)}))")) - { - writer.WriteLine($"goto {doneLabel};"); - } - textSpanPos += node.Str.Length; - } else { - EmitSpanLengthCheck(str.Length); - string i = GetNextLocalId(); - using (EmitBlock(writer, $"for (int {i} = 0; {i} < {Literal(str)}.Length; {i}++)")) + // Longer strings are compared character by character. If this is a case-sensitive comparison, we can simply + // delegate to StartsWith. If this is case-insensitive, we open-code the comparison loop, as we need to lowercase + // each character involved, and none of the StringComparison options provide the right semantics of comparing + // character-by-character while respecting the culture. 
+ if (!caseInsensitive) { - using (EmitBlock(writer, $"if ({ToLower(hasTextInfo, options, $"{textSpanLocal}[{textSpanPos} + {i}]")} != {ToLower(hasTextInfo, options, $"{Literal(str)}[{i}]")})")) + using (EmitBlock(writer, $"if (!global::System.MemoryExtensions.StartsWith({textSpanLocal}.Slice({textSpanPos}), {Literal(node.Str)}))")) { writer.WriteLine($"goto {doneLabel};"); } + textSpanPos += node.Str.Length; + } + else + { + EmitSpanLengthCheck(str.Length); + string i = GetNextLocalId(); + using (EmitBlock(writer, $"for (int {i} = 0; {i} < {Literal(node.Str)}.Length; {i}++)")) + { + using (EmitBlock(writer, $"if ({ToLower(hasTextInfo, options, $"{textSpanLocal}[{textSpanPos} + {i}]")} != {Literal(str)}[{i}])")) + { + writer.WriteLine($"goto {doneLabel};"); + } + } + textSpanPos += node.Str.Length; } - textSpanPos += node.Str.Length; } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs index 5344bf8888fcd4..f0d52ffc40347b 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs @@ -6,6 +6,7 @@ using System.IO; using System.Linq; using System.Reflection; +using System.Runtime.CompilerServices; using System.Runtime.Loader; using System.Text.RegularExpressions.Generator; using System.Threading; @@ -36,14 +37,13 @@ private static MetadataReference[] CreateReferences() // Typically we'd want to use the right reference assemblies, but as we're not persisting any // assets and only using this for testing purposes, referencing implementation assemblies is sufficient. 
- - string corelib = Assembly.GetAssembly(typeof(object))!.Location; - string runtimeDir = Path.GetDirectoryName(corelib)!; + string corelibPath = typeof(object).Assembly.Location; return new[] { - MetadataReference.CreateFromFile(corelib), - MetadataReference.CreateFromFile(Path.Combine(runtimeDir, "System.Runtime.dll")), - MetadataReference.CreateFromFile(Path.Combine(runtimeDir, "System.Text.RegularExpressions.dll")) + MetadataReference.CreateFromFile(typeof(object).Assembly.Location), + MetadataReference.CreateFromFile(Path.Combine(Path.GetDirectoryName(corelibPath), "System.Runtime.dll")), + MetadataReference.CreateFromFile(typeof(Unsafe).Assembly.Location), + MetadataReference.CreateFromFile(typeof(Regex).Assembly.Location), }; } diff --git a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs index 789f6b4687b377..2014a08569bc49 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs @@ -10,7 +10,7 @@ using System.Globalization; using System.IO; using System.Linq; -using System.Reflection; +using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; using Xunit; @@ -517,13 +517,13 @@ private async Task> RunGenerator( private static MetadataReference[] CreateReferences() { - string corelib = Assembly.GetAssembly(typeof(object))!.Location; - string runtimeDir = Path.GetDirectoryName(corelib)!; + string corelibPath = typeof(object).Assembly.Location; return new[] { - MetadataReference.CreateFromFile(corelib), - MetadataReference.CreateFromFile(Path.Combine(runtimeDir, "System.Runtime.dll")), - 
MetadataReference.CreateFromFile(Path.Combine(runtimeDir, "System.Text.RegularExpressions.dll")) + MetadataReference.CreateFromFile(typeof(object).Assembly.Location), + MetadataReference.CreateFromFile(Path.Combine(Path.GetDirectoryName(corelibPath)!, "System.Runtime.dll")), + MetadataReference.CreateFromFile(typeof(Unsafe).Assembly.Location), + MetadataReference.CreateFromFile(typeof(Regex).Assembly.Location), }; } } From 400147da1e5679fca4a47f4abedf25619c6b70d8 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Tue, 21 Sep 2021 13:38:05 -0400 Subject: [PATCH 14/16] Update resources per PR feedback --- .../gen/DiagnosticDescriptors.cs | 10 ++++----- .../gen/Resources/Strings.resx | 11 ++++++---- .../gen/Resources/xlf/Strings.cs.xlf | 21 ++++++++++++------- .../gen/Resources/xlf/Strings.de.xlf | 21 ++++++++++++------- .../gen/Resources/xlf/Strings.es.xlf | 21 ++++++++++++------- .../gen/Resources/xlf/Strings.fr.xlf | 21 ++++++++++++------- .../gen/Resources/xlf/Strings.it.xlf | 21 ++++++++++++------- .../gen/Resources/xlf/Strings.ja.xlf | 21 ++++++++++++------- .../gen/Resources/xlf/Strings.ko.xlf | 21 ++++++++++++------- .../gen/Resources/xlf/Strings.pl.xlf | 21 ++++++++++++------- .../gen/Resources/xlf/Strings.pt-BR.xlf | 21 ++++++++++++------- .../gen/Resources/xlf/Strings.ru.xlf | 21 ++++++++++++------- .../gen/Resources/xlf/Strings.tr.xlf | 21 ++++++++++++------- .../gen/Resources/xlf/Strings.zh-Hans.xlf | 21 ++++++++++++------- .../gen/Resources/xlf/Strings.zh-Hant.xlf | 21 ++++++++++++------- 15 files changed, 181 insertions(+), 113 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs b/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs index 67edabeacd0fae..aec397eb7d6437 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs @@ -10,7 +10,7 @@ internal static class 
DiagnosticDescriptors { public static DiagnosticDescriptor InvalidRegexGeneratorAttribute { get; } = new DiagnosticDescriptor( id: "SYSLIB1040", - title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), category: "RegexGenerator", DiagnosticSeverity.Error, @@ -19,7 +19,7 @@ internal static class DiagnosticDescriptors public static DiagnosticDescriptor MultipleRegexGeneratorAttributes { get; } = new DiagnosticDescriptor( id: "SYSLIB1041", - title: new LocalizableResourceString(nameof(SR.MultipleRegexGeneratorAttributesMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.MultipleRegexGeneratorAttributesMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), category: "RegexGenerator", DiagnosticSeverity.Error, @@ -28,7 +28,7 @@ internal static class DiagnosticDescriptors public static DiagnosticDescriptor InvalidRegexArguments { get; } = new DiagnosticDescriptor( id: "SYSLIB1042", - title: new LocalizableResourceString(nameof(SR.InvalidRegexArgumentsMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, 
typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.InvalidRegexArgumentsMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), category: "RegexGenerator", DiagnosticSeverity.Error, @@ -37,7 +37,7 @@ internal static class DiagnosticDescriptors public static DiagnosticDescriptor RegexMethodMustHaveValidSignature { get; } = new DiagnosticDescriptor( id: "SYSLIB1043", - title: new LocalizableResourceString(nameof(SR.RegexMethodMustHaveValidSignatureMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustHaveValidSignatureMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), category: "RegexGenerator", DiagnosticSeverity.Error, @@ -46,7 +46,7 @@ internal static class DiagnosticDescriptors public static DiagnosticDescriptor InvalidLangVersion { get; } = new DiagnosticDescriptor( id: "SYSLIB1044", - title: new LocalizableResourceString(nameof(SR.InvalidLangVersionMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.InvalidLangVersionMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), category: "RegexGenerator", DiagnosticSeverity.Error, diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx b/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx index bf025f3d9e52cc..d72f6b20b3166d 100644 
--- a/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx @@ -117,20 +117,23 @@ System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + Invalid RegexGenerator usage + - The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed The specified regex is invalid. '{0}' - Regex method must be static, partial, parameterless, non-generic, and return Regex. + Regex method must be static, partial, parameterless, non-generic, and return Regex - C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required Regular expression parser error '{0}' at offset {1}. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf index 0863caad887f41..1a24ff724c7f90 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf @@ -123,8 +123,8 @@ - C# LangVersion of 10 or greater is required. - C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required + C# LangVersion of 10 or greater is required @@ -133,8 +133,13 @@ - The RegexGeneratorAttribute is malformed. - The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed + The RegexGeneratorAttribute is malformed + + + + Invalid RegexGenerator usage + Invalid RegexGenerator usage @@ -168,8 +173,8 @@ - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. - Multiple RegexGeneratorAttributes were applied to the same method. 
Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed @@ -218,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex. - Regex method must be static, partial, parameterless, non-generic, and return Regex. + Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be static, partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf index 260a9d0521ff27..d5bcf488f701e4 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf @@ -123,8 +123,8 @@ - C# LangVersion of 10 or greater is required. - C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required + C# LangVersion of 10 or greater is required @@ -133,8 +133,13 @@ - The RegexGeneratorAttribute is malformed. - The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed + The RegexGeneratorAttribute is malformed + + + + Invalid RegexGenerator usage + Invalid RegexGenerator usage @@ -168,8 +173,8 @@ - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed @@ -218,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex. - Regex method must be static, partial, parameterless, non-generic, and return Regex. 
+ Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be static, partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf index e5deb9da8d595a..216d5d6df79cb3 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf @@ -123,8 +123,8 @@ - C# LangVersion of 10 or greater is required. - C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required + C# LangVersion of 10 or greater is required @@ -133,8 +133,13 @@ - The RegexGeneratorAttribute is malformed. - The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed + The RegexGeneratorAttribute is malformed + + + + Invalid RegexGenerator usage + Invalid RegexGenerator usage @@ -168,8 +173,8 @@ - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed @@ -218,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex. - Regex method must be static, partial, parameterless, non-generic, and return Regex. 
+ Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be static, partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf index aaaddf05064f82..1f217908ca1f3c 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf @@ -123,8 +123,8 @@ - C# LangVersion of 10 or greater is required. - C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required + C# LangVersion of 10 or greater is required @@ -133,8 +133,13 @@ - The RegexGeneratorAttribute is malformed. - The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed + The RegexGeneratorAttribute is malformed + + + + Invalid RegexGenerator usage + Invalid RegexGenerator usage @@ -168,8 +173,8 @@ - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed @@ -218,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex. - Regex method must be static, partial, parameterless, non-generic, and return Regex. 
+ Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be static, partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf index 6768025cdf942d..2b6aa13e072670 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf @@ -123,8 +123,8 @@ - C# LangVersion of 10 or greater is required. - C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required + C# LangVersion of 10 or greater is required @@ -133,8 +133,13 @@ - The RegexGeneratorAttribute is malformed. - The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed + The RegexGeneratorAttribute is malformed + + + + Invalid RegexGenerator usage + Invalid RegexGenerator usage @@ -168,8 +173,8 @@ - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed @@ -218,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex. - Regex method must be static, partial, parameterless, non-generic, and return Regex. 
+ Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be static, partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf index 8716f388addfff..a251d23cc91e67 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf @@ -123,8 +123,8 @@ - C# LangVersion of 10 or greater is required. - C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required + C# LangVersion of 10 or greater is required @@ -133,8 +133,13 @@ - The RegexGeneratorAttribute is malformed. - The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed + The RegexGeneratorAttribute is malformed + + + + Invalid RegexGenerator usage + Invalid RegexGenerator usage @@ -168,8 +173,8 @@ - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed @@ -218,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex. - Regex method must be static, partial, parameterless, non-generic, and return Regex. 
+ Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be static, partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf index 1554de6b120f36..f85b3734c7bd0b 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf @@ -123,8 +123,8 @@ - C# LangVersion of 10 or greater is required. - C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required + C# LangVersion of 10 or greater is required @@ -133,8 +133,13 @@ - The RegexGeneratorAttribute is malformed. - The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed + The RegexGeneratorAttribute is malformed + + + + Invalid RegexGenerator usage + Invalid RegexGenerator usage @@ -168,8 +173,8 @@ - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed @@ -218,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex. - Regex method must be static, partial, parameterless, non-generic, and return Regex. 
+ Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be static, partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf index 5d181760aea692..ff5340089f0721 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf @@ -123,8 +123,8 @@ - C# LangVersion of 10 or greater is required. - C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required + C# LangVersion of 10 or greater is required @@ -133,8 +133,13 @@ - The RegexGeneratorAttribute is malformed. - The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed + The RegexGeneratorAttribute is malformed + + + + Invalid RegexGenerator usage + Invalid RegexGenerator usage @@ -168,8 +173,8 @@ - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed @@ -218,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex. - Regex method must be static, partial, parameterless, non-generic, and return Regex. 
+ Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be static, partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf index 201efb3e195039..f809ac5d33ae4a 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf @@ -123,8 +123,8 @@ - C# LangVersion of 10 or greater is required. - C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required + C# LangVersion of 10 or greater is required @@ -133,8 +133,13 @@ - The RegexGeneratorAttribute is malformed. - The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed + The RegexGeneratorAttribute is malformed + + + + Invalid RegexGenerator usage + Invalid RegexGenerator usage @@ -168,8 +173,8 @@ - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed @@ -218,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex. - Regex method must be static, partial, parameterless, non-generic, and return Regex. 
+ Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be static, partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf index fb2bba07f8c221..401ad8581cbd0a 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf @@ -123,8 +123,8 @@ - C# LangVersion of 10 or greater is required. - C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required + C# LangVersion of 10 or greater is required @@ -133,8 +133,13 @@ - The RegexGeneratorAttribute is malformed. - The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed + The RegexGeneratorAttribute is malformed + + + + Invalid RegexGenerator usage + Invalid RegexGenerator usage @@ -168,8 +173,8 @@ - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed @@ -218,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex. - Regex method must be static, partial, parameterless, non-generic, and return Regex. 
+ Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be static, partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf index 16acd32087f3f6..be79b27d687643 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf @@ -123,8 +123,8 @@ - C# LangVersion of 10 or greater is required. - C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required + C# LangVersion of 10 or greater is required @@ -133,8 +133,13 @@ - The RegexGeneratorAttribute is malformed. - The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed + The RegexGeneratorAttribute is malformed + + + + Invalid RegexGenerator usage + Invalid RegexGenerator usage @@ -168,8 +173,8 @@ - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed @@ -218,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex. - Regex method must be static, partial, parameterless, non-generic, and return Regex. 
+ Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be static, partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf index eea3de1550a253..c99a77e9358bc7 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf @@ -123,8 +123,8 @@ - C# LangVersion of 10 or greater is required. - C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required + C# LangVersion of 10 or greater is required @@ -133,8 +133,13 @@ - The RegexGeneratorAttribute is malformed. - The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed + The RegexGeneratorAttribute is malformed + + + + Invalid RegexGenerator usage + Invalid RegexGenerator usage @@ -168,8 +173,8 @@ - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed @@ -218,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex. - Regex method must be static, partial, parameterless, non-generic, and return Regex. 
+ Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be static, partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf index db94fb362d19e9..78d21176b6be38 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf @@ -123,8 +123,8 @@ - C# LangVersion of 10 or greater is required. - C# LangVersion of 10 or greater is required. + C# LangVersion of 10 or greater is required + C# LangVersion of 10 or greater is required @@ -133,8 +133,13 @@ - The RegexGeneratorAttribute is malformed. - The RegexGeneratorAttribute is malformed. + The RegexGeneratorAttribute is malformed + The RegexGeneratorAttribute is malformed + + + + Invalid RegexGenerator usage + Invalid RegexGenerator usage @@ -168,8 +173,8 @@ - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. - Multiple RegexGeneratorAttributes were applied to the same method. Only one is allowed. + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed + Multiple RegexGeneratorAttributes were applied to the same method, but only one is allowed @@ -218,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex. - Regex method must be static, partial, parameterless, non-generic, and return Regex. 
+ Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be static, partial, parameterless, non-generic, and return Regex From a07f142a39c378777906cb0ee392c9017b5630d6 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Tue, 21 Sep 2021 17:14:35 -0400 Subject: [PATCH 15/16] Add more tests, clean up parser code, and allow instance/interface methods --- .../gen/RegexGenerator.Emitter.cs | 5 +- .../gen/RegexGenerator.Parser.cs | 49 ++-- .../gen/Resources/Strings.resx | 2 +- .../gen/Resources/xlf/Strings.cs.xlf | 4 +- .../gen/Resources/xlf/Strings.de.xlf | 4 +- .../gen/Resources/xlf/Strings.es.xlf | 4 +- .../gen/Resources/xlf/Strings.fr.xlf | 4 +- .../gen/Resources/xlf/Strings.it.xlf | 4 +- .../gen/Resources/xlf/Strings.ja.xlf | 4 +- .../gen/Resources/xlf/Strings.ko.xlf | 4 +- .../gen/Resources/xlf/Strings.pl.xlf | 4 +- .../gen/Resources/xlf/Strings.pt-BR.xlf | 4 +- .../gen/Resources/xlf/Strings.ru.xlf | 4 +- .../gen/Resources/xlf/Strings.tr.xlf | 4 +- .../gen/Resources/xlf/Strings.zh-Hans.xlf | 4 +- .../gen/Resources/xlf/Strings.zh-Hant.xlf | 4 +- .../RegexGeneratorParserTests.cs | 213 +++++++++--------- 17 files changed, 151 insertions(+), 170 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 6ed0d8d36ba113..bb9271ab2a2484 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -72,9 +72,8 @@ private static string EmitRegexType(RegexType regexClass) writer.WriteLine("{"); writer.Indent++; - // Generate a unique name to describe the regex instance. 
This includes both - // the method name the user provided and a unique counter value, plus a simple - // non-randomized (for determinism) hash of the previous content to try to make + // Generate a name to describe the regex instance. This includes the method name + // the user provided and a non-randomized (for determinism) hash of it to try to make // the name that much harder to predict. string generatedName = $"GeneratedRegex_{regexClass.Method.MethodName}_"; generatedName += ComputeStringHash(generatedName).ToString("X"); diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs index 077c9a6619e29b..71060a1410b82b 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs @@ -75,17 +75,9 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => return null; } - DiagnosticDescriptor? errorDescriptor = null; RegexMethod? regexMethod = null; foreach (AttributeData attributeData in boundAttributes) { - // If we already encountered an error, stop looking at this method's attributes. - if (errorDescriptor is not null) - { - break; - } - - // If this isn't a RegexGeneratorAttribute, skip it. 
if (!attributeData.AttributeClass.Equals(regexGeneratorAttributeSymbol)) { continue; @@ -93,37 +85,26 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => if (attributeData.ConstructorArguments.Any(ca => ca.Kind == TypedConstantKind.Error)) { - errorDescriptor = DiagnosticDescriptors.InvalidRegexGeneratorAttribute; - break; + return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexGeneratorAttribute, methodSyntax.GetLocation()); } - ImmutableArray items = attributeData.ConstructorArguments; - if (items.Length is > 0 and <= 3 && items[0].Value is string pattern) + if (regexMethod is not null) { - switch (items.Length) - { - case 1: - regexMethod = new RegexMethod { Pattern = pattern }; - break; - - case 2: - regexMethod = new RegexMethod { Pattern = pattern, Options = items[1].Value as int?, }; - break; - - case 3: - regexMethod = new RegexMethod { Pattern = pattern, Options = items[1].Value as int?, MatchTimeout = items[2].Value as int?, }; - break; - } + return Diagnostic.Create(DiagnosticDescriptors.MultipleRegexGeneratorAttributes, methodSyntax.GetLocation()); } - else + + ImmutableArray items = attributeData.ConstructorArguments; + if (items.Length == 0 || items.Length > 3) { - errorDescriptor = DiagnosticDescriptors.InvalidRegexGeneratorAttribute; + return Diagnostic.Create(DiagnosticDescriptors.InvalidRegexGeneratorAttribute, methodSyntax.GetLocation()); } - } - if (errorDescriptor is not null) - { - return Diagnostic.Create(errorDescriptor, methodSyntax.GetLocation()); + regexMethod = items.Length switch + { + 1 => new RegexMethod { Pattern = items[0].Value as string }, + 2 => new RegexMethod { Pattern = items[0].Value as string, Options = items[1].Value as int? }, + _ => new RegexMethod { Pattern = items[0].Value as string, Options = items[1].Value as int?, MatchTimeout = items[2].Value as int? 
}, + }; } if (regexMethod is null) @@ -137,7 +118,6 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => } if (!regexMethodSymbol.IsPartialDefinition || - !regexMethodSymbol.IsStatic || regexMethodSymbol.Parameters.Length != 0 || regexMethodSymbol.Arity != 0 || !regexMethodSymbol.ReturnType.Equals(regexSymbol)) @@ -236,7 +216,8 @@ static bool IsAllowedKind(SyntaxKind kind) => kind == SyntaxKind.ClassDeclaration || kind == SyntaxKind.StructDeclaration || kind == SyntaxKind.RecordDeclaration || - kind == SyntaxKind.RecordStructDeclaration; + kind == SyntaxKind.RecordStructDeclaration || + kind == SyntaxKind.InterfaceDeclaration; } /// A type holding a regex method. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx b/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx index d72f6b20b3166d..8c2c008f7b7806 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx @@ -130,7 +130,7 @@ The specified regex is invalid. 
'{0}' - Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex C# LangVersion of 10 or greater is required diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf index 1a24ff724c7f90..399ded6b36025d 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf @@ -223,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex - Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf index d5bcf488f701e4..af36d9c9279819 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf @@ -223,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex - Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf index 216d5d6df79cb3..008f0efa1ad31e 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf +++ 
b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf @@ -223,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex - Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf index 1f217908ca1f3c..eb67f0f6a00e6e 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf @@ -223,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex - Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf index 2b6aa13e072670..b3ea80632e39d0 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf @@ -223,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex - Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf 
b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf index a251d23cc91e67..f768444ddc0a20 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf @@ -223,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex - Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf index f85b3734c7bd0b..4569f6c86392a4 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf @@ -223,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex - Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf index ff5340089f0721..d8f136e98b3b50 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf @@ -223,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex - Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and 
return Regex + Regex method must be partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf index f809ac5d33ae4a..883f4a09bb2352 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf @@ -223,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex - Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf index 401ad8581cbd0a..9f6dc140f2c247 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf @@ -223,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex - Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf index be79b27d687643..2c5c19a25fd580 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf @@ -223,8 +223,8 @@ - Regex method must be static, partial, 
parameterless, non-generic, and return Regex - Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf index c99a77e9358bc7..0440e1fb875f9f 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf @@ -223,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex - Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf index 78d21176b6be38..5f399b27c3cad5 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf @@ -223,8 +223,8 @@ - Regex method must be static, partial, parameterless, non-generic, and return Regex - Regex method must be static, partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex + Regex method must be partial, parameterless, non-generic, and return Regex diff --git a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs 
b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs index 2014a08569bc49..18b4cc786404e2 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs @@ -23,6 +23,22 @@ namespace System.Text.RegularExpressions.Generator.Tests [PlatformSpecific(~TestPlatforms.Browser)] public class RegexGeneratorParserTests { + [Fact] + public async Task Diagnostic_MultipleAttributes() + { + IReadOnlyList diagnostics = await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""ab"")] + [RegexGenerator(""abc"")] + private static partial Regex MultipleAttributes(); + } + "); + + Assert.Equal("SYSLIB1041", Assert.Single(diagnostics).Id); + } + [Theory] [InlineData("ab[]")] public async Task Diagnostic_InvalidRegexPattern(string pattern) @@ -87,21 +103,6 @@ partial class C Assert.Equal("SYSLIB1043", Assert.Single(diagnostics).Id); } - [Fact] - public async Task Diagnostic_MethodMustBeStatic() - { - IReadOnlyList diagnostics = await RunGenerator(@" - using System.Text.RegularExpressions; - partial class C - { - [RegexGenerator(""ab"")] - private partial Regex MethodMustBeStatic(); - } - "); - - Assert.Equal("SYSLIB1043", Assert.Single(diagnostics).Id); - } - [Fact] public async Task Diagnostic_MethodMustNotBeGeneric() { @@ -176,6 +177,67 @@ partial class C ", compile: true)); } + [Theory] + [InlineData("RegexOptions.None")] + [InlineData("RegexOptions.Compiled")] + [InlineData("RegexOptions.IgnoreCase | RegexOptions.CultureInvariant")] + public async Task Valid_PatternOptions(string options) + { + Assert.Empty(await RunGenerator($@" + using System.Text.RegularExpressions; + partial class C + {{ + [RegexGenerator(""ab"", {options})] + private static partial 
Regex Valid(); + }} + ", compile: true)); + } + + [Theory] + [InlineData("-1")] + [InlineData("1")] + [InlineData("1_000")] + public async Task Valid_PatternOptionsTimeout(string timeout) + { + Assert.Empty(await RunGenerator($@" + using System.Text.RegularExpressions; + partial class C + {{ + [RegexGenerator(""ab"", RegexOptions.None, {timeout})] + private static partial Regex Valid(); + }} + ", compile: true)); + } + + [Fact] + public async Task Valid_NamedArguments() + { + Assert.Empty(await RunGenerator($@" + using System.Text.RegularExpressions; + partial class C + {{ + [RegexGenerator(pattern: ""ab"", options: RegexOptions.None, matchTimeoutMilliseconds: -1)] + private static partial Regex Valid(); + }} + ", compile: true)); + } + + [Fact] + public async Task Valid_ReorderedNamedArguments() + { + Assert.Empty(await RunGenerator($@" + using System.Text.RegularExpressions; + partial class C + {{ + [RegexGenerator(options: RegexOptions.None, matchTimeoutMilliseconds: -1, pattern: ""ab"")] + private static partial Regex Valid1(); + + [RegexGenerator(matchTimeoutMilliseconds: -1, pattern: ""ab"", options: RegexOptions.None)] + private static partial Regex Valid2(); + }} + ", compile: true)); + } + [Fact] public async Task Valid_ClassWithNamespace() { @@ -319,68 +381,46 @@ partial class C ", compile: true)); } - [Fact] - public async Task Valid_InternalRegex() + public static IEnumerable Valid_Modifiers_MemberData() { - Assert.Empty(await RunGenerator(@" - using System.Text.RegularExpressions; - partial class C - { - [RegexGenerator(""ab"")] - internal static partial Regex Valid(); - } - ", compile: true)); - } - - [Fact] - public async Task Valid_PublicRegex() - { - Assert.Empty(await RunGenerator(@" - using System.Text.RegularExpressions; - partial class C + foreach (string type in new[] { "class", "struct", "record", "record struct", "record class", "interface" }) + { + string[] typeModifiers = type switch { - [RegexGenerator(""ab"")] - public static partial 
Regex Valid(); - } - ", compile: true)); - } + "class" => new[] { "", "public", "public sealed", "internal abstract", "internal static" }, + _ => new[] { "", "public", "internal" } + }; - [Fact] - public async Task Valid_PrivateProtectedRegex() - { - Assert.Empty(await RunGenerator(@" - using System.Text.RegularExpressions; - partial class C + foreach (string typeModifier in typeModifiers) { - [RegexGenerator(""ab"")] - private protected static partial Regex Valid(); - } - ", compile: true)); - } + foreach (bool instance in typeModifier.Contains("static") ? new[] { false } : new[] { false, true }) + { + string[] methodVisibilities = type switch + { + "class" when !typeModifier.Contains("sealed") && !typeModifier.Contains("static") => new[] { "public", "internal", "private protected", "protected internal", "private" }, + _ => new[] { "public", "internal", "private" } + }; - [Fact] - public async Task Valid_PublicSealedClass() - { - Assert.Empty(await RunGenerator(@" - using System.Text.RegularExpressions; - public sealed partial class C - { - [RegexGenerator(""ab"")] - private static partial Regex Valid(); + foreach (string methodVisibility in methodVisibilities) + { + yield return new object[] { type, typeModifier, instance, methodVisibility }; + } + } } - ", compile: true)); + } } - [Fact] - public async Task Valid_InternalAbstractClass() + [Theory] + [MemberData(nameof(Valid_Modifiers_MemberData))] + public async Task Valid_Modifiers(string type, string typeModifier, bool instance, string methodVisibility) { - Assert.Empty(await RunGenerator(@" + Assert.Empty(await RunGenerator(@$" using System.Text.RegularExpressions; - internal abstract partial class C - { + {typeModifier} partial {type} C + {{ [RegexGenerator(""ab"")] - private static partial Regex Valid(); - } + {methodVisibility} {(instance ? 
"" : "static")} partial Regex Valid(); + }} ", compile: true)); } @@ -411,45 +451,6 @@ partial class C2 ", compile: true)); } - [Fact] - public async Task Valid_OnStruct() - { - Assert.Empty(await RunGenerator(@" - using System.Text.RegularExpressions; - internal partial struct C - { - [RegexGenerator(""ab"")] - private static partial Regex Valid(); - } - ", compile: true)); - } - - [Fact] - public async Task Valid_OnRecord() - { - Assert.Empty(await RunGenerator(@" - using System.Text.RegularExpressions; - internal partial record C - { - [RegexGenerator(""ab"")] - private static partial Regex Valid(); - } - ", compile: true)); - } - - [Fact] - public async Task Valid_OnRecordStruct() - { - Assert.Empty(await RunGenerator(@" - using System.Text.RegularExpressions; - internal partial record struct C - { - [RegexGenerator(""ab"")] - private static partial Regex Valid(); - } - ", compile: true)); - } - [Fact] public async Task Valid_NestedVaryingTypes() { @@ -503,7 +504,7 @@ private async Task> RunGenerator( comp = comp.AddSyntaxTrees(generatorResults.GeneratedTrees.ToArray()); EmitResult results = comp.Emit(Stream.Null, cancellationToken: cancellationToken); - if (!results.Success) + if (!results.Success || results.Diagnostics.Length != 0 || generatorResults.Diagnostics.Length != 0) { throw new ArgumentException( string.Join(Environment.NewLine, results.Diagnostics.Concat(generatorResults.Diagnostics)) + Environment.NewLine + From e153b3acc9d206521a46b1d1c9c49a993aaeb6fd Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Tue, 21 Sep 2021 20:23:13 -0400 Subject: [PATCH 16/16] Fix test suppression on mobile --- .../System.Text.RegularExpressions/tests/Regex.Tests.Common.cs | 3 +++ .../RegexGeneratorParserTests.cs | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs index 5212c534b00e42..8c174480f0e020 100644 
--- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs @@ -53,9 +53,12 @@ public static IEnumerable AvailableEngines get { yield return RegexEngine.Interpreter; + yield return RegexEngine.Compiled; + if (PlatformDetection.IsNetCore && PlatformDetection.IsReflectionEmitSupported && // the source generator doesn't use reflection emit, but it does use Roslyn for the equivalent + PlatformDetection.IsNotMobile && PlatformDetection.IsNotBrowser) { yield return RegexEngine.SourceGenerated; diff --git a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs index 18b4cc786404e2..8d0b6b6a3d3608 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs @@ -19,8 +19,7 @@ namespace System.Text.RegularExpressions.Generator.Tests { // Tests don't actually use reflection emit, but they do generate assembly via Roslyn in-memory at run time and expect it to be JIT'd. // The tests also use typeof(object).Assembly.Location, which returns an empty string on wasm. - [ConditionalClass(typeof(PlatformDetection), nameof(PlatformDetection.IsReflectionEmitSupported))] - [PlatformSpecific(~TestPlatforms.Browser)] + [ConditionalClass(typeof(PlatformDetection), nameof(PlatformDetection.IsReflectionEmitSupported), nameof(PlatformDetection.IsNotMobile), nameof(PlatformDetection.IsNotBrowser))] public class RegexGeneratorParserTests { [Fact]