Skip to content

Commit b4ec422

Browse files
stephentoub and sbomer authored
Add debug-only use of new AssemblyBuilder.Save in Regex.CompileToAssembly (#96462)
* Add debug-only use of new AssemblyBuilder.Save in Regex.CompileToAssembly

  To aid in debugging RegexCompiler issues and to help vet the new AssemblyBuilder.Save support.

* Fix IL2121 warnings

---------

Co-authored-by: Sven Boemer <[email protected]>
1 parent 315a2a8 commit b4ec422

File tree

8 files changed

+349
-6
lines changed

8 files changed

+349
-6
lines changed

src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/ModuleBuilderImpl.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ internal void AppendMetadata(MethodBodyStreamEncoder methodBodyEncoder)
185185
}
186186
}
187187

188-
[UnconditionalSuppressMessage("ReflectionAnalysis", "IL2072:DynamicallyAccessedMembers", Justification = "Members are retrieved from internal cache")]
189188
private void WriteInterfaceImplementations(TypeBuilderImpl typeBuilder, TypeDefinitionHandle typeHandle)
190189
{
191190
if (typeBuilder._interfaces != null)

src/libraries/System.Reflection.Emit/src/System/Reflection/Emit/TypeBuilderImpl.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,13 +176,14 @@ private void ValidateAllAbstractMethodsAreImplemented()
176176
}
177177

178178
[UnconditionalSuppressMessage("ReflectionAnalysis", "IL2065:DynamicallyAccessedMembers", Justification = "Methods are loaded from this TypeBuilder. The interface methods should be available at this point")]
179-
[UnconditionalSuppressMessage("ReflectionAnalysis", "IL2075:DynamicallyAccessedMembers", Justification = "The interface methods should be available at this point")]
180179
[UnconditionalSuppressMessage("ReflectionAnalysis", "IL2085:DynamicallyAccessedMembers", Justification = "Methods are loaded from this TypeBuilder")]
181180
private void CheckInterfaces(Type[] _interfaces)
182181
{
183182
foreach (Type interfaceType in _interfaces)
184183
{
184+
#pragma warning disable IL2075 // Analyzer produces a different warning code than illink. The IL2065 suppression takes care of illink: https://github.com/dotnet/runtime/issues/96646
185185
MethodInfo[] interfaceMethods = interfaceType.GetMethods(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance);
186+
#pragma warning restore IL2075
186187
for (int i = 0; i < interfaceMethods.Length; i++)
187188
{
188189
MethodInfo interfaceMethod = interfaceMethods[i];
@@ -195,7 +196,9 @@ private void CheckInterfaces(Type[] _interfaces)
195196
}
196197

197198
// Check parent interfaces too
199+
#pragma warning disable IL2075 // Analyzer produces a different warning code than illink. The IL2065 suppression takes care of illink: https://github.com/dotnet/runtime/issues/96646
198200
CheckInterfaces(interfaceType.GetInterfaces());
201+
#pragma warning restore IL2075
199202
}
200203
}
201204

src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
<Compile Include="System\Text\RegularExpressions\Regex.Split.cs" />
2929
<Compile Include="System\Text\RegularExpressions\Regex.EnumerateMatches.cs" />
3030
<Compile Include="System\Text\RegularExpressions\Regex.Timeout.cs" />
31+
<Compile Include="System\Text\RegularExpressions\RegexAssemblyCompiler.cs" />
3132
<Compile Include="System\Text\RegularExpressions\RegexCaseBehavior.cs" />
3233
<Compile Include="System\Text\RegularExpressions\RegexCaseEquivalences.Data.cs" />
3334
<Compile Include="System\Text\RegularExpressions\RegexCaseEquivalences.cs" />

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,41 @@ public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, Assembly
227227
CompileToAssembly(regexinfos, assemblyname, attributes, null);
228228

229229
[Obsolete(Obsoletions.RegexCompileToAssemblyMessage, DiagnosticId = Obsoletions.RegexCompileToAssemblyDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
230-
public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, AssemblyName assemblyname, CustomAttributeBuilder[]? attributes, string? resourceFile) =>
230+
public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, AssemblyName assemblyname, CustomAttributeBuilder[]? attributes, string? resourceFile)
231+
{
232+
#if DEBUG
233+
// This code exists only to help with the development of the RegexCompiler.
234+
// .NET no longer supports CompileToAssembly; the source generator should be used instead.
// In non-DEBUG builds this method only throws PlatformNotSupportedException (see the #else branch below).
235+
// NOTE(review): IL3050 is presumably raised because RegexAssemblyCompiler is marked [RequiresDynamicCode] - confirm.
#pragma warning disable IL3050
236+
ArgumentNullException.ThrowIfNull(assemblyname);
237+
ArgumentNullException.ThrowIfNull(regexinfos);
238+
239+
// One compiler instance accumulates every generated type; the assembly is saved once, after the loop.
var c = new RegexAssemblyCompiler(assemblyname, attributes, resourceFile);
240+
241+
for (int i = 0; i < regexinfos.Length; i++)
242+
{
243+
ArgumentNullException.ThrowIfNull(regexinfos[i]);
244+
245+
string pattern = regexinfos[i].Pattern;
246+
247+
RegexOptions options = regexinfos[i].Options | RegexOptions.Compiled; // ensure compiled is set; it enables more optimization specific to compilation
248+
249+
// Qualify the type name with the namespace only when a non-empty namespace was supplied.
string fullname = regexinfos[i].Namespace.Length == 0 ?
250+
regexinfos[i].Name :
251+
regexinfos[i].Namespace + "." + regexinfos[i].Name;
252+
253+
// Parse with the invariant culture when CultureInvariant is requested, otherwise with the current culture.
RegexTree tree = RegexParser.Parse(pattern, options, (options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture);
254+
RegexInterpreterCode code = RegexWriter.Write(tree);
255+
256+
c.GenerateRegexType(pattern, options, fullname, regexinfos[i].IsPublic, tree, code, regexinfos[i].MatchTimeout);
257+
}
258+
259+
// Save under the assembly's simple name, or a fixed fallback name when the AssemblyName has none.
c.Save(assemblyname.Name ?? "RegexCompileToAssembly");
260+
#pragma warning restore IL3050
261+
#else
231262
throw new PlatformNotSupportedException(SR.PlatformNotSupported_CompileToAssembly);
263+
#endif
264+
}
232265

233266
/// <summary>
234267
/// Escapes a minimal set of metacharacters (\, *, +, ?, |, {, [, (, ), ^, $, ., #, and
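src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexAssemblyCompiler.cs

Lines changed: 273 additions & 0 deletions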
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Collections;
5+
using System.Collections.Generic;
6+
using System.Diagnostics.CodeAnalysis;
7+
using System.Reflection;
8+
using System.Reflection.Emit;
9+
using System.Threading;
10+
11+
// WARNING:
12+
// The code generated by RegexCompiler is not intended to be saved to disk and loaded back later.
13+
// It accesses internal and private members of System.Text.RegularExpressions, which may change
14+
// at any point in the future, and relies on details about the current machine/process, e.g. is
15+
// it 32-bit or 64-bit. The generated surface area has also not been vetted. This code exists
16+
// only for debugging purposes, to make it easier to examine the IL that RegexCompiler emits.
17+
18+
#if DEBUG
19+
namespace System.Text.RegularExpressions
20+
{
21+
/// <summary>Compiles a Regex to an assembly that can be saved to disk.</summary>
22+
[RequiresDynamicCode("The RegexAssemblyCompiler type requires dynamic code to be enabled.")]
23+
internal sealed class RegexAssemblyCompiler : RegexCompiler
24+
{
25+
/// <summary>Type count used to augment generated type names to create unique names.</summary>
26+
private static int s_typeCount;
27+
28+
private readonly AssemblyBuilder _assembly;
29+
private readonly ModuleBuilder _module;
30+
private readonly MethodInfo _save;
31+
32+
/// <summary>
/// Creates the persisted assembly builder and the module that generated regex types are emitted into,
/// and caches the reflected <c>Save</c> method used later by <see cref="Save(string)"/>.
/// </summary>
/// <param name="an">Name of the assembly to define; <c>an.Name + ".dll"</c> is used as the module name.</param>
/// <param name="attribs">Optional custom attributes; copied into a list and passed to <c>DefinePersistedAssembly</c>, or passed as <see langword="null"/> when absent.</param>
/// <param name="resourceFile">Must be <see langword="null"/>; any other value causes an exception.</param>
/// <exception cref="PlatformNotSupportedException"><paramref name="resourceFile"/> is not <see langword="null"/>.</exception>
/// <exception cref="InvalidOperationException">
/// <c>AssemblyBuilderImpl.DefinePersistedAssembly</c> or <c>AssemblyBuilderImpl.Save</c> could not be found,
/// or <c>DefinePersistedAssembly</c> returned <see langword="null"/>.
/// </exception>
internal RegexAssemblyCompiler(AssemblyName an, CustomAttributeBuilder[]? attribs, string? resourceFile)
33+
{
34+
if (resourceFile != null)
35+
{
36+
// Unmanaged resources are not supported: _assembly.DefineUnmanagedResource(resourceFile);
37+
throw new PlatformNotSupportedException();
38+
}
39+
40+
// TODO: Use public API when it's available: https://github.com/dotnet/runtime/issues/15704
// Until then, both members are looked up as non-public members via reflection.
41+
Type abType = Type.GetType("System.Reflection.Emit.AssemblyBuilderImpl, System.Reflection.Emit", throwOnError: true)!;
42+
MethodInfo defineDynamicAssembly = abType.GetMethod("DefinePersistedAssembly",
43+
BindingFlags.NonPublic | BindingFlags.Static,
44+
[typeof(AssemblyName), typeof(Assembly), typeof(List<CustomAttributeBuilder>)]) ??
45+
throw new InvalidOperationException("Could not find method AssemblyBuilderImpl.DefinePersistedAssembly");
46+
_assembly = (AssemblyBuilder?)defineDynamicAssembly.Invoke(null, [an, typeof(object).Assembly, attribs is not null ? new List<CustomAttributeBuilder>(attribs) : null]) ??
47+
throw new InvalidOperationException("DefinePersistedAssembly returned null");
48+
_save = abType.GetMethod("Save", BindingFlags.NonPublic | BindingFlags.Instance, [typeof(string)]) ??
49+
throw new InvalidOperationException("Could not find method AssemblyBuilderImpl.Save");
50+
51+
_module = _assembly.DefineDynamicModule(an.Name + ".dll");
52+
}
53+
54+
/// <summary>
/// Emits three types for one regex into the module: a <c>RegexRunner</c>-derived type, a
/// <c>RegexRunnerFactory</c>-derived type, and the <c>Regex</c>-derived type named <paramref name="name"/>.
/// </summary>
/// <param name="pattern">Pattern string stored into the generated Regex type by its default constructor.</param>
/// <param name="options">Options stored into the base compiler state and into the generated Regex type.</param>
/// <param name="name">Name of the Regex-derived type; also the prefix of the runner and factory type names.</param>
/// <param name="isPublic">Visibility of the Regex-derived type only; the runner and factory types are always defined non-public.</param>
/// <param name="tree">Parsed regex tree stored into the base compiler state and used to emit capture metadata.</param>
/// <param name="code">Forwarded to <c>GenerateRegexDefaultCtor</c>.</param>
/// <param name="matchTimeout">Default timeout; a constructor taking a <see cref="TimeSpan"/> is emitted only when this is not <c>Regex.InfiniteMatchTimeout</c>.</param>
internal void GenerateRegexType(string pattern, RegexOptions options, string name, bool isPublic, RegexTree tree, RegexInterpreterCode code, TimeSpan matchTimeout)
55+
{
56+
// Store arguments into the base type's fields
57+
_options = options;
58+
_regexTree = tree;
59+
60+
// Pick a numeric suffix for the runner and factory type names. The counter is static and
// incremented atomically; the Regex-derived type below uses 'name' as-is, without the suffix.
61+
string typenumString = ((uint)Interlocked.Increment(ref s_typeCount)).ToString();
62+
63+
// Generate the RegexRunner-derived type.
64+
TypeBuilder regexRunnerTypeBuilder = DefineType(_module, $"{name}Runner{typenumString}", isPublic: false, isSealed: true, typeof(RegexRunner));
65+
66+
// Each DefineMethod call re-points _ilg at the new method's IL generator before the matching Emit* call runs.
_ilg = DefineMethod(regexRunnerTypeBuilder, "TryFindNextPossibleStartingPosition", [typeof(ReadOnlySpan<char>)], typeof(bool), out MethodBuilder tryFindNextPossibleStartingPositionMethod);
67+
EmitTryFindNextPossibleStartingPosition();
68+
69+
_ilg = DefineMethod(regexRunnerTypeBuilder, "TryMatchAtCurrentPosition", [typeof(ReadOnlySpan<char>)], typeof(bool), out MethodBuilder tryMatchAtCurrentPositionMethod);
70+
EmitTryMatchAtCurrentPosition();
71+
72+
_ilg = DefineMethod(regexRunnerTypeBuilder, "Scan", [typeof(ReadOnlySpan<char>)], null, out _);
73+
EmitScan(options, tryFindNextPossibleStartingPositionMethod, tryMatchAtCurrentPositionMethod);
74+
75+
Type runnerType = regexRunnerTypeBuilder.CreateType()!;
76+
77+
// Generate the RegexRunnerFactory-derived type.
78+
TypeBuilder regexRunnerFactoryTypeBuilder = DefineType(_module, $"{name}Factory{typenumString}", isPublic: false, isSealed: true, typeof(RegexRunnerFactory));
79+
_ilg = DefineMethod(regexRunnerFactoryTypeBuilder, "CreateInstance", null, typeof(RegexRunner), out _);
80+
GenerateCreateInstance(runnerType);
81+
Type regexRunnerFactoryType = regexRunnerFactoryTypeBuilder.CreateType()!;
82+
83+
// Generate the Regex-derived type.
84+
TypeBuilder regexTypeBuilder = DefineType(_module, name, isPublic, isSealed: false, typeof(Regex));
85+
ConstructorBuilder defaultCtorBuilder = regexTypeBuilder.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, Type.EmptyTypes);
86+
_ilg = defaultCtorBuilder.GetILGenerator();
87+
GenerateRegexDefaultCtor(pattern, options, regexRunnerFactoryType, tree, code, matchTimeout);
88+
if (matchTimeout != Regex.InfiniteMatchTimeout)
89+
{
90+
// We only generate a constructor with a timeout parameter if the regex information supplied has a non-infinite timeout.
91+
// If it has an infinite timeout, then the generated code is not going to respect the timeout. This is a difference from netfx,
92+
// due to the fact that we now special-case an infinite timeout in the code generator to avoid spitting unnecessary code
93+
// and paying for the checks at run time.
94+
_ilg = regexTypeBuilder.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, new Type[] { typeof(TimeSpan) }).GetILGenerator();
95+
GenerateRegexTimeoutCtor(defaultCtorBuilder, regexTypeBuilder);
96+
}
97+
regexTypeBuilder.CreateType();
98+
}
99+
100+
/// <summary>
/// Generates a very simple factory method: emits IL into the current generator that constructs
/// <paramref name="type"/> through its parameterless constructor and returns the new instance.
/// </summary>
/// <param name="type">Type to instantiate; its public parameterless constructor is looked up and assumed to exist.</param>
101+
private void GenerateCreateInstance([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type type)
102+
{
103+
// return new Type();
104+
_ilg!.Emit(OpCodes.Newobj, type.GetConstructor(Type.EmptyTypes)!);
105+
Ret();
106+
}
107+
108+
/// <summary>
/// Emits the body of the generated Regex type's parameterless constructor: calls the base
/// <c>Regex</c> constructor, then stores the pattern, options, runner factory, match timeout and
/// capture metadata into the base type's fields.
/// </summary>
/// <param name="pattern">Pattern string stored into <c>Regex.pattern</c>.</param>
/// <param name="options">Options stored (as an <see cref="int"/> constant) into <c>Regex.roptions</c>.</param>
/// <param name="regexRunnerFactoryType">Factory type instantiated through its parameterless constructor and stored into <c>Regex.factory</c>.</param>
/// <param name="tree">Source of the capture count, capture mappings and capture names.</param>
/// <param name="code">Not referenced by this method's body.</param>
/// <param name="matchTimeout">Timeout baked into the constructor; the infinite timeout is loaded from the static field instead of being rebuilt from ticks.</param>
private void GenerateRegexDefaultCtor(
109+
string pattern,
110+
RegexOptions options,
111+
[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type regexRunnerFactoryType,
112+
RegexTree tree,
113+
RegexInterpreterCode code,
114+
TimeSpan matchTimeout)
115+
{
116+
// Call the base ctor and store pattern, options, and factory.
117+
// base.ctor();
118+
// base.pattern = pattern;
119+
// base.roptions = options;
120+
// base.factory = new DerivedRegexRunnerFactory();
121+
Ldthis();
122+
_ilg!.Emit(OpCodes.Call, typeof(Regex).GetConstructor(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance, null, Type.EmptyTypes, Array.Empty<ParameterModifier>())!);
123+
Ldthis();
124+
Ldstr(pattern);
125+
Stfld(RegexField(nameof(Regex.pattern)));
126+
Ldthis();
127+
Ldc((int)options);
128+
Stfld(RegexField(nameof(Regex.roptions)));
129+
Ldthis();
130+
_ilg!.Emit(OpCodes.Newobj, regexRunnerFactoryType.GetConstructor(Type.EmptyTypes)!);
131+
Stfld(RegexField(nameof(Regex.factory)));
132+
133+
// Store the timeout (no need to validate as it should have happened in RegexCompilationInfo)
// 'this' is pushed once here; whichever branch runs pushes the value, and the Stfld after the if/else stores it.
134+
Ldthis();
135+
if (matchTimeout == Regex.InfiniteMatchTimeout)
136+
{
137+
// base.internalMatchTimeout = Regex.InfiniteMatchTimeout;
138+
_ilg.Emit(OpCodes.Ldsfld, RegexField(nameof(Regex.InfiniteMatchTimeout)));
139+
}
140+
else
141+
{
142+
// base.internalMatchTimeout = TimeSpan.FromTicks(matchTimeout.Ticks);
143+
LdcI8(matchTimeout.Ticks);
144+
Call(typeof(TimeSpan).GetMethod(nameof(TimeSpan.FromTicks), BindingFlags.Public | BindingFlags.Static)!);
145+
}
146+
Stfld(RegexField(nameof(Regex.internalMatchTimeout)));
147+
148+
// Set capsize, caps, capnames, capslist.
149+
Ldthis();
150+
Ldc(tree.CaptureCount);
151+
Stfld(RegexField(nameof(Regex.capsize)));
152+
if (tree.CaptureNumberSparseMapping != null)
153+
{
154+
// caps = new Hashtable {{0, 0}, {1, 1}, ... };
155+
GenerateCreateHashtable(RegexField(nameof(Regex.caps)), tree.CaptureNumberSparseMapping);
156+
}
157+
if (tree.CaptureNameToNumberMapping != null)
158+
{
159+
// capnames = new Hashtable {{"0", 0}, {"1", 1}, ...};
160+
GenerateCreateHashtable(RegexField(nameof(Regex.capnames)), tree.CaptureNameToNumberMapping);
161+
}
162+
if (tree.CaptureNames != null)
163+
{
164+
// capslist = new string[...];
165+
// capslist[0] = "0";
166+
// capslist[1] = "1";
167+
// ...
168+
Ldthis();
169+
Ldc(tree.CaptureNames.Length);
170+
_ilg.Emit(OpCodes.Newarr, typeof(string)); // create new string array
171+
FieldInfo capslistField = RegexField(nameof(Regex.capslist));
172+
Stfld(capslistField);
173+
for (int i = 0; i < tree.CaptureNames.Length; i++)
174+
{
175+
// The array is re-read from the field for every element rather than kept on the evaluation stack.
Ldthisfld(capslistField);
176+
Ldc(i);
177+
Ldstr(tree.CaptureNames[i]);
178+
_ilg.Emit(OpCodes.Stelem_Ref);
179+
}
180+
}
181+
182+
// return;
183+
Ret();
184+
}
185+
186+
/// <summary>
/// Emits the body of the generated Regex type's constructor that takes a <see cref="TimeSpan"/>:
/// it chains to the constructor represented by <paramref name="defaultCtorBuilder"/>, validates the
/// supplied timeout, and then overwrites <c>Regex.internalMatchTimeout</c> with it.
/// </summary>
/// <param name="defaultCtorBuilder">Constructor to chain to; the caller in this file passes the generated type's own parameterless constructor.</param>
/// <param name="regexTypeBuilder">Not referenced by this method's body.</param>
private void GenerateRegexTimeoutCtor(ConstructorBuilder defaultCtorBuilder, TypeBuilder regexTypeBuilder)
187+
{
188+
// this.ctor(); // the parameterless constructor passed in as defaultCtorBuilder, not Regex's base ctor directly
189+
// ValidateMatchTimeout(timeSpan);
190+
// base.internalMatchTimeout = timeSpan;
191+
Ldthis();
192+
_ilg!.Emit(OpCodes.Call, defaultCtorBuilder);
193+
// Ldarg_1 is the TimeSpan parameter (argument 0 is 'this' in an instance constructor).
_ilg.Emit(OpCodes.Ldarg_1);
194+
Call(typeof(Regex).GetMethod(nameof(Regex.ValidateMatchTimeout), BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static)!);
195+
Ldthis();
196+
_ilg.Emit(OpCodes.Ldarg_1);
197+
Stfld(RegexField(nameof(Regex.internalMatchTimeout)));
198+
Ret();
199+
}
200+
201+
/// <summary>
/// Emits IL that assigns a new <see cref="Hashtable"/> to <paramref name="field"/> on <c>this</c>
/// and then adds every entry of <paramref name="ht"/> to it, with the keys and values baked in as constants.
/// </summary>
/// <param name="field">Field that receives the new hashtable.</param>
/// <param name="ht">Entries to reproduce; each key is treated as an <see cref="int"/> or else cast to <see cref="string"/>, and each value is cast to <see cref="int"/>.</param>
internal void GenerateCreateHashtable(FieldInfo field, Hashtable ht)
202+
{
203+
// hashtable = new Hashtable();
204+
Ldthis();
205+
_ilg!.Emit(OpCodes.Newobj, typeof(Hashtable).GetConstructor(Type.EmptyTypes)!);
206+
Stfld(field);
207+
208+
// hashtable.Add(key1, value1);
209+
// hashtable.Add(key2, value2);
210+
// ...
211+
MethodInfo addMethod = typeof(Hashtable).GetMethod(nameof(Hashtable.Add), BindingFlags.Public | BindingFlags.Instance)!;
212+
IDictionaryEnumerator en = ht.GetEnumerator();
213+
while (en.MoveNext())
214+
{
215+
// Load the hashtable back from the field as the receiver of the Add call.
Ldthisfld(field);
216+
217+
// Int keys must be boxed because Hashtable.Add takes object arguments; string keys are already references.
if (en.Key is int key)
218+
{
219+
Ldc(key);
220+
_ilg!.Emit(OpCodes.Box, typeof(int));
221+
}
222+
else
223+
{
224+
Ldstr((string)en.Key);
225+
}
226+
227+
Ldc((int)en.Value!);
228+
_ilg!.Emit(OpCodes.Box, typeof(int));
229+
Callvirt(addMethod);
230+
}
231+
}
232+
233+
/// <summary>
/// Gets the named field from the Regex type. The lookup covers public and non-public, instance and
/// static fields, and the result is not null-checked.
/// </summary>
/// <param name="fieldname">Name of the field declared on <c>Regex</c>.</param>
234+
private static FieldInfo RegexField(string fieldname) =>
235+
typeof(Regex).GetField(fieldname, BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Static)!;
236+
237+
/// <summary>
/// Saves the assembly to <paramref name="fileName"/>, appending ".dll" when the name does not already
/// end with it (ordinal, case-sensitive comparison).
/// </summary>
/// <param name="fileName">Target file name passed to the reflected <c>Save</c> method.</param>
/// <remarks>NOTE(review): a relative name presumably resolves against the current directory - confirm against AssemblyBuilderImpl.Save.</remarks>
238+
internal void Save(string fileName)
239+
{
240+
if (!fileName.EndsWith(".dll", StringComparison.Ordinal))
241+
{
242+
fileName += ".dll";
243+
}
244+
245+
_save.Invoke(_assembly, [fileName]); // TODO: Use public API when it's available: https://github.com/dotnet/runtime/issues/15704
246+
}
247+
248+
/// <summary>Begins the definition of a new type with a specified base class</summary>
/// <param name="moduleBuilder">Module the type is defined in.</param>
/// <param name="typeName">Name of the new type.</param>
/// <param name="isPublic">Selects <c>TypeAttributes.Public</c> when true, <c>TypeAttributes.NotPublic</c> otherwise.</param>
/// <param name="isSealed">Adds <c>TypeAttributes.Sealed</c> when true.</param>
/// <param name="inheritFromClass">Base class of the new type.</param>
/// <returns>The builder returned by <c>ModuleBuilder.DefineType</c>.</returns>
249+
private static TypeBuilder DefineType(
250+
ModuleBuilder moduleBuilder,
251+
string typeName,
252+
bool isPublic,
253+
bool isSealed,
254+
[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)] Type inheritFromClass)
255+
{
256+
// Every generated type is a class with BeforeFieldInit; only visibility and sealing vary per call.
TypeAttributes attrs = TypeAttributes.Class | TypeAttributes.BeforeFieldInit | (isPublic ? TypeAttributes.Public : TypeAttributes.NotPublic);
257+
if (isSealed)
258+
{
259+
attrs |= TypeAttributes.Sealed;
260+
}
261+
262+
return moduleBuilder.DefineType(typeName, attrs, inheritFromClass);
263+
}
264+
265+
/// <summary>
/// Begins the definition of a new method with the specified parameter types and return type.
/// The method is defined with <c>MethodAttributes.Family | MethodAttributes.Virtual</c>.
/// </summary>
/// <param name="typeBuilder">Type the method is defined on.</param>
/// <param name="methname">Name of the method.</param>
/// <param name="parameterTypes">Parameter types, or <see langword="null"/>; passed through to <c>TypeBuilder.DefineMethod</c>.</param>
/// <param name="returnType">Return type, or <see langword="null"/>; passed through unchanged (presumably meaning void - confirm against the Reflection.Emit documentation).</param>
/// <param name="builder">Receives the <see cref="MethodBuilder"/> for the new method.</param>
/// <returns>The IL generator of the new method.</returns>
266+
private static ILGenerator DefineMethod(TypeBuilder typeBuilder, string methname, Type[]? parameterTypes, Type? returnType, out MethodBuilder builder)
267+
{
268+
builder = typeBuilder.DefineMethod(methname, MethodAttributes.Family | MethodAttributes.Virtual, returnType, parameterTypes);
269+
return builder.GetILGenerator();
270+
}
271+
}
272+
}
273+
#endif

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5305,7 +5305,7 @@ void EmitStackPop()
53055305
}
53065306
}
53075307

5308-
protected void EmitScan(RegexOptions options, DynamicMethod tryFindNextStartingPositionMethod, DynamicMethod tryMatchAtCurrentPositionMethod)
5308+
protected void EmitScan(RegexOptions options, MethodInfo tryFindNextStartingPositionMethod, MethodInfo tryMatchAtCurrentPositionMethod)
53095309
{
53105310
// As with the source generator, we can emit special code for common circumstances rather than always emitting
53115311
// the most general purpose scan loop. Unlike the source generator, however, code appearance isn't important

0 commit comments

Comments
 (0)