|
| 1 | +// Licensed to the .NET Foundation under one or more agreements. |
| 2 | +// The .NET Foundation licenses this file to you under the MIT license. |
| 3 | + |
| 4 | +using System.Collections; |
| 5 | +using System.Collections.Generic; |
| 6 | +using System.Diagnostics.CodeAnalysis; |
| 7 | +using System.Reflection; |
| 8 | +using System.Reflection.Emit; |
| 9 | +using System.Threading; |
| 10 | + |
| 11 | +// WARNING: |
| 12 | +// The code generated by RegexCompiler is not intended to be saved to disk and loaded back later. |
| 13 | +// It accesses internal and private members of System.Text.RegularExpressions, which may change |
| 14 | +// at any point in the future, and relies on details about the current machine/process, e.g. is |
| 15 | +// it 32-bit or 64-bit. The generated surface area has also not been vetted. This code exists |
| 16 | +// only for debugging purposes, to make it easier to examine the IL that RegexCompiler emits. |
| 17 | + |
#if DEBUG
namespace System.Text.RegularExpressions
{
    /// <summary>Compiles a Regex to an assembly that can be saved to disk.</summary>
    /// <remarks>
    /// Only built in DEBUG configurations. For each regex it emits three types into a single
    /// dynamic module: a <see cref="RegexRunner"/>-derived type, a <see cref="RegexRunnerFactory"/>-derived
    /// type, and a <see cref="Regex"/>-derived type whose constructor wires the other two together.
    /// </remarks>
    [RequiresDynamicCode("The RegexAssemblyCompiler type requires dynamic code to be enabled.")]
    internal sealed class RegexAssemblyCompiler : RegexCompiler
    {
        /// <summary>Type count used to augment generated type names to create unique names.</summary>
        private static int s_typeCount;

        /// <summary>The persisted assembly builder that receives every generated type.</summary>
        private readonly AssemblyBuilder _assembly;

        /// <summary>The single module, named after the assembly, in which all types are defined.</summary>
        private readonly ModuleBuilder _module;

        /// <summary>The non-public <c>Save(string)</c> method of the assembly builder, located via reflection.</summary>
        private readonly MethodInfo _save;

        /// <summary>Creates the persisted assembly builder and the module that will hold the generated types.</summary>
        /// <param name="an">Name of the assembly to create; its <see cref="AssemblyName.Name"/> plus ".dll" names the module.</param>
        /// <param name="attribs">Optional custom attributes to apply to the assembly.</param>
        /// <param name="resourceFile">Must be <see langword="null"/>; unmanaged resources are not supported.</param>
        /// <exception cref="PlatformNotSupportedException"><paramref name="resourceFile"/> is not <see langword="null"/>.</exception>
        /// <exception cref="InvalidOperationException">A required non-public Reflection.Emit member could not be found, or no assembly builder was returned.</exception>
        internal RegexAssemblyCompiler(AssemblyName an, CustomAttributeBuilder[]? attribs, string? resourceFile)
        {
            if (resourceFile is not null)
            {
                // Unmanaged resources are not supported: _assembly.DefineUnmanagedResource(resourceFile);
                throw new PlatformNotSupportedException();
            }

            // TODO: Use public API when it's available: https://github.com/dotnet/runtime/issues/15704
            Type abType = Type.GetType("System.Reflection.Emit.AssemblyBuilderImpl, System.Reflection.Emit", throwOnError: true)!;

            MethodInfo defineDynamicAssembly = abType.GetMethod("DefinePersistedAssembly",
                BindingFlags.NonPublic | BindingFlags.Static,
                [typeof(AssemblyName), typeof(Assembly), typeof(List<CustomAttributeBuilder>)]) ??
                throw new InvalidOperationException("Could not find method AssemblyBuilderImpl.DefinePersistedAssembly");

            // The reflected method takes a List<CustomAttributeBuilder>, so the array is copied into one (or null is passed through).
            List<CustomAttributeBuilder>? attributeList = attribs is not null ? new List<CustomAttributeBuilder>(attribs) : null;
            _assembly = (AssemblyBuilder?)defineDynamicAssembly.Invoke(null, [an, typeof(object).Assembly, attributeList]) ??
                throw new InvalidOperationException("DefinePersistedAssembly returned null");

            _save = abType.GetMethod("Save", BindingFlags.NonPublic | BindingFlags.Instance, [typeof(string)]) ??
                throw new InvalidOperationException("Could not find method AssemblyBuilderImpl.Save");

            _module = _assembly.DefineDynamicModule(an.Name + ".dll");
        }

        /// <summary>Emits the runner, runner-factory, and Regex-derived types for a single regular expression.</summary>
        /// <param name="pattern">The pattern text stored into the generated Regex type.</param>
        /// <param name="options">The options stored into the generated Regex type and used during code generation.</param>
        /// <param name="name">Name of the generated Regex type; also the prefix of the runner and factory type names.</param>
        /// <param name="isPublic">Whether the generated Regex type is public. The runner and factory types are always non-public.</param>
        /// <param name="tree">The parsed regex tree from which code and capture metadata are generated.</param>
        /// <param name="code">Forwarded to the default-constructor generator, which does not currently read it.</param>
        /// <param name="matchTimeout">The default match timeout; a timeout-accepting constructor is only emitted when this is not infinite.</param>
        internal void GenerateRegexType(string pattern, RegexOptions options, string name, bool isPublic, RegexTree tree, RegexInterpreterCode code, TimeSpan matchTimeout)
        {
            // Store arguments into the base type's fields
            _options = options;
            _regexTree = tree;

            // Pick a unique numeric suffix for the helper type names.
            string typenumString = ((uint)Interlocked.Increment(ref s_typeCount)).ToString();

            // Generate the RegexRunner-derived type.
            TypeBuilder regexRunnerTypeBuilder = DefineType(_module, $"{name}Runner{typenumString}", isPublic: false, isSealed: true, typeof(RegexRunner));

            _ilg = DefineMethod(regexRunnerTypeBuilder, "TryFindNextPossibleStartingPosition", [typeof(ReadOnlySpan<char>)], typeof(bool), out MethodBuilder tryFindNextPossibleStartingPositionMethod);
            EmitTryFindNextPossibleStartingPosition();

            _ilg = DefineMethod(regexRunnerTypeBuilder, "TryMatchAtCurrentPosition", [typeof(ReadOnlySpan<char>)], typeof(bool), out MethodBuilder tryMatchAtCurrentPositionMethod);
            EmitTryMatchAtCurrentPosition();

            _ilg = DefineMethod(regexRunnerTypeBuilder, "Scan", [typeof(ReadOnlySpan<char>)], null, out _);
            EmitScan(options, tryFindNextPossibleStartingPositionMethod, tryMatchAtCurrentPositionMethod);

            Type runnerType = regexRunnerTypeBuilder.CreateType()!;

            // Generate the RegexRunnerFactory-derived type.
            TypeBuilder regexRunnerFactoryTypeBuilder = DefineType(_module, $"{name}Factory{typenumString}", isPublic: false, isSealed: true, typeof(RegexRunnerFactory));
            _ilg = DefineMethod(regexRunnerFactoryTypeBuilder, "CreateInstance", null, typeof(RegexRunner), out _);
            GenerateCreateInstance(runnerType);
            Type regexRunnerFactoryType = regexRunnerFactoryTypeBuilder.CreateType()!;

            // Generate the Regex-derived type.
            TypeBuilder regexTypeBuilder = DefineType(_module, name, isPublic, isSealed: false, typeof(Regex));
            ConstructorBuilder defaultCtorBuilder = regexTypeBuilder.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, Type.EmptyTypes);
            _ilg = defaultCtorBuilder.GetILGenerator();
            GenerateRegexDefaultCtor(pattern, options, regexRunnerFactoryType, tree, code, matchTimeout);
            if (matchTimeout != Regex.InfiniteMatchTimeout)
            {
                // We only generate a constructor with a timeout parameter if the regex information supplied has a non-infinite timeout.
                // If it has an infinite timeout, then the generated code is not going to respect the timeout. This is a difference from netfx,
                // due to the fact that we now special-case an infinite timeout in the code generator to avoid spitting unnecessary code
                // and paying for the checks at run time.
                _ilg = regexTypeBuilder.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, [typeof(TimeSpan)]).GetILGenerator();
                GenerateRegexTimeoutCtor(defaultCtorBuilder, regexTypeBuilder);
            }
            regexTypeBuilder.CreateType();
        }

        /// <summary>Generates a very simple factory method that returns a new instance of <paramref name="type"/>.</summary>
        /// <param name="type">The type to instantiate through its public parameterless constructor.</param>
        private void GenerateCreateInstance([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type type)
        {
            // return new Type();
            _ilg!.Emit(OpCodes.Newobj, type.GetConstructor(Type.EmptyTypes)!);
            Ret();
        }

        /// <summary>Emits the parameterless constructor of the generated Regex-derived type.</summary>
        /// <param name="pattern">The pattern string stored into the <c>pattern</c> field.</param>
        /// <param name="options">The options stored (as an int constant) into the <c>roptions</c> field.</param>
        /// <param name="regexRunnerFactoryType">The generated factory type, instantiated and stored into the <c>factory</c> field.</param>
        /// <param name="tree">Supplies the capture count and the optional capture mappings and names.</param>
        /// <param name="code">Not read by this method; retained so the signature stays unchanged for callers.</param>
        /// <param name="matchTimeout">The timeout stored into the <c>internalMatchTimeout</c> field.</param>
        private void GenerateRegexDefaultCtor(
            string pattern,
            RegexOptions options,
            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type regexRunnerFactoryType,
            RegexTree tree,
            RegexInterpreterCode code,
            TimeSpan matchTimeout)
        {
            // Call the base ctor and store pattern, options, and factory.
            // base.ctor();
            // base.pattern = pattern;
            // base.options = options;
            // base.factory = new DerivedRegexRunnerFactory();
            Ldthis();
            _ilg!.Emit(OpCodes.Call, typeof(Regex).GetConstructor(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance, null, Type.EmptyTypes, Array.Empty<ParameterModifier>())!);
            Ldthis();
            Ldstr(pattern);
            Stfld(RegexField(nameof(Regex.pattern)));
            Ldthis();
            Ldc((int)options);
            Stfld(RegexField(nameof(Regex.roptions)));
            Ldthis();
            _ilg!.Emit(OpCodes.Newobj, regexRunnerFactoryType.GetConstructor(Type.EmptyTypes)!);
            Stfld(RegexField(nameof(Regex.factory)));

            // Store the timeout (no need to validate as it should have happened in RegexCompilationInfo)
            Ldthis();
            if (matchTimeout == Regex.InfiniteMatchTimeout)
            {
                // base.internalMatchTimeout = Regex.InfiniteMatchTimeout;
                _ilg.Emit(OpCodes.Ldsfld, RegexField(nameof(Regex.InfiniteMatchTimeout)));
            }
            else
            {
                // base.internalMatchTimeout = TimeSpan.FromTicks(matchTimeout.Ticks);
                LdcI8(matchTimeout.Ticks);

                // Bind by exact signature so the lookup cannot become ambiguous if overloads are ever added.
                Call(typeof(TimeSpan).GetMethod(nameof(TimeSpan.FromTicks), BindingFlags.Public | BindingFlags.Static, [typeof(long)])!);
            }
            Stfld(RegexField(nameof(Regex.internalMatchTimeout)));

            // Set capsize, caps, capnames, capslist.
            Ldthis();
            Ldc(tree.CaptureCount);
            Stfld(RegexField(nameof(Regex.capsize)));
            if (tree.CaptureNumberSparseMapping is not null)
            {
                // Caps = new Hashtable {{0, 0}, {1, 1}, ... };
                GenerateCreateHashtable(RegexField(nameof(Regex.caps)), tree.CaptureNumberSparseMapping);
            }
            if (tree.CaptureNameToNumberMapping is not null)
            {
                // CapNames = new Hashtable {{"0", 0}, {"1", 1}, ...};
                GenerateCreateHashtable(RegexField(nameof(Regex.capnames)), tree.CaptureNameToNumberMapping);
            }
            if (tree.CaptureNames is not null)
            {
                // capslist = new string[...];
                // capslist[0] = "0";
                // capslist[1] = "1";
                // ...
                Ldthis();
                Ldc(tree.CaptureNames.Length);
                _ilg.Emit(OpCodes.Newarr, typeof(string)); // create new string array
                FieldInfo capslistField = RegexField(nameof(Regex.capslist));
                Stfld(capslistField);
                for (int i = 0; i < tree.CaptureNames.Length; i++)
                {
                    Ldthisfld(capslistField);
                    Ldc(i);
                    Ldstr(tree.CaptureNames[i]);
                    _ilg.Emit(OpCodes.Stelem_Ref);
                }
            }

            // return;
            Ret();
        }

        /// <summary>Emits the constructor of the generated Regex-derived type that accepts a match timeout.</summary>
        /// <param name="defaultCtorBuilder">The generated type's parameterless constructor, which the emitted code calls first.</param>
        /// <param name="regexTypeBuilder">Not read by this method; retained so the signature stays unchanged for callers.</param>
        private void GenerateRegexTimeoutCtor(ConstructorBuilder defaultCtorBuilder, TypeBuilder regexTypeBuilder)
        {
            // this.ctor();   (the generated parameterless constructor, which in turn calls the base ctor)
            // ValidateMatchTimeout(timeSpan);
            // base.internalMatchTimeout = timeSpan;
            Ldthis();
            _ilg!.Emit(OpCodes.Call, defaultCtorBuilder);
            _ilg.Emit(OpCodes.Ldarg_1);
            Call(typeof(Regex).GetMethod(nameof(Regex.ValidateMatchTimeout), BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static)!);
            Ldthis();
            _ilg.Emit(OpCodes.Ldarg_1);
            Stfld(RegexField(nameof(Regex.internalMatchTimeout)));
            Ret();
        }

        /// <summary>Emits code that assigns a new <see cref="Hashtable"/> to <paramref name="field"/> and adds every entry of <paramref name="ht"/> to it.</summary>
        /// <param name="field">The instance field that receives the new hashtable.</param>
        /// <param name="ht">The entries to reproduce. Keys are emitted as boxed ints when they are ints and as strings otherwise; values are cast to int.</param>
        internal void GenerateCreateHashtable(FieldInfo field, Hashtable ht)
        {
            // hashtable = new Hashtable();
            Ldthis();
            _ilg!.Emit(OpCodes.Newobj, typeof(Hashtable).GetConstructor(Type.EmptyTypes)!);
            Stfld(field);

            // hashtable.Add(key1, value1);
            // hashtable.Add(key2, value2);
            // ...
            MethodInfo addMethod = typeof(Hashtable).GetMethod(nameof(Hashtable.Add), BindingFlags.Public | BindingFlags.Instance, [typeof(object), typeof(object)])!;
            foreach (DictionaryEntry entry in ht)
            {
                Ldthisfld(field);

                if (entry.Key is int key)
                {
                    Ldc(key);
                    _ilg!.Emit(OpCodes.Box, typeof(int));
                }
                else
                {
                    Ldstr((string)entry.Key);
                }

                Ldc((int)entry.Value!);
                _ilg!.Emit(OpCodes.Box, typeof(int));
                Callvirt(addMethod);
            }
        }

        /// <summary>Gets the named field (instance or static, public or non-public) from the Regex type.</summary>
        /// <param name="fieldname">The name of the field to look up.</param>
        /// <exception cref="InvalidOperationException">No field with that name exists on <see cref="Regex"/>.</exception>
        private static FieldInfo RegexField(string fieldname) =>
            typeof(Regex).GetField(fieldname, BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Static) ??
            throw new InvalidOperationException($"Could not find field Regex.{fieldname}");

        /// <summary>Saves the assembly to the specified file, appending ".dll" when the name does not already end with it.</summary>
        /// <param name="fileName">The target file name. The ".dll" suffix check is ordinal and case-sensitive.</param>
        internal void Save(string fileName)
        {
            if (!fileName.EndsWith(".dll", StringComparison.Ordinal))
            {
                fileName += ".dll";
            }

            _save.Invoke(_assembly, [fileName]); // TODO: Use public API when it's available: https://github.com/dotnet/runtime/issues/15704
        }

        /// <summary>Begins the definition of a new type with a specified base class.</summary>
        /// <param name="moduleBuilder">The module in which the type is defined.</param>
        /// <param name="typeName">The name of the new type.</param>
        /// <param name="isPublic">Whether the type is public or non-public.</param>
        /// <param name="isSealed">Whether the type is marked sealed.</param>
        /// <param name="inheritFromClass">The base class of the new type.</param>
        /// <returns>The builder for the new type.</returns>
        private static TypeBuilder DefineType(
            ModuleBuilder moduleBuilder,
            string typeName,
            bool isPublic,
            bool isSealed,
            [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)] Type inheritFromClass)
        {
            TypeAttributes attrs = TypeAttributes.Class | TypeAttributes.BeforeFieldInit | (isPublic ? TypeAttributes.Public : TypeAttributes.NotPublic);
            if (isSealed)
            {
                attrs |= TypeAttributes.Sealed;
            }

            return moduleBuilder.DefineType(typeName, attrs, inheritFromClass);
        }

        /// <summary>Begins the definition of a new protected virtual method with the specified parameter and return types.</summary>
        /// <param name="typeBuilder">The type on which the method is defined.</param>
        /// <param name="methname">The name of the method.</param>
        /// <param name="parameterTypes">The parameter types, or <see langword="null"/> for a method without parameters.</param>
        /// <param name="returnType">The return type, or <see langword="null"/> for a method that returns nothing.</param>
        /// <param name="builder">Receives the builder for the newly defined method.</param>
        /// <returns>The IL generator for the new method's body.</returns>
        private static ILGenerator DefineMethod(TypeBuilder typeBuilder, string methname, Type[]? parameterTypes, Type? returnType, out MethodBuilder builder)
        {
            builder = typeBuilder.DefineMethod(methname, MethodAttributes.Family | MethodAttributes.Virtual, returnType, parameterTypes);
            return builder.GetILGenerator();
        }
    }
}
#endif
0 commit comments