From 40da8443e53b8fafa3828e4749f063c08954b1a5 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Thu, 2 Dec 2021 13:06:15 -0500 Subject: [PATCH 1/4] Delete old code generation approach from RegexCompiler / source generator In .NET Framework and up through .NET Core 3.1, the code generated for RegexOptions.Compiled was effectively an unrolled version of what RegexInterpreter would process. The RegexNode tree would be turned into a series of opcodes via RegexWriter; the interpreter would then sit in a loop processing those opcodes, and the RegexCompiler iterates through the opcodes generating code for each equivalent to what the interpreter would do but with some decisions made at compile-time rather than at run-time. This approach, however, leads to complicated code that's not pay-for-play (e.g. a big backtracking jump table that all compilations go through even if there's no backtracking), that doesn't factor in the shape of the tree (e.g. it's difficult to add optimizations based on interactions between nodes in the graph), and that doesn't read well when emitted as C# instead of IL as part of the source generator. In .NET 5, we started adding an alternative implementation that processed the RegexNode tree directly, addressing all of those cited issues; however, it only worked for a subset of expressions, namely those with little-to-no backtracking (e.g. non-atomic loops and alternations weren't supported). Since then, we've improved it to the point where everything other than RegexOptions.RightToLeft (which implicitly means lookbehinds as well) is supported, and we've agreed it's ok to drop compilation for those constructs; if they ever become an issue, we can add support for them via the new compilation scheme.
As such, this PR: - Deletes all of the code associated with the older code generation scheme - Updates the Regex ctor to fall back to selecting the interpreter if the expression can't be compiled - Updates the source generator to fall back to just emitting a cached use of Regex if the expression can't be compiled (and issuing a diagnostic in that case) - Adds several tests that now pass with the new scheme that didn't with the old (and that still don't with the interpreter) --- .../gen/DiagnosticDescriptors.cs | 20 +- .../gen/RegexGenerator.Emitter.cs | 1422 +------- .../gen/RegexGenerator.Parser.cs | 3 +- .../gen/RegexGenerator.cs | 8 +- .../gen/Resources/Strings.resx | 6 + .../gen/Resources/xlf/Strings.cs.xlf | 10 + .../gen/Resources/xlf/Strings.de.xlf | 10 + .../gen/Resources/xlf/Strings.es.xlf | 10 + .../gen/Resources/xlf/Strings.fr.xlf | 10 + .../gen/Resources/xlf/Strings.it.xlf | 10 + .../gen/Resources/xlf/Strings.ja.xlf | 10 + .../gen/Resources/xlf/Strings.ko.xlf | 10 + .../gen/Resources/xlf/Strings.pl.xlf | 10 + .../gen/Resources/xlf/Strings.pt-BR.xlf | 10 + .../gen/Resources/xlf/Strings.ru.xlf | 10 + .../gen/Resources/xlf/Strings.tr.xlf | 10 + .../gen/Resources/xlf/Strings.zh-Hans.xlf | 10 + .../gen/Resources/xlf/Strings.zh-Hant.xlf | 10 + .../System/Text/RegularExpressions/Regex.cs | 9 +- .../Text/RegularExpressions/RegexCompiler.cs | 3010 ++--------------- .../RegularExpressions/RegexLWCGCompiler.cs | 21 +- .../Text/RegularExpressions/RegexNode.cs | 43 +- .../tests/Regex.MultipleMatches.Tests.cs | 43 +- .../tests/RegexGeneratorHelper.netcoreapp.cs | 9 +- .../RegexGeneratorParserTests.cs | 60 + 25 files changed, 582 insertions(+), 4202 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs b/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs index aec397eb7d6437..2c1d2a0d4a881f 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs +++ 
b/src/libraries/System.Text.RegularExpressions/gen/DiagnosticDescriptors.cs @@ -8,11 +8,13 @@ namespace System.Text.RegularExpressions.Generator { internal static class DiagnosticDescriptors { + private const string Category = "RegexGenerator"; + public static DiagnosticDescriptor InvalidRegexGeneratorAttribute { get; } = new DiagnosticDescriptor( id: "SYSLIB1040", title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), - category: "RegexGenerator", + category: Category, DiagnosticSeverity.Error, isEnabledByDefault: true, customTags: WellKnownDiagnosticTags.NotConfigurable); @@ -21,7 +23,7 @@ internal static class DiagnosticDescriptors id: "SYSLIB1041", title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.MultipleRegexGeneratorAttributesMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), - category: "RegexGenerator", + category: Category, DiagnosticSeverity.Error, isEnabledByDefault: true, customTags: WellKnownDiagnosticTags.NotConfigurable); @@ -30,7 +32,7 @@ internal static class DiagnosticDescriptors id: "SYSLIB1042", title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.InvalidRegexArgumentsMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), - category: "RegexGenerator", + category: Category, DiagnosticSeverity.Error, isEnabledByDefault: true, 
customTags: WellKnownDiagnosticTags.NotConfigurable); @@ -39,7 +41,7 @@ internal static class DiagnosticDescriptors id: "SYSLIB1043", title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustHaveValidSignatureMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), - category: "RegexGenerator", + category: Category, DiagnosticSeverity.Error, isEnabledByDefault: true, customTags: WellKnownDiagnosticTags.NotConfigurable); @@ -48,9 +50,17 @@ internal static class DiagnosticDescriptors id: "SYSLIB1044", title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), messageFormat: new LocalizableResourceString(nameof(SR.InvalidLangVersionMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), - category: "RegexGenerator", + category: Category, DiagnosticSeverity.Error, isEnabledByDefault: true, customTags: WellKnownDiagnosticTags.NotConfigurable); + + public static DiagnosticDescriptor LimitedSourceGeneration { get; } = new DiagnosticDescriptor( + id: "SYSLIB1045", + title: new LocalizableResourceString(nameof(SR.LimitedSourceGenerationTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + messageFormat: new LocalizableResourceString(nameof(SR.LimitedSourceGenerationMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)), + category: Category, + DiagnosticSeverity.Info, + isEnabledByDefault: true); } } diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index ad5306008cf9c1..4bc44709d48704 100644 --- 
a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -6,6 +6,7 @@ using System.CodeDom.Compiler; using System.Collections; using System.Collections.Generic; +using System.Collections.Immutable; using System.Diagnostics; using System.Globalization; using System.IO; @@ -14,6 +15,7 @@ using System.Security.Cryptography; using System.Text; using System.Threading; +using Microsoft.CodeAnalysis; using Microsoft.CodeAnalysis.CSharp; // NOTE: The logic in this file is largely a copy of logic in RegexCompiler, emitting C# instead of MSIL. @@ -45,7 +47,7 @@ public partial class RegexGenerator }; /// Generates the code for one regular expression class. - private static string EmitRegexType(RegexType regexClass) + private static (string, ImmutableArray) EmitRegexType(RegexType regexClass) { var sb = new StringBuilder(1024); var writer = new IndentedTextWriter(new StringWriter(sb)); @@ -86,7 +88,7 @@ private static string EmitRegexType(RegexType regexClass) generatedName += ComputeStringHash(generatedName).ToString("X"); // Generate the regex type - EmitRegexMethod(writer, regexClass.Method, generatedName); + ImmutableArray diagnostics = EmitRegexMethod(writer, regexClass.Method, generatedName); while (writer.Indent != 0) { @@ -95,10 +97,10 @@ private static string EmitRegexType(RegexType regexClass) } writer.Flush(); - return sb.ToString(); + return (sb.ToString(), diagnostics); // FNV-1a hash function. The actual algorithm used doesn't matter; just something simple - // to create a pseudo-random value based on input text. + // to create a deterministic, pseudo-random value that's based on input text. static uint ComputeStringHash(string s) { uint hashCode = 2166136261; @@ -111,12 +113,49 @@ static uint ComputeStringHash(string s) } /// Gets whether a given regular expression method is supported by the code generator. 
- private static bool SupportsCustomCodeGeneration(RegexMethod rm) => - // The generator doesn't currently know how to emit code for NonBacktracking. - (rm.Options & RegexOptions.NonBacktracking) == 0; + private static bool SupportsCodeGeneration(RegexMethod rm) + { + RegexNode root = rm.Code.Tree.Root; + + if (!root.SupportsCompilation()) + { + return false; + } + + if (ExceedsMaxDepthForSimpleCodeGeneration(root, allowedDepth: 40)) + { + // Deep RegexNode trees can result in emitting C# code that exceeds C# compiler + // limitations, leading to "CS8078: An expression is too long or complex to compile". + // Place an artificial limit on max tree depth in order to mitigate such issues. + // The allowed depth can be tweaked as needed; it's exceedingly rare to find + // expressions with such deep trees. + return false; + } + + return true; + + static bool ExceedsMaxDepthForSimpleCodeGeneration(RegexNode node, int allowedDepth) + { + if (allowedDepth <= 0) + { + return true; + } + + int childCount = node.ChildCount(); + for (int i = 0; i < childCount; i++) + { + if (ExceedsMaxDepthForSimpleCodeGeneration(node.Child(i), allowedDepth - 1)) + { + return true; + } + } + + return false; + } + } /// Generates the code for a regular expression method. - private static void EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, string id) + private static ImmutableArray EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, string id) { string patternExpression = Literal(rm.Pattern); string optionsExpression = $"(global::System.Text.RegularExpressions.RegexOptions)({(int)rm.Options})"; @@ -134,11 +173,11 @@ private static void EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, s writer.Write(" public static global::System.Text.RegularExpressions.Regex Instance { get; } = "); // If we can't support custom generation for this regex, spit out a Regex constructor call. 
- if (!SupportsCustomCodeGeneration(rm)) + if (!SupportsCodeGeneration(rm)) { writer.WriteLine($"new global::System.Text.RegularExpressions.Regex({patternExpression}, {optionsExpression}, {timeoutExpression});"); writer.WriteLine("}"); - return; + return ImmutableArray.Create(Diagnostic.Create(DiagnosticDescriptors.LimitedSourceGeneration, rm.MethodSyntax.GetLocation())); } writer.WriteLine($"new {id}();"); @@ -213,6 +252,7 @@ private static void EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, s writer.WriteLine($" }}"); writer.WriteLine($" }}"); writer.WriteLine("}"); + return ImmutableArray.Empty; static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht) { @@ -242,7 +282,6 @@ private static void EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, { RegexOptions options = (RegexOptions)rm.Options; RegexCode code = rm.Code; - bool rtl = code.RightToLeft; bool hasTextInfo = false; // In some cases, we need to emit declarations at the beginning of the method, but we only discover we need them later. @@ -254,10 +293,6 @@ private static void EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, // Emit locals initialization writer.WriteLine("global::System.ReadOnlySpan runtextSpan = base.runtext;"); writer.WriteLine("int runtextpos = base.runtextpos;"); - if (rtl) - { - writer.WriteLine("int runtextbeg = base.runtextbeg;"); - } writer.WriteLine($"int runtextend = base.runtextend;{AdditionalDeclarationsPlaceholder}"); // placeholder at the end of a line so the generated indents line up writer.WriteLine(); @@ -266,19 +301,12 @@ private static void EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, // especially since we want the "return false" code regardless. int minRequiredLength = rm.Code.Tree.MinRequiredLength; Debug.Assert(minRequiredLength >= 0); - string clause = !rtl ? 
- minRequiredLength switch - { - 0 => "if (runtextpos <= runtextend)", - 1 => "if (runtextpos < runtextend)", - _ => $"if (runtextpos < runtextend - {minRequiredLength - 1})" - } : - minRequiredLength switch - { - 0 => "if (runtextpos >= runtextbeg)", - 1 => "if (runtextpos > runtextbeg)", - _ => $"if (runtextpos - {minRequiredLength - 1} > runtextbeg)" - }; + string clause = minRequiredLength switch + { + 0 => "if (runtextpos <= runtextend)", + 1 => "if (runtextpos < runtextend)", + _ => $"if (runtextpos < runtextend - {minRequiredLength - 1})" + }; using (EmitBlock(writer, clause)) { // Emit any anchors. @@ -299,11 +327,6 @@ private static void EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, EmitIndexOf_LeftToRight(code.FindOptimizations.LeadingCaseSensitivePrefix); break; - case FindNextStartingPositionMode.LeadingPrefix_RightToLeft_CaseSensitive: - Debug.Assert(!string.IsNullOrEmpty(code.FindOptimizations.LeadingCaseSensitivePrefix)); - EmitIndexOf_RightToLeft(code.FindOptimizations.LeadingCaseSensitivePrefix); - break; - case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseSensitive: case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive: case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive: @@ -312,12 +335,6 @@ private static void EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, EmitFixedSet_LeftToRight(); break; - case FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseSensitive: - case FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive: - Debug.Assert(code.FindOptimizations.FixedDistanceSets is { Count: > 0 }); - EmitFixedSet_RightToLeft(); - break; - default: Debug.Fail($"Unexpected mode: {code.FindOptimizations.FindMode}"); goto case FindNextStartingPositionMode.NoSearch; @@ -332,7 +349,7 @@ private static void EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, writer.WriteLine("// No match"); writer.WriteLine("ReturnFalse:"); - 
writer.WriteLine(!rm.Code.RightToLeft ? "base.runtextpos = runtextend;" : "base.runtextpos = runtextbeg;"); + writer.WriteLine("base.runtextpos = runtextend;"); writer.WriteLine("return false;"); // We're done. Patch up any additional declarations. @@ -346,85 +363,40 @@ bool EmitAnchors() // Generate anchor checks. if ((code.FindOptimizations.LeadingAnchor & (RegexPrefixAnalyzer.Beginning | RegexPrefixAnalyzer.Start | RegexPrefixAnalyzer.EndZ | RegexPrefixAnalyzer.End | RegexPrefixAnalyzer.Bol)) != 0) { - // TODO: Interpreted and Compiled differ in various places as to whether they update positions, as do LTR vs RTL. Determine why. switch (code.FindOptimizations.LeadingAnchor) { case RegexPrefixAnalyzer.Beginning: writer.WriteLine("// Beginning \\A anchor"); - if (!rtl) + using (EmitBlock(writer, "if (runtextpos > runtextbeg)")) { - using (EmitBlock(writer, "if (runtextpos > runtextbeg)")) - { - writer.WriteLine("goto ReturnFalse;"); - } - } - else - { - // TODO: RegexOptions.Compiled doesn't ever return false here. Instead it updates the position. Why? - using (EmitBlock(writer, "if (runtextpos > runtextbeg)")) - { - writer.WriteLine("base.runtextpos = runtextbeg;"); - } + writer.WriteLine("goto ReturnFalse;"); } writer.WriteLine("return true;"); return true; case RegexPrefixAnalyzer.Start: writer.WriteLine("// Start \\G anchor"); - if (!rtl) - { - using (EmitBlock(writer, "if (runtextpos > runtextstart)")) - { - writer.WriteLine("goto ReturnFalse;"); - } - } - else + using (EmitBlock(writer, "if (runtextpos > runtextstart)")) { - // TODO: RegexOptions.Compiled doesn't ever return false here. Instead it updates the position. Why? - using (EmitBlock(writer, "if (runtextpos < runtextstart)")) - { - writer.WriteLine("goto ReturnFalse;"); - } + writer.WriteLine("goto ReturnFalse;"); } writer.WriteLine("return true;"); return true; case RegexPrefixAnalyzer.EndZ: - // TODO: Why are the LTR and RTL cases inconsistent here with RegexOptions.Compiled? 
writer.WriteLine("// End \\Z anchor"); - if (!rtl) - { - using (EmitBlock(writer, "if (runtextpos < runtextend - 1)")) - { - writer.WriteLine("base.runtextpos = runtextend - 1;"); - } - } - else + using (EmitBlock(writer, "if (runtextpos < runtextend - 1)")) { - // TODO: This differs subtly between interpreted and compiled. Why? - using (EmitBlock(writer, "if (runtextpos < runtextend - 1 || (runtextpos == runtextend - 1 && runtextSpan[runtextpos] != '\\n'))")) - { - writer.WriteLine("goto ReturnFalse;"); - } + writer.WriteLine("base.runtextpos = runtextend - 1;"); } writer.WriteLine("return true;"); return true; case RegexPrefixAnalyzer.End: writer.WriteLine("// End \\z anchor"); - if (!rtl) - { - using (EmitBlock(writer, "if (runtextpos < runtextend)")) - { - writer.WriteLine("base.runtextpos = runtextend;"); - } - } - else + using (EmitBlock(writer, "if (runtextpos < runtextend)")) { - using (EmitBlock(writer, "if (runtextpos < runtextend)")) - { - writer.WriteLine("goto ReturnFalse;"); - } + writer.WriteLine("base.runtextpos = runtextend;"); } writer.WriteLine("return true;"); return true; @@ -434,7 +406,6 @@ bool EmitAnchors() // other anchors like Beginning, there are potentially multiple places a BOL can match. So unlike // the other anchors, which all skip all subsequent processing if found, with BOL we just use it // to boost our position to the next line, and then continue normally with any searches. - Debug.Assert(!rtl, "RightToLeft isn't implemented and should have been filtered out previously"); writer.WriteLine("// Beginning-of-line anchor"); using (EmitBlock(writer, "if (runtextpos > runtextbeg && runtextSpan[runtextpos - 1] != '\\n')")) { @@ -464,46 +435,6 @@ void EmitIndexOf_LeftToRight(string prefix) writer.WriteLine("}"); } - // Emits a case-sensitive right-to-left prefix search for a string at the beginning of the pattern. 
- void EmitIndexOf_RightToLeft(string prefix) - { - writer.WriteLine($"int i = global::System.MemoryExtensions.LastIndexOf(runtextSpan.Slice(runtextbeg, runtextpos - runtextbeg), {Literal(prefix)});"); - writer.WriteLine("if (i >= 0)"); - writer.WriteLine("{"); - writer.WriteLine($" base.runtextpos = runtextbeg + i + {prefix.Length};"); - writer.WriteLine(" return true;"); - writer.WriteLine("}"); - } - - // Emits a right-to-left search for a set at a fixed position from the start of the pattern. - // (Currently that position will always be a distance of 0, meaning the start of the pattern itself.) - void EmitFixedSet_RightToLeft() - { - (char[]? Chars, string Set, int Distance, bool CaseInsensitive) set = code.FindOptimizations.FixedDistanceSets![0]; - Debug.Assert(set.Distance == 0); - - if (set.Chars is { Length: 1 } && !set.CaseInsensitive) - { - writer.WriteLine($"int i = global::System.MemoryExtensions.LastIndexOf(runtextSpan.Slice(runtextbeg, runtextpos - runtextbeg), {Literal(set.Chars[0])});"); - writer.WriteLine("if (i >= 0)"); - writer.WriteLine("{"); - writer.WriteLine(" base.runtextpos = runtextbeg + i + 1;"); - writer.WriteLine(" return true;"); - writer.WriteLine("}"); - } - else - { - using (EmitBlock(writer, "for (int i = runtextpos - 1; i >= runtextbeg; i--)")) - { - using (EmitBlock(writer, $"if ({MatchCharacterClass(hasTextInfo, options, "runtextSpan[i]", set.Set, set.CaseInsensitive, additionalDeclarations)})")) - { - writer.WriteLine("base.runtextpos = i + 1;"); - writer.WriteLine("return true;"); - } - } - } - } - // Emits a left-to-right search for a set at a fixed position from the start of the pattern, // and potentially other sets at other fixed positions in the pattern. 
void EmitFixedSet_LeftToRight() @@ -629,10 +560,8 @@ static void EmitTextInfo(IndentedTextWriter writer, ref bool hasTextInfo, RegexM bool needsCulture = rm.Code.FindOptimizations.FindMode switch { FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive or - FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseInsensitive or FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or - FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive or - FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive => true, + FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive => true, _ when rm.Code.FindOptimizations.FixedDistanceSets is List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)> sets => sets.Exists(set => set.CaseInsensitive), @@ -653,62 +582,12 @@ private static void EmitGo(IndentedTextWriter writer, RegexMethod rm, string id) { Debug.Assert(rm.Code.Tree.Root.Type == RegexNode.Capture); - if ((rm.Options & RegexOptions.NonBacktracking) != 0) - { - EmitNonBacktrackingGo(writer, rm, id); - return; - } - RegexNode root = rm.Code.Tree.Root; - if (!ExceedsMaxDepthForSimpleCodeGeneration(root) && - root.Child(0).SupportsSimplifiedCodeGenerationImplementation() && - (((RegexOptions)root.Options) & RegexOptions.RightToLeft) == 0) - { - EmitSimplifiedGo(writer, rm, id); - return; - } - - EmitCompleteGo(writer, rm, id); - - // Deep RegexNode trees used with the simplified code generator can result in - // emitting C# code that exceeds C# compiler limitations, leading to "CS8078: An - // expression is too long or complex to compile". Place an artificial limit on - // max tree depth in order to mitigate such issues. 
- static bool ExceedsMaxDepthForSimpleCodeGeneration(RegexNode node, int maxDepth = 30) - { - if (maxDepth <= 0) - { - return true; - } - - int childCount = node.ChildCount(); - for (int i = 0; i < childCount; i++) - { - if (ExceedsMaxDepthForSimpleCodeGeneration(node.Child(i), maxDepth - 1)) - { - return true; - } - } - - return false; - } - } - - /// Emits the body of a Go method supporting RegexOptions.NonBacktracking. - private static void EmitNonBacktrackingGo(IndentedTextWriter writer, RegexMethod rm, string id) - { - // TODO: Implement this and remove SupportsCustomCodeGeneration. - } - - /// Emits the body of a simplified Go implementation that's possible when there's minimal backtracking required by the expression. - private static void EmitSimplifiedGo(IndentedTextWriter writer, RegexMethod rm, string id) - { // Arbitrary limit for unrolling vs creating a loop. We want to balance size in the generated // code with other costs, like the (small) overhead of slicing to create the temp span to iterate. const int MaxUnrollSize = 16; RegexOptions options = (RegexOptions)rm.Options; RegexCode code = rm.Code; - bool rtl = code.RightToLeft; bool hasTimeout = false; // Helper to define names. Names start unadorned, but as soon as there's repetition, @@ -2874,1175 +2753,6 @@ void EmitRunstackResizeIfNeeded(int count) string RunstackPop() => "base.runstack![--runstackpos]"; } - /// Emits the body of a complete Go implementation that fully supports backtracking. 
- private static void EmitCompleteGo(IndentedTextWriter writer, RegexMethod rm, string id) - { - const int Stackpop = 0; // pop one - const int Stackpop2 = 1; // pop two - const int Capback = 3; // uncapture - const int Capback2 = 4; // uncapture 2 - const int Branchmarkback2 = 5; // back2 part of branchmark - const int Lazybranchmarkback2 = 6; // back2 part of lazybranchmark - const int Branchcountback2 = 7; // back2 part of branchcount - const int Lazybranchcountback2 = 8; // back2 part of lazybranchcount - const int Forejumpback = 9; // back part of forejump - const int Uniquecount = 10; - const string Backtrack = "Backtrack"; // label for backtracking - - int[] codes = rm.Code.Codes; - RegexOptions options = rm.Options; - - int labelCounter = 0; - string DefineLabel(string prefix = "L") => $"{prefix}{labelCounter++}"; - void MarkLabel(string label) => writer.WriteLine($"{label}:"); - - var labels = new string?[codes.Length]; // a label for every operation in _codes - BacktrackNote[]? notes = null; // a list of the backtracking states to be generated - int noteCount = 0; // true count of _notes (allocation grows exponentially) - - int currentOpcode = 0; // the current opcode being processed - int currentCodePos = 0; // the current code being translated - int currentBacktrackNote = 0; // the current backtrack-note being translated - - // special code fragments - var uniqueNote = new int[Uniquecount]; // notes indices for code that should be emitted <= once - var forwardJumpsThroughSwitch = new int[codes.Length]; // indices for forward-jumps-through-switch (for allocations) - - // Generates the forward logic corresponding directly to the regex codes. - // In the absence of backtracking, this is all we would need. 
- writer.WriteLine("string runtext = base.runtext!;"); - writer.WriteLine("int runtextbeg = base.runtextbeg;"); - writer.WriteLine("int runtextend = base.runtextend;"); - writer.WriteLine("int runtextpos = base.runtextpos;"); - writer.WriteLine("int[] runtrack = base.runtrack!;"); - writer.WriteLine("int runtrackpos = base.runtrackpos;"); - writer.WriteLine("int[] runstack = base.runstack!;"); - writer.WriteLine("int runstackpos = base.runstackpos;"); - writer.WriteLine("int tmp1, tmp2, ch;"); - bool hasTimeout = EmitLoopTimeoutCounterIfNeeded(writer, rm); - bool hasTextInfo = EmitInitializeCultureForGoIfNecessary(writer, rm); - writer.WriteLine(); - - uniqueNote.AsSpan().Fill(-1); - for (int codepos = 0; codepos < codes.Length; codepos += RegexCode.OpcodeSize(codes[codepos])) - { - forwardJumpsThroughSwitch[codepos] = -1; - labels[codepos] = DefineLabel(); - } - - currentBacktrackNote = -1; - for (int codepos = 0; codepos < codes.Length; codepos += RegexCode.OpcodeSize(codes[codepos])) - { - currentCodePos = codepos; - currentOpcode = codes[codepos]; - EmitOneCode(labels[codepos]); - writer.WriteLine(); - } - - // Generate the backtracking switch jump table that allows us to simulate a stack of addresses, - // and contains the calls that expand the tracking and the grouping stack when they get too full. - MarkLabel(Backtrack); - - // (Equivalent of EnsureStorage, but written to avoid unnecessary local spilling.) 
- writer.WriteLine("int limit = base.runtrackcount * 4;"); - using (EmitBlock(writer, "if (runstackpos < limit)")) - { - writer.WriteLine("base.runstackpos = runstackpos;"); - writer.WriteLine("base.DoubleStack(); // might change runstackpos and runstack"); - writer.WriteLine("runstackpos = base.runstackpos;"); - writer.WriteLine("runstack = base.runstack!;"); - } - using (EmitBlock(writer, "if (runtrackpos < limit)")) - { - writer.WriteLine("base.runtrackpos = runtrackpos;"); - writer.WriteLine("base.DoubleTrack(); // might change runtrackpos and runtrack"); - writer.WriteLine("runtrackpos = base.runtrackpos;"); - writer.WriteLine("runtrack = base.runtrack!;"); - } - writer.WriteLine(); - using (EmitBlock(writer, "switch (runtrack[runtrackpos++])")) - { - for (int i = 0; i < noteCount; i++) - { - using (EmitBlock(writer, $"case {i}:")) - { - Debug.Assert(notes is not null); - BacktrackNote n = notes[i]; - if (n.flags != 0) - { - currentCodePos = n.codepos; - currentBacktrackNote = i; - currentOpcode = codes[n.codepos] | n.flags; - EmitOneCode(null); // should always end in a goto - } - else - { - writer.WriteLine($"goto {n.label};"); - } - } - - writer.WriteLine(); - } - - using (EmitBlock(writer, "default:")) - { - writer.WriteLine("global::System.Diagnostics.Debug.Fail($\"Unexpected backtracking state {runtrack[runtrackpos - 1]}\");"); - writer.WriteLine("break;"); - } - } - - return; - - /// - /// The main translation function. It translates the logic for a single opcode at - /// the current position. The structure of this function exactly mirrors - /// the structure of the inner loop of RegexInterpreter.Go(). - /// - /// - /// Note that since we're generating code, we can collapse many cases that are - /// dealt with one-at-a-time in RegexIntepreter. We can also unroll loops that - /// iterate over constant strings or sets. - /// - void EmitOneCode(string? 
label) - { - writer.WriteLine($"// {SymbolDisplay.FormatLiteral(RegexCode.OpcodeDescription(currentCodePos, rm.Code.Codes, rm.Code.Strings), quote: false)}"); - - if (label is not null) - { - MarkLabel(label); - } - - // Before executing any Regex code in the unrolled loop, - // we try checking for the match timeout: - EmitTimeoutCheck(writer, hasTimeout); - - // Now generate the code for the Regex code saved in _regexopcode. - switch (currentOpcode) - { - case RegexCode.Stop: - writer.WriteLine("base.runtextpos = runtextpos;"); - writer.WriteLine("return;"); - break; - - case RegexCode.Nothing: - writer.WriteLine($"goto {Backtrack};"); - break; - - case RegexCode.UpdateBumpalong: - // UpdateBumpalong should only exist in the code stream at such a point where the root - // of the backtracking stack contains the runtextpos from the start of this Go call. Replace - // that tracking value with the current runtextpos value. - writer.WriteLine("runtrack[^1] = runtextpos;"); - break; - - case RegexCode.Goto: - Goto(Operand(0)); - break; - - case RegexCode.Testref: - using (EmitBlock(writer, $"if (!base.IsMatched({Operand(0)}))")) - { - writer.WriteLine($"goto {Backtrack};"); - } - break; - - case RegexCode.Lazybranch: - PushTrack("runtextpos"); - Track(); - break; - - case RegexCode.Lazybranch | RegexCode.Back: - writer.WriteLine($"runtextpos = {PopTrack()};"); - Goto(Operand(0)); - break; - - case RegexCode.Nullmark: - PushStack(-1); - TrackUnique(Stackpop); - break; - - case RegexCode.Setmark: - PushStack("runtextpos"); - TrackUnique(Stackpop); - break; - - case RegexCode.Nullmark | RegexCode.Back: - case RegexCode.Setmark | RegexCode.Back: - PopDiscardStack(); - writer.WriteLine($"goto {Backtrack};"); - break; - - case RegexCode.Getmark: - writer.WriteLine($"runtextpos = {PopStack()};"); - PushTrack("runtextpos"); - Track(); - break; - - case RegexCode.Getmark | RegexCode.Back: - PushStack(PopTrack()); - writer.WriteLine($"goto {Backtrack};"); - break; - - case 
RegexCode.Capturemark: - { - if (Operand(1) != -1) - { - using (EmitBlock(writer, $"if (!base.IsMatched({Operand(1)}))")) - { - writer.WriteLine($"goto {Backtrack};"); - } - } - - const string Stacked = "tmp1"; - writer.WriteLine($"{Stacked} = {PopStack()};"); - writer.WriteLine(Operand(1) != -1 ? - $"base.TransferCapture({Operand(0)}, {Operand(1)}, {Stacked}, runtextpos);" : - $"base.Capture({Operand(0)}, {Stacked}, runtextpos);"); - PushTrack(Stacked); - TrackUnique(Operand(0) != -1 && Operand(1) != -1 ? Capback2 : Capback); - } - break; - - case RegexCode.Capturemark | RegexCode.Back: - PushStack(PopTrack()); - writer.WriteLine("base.Uncapture();"); - if (Operand(0) != -1 && Operand(1) != -1) - { - writer.WriteLine("base.Uncapture();"); - } - writer.WriteLine($"goto {Backtrack};"); - break; - - case RegexCode.Branchmark: - { - const string Mark = "tmp1"; - writer.WriteLine($"{Mark} = {PopStack()}; // mark"); - PushTrack(Mark); - using (EmitBlock(writer, $"if (runtextpos != {Mark})")) - { - PushTrack("runtextpos"); - PushStack("runtextpos"); - Track(); - Goto(Operand(0)); - } - using (EmitBlock(writer, "else")) - { - TrackUnique2(Branchmarkback2); - } - } - break; - - case RegexCode.Branchmark | RegexCode.Back: - writer.WriteLine($"runtextpos = {PopTrack()};"); - PopDiscardStack(); - TrackUnique2(Branchmarkback2); // track spot 0 is already in place - Advance(); - break; - - case RegexCode.Branchmark | RegexCode.Back2: - PushStack(PopTrack()); - writer.WriteLine($"goto {Backtrack};"); - break; - - case RegexCode.Lazybranchmark: - { - const string Mark = "tmp1"; - writer.WriteLine($"{Mark} = {PopStack()}; // mark"); - PushTrack($"{Mark} != -1 ? 
{Mark} : runtextpos"); - using (EmitBlock(writer, $"if (runtextpos != {Mark})")) - { - PushTrack("runtextpos"); - Track(); - Advance(); - } - PushStack(Mark); - TrackUnique2(Lazybranchmarkback2); - } - break; - - case RegexCode.Lazybranchmark | RegexCode.Back: - writer.WriteLine($"runtextpos = {PopTrack()};"); - PushStack("runtextpos"); - TrackUnique2(Lazybranchmarkback2); - Goto(Operand(0)); - break; - - case RegexCode.Lazybranchmark | RegexCode.Back2: - writer.WriteLine($"{ReadyReplaceStack(0)} = {PopTrack()};"); - writer.WriteLine($"goto {Backtrack};"); - break; - - case RegexCode.Nullcount: - PushStack(-1); - PushStack(Operand(0)); - TrackUnique(Stackpop2); - break; - - case RegexCode.Setcount: - PushStack("runtextpos"); - PushStack(Operand(0)); - TrackUnique(Stackpop2); - break; - - case RegexCode.Nullcount | RegexCode.Back: - case RegexCode.Setcount | RegexCode.Back: - PopDiscardStack(2); - writer.WriteLine($"goto {Backtrack};"); - break; - - case RegexCode.Branchcount: - { - const string Count = "tmp1"; - const string Mark = "tmp2"; - writer.WriteLine($"{Count} = {PopStack()}; // count"); - writer.WriteLine($"{Mark} = {PopStack()}; // mark"); - PushTrack(Mark); - using (EmitBlock(writer, $"if ({Count} < ({Mark} == runtextpos ? 
0 : {Operand(1)}))")) - { - PushStack("runtextpos"); - PushStack($"{Count} + 1"); - Track(); - Goto(Operand(0)); - } - PushTrack(Count); - TrackUnique2(Branchcountback2); - } - break; - - case RegexCode.Branchcount | RegexCode.Back: - { - const string Count = "tmp1"; - writer.WriteLine($"{Count} = {PopStack()} - 1; // count"); - using (EmitBlock(writer, $"if ({Count} >= 0)")) - { - writer.WriteLine($"runtextpos = {PopStack()};"); - PushTrack(Count); - TrackUnique2(Branchcountback2); - Advance(); - } - writer.WriteLine($"{ReadyReplaceStack(0)} = {PopTrack()};"); - PushStack(Count); - writer.WriteLine($"goto {Backtrack};"); - } - break; - - case RegexCode.Branchcount | RegexCode.Back2: - { - const string Mark = "tmp1"; - writer.WriteLine($"{Mark} = {PopTrack()}; // mark"); - PushStack(PopTrack()); - PushStack(Mark); - writer.WriteLine($"goto {Backtrack};"); - } - break; - - case RegexCode.Lazybranchcount: - { - const string Count = "tmp1"; - writer.WriteLine($"{Count} = {PopStack()}; // count"); - PushTrack(PopStack()); // mark - using (EmitBlock(writer, $"if ({Count} < 0)")) - { - PushStack("runtextpos"); - PushStack($"{Count} + 1"); - TrackUnique2(Lazybranchcountback2); - Goto(Operand(0)); - } - PushTrack(Count); - PushTrack("runtextpos"); - Track(); - } - break; - - case RegexCode.Lazybranchcount | RegexCode.Back: - { - const string C = "tmp1"; - writer.WriteLine($"runtextpos = {PopTrack()};"); - writer.WriteLine($"{C} = {PopTrack()}; // c"); - using (EmitBlock(writer, $"if ({C} < {Operand(1)} && runtextpos != {TopTrack()})")) - { - PushStack("runtextpos"); - PushStack($"{C} + 1"); - TrackUnique2(Lazybranchcountback2); - Goto(Operand(0)); - } - PushStack(PopTrack()); - PushStack(C); - writer.WriteLine($"goto {Backtrack};"); - } - break; - - case RegexCode.Lazybranchcount | RegexCode.Back2: - writer.WriteLine($"{ReadyReplaceStack(1)} = {PopTrack()};"); - writer.WriteLine($"{ReadyReplaceStack(0)} = {TopStack()} - 1;"); - ReadyReplaceStack(0); - 
writer.WriteLine($"goto {Backtrack};"); - break; - - case RegexCode.Setjump: - PushStack("runtrack.Length - runtrackpos"); - PushStack("base.Crawlpos()"); - TrackUnique(Stackpop2); - break; - - case RegexCode.Setjump | RegexCode.Back: - PopDiscardStack(2); - writer.WriteLine($"goto {Backtrack};"); - break; - - case RegexCode.Backjump: - { - const string Stacked = "tmp1"; - writer.WriteLine($"{Stacked} = {PopStack()}; // stacked"); - writer.WriteLine($"runtrackpos = runtrack.Length - {PopStack()};"); - writer.WriteLine($"while (base.Crawlpos() != {Stacked}) base.Uncapture();"); - writer.WriteLine($"goto {Backtrack};"); - } - break; - - case RegexCode.Forejump: - { - const string Stacked = "tmp1"; - writer.WriteLine($"{Stacked} = {PopStack()}; // stacked"); - writer.WriteLine($"runtrackpos = runtrack.Length - {PopStack()};"); - PushTrack(Stacked); - TrackUnique(Forejumpback); - } - break; - - case RegexCode.Forejump | RegexCode.Back: - { - const string TrackedCrawlpos = "tmp1"; - writer.WriteLine($"{TrackedCrawlpos} = {PopTrack()}; // tracked crawlpos"); - writer.WriteLine($"while (base.Crawlpos() != {TrackedCrawlpos}) base.Uncapture();"); - writer.WriteLine($"goto {Backtrack};"); - } - break; - - case RegexCode.Bol: - using (EmitBlock(writer, $"if (runtextpos <= runtextbeg)")) - { - writer.WriteLine($"goto {labels[NextCodepos()]};"); - } - using (EmitBlock(writer, $"if ({Leftchar()} != '\\n')")) - { - writer.WriteLine($"goto {Backtrack};"); - } - break; - - case RegexCode.Eol: - using (EmitBlock(writer, $"if (runtextpos >= runtextend)")) - { - writer.WriteLine($"goto {labels[NextCodepos()]};"); - } - using (EmitBlock(writer, $"if ({Rightchar()} != '\\n')")) - { - writer.WriteLine($"goto {Backtrack};"); - } - break; - - case RegexCode.Boundary: - case RegexCode.NonBoundary: - using (EmitBlock(writer, $"if ({(Code() == RegexCode.Boundary ? "!" 
: "")}base.IsBoundary(runtextpos, runtextbeg, runtextend))")) - { - writer.WriteLine($"goto {Backtrack};"); - } - break; - - case RegexCode.ECMABoundary: - case RegexCode.NonECMABoundary: - using (EmitBlock(writer, $"if ({(Code() == RegexCode.ECMABoundary ? "!" : "")}base.IsECMABoundary(runtextpos, runtextbeg, runtextend))")) - { - writer.WriteLine($"goto {Backtrack};"); - } - break; - - case RegexCode.Beginning: - using (EmitBlock(writer, $"if (runtextpos > runtextbeg)")) - { - writer.WriteLine($"goto {Backtrack};"); - } - break; - - case RegexCode.Start: - using (EmitBlock(writer, $"if (runtextpos != runtextstart)")) - { - writer.WriteLine($"goto {Backtrack};"); - } - break; - - case RegexCode.EndZ: - using (EmitBlock(writer, $"if (runtextpos < runtextend - 1)")) - { - writer.WriteLine($"goto {Backtrack};"); - } - using (EmitBlock(writer, $"if (runtextpos >= runtextend)")) - { - writer.WriteLine($"goto {labels[NextCodepos()]};"); - } - using (EmitBlock(writer, $"if ({Rightchar()} != '\\n')")) - { - writer.WriteLine($"goto {Backtrack};"); - } - break; - - case RegexCode.End: - using (EmitBlock(writer, $"if (runtextpos < runtextend)")) - { - writer.WriteLine($"goto {Backtrack};"); - } - break; - - case RegexCode.One: - case RegexCode.Notone: - case RegexCode.Set: - case RegexCode.One | RegexCode.Rtl: - case RegexCode.Notone | RegexCode.Rtl: - case RegexCode.Set | RegexCode.Rtl: - case RegexCode.One | RegexCode.Ci: - case RegexCode.Notone | RegexCode.Ci: - case RegexCode.Set | RegexCode.Ci: - case RegexCode.One | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Notone | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Set | RegexCode.Ci | RegexCode.Rtl: - { - string clause; - string expr; - if (!IsRightToLeft()) - { - clause = $"runtextpos >= runtextend || "; - expr = Rightcharnext(); - } - else - { - clause = $"runtextpos <= runtextbeg || "; - expr = Leftcharnext(); - } - - clause += Code() == RegexCode.Set ? 
- $"!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive(), null)}" : - $"{ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive())} {(Code() == RegexCode.One ? "!=" : "==")} {Operand(0)}"; - - using (EmitBlock(writer, $"if ({clause})")) - { - writer.WriteLine($"goto {Backtrack};"); - } - } - break; - - case RegexCode.Multi: - case RegexCode.Multi | RegexCode.Ci: - { - string str = rm.Code.Strings[Operand(0)]; - Debug.Assert(str.Length != 0); - writer.WriteLine($"if (runtextend - runtextpos < {str.Length} ||"); - for (int i = 0; i < str.Length; i++) - { - writer.Write($" {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos{(i == 0 ? "" : $" + {i}")}]", IsCaseInsensitive())} != {Literal(str[i])}"); - writer.WriteLine(i < str.Length - 1 ? " ||" : ")"); - } - using (EmitBlock(writer, null)) - { - writer.WriteLine($"goto {Backtrack};"); - } - EmitAdd(writer, "runtextpos", str.Length); - break; - } - - case RegexCode.Multi | RegexCode.Rtl: - case RegexCode.Multi | RegexCode.Ci | RegexCode.Rtl: - { - string str = rm.Code.Strings[Operand(0)]; - Debug.Assert(str.Length != 0); - writer.WriteLine($"if (runtextpos - runtextbeg < {str.Length} ||"); - for (int i = str.Length; i > 0;) - { - i--; - writer.Write($" {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos - {str.Length - i}]", IsCaseInsensitive())} != {Literal(str[i])}"); - writer.WriteLine(i == 0 ? ")" : " ||"); - } - using (EmitBlock(writer, null)) - { - writer.WriteLine($"goto {Backtrack};"); - } - writer.WriteLine($"runtextpos -= {str.Length};"); - break; - } - - case RegexCode.Ref: - case RegexCode.Ref | RegexCode.Ci: - case RegexCode.Ref | RegexCode.Rtl: - case RegexCode.Ref | RegexCode.Ci | RegexCode.Rtl: - { - const string Length = "tmp1"; - const string Index = "tmp2"; - - using (EmitBlock(writer, $"if (!base.IsMatched({Operand(0)}))")) - { - writer.WriteLine($"goto {((options & RegexOptions.ECMAScript) != 0 ? 
AdvanceLabel() : Backtrack)};"); - } - - writer.WriteLine($"{Length} = base.MatchLength({Operand(0)}); // length"); - - using (EmitBlock(writer, !IsRightToLeft() ? $"if (runtextend - runtextpos < {Length})" : $"if (runtextpos - runtextbeg < {Length})")) - { - writer.WriteLine($"goto {Backtrack};"); - } - - if (!IsRightToLeft()) - { - writer.WriteLine($"{Index} = base.MatchIndex({Operand(0)}) + {Length}; // index"); - writer.WriteLine($"runtextpos += {Length};"); - } - else - { - writer.WriteLine($"{Index} = base.MatchIndex({Operand(0)}); // index"); - writer.WriteLine($"runtextpos -= {Length};"); - } - - using (EmitBlock(writer, "while (true)")) - { - using (EmitBlock(writer, $"if ({Length} <= 0)")) - { - writer.WriteLine($"goto {AdvanceLabel()};"); - } - - using (EmitBlock(writer, !IsRightToLeft() ? - $"if ({ToLowerIfNeeded(hasTextInfo, options, $"runtext[{Index} - {Length}]", IsCaseInsensitive())} != {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos - {Length}--]", IsCaseInsensitive())})" : - $"if ({ToLowerIfNeeded(hasTextInfo, options, $"runtext[{Index} + --{Length}]", IsCaseInsensitive())} != {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos + {Length}]", IsCaseInsensitive())})")) - { - writer.WriteLine($"break;"); - } - } - - writer.WriteLine($"goto {Backtrack};"); - break; - } - - case RegexCode.Onerep: - case RegexCode.Notonerep: - case RegexCode.Setrep: - case RegexCode.Onerep | RegexCode.Ci: - case RegexCode.Notonerep | RegexCode.Ci: - case RegexCode.Setrep | RegexCode.Ci: - { - int c = Operand(1); - if (c != 0) - { - using (EmitBlock(writer, $"if (runtextend - runtextpos < {c})")) - { - writer.WriteLine($"goto {Backtrack};"); - } - - using (EmitBlock(writer, $"for (int i = 0; i < {c}; i++)")) - { - string expr = "runtext[runtextpos + i]"; - if (Code() == RegexCode.Setrep) - { - EmitTimeoutCheck(writer, hasTimeout); - expr = $"!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive(), null)}"; - 
} - else - { - expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive()); - expr = $"{expr} {(Code() == RegexCode.Onerep ? "!=" : "==")} {Literal((char)Operand(0))}"; - } - - using (EmitBlock(writer, $"if ({expr})")) - { - writer.WriteLine($"goto {Backtrack};"); - } - } - EmitAdd(writer, "runtextpos", c); - } - } - break; - - case RegexCode.Onerep | RegexCode.Rtl: - case RegexCode.Notonerep | RegexCode.Rtl: - case RegexCode.Setrep | RegexCode.Rtl: - case RegexCode.Onerep | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Notonerep | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Setrep | RegexCode.Ci | RegexCode.Rtl: - { - int c = Operand(1); - if (c != 0) - { - const string Length = "tmp1"; - - using (EmitBlock(writer, $"if (runtextpos - runtextbeg < {c})")) - { - writer.WriteLine($"goto {Backtrack};"); - } - writer.WriteLine($"runtextpos -= {c};"); - writer.WriteLine($"{Length} = {c}; // length"); - - string l1 = DefineLabel(); - MarkLabel(l1); - - string expr = $"runtext[runtextpos + --{Length}]"; - if (Code() == RegexCode.Setrep) - { - EmitTimeoutCheck(writer, hasTimeout); - using (EmitBlock(writer, $"if (!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive(), null)})")) - { - writer.WriteLine($"goto {Backtrack};"); - } - } - else - { - expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive()); - string op = Code() == RegexCode.Onerep ? 
"!=" : "=="; - using (EmitBlock(writer, $"if ({expr} {op} {Literal((char)Operand(0))})")) - { - writer.WriteLine($"goto {Backtrack};"); - } - } - - using (EmitBlock(writer, $"if ({Length} > 0)")) - { - writer.WriteLine($"goto {l1};"); - } - } - break; - } - - case RegexCode.Oneloop: - case RegexCode.Notoneloop: - case RegexCode.Setloop: - case RegexCode.Oneloop | RegexCode.Rtl: - case RegexCode.Notoneloop | RegexCode.Rtl: - case RegexCode.Setloop | RegexCode.Rtl: - case RegexCode.Oneloop | RegexCode.Ci: - case RegexCode.Notoneloop | RegexCode.Ci: - case RegexCode.Setloop | RegexCode.Ci: - case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Oneloopatomic: - case RegexCode.Notoneloopatomic: - case RegexCode.Setloopatomic: - case RegexCode.Oneloopatomic | RegexCode.Rtl: - case RegexCode.Notoneloopatomic | RegexCode.Rtl: - case RegexCode.Setloopatomic | RegexCode.Rtl: - case RegexCode.Oneloopatomic | RegexCode.Ci: - case RegexCode.Notoneloopatomic | RegexCode.Ci: - case RegexCode.Setloopatomic | RegexCode.Ci: - case RegexCode.Oneloopatomic | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Notoneloopatomic | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Setloopatomic | RegexCode.Ci | RegexCode.Rtl: - { - int c = Operand(1); - if (c != 0) - { - const string Len = "tmp1"; - const string I = "tmp2"; - - if (c == int.MaxValue) - { - writer.WriteLine(!IsRightToLeft() ? - $"{Len} = runtextend - runtextpos; // length" : - $"{Len} = runtextpos - runtextbeg; // length"); - } - else - { - writer.WriteLine(!IsRightToLeft() ? - $"{Len} = global::System.Math.Min(runtextend - runtextpos, {c}); // length" : - $"{Len} = global::System.Math.Min(runtextpos - runtextbeg, {c}); // length"); - } - - string? set = Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic ? 
rm.Code.Strings[Operand(0)] : null; - Span setChars = stackalloc char[5]; // max optimized by IndexOfAny today - int numSetChars; - - // If this is a notoneloop{atomic} and we're left-to-right and case-sensitive, - // we can use the vectorized IndexOf to search for the target character. - if ((Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic) && - !IsRightToLeft() && - !IsCaseInsensitive()) - { - writer.WriteLine($"{I} = global::System.MemoryExtensions.IndexOf(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal((char)Operand(0))}); // i"); - using (EmitBlock(writer, $"if ({I} == -1)")) - { - writer.WriteLine($"runtextpos += {Len};"); - writer.WriteLine($"{I} = 0;"); - } - using (EmitBlock(writer, "else")) - { - writer.WriteLine($"runtextpos += {I};"); - writer.WriteLine($"{I} = {Len} - {I};"); - } - } - else if ((Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) && - !IsRightToLeft() && - !IsCaseInsensitive() && - (numSetChars = RegexCharClass.GetSetChars(set!, setChars)) != 0 && - RegexCharClass.IsNegated(set!)) - { - // Similarly, if this is a setloop{atomic} and we're left-to-right and case-sensitive, - // and if the set contains only a few negated chars, we can use the vectorized IndexOfAny - // to search for those chars. 
- Debug.Assert(numSetChars > 1); - writer.WriteLine(numSetChars switch - { - 2 => $"{I} = global::System.MemoryExtensions.IndexOfAny(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal(setChars[0])}, {Literal(setChars[1])}); // i", - 3 => $"{I} = global::System.MemoryExtensions.IndexOfAny(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])}); // i", - _ => $"{I} = global::System.MemoryExtensions.IndexOfAny(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal(setChars.Slice(0, numSetChars).ToString())}); // i", - }); - using (EmitBlock(writer, $"if ({I} == -1)")) - { - writer.WriteLine($"runtextpos += {Len};"); - writer.WriteLine($"{I} = 0;"); - } - using (EmitBlock(writer, "else")) - { - writer.WriteLine($"runtextpos += {I};"); - writer.WriteLine($"{I} = {Len} - {I};"); - } - } - else if ((Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) && - !IsRightToLeft() && - set == RegexCharClass.AnyClass) - { - // If someone uses .* along with RegexOptions.Singleline, that becomes [anycharacter]*, which means it'll - // consume everything. As such, we can simply update our position to be the last allowed, without - // actually checking anything. - writer.WriteLine($"runtextpos += {Len};"); - writer.WriteLine($"{I} = 0;"); - } - else - { - // Otherwise, we emit the open-coded loop. - writer.WriteLine($"{I} = {Len} + 1;"); - using (EmitBlock(writer, $"while (--{I} > {0})")) - { - string expr = !IsRightToLeft() ? - Rightcharnext() : - Leftcharnext(); - - if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) - { - EmitTimeoutCheck(writer, hasTimeout); - expr = $"!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive(), null)}"; - } - else - { - string op = Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopatomic ? 
"!=" : "=="; - expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive()); - expr = $"{expr} {op} {Literal((char)Operand(0))}"; - } - - using (EmitBlock(writer, $"if ({expr})")) - { - writer.WriteLine(!IsRightToLeft() ? - "runtextpos--;" : - "runtextpos++;"); - writer.WriteLine("break;"); - } - } - } - - if (Code() != RegexCode.Oneloopatomic && Code() != RegexCode.Notoneloopatomic && Code() != RegexCode.Setloopatomic) - { - using (EmitBlock(writer, $"if ({I} >= {Len})")) - { - writer.WriteLine($"goto {AdvanceLabel()};"); - } - PushTrack($"{Len} - {I} - 1"); - PushTrack(!IsRightToLeft() ? - "runtextpos - 1" : - "runtextpos + 1"); - Track(); - } - } - break; - } - - case RegexCode.Oneloop | RegexCode.Back: - case RegexCode.Notoneloop | RegexCode.Back: - case RegexCode.Setloop | RegexCode.Back: - case RegexCode.Oneloop | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Notoneloop | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Setloop | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Back: - case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Back: - case RegexCode.Setloop | RegexCode.Ci | RegexCode.Back: - case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: - { - const string Position = "tmp1"; - writer.WriteLine($"runtextpos = {PopTrack()};"); - writer.WriteLine($"{Position} = {PopTrack()}; // position"); - using (EmitBlock(writer, $"if ({Position} > 0)")) - { - PushTrack($"{Position} - 1"); - PushTrack(!IsRightToLeft() ? 
- "runtextpos - 1" : - "runtextpos + 1"); - Trackagain(); - } - Advance(); - } - break; - - case RegexCode.Onelazy: - case RegexCode.Notonelazy: - case RegexCode.Setlazy: - case RegexCode.Onelazy | RegexCode.Rtl: - case RegexCode.Notonelazy | RegexCode.Rtl: - case RegexCode.Setlazy | RegexCode.Rtl: - case RegexCode.Onelazy | RegexCode.Ci: - case RegexCode.Notonelazy | RegexCode.Ci: - case RegexCode.Setlazy | RegexCode.Ci: - case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Rtl: - { - int count = Operand(1); - if (count != 0) - { - const string C = "tmp1"; - if (count == int.MaxValue) - { - writer.WriteLine(!IsRightToLeft() ? - $"{C} = runtextend - runtextpos; // count" : - $"{C} = runtextpos - runtextbeg; // count"); - } - else - { - writer.WriteLine(!IsRightToLeft() ? - $"{C} = global::System.Math.Min(runtextend - runtextpos, {count}); // count" : - $"{C} = global::System.Math.Min(runtextpos - runtextbeg, {count}); // count"); - } - - using (EmitBlock(writer, $"if ({C} <= 0)")) - { - writer.WriteLine($"goto {AdvanceLabel()};"); - } - - PushTrack($"{C} - 1"); - PushTrack("runtextpos"); - Track(); - } - break; - } - - case RegexCode.Onelazy | RegexCode.Back: - case RegexCode.Notonelazy | RegexCode.Back: - case RegexCode.Setlazy | RegexCode.Back: - case RegexCode.Onelazy | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Notonelazy | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Setlazy | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Back: - case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Back: - case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Back: - case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: - { - const string I = "tmp1"; - - 
writer.WriteLine($"runtextpos = {PopTrack()};"); - writer.WriteLine($"{I} = {PopTrack()}; // i"); - - string expr = !IsRightToLeft() ? - Rightcharnext() : - Leftcharnext(); - - if (Code() == RegexCode.Setlazy) - { - EmitTimeoutCheck(writer, hasTimeout); - expr = $"!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive(), null)}"; - } - else - { - expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive()); - expr = $"{expr} {(Code() == RegexCode.Onelazy ? "!=" : "==")} {Literal((char)Operand(0))}"; - } - - using (EmitBlock(writer, $"if ({expr})")) - { - writer.WriteLine($"goto {Backtrack};"); - } - - using (EmitBlock(writer, $"if ({I} > 0)")) - { - PushTrack($"{I} - 1"); - PushTrack("runtextpos"); - Trackagain(); - } - - Advance(); - } - break; - - default: - Debug.Fail($"Unimplemented state: {currentOpcode:X8}"); - break; - } - } - - - - /// - /// Branch to the label corresponding to the regex code at i - /// - /// - /// A trick: since track and stack space is gobbled up unboundedly - /// only as a result of branching backwards, this is where we check - /// for sufficient space and trigger reallocations. - /// - /// If the "goto" is backwards, we generate code that checks - /// available space against the amount of space that would be needed - /// in the worst case by code that will only go forward; if there's - /// not enough, we push the destination on the tracking stack, then - /// we jump to the place where we invoke the allocator. - /// - /// Since forward gotos pose no threat, they just turn into a Br. - /// - void Goto(int i) - { - // When going backwards, ensure enough space. 
- if (i < currentCodePos) - { - using (EmitBlock(writer, $"if (runtrackpos <= {rm.Code.TrackCount * 4} || runstackpos <= {rm.Code.TrackCount * 3})")) - { - writer.WriteLine($"{ReadyPushTrack()} = {AddGoto(i)};"); - writer.WriteLine($"goto {Backtrack};"); - } - } - - writer.WriteLine($"goto {labels[i]};"); - } - - string ReadyPushTrack() => "runtrack[--runtrackpos]"; - - void Track() => PushTrack(AddTrack()); - - /// - /// Pushes the current switch index on the tracking stack so the backtracking - /// logic will be repeated again next time we backtrack here. - /// - void Trackagain() => PushTrack(currentBacktrackNote); - - void PushTrack(T expr) where T : notnull => writer.WriteLine($"{ReadyPushTrack()} = {(expr is IFormattable ? ((IFormattable)expr).ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};"); - - /// Retrieves the top entry on the tracking stack without popping. - string TopTrack() => "runtrack[runtrackpos]"; - - int Operand(int i) => codes[currentCodePos + i + 1]; - - /// True if the current operation is marked for the leftward direction. - bool IsRightToLeft() => (currentOpcode & RegexCode.Rtl) != 0; - - /// True if the current operation is marked for case insensitive operation. - bool IsCaseInsensitive() => (currentOpcode & RegexCode.Ci) != 0; - - /// Returns the raw regex opcode (masking out Back and Rtl). - int Code() => currentOpcode & RegexCode.Mask; - - /// Saves the value of a local variable on the grouping stack. - void PushStack(T expr) where T : notnull => writer.WriteLine($"{ReadyPushStack()} = {(expr is IFormattable ? ((IFormattable)expr).ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};"); - - string ReadyPushStack() => "runstack[--runstackpos]"; - - /// Retrieves the top entry on the stack without popping. 
- string TopStack() => "runstack[runstackpos]"; - - void TrackUnique(int i) => PushTrack(AddUniqueTrack(i)); - - void TrackUnique2(int i) => PushTrack(AddUniqueTrack(i, RegexCode.Back2)); - - int AddUniqueTrack(int i, int flags = RegexCode.Back) - { - if (uniqueNote[i] == -1) - { - uniqueNote[i] = AddTrack(flags); - } - - return uniqueNote[i]; - } - - /// - /// Returns the position of the next operation in the regex code, taking - /// into account the different numbers of arguments taken by operations - /// - int NextCodepos() => currentCodePos + RegexCode.OpcodeSize(codes[currentCodePos]); - - /// The label for the next (forward) operation. - string AdvanceLabel() => labels[NextCodepos()]!; - - /// Goto the next (forward) operation. - void Advance() => writer.WriteLine($"goto {AdvanceLabel()};"); - - /// Loads the char to the left of the current position. - string Leftchar() => "runtext[runtextpos - 1]"; - - /// Loads the char to the left of the current position and advances (leftward). - string Leftcharnext() => "runtext[--runtextpos]"; - - /// Loads the char to the right of the current position. - string Rightchar() => "runtext[runtextpos]"; - - /// Loads the char to the right of the current position and advances the current position. - string Rightcharnext() => "runtext[runtextpos++]"; - - /// - /// Adds a backtrack note to the list of them, and returns the index of the new - /// note (which is also the index for the jump used by the switch table) - /// - int AddBacktrackNote(int flags, string l, int codepos) - { - if (notes == null || noteCount >= notes.Length) - { - var newnotes = new BacktrackNote[notes == null ? 
16 : notes.Length * 2]; - if (notes != null) - { - Array.Copy(notes, newnotes, noteCount); - } - notes = newnotes; - } - - notes[noteCount] = new BacktrackNote(flags, l, codepos); - return noteCount++; - } - - /// - /// Adds a backtrack note for the current operation; creates a new label for - /// where the code will be, and returns the switch index. - /// - int AddTrack(int flags = RegexCode.Back) => AddBacktrackNote(flags, DefineLabel(), currentCodePos); - - int AddGoto(int destpos) - { - if (forwardJumpsThroughSwitch[destpos] == -1) - { - forwardJumpsThroughSwitch[destpos] = AddBacktrackNote(0, labels[destpos]!, destpos); - } - - return forwardJumpsThroughSwitch[destpos]; - } - - /// Pops an element off the tracking stack. - string PopTrack() => "runtrack[runtrackpos++]"; - - /// Pops an element off the grouping stack (leave it on the operand stack). - string PopStack() => "runstack[runstackpos++]"; - - /// Pops i elements off the grouping stack and discards them. - void PopDiscardStack(int i = 1) => EmitAdd(writer, "runstackpos", i); - - /// Prologue to code that will replace the ith element on the grouping stack. - string ReadyReplaceStack(int i) => i == 0 ? 
"runstack[runstackpos]" : $"runstack[runstackpos + {i}]"; - } - - /// - /// Keeps track of an operation that needs to be referenced in the backtrack-jump - /// switch table, and that needs backtracking code to be emitted (if flags != 0) - /// - private record BacktrackNote(int flags, string label, int codepos); - private static bool EmitLoopTimeoutCounterIfNeeded(IndentedTextWriter writer, RegexMethod rm) { if (rm.MatchTimeout != Timeout.Infinite) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs index cb3bed4d27fa29..c8e88bd2c2b38c 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs @@ -190,6 +190,7 @@ private static bool IsSyntaxTargetForGeneration(SyntaxNode node) => SymbolDisplayFormat.FullyQualifiedFormat.WithGlobalNamespaceStyle(SymbolDisplayGlobalNamespaceStyle.Omitted)); var regexMethod = new RegexMethod( + methodSyntax, regexMethodSymbol.Name, methodSyntax.Modifiers.ToString(), pattern, @@ -231,7 +232,7 @@ static bool IsAllowedKind(SyntaxKind kind) => } /// A regex method. - internal sealed record RegexMethod(string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexCode Code); + internal sealed record RegexMethod(MethodDeclarationSyntax MethodSyntax, string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexCode Code); /// A type holding a regex method. internal sealed record RegexType(RegexMethod? 
Method, string Keyword, string Namespace, string Name, string Constraints) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs index 56bcb17935b6f9..a459c8312c2639 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs @@ -70,8 +70,12 @@ public void Initialize(IncrementalGeneratorInitializationContext context) context.ReportDiagnostic(d); break; - case string s: - code.Add(s); + case ValueTuple> t: + code.Add(t.Item1); + foreach (Diagnostic d in t.Item2) + { + context.ReportDiagnostic(d); + } break; } } diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx b/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx index 4f6ea8594572b2..2ce09c60fb6e59 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx @@ -137,6 +137,12 @@ C# LangVersion of 10 or greater is required + + RegexGenerator limitation reached. + + + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + Regular expression parser error '{0}' at offset {1}. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf index 311813a6cf146f..1b24236f5aedd2 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf @@ -152,6 +152,16 @@ Délka nemůže být menší než 0 nebo přesáhnout délku vstupu. 
+ + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + + + + RegexGenerator limitation reached. + RegexGenerator limitation reached. + + Invalid pattern '{0}' at offset {1}. {2} Neplatný vzor {0} u posunu {1}. {2} diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf index 532c4b4bee22f4..af011807dac541 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf @@ -152,6 +152,16 @@ Die Länge darf nicht kleiner als 0 sein oder die Eingabelänge überschreiten. + + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + + + + RegexGenerator limitation reached. + RegexGenerator limitation reached. + + Invalid pattern '{0}' at offset {1}. {2} Ungültiges Muster "{0}" bei Offset {1}. 
{2} diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf index 14bedaae5801c3..03cd5902030cc3 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf @@ -152,6 +152,16 @@ La longitud no puede ser inferior a 0 ni superar la longitud de entrada. + + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + + + + RegexGenerator limitation reached. + RegexGenerator limitation reached. + + Invalid pattern '{0}' at offset {1}. {2} Patrón '{0}' no válido en el desplazamiento {1}. {2} diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf index e1c7019d5872b4..2059255bf3bfb9 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf @@ -152,6 +152,16 @@ La longueur ne peut pas être inférieure à 0 ou supérieure à la longueur d'entrée. + + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. 
+ The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + + + + RegexGenerator limitation reached. + RegexGenerator limitation reached. + + Invalid pattern '{0}' at offset {1}. {2} Modèle « {0} » non valide au niveau du décalage {1}. {2} diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf index 52becd96f3fa7a..7e5f8ef424400a 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf @@ -152,6 +152,16 @@ Lenght non può essere minore di zero o superare la lunghezza di input. + + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + + + + RegexGenerator limitation reached. + RegexGenerator limitation reached. + + Invalid pattern '{0}' at offset {1}. {2} Criterio '{0}' non valido alla posizione di offset {1}. 
{2} diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf index b27fec1ee2afdd..7070423342c461 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf @@ -152,6 +152,16 @@ 長さを 0 未満に設定したり、入力の長さを超えることはできません。 + + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + + + + RegexGenerator limitation reached. + RegexGenerator limitation reached. + + Invalid pattern '{0}' at offset {1}. {2} オフセット {1} に無効なパターン '{0}' があります。{2} diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf index 8ec1a365f3f9f0..ebd89b0c6b28bb 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf @@ -152,6 +152,16 @@ 길이는 0보다 작거나 입력 길이를 초과할 수 없습니다. + + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. 
The implementation will interpret the regular expression at run-time. + + + + RegexGenerator limitation reached. + RegexGenerator limitation reached. + + Invalid pattern '{0}' at offset {1}. {2} 오프셋 {1}에서 잘못된 패턴 '{0}'. {2} diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf index 3879856ae00738..d014e1ebb9e907 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf @@ -152,6 +152,16 @@ Długość nie może być mniejsza od 0 ani przekraczać długości danych wejściowych. + + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + + + + RegexGenerator limitation reached. + RegexGenerator limitation reached. + + Invalid pattern '{0}' at offset {1}. {2} Nieprawidłowy wzorzec „{0}” przy przesunięciu {1}. {2} diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf index be090a6d3611bd..97a7fd1efa5c62 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf @@ -152,6 +152,16 @@ Comprimento não pode ser menor que 0 ou exceder o comprimento de entrada. 
+ + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + + + + RegexGenerator limitation reached. + RegexGenerator limitation reached. + + Invalid pattern '{0}' at offset {1}. {2} Padrão inválido '{0}' no deslocamento {1}. {2} diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf index 08b344ec0d923b..9af130a2871941 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf @@ -152,6 +152,16 @@ Длина не может быть меньше 0 или превышать длину ввода. + + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + + + + RegexGenerator limitation reached. + RegexGenerator limitation reached. + + Invalid pattern '{0}' at offset {1}. {2} Недопустимый шаблон "{0}" со смещением {1}. 
{2} diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf index 70d5c0b730b72e..f5e6edd4646057 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf @@ -152,6 +152,16 @@ Uzunluk sıfırdan küçük olamaz ve giriş uzunluğunu aşamaz. + + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + + + + RegexGenerator limitation reached. + RegexGenerator limitation reached. + + Invalid pattern '{0}' at offset {1}. {2} {1} ofsetinde geçersiz “{0}” deseni. {2} diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf index 047c5b18c02937..fe30a513654142 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf @@ -152,6 +152,16 @@ 长度不能小于 0 或超过输入长度。 + + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. 
The implementation will interpret the regular expression at run-time. + + + + RegexGenerator limitation reached. + RegexGenerator limitation reached. + + Invalid pattern '{0}' at offset {1}. {2} 偏移 {0} 处的模式“{1}”无效。{2} diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf index 0d2b1fff76ba1c..5bbb62ca0363f6 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf @@ -152,6 +152,16 @@ 長度不能小於零或超過輸入長度。 + + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time. + + + + RegexGenerator limitation reached. + RegexGenerator limitation reached. + + Invalid pattern '{0}' at offset {1}. {2} 位移 {1} 的模式 '{0}' 無效。{2} diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs index ee276b33deb756..284c8647f31f43 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs @@ -82,8 +82,13 @@ internal Regex(string pattern, RegexOptions options, TimeSpan matchTimeout, Cult else if (RuntimeFeature.IsDynamicCodeCompiled && UseOptionC()) { // If the compile option is set and compilation is supported, then compile the code. 
+ // If the compiler can't compile this regex, it'll return null, and we'll fall back + // to the interpreter. factory = Compile(pattern, _code, options, matchTimeout != InfiniteMatchTimeout); - _code = null; + if (factory is not null) + { + _code = null; + } } } @@ -215,7 +220,7 @@ protected IDictionary? CapNames /// instantiating a non-compiled regex. /// [MethodImpl(MethodImplOptions.NoInlining)] - private static RegexRunnerFactory Compile(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) => + private static RegexRunnerFactory? Compile(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) => RegexCompiler.Compile(pattern, code, options, hasTimeout); [Obsolete(Obsoletions.RegexCompileToAssemblyMessage, DiagnosticId = Obsoletions.RegexCompileToAssemblyDiagId, UrlFormat = Obsoletions.SharedUrlFormat)] diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index a971e3f66a6345..5782d8d534194b 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -3,7 +3,6 @@ using System.Collections.Generic; using System.Diagnostics; -using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.Reflection; using System.Reflection.Emit; @@ -23,14 +22,8 @@ internal abstract class RegexCompiler private static readonly FieldInfo s_runtextstartField = RegexRunnerField("runtextstart"); private static readonly FieldInfo s_runtextposField = RegexRunnerField("runtextpos"); private static readonly FieldInfo s_runtextField = RegexRunnerField("runtext"); - private static readonly FieldInfo s_runtrackposField = RegexRunnerField("runtrackpos"); - private static readonly FieldInfo s_runtrackField = RegexRunnerField("runtrack"); - 
private static readonly FieldInfo s_runstackposField = RegexRunnerField("runstackpos"); private static readonly FieldInfo s_runstackField = RegexRunnerField("runstack"); - protected static readonly FieldInfo s_runtrackcountField = RegexRunnerField("runtrackcount"); - private static readonly MethodInfo s_doubleStackMethod = RegexRunnerMethod("DoubleStack"); - private static readonly MethodInfo s_doubleTrackMethod = RegexRunnerMethod("DoubleTrack"); private static readonly MethodInfo s_captureMethod = RegexRunnerMethod("Capture"); private static readonly MethodInfo s_transferCaptureMethod = RegexRunnerMethod("TransferCapture"); private static readonly MethodInfo s_uncaptureMethod = RegexRunnerMethod("Uncapture"); @@ -42,9 +35,6 @@ internal abstract class RegexCompiler private static readonly MethodInfo s_crawlposMethod = RegexRunnerMethod("Crawlpos"); private static readonly MethodInfo s_charInClassMethod = RegexRunnerMethod("CharInClass"); private static readonly MethodInfo s_checkTimeoutMethod = RegexRunnerMethod("CheckTimeout"); -#if DEBUG - private static readonly MethodInfo s_dumpStateM = RegexRunnerMethod("DumpState"); -#endif private static readonly MethodInfo s_charIsDigitMethod = typeof(char).GetMethod("IsDigit", new Type[] { typeof(char) })!; private static readonly MethodInfo s_charIsWhiteSpaceMethod = typeof(char).GetMethod("IsWhiteSpace", new Type[] { typeof(char) })!; @@ -52,9 +42,6 @@ internal abstract class RegexCompiler private static readonly MethodInfo s_charToLowerInvariantMethod = typeof(char).GetMethod("ToLowerInvariant", new Type[] { typeof(char) })!; private static readonly MethodInfo s_cultureInfoGetCurrentCultureMethod = typeof(CultureInfo).GetMethod("get_CurrentCulture")!; private static readonly MethodInfo s_cultureInfoGetTextInfoMethod = typeof(CultureInfo).GetMethod("get_TextInfo")!; -#if DEBUG - private static readonly MethodInfo s_debugWriteLine = typeof(Debug).GetMethod("WriteLine", new Type[] { typeof(string) })!; -#endif private 
static readonly MethodInfo s_spanGetItemMethod = typeof(ReadOnlySpan).GetMethod("get_Item", new Type[] { typeof(int) })!; private static readonly MethodInfo s_spanGetLengthMethod = typeof(ReadOnlySpan).GetMethod("get_Length")!; private static readonly MethodInfo s_memoryMarshalGetReference = typeof(MemoryMarshal).GetMethod("GetReference", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char)); @@ -63,8 +50,6 @@ internal abstract class RegexCompiler private static readonly MethodInfo s_spanIndexOfAnyCharChar = typeof(MemoryExtensions).GetMethod("IndexOfAny", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), Type.MakeGenericMethodParameter(0), Type.MakeGenericMethodParameter(0) })!.MakeGenericMethod(typeof(char)); private static readonly MethodInfo s_spanIndexOfAnyCharCharChar = typeof(MemoryExtensions).GetMethod("IndexOfAny", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), Type.MakeGenericMethodParameter(0), Type.MakeGenericMethodParameter(0), Type.MakeGenericMethodParameter(0) })!.MakeGenericMethod(typeof(char)); private static readonly MethodInfo s_spanIndexOfAnySpan = typeof(MemoryExtensions).GetMethod("IndexOfAny", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char)); - private static readonly MethodInfo s_spanLastIndexOfChar = typeof(MemoryExtensions).GetMethod("LastIndexOf", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), Type.MakeGenericMethodParameter(0) })!.MakeGenericMethod(typeof(char)); - private static readonly MethodInfo s_spanLastIndexOfSpan = typeof(MemoryExtensions).GetMethod("LastIndexOf", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), 
typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char)); private static readonly MethodInfo s_spanSliceIntMethod = typeof(ReadOnlySpan).GetMethod("Slice", new Type[] { typeof(int) })!; private static readonly MethodInfo s_spanSliceIntIntMethod = typeof(ReadOnlySpan).GetMethod("Slice", new Type[] { typeof(int), typeof(int) })!; private static readonly MethodInfo s_spanStartsWith = typeof(MemoryExtensions).GetMethod("StartsWith", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char)); @@ -77,52 +62,17 @@ internal abstract class RegexCompiler protected ILGenerator? _ilg; - // tokens representing local variables - private LocalBuilder? _runtextbegLocal; - private LocalBuilder? _runtextendLocal; - private LocalBuilder? _runtextposLocal; - private LocalBuilder? _runtextLocal; - private LocalBuilder? _runtextSpanLocal; - private LocalBuilder? _runtrackposLocal; - private LocalBuilder? _runtrackLocal; - private LocalBuilder? _runstackposLocal; - private LocalBuilder? _runstackLocal; - private LocalBuilder? _textInfoLocal; // cached to avoid extraneous TLS hits from CurrentCulture and virtual calls to TextInfo - private LocalBuilder? _loopTimeoutCounterLocal; // timeout counter for setrep and setloop - - protected RegexOptions _options; // options - protected RegexCode? _code; // the RegexCode object - protected int[]? _codes; // the RegexCodes being translated - protected string[]? _strings; // the stringtable associated with the RegexCodes - protected bool _hasTimeout; // whether the regex has a non-infinite timeout - - private Label[]? _labels; // a label for every operation in _codes - private BacktrackNote[]? 
_notes; // a list of the backtracking states to be generated - private int _notecount; // true count of _notes (allocation grows exponentially) - protected int _trackcount; // count of backtracking states (used to reduce allocations) - private Label _backtrack; // label for backtracking - private Stack? _int32LocalsPool; // pool of Int32 local variables - private Stack? _readOnlySpanCharLocalsPool; // pool of ReadOnlySpan local variables - - private int _regexopcode; // the current opcode being processed - private int _codepos; // the current code being translated - private int _backpos; // the current backtrack-note being translated - - // special code fragments - private int[]? _uniquenote; // _notes indices for code that should be emitted <= once - private int[]? _goto; // indices for forward-jumps-through-switch (for allocations) - - // indices for unique code fragments - private const int Stackpop = 0; // pop one - private const int Stackpop2 = 1; // pop two - private const int Capback = 3; // uncapture - private const int Capback2 = 4; // uncapture 2 - private const int Branchmarkback2 = 5; // back2 part of branchmark - private const int Lazybranchmarkback2 = 6; // back2 part of lazybranchmark - private const int Branchcountback2 = 7; // back2 part of branchcount - private const int Lazybranchcountback2 = 8; // back2 part of lazybranchcount - private const int Forejumpback = 9; // back part of forejump - private const int Uniquecount = 10; + protected RegexOptions _options; // options + protected RegexCode? _code; // the RegexCode object + protected int[]? _codes; // the RegexCodes being translated + protected bool _hasTimeout; // whether the regex has a non-infinite timeout + + private Stack? _int32LocalsPool; // pool of Int32 local variables + private Stack? _readOnlySpanCharLocalsPool; // pool of ReadOnlySpan local variables + + private LocalBuilder? 
_textInfo; // cached to avoid extraneous TLS hits from CurrentCulture and virtual calls to TextInfo + private LocalBuilder? _loopTimeoutCounter; // timeout counter for loops (set and node) + private const int LoopTimeoutCheckCount = 2048; // A conservative value to guarantee the correct timeout handling. private static FieldInfo RegexRunnerField(string fieldname) => typeof(RegexRunner).GetField(fieldname, BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance | BindingFlags.Static)!; @@ -133,114 +83,15 @@ internal abstract class RegexCompiler /// Entry point to dynamically compile a regular expression. The expression is compiled to /// an in-memory assembly. /// - internal static RegexRunnerFactory Compile(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) => + internal static RegexRunnerFactory? Compile(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) => new RegexLWCGCompiler().FactoryInstanceFromCode(pattern, code, options, hasTimeout); - /// - /// Keeps track of an operation that needs to be referenced in the backtrack-jump - /// switch table, and that needs backtracking code to be emitted (if flags != 0) - /// - private sealed class BacktrackNote - { - internal int _codepos; - internal int _flags; - internal Label _label; - - public BacktrackNote(int flags, Label label, int codepos) - { - _codepos = codepos; - _flags = flags; - _label = label; - } - } - - /// - /// Adds a backtrack note to the list of them, and returns the index of the new - /// note (which is also the index for the jump used by the switch table) - /// - private int AddBacktrackNote(int flags, Label l, int codepos) - { - if (_notes == null || _notecount >= _notes.Length) - { - var newnotes = new BacktrackNote[_notes == null ? 
16 : _notes.Length * 2]; - if (_notes != null) - { - Array.Copy(_notes, newnotes, _notecount); - } - _notes = newnotes; - } - - _notes[_notecount] = new BacktrackNote(flags, l, codepos); - - return _notecount++; - } - - /// - /// Adds a backtrack note for the current operation; creates a new label for - /// where the code will be, and returns the switch index. - /// - private int AddTrack() => AddTrack(RegexCode.Back); - - /// - /// Adds a backtrack note for the current operation; creates a new label for - /// where the code will be, and returns the switch index. - /// - private int AddTrack(int flags) => AddBacktrackNote(flags, DefineLabel(), _codepos); - - /// - /// Adds a switchtable entry for the specified position (for the forward - /// logic; does not cause backtracking logic to be generated) - /// - private int AddGoto(int destpos) - { - if (_goto![destpos] == -1) - { - _goto[destpos] = AddBacktrackNote(0, _labels![destpos], destpos); - } - - return _goto[destpos]; - } - - /// - /// Adds a note for backtracking code that only needs to be generated once; - /// if it's already marked to be generated, returns the switch index - /// for the unique piece of code. - /// - private int AddUniqueTrack(int i) => AddUniqueTrack(i, RegexCode.Back); - - /// - /// Adds a note for backtracking code that only needs to be generated once; - /// if it's already marked to be generated, returns the switch index - /// for the unique piece of code. - /// - private int AddUniqueTrack(int i, int flags) - { - if (_uniquenote![i] == -1) - { - _uniquenote[i] = AddTrack(flags); - } - - return _uniquenote[i]; - } - /// A macro for _ilg.DefineLabel private Label DefineLabel() => _ilg!.DefineLabel(); /// A macro for _ilg.MarkLabel private void MarkLabel(Label l) => _ilg!.MarkLabel(l); - /// Returns the ith operand of the current operation. - private int Operand(int i) => _codes![_codepos + i + 1]; - - /// True if the current operation is marked for the leftward direction. 
- private bool IsRightToLeft() => (_regexopcode & RegexCode.Rtl) != 0; - - /// True if the current operation is marked for case insensitive operation. - private bool IsCaseInsensitive() => (_regexopcode & RegexCode.Ci) != 0; - - /// Returns the raw regex opcode (masking out Back and Rtl). - private int Code() => _regexopcode & RegexCode.Mask; - /// A macro for _ilg.Emit(Opcodes.Ldstr, str) protected void Ldstr(string str) => _ilg!.Emit(OpCodes.Ldstr, str); @@ -253,9 +104,6 @@ private int AddUniqueTrack(int i, int flags) /// A macro for _ilg.Emit(OpCodes.Ret). protected void Ret() => _ilg!.Emit(OpCodes.Ret); - /// A macro for _ilg.Emit(OpCodes.Newobj, constructor). - protected void Newobj(ConstructorInfo constructor) => _ilg!.Emit(OpCodes.Newobj, constructor); - /// A macro for _ilg.Emit(OpCodes.Dup). protected void Dup() => _ilg!.Emit(OpCodes.Dup); @@ -277,18 +125,9 @@ private int AddUniqueTrack(int i, int flags) /// A macro for _ilg.Emit(OpCodes.Add). private void Add() => _ilg!.Emit(OpCodes.Add); - /// A macro for _ilg.Emit(OpCodes.Add); a true flag can turn it into a Sub. - private void Add(bool negate) => _ilg!.Emit(negate ? OpCodes.Sub : OpCodes.Add); - /// A macro for _ilg.Emit(OpCodes.Sub). private void Sub() => _ilg!.Emit(OpCodes.Sub); - /// A macro for _ilg.Emit(OpCodes.Sub) or _ilg.Emit(OpCodes.Add). - private void Sub(bool negate) => _ilg!.Emit(negate ? OpCodes.Add : OpCodes.Sub); - - /// A macro for _ilg.Emit(OpCodes.Neg). - private void Neg() => _ilg!.Emit(OpCodes.Neg); - /// A macro for _ilg.Emit(OpCodes.Mul). 
private void Mul() => _ilg!.Emit(OpCodes.Mul); @@ -335,7 +174,7 @@ private int AddUniqueTrack(int i, int flags) protected void Ldthisfld(FieldInfo ft) { Ldthis(); - Ldfld(ft); + _ilg!.Emit(OpCodes.Ldfld, ft); } /// A macro for Ldthis(); Ldfld(); Stloc(); @@ -345,17 +184,6 @@ private void Mvfldloc(FieldInfo ft, LocalBuilder lt) Stloc(lt); } - /// A macro for Ldthis(); Ldloc(); Stfld(); - private void Mvlocfld(LocalBuilder lt, FieldInfo ft) - { - Ldthis(); - Ldloc(lt); - Stfld(ft); - } - - /// A macro for _ilg.Emit(OpCodes.Ldfld). - private void Ldfld(FieldInfo ft) => _ilg!.Emit(OpCodes.Ldfld, ft); - /// A macro for _ilg.Emit(OpCodes.Stfld). protected void Stfld(FieldInfo ft) => _ilg!.Emit(OpCodes.Stfld, ft); @@ -389,18 +217,12 @@ private void Mvlocfld(LocalBuilder lt, FieldInfo ft) /// A macro for _ilg.Emit(OpCodes.Bge_Un) (long form). private void BgeUnFar(Label l) => _ilg!.Emit(OpCodes.Bge_Un, l); - /// A macro for _ilg.Emit(OpCodes.Bgt) (long form). - private void BgtFar(Label l) => _ilg!.Emit(OpCodes.Bgt, l); - /// A macro for _ilg.Emit(OpCodes.Bne) (long form). private void BneFar(Label l) => _ilg!.Emit(OpCodes.Bne_Un, l); /// A macro for _ilg.Emit(OpCodes.Beq) (long form). private void BeqFar(Label l) => _ilg!.Emit(OpCodes.Beq, l); - /// A macro for _ilg.Emit(OpCodes.Brfalse_S) (short jump). - private void Brfalse(Label l) => _ilg!.Emit(OpCodes.Brfalse_S, l); - /// A macro for _ilg.Emit(OpCodes.Brtrue_S) (short jump). private void Brtrue(Label l) => _ilg!.Emit(OpCodes.Brtrue_S, l); @@ -422,9 +244,6 @@ private void Mvlocfld(LocalBuilder lt, FieldInfo ft) /// A macro for _ilg.Emit(OpCodes.Bgt_S) (short jump). private void Bgt(Label l) => _ilg!.Emit(OpCodes.Bgt_S, l); - /// A macro for _ilg.Emit(OpCodes.Bgt_Un_S) (short jump). - private void BgtUn(Label l) => _ilg!.Emit(OpCodes.Bgt_Un_S, l); - /// A macro for _ilg.Emit(OpCodes.Bne_S) (short jump). 
private void Bne(Label l) => _ilg!.Emit(OpCodes.Bne_Un_S, l); @@ -448,9 +267,6 @@ private void Mvlocfld(LocalBuilder lt, FieldInfo ft) /// Declares a local CultureInfo. private LocalBuilder? DeclareTextInfo() => _ilg!.DeclareLocal(typeof(TextInfo)); - /// Declares a local int[]. - private LocalBuilder DeclareInt32Array() => _ilg!.DeclareLocal(typeof(int[])); - /// Declares a local string. private LocalBuilder DeclareString() => _ilg!.DeclareLocal(typeof(string)); @@ -498,271 +314,17 @@ public void Dispose() } } - /// Loads the char to the right of the current position. - private void Rightchar() - { - Ldloc(_runtextLocal!); - Ldloc(_runtextposLocal!); - Call(s_stringGetCharsMethod); - } - - /// Loads the char to the right of the current position and advances the current position. - private void Rightcharnext() - { - Ldloc(_runtextLocal!); - Ldloc(_runtextposLocal!); - Call(s_stringGetCharsMethod); - Ldloc(_runtextposLocal!); - Ldc(1); - Add(); - Stloc(_runtextposLocal!); - } - - /// Loads the char to the left of the current position. - private void Leftchar() - { - Ldloc(_runtextLocal!); - Ldloc(_runtextposLocal!); - Ldc(1); - Sub(); - Call(s_stringGetCharsMethod); - } - - /// Loads the char to the left of the current position and advances (leftward). - private void Leftcharnext() - { - Ldloc(_runtextposLocal!); - Ldc(1); - Sub(); - Stloc(_runtextposLocal!); - Ldloc(_runtextLocal!); - Ldloc(_runtextposLocal!); - Call(s_stringGetCharsMethod); - } - - /// Creates a backtrack note and pushes the switch index it on the tracking stack. - private void Track() - { - ReadyPushTrack(); - Ldc(AddTrack()); - DoPush(); - } - - /// - /// Pushes the current switch index on the tracking stack so the backtracking - /// logic will be repeated again next time we backtrack here. - /// - private void Trackagain() - { - ReadyPushTrack(); - Ldc(_backpos); - DoPush(); - } - - /// Saves the value of a local variable on the tracking stack. 
- private void PushTrack(LocalBuilder lt) - { - ReadyPushTrack(); - Ldloc(lt); - DoPush(); - } - - /// - /// Creates a backtrack note for a piece of code that should only be generated once, - /// and emits code that pushes the switch index on the backtracking stack. - /// - private void TrackUnique(int i) - { - ReadyPushTrack(); - Ldc(AddUniqueTrack(i)); - DoPush(); - } - - /// - /// Creates a second-backtrack note for a piece of code that should only be - /// generated once, and emits code that pushes the switch index on the - /// backtracking stack. - /// - private void TrackUnique2(int i) - { - ReadyPushTrack(); - Ldc(AddUniqueTrack(i, RegexCode.Back2)); - DoPush(); - } - - /// Prologue to code that will push an element on the tracking stack. - private void ReadyPushTrack() - { - Ldloc(_runtrackposLocal!); - Ldc(1); - Sub(); - Stloc(_runtrackposLocal!); - Ldloc(_runtrackLocal!); - Ldloc(_runtrackposLocal!); - } - - /// Pops an element off the tracking stack (leave it on the operand stack). - private void PopTrack() - { - Ldloc(_runtrackLocal!); - Ldloc(_runtrackposLocal!); - LdelemI4(); - using RentedLocalBuilder tmp = RentInt32Local(); - Stloc(tmp); - Ldloc(_runtrackposLocal!); - Ldc(1); - Add(); - Stloc(_runtrackposLocal!); - Ldloc(tmp); - } - - /// Retrieves the top entry on the tracking stack without popping. - private void TopTrack() - { - Ldloc(_runtrackLocal!); - Ldloc(_runtrackposLocal!); - LdelemI4(); - } - - /// Saves the value of a local variable on the grouping stack. - private void PushStack(LocalBuilder lt) - { - ReadyPushStack(); - Ldloc(lt); - DoPush(); - } - - /// Prologue to code that will replace the ith element on the grouping stack. - internal void ReadyReplaceStack(int i) - { - Ldloc(_runstackLocal!); - Ldloc(_runstackposLocal!); - if (i != 0) - { - Ldc(i); - Add(); - } - } - - /// Prologue to code that will push an element on the grouping stack. 
- private void ReadyPushStack() - { - Ldloc(_runstackposLocal!); - Ldc(1); - Sub(); - Stloc(_runstackposLocal!); - Ldloc(_runstackLocal!); - Ldloc(_runstackposLocal!); - } - - /// Retrieves the top entry on the stack without popping. - private void TopStack() - { - Ldloc(_runstackLocal!); - Ldloc(_runstackposLocal!); - LdelemI4(); - } - - /// Pops an element off the grouping stack (leave it on the operand stack). - private void PopStack() - { - using RentedLocalBuilder elementLocal = RentInt32Local(); - Ldloc(_runstackLocal!); - Ldloc(_runstackposLocal!); - LdelemI4(); - Stloc(elementLocal); - Ldloc(_runstackposLocal!); - Ldc(1); - Add(); - Stloc(_runstackposLocal!); - Ldloc(elementLocal); - } - - /// Pops 1 element off the grouping stack and discards it. - private void PopDiscardStack() => PopDiscardStack(1); - - /// Pops i elements off the grouping stack and discards them. - private void PopDiscardStack(int i) - { - Ldloc(_runstackposLocal!); - Ldc(i); - Add(); - Stloc(_runstackposLocal!); - } - - /// Epilogue to code that will replace an element on a stack (use Ld* in between). - private void DoReplace() => StelemI4(); - - /// Epilogue to code that will push an element on a stack (use Ld* in between). - private void DoPush() => StelemI4(); - - /// Jump to the backtracking switch. - private void Back() => BrFar(_backtrack); - - /// - /// Branch to the MSIL corresponding to the regex code at i - /// - /// - /// A trick: since track and stack space is gobbled up unboundedly - /// only as a result of branching backwards, this is where we check - /// for sufficient space and trigger reallocations. - /// - /// If the "goto" is backwards, we generate code that checks - /// available space against the amount of space that would be needed - /// in the worst case by code that will only go forward; if there's - /// not enough, we push the destination on the tracking stack, then - /// we jump to the place where we invoke the allocator. 
- /// - /// Since forward gotos pose no threat, they just turn into a Br. - /// - private void Goto(int i) - { - if (i < _codepos) - { - Label l1 = DefineLabel(); - - // When going backwards, ensure enough space. - Ldloc(_runtrackposLocal!); - Ldc(_trackcount * 4); - Ble(l1); - Ldloc(_runstackposLocal!); - Ldc(_trackcount * 3); - BgtFar(_labels![i]); - MarkLabel(l1); - ReadyPushTrack(); - Ldc(AddGoto(i)); - DoPush(); - BrFar(_backtrack); - } - else - { - BrFar(_labels![i]); - } - } - - /// - /// Returns the position of the next operation in the regex code, taking - /// into account the different numbers of arguments taken by operations - /// - private int NextCodepos() => _codepos + RegexCode.OpcodeSize(_codes![_codepos]); - - /// The label for the next (forward) operation. - private Label AdvanceLabel() => _labels![NextCodepos()]; - - /// Goto the next (forward) operation. - private void Advance() => BrFar(AdvanceLabel()); - /// Sets the culture local to CultureInfo.CurrentCulture. private void InitLocalCultureInfo() { - Debug.Assert(_textInfoLocal != null); + Debug.Assert(_textInfo != null); Call(s_cultureInfoGetCurrentCultureMethod); Callvirt(s_cultureInfoGetTextInfoMethod); - Stloc(_textInfoLocal); + Stloc(_textInfo); } - /// Whether ToLower operations should be performed with the invariant culture as opposed to the one in . - private bool UseToLowerInvariant => _textInfoLocal == null || (_options & RegexOptions.CultureInvariant) != 0; + /// Whether ToLower operations should be performed with the invariant culture as opposed to the one in . + private bool UseToLowerInvariant => _textInfo == null || (_options & RegexOptions.CultureInvariant) != 0; /// Invokes either char.ToLowerInvariant(c) or _textInfo.ToLower(c). 
private void CallToLower() @@ -775,169 +337,31 @@ private void CallToLower() { using RentedLocalBuilder currentCharLocal = RentInt32Local(); Stloc(currentCharLocal); - Ldloc(_textInfoLocal!); + Ldloc(_textInfo!); Ldloc(currentCharLocal); Callvirt(s_textInfoToLowerMethod); } } - /// - /// Generates the first section of the MSIL. This section contains all - /// the forward logic, and corresponds directly to the regex codes. - /// In the absence of backtracking, this is all we would need. - /// - private void GenerateForwardSection() - { - _uniquenote = new int[Uniquecount]; - _labels = new Label[_codes!.Length]; - _goto = new int[_codes.Length]; - - // initialize - - Array.Fill(_uniquenote, -1); - for (int codepos = 0; codepos < _codes.Length; codepos += RegexCode.OpcodeSize(_codes[codepos])) - { - _goto[codepos] = -1; - _labels[codepos] = DefineLabel(); - } - - // emit variable initializers - - Mvfldloc(s_runtextField, _runtextLocal!); - Mvfldloc(s_runtextbegField, _runtextbegLocal!); - Mvfldloc(s_runtextendField, _runtextendLocal!); - Mvfldloc(s_runtextposField, _runtextposLocal!); - Mvfldloc(s_runtrackField, _runtrackLocal!); - Mvfldloc(s_runtrackposField, _runtrackposLocal!); - Mvfldloc(s_runstackField, _runstackLocal!); - Mvfldloc(s_runstackposField, _runstackposLocal!); - - _backpos = -1; - - for (int codepos = 0; codepos < _codes.Length; codepos += RegexCode.OpcodeSize(_codes[codepos])) - { - MarkLabel(_labels[codepos]); - _codepos = codepos; - _regexopcode = _codes[codepos]; - GenerateOneCode(); - } - } - - /// - /// Generates the middle section of the MSIL. This section contains the - /// big switch jump that allows us to simulate a stack of addresses, - /// and it also contains the calls that expand the tracking and the - /// grouping stack when they get too full. 
- /// - private void GenerateMiddleSection() - { - using RentedLocalBuilder limitLocal = RentInt32Local(); - Label afterDoubleStack = DefineLabel(); - Label afterDoubleTrack = DefineLabel(); - - // Backtrack: - MarkLabel(_backtrack); - - // (Equivalent of EnsureStorage, but written to avoid unnecessary local spilling.) - - // int limitLocal = runtrackcount * 4; - Ldthisfld(s_runtrackcountField); - Ldc(4); - Mul(); - Stloc(limitLocal); - - // if (runstackpos < limit) - // { - // this.runstackpos = runstackpos; - // DoubleStack(); // might change runstackpos and runstack - // runstackpos = this.runstackpos; - // runstack = this.runstack; - // } - Ldloc(_runstackposLocal!); - Ldloc(limitLocal); - Bge(afterDoubleStack); - Mvlocfld(_runstackposLocal!, s_runstackposField); - Ldthis(); - Call(s_doubleStackMethod); - Mvfldloc(s_runstackposField, _runstackposLocal!); - Mvfldloc(s_runstackField, _runstackLocal!); - MarkLabel(afterDoubleStack); - - // if (runtrackpos < limit) - // { - // this.runtrackpos = runtrackpos; - // DoubleTrack(); // might change runtrackpos and runtrack - // runtrackpos = this.runtrackpos; - // runtrack = this.runtrack; - // } - Ldloc(_runtrackposLocal!); - Ldloc(limitLocal); - Bge(afterDoubleTrack); - Mvlocfld(_runtrackposLocal!, s_runtrackposField); - Ldthis(); - Call(s_doubleTrackMethod); - Mvfldloc(s_runtrackposField, _runtrackposLocal!); - Mvfldloc(s_runtrackField, _runtrackLocal!); - MarkLabel(afterDoubleTrack); - - // runtrack[runtrackpos++] - PopTrack(); - - // Backtracking jump table - var table = new Label[_notecount]; - for (int i = 0; i < _notecount; i++) - { - table[i] = _notes![i]._label; - } - Switch(table); - } - - /// - /// Generates the last section of the MSIL. This section contains all of - /// the backtracking logic. 
- /// - private void GenerateBacktrackSection() - { - for (int i = 0; i < _notecount; i++) - { - BacktrackNote n = _notes![i]; - if (n._flags != 0) - { - MarkLabel(n._label); - _codepos = n._codepos; - _backpos = i; - _regexopcode = _codes![n._codepos] | n._flags; - GenerateOneCode(); - } - } - } - - /// - /// Generates FindFirstChar. - /// + /// Generates FindFirstChar. protected void GenerateFindFirstChar() { Debug.Assert(_code != null); _int32LocalsPool?.Clear(); _readOnlySpanCharLocalsPool?.Clear(); - _runtextposLocal = DeclareInt32(); - _runtextendLocal = DeclareInt32(); - if (_code.RightToLeft) - { - _runtextbegLocal = DeclareInt32(); - } - _runtextSpanLocal = DeclareReadOnlySpanChar(); - _textInfoLocal = null; + LocalBuilder runtextSpan = DeclareReadOnlySpanChar(); + LocalBuilder runtextpos = DeclareInt32(); + LocalBuilder runtextend = DeclareInt32(); + + _textInfo = null; if ((_options & RegexOptions.CultureInvariant) == 0) { bool needsCulture = _code.FindOptimizations.FindMode switch { FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive or - FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseInsensitive or FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or - FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive or - FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive => true, + FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive => true, _ when _code.FindOptimizations.FixedDistanceSets is List<(char[]? 
Chars, string Set, int Distance, bool CaseInsensitive)> sets => sets.Exists(set => set.CaseInsensitive), @@ -946,7 +370,7 @@ FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive or if (needsCulture) { - _textInfoLocal = DeclareTextInfo(); + _textInfo = DeclareTextInfo(); InitLocalCultureInfo(); } } @@ -955,15 +379,11 @@ FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive or // int runtextpos = this.runtextpos; // int runtextend = this.runtextend; // ReadOnlySpan runtextSpan = this.runtext.AsSpan(); - Mvfldloc(s_runtextposField, _runtextposLocal); - Mvfldloc(s_runtextendField, _runtextendLocal); + Mvfldloc(s_runtextposField, runtextpos); + Mvfldloc(s_runtextendField, runtextend); Ldthisfld(s_runtextField); Call(s_stringAsSpanMethod); - Stloc(_runtextSpanLocal); - if (_code.RightToLeft) - { - Mvfldloc(s_runtextbegField, _runtextbegLocal!); - } + Stloc(runtextSpan); // Generate length check. If the input isn't long enough to possibly match, fail quickly. // It's rare for min required length to be 0, so we don't bother special-casing the check, @@ -972,46 +392,25 @@ FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive or Debug.Assert(minRequiredLength >= 0); Label returnFalse = DefineLabel(); Label finishedLengthCheck = DefineLabel(); - if (!_code.RightToLeft) - { - // if (runtextpos > runtextend - _code.Tree.MinRequiredLength) - // { - // this.runtextpos = runtextend; - // return false; - // } - Ldloc(_runtextposLocal); - Ldloc(_runtextendLocal); - if (minRequiredLength > 0) - { - Ldc(minRequiredLength); - Sub(); - } - Ble(finishedLengthCheck); - MarkLabel(returnFalse); - Ldthis(); - Ldloc(_runtextendLocal); - } - else + // if (runtextpos > runtextend - _code.Tree.MinRequiredLength) + // { + // this.runtextpos = runtextend; + // return false; + // } + Ldloc(runtextpos); + Ldloc(runtextend); + if (minRequiredLength > 0) { - // if (runtextpos - _code.Tree.MinRequiredLength < runtextbeg) - // { - // this.runtextpos = 
runtextbeg; - // return false; - // } - Ldloc(_runtextposLocal); - if (minRequiredLength > 0) - { - Ldc(minRequiredLength); - Sub(); - } - Ldloc(_runtextbegLocal!); - Bge(finishedLengthCheck); - - MarkLabel(returnFalse); - Ldthis(); - Ldloc(_runtextbegLocal!); + Ldc(minRequiredLength); + Sub(); } + Ble(finishedLengthCheck); + + MarkLabel(returnFalse); + Ldthis(); + Ldloc(runtextend); + Stfld(s_runtextposField); Ldc(0); Ret(); @@ -1024,7 +423,6 @@ FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive or } // Either anchors weren't specified, or they don't completely root all matches to a specific location. - switch (_code.FindOptimizations.FindMode) { case FindNextStartingPositionMode.LeadingPrefix_LeftToRight_CaseSensitive: @@ -1032,11 +430,6 @@ FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive or GenerateIndexOf_LeftToRight(_code.FindOptimizations.LeadingCaseSensitivePrefix); break; - case FindNextStartingPositionMode.LeadingPrefix_RightToLeft_CaseSensitive: - Debug.Assert(!string.IsNullOrEmpty(_code.FindOptimizations.LeadingCaseSensitivePrefix)); - GenerateIndexOf_RightToLeft(_code.FindOptimizations.LeadingCaseSensitivePrefix); - break; - case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive: case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive: case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseSensitive: @@ -1045,12 +438,6 @@ FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive or GenerateFixedSet_LeftToRight(); break; - case FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseSensitive: - case FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive: - Debug.Assert(_code.FindOptimizations.FixedDistanceSets is { Count: > 0 }); - GenerateFixedSet_RightToLeft(); - break; - default: Debug.Fail($"Unexpected mode: {_code.FindOptimizations.FindMode}"); goto case FindNextStartingPositionMode.NoSearch; @@ -1074,21 +461,10 @@ bool GenerateAnchors() case 
RegexPrefixAnalyzer.Beginning: { Label l1 = DefineLabel(); - Ldloc(_runtextposLocal); - if (!_code.RightToLeft) - { - Ldthisfld(s_runtextbegField); - Ble(l1); - Br(returnFalse); - } - else - { - Ldloc(_runtextbegLocal!); - Ble(l1); - Ldthis(); - Ldloc(_runtextbegLocal!); - Stfld(s_runtextposField); - } + Ldloc(runtextpos); + Ldthisfld(s_runtextbegField); + Ble(l1); + Br(returnFalse); MarkLabel(l1); } Ldc(1); @@ -1098,16 +474,9 @@ bool GenerateAnchors() case RegexPrefixAnalyzer.Start: { Label l1 = DefineLabel(); - Ldloc(_runtextposLocal); + Ldloc(runtextpos); Ldthisfld(s_runtextstartField); - if (!_code.RightToLeft) - { - Ble(l1); - } - else - { - Bge(l1); - } + Ble(l1); Br(returnFalse); MarkLabel(l1); } @@ -1118,41 +487,17 @@ bool GenerateAnchors() case RegexPrefixAnalyzer.EndZ: { Label l1 = DefineLabel(); - if (!_code.RightToLeft) - { - Ldloc(_runtextposLocal); - Ldloc(_runtextendLocal); - Ldc(1); - Sub(); - Bge(l1); - Ldthis(); - Ldloc(_runtextendLocal); - Ldc(1); - Sub(); - Stfld(s_runtextposField); - MarkLabel(l1); - } - else - { - Label l2 = DefineLabel(); - Ldloc(_runtextposLocal); - Ldloc(_runtextendLocal); - Ldc(1); - Sub(); - Blt(l1); - Ldloc(_runtextposLocal); - Ldloc(_runtextendLocal); - Beq(l2); - Ldloca(_runtextSpanLocal); - Ldloc(_runtextposLocal); - Call(s_spanGetItemMethod); - LdindU2(); - Ldc('\n'); - Beq(l2); - MarkLabel(l1); - BrFar(returnFalse); - MarkLabel(l2); - } + Ldloc(runtextpos); + Ldloc(runtextend); + Ldc(1); + Sub(); + Bge(l1); + Ldthis(); + Ldloc(runtextend); + Ldc(1); + Sub(); + Stfld(s_runtextposField); + MarkLabel(l1); } Ldc(1); Ret(); @@ -1161,20 +506,12 @@ bool GenerateAnchors() case RegexPrefixAnalyzer.End: { Label l1 = DefineLabel(); - Ldloc(_runtextposLocal); - Ldloc(_runtextendLocal); - if (!_code.RightToLeft) - { - Bge(l1); - Ldthis(); - Ldloc(_runtextendLocal); - Stfld(s_runtextposField); - } - else - { - Bge(l1); - Br(returnFalse); - } + Ldloc(runtextpos); + Ldloc(runtextend); + Bge(l1); + Ldthis(); + Ldloc(runtextend); + 
Stfld(s_runtextposField); MarkLabel(l1); } Ldc(1); @@ -1188,17 +525,16 @@ bool GenerateAnchors() // the other anchors, which all skip all subsequent processing if found, with BOL we just use it // to boost our position to the next line, and then continue normally with any prefix or char class searches. - Debug.Assert(!_code.RightToLeft, "RightToLeft isn't implemented and should have been filtered out previously"); Label atBeginningOfLine = DefineLabel(); // if (runtextpos > runtextbeg... - Ldloc(_runtextposLocal!); + Ldloc(runtextpos!); Ldthisfld(s_runtextbegField); Ble(atBeginningOfLine); // ... && runtextSpan[runtextpos - 1] != '\n') { ... } - Ldloca(_runtextSpanLocal); - Ldloc(_runtextposLocal); + Ldloca(runtextSpan); + Ldloc(runtextpos); Ldc(1); Sub(); Call(s_spanGetItemMethod); @@ -1207,8 +543,8 @@ bool GenerateAnchors() Beq(atBeginningOfLine); // int tmp = runtextSpan.Slice(runtextpos).IndexOf('\n'); - Ldloca(_runtextSpanLocal); - Ldloc(_runtextposLocal); + Ldloca(runtextSpan); + Ldloc(runtextpos); Call(s_spanSliceIntMethod); Ldc('\n'); Call(s_spanIndexOfChar); @@ -1225,20 +561,20 @@ bool GenerateAnchors() Ldc(-1); Beq(returnFalse); Ldloc(newlinePos); - Ldloc(_runtextposLocal); + Ldloc(runtextpos); Add(); Ldc(1); Add(); - Ldloc(_runtextendLocal); + Ldloc(runtextend); Bgt(returnFalse); // runtextpos = newlinePos + runtextpos + 1; Ldloc(newlinePos); - Ldloc(_runtextposLocal); + Ldloc(runtextpos); Add(); Ldc(1); Add(); - Stloc(_runtextposLocal); + Stloc(runtextpos); } MarkLabel(atBeginningOfLine); @@ -1255,10 +591,10 @@ void GenerateIndexOf_LeftToRight(string prefix) using RentedLocalBuilder i = RentInt32Local(); // int i = runtextSpan.Slice(runtextpos, runtextend - runtextpos).IndexOf(prefix); - Ldloca(_runtextSpanLocal); - Ldloc(_runtextposLocal); - Ldloc(_runtextendLocal); - Ldloc(_runtextposLocal); + Ldloca(runtextSpan); + Ldloc(runtextpos); + Ldloc(runtextend); + Ldloc(runtextpos); Sub(); Call(s_spanSliceIntIntMethod); Ldstr(prefix); @@ -1274,134 +610,14 @@ 
void GenerateIndexOf_LeftToRight(string prefix) // base.runtextpos = runtextpos + i; // return true; Ldthis(); - Ldloc(_runtextposLocal); - Ldloc(i); - Add(); - Stfld(s_runtextposField); - Ldc(1); - Ret(); - } - - void GenerateIndexOf_RightToLeft(string prefix) - { - using RentedLocalBuilder i = RentInt32Local(); - - // int i = runtextSpan.Slice(runtextbeg, runtextpos - runtextbeg).LastIndexOf(prefix); - Ldloca(_runtextSpanLocal); - Ldloc(_runtextbegLocal!); - Ldloc(_runtextposLocal); - Ldloc(_runtextbegLocal!); - Sub(); - Call(s_spanSliceIntIntMethod); - Ldstr(prefix); - Call(s_stringAsSpanMethod); - Call(s_spanLastIndexOfSpan); - Stloc(i); - - // if (i < 0) goto ReturnFalse; - Ldloc(i); - Ldc(0); - BltFar(returnFalse); - - // base.runtextpos = runtextbeg + i + LeadingCaseSensitivePrefix.Length; - // return true; - Ldthis(); - Ldloc(_runtextbegLocal!); + Ldloc(runtextpos); Ldloc(i); Add(); - Ldc(prefix.Length); - Add(); Stfld(s_runtextposField); Ldc(1); Ret(); } - void GenerateFixedSet_RightToLeft() - { - (char[]? 
Chars, string Set, int Distance, bool CaseInsensitive) set = _code.FindOptimizations.FixedDistanceSets![0]; - Debug.Assert(set.Distance == 0); - - using RentedLocalBuilder i = RentInt32Local(); - - if (set.Chars is { Length: 1 } && !set.CaseInsensitive) - { - // int i = runtextSpan.Slice(runtextbeg, runtextpos - runtextbeg).LastIndexOf(set.Chars[0]); - Ldloca(_runtextSpanLocal); - Ldloc(_runtextbegLocal!); - Ldloc(_runtextposLocal); - Ldloc(_runtextbegLocal!); - Sub(); - Call(s_spanSliceIntIntMethod); - Ldc(set.Chars[0]); - Call(s_spanLastIndexOfChar); - Stloc(i); - - // if (i < 0) goto ReturnFalse; - Ldloc(i); - Ldc(0); - BltFar(returnFalse); - - // base.runtextpos = runtextbeg + i + 1; - // return true; - Ldthis(); - Ldloc(_runtextbegLocal!); - Ldloc(i); - Add(); - Ldc(1); - Add(); - Stfld(s_runtextposField); - Ldc(1); - Ret(); - } - else - { - Label condition = DefineLabel(); - Label increment = DefineLabel(); - Label body = DefineLabel(); - - // for (int i = runtextpos - 1; ... - Ldloc(_runtextposLocal); - Ldc(1); - Sub(); - Stloc(i); - BrFar(condition); - - // if (MatchCharClass(runtextSpan[i], set)) - MarkLabel(body); - Ldloca(_runtextSpanLocal); - Ldloc(i); - Call(s_spanGetItemMethod); - LdindU2(); - EmitMatchCharacterClass(set.Set, set.CaseInsensitive); - Brfalse(increment); - - // base.runtextpos = i + 1; - // return true; - Ldthis(); - Ldloc(i); - Ldc(1); - Add(); - Stfld(s_runtextposField); - Ldc(1); - Ret(); - - // for (...; ...; i--) - MarkLabel(increment); - Ldloc(i); - Ldc(1); - Sub(); - Stloc(i); - - // for (...; i >= runtextbeg; ...) - MarkLabel(condition); - Ldloc(i); - Ldloc(_runtextbegLocal!); - BgeFar(body); - - BrFar(returnFalse); - } - } - void GenerateFixedSet_LeftToRight() { List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? 
sets = _code.FindOptimizations.FixedDistanceSets; @@ -1413,10 +629,10 @@ void GenerateFixedSet_LeftToRight() using RentedLocalBuilder textSpanLocal = RentReadOnlySpanCharLocal(); // ReadOnlySpan span = runtextSpan.Slice(runtextpos, runtextend - runtextpos); - Ldloca(_runtextSpanLocal); - Ldloc(_runtextposLocal); - Ldloc(_runtextendLocal); - Ldloc(_runtextposLocal); + Ldloca(runtextSpan); + Ldloc(runtextpos); + Ldloc(runtextend); + Ldloc(runtextpos); Sub(); Call(s_spanSliceIntIntMethod); Stloc(textSpanLocal); @@ -1565,7 +781,7 @@ void GenerateFixedSet_LeftToRight() // this.runtextpos = runtextpos + i; // return true; Ldthis(); - Ldloc(_runtextposLocal); + Ldloc(runtextpos); Ldloc(iLocal); Add(); Stfld(s_runtextposField); @@ -1601,31 +817,19 @@ void GenerateFixedSet_LeftToRight() } } - private bool TryGenerateSimplifiedGo(RegexNode node) + protected void GenerateGo() { + Debug.Assert(_code != null); + _int32LocalsPool?.Clear(); + _readOnlySpanCharLocalsPool?.Clear(); + + RegexNode node = _code!.Tree.Root; + Debug.Assert(node.Type == RegexNode.Capture, "Every generated tree should begin with a capture node"); Debug.Assert(node.ChildCount() == 1, "Capture nodes should have one child"); - // RightToLeft is rare and not worth adding a lot of custom code to handle in this path. - if ((node.Options & RegexOptions.RightToLeft) != 0) - { - return false; - } - // Skip the Capture node. We handle the implicit root capture specially. node = node.Child(0); - if (!node.SupportsSimplifiedCodeGenerationImplementation()) - { - return false; - } - - // We've determined that the RegexNode can be handled with this optimized path. Generate the code. -#if DEBUG - if ((_options & RegexOptions.Debug) != 0) - { - Debug.WriteLine("Using optimized non-backtracking code gen."); - } -#endif // In some limited cases, FindFirstChar will only return true if it successfully matched the whole thing. // This is the case, in particular, for strings. 
We can special case these to do essentially nothing @@ -1654,7 +858,7 @@ private bool TryGenerateSimplifiedGo(RegexNode node) Add(); Stfld(s_runtextposField); Ret(); - return true; + return; } } @@ -1669,7 +873,7 @@ private bool TryGenerateSimplifiedGo(RegexNode node) Label originalDoneLabel = doneLabel; if (_hasTimeout) { - _loopTimeoutCounterLocal = DeclareInt32(); + _loopTimeoutCounter = DeclareInt32(); } // CultureInfo culture = CultureInfo.CurrentCulture; // only if the whole expression or any subportion is ignoring case, and we're not using invariant @@ -1758,8 +962,8 @@ private bool TryGenerateSimplifiedGo(RegexNode node) // return; Ret(); - // Generated code successfully with non-backtracking implementation. - return true; + // Generated code successfully. + return; static bool IsCaseInsensitive(RegexNode node) => (node.Options & RegexOptions.IgnoreCase) != 0; @@ -4010,1888 +3214,149 @@ void EmitLoop(RegexNode node) Ldc(0); BeqFar(originalDoneLabel); - // if (iterationCount < minIterations) goto doneLabel/originalDoneLabel; - Ldloc(iterationCount); - Ldc(minIterations); - BltFar(childBacktracks ? 
doneLabel : originalDoneLabel); - } - - if (childBacktracks) - { - // goto endLoop; - BrFar(endLoop); - - // Backtrack: - Label backtrack = DefineLabel(); - MarkLabel(backtrack); - - // if (iterationCount == 0) goto originalDoneLabel; - Ldloc(iterationCount); - Ldc(0); - BeqFar(originalDoneLabel); - - // goto doneLabel; - BrFar(doneLabel); - - doneLabel = backtrack; - } - - MarkLabel(endLoop); - - if (node.IsInLoop()) - { - // Store the capture's state - EmitRunstackResizeIfNeeded(3); - EmitRunstackPush(() => Ldloc(startingRunTextPos)); - EmitRunstackPush(() => Ldloc(iterationCount)); - - // Skip past the backtracking section - // goto end; - Label end = DefineLabel(); - BrFar(end); - - // Emit a backtracking section that restores the capture's state and then jumps to the previous done label - Label backtrack = DefineLabel(); - MarkLabel(backtrack); - - // iterationCount = base.runstack[--runstack]; - // startingRunTextPos = base.runstack[--runstack]; - EmitRunstackPop(); - Stloc(iterationCount); - EmitRunstackPop(); - Stloc(startingRunTextPos); - - // goto doneLabel; - BrFar(doneLabel); - - doneLabel = backtrack; - MarkLabel(end); - } - } - - void EmitRunstackResizeIfNeeded(int count) - { - Debug.Assert(count >= 1); - - // if (runstackpos >= base.runstack!.Length - (count - 1)) - // { - // Array.Resize(ref base.runstack, base.runstack.Length * 2); - // } - - Label skipResize = DefineLabel(); - - Ldloc(runstackpos); - Ldthisfld(s_runstackField); - Ldlen(); - if (count > 1) - { - Ldc(count - 1); - Sub(); - } - Blt(skipResize); - - Ldthis(); - _ilg!.Emit(OpCodes.Ldflda, s_runstackField); - Ldthisfld(s_runstackField); - Ldlen(); - Ldc(2); - Mul(); - Call(s_arrayResize); - - MarkLabel(skipResize); - } - - void EmitRunstackPush(Action load) - { - // base.runstack[runstackpos] = load(); - Ldthisfld(s_runstackField); - Ldloc(runstackpos); - load(); - StelemI4(); - - // runstackpos++; - Ldloc(runstackpos); - Ldc(1); - Add(); - Stloc(runstackpos); - } - - void 
EmitRunstackPop() - { - // ... = base.runstack[--runstackpos]; - Ldthisfld(s_runstackField); - Ldloc(runstackpos); - Ldc(1); - Sub(); - Stloc(runstackpos); - Ldloc(runstackpos); - LdelemI4(); - } - } - - /// Generates the code for "RegexRunner.Go". - protected void GenerateGo() - { - Debug.Assert(_code != null); - _int32LocalsPool?.Clear(); - _readOnlySpanCharLocalsPool?.Clear(); - - // Generate simpler code when we're dealing with simpler regexes. - if (TryGenerateSimplifiedGo(_code.Tree.Root)) - { - return; - } - - // We're dealing with a regex more complicated that the fast-path non-backtracking - // implementation can handle. Do the full-fledged thing. - - // declare some locals - - _runtextposLocal = DeclareInt32(); - _runtextLocal = DeclareString(); - _runtrackposLocal = DeclareInt32(); - _runtrackLocal = DeclareInt32Array(); - _runstackposLocal = DeclareInt32(); - _runstackLocal = DeclareInt32Array(); - if (_hasTimeout) - { - _loopTimeoutCounterLocal = DeclareInt32(); - } - _runtextbegLocal = DeclareInt32(); - _runtextendLocal = DeclareInt32(); - - InitializeCultureForGoIfNecessary(); - - // clear some tables - - _labels = null; - _notes = null; - _notecount = 0; - - // globally used labels - - _backtrack = DefineLabel(); - - // emit the code! 
- - GenerateForwardSection(); - GenerateMiddleSection(); - GenerateBacktrackSection(); - } - - private void InitializeCultureForGoIfNecessary() - { - _textInfoLocal = null; - if ((_options & RegexOptions.CultureInvariant) == 0) - { - bool needsCulture = (_options & RegexOptions.IgnoreCase) != 0; - if (!needsCulture) - { - for (int codepos = 0; codepos < _codes!.Length; codepos += RegexCode.OpcodeSize(_codes[codepos])) - { - if ((_codes[codepos] & RegexCode.Ci) == RegexCode.Ci) - { - needsCulture = true; - break; - } - } - } - - if (needsCulture) - { - // cache CultureInfo in local variable which saves excessive thread local storage accesses - _textInfoLocal = DeclareTextInfo(); - InitLocalCultureInfo(); - } - } - } - - /// - /// The main translation function. It translates the logic for a single opcode at - /// the current position. The structure of this function exactly mirrors - /// the structure of the inner loop of RegexInterpreter.Go(). - /// - /// - /// The C# code from RegexInterpreter.Go() that corresponds to each case is - /// included as a comment. - /// - /// Note that since we're generating code, we can collapse many cases that are - /// dealt with one-at-a-time in RegexIntepreter. We can also unroll loops that - /// iterate over constant strings or sets. - /// - private void GenerateOneCode() - { -#if DEBUG - if ((_options & RegexOptions.Debug) != 0) - DumpBacktracking(); -#endif - - // Before executing any RegEx code in the unrolled loop, - // we try checking for the match timeout: - - if (_hasTimeout) - { - Ldthis(); - Call(s_checkTimeoutMethod); - } - - // Now generate the IL for the RegEx code saved in _regexopcode. 
- // We unroll the loop done by the RegexCompiler creating as very long method - // that is longer if the pattern is longer: - - switch (_regexopcode) - { - case RegexCode.Stop: - //: return; - Mvlocfld(_runtextposLocal!, s_runtextposField); // update _textpos - Ret(); - break; - - case RegexCode.Nothing: - //: break Backward; - Back(); - break; - - case RegexCode.UpdateBumpalong: - // UpdateBumpalong should only exist in the code stream at such a point where the root - // of the backtracking stack contains the runtextpos from the start of this Go call. Replace - // that tracking value with the current runtextpos value. - //: base.runtrack[base.runtrack.Length - 1] = runtextpos; - Ldloc(_runtrackLocal!); - Dup(); - Ldlen(); - Ldc(1); - Sub(); - Ldloc(_runtextposLocal!); - StelemI4(); - break; - - case RegexCode.Goto: - //: Goto(Operand(0)); - Goto(Operand(0)); - break; - - case RegexCode.Testref: - //: if (!_match.IsMatched(Operand(0))) - //: break Backward; - Ldthis(); - Ldc(Operand(0)); - Call(s_isMatchedMethod); - BrfalseFar(_backtrack); - break; - - case RegexCode.Lazybranch: - //: Track(Textpos()); - PushTrack(_runtextposLocal!); - Track(); - break; - - case RegexCode.Lazybranch | RegexCode.Back: - //: Trackframe(1); - //: Textto(Tracked(0)); - //: Goto(Operand(0)); - PopTrack(); - Stloc(_runtextposLocal!); - Goto(Operand(0)); - break; - - case RegexCode.Nullmark: - //: Stack(-1); - //: Track(); - ReadyPushStack(); - Ldc(-1); - DoPush(); - TrackUnique(Stackpop); - break; - - case RegexCode.Setmark: - //: Stack(Textpos()); - //: Track(); - PushStack(_runtextposLocal!); - TrackUnique(Stackpop); - break; - - case RegexCode.Nullmark | RegexCode.Back: - case RegexCode.Setmark | RegexCode.Back: - //: Stackframe(1); - //: break Backward; - PopDiscardStack(); - Back(); - break; - - case RegexCode.Getmark: - //: Stackframe(1); - //: Track(Stacked(0)); - //: Textto(Stacked(0)); - ReadyPushTrack(); - PopStack(); - Stloc(_runtextposLocal!); - Ldloc(_runtextposLocal!); - 
DoPush(); - - Track(); - break; - - case RegexCode.Getmark | RegexCode.Back: - //: Trackframe(1); - //: Stack(Tracked(0)); - //: break Backward; - ReadyPushStack(); - PopTrack(); - DoPush(); - Back(); - break; - - case RegexCode.Capturemark: - //: if (!IsMatched(Operand(1))) - //: break Backward; - //: Stackframe(1); - //: if (Operand(1) != -1) - //: TransferCapture(Operand(0), Operand(1), Stacked(0), Textpos()); - //: else - //: Capture(Operand(0), Stacked(0), Textpos()); - //: Track(Stacked(0)); - - //: Stackframe(1); - //: Capture(Operand(0), Stacked(0), Textpos()); - //: Track(Stacked(0)); - - if (Operand(1) != -1) - { - Ldthis(); - Ldc(Operand(1)); - Call(s_isMatchedMethod); - BrfalseFar(_backtrack); - } - - using (RentedLocalBuilder stackedLocal = RentInt32Local()) - { - PopStack(); - Stloc(stackedLocal); - - if (Operand(1) != -1) - { - Ldthis(); - Ldc(Operand(0)); - Ldc(Operand(1)); - Ldloc(stackedLocal); - Ldloc(_runtextposLocal!); - Call(s_transferCaptureMethod); - } - else - { - Ldthis(); - Ldc(Operand(0)); - Ldloc(stackedLocal); - Ldloc(_runtextposLocal!); - Call(s_captureMethod); - } - - PushTrack(stackedLocal); - } - - TrackUnique(Operand(0) != -1 && Operand(1) != -1 ? 
Capback2 : Capback); - break; - - - case RegexCode.Capturemark | RegexCode.Back: - //: Trackframe(1); - //: Stack(Tracked(0)); - //: Uncapture(); - //: if (Operand(0) != -1 && Operand(1) != -1) - //: Uncapture(); - //: break Backward; - ReadyPushStack(); - PopTrack(); - DoPush(); - Ldthis(); - Call(s_uncaptureMethod); - if (Operand(0) != -1 && Operand(1) != -1) - { - Ldthis(); - Call(s_uncaptureMethod); - } - Back(); - break; - - case RegexCode.Branchmark: - //: Stackframe(1); - //: - //: if (Textpos() != Stacked(0)) - //: { // Nonempty match -> loop now - //: Track(Stacked(0), Textpos()); // Save old mark, textpos - //: Stack(Textpos()); // Make new mark - //: Goto(Operand(0)); // Loop - //: } - //: else - //: { // Empty match -> straight now - //: Track2(Stacked(0)); // Save old mark - //: Advance(1); // Straight - //: } - //: continue Forward; - { - Label l1 = DefineLabel(); - - PopStack(); - using (RentedLocalBuilder mark = RentInt32Local()) - { - Stloc(mark); // Stacked(0) -> temp - PushTrack(mark); - Ldloc(mark); - } - Ldloc(_runtextposLocal!); - Beq(l1); // mark == textpos -> branch - - // (matched != 0) - - PushTrack(_runtextposLocal!); - PushStack(_runtextposLocal!); - Track(); - Goto(Operand(0)); // Goto(Operand(0)) - - // else - - MarkLabel(l1); - TrackUnique2(Branchmarkback2); - break; - } - - case RegexCode.Branchmark | RegexCode.Back: - //: Trackframe(2); - //: Stackframe(1); - //: Textto(Tracked(1)); // Recall position - //: Track2(Tracked(0)); // Save old mark - //: Advance(1); - PopTrack(); - Stloc(_runtextposLocal!); - PopStack(); - Pop(); - // track spot 0 is already in place - TrackUnique2(Branchmarkback2); - Advance(); - break; - - case RegexCode.Branchmark | RegexCode.Back2: - //: Trackframe(1); - //: Stack(Tracked(0)); // Recall old mark - //: break Backward; // Backtrack - ReadyPushStack(); - PopTrack(); - DoPush(); - Back(); - break; - - case RegexCode.Lazybranchmark: - //: StackPop(); - //: int oldMarkPos = StackPeek(); - //: - //: if 
(Textpos() != oldMarkPos) { // Nonempty match -> next loop - //: { // Nonempty match -> next loop - //: if (oldMarkPos != -1) - //: Track(Stacked(0), Textpos()); // Save old mark, textpos - //: else - //: TrackPush(Textpos(), Textpos()); - //: } - //: else - //: { // Empty match -> no loop - //: Track2(Stacked(0)); // Save old mark - //: } - //: Advance(1); - //: continue Forward; - { - using (RentedLocalBuilder mark = RentInt32Local()) - { - PopStack(); - Stloc(mark); // Stacked(0) -> temp - - // if (oldMarkPos != -1) - Label l2 = DefineLabel(); - Label l3 = DefineLabel(); - Ldloc(mark); - Ldc(-1); - Beq(l2); // mark == -1 -> branch - PushTrack(mark); - Br(l3); - // else - MarkLabel(l2); - PushTrack(_runtextposLocal!); - MarkLabel(l3); - - // if (Textpos() != mark) - Label l1 = DefineLabel(); - Ldloc(_runtextposLocal!); - Ldloc(mark); - Beq(l1); // mark == textpos -> branch - PushTrack(_runtextposLocal!); - Track(); - Br(AdvanceLabel()); // Advance (near) - // else - MarkLabel(l1); - ReadyPushStack(); // push the current textPos on the stack. - // May be ignored by 'back2' or used by a true empty match. 
- Ldloc(mark); - } - - DoPush(); - TrackUnique2(Lazybranchmarkback2); - - break; - } - - case RegexCode.Lazybranchmark | RegexCode.Back: - //: Trackframe(2); - //: Track2(Tracked(0)); // Save old mark - //: Stack(Textpos()); // Make new mark - //: Textto(Tracked(1)); // Recall position - //: Goto(Operand(0)); // Loop - - PopTrack(); - Stloc(_runtextposLocal!); - PushStack(_runtextposLocal!); - TrackUnique2(Lazybranchmarkback2); - Goto(Operand(0)); - break; - - case RegexCode.Lazybranchmark | RegexCode.Back2: - //: Stackframe(1); - //: Trackframe(1); - //: Stack(Tracked(0)); // Recall old mark - //: break Backward; - ReadyReplaceStack(0); - PopTrack(); - DoReplace(); - Back(); - break; - - case RegexCode.Nullcount: - //: Stack(-1, Operand(0)); - //: Track(); - ReadyPushStack(); - Ldc(-1); - DoPush(); - ReadyPushStack(); - Ldc(Operand(0)); - DoPush(); - TrackUnique(Stackpop2); - break; - - case RegexCode.Setcount: - //: Stack(Textpos(), Operand(0)); - //: Track(); - PushStack(_runtextposLocal!); - ReadyPushStack(); - Ldc(Operand(0)); - DoPush(); - TrackUnique(Stackpop2); - break; - - case RegexCode.Nullcount | RegexCode.Back: - case RegexCode.Setcount | RegexCode.Back: - //: Stackframe(2); - //: break Backward; - PopDiscardStack(2); - Back(); - break; - - case RegexCode.Branchcount: - //: Stackframe(2); - //: int mark = Stacked(0); - //: int count = Stacked(1); - //: - //: if (count >= Operand(1) || Textpos() == mark && count >= 0) - //: { // Max loops or empty match -> straight now - //: Track2(mark, count); // Save old mark, count - //: Advance(2); // Straight - //: } - //: else - //: { // Nonempty match -> count+loop now - //: Track(mark); // remember mark - //: Stack(Textpos(), count + 1); // Make new mark, incr count - //: Goto(Operand(0)); // Loop - //: } - //: continue Forward; - { - using (RentedLocalBuilder count = RentInt32Local()) - { - PopStack(); - Stloc(count); // count -> temp - PopStack(); - using (RentedLocalBuilder mark = RentInt32Local()) - { - 
Stloc(mark); // mark -> temp2 - PushTrack(mark); - Ldloc(mark); - } - - Label l1 = DefineLabel(); - Label l2 = DefineLabel(); - Ldloc(_runtextposLocal!); - Bne(l1); // mark != textpos -> l1 - Ldloc(count); - Ldc(0); - Bge(l2); // count >= 0 && mark == textpos -> l2 - - MarkLabel(l1); - Ldloc(count); - Ldc(Operand(1)); - Bge(l2); // count >= Operand(1) -> l2 - - // else - PushStack(_runtextposLocal!); - ReadyPushStack(); - Ldloc(count); // mark already on track - Ldc(1); - Add(); - DoPush(); - Track(); - Goto(Operand(0)); - - // if (count >= Operand(1) || Textpos() == mark) - MarkLabel(l2); - PushTrack(count); // mark already on track - } - TrackUnique2(Branchcountback2); - break; - } - - case RegexCode.Branchcount | RegexCode.Back: - //: Trackframe(1); - //: Stackframe(2); - //: if (Stacked(1) > 0) // Positive -> can go straight - //: { - //: Textto(Stacked(0)); // Zap to mark - //: Track2(Tracked(0), Stacked(1) - 1); // Save old mark, old count - //: Advance(2); // Straight - //: continue Forward; - //: } - //: Stack(Tracked(0), Stacked(1) - 1); // recall old mark, old count - //: break Backward; - { - using (RentedLocalBuilder count = RentInt32Local()) - { - Label l1 = DefineLabel(); - PopStack(); - Ldc(1); - Sub(); - Stloc(count); - Ldloc(count); - Ldc(0); - Blt(l1); - - // if (count >= 0) - PopStack(); - Stloc(_runtextposLocal!); - PushTrack(count); // Tracked(0) is already on the track - TrackUnique2(Branchcountback2); - Advance(); - - // else - MarkLabel(l1); - ReadyReplaceStack(0); - PopTrack(); - DoReplace(); - PushStack(count); - } - Back(); - break; - } - - case RegexCode.Branchcount | RegexCode.Back2: - //: Trackframe(2); - //: Stack(Tracked(0), Tracked(1)); // Recall old mark, old count - //: break Backward; // Backtrack - - PopTrack(); - using (RentedLocalBuilder tmp = RentInt32Local()) - { - Stloc(tmp); - ReadyPushStack(); - PopTrack(); - DoPush(); - PushStack(tmp); - } - Back(); - break; - - case RegexCode.Lazybranchcount: - //: Stackframe(2); - //: 
int mark = Stacked(0); - //: int count = Stacked(1); - //: - //: if (count < 0) - //: { // Negative count -> loop now - //: Track2(mark); // Save old mark - //: Stack(Textpos(), count + 1); // Make new mark, incr count - //: Goto(Operand(0)); // Loop - //: } - //: else - //: { // Nonneg count or empty match -> straight now - //: Track(mark, count, Textpos()); // Save mark, count, position - //: } - { - PopStack(); - using (RentedLocalBuilder count = RentInt32Local()) - { - Stloc(count); // count -> temp - PopStack(); - using (RentedLocalBuilder mark = RentInt32Local()) - { - Stloc(mark); // mark -> temp2 - - Label l1 = DefineLabel(); - Ldloc(count); - Ldc(0); - Bge(l1); // count >= 0 -> l1 - - // if (count < 0) - PushTrack(mark); - PushStack(_runtextposLocal!); - ReadyPushStack(); - Ldloc(count); - Ldc(1); - Add(); - DoPush(); - TrackUnique2(Lazybranchcountback2); - Goto(Operand(0)); - - // else - MarkLabel(l1); - PushTrack(mark); - } - PushTrack(count); - } - PushTrack(_runtextposLocal!); - Track(); - break; - } - - case RegexCode.Lazybranchcount | RegexCode.Back: - //: Trackframe(3); - //: int mark = Tracked(0); - //: int textpos = Tracked(2); - //: if (Tracked(1) < Operand(1) && textpos != mark) - //: { // Under limit and not empty match -> loop - //: Textto(Tracked(2)); // Recall position - //: Stack(Textpos(), Tracked(1) + 1); // Make new mark, incr count - //: Track2(Tracked(0)); // Save old mark - //: Goto(Operand(0)); // Loop - //: continue Forward; - //: } - //: else - //: { - //: Stack(Tracked(0), Tracked(1)); // Recall old mark, count - //: break Backward; // backtrack - //: } - { - using (RentedLocalBuilder cLocal = RentInt32Local()) - { - Label l1 = DefineLabel(); - - PopTrack(); - Stloc(_runtextposLocal!); - PopTrack(); - Stloc(cLocal); - Ldloc(cLocal); - Ldc(Operand(1)); - Bge(l1); // Tracked(1) >= Operand(1) -> l1 - - Ldloc(_runtextposLocal!); - TopTrack(); - Beq(l1); // textpos == mark -> l1 - - PushStack(_runtextposLocal!); - ReadyPushStack(); - 
Ldloc(cLocal); - Ldc(1); - Add(); - DoPush(); - TrackUnique2(Lazybranchcountback2); - Goto(Operand(0)); - - MarkLabel(l1); - ReadyPushStack(); - PopTrack(); - DoPush(); - PushStack(cLocal); - } - Back(); - break; - } - - case RegexCode.Lazybranchcount | RegexCode.Back2: - // < - ReadyReplaceStack(1); - PopTrack(); - DoReplace(); - ReadyReplaceStack(0); - TopStack(); - Ldc(1); - Sub(); - DoReplace(); - Back(); - break; - - case RegexCode.Setjump: - //: Stack(Trackpos(), Crawlpos()); - //: Track(); - ReadyPushStack(); - Ldthisfld(s_runtrackField); - Ldlen(); - Ldloc(_runtrackposLocal!); - Sub(); - DoPush(); - ReadyPushStack(); - Ldthis(); - Call(s_crawlposMethod); - DoPush(); - TrackUnique(Stackpop2); - break; - - case RegexCode.Setjump | RegexCode.Back: - //: Stackframe(2); - PopDiscardStack(2); - Back(); - break; - - case RegexCode.Backjump: - //: Stackframe(2); - //: Trackto(Stacked(0)); - //: while (Crawlpos() != Stacked(1)) - //: Uncapture(); - //: break Backward; - { - Label l1 = DefineLabel(); - Label l2 = DefineLabel(); - - using (RentedLocalBuilder stackedLocal = RentInt32Local()) - { - PopStack(); - Stloc(stackedLocal); - Ldthisfld(s_runtrackField); - Ldlen(); - PopStack(); - Sub(); - Stloc(_runtrackposLocal!); - - MarkLabel(l1); - Ldthis(); - Call(s_crawlposMethod); - Ldloc(stackedLocal); - Beq(l2); - Ldthis(); - Call(s_uncaptureMethod); - Br(l1); - } - - MarkLabel(l2); - Back(); - break; - } - - case RegexCode.Forejump: - //: Stackframe(2); - //: Trackto(Stacked(0)); - //: Track(Stacked(1)); - PopStack(); - using (RentedLocalBuilder tmp = RentInt32Local()) - { - Stloc(tmp); - Ldthisfld(s_runtrackField); - Ldlen(); - PopStack(); - Sub(); - Stloc(_runtrackposLocal!); - PushTrack(tmp); - } - TrackUnique(Forejumpback); - break; - - case RegexCode.Forejump | RegexCode.Back: - //: Trackframe(1); - //: while (Crawlpos() != Tracked(0)) - //: Uncapture(); - //: break Backward; - { - Label l1 = DefineLabel(); - Label l2 = DefineLabel(); - - using 
(RentedLocalBuilder trackedLocal = RentInt32Local()) - { - PopTrack(); - Stloc(trackedLocal); - - MarkLabel(l1); - Ldthis(); - Call(s_crawlposMethod); - Ldloc(trackedLocal); - Beq(l2); - Ldthis(); - Call(s_uncaptureMethod); - Br(l1); - } - - MarkLabel(l2); - Back(); - break; - } - - case RegexCode.Bol: - //: if (Leftchars() > 0 && CharAt(Textpos() - 1) != '\n') - //: break Backward; - { - Label l1 = _labels![NextCodepos()]; - Ldloc(_runtextposLocal!); - Ldloc(_runtextbegLocal!); - Ble(l1); - Leftchar(); - Ldc('\n'); - BneFar(_backtrack); - break; - } - - case RegexCode.Eol: - //: if (Rightchars() > 0 && CharAt(Textpos()) != '\n') - //: break Backward; - { - Label l1 = _labels![NextCodepos()]; - Ldloc(_runtextposLocal!); - Ldloc(_runtextendLocal!); - Bge(l1); - Rightchar(); - Ldc('\n'); - BneFar(_backtrack); - break; - } - - case RegexCode.Boundary: - case RegexCode.NonBoundary: - //: if (!IsBoundary(Textpos(), _textbeg, _textend)) - //: break Backward; - Ldthis(); - Ldloc(_runtextposLocal!); - Ldloc(_runtextbegLocal!); - Ldloc(_runtextendLocal!); - Call(s_isBoundaryMethod); - if (Code() == RegexCode.Boundary) - { - BrfalseFar(_backtrack); - } - else - { - BrtrueFar(_backtrack); - } - break; - - case RegexCode.ECMABoundary: - case RegexCode.NonECMABoundary: - //: if (!IsECMABoundary(Textpos(), _textbeg, _textend)) - //: break Backward; - Ldthis(); - Ldloc(_runtextposLocal!); - Ldloc(_runtextbegLocal!); - Ldloc(_runtextendLocal!); - Call(s_isECMABoundaryMethod); - if (Code() == RegexCode.ECMABoundary) - { - BrfalseFar(_backtrack); - } - else - { - BrtrueFar(_backtrack); - } - break; - - case RegexCode.Beginning: - //: if (Leftchars() > 0) - //: break Backward; - Ldloc(_runtextposLocal!); - Ldloc(_runtextbegLocal!); - BgtFar(_backtrack); - break; - - case RegexCode.Start: - //: if (Textpos() != Textstart()) - //: break Backward; - Ldloc(_runtextposLocal!); - Ldthisfld(s_runtextstartField); - BneFar(_backtrack); - break; - - case RegexCode.EndZ: - //: if (Rightchars() 
> 1 || Rightchars() == 1 && CharAt(Textpos()) != '\n') - //: break Backward; - Ldloc(_runtextposLocal!); - Ldloc(_runtextendLocal!); - Ldc(1); - Sub(); - BltFar(_backtrack); - Ldloc(_runtextposLocal!); - Ldloc(_runtextendLocal!); - Bge(_labels![NextCodepos()]); - Rightchar(); - Ldc('\n'); - BneFar(_backtrack); - break; - - case RegexCode.End: - //: if (Rightchars() > 0) - //: break Backward; - Ldloc(_runtextposLocal!); - Ldloc(_runtextendLocal!); - BltFar(_backtrack); - break; - - case RegexCode.One: - case RegexCode.Notone: - case RegexCode.Set: - case RegexCode.One | RegexCode.Rtl: - case RegexCode.Notone | RegexCode.Rtl: - case RegexCode.Set | RegexCode.Rtl: - case RegexCode.One | RegexCode.Ci: - case RegexCode.Notone | RegexCode.Ci: - case RegexCode.Set | RegexCode.Ci: - case RegexCode.One | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Notone | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Set | RegexCode.Ci | RegexCode.Rtl: - - //: if (Rightchars() < 1 || Rightcharnext() != (char)Operand(0)) - //: break Backward; - - Ldloc(_runtextposLocal!); - - if (!IsRightToLeft()) - { - Ldloc(_runtextendLocal!); - BgeFar(_backtrack); - Rightcharnext(); - } - else - { - Ldloc(_runtextbegLocal!); - BleFar(_backtrack); - Leftcharnext(); - } - - if (Code() == RegexCode.Set) - { - EmitMatchCharacterClass(_strings![Operand(0)], IsCaseInsensitive()); - BrfalseFar(_backtrack); - } - else - { - if (IsCaseInsensitive()) - { - CallToLower(); - } - - Ldc(Operand(0)); - if (Code() == RegexCode.One) - { - BneFar(_backtrack); - } - else - { - BeqFar(_backtrack); - } - } - break; - - case RegexCode.Multi: - case RegexCode.Multi | RegexCode.Ci: - //: String Str = _strings[Operand(0)]; - //: int i, c; - //: if (Rightchars() < (c = Str.Length)) - //: break Backward; - //: for (i = 0; c > 0; i++, c--) - //: if (Str[i] != Rightcharnext()) - //: break Backward; - { - string str = _strings![Operand(0)]; - - Ldc(str.Length); - Ldloc(_runtextendLocal!); - Ldloc(_runtextposLocal!); - Sub(); - 
BgtFar(_backtrack); - - // unroll the string - for (int i = 0; i < str.Length; i++) - { - Ldloc(_runtextLocal!); - Ldloc(_runtextposLocal!); - if (i != 0) - { - Ldc(i); - Add(); - } - Call(s_stringGetCharsMethod); - if (IsCaseInsensitive()) - { - CallToLower(); - } - - Ldc(str[i]); - BneFar(_backtrack); - } - - Ldloc(_runtextposLocal!); - Ldc(str.Length); - Add(); - Stloc(_runtextposLocal!); - break; - } - - case RegexCode.Multi | RegexCode.Rtl: - case RegexCode.Multi | RegexCode.Ci | RegexCode.Rtl: - //: String Str = _strings[Operand(0)]; - //: int c; - //: if (Leftchars() < (c = Str.Length)) - //: break Backward; - //: while (c > 0) - //: if (Str[--c] != Leftcharnext()) - //: break Backward; - { - string str = _strings![Operand(0)]; - - Ldc(str.Length); - Ldloc(_runtextposLocal!); - Ldloc(_runtextbegLocal!); - Sub(); - BgtFar(_backtrack); - - // unroll the string - for (int i = str.Length; i > 0;) - { - i--; - Ldloc(_runtextLocal!); - Ldloc(_runtextposLocal!); - Ldc(str.Length - i); - Sub(); - Call(s_stringGetCharsMethod); - if (IsCaseInsensitive()) - { - CallToLower(); - } - Ldc(str[i]); - BneFar(_backtrack); - } - - Ldloc(_runtextposLocal!); - Ldc(str.Length); - Sub(); - Stloc(_runtextposLocal!); - - break; - } - - case RegexCode.Ref: - case RegexCode.Ref | RegexCode.Rtl: - case RegexCode.Ref | RegexCode.Ci: - case RegexCode.Ref | RegexCode.Ci | RegexCode.Rtl: - //: int capnum = Operand(0); - //: int j, c; - //: if (!_match.IsMatched(capnum)) { - //: if (!RegexOptions.ECMAScript) - //: break Backward; - //: } else { - //: if (Rightchars() < (c = _match.MatchLength(capnum))) - //: break Backward; - //: for (j = _match.MatchIndex(capnum); c > 0; j++, c--) - //: if (CharAt(j) != Rightcharnext()) - //: break Backward; - //: } - { - using RentedLocalBuilder lenLocal = RentInt32Local(); - using RentedLocalBuilder indexLocal = RentInt32Local(); - Label l1 = DefineLabel(); - - Ldthis(); - Ldc(Operand(0)); - Call(s_isMatchedMethod); - if ((_options & 
RegexOptions.ECMAScript) != 0) - { - Brfalse(AdvanceLabel()); - } - else - { - BrfalseFar(_backtrack); // !IsMatched() -> back - } - - Ldthis(); - Ldc(Operand(0)); - Call(s_matchLengthMethod); - Stloc(lenLocal); - Ldloc(lenLocal); - if (!IsRightToLeft()) - { - Ldloc(_runtextendLocal!); - Ldloc(_runtextposLocal!); - } - else - { - Ldloc(_runtextposLocal!); - Ldloc(_runtextbegLocal!); - } - Sub(); - BgtFar(_backtrack); // Matchlength() > Rightchars() -> back - - Ldthis(); - Ldc(Operand(0)); - Call(s_matchIndexMethod); - if (!IsRightToLeft()) - { - Ldloc(lenLocal); - Add(IsRightToLeft()); - } - Stloc(indexLocal); // index += len - - Ldloc(_runtextposLocal!); - Ldloc(lenLocal); - Add(IsRightToLeft()); - Stloc(_runtextposLocal!); // texpos += len - - MarkLabel(l1); - Ldloc(lenLocal); - Ldc(0); - Ble(AdvanceLabel()); - Ldloc(_runtextLocal!); - Ldloc(indexLocal); - Ldloc(lenLocal); - if (IsRightToLeft()) - { - Ldc(1); - Sub(); - Stloc(lenLocal); - Ldloc(lenLocal); - } - Sub(IsRightToLeft()); - Call(s_stringGetCharsMethod); - if (IsCaseInsensitive()) - { - CallToLower(); - } - - Ldloc(_runtextLocal!); - Ldloc(_runtextposLocal!); - Ldloc(lenLocal); - if (!IsRightToLeft()) - { - Ldloc(lenLocal); - Ldc(1); - Sub(); - Stloc(lenLocal); - } - Sub(IsRightToLeft()); - Call(s_stringGetCharsMethod); - if (IsCaseInsensitive()) - { - CallToLower(); - } - - Beq(l1); - Back(); - break; - } - - case RegexCode.Onerep: - case RegexCode.Notonerep: - case RegexCode.Setrep: - case RegexCode.Onerep | RegexCode.Rtl: - case RegexCode.Notonerep | RegexCode.Rtl: - case RegexCode.Setrep | RegexCode.Rtl: - case RegexCode.Onerep | RegexCode.Ci: - case RegexCode.Notonerep | RegexCode.Ci: - case RegexCode.Setrep | RegexCode.Ci: - case RegexCode.Onerep | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Notonerep | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Setrep | RegexCode.Ci | RegexCode.Rtl: - //: int c = Operand(1); - //: if (Rightchars() < c) - //: break Backward; - //: char ch = 
(char)Operand(0); - //: while (c-- > 0) - //: if (Rightcharnext() != ch) - //: break Backward; - { - int c = Operand(1); - if (c == 0) - break; - - Ldc(c); - if (!IsRightToLeft()) - { - Ldloc(_runtextendLocal!); - Ldloc(_runtextposLocal!); - } - else - { - Ldloc(_runtextposLocal!); - Ldloc(_runtextbegLocal!); - } - Sub(); - BgtFar(_backtrack); // Matchlength() > Rightchars() -> back - - Ldloc(_runtextposLocal!); - Ldc(c); - Add(IsRightToLeft()); - Stloc(_runtextposLocal!); // texpos += len - - using RentedLocalBuilder lenLocal = RentInt32Local(); - Label l1 = DefineLabel(); - Ldc(c); - Stloc(lenLocal); - - MarkLabel(l1); - Ldloc(_runtextLocal!); - Ldloc(_runtextposLocal!); - Ldloc(lenLocal); - if (IsRightToLeft()) - { - Ldc(1); - Sub(); - Stloc(lenLocal); - Ldloc(lenLocal); - Add(); - } - else - { - Ldloc(lenLocal); - Ldc(1); - Sub(); - Stloc(lenLocal); - Sub(); - } - Call(s_stringGetCharsMethod); - - if (Code() == RegexCode.Setrep) - { - EmitTimeoutCheck(); - EmitMatchCharacterClass(_strings![Operand(0)], IsCaseInsensitive()); - BrfalseFar(_backtrack); - } - else - { - if (IsCaseInsensitive()) - { - CallToLower(); - } - - Ldc(Operand(0)); - if (Code() == RegexCode.Onerep) - { - BneFar(_backtrack); - } - else - { - BeqFar(_backtrack); - } - } - Ldloc(lenLocal); - Ldc(0); - if (Code() == RegexCode.Setrep) - { - BgtFar(l1); - } - else - { - Bgt(l1); - } - break; - } - - case RegexCode.Oneloop: - case RegexCode.Notoneloop: - case RegexCode.Setloop: - case RegexCode.Oneloop | RegexCode.Rtl: - case RegexCode.Notoneloop | RegexCode.Rtl: - case RegexCode.Setloop | RegexCode.Rtl: - case RegexCode.Oneloop | RegexCode.Ci: - case RegexCode.Notoneloop | RegexCode.Ci: - case RegexCode.Setloop | RegexCode.Ci: - case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Oneloopatomic: - case RegexCode.Notoneloopatomic: - case RegexCode.Setloopatomic: - 
case RegexCode.Oneloopatomic | RegexCode.Rtl: - case RegexCode.Notoneloopatomic | RegexCode.Rtl: - case RegexCode.Setloopatomic | RegexCode.Rtl: - case RegexCode.Oneloopatomic | RegexCode.Ci: - case RegexCode.Notoneloopatomic | RegexCode.Ci: - case RegexCode.Setloopatomic | RegexCode.Ci: - case RegexCode.Oneloopatomic | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Notoneloopatomic | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Setloopatomic | RegexCode.Ci | RegexCode.Rtl: - //: int len = Operand(1); - //: if (len > Rightchars()) - //: len = Rightchars(); - //: char ch = (char)Operand(0); - //: int i; - //: for (i = len; i > 0; i--) - //: { - //: if (Rightcharnext() != ch) - //: { - //: Leftnext(); - //: break; - //: } - //: } - //: if (len > i) - //: Track(len - i - 1, Textpos() - 1); - { - int c = Operand(1); - if (c == 0) - { - break; - } - - using RentedLocalBuilder lenLocal = RentInt32Local(); - using RentedLocalBuilder iLocal = RentInt32Local(); - - if (!IsRightToLeft()) - { - Ldloc(_runtextendLocal!); - Ldloc(_runtextposLocal!); - } - else - { - Ldloc(_runtextposLocal!); - Ldloc(_runtextbegLocal!); - } - Sub(); - Stloc(lenLocal); - if (c != int.MaxValue) - { - Label l4 = DefineLabel(); - Ldloc(lenLocal); - Ldc(c); - Blt(l4); - Ldc(c); - Stloc(lenLocal); - MarkLabel(l4); - } - - Label loopEnd = DefineLabel(); - string? set = Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic ? _strings![Operand(0)] : null; - Span setChars = stackalloc char[5]; // max optimized by IndexOfAny today - int numSetChars; + // if (iterationCount < minIterations) goto doneLabel/originalDoneLabel; + Ldloc(iterationCount); + Ldc(minIterations); + BltFar(childBacktracks ? doneLabel : originalDoneLabel); + } - // If this is a notoneloop{atomic} and we're left-to-right and case-sensitive, - // we can use the vectorized IndexOf to search for the target character. 
- if ((Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic) && - !IsRightToLeft() && - (!IsCaseInsensitive())) - { - // i = runtext.AsSpan(runtextpos, len).IndexOf(ch); - Ldloc(_runtextLocal!); - Ldloc(_runtextposLocal!); - Ldloc(lenLocal); - Call(s_stringAsSpanIntIntMethod); - Ldc(Operand(0)); - Call(s_spanIndexOfChar); - Stloc(iLocal); + if (childBacktracks) + { + // goto endLoop; + BrFar(endLoop); - Label charFound = DefineLabel(); + // Backtrack: + Label backtrack = DefineLabel(); + MarkLabel(backtrack); - // if (i != -1) goto charFound; - Ldloc(iLocal); - Ldc(-1); - Bne(charFound); - - // runtextpos += len; - // i = 0; - // goto loopEnd; - Ldloc(_runtextposLocal!); - Ldloc(lenLocal); - Add(); - Stloc(_runtextposLocal!); - Ldc(0); - Stloc(iLocal); - BrFar(loopEnd); - - // charFound: - // runtextpos += i; - // i = len - i; - // goto loopEnd; - MarkLabel(charFound); - Ldloc(_runtextposLocal!); - Ldloc(iLocal); - Add(); - Stloc(_runtextposLocal!); - Ldloc(lenLocal); - Ldloc(iLocal); - Sub(); - Stloc(iLocal); - BrFar(loopEnd); - } - else if ((Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) && - !IsRightToLeft() && - !IsCaseInsensitive() && - (numSetChars = RegexCharClass.GetSetChars(set!, setChars)) != 0 && - RegexCharClass.IsNegated(set!)) - { - // Similarly, if this is a setloop{atomic} and we're left-to-right and case-sensitive, - // and if the set contains only a few negated chars, we can use the vectorized IndexOfAny - // to search for those chars. 
- Debug.Assert(numSetChars > 1); - - // i = runtext.AsSpan(runtextpos, len).IndexOfAny(ch1, ch2{, ch3}); - Ldloc(_runtextLocal!); - Ldloc(_runtextposLocal!); - Ldloc(lenLocal); - Call(s_stringAsSpanIntIntMethod); - switch (numSetChars) - { - case 2: - Ldc(setChars[0]); - Ldc(setChars[1]); - Call(s_spanIndexOfAnyCharChar); - break; - - case 3: - Ldc(setChars[0]); - Ldc(setChars[1]); - Ldc(setChars[2]); - Call(s_spanIndexOfAnyCharCharChar); - break; - - default: - Ldstr(setChars.Slice(0, numSetChars).ToString()); - Call(s_stringAsSpanMethod); - Call(s_spanIndexOfSpan); - break; - } - Stloc(iLocal); + // if (iterationCount == 0) goto originalDoneLabel; + Ldloc(iterationCount); + Ldc(0); + BeqFar(originalDoneLabel); - Label charFound = DefineLabel(); + // goto doneLabel; + BrFar(doneLabel); - // if (i != -1) goto charFound; - Ldloc(iLocal); - Ldc(-1); - Bne(charFound); - - // runtextpos += len; - // i = 0; - // goto loopEnd; - Ldloc(_runtextposLocal!); - Ldloc(lenLocal); - Add(); - Stloc(_runtextposLocal!); - Ldc(0); - Stloc(iLocal); - BrFar(loopEnd); - - // charFound: - // runtextpos += i; - // i = len - i; - // goto loopEnd; - MarkLabel(charFound); - Ldloc(_runtextposLocal!); - Ldloc(iLocal); - Add(); - Stloc(_runtextposLocal!); - Ldloc(lenLocal); - Ldloc(iLocal); - Sub(); - Stloc(iLocal); - BrFar(loopEnd); - } - else if ((Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) && - !IsRightToLeft() && - set == RegexCharClass.AnyClass) - { - // If someone uses .* along with RegexOptions.Singleline, that becomes [anycharacter]*, which means it'll - // consume everything. As such, we can simply update our position to be the last allowed, without - // actually checking anything. - - // runtextpos += len; - // i = 0; - // goto loopEnd; - Ldloc(_runtextposLocal!); - Ldloc(lenLocal); - Add(); - Stloc(_runtextposLocal!); - Ldc(0); - Stloc(iLocal); - BrFar(loopEnd); - } - else - { - // Otherwise, we emit the open-coded loop. 
+ doneLabel = backtrack; + } - Ldloc(lenLocal); - Ldc(1); - Add(); - Stloc(iLocal); + MarkLabel(endLoop); - Label loopCondition = DefineLabel(); - MarkLabel(loopCondition); - Ldloc(iLocal); - Ldc(1); - Sub(); - Stloc(iLocal); - Ldloc(iLocal); - Ldc(0); - if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) - { - BleFar(loopEnd); - } - else - { - Ble(loopEnd); - } + if (node.IsInLoop()) + { + // Store the capture's state + EmitRunstackResizeIfNeeded(3); + EmitRunstackPush(() => Ldloc(startingRunTextPos)); + EmitRunstackPush(() => Ldloc(iterationCount)); - if (IsRightToLeft()) - { - Leftcharnext(); - } - else - { - Rightcharnext(); - } + // Skip past the backtracking section + // goto end; + Label end = DefineLabel(); + BrFar(end); - if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) - { - EmitTimeoutCheck(); - EmitMatchCharacterClass(_strings![Operand(0)], IsCaseInsensitive()); - BrtrueFar(loopCondition); - } - else - { - if (IsCaseInsensitive()) - { - CallToLower(); - } + // Emit a backtracking section that restores the capture's state and then jumps to the previous done label + Label backtrack = DefineLabel(); + MarkLabel(backtrack); - Ldc(Operand(0)); - if (Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopatomic) - { - Beq(loopCondition); - } - else - { - Debug.Assert(Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic); - Bne(loopCondition); - } - } + // iterationCount = base.runstack[--runstack]; + // startingRunTextPos = base.runstack[--runstack]; + EmitRunstackPop(); + Stloc(iterationCount); + EmitRunstackPop(); + Stloc(startingRunTextPos); - Ldloc(_runtextposLocal!); - Ldc(1); - Sub(IsRightToLeft()); - Stloc(_runtextposLocal!); - } + // goto doneLabel; + BrFar(doneLabel); - // loopEnd: - MarkLabel(loopEnd); - if (Code() != RegexCode.Oneloopatomic && Code() != RegexCode.Notoneloopatomic && Code() != RegexCode.Setloopatomic) - { - // if (len <= i) goto advance; - Ldloc(lenLocal); - Ldloc(iLocal); 
- Ble(AdvanceLabel()); + doneLabel = backtrack; + MarkLabel(end); + } + } - // TrackPush(len - i - 1, runtextpos - Bump()) - ReadyPushTrack(); - Ldloc(lenLocal); - Ldloc(iLocal); - Sub(); - Ldc(1); - Sub(); - DoPush(); + void EmitRunstackResizeIfNeeded(int count) + { + Debug.Assert(count >= 1); - ReadyPushTrack(); - Ldloc(_runtextposLocal!); - Ldc(1); - Sub(IsRightToLeft()); - DoPush(); + // if (runstackpos >= base.runstack!.Length - (count - 1)) + // { + // Array.Resize(ref base.runstack, base.runstack.Length * 2); + // } - Track(); - } - break; - } + Label skipResize = DefineLabel(); - case RegexCode.Oneloop | RegexCode.Back: - case RegexCode.Notoneloop | RegexCode.Back: - case RegexCode.Setloop | RegexCode.Back: - case RegexCode.Oneloop | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Notoneloop | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Setloop | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Back: - case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Back: - case RegexCode.Setloop | RegexCode.Ci | RegexCode.Back: - case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: - //: Trackframe(2); - //: int i = Tracked(0); - //: int pos = Tracked(1); - //: Textto(pos); - //: if (i > 0) - //: Track(i - 1, pos - 1); - //: Advance(2); - PopTrack(); - Stloc(_runtextposLocal!); - PopTrack(); - using (RentedLocalBuilder posLocal = RentInt32Local()) - { - Stloc(posLocal); - Ldloc(posLocal); - Ldc(0); - BleFar(AdvanceLabel()); - ReadyPushTrack(); - Ldloc(posLocal); - } - Ldc(1); + Ldloc(runstackpos); + Ldthisfld(s_runstackField); + Ldlen(); + if (count > 1) + { + Ldc(count - 1); Sub(); - DoPush(); - ReadyPushTrack(); - Ldloc(_runtextposLocal!); - Ldc(1); - Sub(IsRightToLeft()); - DoPush(); - Trackagain(); - Advance(); - break; + } + Blt(skipResize); - case 
RegexCode.Onelazy: - case RegexCode.Notonelazy: - case RegexCode.Setlazy: - case RegexCode.Onelazy | RegexCode.Rtl: - case RegexCode.Notonelazy | RegexCode.Rtl: - case RegexCode.Setlazy | RegexCode.Rtl: - case RegexCode.Onelazy | RegexCode.Ci: - case RegexCode.Notonelazy | RegexCode.Ci: - case RegexCode.Setlazy | RegexCode.Ci: - case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Rtl: - case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Rtl: - //: int c = Operand(1); - //: if (c > Rightchars()) - //: c = Rightchars(); - //: if (c > 0) - //: Track(c - 1, Textpos()); - { - int c = Operand(1); - if (c == 0) - { - break; - } + Ldthis(); + _ilg!.Emit(OpCodes.Ldflda, s_runstackField); + Ldthisfld(s_runstackField); + Ldlen(); + Ldc(2); + Mul(); + Call(s_arrayResize); - if (!IsRightToLeft()) - { - Ldloc(_runtextendLocal!); - Ldloc(_runtextposLocal!); - } - else - { - Ldloc(_runtextposLocal!); - Ldloc(_runtextbegLocal!); - } - Sub(); - using (RentedLocalBuilder cLocal = RentInt32Local()) - { - Stloc(cLocal); - if (c != int.MaxValue) - { - Label l4 = DefineLabel(); - Ldloc(cLocal); - Ldc(c); - Blt(l4); - Ldc(c); - Stloc(cLocal); - MarkLabel(l4); - } - Ldloc(cLocal); - Ldc(0); - Ble(AdvanceLabel()); - ReadyPushTrack(); - Ldloc(cLocal); - } - Ldc(1); - Sub(); - DoPush(); - PushTrack(_runtextposLocal!); - Track(); - break; - } + MarkLabel(skipResize); + } - case RegexCode.Onelazy | RegexCode.Back: - case RegexCode.Notonelazy | RegexCode.Back: - case RegexCode.Setlazy | RegexCode.Back: - case RegexCode.Onelazy | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Notonelazy | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Setlazy | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Back: - case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Back: - case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Back: - case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: - case 
RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: - case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back: - //: Trackframe(2); - //: int pos = Tracked(1); - //: Textto(pos); - //: if (Rightcharnext() != (char)Operand(0)) - //: break Backward; - //: int i = Tracked(0); - //: if (i > 0) - //: Track(i - 1, pos + 1); - - PopTrack(); - Stloc(_runtextposLocal!); - PopTrack(); - using (RentedLocalBuilder iLocal = RentInt32Local()) - { - Stloc(iLocal); + void EmitRunstackPush(Action load) + { + // base.runstack[runstackpos] = load(); + Ldthisfld(s_runstackField); + Ldloc(runstackpos); + load(); + StelemI4(); - if (!IsRightToLeft()) - { - Rightcharnext(); - } - else - { - Leftcharnext(); - } + // runstackpos++; + Ldloc(runstackpos); + Ldc(1); + Add(); + Stloc(runstackpos); + } - if (Code() == RegexCode.Setlazy) - { - EmitMatchCharacterClass(_strings![Operand(0)], IsCaseInsensitive()); - BrfalseFar(_backtrack); - } - else - { - if (IsCaseInsensitive()) - { - CallToLower(); - } + void EmitRunstackPop() + { + // ... 
= base.runstack[--runstackpos]; + Ldthisfld(s_runstackField); + Ldloc(runstackpos); + Ldc(1); + Sub(); + Stloc(runstackpos); + Ldloc(runstackpos); + LdelemI4(); + } + } - Ldc(Operand(0)); - if (Code() == RegexCode.Onelazy) - { - BneFar(_backtrack); - } - else - { - BeqFar(_backtrack); - } + private void InitializeCultureForGoIfNecessary() + { + _textInfo = null; + if ((_options & RegexOptions.CultureInvariant) == 0) + { + bool needsCulture = (_options & RegexOptions.IgnoreCase) != 0; + if (!needsCulture) + { + for (int codepos = 0; codepos < _codes!.Length; codepos += RegexCode.OpcodeSize(_codes[codepos])) + { + if ((_codes[codepos] & RegexCode.Ci) == RegexCode.Ci) + { + needsCulture = true; + break; } - - Ldloc(iLocal); - Ldc(0); - BleFar(AdvanceLabel()); - ReadyPushTrack(); - Ldloc(iLocal); } - Ldc(1); - Sub(); - DoPush(); - PushTrack(_runtextposLocal!); - Trackagain(); - Advance(); - break; + } - default: - Debug.Fail($"Unimplemented state: {_regexopcode:X8}"); - break; + if (needsCulture) + { + // cache CultureInfo in local variable which saves excessive thread local storage accesses + _textInfo = DeclareTextInfo(); + InitLocalCultureInfo(); + } } } @@ -6227,17 +3692,17 @@ private void EmitTimeoutCheck() return; } - Debug.Assert(_loopTimeoutCounterLocal != null); + Debug.Assert(_loopTimeoutCounter != null); // Increment counter for each loop iteration. - Ldloc(_loopTimeoutCounterLocal); + Ldloc(_loopTimeoutCounter); Ldc(1); Add(); - Stloc(_loopTimeoutCounterLocal); + Stloc(_loopTimeoutCounter); // Emit code to check the timeout every 2048th iteration. Label label = DefineLabel(); - Ldloc(_loopTimeoutCounterLocal); + Ldloc(_loopTimeoutCounter); Ldc(LoopTimeoutCheckCount); RemUn(); Brtrue(label); @@ -6245,42 +3710,5 @@ private void EmitTimeoutCheck() Call(s_checkTimeoutMethod); MarkLabel(label); } - -#if DEBUG - /// Emit code to print out the current state of the runner. 
- [ExcludeFromCodeCoverage(Justification = "Debug only")] - private void DumpBacktracking() - { - Mvlocfld(_runtextposLocal!, s_runtextposField); - Mvlocfld(_runtrackposLocal!, s_runtrackposField); - Mvlocfld(_runstackposLocal!, s_runstackposField); - Ldthis(); - Call(s_dumpStateM); - - var sb = new StringBuilder(); - if (_backpos > 0) - { - sb.Append($"{_backpos:D6} "); - } - else - { - sb.Append(" "); - } - sb.Append(_code!.OpcodeDescription(_codepos)); - - if ((_regexopcode & RegexCode.Back) != 0) - { - sb.Append(" Back"); - } - - if ((_regexopcode & RegexCode.Back2) != 0) - { - sb.Append(" Back2"); - } - - Ldstr(sb.ToString()); - Call(s_debugWriteLine!); - } -#endif } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs index 53b78c5d324796..a548c01201edbe 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs @@ -29,17 +29,16 @@ internal sealed class RegexLWCGCompiler : RegexCompiler /// Id number to use for the next compiled regex. private static int s_regexCount; - public RegexLWCGCompiler() - { - } - /// The top-level driver. Initializes everything then calls the Generate* methods. - public RegexRunnerFactory FactoryInstanceFromCode(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) + public RegexRunnerFactory? 
FactoryInstanceFromCode(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) { + if (!code.Tree.Root.SupportsCompilation()) + { + return null; + } + _code = code; _codes = code.Codes; - _strings = code.Strings; - _trackcount = code.TrackCount; _options = options; _hasTimeout = hasTimeout; @@ -54,13 +53,13 @@ public RegexRunnerFactory FactoryInstanceFromCode(string pattern, RegexCode code description = string.Concat("_", pattern.Length > DescriptionLimit ? pattern.AsSpan(0, DescriptionLimit) : pattern); } - DynamicMethod goMethod = DefineDynamicMethod($"Regex{regexNum}_Go{description}", null, typeof(CompiledRegexRunner)); - GenerateGo(); - DynamicMethod findFirstCharMethod = DefineDynamicMethod($"Regex{regexNum}_FindFirstChar{description}", typeof(bool), typeof(CompiledRegexRunner)); GenerateFindFirstChar(); - return new CompiledRegexRunnerFactory(goMethod, findFirstCharMethod, _trackcount); + DynamicMethod goMethod = DefineDynamicMethod($"Regex{regexNum}_Go{description}", null, typeof(CompiledRegexRunner)); + GenerateGo(); + + return new CompiledRegexRunnerFactory(goMethod, findFirstCharMethod, code.TrackCount); } /// Begins the definition of a new method (no args) with a specified return value. diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index d3adddb5e1c30f..66d66ce128d168 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -2207,49 +2207,44 @@ public int ChildCount() return 1; } - // Determines whether the node supports an optimized code gen strategy based on walking the node tree. 
- internal bool SupportsSimplifiedCodeGenerationImplementation() + // Determines whether the node supports a compilation / code generation strategy based on walking the node tree. + internal bool SupportsCompilation() { if (!StackHelper.TryEnsureSufficientExecutionStack()) { - // If we can't recur further, simplified code generation isn't supported as the tree is too deep. + // If we can't recur further, code generation isn't supported as the tree is too deep. return false; } - if ((Options & RegexOptions.RightToLeft) != 0) + if ((Options & (RegexOptions.RightToLeft | RegexOptions.NonBacktracking)) != 0) { - // RightToLeft isn't supported. That applies to both the top-level options as well as when used - // to specify positive and negative lookbehinds. + // NonBacktracking isn't supported, nor RightToLeft. The latter applies to both the top-level + // options as well as when used to specify positive and negative lookbehinds. return false; } - // TODO: This should be moved somewhere else, to a pass somewhere where we explicitly - // annotate the tree, potentially as part of the final optimization pass. It doesn't - // belong in this check. - switch (Type) - { - case Capture: - // If we've found a supported capture, mark all of the nodes in its parent - // hierarchy as containing a capture. - RegexNode? parent = this; - while (parent != null && ((parent.Options & HasCapturesFlag) == 0)) - { - parent.Options |= HasCapturesFlag; - parent = parent.Next; - } - break; - } - int childCount = ChildCount(); for (int i = 0; i < childCount; i++) { // The node isn't supported if any of its children aren't supported. - if (!Child(i).SupportsSimplifiedCodeGenerationImplementation()) + if (!Child(i).SupportsCompilation()) { return false; } } + // TODO: This should be moved somewhere else, to a pass somewhere where we explicitly + // annotate the tree, potentially as part of the final optimization pass. It doesn't + // belong in this check. 
+ if (Type == Capture) + { + // If we've found a supported capture, mark all of the nodes in its parent hierarchy as containing a capture. + for (RegexNode? parent = this; parent != null && (parent.Options & HasCapturesFlag) == 0; parent = parent.Next) + { + parent.Options |= HasCapturesFlag; + } + } + // Supported. return true; } diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs index 460a7a4d5156d8..f2c3515c5ca596 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs @@ -326,6 +326,40 @@ public static IEnumerable Matches_TestData() }; } } + + if (engine != RegexEngine.Interpreter && !PlatformDetection.IsNetFramework) + { + // Fails on interpreter and .NET Framework: [ActiveIssue("https://github.com/dotnet/runtime/issues/62094")] + yield return new object[] + { + engine, "@(a*)+?", "@", RegexOptions.None, new[] + { + new CaptureData("@", 0, 1) + } + }; + + // Fails on interpreter and .NET Framework: [ActiveIssue("https://github.com/dotnet/runtime/issues/62094")] + yield return new object[] + { + engine, @"(?:){93}", "x", RegexOptions.None, new[] + { + new CaptureData("", 0, 0), + new CaptureData("", 1, 0) + } + }; + + if (!RegexHelpers.IsNonBacktracking(engine)) // atomic subexpressions aren't supported + { + // Fails on interpreter and .NET Framework: [ActiveIssue("https://github.com/dotnet/runtime/issues/62094")] + yield return new object[] + { + engine, @"()(?>\1+?).\b", "xxxx", RegexOptions.None, new[] + { + new CaptureData("x", 3, 1), + } + }; + } + } } } @@ -336,9 +370,6 @@ public async Task Matches(RegexEngine engine, string pattern, string input, Rege Regex regexAdvanced = await RegexHelpers.GetRegexAsync(engine, pattern, options); VerifyMatches(regexAdvanced.Matches(input), expected); 
VerifyMatches(regexAdvanced.Match(input), expected); - - VerifyMatches(Regex.Matches(input, pattern, options), expected); - VerifyMatches(Regex.Match(input, pattern, options), expected); } private static void VerifyMatches(Match match, CaptureData[] expected) @@ -361,18 +392,18 @@ private static void VerifyMatches(MatchCollection matches, CaptureData[] expecte private static void VerifyMatch(Match match, CaptureData expected) { Assert.True(match.Success); - RegexAssert.Equal(expected.Value, match); Assert.Equal(expected.Index, match.Index); Assert.Equal(expected.Length, match.Length); + RegexAssert.Equal(expected.Value, match); - RegexAssert.Equal(expected.Value, match.Groups[0]); Assert.Equal(expected.Index, match.Groups[0].Index); Assert.Equal(expected.Length, match.Groups[0].Length); + RegexAssert.Equal(expected.Value, match.Groups[0]); Assert.Equal(1, match.Captures.Count); - RegexAssert.Equal(expected.Value, match.Captures[0]); Assert.Equal(expected.Index, match.Captures[0].Index); Assert.Equal(expected.Length, match.Captures[0].Length); + RegexAssert.Equal(expected.Value, match.Captures[0]); } [Fact] diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs index 5320ae273f838d..294cbfe3fde06a 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexGeneratorHelper.netcoreapp.cs @@ -1,7 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-using System.Collections.Generic; +using System.Collections.Immutable; using System.Diagnostics; using System.Globalization; using System.IO; @@ -123,11 +123,12 @@ internal static async Task SourceGenRegexAsync( // Run the generator GeneratorDriverRunResult generatorResults = s_generatorDriver.RunGenerators(comp!, cancellationToken).GetRunResult(); - if (generatorResults.Diagnostics.Length != 0) + ImmutableArray generatorDiagnostics = generatorResults.Diagnostics.RemoveAll(d => d.Severity <= DiagnosticSeverity.Info); + if (generatorDiagnostics.Length != 0) { throw new ArgumentException( string.Join(Environment.NewLine, generatorResults.GeneratedTrees.Select(t => NumberLines(t.ToString()))) + Environment.NewLine + - string.Join(Environment.NewLine, generatorResults.Diagnostics)); + string.Join(Environment.NewLine, generatorDiagnostics)); } // Compile the assembly to a stream @@ -138,7 +139,7 @@ internal static async Task SourceGenRegexAsync( { throw new ArgumentException( string.Join(Environment.NewLine, generatorResults.GeneratedTrees.Select(t => NumberLines(t.ToString()))) + Environment.NewLine + - string.Join(Environment.NewLine, results.Diagnostics.Concat(generatorResults.Diagnostics))); + string.Join(Environment.NewLine, results.Diagnostics.Concat(generatorDiagnostics))); } dll.Position = 0; diff --git a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs index 2212211696af9a..1e8523d2f73f4a 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Generators.Tests/RegexGeneratorParserTests.cs @@ -163,6 +163,66 @@ partial class C Assert.Equal("SYSLIB1044", Assert.Single(diagnostics).Id); } 
+ [Fact] + public async Task Diagnostic_RightToLeft_LimitedSupport() + { + IReadOnlyList diagnostics = await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""ab"", RegexOptions.RightToLeft)] + private static partial Regex RightToLeftNotSupported(); + } + "); + + Assert.Equal("SYSLIB1045", Assert.Single(diagnostics).Id); + } + + [Fact] + public async Task Diagnostic_NonBacktracking_LimitedSupport() + { + IReadOnlyList diagnostics = await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""ab"", RegexOptions.NonBacktracking)] + private static partial Regex RightToLeftNotSupported(); + } + "); + + Assert.Equal("SYSLIB1045", Assert.Single(diagnostics).Id); + } + + [Fact] + public async Task Diagnostic_PositiveLookbehind_LimitedSupport() + { + IReadOnlyList diagnostics = await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""(?<=\b20)\d{2}\b"")] + private static partial Regex PositiveLookbehindNotSupported(); + } + "); + + Assert.Equal("SYSLIB1045", Assert.Single(diagnostics).Id); + } + + [Fact] + public async Task Diagnostic_NegativeLookbehind_LimitedSupport() + { + IReadOnlyList diagnostics = await RunGenerator(@" + using System.Text.RegularExpressions; + partial class C + { + [RegexGenerator(""(? 
Date: Thu, 2 Dec 2021 16:50:40 -0500 Subject: [PATCH 2/4] Make the addition of more declarations a bit more robust --- .../gen/RegexGenerator.Emitter.cs | 52 +++++++++++-------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 4bc44709d48704..d6ce717cb81b81 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -26,12 +26,6 @@ namespace System.Text.RegularExpressions.Generator { public partial class RegexGenerator { - /// - /// Value added to the written code to enable subsequent replacement with any variable declarations - /// dynamically discovered during code generation. - /// - private const string AdditionalDeclarationsPlaceholder = "<>PLACEHOLDER_FOR_ADDITIONAL_DECLARATIONS"; - /// Code for a [GeneratedCode] attribute to put on the top-level generated members. private static readonly string s_generatedCodeAttribute = $"[global::System.CodeDom.Compiler.GeneratedCodeAttribute(\"{typeof(RegexGenerator).Assembly.GetName().Name}\", \"{typeof(RegexGenerator).Assembly.GetName().Version}\")]"; /// Header comments and usings to include at the top of every generated file. 
@@ -293,7 +287,10 @@ private static void EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, // Emit locals initialization writer.WriteLine("global::System.ReadOnlySpan runtextSpan = base.runtext;"); writer.WriteLine("int runtextpos = base.runtextpos;"); - writer.WriteLine($"int runtextend = base.runtextend;{AdditionalDeclarationsPlaceholder}"); // placeholder at the end of a line so the generated indents line up + writer.Write($"int runtextend = base.runtextend;"); + writer.Flush(); + int additionalDeclarationsPosition = ((StringWriter)writer.InnerWriter).GetStringBuilder().Length; + int additionalDeclarationsIndent = writer.Indent; writer.WriteLine(); // Generate length check. If the input isn't long enough to possibly match, fail quickly. @@ -353,7 +350,7 @@ private static void EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, writer.WriteLine("return false;"); // We're done. Patch up any additional declarations. - ReplaceAdditionalDeclarations(additionalDeclarations, writer); + ReplaceAdditionalDeclarations(writer, additionalDeclarations, additionalDeclarationsPosition, additionalDeclarationsIndent); return; // Emits any anchors. 
Returns true if the anchor roots any match to a specific location and thus no further @@ -639,9 +636,13 @@ string ReserveName(string prefix) writer.WriteLine("string runtext = base.runtext!;"); writer.WriteLine("int runtextpos = base.runtextpos;"); writer.WriteLine("int runtextend = base.runtextend;"); - writer.WriteLine($"int original_runtextpos = runtextpos;{AdditionalDeclarationsPlaceholder}"); // placeholder at the end of a line so the generated indents line up - writer.WriteLine("int runstackpos = 0;"); + writer.WriteLine($"int original_runtextpos = runtextpos;"); hasTimeout = EmitLoopTimeoutCounterIfNeeded(writer, rm); + writer.Write("int runstackpos = 0;"); + writer.Flush(); + int additionalDeclarationsPosition = ((StringWriter)writer.InnerWriter).GetStringBuilder().Length; + int additionalDeclarationsIndent = writer.Indent; + writer.WriteLine(); // TextInfo textInfo = CultureInfo.CurrentCulture.TextInfo; // only if the whole expression or any subportion is ignoring case, and we're not using invariant bool hasTextInfo = EmitInitializeCultureForGoIfNecessary(writer, rm); @@ -683,7 +684,7 @@ string ReserveName(string prefix) } // We're done. Patch up any additional declarations. - ReplaceAdditionalDeclarations(additionalDeclarations, writer); + ReplaceAdditionalDeclarations(writer, additionalDeclarations, additionalDeclarationsPosition, additionalDeclarationsIndent); return; static bool IsCaseInsensitive(RegexNode node) => (node.Options & RegexOptions.IgnoreCase) != 0; @@ -3003,22 +3004,31 @@ private static string MatchCharacterClass(bool hasTextInfo, RegexOptions options /// Replaces in with /// all of the variable declarations in . /// - private static void ReplaceAdditionalDeclarations(HashSet declarations, IndentedTextWriter writer) + /// The writer around a StringWriter to have additional declarations inserted into. + /// The additional declarations to insert. + /// The position into the writer at which to insert the additional declarations. 
+ /// The indentation to use for the additional declarations. + private static void ReplaceAdditionalDeclarations(IndentedTextWriter writer, HashSet declarations, int position, int indent) { - StringBuilder sb = ((StringWriter)writer.InnerWriter).GetStringBuilder(); - string replacement = ""; - if (declarations.Count != 0) { - var tmp = new StringBuilder().AppendLine(); - foreach (string decl in declarations) + var arr = new string[declarations.Count]; + declarations.CopyTo(arr); + Array.Sort(arr); + + StringBuilder tmp = new StringBuilder().AppendLine(); + foreach (string decl in arr) { - tmp.Append(' ', writer.Indent * 4).AppendLine(decl); + for (int i = 0; i < indent; i++) + { + tmp.Append(IndentedTextWriter.DefaultTabString); + } + + tmp.AppendLine(decl); } - replacement = tmp.ToString(); - } - sb.Replace(AdditionalDeclarationsPlaceholder, replacement); + ((StringWriter)writer.InnerWriter).GetStringBuilder().Insert(position, tmp.ToString()); + } } private static string Literal(char c) => SymbolDisplay.FormatLiteral(c, quote: true); From 350c756b9dfae5f7e4bcc5345e811d83551f642a Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Thu, 2 Dec 2021 21:47:09 -0500 Subject: [PATCH 3/4] Reduce backtracking code gen when nodes are atomic Also added some comments and renamed a few methods for consistency between RegexCompiler and RegexGenerator.Emitter --- .../gen/RegexGenerator.Emitter.cs | 324 ++++++++----- .../Text/RegularExpressions/RegexCompiler.cs | 455 +++++++++++------- .../RegularExpressions/RegexLWCGCompiler.cs | 5 +- .../Text/RegularExpressions/RegexNode.cs | 7 +- 4 files changed, 489 insertions(+), 302 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index d6ce717cb81b81..62898cbdd5c938 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ 
b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -577,7 +577,25 @@ FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or /// Emits the body of the Go override. private static void EmitGo(IndentedTextWriter writer, RegexMethod rm, string id) { - Debug.Assert(rm.Code.Tree.Root.Type == RegexNode.Capture); + // In .NET Framework and up through .NET Core 3.1, the code generated for RegexOptions.Compiled was effectively an unrolled + // version of what RegexInterpreter would process. The RegexNode tree would be turned into a series of opcodes via + // RegexWriter; the interpreter would then sit in a loop processing those opcodes, and the RegexCompiler iterated through the + // opcodes generating code for each equivalent to what the interpreter would do albeit with some decisions made at compile-time + // rather than at run-time. This approach, however, led to complicated code that wasn't pay-for-play (e.g. a big backtracking + // jump table that all compilations went through even if there was no backtracking), that didn't factor in the shape of the + // tree (e.g. it's difficult to add optimizations based on interactions between nodes in the graph), and that didn't read well + // when decompiled from IL to C# or when directly emitted as C# as part of a source generator. + // + // This implementation is instead based on directly walking the RegexNode tree and outputting code for each node in the graph. + // A dedicated function for each kind of RegexNode emits the code necessary to handle that node's processing, including recursively + // calling the relevant function for any of its children nodes. Backtracking is handled not via a giant jump table, but instead + // by emitting direct jumps to each backtracking construct. This is achieved by having all match failures jump to a "done" + // label that can be changed by a previous emitter, e.g.
before EmitLoop returns, it ensures that "doneLabel" is set to the + // label that code should jump back to when backtracking. That way, a subsequent EmitXx function doesn't need to know exactly + // where to jump: it simply always jumps to "doneLabel" on match failure, and "doneLabel" is always configured to point to + // the right location. In an expression without backtracking, or before any backtracking constructs have been encountered, + // "doneLabel" is simply the final return location from the Go method that will undo any captures and exit, signaling to + // the calling scan loop that nothing was matched. // Arbitrary limit for unrolling vs creating a loop. We want to balance size in the generated // code with other costs, like the (small) overhead of slicing to create the temp span to iterate. @@ -898,9 +916,10 @@ void EmitAllBranches() // construct is responsible for unwinding back to its starting crawl position. If // it eventually ends up failing, that failure will result in jumping to the next branch // of the alternation, which will again dutifully unwind the remaining captures until - // what they were at the start of the alternation. + // what they were at the start of the alternation. Of course, if there are no captures + // anywhere in the regex, we don't have to do any of that. string? 
startingCrawlPos = null; - if ((node.Options & RegexNode.HasCapturesFlag) != 0 || !isAtomic) + if (expressionHasCaptures && ((node.Options & RegexNode.HasCapturesFlag) != 0 || !isAtomic)) { startingCrawlPos = ReserveName("alternation_starting_crawlpos"); additionalDeclarations.Add($"int {startingCrawlPos} = 0;"); @@ -950,7 +969,10 @@ void EmitAllBranches() { EmitRunstackResizeIfNeeded(2); writer.WriteLine($"{RunstackPush()} = {i};"); - writer.WriteLine($"{RunstackPush()} = {startingCrawlPos};"); + if (startingCrawlPos is not null) + { + writer.WriteLine($"{RunstackPush()} = {startingCrawlPos};"); + } writer.WriteLine($"{RunstackPush()} = {startingRunTextPos};"); } labelMap[i] = doneLabel; @@ -987,13 +1009,20 @@ void EmitAllBranches() // "doneLabel" to the label for this section. Thus, we only need to emit it if // something can backtrack to us, which can't happen if we're inside of an atomic // node. Thus, emit the backtracking section only if we're non-atomic. - if (!isAtomic) + if (isAtomic) + { + doneLabel = originalDoneLabel; + } + else { doneLabel = backtrackLabel; MarkLabel(backtrackLabel, emitSemicolon: false); writer.WriteLine($"{startingRunTextPos} = {RunstackPop()};"); - writer.WriteLine($"{startingCrawlPos} = {RunstackPop()};"); + if (startingCrawlPos is not null) + { + writer.WriteLine($"{startingCrawlPos} = {RunstackPop()};"); + } using (EmitBlock(writer, $"switch ({RunstackPop()})")) { for (int i = 0; i < labelMap.Length; i++) @@ -1057,6 +1086,8 @@ void EmitBackreference(RegexNode node) // Emits the code for an if(backreference)-then-else conditional. void EmitBackreferenceConditional(RegexNode node) { + bool isAtomic = node.IsAtomicByParent(); + // We're branching in a complicated fashion. Make sure textSpanPos is 0. 
TransferTextSpanPosToRunTextPos(); @@ -1127,43 +1158,53 @@ void EmitBackreferenceConditional(RegexNode node) } } - // If either the yes branch or the no branch contained backtracking, subsequent expressions - // might try to backtrack to here, so output a backtracking map based on resumeAt. - if (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel) + if (isAtomic) { - // Skip the backtracking section. - writer.WriteLine($"goto {endRef};"); - writer.WriteLine(); + doneLabel = originalDoneLabel; + } + else + { + // If either the yes branch or the no branch contained backtracking, subsequent expressions + // might try to backtrack to here, so output a backtracking map based on resumeAt. + if (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel) + { + // Skip the backtracking section. + writer.WriteLine($"goto {endRef};"); + writer.WriteLine(); - string backtrack = ReserveName("ConditionalBackreferenceBacktrack"); - doneLabel = backtrack; - MarkLabel(backtrack); + string backtrack = ReserveName("ConditionalBackreferenceBacktrack"); + doneLabel = backtrack; + MarkLabel(backtrack); - writer.WriteLine($"{resumeAt} = {RunstackPop()};"); + writer.WriteLine($"{resumeAt} = {RunstackPop()};"); - using (EmitBlock(writer, $"switch ({resumeAt})")) - { - if (postIfDoneLabel != originalDoneLabel) + using (EmitBlock(writer, $"switch ({resumeAt})")) { - writer.WriteLine($"case 0: goto {postIfDoneLabel};"); - } + if (postIfDoneLabel != originalDoneLabel) + { + writer.WriteLine($"case 0: goto {postIfDoneLabel};"); + } - if (postElseDoneLabel != originalDoneLabel) - { - writer.WriteLine($"case 1: goto {postElseDoneLabel};"); - } + if (postElseDoneLabel != originalDoneLabel) + { + writer.WriteLine($"case 1: goto {postElseDoneLabel};"); + } - writer.WriteLine($"default: goto {originalDoneLabel};"); + writer.WriteLine($"default: goto {originalDoneLabel};"); + } } } if (postIfDoneLabel != originalDoneLabel || hasNo) { MarkLabel(endRef); 
- if (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel) + if (!isAtomic) { - EmitRunstackResizeIfNeeded(1); - writer.WriteLine($"{RunstackPush()} = {resumeAt};"); + if (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel) + { + EmitRunstackResizeIfNeeded(1); + writer.WriteLine($"{RunstackPush()} = {resumeAt};"); + } } } } @@ -1171,6 +1212,8 @@ void EmitBackreferenceConditional(RegexNode node) // Emits the code for an if(expression)-then-else conditional. void EmitExpressionConditional(RegexNode node) { + bool isAtomic = node.IsAtomicByParent(); + // We're branching in a complicated fashion. Make sure textSpanPos is 0. TransferTextSpanPosToRunTextPos(); @@ -1215,7 +1258,10 @@ void EmitExpressionConditional(RegexNode node) string postConditionalDoneLabel = doneLabel; string resumeAt = ReserveName("conditionalexpression_resumeAt"); - additionalDeclarations.Add($"int {resumeAt} = 0;"); + if (!isAtomic) + { + additionalDeclarations.Add($"int {resumeAt} = 0;"); + } // If we get to this point of the code, the conditional successfully matched, so run the "yes" branch. 
// Since the "yes" branch may have a different execution path than the "no" branch or the lack of @@ -1225,7 +1271,7 @@ void EmitExpressionConditional(RegexNode node) EmitNode(yesBranch); TransferTextSpanPosToRunTextPos(); // ensure all subsequent code sees the same textSpanPos value by setting it to 0 string postYesDoneLabel = doneLabel; - if (postYesDoneLabel != originalDoneLabel) + if (!isAtomic && postYesDoneLabel != originalDoneLabel) { writer.WriteLine($"{resumeAt} = 0;"); } @@ -1253,7 +1299,7 @@ void EmitExpressionConditional(RegexNode node) EmitNode(noBranch); TransferTextSpanPosToRunTextPos(); // ensure all subsequent code sees the same textSpanPos value by setting it to 0 postNoDoneLabel = doneLabel; - if (postNoDoneLabel != originalDoneLabel) + if (!isAtomic && postNoDoneLabel != originalDoneLabel) { writer.WriteLine($"{resumeAt} = 1;"); } @@ -1263,42 +1309,49 @@ void EmitExpressionConditional(RegexNode node) // There's only a yes branch. If it's going to cause us to output a backtracking // label but code may not end up taking the yes branch path, we need to emit a resumeAt // that will cause the backtracking to immediately pass through this node. - if (postYesDoneLabel != originalDoneLabel) + if (!isAtomic && postYesDoneLabel != originalDoneLabel) { writer.WriteLine($"{resumeAt} = 2;"); } } - if (postYesDoneLabel != postConditionalDoneLabel || postNoDoneLabel != postConditionalDoneLabel) + if (isAtomic) { - // Skip the backtracking section. - writer.WriteLine($"goto {end};"); - writer.WriteLine(); + doneLabel = originalDoneLabel; + } + else + { + if (postYesDoneLabel != postConditionalDoneLabel || postNoDoneLabel != postConditionalDoneLabel) + { + // Skip the backtracking section. 
+ writer.WriteLine($"goto {end};"); + writer.WriteLine(); - string backtrack = ReserveName("ConditionalExpressionBacktrack"); - doneLabel = backtrack; - MarkLabel(backtrack); + string backtrack = ReserveName("ConditionalExpressionBacktrack"); + doneLabel = backtrack; + MarkLabel(backtrack); - using (EmitBlock(writer, $"switch ({RunstackPop()})")) - { - if (postYesDoneLabel != postConditionalDoneLabel) + using (EmitBlock(writer, $"switch ({RunstackPop()})")) { - writer.WriteLine($"case 0: goto {postYesDoneLabel};"); - } + if (postYesDoneLabel != postConditionalDoneLabel) + { + writer.WriteLine($"case 0: goto {postYesDoneLabel};"); + } - if (postNoDoneLabel != postConditionalDoneLabel && postNoDoneLabel != originalDoneLabel) - { - writer.WriteLine($"case 1: goto {postNoDoneLabel};"); - } + if (postNoDoneLabel != postConditionalDoneLabel && postNoDoneLabel != originalDoneLabel) + { + writer.WriteLine($"case 1: goto {postNoDoneLabel};"); + } - writer.WriteLine($"default: goto {postConditionalDoneLabel};"); + writer.WriteLine($"default: goto {postConditionalDoneLabel};"); + } } - } - if (postYesDoneLabel != originalDoneLabel || postNoDoneLabel != originalDoneLabel) - { - EmitRunstackResizeIfNeeded(1); - writer.WriteLine($"{RunstackPush()} = {resumeAt};"); + if (postYesDoneLabel != originalDoneLabel || postNoDoneLabel != originalDoneLabel) + { + EmitRunstackResizeIfNeeded(1); + writer.WriteLine($"{RunstackPush()} = {resumeAt};"); + } } MarkLabel(end); @@ -1310,6 +1363,7 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null) Debug.Assert(node.Type == RegexNode.Capture); int capnum = RegexParser.MapCaptureNumber(node.M, rm.Code.Caps); int uncapnum = RegexParser.MapCaptureNumber(node.N, rm.Code.Caps); + bool isAtomic = node.IsAtomicByParent(); TransferTextSpanPosToRunTextPos(); string startingRunTextPos = ReserveName("capture_starting_runtextpos"); @@ -1343,7 +1397,7 @@ void EmitCapture(RegexNode node, RegexNode? 
subsequent = null) writer.WriteLine($"base.TransferCapture({capnum}, {uncapnum}, {startingRunTextPos}, runtextpos);"); } - if (childBacktracks || node.IsInLoop()) + if (!isAtomic && (childBacktracks || node.IsInLoop())) { writer.WriteLine(); @@ -1370,6 +1424,10 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null) doneLabel = backtrack; MarkLabel(end); } + else + { + doneLabel = originalDoneLabel; + } } // Emits code to unwind the capture stack until the crawl position specified in the provided local. @@ -1600,11 +1658,16 @@ void EmitUpdateBumpalong() writer.WriteLine("base.runtextpos = runtextpos;"); } + // Emits code for a concatenation void EmitConcatenation(RegexNode node, RegexNode? subsequent, bool emitLengthChecksIfRequired) { + // Emit the code for each child one after the other. int childCount = node.ChildCount(); for (int i = 0; i < childCount; i++) { + // If we can find a subsequence of fixed-length children, we can emit a length check once for that sequence + // and then skip the individual length checks for each. We also want to minimize the repetition of if blocks, + // and so we try to emit a series of clauses all part of the same if block rather than one if block per child. 
if (emitLengthChecksIfRequired && node.TryGetJoinableLengthCheckChildRange(i, out int requiredLength, out int exclusiveEnd)) { bool wroteClauses = true; @@ -1633,7 +1696,6 @@ void WriteSingleCharChild(RegexNode child) if (child.Type is RegexNode.One or RegexNode.Notone or RegexNode.Set) { WriteSingleCharChild(child); - writer.Write($" /* {DescribeNode(child)} */"); } else if (child.Type is RegexNode.Oneloop or RegexNode.Onelazy or RegexNode.Oneloopatomic or RegexNode.Setloop or RegexNode.Setlazy or RegexNode.Setloopatomic or @@ -1644,10 +1706,6 @@ RegexNode.Notoneloop or RegexNode.Notonelazy or RegexNode.Notoneloopatomic && for (int c = 0; c < child.M; c++) { WriteSingleCharChild(child); - if (c == 0) - { - writer.Write($" /* {DescribeNode(child)} */"); - } } } else @@ -1675,11 +1733,10 @@ RegexNode.Notoneloop or RegexNode.Notonelazy or RegexNode.Notoneloopatomic && } i--; + continue; } - else - { - EmitNode(node.Child(i), i + 1 < childCount ? node.Child(i + 1) : subsequent, emitLengthChecksIfRequired: emitLengthChecksIfRequired); - } + + EmitNode(node.Child(i), i + 1 < childCount ? node.Child(i + 1) : subsequent, emitLengthChecksIfRequired: emitLengthChecksIfRequired); } } @@ -2171,10 +2228,11 @@ void EmitLazy(RegexNode node) int minIterations = node.M; int maxIterations = node.N; string originalDoneLabel = doneLabel; + bool isAtomic = node.IsAtomicByParent(); // If this is actually an atomic lazy loop, we need to output just the minimum number of iterations, // as nothing will backtrack into the lazy loop to get it progress further. 
- if (node.IsAtomicByParent()) + if (isAtomic) { switch (minIterations) { @@ -2313,43 +2371,46 @@ void EmitLazy(RegexNode node) MarkLabel(endLoop); - // Store the capture's state and skip the backtracking section - EmitRunstackResizeIfNeeded(3); - writer.WriteLine($"{RunstackPush()} = {startingRunTextPos};"); - writer.WriteLine($"{RunstackPush()} = {iterationCount};"); - writer.WriteLine($"{RunstackPush()} = {sawEmpty};"); - string skipBacktrack = ReserveName("SkipBacktrack"); - writer.WriteLine($"goto {skipBacktrack};"); - writer.WriteLine(); + if (!isAtomic) + { + // Store the capture's state and skip the backtracking section + EmitRunstackResizeIfNeeded(3); + writer.WriteLine($"{RunstackPush()} = {startingRunTextPos};"); + writer.WriteLine($"{RunstackPush()} = {iterationCount};"); + writer.WriteLine($"{RunstackPush()} = {sawEmpty};"); + string skipBacktrack = ReserveName("SkipBacktrack"); + writer.WriteLine($"goto {skipBacktrack};"); + writer.WriteLine(); - // Emit a backtracking section that restores the capture's state and then jumps to the previous done label - string backtrack = ReserveName($"LazyLoopBacktrack"); - MarkLabel(backtrack); + // Emit a backtracking section that restores the capture's state and then jumps to the previous done label + string backtrack = ReserveName($"LazyLoopBacktrack"); + MarkLabel(backtrack); - writer.WriteLine($"{sawEmpty} = {RunstackPop()};"); - writer.WriteLine($"{iterationCount} = {RunstackPop()};"); - writer.WriteLine($"{startingRunTextPos} = {RunstackPop()};"); + writer.WriteLine($"{sawEmpty} = {RunstackPop()};"); + writer.WriteLine($"{iterationCount} = {RunstackPop()};"); + writer.WriteLine($"{startingRunTextPos} = {RunstackPop()};"); - if (maxIterations == int.MaxValue) - { - using (EmitBlock(writer, $"if ({sawEmpty} == 0)")) + if (maxIterations == int.MaxValue) { - writer.WriteLine($"goto {body};"); + using (EmitBlock(writer, $"if ({sawEmpty} == 0)")) + { + writer.WriteLine($"goto {body};"); + } } - } - else - { - using 
(EmitBlock(writer, $"if ({iterationCount} < {maxIterations} && {sawEmpty} == 0)")) + else { - writer.WriteLine($"goto {body};"); + using (EmitBlock(writer, $"if ({iterationCount} < {maxIterations} && {sawEmpty} == 0)")) + { + writer.WriteLine($"goto {body};"); + } } - } - writer.WriteLine($"goto {doneLabel};"); - writer.WriteLine(); + writer.WriteLine($"goto {doneLabel};"); + writer.WriteLine(); - doneLabel = backtrack; - MarkLabel(skipBacktrack); + doneLabel = backtrack; + MarkLabel(skipBacktrack); + } } // Emits the code to handle a loop (repeater) with a fixed number of iterations. @@ -2585,6 +2646,7 @@ void EmitLoop(RegexNode node) Debug.Assert(node.N >= node.M, $"Unexpected M={node.M}, N={node.N}"); int minIterations = node.M; int maxIterations = node.N; + bool isAtomic = node.IsAtomicByParent(); // We might loop any number of times. In order to ensure this loop and subsequent code sees textSpanPos // the same regardless, we always need it to contain the same value, and the easiest such value is 0. 
@@ -2694,50 +2756,56 @@ void EmitLoop(RegexNode node) } } - if (childBacktracks) + if (isAtomic) { - writer.WriteLine($"goto {endLoop};"); - writer.WriteLine(); - - string backtrack = ReserveName("LoopBacktrack"); - MarkLabel(backtrack); - using (EmitBlock(writer, $"if ({iterationCount} == 0)")) - { - writer.WriteLine($"goto {originalDoneLabel};"); - } - writer.WriteLine($"goto {doneLabel};"); - doneLabel = backtrack; + doneLabel = originalDoneLabel; + MarkLabel(endLoop); } + else + { + if (childBacktracks) + { + writer.WriteLine($"goto {endLoop};"); + writer.WriteLine(); - MarkLabel(endLoop); - + string backtrack = ReserveName("LoopBacktrack"); + MarkLabel(backtrack); + using (EmitBlock(writer, $"if ({iterationCount} == 0)")) + { + writer.WriteLine($"goto {originalDoneLabel};"); + } + writer.WriteLine($"goto {doneLabel};"); + doneLabel = backtrack; + } + MarkLabel(endLoop); - if (node.IsInLoop()) - { - writer.WriteLine(); + if (node.IsInLoop()) + { + writer.WriteLine(); - // Store the capture's state - EmitRunstackResizeIfNeeded(3); - writer.WriteLine($"{RunstackPush()} = {startingRunTextPos};"); - writer.WriteLine($"{RunstackPush()} = {iterationCount};"); + // Store the capture's state + EmitRunstackResizeIfNeeded(3); + writer.WriteLine($"{RunstackPush()} = {startingRunTextPos};"); + writer.WriteLine($"{RunstackPush()} = {iterationCount};"); - // Skip past the backtracking section - string end = ReserveName("SkipBacktrack"); - writer.WriteLine($"goto {end};"); - writer.WriteLine(); + // Skip past the backtracking section + string end = ReserveName("SkipBacktrack"); + writer.WriteLine($"goto {end};"); + writer.WriteLine(); - // Emit a backtracking section that restores the capture's state and then jumps to the previous done label - string backtrack = ReserveName("LoopBacktrack"); - MarkLabel(backtrack); - writer.WriteLine($"{iterationCount} = {RunstackPop()};"); - writer.WriteLine($"{startingRunTextPos} = {RunstackPop()};"); + // Emit a backtracking section that 
restores the capture's state and then jumps to the previous done label + string backtrack = ReserveName("LoopBacktrack"); + MarkLabel(backtrack); + writer.WriteLine($"{iterationCount} = {RunstackPop()};"); + writer.WriteLine($"{startingRunTextPos} = {RunstackPop()};"); - writer.WriteLine($"goto {doneLabel};"); - writer.WriteLine(); + writer.WriteLine($"goto {doneLabel};"); + writer.WriteLine(); - doneLabel = backtrack; - MarkLabel(end); + doneLabel = backtrack; + MarkLabel(end); + } } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index 5782d8d534194b..a3e38808752109 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -12,8 +12,7 @@ namespace System.Text.RegularExpressions { /// - /// RegexCompiler translates a block of RegexCode to MSIL, and creates a - /// subclass of the RegexRunner type. + /// RegexCompiler translates a block of RegexCode to MSIL, and creates a subclass of the RegexRunner type. /// internal abstract class RegexCompiler { @@ -60,20 +59,26 @@ internal abstract class RegexCompiler private static readonly MethodInfo s_textInfoToLowerMethod = typeof(TextInfo).GetMethod("ToLower", new Type[] { typeof(char) })!; private static readonly MethodInfo s_arrayResize = typeof(Array).GetMethod("Resize")!.MakeGenericMethod(typeof(int)); + /// The ILGenerator currently in use. protected ILGenerator? _ilg; - - protected RegexOptions _options; // options - protected RegexCode? _code; // the RegexCode object - protected int[]? _codes; // the RegexCodes being translated - protected bool _hasTimeout; // whether the regex has a non-infinite timeout - - private Stack? _int32LocalsPool; // pool of Int32 local variables - private Stack? 
_readOnlySpanCharLocalsPool; // pool of ReadOnlySpan local variables - - private LocalBuilder? _textInfo; // cached to avoid extraneous TLS hits from CurrentCulture and virtual calls to TextInfo - private LocalBuilder? _loopTimeoutCounter; // timeout counter for loops (set and node) - - private const int LoopTimeoutCheckCount = 2048; // A conservative value to guarantee the correct timeout handling. + /// The options for the expression. + protected RegexOptions _options; + /// The code written for the expression. + protected RegexCode? _code; + /// Whether this expression has a non-infinite timeout. + protected bool _hasTimeout; + + /// Pool of Int32 LocalBuilders. + private Stack? _int32LocalsPool; + /// Pool of ReadOnlySpan of char locals. + private Stack? _readOnlySpanCharLocalsPool; + + /// Local representing a cached TextInfo for the culture to use for all case-insensitive operations. + private LocalBuilder? _textInfo; + /// Local representing a timeout counter for loops (set loops and node loops). + private LocalBuilder? _loopTimeoutCounter; + /// A frequency with which the timeout should be validated. + private const int LoopTimeoutCheckCount = 2048; private static FieldInfo RegexRunnerField(string fieldname) => typeof(RegexRunner).GetField(fieldname, BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance | BindingFlags.Static)!; @@ -343,8 +348,8 @@ private void CallToLower() } } - /// Generates FindFirstChar. - protected void GenerateFindFirstChar() + /// Generates the implementation for FindFirstChar. 
+ protected void EmitFindFirstChar() { Debug.Assert(_code != null); _int32LocalsPool?.Clear(); @@ -427,7 +432,7 @@ FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or { case FindNextStartingPositionMode.LeadingPrefix_LeftToRight_CaseSensitive: Debug.Assert(!string.IsNullOrEmpty(_code.FindOptimizations.LeadingCaseSensitivePrefix)); - GenerateIndexOf_LeftToRight(_code.FindOptimizations.LeadingCaseSensitivePrefix); + EmitIndexOf_LeftToRight(_code.FindOptimizations.LeadingCaseSensitivePrefix); break; case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive: @@ -435,7 +440,7 @@ FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseSensitive: case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive: Debug.Assert(_code.FindOptimizations.FixedDistanceSets is { Count: > 0 }); - GenerateFixedSet_LeftToRight(); + EmitFixedSet_LeftToRight(); break; default: @@ -586,7 +591,7 @@ bool GenerateAnchors() return false; } - void GenerateIndexOf_LeftToRight(string prefix) + void EmitIndexOf_LeftToRight(string prefix) { using RentedLocalBuilder i = RentInt32Local(); @@ -618,7 +623,7 @@ void GenerateIndexOf_LeftToRight(string prefix) Ret(); } - void GenerateFixedSet_LeftToRight() + void EmitFixedSet_LeftToRight() { List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? sets = _code.FindOptimizations.FixedDistanceSets; (char[]? Chars, string Set, int Distance, bool CaseInsensitive) primarySet = sets![0]; @@ -817,14 +822,35 @@ void GenerateFixedSet_LeftToRight() } } - protected void GenerateGo() + /// Generates the implementation for Go. + protected void EmitGo() { + // In .NET Framework and up through .NET Core 3.1, the code generated for RegexOptions.Compiled was effectively an unrolled + // version of what RegexInterpreter would process. 
The RegexNode tree would be turned into a series of opcodes via + // RegexWriter; the interpreter would then sit in a loop processing those opcodes, and the RegexCompiler iterated through the + // opcodes generating code for each equivalent to what the interpreter would do albeit with some decisions made at compile-time + // rather than at run-time. This approach, however, led to complicated code that wasn't pay-for-play (e.g. a big backtracking + // jump table that all compilations went through even if there was no backtracking), that didn't factor in the shape of the + // tree (e.g. it's difficult to add optimizations based on interactions between nodes in the graph), and that didn't read well + // when decompiled from IL to C# or when directly emitted as C# as part of a source generator. + // + // This implementation is instead based on directly walking the RegexNode tree and outputting code for each node in the graph. + // A dedicated function for each kind of RegexNode emits the code necessary to handle that node's processing, including recursively + // calling the relevant function for any of its children nodes. Backtracking is handled not via a giant jump table, but instead + // by emitting direct jumps to each backtracking construct. This is achieved by having all match failures jump to a "done" + // label that can be changed by a previous emitter, e.g. before EmitLoop returns, it ensures that "doneLabel" is set to the + // label that code should jump back to when backtracking. That way, a subsequent EmitXx function doesn't need to know exactly + // where to jump: it simply always jumps to "doneLabel" on match failure, and "doneLabel" is always configured to point to + // the right location. In an expression without backtracking, or before any backtracking constructs have been encountered, + // "doneLabel" is simply the final return location from the Go method that will undo any captures and exit, signaling to + // the calling scan loop that nothing was matched. 
+ Debug.Assert(_code != null); _int32LocalsPool?.Clear(); _readOnlySpanCharLocalsPool?.Clear(); - RegexNode node = _code!.Tree.Root; - + // Get the root Capture node of the tree. + RegexNode node = _code.Tree.Root; Debug.Assert(node.Type == RegexNode.Capture, "Every generated tree should begin with a capture node"); Debug.Assert(node.ChildCount() == 1, "Capture nodes should have one child"); @@ -834,7 +860,7 @@ protected void GenerateGo() // In some limited cases, FindFirstChar will only return true if it successfully matched the whole thing. // This is the case, in particular, for strings. We can special case these to do essentially nothing // in Go other than emit the capture. - if (!IsCaseInsensitive(node)) // FindFirstChar may not be 100% accurate on casing in all cultures + if (!IsCaseInsensitive(node)) // FindFirstChar may yield false positives on these in some cultures when case-insensitive { switch (node.Type) { @@ -862,7 +888,7 @@ protected void GenerateGo() } } - // Declare some locals. + // Initialize the main locals used throughout the implementation. LocalBuilder runtextLocal = DeclareString(); LocalBuilder originalruntextposLocal = DeclareInt32(); LocalBuilder runtextposLocal = DeclareInt32(); @@ -1063,6 +1089,7 @@ void EmitAlternation(RegexNode node) Label matchLabel = DefineLabel(); // Save off runtextpos. We'll need to reset this each time a branch fails. + // startingRunTextPos = runtextpos; LocalBuilder startingRunTextPos = DeclareInt32(); Ldloc(runtextposLocal); Stloc(startingRunTextPos); @@ -1086,10 +1113,12 @@ void EmitAlternation(RegexNode node) // construct is responsible for unwinding back to its starting crawl position. If // it eventually ends up failing, that failure will result in jumping to the next branch // of the alternation, which will again dutifully unwind the remaining captures until - // what they were at the start of the alternation. + // what they were at the start of the alternation. 
Of course, if there are no captures + // anywhere in the regex, we don't have to do any of that. LocalBuilder? startingCrawlpos = null; - if ((node.Options & RegexNode.HasCapturesFlag) != 0 || !isAtomic) + if (expressionHasCaptures && ((node.Options & RegexNode.HasCapturesFlag) != 0 || !isAtomic)) { + // startingCrawlpos = base.Crawlpos(); startingCrawlpos = DeclareInt32(); Ldthis(); Call(s_crawlposMethod); @@ -1135,9 +1164,16 @@ void EmitAlternation(RegexNode node) // still points to the nextBranch, which similarly is where we'll want to jump to. if (!isAtomic) { + // if (runstackpos + 3 >= base.runstack.Length) Array.Resize(ref base.runstack, base.runstack.Length * 2); + // base.runstack[runstackpos++] = i; + // base.runstack[runstackpos++] = startingCrawlpos; + // base.runstack[runstackpos++] = startingRunTextPos; EmitRunstackResizeIfNeeded(3); EmitRunstackPush(() => Ldc(i)); - EmitRunstackPush(() => Ldloc(startingCrawlpos!)); + if (startingCrawlpos is not null) + { + EmitRunstackPush(() => Ldloc(startingCrawlpos)); + } EmitRunstackPush(() => Ldloc(startingRunTextPos)); } labelMap[i] = doneLabel; @@ -1146,6 +1182,9 @@ void EmitAlternation(RegexNode node) // Before jumping to the end, we need to zero out textSpanPos, so that no // matter what the value is after the branch, whatever follows the alternate // will see the same textSpanPos. + // runtextpos += textSpanPos; + // textSpanPos = 0; + // goto matchLabel; TransferTextSpanPosToRunTextPos(); BrFar(matchLabel); @@ -1155,6 +1194,10 @@ void EmitAlternation(RegexNode node) // needs to be reset, uncapturing it. if (!isLastBranch) { + // NextBranch: + // runtextpos = startingRunTextPos; + // textSpan = runtext.AsSpan(runtextpos, runtextend - runtextpos); + // while (base.Crawlpos() > startingCrawlpos) base.Uncapture(); MarkLabel(nextBranch); Ldloc(startingRunTextPos); Stloc(runtextposLocal); @@ -1174,7 +1217,11 @@ void EmitAlternation(RegexNode node) // "doneLabel" to the label for this section. 
Thus, we only need to emit it if // something can backtrack to us, which can't happen if we're inside of an atomic // node. Thus, emit the backtracking section only if we're non-atomic. - if (!isAtomic) + if (isAtomic) + { + doneLabel = originalDoneLabel; + } + else { doneLabel = backtrackLabel; MarkLabel(backtrackLabel); @@ -1184,8 +1231,11 @@ void EmitAlternation(RegexNode node) // switch (base.runstack[--runstackpos]) { ... } // branch number EmitRunstackPop(); Stloc(startingRunTextPos); - EmitRunstackPop(); - Stloc(startingCrawlpos!); + if (startingCrawlpos is not null) + { + EmitRunstackPop(); + Stloc(startingCrawlpos); + } EmitRunstackPop(); Switch(labelMap); } @@ -1287,6 +1337,8 @@ void EmitBackreference(RegexNode node) // Emits the code for an if(backreference)-then-else conditional. void EmitBackreferenceConditional(RegexNode node) { + bool isAtomic = node.IsAtomicByParent(); + // We're branching in a complicated fashion. Make sure textSpanPos is 0. TransferTextSpanPosToRunTextPos(); @@ -1317,11 +1369,13 @@ void EmitBackreferenceConditional(RegexNode node) Label postIfDoneLabel = doneLabel; if (postIfDoneLabel != originalDoneLabel) { + // resumeAt = 0; Ldc(0); Stloc(resumeAt); } if (postIfDoneLabel != originalDoneLabel || hasNo) { + // goto endRef; BrFar(endRef); } @@ -1337,6 +1391,7 @@ void EmitBackreferenceConditional(RegexNode node) postElseDoneLabel = doneLabel; if (postElseDoneLabel != originalDoneLabel) { + // resumeAt = 1; Ldc(1); Stloc(resumeAt); } @@ -1348,51 +1403,62 @@ void EmitBackreferenceConditional(RegexNode node) // that will cause the backtracking to immediately pass through this node. if (postIfDoneLabel != originalDoneLabel) { + // resumeAt = 2; Ldc(2); Stloc(resumeAt); } } - // If either the yes branch or the no branch contained backtracking, subsequent expressions - // might try to backtrack to here, so output a backtracking map based on resumeAt. 
- if (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel) + if (isAtomic) { - // Skip the backtracking section - Br(endRef); + doneLabel = originalDoneLabel; + } + else + { + // If either the yes branch or the no branch contained backtracking, subsequent expressions + // might try to backtrack to here, so output a backtracking map based on resumeAt. + if (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel) + { + // Skip the backtracking section + // goto endRef; + Br(endRef); - Label backtrack = DefineLabel(); - doneLabel = backtrack; - MarkLabel(backtrack); + Label backtrack = DefineLabel(); + doneLabel = backtrack; + MarkLabel(backtrack); - // resumeAt = base.runstack[--runstackpos]; - EmitRunstackPop(); - Stloc(resumeAt); + // resumeAt = base.runstack[--runstackpos]; + EmitRunstackPop(); + Stloc(resumeAt); - if (postIfDoneLabel != originalDoneLabel) - { - // if (resumeAt == 0) goto postIfDoneLabel; - Ldloc(resumeAt); - Ldc(0); - BeqFar(postIfDoneLabel); - } + if (postIfDoneLabel != originalDoneLabel) + { + // if (resumeAt == 0) goto postIfDoneLabel; + Ldloc(resumeAt); + Ldc(0); + BeqFar(postIfDoneLabel); + } - if (postElseDoneLabel != originalDoneLabel) - { - // if (resumeAt == 1) goto postElseDoneLabel; - Ldloc(resumeAt); - Ldc(1); - BeqFar(postElseDoneLabel); - } + if (postElseDoneLabel != originalDoneLabel) + { + // if (resumeAt == 1) goto postElseDoneLabel; + Ldloc(resumeAt); + Ldc(1); + BeqFar(postElseDoneLabel); + } - // goto originalDoneLabel; - BrFar(originalDoneLabel); + // goto originalDoneLabel; + BrFar(originalDoneLabel); + } } if (postIfDoneLabel != originalDoneLabel || hasNo) { MarkLabel(endRef); - if (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel) + if (!isAtomic && (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel)) { + // if (runstackpos + 1 >= base.runstack.Length) Array.Resize(ref base.runstack, base.runstack.Length * 2); + // 
base.runstack[runstackpos++] = resumeAt; EmitRunstackResizeIfNeeded(1); EmitRunstackPush(() => Ldloc(resumeAt)); } @@ -1402,6 +1468,8 @@ void EmitBackreferenceConditional(RegexNode node) // Emits the code for an if(expression)-then-else conditional. void EmitExpressionConditional(RegexNode node) { + bool isAtomic = node.IsAtomicByParent(); + // We're branching in a complicated fashion. Make sure textSpanPos is 0. TransferTextSpanPosToRunTextPos(); @@ -1447,7 +1515,7 @@ void EmitExpressionConditional(RegexNode node) } Label postConditionalDoneLabel = doneLabel; - LocalBuilder resumeAt = DeclareInt32(); + LocalBuilder? resumeAt = !isAtomic ? DeclareInt32() : null; // If we get to this point of the code, the conditional successfully matched, so run the "yes" branch. // Since the "yes" branch may have a different execution path than the "no" branch or the lack of @@ -1457,13 +1525,15 @@ void EmitExpressionConditional(RegexNode node) EmitNode(yesBranch); TransferTextSpanPosToRunTextPos(); // ensure all subsequent code sees the same textSpanPos value by setting it to 0 Label postYesDoneLabel = doneLabel; - if (postYesDoneLabel != originalDoneLabel) + if (resumeAt is not null && postYesDoneLabel != originalDoneLabel) { + // resumeAt = 0; Ldc(0); Stloc(resumeAt); } if (postYesDoneLabel != originalDoneLabel || noBranch is not null) { + // goto end; BrFar(end); } @@ -1476,6 +1546,7 @@ void EmitExpressionConditional(RegexNode node) MarkLabel(no); if (startingCrawlPos is not null) { + // while (base.Crawlpos() > startingCrawlPos) base.Uncapture(); EmitUncaptureUntil(startingCrawlPos); } @@ -1486,6 +1557,7 @@ void EmitExpressionConditional(RegexNode node) postNoDoneLabel = doneLabel; if (postNoDoneLabel != originalDoneLabel) { + // goto end; BrFar(end); } } @@ -1494,43 +1566,57 @@ void EmitExpressionConditional(RegexNode node) // There's only a yes branch. 
If it's going to cause us to output a backtracking // label but code may not end up taking the yes branch path, we need to emit a resumeAt // that will cause the backtracking to immediately pass through this node. - if (postYesDoneLabel != originalDoneLabel) + if (resumeAt is not null && postYesDoneLabel != originalDoneLabel) { + // resumeAt = 2; Ldc(2); Stloc(resumeAt); } } - if (postYesDoneLabel != postConditionalDoneLabel || postNoDoneLabel != postConditionalDoneLabel) + if (isAtomic) { - // Skip the backtracking section. - BrFar(end); + doneLabel = originalDoneLabel; + } + else + { + Debug.Assert(resumeAt is not null); + if (postYesDoneLabel != postConditionalDoneLabel || postNoDoneLabel != postConditionalDoneLabel) + { + // Skip the backtracking section. + BrFar(end); - Label backtrack = DefineLabel(); - doneLabel = backtrack; - MarkLabel(backtrack); + Label backtrack = DefineLabel(); + doneLabel = backtrack; + MarkLabel(backtrack); - if (postYesDoneLabel != postConditionalDoneLabel) - { - Ldloc(resumeAt); - Ldc(0); - BeqFar(postYesDoneLabel); + if (postYesDoneLabel != postConditionalDoneLabel) + { + // if (resumeAt == 0) goto postYesDoneLabel; + Ldloc(resumeAt); + Ldc(0); + BeqFar(postYesDoneLabel); + } + + if (postNoDoneLabel != postConditionalDoneLabel && postNoDoneLabel != originalDoneLabel) + { + // if (resumeAt == 1) goto postNoDoneLabel; + Ldloc(resumeAt); + Ldc(1); + BeqFar(postNoDoneLabel); + } + + // goto postConditionalDoneLabel; + BrFar(postConditionalDoneLabel); } - if (postNoDoneLabel != postConditionalDoneLabel && postNoDoneLabel != originalDoneLabel) + if (postYesDoneLabel != originalDoneLabel || postNoDoneLabel != originalDoneLabel) { - Ldloc(resumeAt); - Ldc(1); - BeqFar(postNoDoneLabel); + // if (runstackpos + 1 >= base.runstack.Length) Array.Resize(ref base.runstack, base.runstack.Length * 2); + // base.runstack[runstackpos++] = resumeAt; + EmitRunstackResizeIfNeeded(1); + EmitRunstackPush(() => Ldloc(resumeAt)); } - - 
BrFar(postConditionalDoneLabel); - } - - if (postYesDoneLabel != originalDoneLabel || postNoDoneLabel != originalDoneLabel) - { - EmitRunstackResizeIfNeeded(1); - EmitRunstackPush(() => Ldloc(resumeAt)); } MarkLabel(end); @@ -1542,6 +1628,7 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null) Debug.Assert(node.Type == RegexNode.Capture); int capnum = RegexParser.MapCaptureNumber(node.M, _code!.Caps); int uncapnum = RegexParser.MapCaptureNumber(node.N, _code.Caps); + bool isAtomic = node.IsAtomicByParent(); // runtextpos += textSpanPos; // textSpan = textSpan.Slice(textSpanPos); @@ -1592,8 +1679,10 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null) Call(s_transferCaptureMethod); } - if (childBacktracks || node.IsInLoop()) + if (!isAtomic && (childBacktracks || node.IsInLoop())) { + // if (runstackpos + 1 >= base.runstack.Length) Array.Resize(ref base.runstack, base.runstack.Length * 2); + // base.runstack[runstackpos++] = startingRunTextPos; EmitRunstackResizeIfNeeded(1); EmitRunstackPush(() => Ldloc(startingRunTextPos)); @@ -1621,6 +1710,10 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null) doneLabel = backtrack; MarkLabel(end); } + else + { + doneLabel = originalDoneLabel; + } } // Emits code to unwind the capture stack until the crawl position specified in the provided local. @@ -1651,6 +1744,7 @@ void EmitPositiveLookaheadAssertion(RegexNode node) Label originalDoneLabel = doneLabel; // Save off runtextpos. We'll need to reset this upon successful completion of the lookahead. + // startingRunTextPos = runtextpos; LocalBuilder startingRunTextPos = DeclareInt32(); Ldloc(runtextposLocal); Stloc(startingRunTextPos); @@ -1661,6 +1755,8 @@ void EmitPositiveLookaheadAssertion(RegexNode node) // After the child completes successfully, reset the text positions. // Do not reset captures, which persist beyond the lookahead. 
+ // runtextpos = startingRunTextPos; + // textSpan = runtext.AsSpan(runtextpos, runtextend - runtextpos); Ldloc(startingRunTextPos); Stloc(runtextposLocal); LoadTextSpanLocal(); @@ -1676,6 +1772,7 @@ void EmitNegativeLookaheadAssertion(RegexNode node) Label originalDoneLabel = doneLabel; // Save off runtextpos. We'll need to reset this upon successful completion of the lookahead. + // startingRunTextPos = runtextpos; LocalBuilder startingRunTextPos = DeclareInt32(); Ldloc(runtextposLocal); Stloc(startingRunTextPos); @@ -1689,6 +1786,7 @@ void EmitNegativeLookaheadAssertion(RegexNode node) // If the generated code ends up here, it matched the lookahead, which actually // means failure for a _negative_ lookahead, so we need to jump to the original done. + // goto originalDoneLabel; BrFar(originalDoneLabel); // Failures (success for a negative lookahead) jump here. @@ -1699,6 +1797,7 @@ void EmitNegativeLookaheadAssertion(RegexNode node) } // After the child completes in failure (success for negative lookahead), reset the text positions. + // runtextpos = startingRunTextPos; Ldloc(startingRunTextPos); Stloc(runtextposLocal); LoadTextSpanLocal(); @@ -1850,9 +1949,12 @@ void EmitUpdateBumpalong() // Emits code for a concatenation void EmitConcatenation(RegexNode node, RegexNode? subsequent, bool emitLengthChecksIfRequired) { + // Emit the code for each child one after the other. int childCount = node.ChildCount(); for (int i = 0; i < childCount; i++) { + // If we can find a subsequence of fixed-length children, we can emit a length check once for that sequence + // and then skip the individual length checks for each. if (emitLengthChecksIfRequired && node.TryGetJoinableLengthCheckChildRange(i, out int requiredLength, out int exclusiveEnd)) { EmitSpanLengthCheck(requiredLength); @@ -2314,12 +2416,14 @@ void EmitSingleCharLazy(RegexNode node, bool emitLengthChecksIfRequired = true) // Track the current runtextpos. 
Each time we backtrack, we'll reset to the stored position, which // is also incremented each time we match another character in the loop. + // int startingRunTextPos = runtextpos; LocalBuilder startingRunTextPos = DeclareInt32(); Ldloc(runtextposLocal); Stloc(startingRunTextPos); // Skip the backtracking section for the initial subsequent matching. We've already matched the // minimum number of iterations, which means we can successfully match with zero additional iterations. + // goto endLoopLabel; Label endLoopLabel = DefineLabel(); BrFar(endLoopLabel); @@ -2331,6 +2435,7 @@ void EmitSingleCharLazy(RegexNode node, bool emitLengthChecksIfRequired = true) // are before this node, in which case this is wasted effort, but still functionally correct. if (crawlPos is not null) { + // while (base.Crawlpos() > crawlPos) base.Uncapture(); EmitUncaptureUntil(crawlPos); } @@ -2438,10 +2543,11 @@ void EmitLazy(RegexNode node) int minIterations = node.M; int maxIterations = node.N; Label originalDoneLabel = doneLabel; + bool isAtomic = node.IsAtomicByParent(); // If this is actually an atomic lazy loop, we need to output just the minimum number of iterations, // as nothing will backtrack into the lazy loop to get it progress further. 
- if (node.IsAtomicByParent()) + if (isAtomic) { switch (minIterations) { @@ -2620,51 +2726,54 @@ void EmitLazy(RegexNode node) MarkLabel(endLoop); - // Store the capture's state and skip the backtracking section - EmitRunstackResizeIfNeeded(3); - EmitRunstackPush(() => Ldloc(startingRunTextPos)); - EmitRunstackPush(() => Ldloc(iterationCount)); - EmitRunstackPush(() => Ldloc(sawEmpty)); - Label skipBacktrack = DefineLabel(); - BrFar(skipBacktrack); + if (!isAtomic) + { + // Store the capture's state and skip the backtracking section + EmitRunstackResizeIfNeeded(3); + EmitRunstackPush(() => Ldloc(startingRunTextPos)); + EmitRunstackPush(() => Ldloc(iterationCount)); + EmitRunstackPush(() => Ldloc(sawEmpty)); + Label skipBacktrack = DefineLabel(); + BrFar(skipBacktrack); - // Emit a backtracking section that restores the capture's state and then jumps to the previous done label - Label backtrack = DefineLabel(); - MarkLabel(backtrack); + // Emit a backtracking section that restores the capture's state and then jumps to the previous done label + Label backtrack = DefineLabel(); + MarkLabel(backtrack); - // sawEmpty = base.runstack[--runstackpos]; - // iterationCount = base.runstack[--runstackpos]; - // startingRunTextPos = base.runstack[--runstackpos]; - EmitRunstackPop(); - Stloc(sawEmpty); - EmitRunstackPop(); - Stloc(iterationCount); - EmitRunstackPop(); - Stloc(startingRunTextPos); + // sawEmpty = base.runstack[--runstackpos]; + // iterationCount = base.runstack[--runstackpos]; + // startingRunTextPos = base.runstack[--runstackpos]; + EmitRunstackPop(); + Stloc(sawEmpty); + EmitRunstackPop(); + Stloc(iterationCount); + EmitRunstackPop(); + Stloc(startingRunTextPos); - if (maxIterations == int.MaxValue) - { - // if (sawEmpty != 0) goto doneLabel; - Ldloc(sawEmpty); - Ldc(0); - BneFar(doneLabel); - } - else - { - // if (iterationCount >= maxIterations || sawEmpty != 0) goto doneLabel; - Ldloc(iterationCount); - Ldc(maxIterations); - BgeFar(doneLabel); - 
Ldloc(sawEmpty); - Ldc(0); - BneFar(doneLabel); - } + if (maxIterations == int.MaxValue) + { + // if (sawEmpty != 0) goto doneLabel; + Ldloc(sawEmpty); + Ldc(0); + BneFar(doneLabel); + } + else + { + // if (iterationCount >= maxIterations || sawEmpty != 0) goto doneLabel; + Ldloc(iterationCount); + Ldc(maxIterations); + BgeFar(doneLabel); + Ldloc(sawEmpty); + Ldc(0); + BneFar(doneLabel); + } - // goto body; - BrFar(body); + // goto body; + BrFar(body); - doneLabel = backtrack; - MarkLabel(skipBacktrack); + doneLabel = backtrack; + MarkLabel(skipBacktrack); + } } // Emits the code to handle a loop (repeater) with a fixed number of iterations. @@ -3053,6 +3162,7 @@ void EmitLoop(RegexNode node) Debug.Assert(node.N >= node.M, $"Unexpected M={node.M}, N={node.N}"); int minIterations = node.M; int maxIterations = node.N; + bool isAtomic = node.IsAtomicByParent(); // We might loop any number of times. In order to ensure this loop and subsequent code sees textSpanPos // the same regardless, we always need it to contain the same value, and the easiest such value is 0. @@ -3220,56 +3330,64 @@ void EmitLoop(RegexNode node) BltFar(childBacktracks ? 
doneLabel : originalDoneLabel); } - if (childBacktracks) + if (isAtomic) { - // goto endLoop; - BrFar(endLoop); + doneLabel = originalDoneLabel; + MarkLabel(endLoop); + } + else + { + if (childBacktracks) + { + // goto endLoop; + BrFar(endLoop); - // Backtrack: - Label backtrack = DefineLabel(); - MarkLabel(backtrack); + // Backtrack: + Label backtrack = DefineLabel(); + MarkLabel(backtrack); - // if (iterationCount == 0) goto originalDoneLabel; - Ldloc(iterationCount); - Ldc(0); - BeqFar(originalDoneLabel); + // if (iterationCount == 0) goto originalDoneLabel; + Ldloc(iterationCount); + Ldc(0); + BeqFar(originalDoneLabel); - // goto doneLabel; - BrFar(doneLabel); + // goto doneLabel; + BrFar(doneLabel); - doneLabel = backtrack; - } + doneLabel = backtrack; + } - MarkLabel(endLoop); + MarkLabel(endLoop); - if (node.IsInLoop()) - { - // Store the capture's state - EmitRunstackResizeIfNeeded(3); - EmitRunstackPush(() => Ldloc(startingRunTextPos)); - EmitRunstackPush(() => Ldloc(iterationCount)); + if (node.IsInLoop()) + { + // Store the capture's state + EmitRunstackResizeIfNeeded(3); + EmitRunstackPush(() => Ldloc(startingRunTextPos)); + EmitRunstackPush(() => Ldloc(iterationCount)); - // Skip past the backtracking section - // goto end; - Label end = DefineLabel(); - BrFar(end); + // Skip past the backtracking section + // goto end; + Label end = DefineLabel(); + BrFar(end); - // Emit a backtracking section that restores the capture's state and then jumps to the previous done label - Label backtrack = DefineLabel(); - MarkLabel(backtrack); + // Emit a backtracking section that restores the capture's state and then jumps to the previous done label + Label backtrack = DefineLabel(); + MarkLabel(backtrack); - // iterationCount = base.runstack[--runstack]; - // startingRunTextPos = base.runstack[--runstack]; - EmitRunstackPop(); - Stloc(iterationCount); - EmitRunstackPop(); - Stloc(startingRunTextPos); + // iterationCount = base.runstack[--runstack]; + // 
startingRunTextPos = base.runstack[--runstack]; + EmitRunstackPop(); + Stloc(iterationCount); + EmitRunstackPop(); + Stloc(startingRunTextPos); - // goto doneLabel; - BrFar(doneLabel); + // goto doneLabel; + BrFar(doneLabel); - doneLabel = backtrack; - MarkLabel(end); + doneLabel = backtrack; + MarkLabel(end); + } } } @@ -3341,9 +3459,10 @@ private void InitializeCultureForGoIfNecessary() bool needsCulture = (_options & RegexOptions.IgnoreCase) != 0; if (!needsCulture) { - for (int codepos = 0; codepos < _codes!.Length; codepos += RegexCode.OpcodeSize(_codes[codepos])) + int[] codes = _code!.Codes; + for (int codepos = 0; codepos < codes.Length; codepos += RegexCode.OpcodeSize(codes[codepos])) { - if ((_codes[codepos] & RegexCode.Ci) == RegexCode.Ci) + if ((codes[codepos] & RegexCode.Ci) == RegexCode.Ci) { needsCulture = true; break; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs index a548c01201edbe..34b7f1b1130592 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs @@ -38,7 +38,6 @@ internal sealed class RegexLWCGCompiler : RegexCompiler } _code = code; - _codes = code.Codes; _options = options; _hasTimeout = hasTimeout; @@ -54,10 +53,10 @@ internal sealed class RegexLWCGCompiler : RegexCompiler } DynamicMethod findFirstCharMethod = DefineDynamicMethod($"Regex{regexNum}_FindFirstChar{description}", typeof(bool), typeof(CompiledRegexRunner)); - GenerateFindFirstChar(); + EmitFindFirstChar(); DynamicMethod goMethod = DefineDynamicMethod($"Regex{regexNum}_Go{description}", null, typeof(CompiledRegexRunner)); - GenerateGo(); + EmitGo(); return new CompiledRegexRunnerFactory(goMethod, findFirstCharMethod, code.TrackCount); } diff --git 
a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index 66d66ce128d168..582cb1130be7ab 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -523,7 +523,8 @@ private void EliminateEndingBacktracking() public bool IsAtomicByParent() { // Walk up the parent hierarchy. - for (RegexNode? parent = Next; parent is not null; parent = parent.Next) + RegexNode child = this; + for (RegexNode? parent = child.Next; parent is not null; child = parent, parent = child.Next) { switch (parent.Type) { @@ -540,14 +541,14 @@ public bool IsAtomicByParent() // so any atomicity applied to the alternation also applies to // each individual branch. This is true as well for conditional // backreferences, where each of the yes/no branches are independent. - case Testgroup when parent.Child(0) != this: + case Testgroup when parent.Child(0) != child: // As with alternations, each yes/no branch of an expression conditional // are independent from each other, but the conditional expression itself // can be backtracked into from each of the branches, so we can't make // it atomic just because the whole conditional is. case Capture: // Skip captures. They don't affect atomicity. - case Concatenate when parent.Child(parent.ChildCount() - 1) == this: + case Concatenate when parent.Child(parent.ChildCount() - 1) == child: // If the parent is a concatenation and this is the last node, // any atomicity applying to the concatenation applies to this // node, too. 
From dcffd8de0fa5d1c8878bd7c43485cff546300f78 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 3 Dec 2021 09:42:23 -0500 Subject: [PATCH 4/4] Fix tests on mono interpreter --- .../tests/Regex.MultipleMatches.Tests.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs index f2c3515c5ca596..4c009b46bf73dd 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs @@ -5,6 +5,7 @@ using System.Threading.Tasks; using Xunit; using System.Linq; +using System.Runtime.CompilerServices; namespace System.Text.RegularExpressions.Tests { @@ -327,7 +328,9 @@ public static IEnumerable Matches_TestData() } } - if (engine != RegexEngine.Interpreter && !PlatformDetection.IsNetFramework) +#if !NETFRAMEWORK // these tests currently fail on .NET Framework, and we need to check IsDynamicCodeCompiled but that doesn't exist on .NET Framework + if (engine != RegexEngine.Interpreter && // these tests currently fail with RegexInterpreter + RuntimeFeature.IsDynamicCodeCompiled) // if dynamic code isn't compiled, RegexOptions.Compiled falls back to the interpreter, for which these tests currently fail { // Fails on interpreter and .NET Framework: [ActiveIssue("https://github.com/dotnet/runtime/issues/62094")] yield return new object[] @@ -360,6 +363,7 @@ public static IEnumerable Matches_TestData() }; } } +#endif } }