diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index 8286589654cce7..bd55727af62a97 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -349,58 +349,6 @@ internal RegexNode FinalOptimize() break; } } - - // Optimization: implicit anchoring. - // If the expression begins with a .* loop, add an anchor to the beginning: - // - If Singleline is set such that '.' eats anything, the .* will zip to the end of the string and then backtrack through - // the whole thing looking for a match; since it will have examined everything, there's no benefit to examining it all - // again, and we can anchor to beginning. - // - If Singleline is not set, then '.' eats anything up until a '\n' and backtracks from there, so we can similarly avoid - // re-examining that content and anchor to the beginning of lines. - // We are currently very conservative here, only examining concat nodes. This could be loosened in the future, e.g. to - // explore captures (but think through any implications of there being a back ref to that capture), to explore loops and - // lazy loops a positive minimum (but the anchor shouldn't be part of the loop), to explore alternations and support adding - // an anchor if all of them begin with appropriate star loops (though this could also be accomplished by factoring out the - // loops to be before the alternation), etc. - { - RegexNode node = rootNode.Child(0); // skip implicit root capture node - while (true) - { - bool singleline = (node.Options & RegexOptions.Singleline) != 0; - switch (node.Type) - { - case Concatenate: - node = node.Child(0); - continue; - - case Setloop when singleline && node.N == int.MaxValue && node.Str == RegexCharClass.AnyClass: - case Setloopatomic when singleline && node.N == int.MaxValue && node.Str == RegexCharClass.AnyClass: - case Notoneloop when !singleline && node.N == int.MaxValue && node.Ch == '\n': - case Notoneloopatomic when !singleline && node.N == int.MaxValue && node.Ch == '\n': - RegexNode? parent = node.Next; - var anchor = new RegexNode(singleline ? Beginning : Bol, node.Options); - Debug.Assert(parent != null); - if (parent.Type == Concatenate) - { - Debug.Assert(parent.ChildCount() >= 2); - Debug.Assert(parent.Children is List); - anchor.Next = parent; - ((List)parent.Children).Insert(0, anchor); - } - else - { - Debug.Assert(parent.Type == Capture && parent.Next is null, "Only valid capture is the implicit root capture"); - var concat = new RegexNode(Concatenate, parent.Options); - concat.AddChild(anchor); - concat.AddChild(node); - parent.ReplaceChild(0, concat); - } - break; - } - - break; - } - } } // Optimization: Unnecessary root atomic. diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index 70dfee3c47d9f3..a7e73699f36e8e 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -127,6 +127,13 @@ public static IEnumerable Match_Basic_TestData() yield return new object[] { @"(?>\w+)(? Match_StartatDiffersFromBeginning_MemberData() + { + foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.Singleline, RegexOptions.Multiline }) + { + // Anchors + yield return new object[] { @"^.*", "abc", options, 0, true, true }; + yield return new object[] { @"^.*", "abc", options, 1, false, true }; + + // Positive Lookbehinds + yield return new object[] { @"(?<=abc)def", "abcdef", options, 3, true, false }; + + // Negative Lookbehinds + yield return new object[] { @"(? Matches_TestData() } }; + yield return new object[] + { + ".*", "abc", RegexOptions.None, + new[] + { + new CaptureData("abc", 0, 3), + new CaptureData("", 3, 0) + } + }; + if (!PlatformDetection.IsNetFramework) { // .NET Framework missing fix in https://github.com/dotnet/runtime/pull/1075