diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index 05cd639bd3dfe2..496e6d368bd5fc 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -2086,23 +2086,42 @@ static bool RemoveCaptures(RegexNode parent, int nodeIndex) // eliminate any ending backtracking from it. EliminateEndingBacktracking(); - // A positive lookaround wrapped around an empty is a nop, and we can reduce it - // to simply Empty. A developer typically doesn't write this, but rather it evolves - // due to optimizations resulting in empty. - - // A negative lookaround wrapped around an empty child, i.e. (?!), is - // sometimes used as a way to insert a guaranteed no-match into the expression, - // often as part of a conditional. We can reduce it to simply Nothing. + RegexNode child = Child(0); - if (Child(0).Kind == RegexNodeKind.Empty) + // A positive lookahead that wraps a zero-width assertion is useless wrapping and can be removed. + // Similarly, a positive lookaround wrapped around an empty can be reduced simply to Empty. + // A developer typically doesn't write this, but rather it evolves due to optimizations resulting in empty. + if (Kind is RegexNodeKind.PositiveLookaround) { - Kind = Kind == RegexNodeKind.PositiveLookaround ? RegexNodeKind.Empty : RegexNodeKind.Nothing; - Children = null; + if (((Options & RegexOptions.RightToLeft) == 0 && IsZeroWidthAssertion(child.Kind)) || + child.Kind is RegexNodeKind.Empty) + { + return child; + } + } + else if (Kind is RegexNodeKind.NegativeLookaround) + { + // A negative lookaround wrapped around an empty child, i.e. (?!), is + // sometimes used as a way to insert a guaranteed no-match into the expression, + // often as part of a conditional. We can reduce it to simply Nothing. + if (child.Kind is RegexNodeKind.Empty) + { + Kind = RegexNodeKind.Nothing; + Children = null; + } } return this; } + private static bool IsZeroWidthAssertion(RegexNodeKind kind) => kind is + RegexNodeKind.PositiveLookaround or RegexNodeKind.NegativeLookaround or + RegexNodeKind.Beginning or RegexNodeKind.Start or + RegexNodeKind.Bol or RegexNodeKind.Eol or + RegexNodeKind.End or RegexNodeKind.EndZ or + RegexNodeKind.Boundary or RegexNodeKind.ECMABoundary or + RegexNodeKind.NonBoundary or RegexNodeKind.NonECMABoundary; + /// Gets whether the node contains a backreference anywhere in its tree. private static bool? ContainsBackreference(RegexNode node) { diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs index 93ab67a2c392a0..34e5a84c85a5da 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs @@ -1354,8 +1354,6 @@ private static RegexNodeKind FindLeadingOrTrailingAnchor(RegexNode node, bool le case RegexNodeKind.Start: case RegexNodeKind.EndZ: case RegexNodeKind.End: - case RegexNodeKind.Boundary: - case RegexNodeKind.ECMABoundary: // Return any anchor found. return node.Kind; @@ -1389,6 +1387,7 @@ private static RegexNodeKind FindLeadingOrTrailingAnchor(RegexNode node, bool le { case RegexNodeKind.Empty or RegexNodeKind.NegativeLookaround: case RegexNodeKind.PositiveLookaround when ((node.Options | tmpChild.Options) & RegexOptions.RightToLeft) != 0: + case RegexNodeKind.Boundary or RegexNodeKind.ECMABoundary or RegexNodeKind.NonBoundary or RegexNodeKind.NonECMABoundary: // Skip over zero-width assertions. continue; diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs index 8501700b31c180..856f3d85d52627 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs @@ -34,7 +34,7 @@ public class RegexFindOptimizationsTests [InlineData(@"(?=^)abc", 0, (int)FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning)] [InlineData(@"(?=.*$)abc", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight)] [InlineData(@"(?=^)abc", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingString_RightToLeft)] - [InlineData(@"abc(?=^)", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingString_RightToLeft)] + [InlineData(@"abc(?=^)", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingAnchor_RightToLeft_Beginning)] [InlineData(@"(?