Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2086,23 +2086,42 @@ static bool RemoveCaptures(RegexNode parent, int nodeIndex)
// eliminate any ending backtracking from it.
EliminateEndingBacktracking();

// A positive lookaround wrapped around an empty is a nop, and we can reduce it
// to simply Empty. A developer typically doesn't write this, but rather it evolves
// due to optimizations resulting in empty.

// A negative lookaround wrapped around an empty child, i.e. (?!), is
// sometimes used as a way to insert a guaranteed no-match into the expression,
// often as part of a conditional. We can reduce it to simply Nothing.
RegexNode child = Child(0);

if (Child(0).Kind == RegexNodeKind.Empty)
// A positive lookahead that wraps a zero-width assertion is useless wrapping and can be removed.
// Similarly, a positive lookaround wrapped around an empty can be reduced simply to Empty.
// A developer typically doesn't write this, but rather it evolves due to optimizations resulting in empty.
if (Kind is RegexNodeKind.PositiveLookaround)
{
Kind = Kind == RegexNodeKind.PositiveLookaround ? RegexNodeKind.Empty : RegexNodeKind.Nothing;
Children = null;
if (((Options & RegexOptions.RightToLeft) == 0 && IsZeroWidthAssertion(child.Kind)) ||
child.Kind is RegexNodeKind.Empty)
{
return child;
}
}
else if (Kind is RegexNodeKind.NegativeLookaround)
{
// A negative lookaround wrapped around an empty child, i.e. (?!), is
// sometimes used as a way to insert a guaranteed no-match into the expression,
// often as part of a conditional. We can reduce it to simply Nothing.
if (child.Kind is RegexNodeKind.Empty)
{
Kind = RegexNodeKind.Nothing;
Children = null;
}
}

return this;
}

/// <summary>Gets whether the specified node kind is one of the zero-width assertion kinds recognized here.</summary>
/// <param name="kind">The node kind to classify.</param>
/// <returns>
/// <see langword="true"/> if <paramref name="kind"/> is a lookaround, an anchor, or a word-boundary kind
/// listed below; otherwise, <see langword="false"/>.
/// </returns>
private static bool IsZeroWidthAssertion(RegexNodeKind kind)
{
    switch (kind)
    {
        // Lookarounds.
        case RegexNodeKind.PositiveLookaround:
        case RegexNodeKind.NegativeLookaround:

        // Anchors.
        case RegexNodeKind.Beginning:
        case RegexNodeKind.Start:
        case RegexNodeKind.Bol:
        case RegexNodeKind.Eol:
        case RegexNodeKind.End:
        case RegexNodeKind.EndZ:

        // Word boundaries and their negations.
        case RegexNodeKind.Boundary:
        case RegexNodeKind.ECMABoundary:
        case RegexNodeKind.NonBoundary:
        case RegexNodeKind.NonECMABoundary:
            return true;

        default:
            return false;
    }
}

/// <summary>Gets whether the node contains a backreference anywhere in its tree.</summary>
private static bool? ContainsBackreference(RegexNode node)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1354,8 +1354,6 @@ private static RegexNodeKind FindLeadingOrTrailingAnchor(RegexNode node, bool le
case RegexNodeKind.Start:
case RegexNodeKind.EndZ:
case RegexNodeKind.End:
case RegexNodeKind.Boundary:
case RegexNodeKind.ECMABoundary:
// Return any anchor found.
return node.Kind;

Expand Down Expand Up @@ -1389,6 +1387,7 @@ private static RegexNodeKind FindLeadingOrTrailingAnchor(RegexNode node, bool le
{
case RegexNodeKind.Empty or RegexNodeKind.NegativeLookaround:
case RegexNodeKind.PositiveLookaround when ((node.Options | tmpChild.Options) & RegexOptions.RightToLeft) != 0:
case RegexNodeKind.Boundary or RegexNodeKind.ECMABoundary or RegexNodeKind.NonBoundary or RegexNodeKind.NonECMABoundary:
// Skip over zero-width assertions.
continue;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public class RegexFindOptimizationsTests
[InlineData(@"(?=^)abc", 0, (int)FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning)]
[InlineData(@"(?=.*$)abc", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight)]
[InlineData(@"(?=^)abc", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingString_RightToLeft)]
[InlineData(@"abc(?=^)", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingString_RightToLeft)]
[InlineData(@"abc(?=^)", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingAnchor_RightToLeft_Beginning)]
[InlineData(@"(?<!42)(?=^)abc", 0, (int)FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning)]
[InlineData(@"(?<=^)abc", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight)]
[InlineData(@"(?<=^)(?=^)abc", 0, (int)FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,16 @@ public class RegexReductionTests
[InlineData(@"\z\z", @"\z")]
[InlineData(@"\G\G", @"\G")]
[InlineData(@"\A\A", @"\A")]
// Lookarounds
[InlineData(@"(?=^)abc", @"^abc")]
[InlineData(@"(?=\G)abc", @"\Gabc")]
[InlineData(@"abc(?=$)", @"abc$")]
[InlineData(@"(?=\b)abc", @"\babc")]
[InlineData(@"abc(?=\z)", @"abc\z")]
[InlineData(@"abc(?=\Z)", @"abc\Z")]
[InlineData(@"abc(?=\A)", @"abc\A")]
[InlineData(@"(?=(?=(?=abc)))", @"(?=abc)")]
[InlineData(@"(?=(?<=(?=abc)))", @"(?<=(?=abc))")]
// Nothing handling
[InlineData(@"\wabc(?!)def", "(?!)")]
[InlineData(@"\wabc(?!)def|ghi(?!)", "(?!)")]
Expand Down
Loading