diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
index e8ecee73f7a1df..65f470ce2d31c8 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
@@ -2018,12 +2018,18 @@ static void ProcessNode(RegexNode node, RegexNode subsequent)
loopChild = loopChild.Child(loopChild.ChildCount() - 1);
}
+ // MakeLoopAtomic wraps the loop in an Atomic group, which discards all backtracking
+ // state from within the body. CanBeMadeAtomic only proves that giving back iterations
+ // is unnecessary, but the Atomic wrapper also prevents within-body backtracking from
+ // being triggered by subsequent failures. That's only safe when the body has no
+ // backtracking of its own and the last descendant is a type that won't be adversely
+ // affected by seeing an Atomic ancestor.
if (loopChild.Kind is
- RegexNodeKind.Boundary or RegexNodeKind.ECMABoundary or
- RegexNodeKind.Multi or
- RegexNodeKind.One or RegexNodeKind.Notone or RegexNodeKind.Set)
+ RegexNodeKind.Boundary or RegexNodeKind.ECMABoundary or
+ RegexNodeKind.Multi or
+ RegexNodeKind.One or RegexNodeKind.Notone or RegexNodeKind.Set &&
+ !MayContainBacktracking(node.Child(0)))
{
- // For types on the allow list, we can make the loop itself atomic.
node.MakeLoopAtomic();
}
else if (node.Kind is RegexNodeKind.Loop or RegexNodeKind.Lazyloop)
@@ -2516,6 +2522,52 @@ bool MayOverlapStartingOrEndingSet(string set) =>
}
}
+ /// <summary>Gets whether this node is itself a backtracking construct.</summary>
+ /// <remarks>
+ /// This checks the node in isolation (not its children). A node is a backtracking construct
+ /// if it's an alternation or a variable-width loop (a loop kind whose M and N differ).
+ /// </remarks>
+ public bool IsBacktrackingConstruct => Kind switch
+ {
+ RegexNodeKind.Alternate => true,
+ RegexNodeKind.Loop or RegexNodeKind.Lazyloop when M != N => true,
+ RegexNodeKind.Oneloop or RegexNodeKind.Onelazy or
+ RegexNodeKind.Notoneloop or RegexNodeKind.Notonelazy or
+ RegexNodeKind.Setloop or RegexNodeKind.Setlazy when M != N => true,
+ _ => false,
+ };
+
+ /// <summary>Checks whether a node tree may contain backtracking constructs (variable-width loops or alternations).</summary>
+ /// <param name="node">The root of the subtree to examine; the node itself and, recursively, all of its children are checked.</param>
+ /// <returns>true if the node or any descendant is a backtracking construct, or if the stack is too deep to recur safely; otherwise, false.</returns>
+ private static bool MayContainBacktracking(RegexNode node)
+ {
+ // If we can't recur, be conservative: assume the worst and report that backtracking may be present.
+ if (!StackHelper.TryEnsureSufficientExecutionStack())
+ {
+ return true;
+ }
+
+ // If this node is itself a backtracking construct, there's no need to look at its children.
+ if (node.IsBacktrackingConstruct)
+ {
+ return true;
+ }
+
+ // Otherwise, recur into each child; a single backtracking construct anywhere below is enough.
+ int childCount = node.ChildCount();
+ for (int i = 0; i < childCount; i++)
+ {
+ if (MayContainBacktracking(node.Child(i)))
+ {
+ return true;
+ }
+ }
+
+ // Neither this node nor any of its descendants is a backtracking construct.
+ return false;
+ }
+
/// <summary>Gets whether this node is known to be immediately preceded by a word character.</summary>
public bool IsKnownPrecededByWordChar() => IsKnownPrecededOrSucceededByWordChar(false);
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTreeAnalyzer.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTreeAnalyzer.cs
index 5d02fe6bd8c863..20f658ba753603 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTreeAnalyzer.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexTreeAnalyzer.cs
@@ -44,13 +44,9 @@ static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByA
// Certain kinds of nodes incur backtracking logic themselves: add them to the backtracking collection.
// We may later find that a node contains another that has backtracking; we'll add nodes based on that
// after examining the children.
- switch (node.Kind)
+ if (node.IsBacktrackingConstruct)
{
- case RegexNodeKind.Alternate:
- case RegexNodeKind.Loop or RegexNodeKind.Lazyloop when node.M != node.N:
- case RegexNodeKind.Oneloop or RegexNodeKind.Notoneloop or RegexNodeKind.Setloop or RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy or RegexNodeKind.Setlazy when node.M != node.N:
- (results._mayBacktrack ??= new HashSet()).Add(node);
- break;
+ (results._mayBacktrack ??= new HashSet()).Add(node);
}
}
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
index 2cc54b7540dae8..1aae6a238685d9 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
@@ -334,6 +334,42 @@ public static IEnumerable