From cc95695c0e5c47590fffd5faaa640cd8a0a8f0ad Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sun, 5 Apr 2026 08:58:32 -0400 Subject: [PATCH 1/2] Fix regex compiler/source generator resumeAt handling of conditionals inside loops Update EmitExpressionConditional to reset resumeAt when inside loops, preventing stale values and incorrect matches. --- .../gen/RegexGenerator.Emitter.cs | 6 +- .../Text/RegularExpressions/RegexCompiler.cs | 6 +- .../Regex.MultipleMatches.Tests.cs | 106 ++++++++++++++++++ 3 files changed, 112 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 9f7ac8653ac38a..415dae8b52415d 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -2527,7 +2527,7 @@ void EmitExpressionConditional(RegexNode node) writer.WriteLine(); TransferSliceStaticPosToPos(); // make sure sliceStaticPos is 0 after each branch string postYesDoneLabel = doneLabel; - if (!isAtomic && postYesDoneLabel != originalDoneLabel) + if ((!isAtomic && postYesDoneLabel != originalDoneLabel) || isInLoop) { writer.WriteLine($"{resumeAt} = 0;"); } @@ -2556,7 +2556,7 @@ void EmitExpressionConditional(RegexNode node) writer.WriteLine(); TransferSliceStaticPosToPos(); // make sure sliceStaticPos is 0 after each branch postNoDoneLabel = doneLabel; - if (!isAtomic && postNoDoneLabel != originalDoneLabel) + if ((!isAtomic && postNoDoneLabel != originalDoneLabel) || isInLoop) { writer.WriteLine($"{resumeAt} = 1;"); } @@ -2566,7 +2566,7 @@ void EmitExpressionConditional(RegexNode node) // There's only a yes branch. If it's going to cause us to output a backtracking // label but code may not end up taking the yes branch path, we need to emit a resumeAt // that will cause the backtracking to immediately pass through this node. - if (!isAtomic && postYesDoneLabel != originalDoneLabel) + if ((!isAtomic && postYesDoneLabel != originalDoneLabel) || isInLoop) { writer.WriteLine($"{resumeAt} = 2;"); } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index 2963d9f4b8298a..5f3f1aaf0f9d78 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -2529,7 +2529,7 @@ void EmitExpressionConditional(RegexNode node) EmitNode(yesBranch); TransferSliceStaticPosToPos(); // make sure sliceStaticPos is 0 after each branch Label postYesDoneLabel = doneLabel; - if (!isAtomic && postYesDoneLabel != originalDoneLabel) + if ((!isAtomic && postYesDoneLabel != originalDoneLabel) || isInLoop) { // resumeAt = 0; Ldc(0); @@ -2560,7 +2560,7 @@ void EmitExpressionConditional(RegexNode node) EmitNode(noBranch); TransferSliceStaticPosToPos(); // make sure sliceStaticPos is 0 after each branch postNoDoneLabel = doneLabel; - if (!isAtomic && postNoDoneLabel != originalDoneLabel) + if ((!isAtomic && postNoDoneLabel != originalDoneLabel) || isInLoop) { // resumeAt = 1; Ldc(1); @@ -2572,7 +2572,7 @@ void EmitExpressionConditional(RegexNode node) // There's only a yes branch. If it's going to cause us to output a backtracking // label but code may not end up taking the yes branch path, we need to emit a resumeAt // that will cause the backtracking to immediately pass through this node. - if (!isAtomic && postYesDoneLabel != originalDoneLabel) + if ((!isAtomic && postYesDoneLabel != originalDoneLabel) || isInLoop) { // resumeAt = 2; Ldc(2); diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs index b2d5f521725b3c..7bb2b7457c9752 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs @@ -461,6 +461,112 @@ public static IEnumerable Matches_TestData() new CaptureData("anyexpress1", 10, 11), } }; + + // ExpressionConditional with balancing groups inside a loop + + // Balancing group conditional with alternation in no-branch, no match + yield return new object[] + { + engine, @"(?((?'-1'))|((?'1'\S)+|(?'1'\s)))+(?!(?'-1'))", "abc", RegexOptions.None, + Array.Empty() + }; + + // Balancing group conditional with nested captures in alternation + yield return new object[] + { + engine, @"(?((?'-1'){6})|((?'1'(?'2'\S))+|(?'1'(?'2'\s))))+(?!(?'-1'))", "it not", RegexOptions.None, new[] + { + new CaptureData("it ", 0, 3), + new CaptureData("not", 3, 3), + } + }; + + // Alternation in capturing group in no-branch, no match expected + yield return new object[] + { + engine, @"(?((?'-1'))|((?'1'a)+|(?'1'b)))+(?!(?'-1'))", "abc", RegexOptions.None, + Array.Empty() + }; + yield return new object[] + { + engine, @"(?((?'-1'))|((?'1'a)+|(?'1'b)))+(?!(?'-1'))", "aaa", RegexOptions.None, + Array.Empty() + }; + + // No-branch with quantifier but no wrapping capture group + yield return new object[] + { + engine, @"(?((?'-1'))|(?'1'\S)+)+(?!(?'-1'))", "abc", RegexOptions.None, new[] + { + new CaptureData("a", 0, 1), + new CaptureData("b", 1, 1), + new CaptureData("c", 2, 1), + } + }; + yield return new object[] + { + engine, @"(?((?'-1'))|(?'1'a)+)+(?!(?'-1'))", "aaa", RegexOptions.None, new[] + { + new CaptureData("a", 0, 1), + new CaptureData("a", 1, 1), + new CaptureData("a", 2, 1), + } + }; + + // No-branch with quantifier inside wrapping capture group + yield return new object[] + { + engine, @"(?((?'-1'))|((?'1'\S)+))+(?!(?'-1'))", "abc", RegexOptions.None, + Array.Empty() + }; + + // Non-capturing group wrapping alternation in no-branch + yield return new object[] + { + engine, @"(?((?'-1'))|(?:(?'1'a)+|(?'1'b)))+(?!(?'-1'))", "aaa", RegexOptions.None, new[] + { + new CaptureData("a", 0, 1), + new CaptureData("a", 1, 1), + new CaptureData("a", 2, 1), + } + }; + yield return new object[] + { + engine, @"(?((?'-1'))|(?:(?'1'a)+|(?'1'b)))+(?!(?'-1'))", "abc", RegexOptions.None, new[] + { + new CaptureData("a", 0, 1), + new CaptureData("b", 1, 1), + } + }; + + // Balancing group conditional with single char in no-branch + yield return new object[] + { + engine, @"(?((?'-1'))|(?'1'a))+(?!(?'-1'))", "aaa", RegexOptions.None, new[] + { + new CaptureData("a", 0, 1), + new CaptureData("a", 1, 1), + new CaptureData("a", 2, 1), + } + }; + + // Balancing group conditional with multi-word input + yield return new object[] + { + engine, @"(?((?'-1'))|(?'1'\S)+)+(?!(?'-1'))", "hello world", RegexOptions.None, new[] + { + new CaptureData("h", 0, 1), + new CaptureData("e", 1, 1), + new CaptureData("l", 2, 1), + new CaptureData("l", 3, 1), + new CaptureData("o", 4, 1), + new CaptureData("w", 6, 1), + new CaptureData("o", 7, 1), + new CaptureData("r", 8, 1), + new CaptureData("l", 9, 1), + new CaptureData("d", 10, 1), + } + }; } // Fails on .NET Framework: https://github.com/dotnet/runtime/issues/62094 From 9f152572127004b869fc11bd41b2572f8faf213d Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Tue, 7 Apr 2026 14:04:31 -0600 Subject: [PATCH 2/2] Add additional regression tests for conditional regex in loops Add 4 more test cases covering: - Auto-numbered capture groups with dot and literal patterns - Alternation in no-branch with empty second branch - Quantified balancing group pop {2} Move all conditional/balancing group tests outside #if !NETFRAMEWORK since this bug is specific to the .NET Core regex compiler rewrite and these patterns work correctly on .NET Framework. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Regex.MultipleMatches.Tests.cs | 70 +++++++++++++++---- 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs index 7bb2b7457c9752..a814d2f5f47172 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs @@ -434,31 +434,46 @@ public static IEnumerable Matches_TestData() } } -#if !NETFRAMEWORK // these tests currently fail on .NET Framework, and we need to check IsDynamicCodeCompiled but that doesn't exist on .NET Framework - yield return new object[] + if (!RegexHelpers.IsNonBacktracking(engine)) // balancing groups aren't supported { - engine, "@(a*)+?", "@", RegexOptions.None, new[] + // ExpressionConditional with balancing groups inside a loop, auto-numbered capture groups + + // Balancing group conditional with auto-numbered capture group and dot + yield return new object[] { - new CaptureData("@", 0, 1) - } - }; + engine, @"(?((?'-1'))|(.)+)+(?!(?'-1'))", "abc", RegexOptions.None, new[] + { + new CaptureData("a", 0, 1), + new CaptureData("b", 1, 1), + new CaptureData("c", 2, 1), + } + }; - if (!RegexHelpers.IsNonBacktracking(engine)) // atomic subexpressions aren't supported - { + // Balancing group conditional with auto-numbered capture group and literal yield return new object[] { - engine, @"()(?>\1+?).\b", "xxxx", RegexOptions.None, new[] + engine, @"(?((?'-1'))|(a)+)+(?!(?'-1'))", "aaa", RegexOptions.None, new[] { - new CaptureData("x", 3, 1), + new CaptureData("a", 0, 1), + new CaptureData("a", 1, 1), + new CaptureData("a", 2, 1), } }; - // Fails on .NET Framework: https://github.com/dotnet/runtime/issues/111051 + // Alternation in no-branch with empty second branch, no match expected yield return new object[] { - engine, @"anyexpress1(?<=(.(any express|(any express)*)+?)anyexpress1)", "anystring anyexpress1", RegexOptions.None, new[] + engine, @"(?((?'-1'))|((?'1'.)+|()))+(?!(?'-1'))", "a", RegexOptions.None, + Array.Empty() + }; + + // Balancing group conditional with quantified pop {2} + yield return new object[] + { + engine, @"(?((?'-1'){2})|((?'1'a)+))+(?!(?'-1'))", "aa", RegexOptions.None, new[] { - new CaptureData("anyexpress1", 10, 11), + new CaptureData("a", 0, 1), + new CaptureData("a", 1, 1), } }; @@ -569,6 +584,35 @@ public static IEnumerable Matches_TestData() }; } +#if !NETFRAMEWORK // these tests currently fail on .NET Framework + yield return new object[] + { + engine, "@(a*)+?", "@", RegexOptions.None, new[] + { + new CaptureData("@", 0, 1) + } + }; + + if (!RegexHelpers.IsNonBacktracking(engine)) // atomic subexpressions aren't supported + { + yield return new object[] + { + engine, @"()(?>\1+?).\b", "xxxx", RegexOptions.None, new[] + { + new CaptureData("x", 3, 1), + } + }; + + // Fails on .NET Framework: https://github.com/dotnet/runtime/issues/111051 + yield return new object[] + { + engine, @"anyexpress1(?<=(.(any express|(any express)*)+?)anyexpress1)", "anystring anyexpress1", RegexOptions.None, new[] + { + new CaptureData("anyexpress1", 10, 11), + } + }; + } + // Fails on .NET Framework: https://github.com/dotnet/runtime/issues/62094 yield return new object[] {