diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 19488703f07f7e..efe4f0e40b4390 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -3770,6 +3770,17 @@ void EmitLazy(RegexNode node) using (clause) { + // We're backtracking, which could either be to something prior to the lazy loop or to something + // inside of the lazy loop. If it's to something inside of the lazy loop, then either the loop + // will eventually succeed or we'll eventually end up unwinding back through the iterations all + // the way back to the loop not matching at all, in which case the state we first pushed on at the + // beginning of the !isAtomic section will get popped off. But if here we're instead going to jump + // to something prior to the lazy loop, then we need to pop off that state here. + if (doneLabel == originalDoneLabel) + { + EmitAdd(writer, "stackpos", -entriesPerIteration); + } + if (iterationMayBeEmpty) { // If we saw empty, it must have been in the most recent iteration, as we wouldn't have @@ -3777,6 +3788,7 @@ void EmitLazy(RegexNode node) // false prior to backtracking / undoing that iteration. writer.WriteLine($"{sawEmpty} = 0; // false"); } + Goto(doneLabel); } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index 20ccc3afefcca6..fb710b26788732 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -4075,6 +4075,8 @@ void EmitLazy(RegexNode node) // Determine where to branch, either back to the lazy loop body to add an additional iteration, // or to the last backtracking label. + Label jumpToDone = DefineLabel(); + if (iterationMayBeEmpty) { // if (sawEmpty != 0) @@ -4093,7 +4095,7 @@ void EmitLazy(RegexNode node) Ldc(0); Stloc(sawEmpty!); - BrFar(doneLabel); + Br(jumpToDone); MarkLabel(sawEmptyZero); } @@ -4102,12 +4104,32 @@ void EmitLazy(RegexNode node) // if (iterationCount >= maxIterations) goto doneLabel; Ldloc(iterationCount); Ldc(maxIterations); - BgeFar(doneLabel); + Bge(jumpToDone); } // goto body; BrFar(body); + MarkLabel(jumpToDone); + + // We're backtracking, which could either be to something prior to the lazy loop or to something + // inside of the lazy loop. If it's to something inside of the lazy loop, then either the loop + // will eventually succeed or we'll eventually end up unwinding back through the iterations all + // the way back to the loop not matching at all, in which case the state we first pushed on at the + // beginning of the !isAtomic section will get popped off. But if here we're instead going to jump + // to something prior to the lazy loop, then we need to pop off that state here. + if (doneLabel == originalDoneLabel) + { + // stackpos -= entriesPerIteration; + Ldloc(stackpos); + Ldc(entriesPerIteration); + Sub(); + Stloc(stackpos); + } + + // goto done; + BrFar(doneLabel); + doneLabel = backtrack; MarkLabel(skipBacktrack); } diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index 43bd39f8f36779..d09f45ac2064c4 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -2374,6 +2374,11 @@ public static IEnumerable AllMatches_TestData() yield return new object[] { engine, "^(?:aaa|aa){1,5}?$", RegexOptions.None, "aaaaaaaa", new (int, int, string)[] { (0, 8, "aaaaaaaa") } }; yield return new object[] { engine, "^(?:aaa|aa){4}$", RegexOptions.None, "aaaaaaaa", new (int, int, string)[] { (0, 8, "aaaaaaaa") } }; yield return new object[] { engine, "^(?:aaa|aa){4}?$", RegexOptions.None, "aaaaaaaa", new (int, int, string)[] { (0, 8, "aaaaaaaa") } }; + yield return new object[] { engine, "^((?:(xx?,xx?)|xx?,xx?>xx?,xx?)-?(x)??){1,2}$", RegexOptions.None, "xx,xx>xx,xx", new (int, int, string)[] { (0, 11, "xx,xx>xx,xx") } }; + yield return new object[] { engine, "^((?:(xx?,xx?)|xx?,xx?>xx?,xx?)-?(x*)??){1,2}$", RegexOptions.None, "xx,xx>xx,xx", new (int, int, string)[] { (0, 11, "xx,xx>xx,xx") } }; + yield return new object[] { engine, "^((?:(xx?,xx?)|xx?,xx?>xx?,xx?)-?(x+)??){1,2}$", RegexOptions.None, "xx,xx>xx,xx", new (int, int, string)[] { (0, 11, "xx,xx>xx,xx") } }; + yield return new object[] { engine, "^((?:(x(x?),x(x?))|xx?,(x(x?)>x(x?)),(x((x))?))-?(x+?)??){1,2}$", RegexOptions.None, "xx,xx>xx,xx", new (int, int, string)[] { (0, 11, "xx,xx>xx,xx") } }; + yield return new object[] { engine, "^(?:x|(x)??){2}$", RegexOptions.None, "x>", null }; // Mostly empty matches using unusual regexes consisting mostly of anchors only yield return new object[] { engine, "^", RegexOptions.None, "", new (int, int, string)[] { (0, 0, "") } };