From 912adde8e37281b7f47f6fbb55635b7f31dd8d76 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Wed, 14 Feb 2024 21:06:58 -0500 Subject: [PATCH 1/3] Add backtracking stack imbalance detection to regex source generator Backtracking regex constructs use a stack to track state. Information about the current state of a construct (e.g. loop iteration number, alternation branch, etc.) is pushed onto the stack when the construct matches, and then if backtracking occurs and execution unwinds back to that construct, it pops state off of the stack in order to restore its knowledge of what it had done and what needs to happen next. We've had a couple of bugs over the years where this process goes awry and something incorrectly leaves extra state on the stack, which in turn makes it so that when the state is unwound, a previous construct ends up popping the wrong state off the stack, which it then misinterprets. This can lead to subtle bugs (in some cases it doesn't end up impacting matching against a particular input, in others it results in exceptions, etc.). To help reduce the chances that such issues remain, this adds some additional debug-only code to the source generator (I didn't modify the compiler just because it's a bit more challenging there, and the source generator is what's typically used when debugging these issues). Whenever pushing state onto the stack, it also pushes a cookie. When it pops state off, it also pops off that cookie, and validates that it's the expected value. This actually found one more lurking case of this in the code, which this PR also fixes. Conditional expressions (a rarely used feature) are supposed to treat the condition as a positive lookaround, which means it should be atomic, but if the expression contained a backtracking construct, that state was erroneously being left on the stack rather than being removed as part of wiping away the atomic expression. --- .../gen/RegexGenerator.Emitter.cs | 229 ++++++++++++++---- .../Text/RegularExpressions/RegexCompiler.cs | 17 +- .../RegexGeneratorOutputTests.cs | 49 +++- 3 files changed, 244 insertions(+), 51 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 7e7fed6cab65f2..6cdddf0780ec70 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -10,6 +10,7 @@ using System.Globalization; using System.IO; using System.Linq; +using System.Runtime.CompilerServices; using System.Security.Cryptography; using System.Threading; using Microsoft.CodeAnalysis; @@ -1416,6 +1417,15 @@ private static void EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, Reg HashSet additionalDeclarations = new(); Dictionary additionalLocalFunctions = new(); + // In debug builds, additional code is emitted to validate that the backtracking stack is being maintained appropriately. + // When state is pushed onto the backtracking stack, an additional known value is pushed, and when it's popped, it's + // the popped value is checked against that known value, throwing an exception if they don't match. +#if DEBUG +#pragma warning disable RS1035 // Random isn't always deterministic, but this is only for debug builds, and we've seeded the Random with a constant + Random stackCookieGenerator = new(12345); // seed for deterministic behavior +#pragma warning restore RS1035 +#endif + // Declare some locals. string sliceSpan = "slice"; writer.WriteLine("int pos = base.runtextpos;"); @@ -1857,6 +1867,7 @@ void EmitAllBranches() additionalDeclarations.Add($"int {currentBranch} = 0;"); } + int stackCookie = CreateStackCookie(); for (int i = 0; i < childCount; i++) { // If the alternation isn't atomic, backtracking may require our jump table jumping back @@ -1896,7 +1907,7 @@ void EmitAllBranches() // the relevant state is stored in our locals. if (currentBranch is null) { - EmitStackPush(startingCapturePos is not null ? + EmitStackPush(stackCookie + i, startingCapturePos is not null ? [i.ToString(), startingPos, startingCapturePos] : [i.ToString(), startingPos]); } @@ -1967,10 +1978,10 @@ void EmitAllBranches() if (currentBranch is null) { // We're in a loop, so we use the backtracking stack to persist our state. Pop it off. - EmitStackPop(startingCapturePos is not null ? + EmitStackPop(0, startingCapturePos is not null ? [startingCapturePos, startingPos] : [startingPos]); - switchClause = StackPop(); + switchClause = ValidateStackCookieWithAdditionAndReturnPoppedStack(stackCookie); } else { @@ -2070,6 +2081,7 @@ void EmitBackreferenceConditional(RegexNode node) // We're branching in a complicated fashion. Make sure sliceStaticPos is 0. TransferSliceStaticPosToPos(); + int stackCookie = CreateStackCookie(); // Get the capture number to test. int capnum = RegexParser.MapCaptureNumber(node.M, rm.Tree.CaptureNumberSparseMapping); @@ -2201,7 +2213,7 @@ void EmitBackreferenceConditional(RegexNode node) // the local. if (isInLoop) { - EmitStackPop(resumeAt); + EmitStackPop(stackCookie, resumeAt); } using (EmitBlock(writer, $"switch ({resumeAt})")) { @@ -2230,7 +2242,7 @@ void EmitBackreferenceConditional(RegexNode node) // so finish outputting our backtracking logic, which involves pushing onto the stack which // branch to backtrack into. If we're not in a loop, though, nothing else can overwrite this local // in the interim, so we can avoid pushing it. - EmitStackPush(resumeAt); + EmitStackPush(stackCookie, resumeAt); } } @@ -2298,10 +2310,19 @@ void EmitExpressionConditional(RegexNode node) writer.WriteLine(); int startingSliceStaticPos = sliceStaticPos; - // Emit the child. The condition expression is a zero-width assertion, which is atomic, + // Emit the condition. The condition expression is a zero-width assertion, which is atomic, // so prevent backtracking into it. writer.WriteLine("// Condition:"); - EmitNode(condition); + if (rm.Analysis.MayBacktrack(condition)) + { + // Condition expressions are treated like positive lookarounds and thus are implicitly atomic, + // so we need to emit the node as atomic if it might backtrack. + EmitAtomic(node, null); + } + else + { + EmitNode(condition); + } writer.WriteLine(); doneLabel = originalDoneLabel; @@ -2380,11 +2401,13 @@ void EmitExpressionConditional(RegexNode node) doneLabel = backtrack; MarkLabel(backtrack, emitSemicolon: false); + int stackCookie = CreateStackCookie(); + if (isInLoop) { // If we're not in a loop, the local will maintain its value until backtracking occurs. // If we are in a loop, multiple iterations need their own value, so we need to use the stack. - EmitStackPop(resumeAt); + EmitStackPop(stackCookie, resumeAt); } using (EmitBlock(writer, $"switch ({resumeAt})")) @@ -2405,7 +2428,7 @@ void EmitExpressionConditional(RegexNode node) MarkLabel(endConditional, emitSemicolon: !isInLoop); if (isInLoop) { - EmitStackPush(resumeAt); + EmitStackPush(stackCookie, resumeAt); } } } @@ -2477,12 +2500,13 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null) // pushes/pops the starting position before falling through. writer.WriteLine(); + int stackCookie = CreateStackCookie(); if (isInLoop) { // If we're in a loop, different iterations of the loop need their own // starting position, so push it on to the stack. If we're not in a loop, // the local will maintain its value and will suffice. - EmitStackPush(startingPos); + EmitStackPush(stackCookie, startingPos); } // Skip past the backtracking section @@ -2495,7 +2519,7 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null) MarkLabel(backtrack, emitSemicolon: false); if (isInLoop) { - EmitStackPop(startingPos); + EmitStackPop(stackCookie, startingPos); } Goto(doneLabel); writer.WriteLine(); @@ -2589,6 +2613,7 @@ void EmitNegativeLookaroundAssertion(RegexNode node) RegexNode child = node.Child(0); // Ensure we're able to uncapture anything captured by the child. + int stackCookie = CreateStackCookie(); bool isInLoop = false; string? capturePos = null; bool hasCaptures = rm.Analysis.MayContainCapture(child); @@ -2599,7 +2624,7 @@ void EmitNegativeLookaroundAssertion(RegexNode node) isInLoop = rm.Analysis.IsInLoop(node); if (isInLoop) { - EmitStackPush("base.Crawlpos()"); + EmitStackPush(stackCookie, "base.Crawlpos()"); } else { @@ -2637,7 +2662,15 @@ void EmitNegativeLookaroundAssertion(RegexNode node) // And uncapture anything if necessary. Negative lookaround captures don't persist beyond the lookaround. if (hasCaptures) { - EmitUncaptureUntil(isInLoop ? StackPop() : capturePos!); + if (isInLoop) + { + EmitUncaptureUntil(StackPop()); + EmitStackCookieValidate(stackCookie); + } + else + { + EmitUncaptureUntil(capturePos!); + } } doneLabel = originalDoneLabel; @@ -2817,8 +2850,8 @@ void EmitNode(RegexNode node, RegexNode? subsequent = null, bool emitLengthCheck // Emits the node for an atomic. void EmitAtomic(RegexNode node, RegexNode? subsequent) { - Debug.Assert(node.Kind is RegexNodeKind.Atomic or RegexNodeKind.PositiveLookaround or RegexNodeKind.NegativeLookaround, $"Unexpected type: {node.Kind}"); - Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}"); + Debug.Assert(node.Kind is RegexNodeKind.Atomic or RegexNodeKind.PositiveLookaround or RegexNodeKind.NegativeLookaround or RegexNodeKind.ExpressionConditional, $"Unexpected type: {node.Kind}"); + Debug.Assert(node.Kind is RegexNodeKind.ExpressionConditional ? node.ChildCount() >= 1 : node.ChildCount() == 1, $"Unexpected number of children: {node.ChildCount()}"); Debug.Assert(rm.Analysis.MayBacktrack(node.Child(0)), "Expected child to potentially backtrack"); // Grab the current done label and the current backtracking position. The purpose of the atomic node @@ -3227,6 +3260,7 @@ void EmitSingleCharLoop(RegexNode node, RegexNode? subsequent = null, bool emitL // point we decrement the matched count as long as it's above the minimum // required, and try again by flowing to everything that comes after this. MarkLabel(backtrackingLabel, emitSemicolon: false); + int stackCookie = CreateStackCookie(); string? capturePos = null; if (isInLoop) { @@ -3239,7 +3273,7 @@ void EmitSingleCharLoop(RegexNode node, RegexNode? subsequent = null, bool emitL { EmitUncaptureUntil(StackPop()); } - EmitStackPop(endingPos, startingPos); + EmitStackPop(stackCookie, endingPos, startingPos); } else if (expressionHasCaptures) { @@ -3294,7 +3328,7 @@ void EmitSingleCharLoop(RegexNode node, RegexNode? subsequent = null, bool emitL // We're in a loop and thus can't rely on locals correctly holding the state we // need (the locals could be overwritten by a subsequent iteration). Push the state // on to the backtracking stack. - EmitStackPush(expressionHasCaptures ? + EmitStackPush(stackCookie, expressionHasCaptures ? [startingPos, endingPos, "base.Crawlpos()"] : [startingPos, endingPos]); } @@ -3535,9 +3569,10 @@ node.Kind is RegexNodeKind.Setlazy && if (isInLoop) { writer.WriteLine(); + int stackCookie = CreateStackCookie(); // Store the loop's state. - EmitStackPush( + EmitStackPush(stackCookie, capturePos is not null && iterationCount is not null ? [startingPos, capturePos, iterationCount] : capturePos is not null ? [startingPos, capturePos] : iterationCount is not null ? [startingPos, iterationCount] : @@ -3553,7 +3588,7 @@ node.Kind is RegexNodeKind.Setlazy && MarkLabel(backtrack, emitSemicolon: false); // Restore the loop's state. - EmitStackPop( + EmitStackPop(stackCookie, capturePos is not null && iterationCount is not null ? [iterationCount, capturePos, startingPos] : capturePos is not null ? [capturePos, startingPos] : iterationCount is not null ? [iterationCount, startingPos] : @@ -3640,8 +3675,13 @@ void EmitLazy(RegexNode node) // iterations, this state needs to be stored on to the backtracking stack. if (!isAtomic) { - int entriesPerIteration = 1/*pos*/ + (iterationMayBeEmpty ? 2/*startingPos+sawEmpty*/ : 0) + (expressionHasCaptures ? 1/*Crawlpos*/ : 0); - EmitStackPush( + int stackCookie = CreateStackCookie(); + int entriesPerIteration = + 1/*pos*/ + + (iterationMayBeEmpty ? 2/*startingPos+sawEmpty*/ : 0) + + (expressionHasCaptures ? 1/*Crawlpos*/ : 0) + + (stackCookie != 0 ? 1 : 0); + EmitStackPush(stackCookie, expressionHasCaptures && iterationMayBeEmpty ? ["pos", startingPos!, sawEmpty!, "base.Crawlpos()"] : iterationMayBeEmpty ? ["pos", startingPos!, sawEmpty!] : expressionHasCaptures ? ["pos", "base.Crawlpos()"] : @@ -3721,7 +3761,7 @@ void EmitLazy(RegexNode node) { EmitUncaptureUntil(StackPop()); } - EmitStackPop(iterationMayBeEmpty ? + EmitStackPop(stackCookie, iterationMayBeEmpty ? [sawEmpty!, startingPos!, "pos"] : ["pos"]); SliceInputSpan(); @@ -3778,7 +3818,8 @@ void EmitLazy(RegexNode node) // of another loop, then any number of iterations might have such state that needs to be stored, // and thus it needs to be pushed on to the backtracking stack. bool isInLoop = rm.Analysis.IsInLoop(node); - EmitStackPush( + stackCookie = CreateStackCookie(); + EmitStackPush(stackCookie, !isInLoop ? (expressionHasCaptures ? ["pos", "base.Crawlpos()"] : ["pos"]) : iterationMayBeEmpty ? (expressionHasCaptures ? ["pos", iterationCount, startingPos!, sawEmpty!, "base.Crawlpos()"] : ["pos", iterationCount, startingPos!, sawEmpty!]) : expressionHasCaptures ? ["pos", iterationCount, "base.Crawlpos()"] : @@ -3800,7 +3841,7 @@ void EmitLazy(RegexNode node) { EmitUncaptureUntil(StackPop()); } - EmitStackPop( + EmitStackPop(stackCookie, !isInLoop ? ["pos"] : iterationMayBeEmpty ? [sawEmpty!, startingPos!, iterationCount, "pos"] : [iterationCount, "pos"]); @@ -4183,6 +4224,7 @@ void EmitLoop(RegexNode node) int minIterations = node.M; int maxIterations = node.N; + int stackCookie = CreateStackCookie(); // Special-case some repeaters. if (minIterations == maxIterations) @@ -4261,7 +4303,7 @@ void EmitLoop(RegexNode node) // need to know where each iteration began so when backtracking we can jump back to that location. This is // true even if the loop is atomic, as we might need to backtrack within the loop in order to match the // minimum iteration count. - EmitStackPush( + EmitStackPush(stackCookie, expressionHasCaptures && iterationMayBeEmpty ? ["base.Crawlpos()", startingPos!, "pos"] : expressionHasCaptures ? ["base.Crawlpos()", "pos"] : iterationMayBeEmpty ? [startingPos!, "pos"] : @@ -4371,13 +4413,14 @@ void EmitLoop(RegexNode node) writer.WriteLine("// Unable to match the remainder of the expression after exhausting the loop."); Goto(originalDoneLabel); } - EmitStackPop(iterationMayBeEmpty ? + EmitStackPop(0, iterationMayBeEmpty ? // stack cookie handled is explicitly 0 to handle it below ["pos", startingPos!] : ["pos"]); if (expressionHasCaptures) { EmitUncaptureUntil(StackPop()); } + EmitStackCookieValidate(stackCookie); SliceInputSpan(); // If there's a required minimum iteration count, validate now that we've processed enough iterations. @@ -4487,7 +4530,8 @@ void EmitLoop(RegexNode node) writer.WriteLine(); // Store the loop's state - EmitStackPush( + stackCookie = CreateStackCookie(); + EmitStackPush(stackCookie, startingPos is not null && startingStackpos is not null ? [startingPos, startingStackpos, iterationCount] : startingPos is not null ? [startingPos, iterationCount] : startingStackpos is not null ? [startingStackpos, iterationCount] : @@ -4501,7 +4545,7 @@ void EmitLoop(RegexNode node) // Emit a backtracking section that restores the loop's state and then jumps to the previous done label string backtrack = ReserveName("LoopBacktrack"); MarkLabel(backtrack, emitSemicolon: false); - EmitStackPop( + EmitStackPop(stackCookie, startingPos is not null && startingStackpos is not null ? [iterationCount, startingStackpos, startingPos] : startingPos is not null ? [iterationCount, startingPos] : startingStackpos is not null ? [iterationCount, startingStackpos] : @@ -4552,7 +4596,7 @@ void EmitUncaptureUntil(string capturepos) } /// Pushes values on to the backtracking stack. - void EmitStackPush(params string[] args) + void EmitStackPush(int stackCookie, params string[] args) { Debug.Assert(args.Length is >= 1); @@ -4596,41 +4640,134 @@ void EmitStackPush(params string[] args) requiredHelpers.Add(key, lines); } + if (stackCookie != 0) + { + EmitStackCookie(stackCookie); + } writer.WriteLine($"{HelpersTypeName}.{MethodName}(ref base.runstack!, ref stackpos, {string.Join(", ", args)});"); } /// Pops values from the backtracking stack into the specified locations. - void EmitStackPop(params string[] args) + void EmitStackPop(int stackCookie, params string[] args) { Debug.Assert(args.Length is >= 1); if (args.Length == 1) { writer.WriteLine($"{args[0]} = {StackPop()};"); - return; } - - const string MethodName = "StackPop"; - string key = $"{MethodName}{args.Length}"; - - if (!requiredHelpers.ContainsKey(key)) + else { - var lines = new string[5 + args.Length]; - lines[0] = $"/// Pops {args.Length} value{(args.Length == 1 ? "" : "s")} from the backtracking stack."; - lines[1] = $"[MethodImpl(MethodImplOptions.AggressiveInlining)]"; - lines[2] = $"internal static void {MethodName}(int[] stack, ref int pos{FormatN(", out int arg{0}", args.Length)})"; - lines[3] = $"{{"; - for (int i = 0; i < args.Length; i++) + const string MethodName = "StackPop"; + string key = $"{MethodName}{args.Length}"; + + if (!requiredHelpers.ContainsKey(key)) { - lines[4 + i] = $" arg{i} = stack[--pos];"; + var lines = new string[5 + args.Length]; + lines[0] = $"/// Pops {args.Length} value{(args.Length == 1 ? "" : "s")} from the backtracking stack."; + lines[1] = $"[MethodImpl(MethodImplOptions.AggressiveInlining)]"; + lines[2] = $"internal static void {MethodName}(int[] stack, ref int pos{FormatN(", out int arg{0}", args.Length)})"; + lines[3] = $"{{"; + for (int i = 0; i < args.Length; i++) + { + lines[4 + i] = $" arg{i} = stack[--pos];"; + } + lines[4 + args.Length] = $"}}"; + + requiredHelpers.Add(key, lines); } - lines[4 + args.Length] = $"}}"; - requiredHelpers.Add(key, lines); + writer.WriteLine($"{HelpersTypeName}.{MethodName}(base.runstack!, ref stackpos, out {string.Join(", out ", args)});"); + } + + if (stackCookie != 0) + { + EmitStackCookieValidate(stackCookie); + } + } + + /// Initializes a debug stack cookie for a new backtracking stack push. + int CreateStackCookie() => +#if DEBUG +#pragma warning disable RS1035 // Random is banned from generators due to non-determinism, but this Random is seeded with a constant and it's only for debug builds + stackCookieGenerator.Next() + 1; +#pragma warning restore RS1035 +#else + 0; +#endif + + /// Emits a debug stack cookie for a new backtracking stack push. + void EmitStackCookie(int stackCookie) + { +#if DEBUG + EmitStackPush(0, stackCookie.ToString()); +#endif + } + + /// Emits validation for a debug stack cookie. + void EmitStackCookieValidate(int stackCookie, [CallerLineNumber] int? lineNumber = null) + { +#if DEBUG + writer.WriteLine($"{StackCookieValidate(stackCookie, lineNumber)};"); +#endif + } + + /// + /// Returns an expression that: + /// In debug, pops item 1 from the backtracking stack, pops item 2 and validates it against the cookie, then evaluates to item1. + /// In release, pops and evaluates to an item from the backtracking stack. + /// + string ValidateStackCookieWithAdditionAndReturnPoppedStack(int stackCookie, [CallerLineNumber] int? lineNumber = null) + { +#if DEBUG + const string MethodName = "ValidateStackCookieWithAdditionAndReturnPoppedStack"; + if (!requiredHelpers.ContainsKey(MethodName)) + { + requiredHelpers.Add(MethodName, + [ + $"/// Validates that a stack cookie popped off the backtracking stack holds the expected value. Debug only.", + $"internal static int {MethodName}(int poppedStack, int expectedCookie, int actualCookie)", + $"{{", + $" expectedCookie += poppedStack;", + $" if (expectedCookie != actualCookie)", + $" {{", + $" throw new Exception($\"Backtracking stack imbalance detected (L{lineNumber}). Expected {{expectedCookie}}. Actual {{actualCookie}}.\");", + $" }}", + $" return poppedStack;", + $"}}", + ]); + } + + return $"{HelpersTypeName}.{MethodName}({StackPop()}, {stackCookie}, {StackPop()})"; +#else + return StackPop(); +#endif + } + +#if DEBUG + /// Returns an expression that validates and returns a debug stack cookie. + string StackCookieValidate(int stackCookie, [CallerLineNumber] int? lineNumber = null) + { + const string MethodName = "ValidateStackCookie"; + if (!requiredHelpers.ContainsKey(MethodName)) + { + requiredHelpers.Add(MethodName, + [ + $"/// Validates that a stack cookie popped off the backtracking stack holds the expected value. Debug only.", + $"internal static int {MethodName}(int expected, int actual)", + $"{{", + $" if (expected != actual)", + $" {{", + $" throw new Exception($\"Backtracking stack imbalance detected (L{lineNumber}). Expected {{expected}}. Actual {{actual}}.\");", + $" }}", + $" return actual;", + $"}}", + ]); } - writer.WriteLine($"{HelpersTypeName}.{MethodName}(base.runstack!, ref stackpos, out {string.Join(", out ", args)});"); + return $"{HelpersTypeName}.{MethodName}({stackCookie}, {StackPop()})"; } +#endif /// Expression for popping the next item from the backtracking stack. string StackPop() => "base.runstack![--stackpos]"; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index 58abbb8b181eac..216ade8b4967b3 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -2233,9 +2233,18 @@ void EmitExpressionConditional(RegexNode node) Stloc(startingPos); int startingSliceStaticPos = sliceStaticPos; - // Emit the child. The condition expression is a zero-width assertion, which is atomic, + // Emit the condition. The condition expression is a zero-width assertion, which is atomic, // so prevent backtracking into it. - EmitNode(condition); + if (analysis.MayBacktrack(condition)) + { + // Condition expressions are treated like positive lookarounds and thus are implicitly atomic, + // so we need to emit the node as atomic if it might backtrack. + EmitAtomic(node, null); + } + else + { + EmitNode(condition); + } doneLabel = originalDoneLabel; // After the condition completes successfully, reset the text positions. @@ -2803,8 +2812,8 @@ void EmitNode(RegexNode node, RegexNode? subsequent = null, bool emitLengthCheck // Emits the node for an atomic. void EmitAtomic(RegexNode node, RegexNode? subsequent) { - Debug.Assert(node.Kind is RegexNodeKind.Atomic or RegexNodeKind.PositiveLookaround or RegexNodeKind.NegativeLookaround, $"Unexpected type: {node.Kind}"); - Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}"); + Debug.Assert(node.Kind is RegexNodeKind.Atomic or RegexNodeKind.PositiveLookaround or RegexNodeKind.NegativeLookaround or RegexNodeKind.ExpressionConditional, $"Unexpected type: {node.Kind}"); + Debug.Assert(node.Kind is RegexNodeKind.ExpressionConditional ? node.ChildCount() >= 1 : node.ChildCount() == 1, $"Unexpected number of children: {node.ChildCount()}"); RegexNode child = node.Child(0); diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs index 181c978a376661..1e9e5d5cdb4d22 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs @@ -2,15 +2,24 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Collections.Generic; +using System.Diagnostics; using System.IO; +using System.Linq; +using System.Text.RegularExpressions.Generator; using System.Threading.Tasks; using Xunit; namespace System.Text.RegularExpressions.Tests { - [ConditionalClass(typeof(PlatformDetection), nameof(PlatformDetection.IsReflectionEmitSupported), nameof(PlatformDetection.IsNotMobile), nameof(PlatformDetection.IsNotBrowser))] + [ConditionalClass(typeof(RegexGeneratorOutputTests), nameof(GeneratorOutputTestsSupported))] public partial class RegexGeneratorOutputTests { + public static bool GeneratorOutputTestsSupported => + PlatformDetection.IsReflectionEmitSupported && + PlatformDetection.IsNotMobile && + PlatformDetection.IsNotBrowser && + typeof(RegexGenerator).Assembly.GetCustomAttributes(false).OfType().Any(da => da.IsJITTrackingEnabled); // output differs between debug and release + // This exists to ensure we're aware of any egregious breaks to formatting / readability. // Any updates that impact the generated code in these baselines will need to be updated // as changes are made to the code emitted by the generator. @@ -258,6 +267,7 @@ private bool TryMatchAtCurrentPosition(ReadOnlySpan inputSpan) loop_iteration = 0; LoopBody: + Utilities.StackPush(ref base.runstack!, ref stackpos, 143337952); Utilities.StackPush(ref base.runstack!, ref stackpos, base.Crawlpos(), pos); loop_iteration++; @@ -311,6 +321,7 @@ private bool TryMatchAtCurrentPosition(ReadOnlySpan inputSpan) } pos = base.runstack![--stackpos]; UncaptureUntil(base.runstack![--stackpos]); + Utilities.ValidateStackCookie(143337952, base.runstack![--stackpos]); slice = inputSpan.Slice(pos); LoopEnd:; //} @@ -381,6 +392,32 @@ internal static bool IsWordChar(char ch) (WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0; } + /// Pushes 1 value onto the backtracking stack. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void StackPush(ref int[] stack, ref int pos, int arg0) + { + // If there's space available for the value, store it. + int[] s = stack; + int p = pos; + if ((uint)p < (uint)s.Length) + { + s[p] = arg0; + pos++; + return; + } + + // Otherwise, resize the stack to make room and try again. + WithResize(ref stack, ref pos, arg0); + + // Resize the backtracking stack array and push 1 value onto the stack. + [MethodImpl(MethodImplOptions.NoInlining)] + static void WithResize(ref int[] stack, ref int pos, int arg0) + { + Array.Resize(ref stack, (pos + 0) * 2); + StackPush(ref stack, ref pos, arg0); + } + } + /// Pushes 2 values onto the backtracking stack. [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static void StackPush(ref int[] stack, ref int pos, int arg0, int arg1) @@ -407,6 +444,16 @@ static void WithResize(ref int[] stack, ref int pos, int arg0, int arg1) StackPush(ref stack, ref pos, arg0, arg1); } } + + /// Validates that a stack cookie popped off the backtracking stack holds the expected value. Debug only. + internal static int ValidateStackCookie(int expected, int actual) + { + if (expected != actual) + { + throw new Exception($"Backtracking stack imbalance detected (L4423). Expected {expected}. Actual {actual}."); + } + return actual; + } } } """ From 34a6f31a60367d91c23bb658c7ce7315bff59291 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 23 Feb 2024 16:56:04 -0500 Subject: [PATCH 2/3] Update comments based on PR feedback --- .../gen/RegexGenerator.Emitter.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index e5113dc3a1e302..4475f46c3c6ec5 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -1455,7 +1455,8 @@ private static void EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, Reg // In debug builds, additional code is emitted to validate that the backtracking stack is being maintained appropriately. // When state is pushed onto the backtracking stack, an additional known value is pushed, and when it's popped, it's - // the popped value is checked against that known value, throwing an exception if they don't match. + // the popped value is checked against that known value, throwing an exception if they don't match. This validation code + // is currently not part of RegexCompiler, though it could be added there in the future if desired. #if DEBUG #pragma warning disable RS1035 // Random isn't always deterministic, but this is only for debug builds, and we've seeded the Random with a constant Random stackCookieGenerator = new(12345); // seed for deterministic behavior @@ -2013,7 +2014,8 @@ void EmitAllBranches() string switchClause; if (currentBranch is null) { - // We're in a loop, so we use the backtracking stack to persist our state. Pop it off. + // We're in a loop, so we use the backtracking stack to persist our state. + // Pop it off and validate the stack position. EmitStackPop(0, startingCapturePos is not null ? [startingCapturePos, startingPos] : [startingPos]); From 4e21c74f5a116a5c6e7fd57daeb15bee9f8e6ade Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 23 Feb 2024 22:25:48 -0500 Subject: [PATCH 3/3] Fix tests after merge --- .../gen/RegexGenerator.Emitter.cs | 12 ++++++------ .../FunctionalTests/RegexGeneratorOutputTests.cs | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 4475f46c3c6ec5..f0f6fc9da0e1df 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -4743,10 +4743,10 @@ void EmitStackCookie(int stackCookie) } /// Emits validation for a debug stack cookie. - void EmitStackCookieValidate(int stackCookie, [CallerLineNumber] int? lineNumber = null) + void EmitStackCookieValidate(int stackCookie) { #if DEBUG - writer.WriteLine($"{StackCookieValidate(stackCookie, lineNumber)};"); + writer.WriteLine($"{StackCookieValidate(stackCookie)};"); #endif } @@ -4755,7 +4755,7 @@ void EmitStackCookieValidate(int stackCookie, [CallerLineNumber] int? lineNumber /// In debug, pops item 1 from the backtracking stack, pops item 2 and validates it against the cookie, then evaluates to item1. /// In release, pops and evaluates to an item from the backtracking stack. /// - string ValidateStackCookieWithAdditionAndReturnPoppedStack(int stackCookie, [CallerLineNumber] int? lineNumber = null) + string ValidateStackCookieWithAdditionAndReturnPoppedStack(int stackCookie) { #if DEBUG const string MethodName = "ValidateStackCookieWithAdditionAndReturnPoppedStack"; @@ -4769,7 +4769,7 @@ string ValidateStackCookieWithAdditionAndReturnPoppedStack(int stackCookie, [Cal $" expectedCookie += poppedStack;", $" if (expectedCookie != actualCookie)", $" {{", - $" throw new Exception($\"Backtracking stack imbalance detected (L{lineNumber}). Expected {{expectedCookie}}. Actual {{actualCookie}}.\");", + $" throw new Exception($\"Backtracking stack imbalance detected. Expected {{expectedCookie}}. Actual {{actualCookie}}.\");", $" }}", $" return poppedStack;", $"}}", @@ -4784,7 +4784,7 @@ string ValidateStackCookieWithAdditionAndReturnPoppedStack(int stackCookie, [Cal #if DEBUG /// Returns an expression that validates and returns a debug stack cookie. - string StackCookieValidate(int stackCookie, [CallerLineNumber] int? lineNumber = null) + string StackCookieValidate(int stackCookie) { const string MethodName = "ValidateStackCookie"; if (!requiredHelpers.ContainsKey(MethodName)) @@ -4796,7 +4796,7 @@ string StackCookieValidate(int stackCookie, [CallerLineNumber] int? lineNumber = $"{{", $" if (expected != actual)", $" {{", - $" throw new Exception($\"Backtracking stack imbalance detected (L{lineNumber}). Expected {{expected}}. Actual {{actual}}.\");", + $" throw new Exception($\"Backtracking stack imbalance detected. Expected {{expected}}. Actual {{actual}}.\");", $" }}", $" return actual;", $"}}", diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs index 1e9e5d5cdb4d22..57d4232ee1ed18 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs @@ -450,7 +450,7 @@ internal static int ValidateStackCookie(int expected, int actual) { if (expected != actual) { - throw new Exception($"Backtracking stack imbalance detected (L4423). Expected {expected}. Actual {actual}."); + throw new Exception($"Backtracking stack imbalance detected. Expected {expected}. Actual {actual}."); } return actual; }