From 4f55002fbb8152f606537c8ed692aab54aabdb61 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Thu, 20 Jan 2022 18:02:55 -0500 Subject: [PATCH] RegexNode cleanup No functional changes, just code cleanup: - Move node types into a RegexNodeKind enum - Rename some of the kinds to make them more descriptive - Rename node.Next to node.Parent to better describe its purpose - Add a bunch of comments about node kinds --- .../gen/RegexGenerator.Emitter.cs | 320 ++--- .../gen/Resources/Strings.resx | 3 - .../gen/Resources/xlf/Strings.cs.xlf | 5 - .../gen/Resources/xlf/Strings.de.xlf | 5 - .../gen/Resources/xlf/Strings.es.xlf | 5 - .../gen/Resources/xlf/Strings.fr.xlf | 5 - .../gen/Resources/xlf/Strings.it.xlf | 5 - .../gen/Resources/xlf/Strings.ja.xlf | 5 - .../gen/Resources/xlf/Strings.ko.xlf | 5 - .../gen/Resources/xlf/Strings.pl.xlf | 5 - .../gen/Resources/xlf/Strings.pt-BR.xlf | 5 - .../gen/Resources/xlf/Strings.ru.xlf | 5 - .../gen/Resources/xlf/Strings.tr.xlf | 5 - .../gen/Resources/xlf/Strings.zh-Hans.xlf | 5 - .../gen/Resources/xlf/Strings.zh-Hant.xlf | 5 - ...m.Text.RegularExpressions.Generator.csproj | 1 + .../src/Resources/Strings.resx | 3 - .../src/System.Text.RegularExpressions.csproj | 1 + .../Text/RegularExpressions/RegexCode.cs | 3 +- .../Text/RegularExpressions/RegexCompiler.cs | 178 +-- .../RegexFindOptimizations.cs | 2 +- .../Text/RegularExpressions/RegexNode.cs | 1063 ++++++++--------- .../Text/RegularExpressions/RegexNodeKind.cs | 182 +++ .../Text/RegularExpressions/RegexParser.cs | 133 +-- .../RegularExpressions/RegexPrefixAnalyzer.cs | 277 ++--- .../RegularExpressions/RegexReplacement.cs | 10 +- .../Text/RegularExpressions/RegexWriter.cs | 144 +-- .../Symbolic/RegexNodeToSymbolicConverter.cs | 111 +- 28 files changed, 1252 insertions(+), 1244 deletions(-) create mode 100644 src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNodeKind.cs diff --git 
a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index d3b724985d2c1e..fabe62f5262ec1 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -592,7 +592,7 @@ void EmitLiteralAfterAtomicLoop() Debug.Assert(code.FindOptimizations.LiteralAfterLoop is not null); (RegexNode LoopNode, (char Char, string? String, char[]? Chars) Literal) target = code.FindOptimizations.LiteralAfterLoop.Value; - Debug.Assert(target.LoopNode.Type is RegexNode.Setloop or RegexNode.Setlazy or RegexNode.Setloopatomic); + Debug.Assert(target.LoopNode.Kind is RegexNodeKind.Setloop or RegexNodeKind.Setlazy or RegexNodeKind.Setloopatomic); Debug.Assert(target.LoopNode.N == int.MaxValue); using (EmitBlock(writer, "while (true)")) @@ -705,24 +705,24 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe // Every RegexTree is rooted in the implicit Capture for the whole expression. // Skip the Capture node. We handle the implicit root capture specially. RegexNode node = code.Tree.Root; - Debug.Assert(node.Type == RegexNode.Capture, "Every generated tree should begin with a capture node"); + Debug.Assert(node.Kind == RegexNodeKind.Capture, "Every generated tree should begin with a capture node"); Debug.Assert(node.ChildCount() == 1, "Capture nodes should have one child"); node = node.Child(0); // In some limited cases, FindFirstChar will only return true if it successfully matched the whole expression. // We can special case these to do essentially nothing in Go other than emit the capture. 
- switch (node.Type) + switch (node.Kind) { - case RegexNode.Multi or RegexNode.Notone or RegexNode.One or RegexNode.Set when !IsCaseInsensitive(node): + case RegexNodeKind.Multi or RegexNodeKind.Notone or RegexNodeKind.One or RegexNodeKind.Set when !IsCaseInsensitive(node): // This is the case for single and multiple characters, though the whole thing is only guaranteed // to have been validated in FindFirstChar when doing case-sensitive comparison. writer.WriteLine($"int start = base.runtextpos;"); - writer.WriteLine($"int end = start + {(node.Type == RegexNode.Multi ? node.Str!.Length : 1)};"); + writer.WriteLine($"int end = start + {(node.Kind == RegexNodeKind.Multi ? node.Str!.Length : 1)};"); writer.WriteLine("base.Capture(0, start, end);"); writer.WriteLine("base.runtextpos = end;"); return requiredHelpers; - case RegexNode.Empty: + case RegexNodeKind.Empty: // This case isn't common in production, but it's very common when first getting started with the // source generator and seeing what happens as you add more to expressions. When approaching // it from a learning perspective, this is very common, as it's the empty string you start with. @@ -881,7 +881,7 @@ void TransferSliceStaticPosToPos() // Emits the code for an alternation. void EmitAlternation(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Alternate, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Alternate, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() >= 2, $"Expected at least 2 children, found {node.ChildCount()}"); int childCount = node.ChildCount(); @@ -892,7 +892,7 @@ void EmitAlternation(RegexNode node) // Both atomic and non-atomic are supported. While a parent RegexNode.Atomic node will itself // successfully prevent backtracking into this child node, we can emit better / cheaper code // for an Alternate when it is atomic, so we still take it into account here. 
- Debug.Assert(node.Next is not null); + Debug.Assert(node.Parent is not null); bool isAtomic = node.IsAtomicByParent(); // If no child branch overlaps with another child branch, we can emit more streamlined code @@ -938,7 +938,7 @@ void EmitAlternation(RegexNode node) // If it's a One or a Multi, get the first character and add it to the set. // If it was already in the set, we can't apply this optimization. - if (oneMultiOrSet.Type is RegexNode.One or RegexNode.Multi) + if (oneMultiOrSet.Kind is RegexNodeKind.One or RegexNodeKind.Multi) { if (!seenChars.Add(oneMultiOrSet.FirstCharOfOneOrMulti())) { @@ -950,7 +950,7 @@ void EmitAlternation(RegexNode node) { // The branch begins with a set. Make sure it's a set of only a few characters // and get them. If we can't, we can't apply this optimization. - Debug.Assert(oneMultiOrSet.Type is RegexNode.Set); + Debug.Assert(oneMultiOrSet.Kind is RegexNodeKind.Set); int numChars; if (RegexCharClass.IsNegated(oneMultiOrSet.Str!) || (numChars = RegexCharClass.GetSetChars(oneMultiOrSet.Str!, setChars)) == 0) @@ -1006,14 +1006,14 @@ void EmitSwitchedBranches() sliceStaticPos = startingSliceStaticPos; RegexNode child = node.Child(i); - Debug.Assert(child.Type is RegexNode.One or RegexNode.Multi or RegexNode.Set or RegexNode.Concatenate, DescribeNode(child, rm.Code)); - Debug.Assert(child.Type is not RegexNode.Concatenate || (child.ChildCount() >= 2 && child.Child(0).Type is RegexNode.One or RegexNode.Multi or RegexNode.Set)); + Debug.Assert(child.Kind is RegexNodeKind.One or RegexNodeKind.Multi or RegexNodeKind.Set or RegexNodeKind.Concatenate, DescribeNode(child, rm.Code)); + Debug.Assert(child.Kind is not RegexNodeKind.Concatenate || (child.ChildCount() >= 2 && child.Child(0).Kind is RegexNodeKind.One or RegexNodeKind.Multi or RegexNodeKind.Set)); RegexNode? 
childStart = child.FindBranchOneMultiOrSetStart(); Debug.Assert(childStart is not null, "Unexpectedly couldn't find the branch starting node."); Debug.Assert((childStart.Options & RegexOptions.IgnoreCase) == 0, "Expected only to find non-IgnoreCase branch starts"); - if (childStart.Type is RegexNode.Set) + if (childStart.Kind is RegexNodeKind.Set) { int numChars = RegexCharClass.GetSetChars(childStart.Str!, setChars); Debug.Assert(numChars != 0); @@ -1026,16 +1026,16 @@ void EmitSwitchedBranches() writer.Indent++; // Emit the code for the branch, without the first character that was already matched in the switch. - switch (child.Type) + switch (child.Kind) { - case RegexNode.Multi: + case RegexNodeKind.Multi: EmitNode(CloneMultiWithoutFirstChar(child)); writer.WriteLine(); break; - case RegexNode.Concatenate: - var newConcat = new RegexNode(RegexNode.Concatenate, child.Options); - if (childStart.Type == RegexNode.Multi) + case RegexNodeKind.Concatenate: + var newConcat = new RegexNode(RegexNodeKind.Concatenate, child.Options); + if (childStart.Kind == RegexNodeKind.Multi) { newConcat.AddChild(CloneMultiWithoutFirstChar(childStart)); } @@ -1050,11 +1050,11 @@ void EmitSwitchedBranches() static RegexNode CloneMultiWithoutFirstChar(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Multi); + Debug.Assert(node.Kind is RegexNodeKind.Multi); Debug.Assert(node.Str!.Length >= 2); return node.Str!.Length == 2 ? - new RegexNode(RegexNode.One, node.Options, node.Str![1]) : - new RegexNode(RegexNode.Multi, node.Options, node.Str!.Substring(1)); + new RegexNode(RegexNodeKind.One, node.Options, node.Str![1]) : + new RegexNode(RegexNodeKind.Multi, node.Options, node.Str!.Substring(1)); } } @@ -1240,7 +1240,7 @@ void EmitAllBranches() // Emits the code to handle a backreference. 
void EmitBackreference(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Ref, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Backreference, $"Unexpected type: {node.Kind}"); int capnum = RegexParser.MapCaptureNumber(node.M, rm.Code.Caps); @@ -1317,7 +1317,7 @@ void EmitWhenHasCapture() // Emits the code for an if(backreference)-then-else conditional. void EmitBackreferenceConditional(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Testref, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.BackreferenceConditional, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() == 2, $"Expected 2 children, found {node.ChildCount()}"); // We're branching in a complicated fashion. Make sure sliceStaticPos is 0. @@ -1329,7 +1329,7 @@ void EmitBackreferenceConditional(RegexNode node) // Get the "yes" branch and the "no" branch. The "no" branch is optional in syntax and is thus // somewhat likely to be Empty. RegexNode yesBranch = node.Child(0); - RegexNode? noBranch = node.Child(1) is { Type: not RegexNode.Empty } childNo ? childNo : null; + RegexNode? noBranch = node.Child(1) is { Kind: not RegexNodeKind.Empty } childNo ? childNo : null; string originalDoneLabel = doneLabel; // If the child branches might backtrack, we can't emit the branches inside constructs that @@ -1474,7 +1474,7 @@ void EmitBackreferenceConditional(RegexNode node) // Emits the code for an if(expression)-then-else conditional. void EmitExpressionConditional(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Testgroup, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.ExpressionConditional, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() == 3, $"Expected 3 children, found {node.ChildCount()}"); bool isAtomic = node.IsAtomicByParent(); @@ -1490,7 +1490,7 @@ void EmitExpressionConditional(RegexNode node) // Get the "yes" branch and the "no" branch. 
The "no" branch is optional in syntax and is thus // somewhat likely to be Empty. RegexNode yesBranch = node.Child(1); - RegexNode? noBranch = node.Child(2) is { Type: not RegexNode.Empty } childNo ? childNo : null; + RegexNode? noBranch = node.Child(2) is { Kind: not RegexNodeKind.Empty } childNo ? childNo : null; string originalDoneLabel = doneLabel; string expressionNotMatched = ReserveName("ConditionalExpressionNotMatched"); @@ -1632,7 +1632,7 @@ void EmitExpressionConditional(RegexNode node) // Emits the code for a Capture node. void EmitCapture(RegexNode node, RegexNode? subsequent = null) { - Debug.Assert(node.Type is RegexNode.Capture, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Capture, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}"); int capnum = RegexParser.MapCaptureNumber(node.M, rm.Code.Caps); @@ -1706,7 +1706,7 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null) // Emits the code to handle a positive lookahead assertion. void EmitPositiveLookaheadAssertion(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Require, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.PositiveLookaround, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}"); // Lookarounds are implicitly atomic. Store the original done label to reset at the end. @@ -1734,7 +1734,7 @@ void EmitPositiveLookaheadAssertion(RegexNode node) // Emits the code to handle a negative lookahead assertion. void EmitNegativeLookaheadAssertion(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Prevent, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.NegativeLookaround, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}"); // Lookarounds are implicitly atomic. 
Store the original done label to reset at the end. @@ -1770,15 +1770,15 @@ void EmitNegativeLookaheadAssertion(RegexNode node) static bool PossiblyBacktracks(RegexNode node) => !( // Certain nodes will never backtrack out of them - node.Type is RegexNode.Atomic or // atomic nodes by definition don't give up anything - RegexNode.Oneloopatomic or RegexNode.Notoneloopatomic or RegexNode.Setloopatomic or // same for atomic loops - RegexNode.One or RegexNode.Notone or RegexNode.Set or // individual characters don't backtrack - RegexNode.Multi or // multiple characters don't backtrack - RegexNode.Ref or // backreferences don't backtrack - RegexNode.Beginning or RegexNode.Bol or RegexNode.Start or RegexNode.End or RegexNode.EndZ or RegexNode.Eol or RegexNode.Boundary or RegexNode.NonBoundary or RegexNode.ECMABoundary or RegexNode.NonECMABoundary or // anchors don't backtrack - RegexNode.Nothing or RegexNode.Empty or RegexNode.UpdateBumpalong // empty/nothing don't do anything + node.Kind is RegexNodeKind.Atomic or // atomic nodes by definition don't give up anything + RegexNodeKind.Oneloopatomic or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Setloopatomic or // same for atomic loops + RegexNodeKind.One or RegexNodeKind.Notone or RegexNodeKind.Set or // individual characters don't backtrack + RegexNodeKind.Multi or // multiple characters don't backtrack + RegexNodeKind.Backreference or // backreferences don't backtrack + RegexNodeKind.Beginning or RegexNodeKind.Bol or RegexNodeKind.Start or RegexNodeKind.End or RegexNodeKind.EndZ or RegexNodeKind.Eol or RegexNodeKind.Boundary or RegexNodeKind.NonBoundary or RegexNodeKind.ECMABoundary or RegexNodeKind.NonECMABoundary or // anchors don't backtrack + RegexNodeKind.Nothing or RegexNodeKind.Empty or RegexNodeKind.UpdateBumpalong // empty/nothing don't do anything // Fixed-size repeaters of single characters or atomic don't backtrack - || node.Type is RegexNode.Oneloop or RegexNode.Notoneloop or RegexNode.Setloop or 
RegexNode.Onelazy or RegexNode.Notonelazy or RegexNode.Setlazy && node.M == node.N + || node.Kind is RegexNodeKind.Oneloop or RegexNodeKind.Notoneloop or RegexNodeKind.Setloop or RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy or RegexNodeKind.Setlazy && node.M == node.N ); // Emits the code for the node. @@ -1791,25 +1791,25 @@ void EmitNode(RegexNode node, RegexNode? subsequent = null, bool emitLengthCheck } // Separate out several node types that, for conciseness, don't need a header and scope written into the source. - switch (node.Type) + switch (node.Kind) { // Nothing is written for an empty - case RegexNode.Empty: + case RegexNodeKind.Empty: return; // A match failure doesn't need a scope. - case RegexNode.Nothing: + case RegexNodeKind.Nothing: writer.WriteLine($"goto {doneLabel};"); return; // Atomic is invisible in the generated source, other than its impact on the targets of jumps - case RegexNode.Atomic: + case RegexNodeKind.Atomic: EmitAtomic(node, subsequent); return; // Concatenate is a simplification in the node tree so that a series of children can be represented as one. // We don't need its presence visible in the source. - case RegexNode.Concatenate: + case RegexNodeKind.Concatenate: EmitConcatenation(node, subsequent, emitLengthChecksIfRequired); return; } @@ -1818,94 +1818,94 @@ void EmitNode(RegexNode node, RegexNode? subsequent = null, bool emitLengthCheck // be visible outside of its scope, the scope is still emitted for clarity but is commented out. 
using (EmitScope(writer, DescribeNode(node, rm.Code), faux: PossiblyBacktracks(node) && !node.IsAtomicByParent())) { - switch (node.Type) + switch (node.Kind) { - case RegexNode.Beginning: - case RegexNode.Start: - case RegexNode.Bol: - case RegexNode.Eol: - case RegexNode.End: - case RegexNode.EndZ: + case RegexNodeKind.Beginning: + case RegexNodeKind.Start: + case RegexNodeKind.Bol: + case RegexNodeKind.Eol: + case RegexNodeKind.End: + case RegexNodeKind.EndZ: EmitAnchors(node); break; - case RegexNode.Boundary: - case RegexNode.NonBoundary: - case RegexNode.ECMABoundary: - case RegexNode.NonECMABoundary: + case RegexNodeKind.Boundary: + case RegexNodeKind.NonBoundary: + case RegexNodeKind.ECMABoundary: + case RegexNodeKind.NonECMABoundary: EmitBoundary(node); break; - case RegexNode.Multi: + case RegexNodeKind.Multi: EmitMultiChar(node, emitLengthChecksIfRequired); break; - case RegexNode.One: - case RegexNode.Notone: - case RegexNode.Set: + case RegexNodeKind.One: + case RegexNodeKind.Notone: + case RegexNodeKind.Set: EmitSingleChar(node, emitLengthChecksIfRequired); break; - case RegexNode.Oneloop: - case RegexNode.Notoneloop: - case RegexNode.Setloop: + case RegexNodeKind.Oneloop: + case RegexNodeKind.Notoneloop: + case RegexNodeKind.Setloop: EmitSingleCharLoop(node, subsequent, emitLengthChecksIfRequired); break; - case RegexNode.Onelazy: - case RegexNode.Notonelazy: - case RegexNode.Setlazy: + case RegexNodeKind.Onelazy: + case RegexNodeKind.Notonelazy: + case RegexNodeKind.Setlazy: EmitSingleCharLazy(node, subsequent, emitLengthChecksIfRequired); break; - case RegexNode.Oneloopatomic: - case RegexNode.Notoneloopatomic: - case RegexNode.Setloopatomic: + case RegexNodeKind.Oneloopatomic: + case RegexNodeKind.Notoneloopatomic: + case RegexNodeKind.Setloopatomic: EmitSingleCharAtomicLoop(node, emitLengthChecksIfRequired); break; - case RegexNode.Loop: + case RegexNodeKind.Loop: EmitLoop(node); break; - case RegexNode.Lazyloop: + case RegexNodeKind.Lazyloop: 
EmitLazy(node); break; - case RegexNode.Alternate: + case RegexNodeKind.Alternate: EmitAlternation(node); break; - case RegexNode.Ref: + case RegexNodeKind.Backreference: EmitBackreference(node); break; - case RegexNode.Testref: + case RegexNodeKind.BackreferenceConditional: EmitBackreferenceConditional(node); break; - case RegexNode.Testgroup: + case RegexNodeKind.ExpressionConditional: EmitExpressionConditional(node); break; - case RegexNode.Capture: + case RegexNodeKind.Capture: EmitCapture(node, subsequent); break; - case RegexNode.Require: + case RegexNodeKind.PositiveLookaround: EmitPositiveLookaheadAssertion(node); break; - case RegexNode.Prevent: + case RegexNodeKind.NegativeLookaround: EmitNegativeLookaheadAssertion(node); break; - case RegexNode.UpdateBumpalong: + case RegexNodeKind.UpdateBumpalong: EmitUpdateBumpalong(node); break; default: - Debug.Fail($"Unexpected node type: {node.Type}"); + Debug.Fail($"Unexpected node type: {node.Kind}"); break; } } @@ -1914,7 +1914,7 @@ void EmitNode(RegexNode node, RegexNode? subsequent = null, bool emitLengthCheck // Emits the node for an atomic. void EmitAtomic(RegexNode node, RegexNode? subsequent) { - Debug.Assert(node.Type is RegexNode.Atomic, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Atomic, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}"); // Atomic simply outputs the code for the child, but it ensures that any done label left @@ -1930,7 +1930,7 @@ void EmitAtomic(RegexNode node, RegexNode? subsequent) // it should bump from this location rather than from the original location. 
void EmitUpdateBumpalong(RegexNode node) { - Debug.Assert(node.Type is RegexNode.UpdateBumpalong, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.UpdateBumpalong, $"Unexpected type: {node.Kind}"); TransferSliceStaticPosToPos(); using (EmitBlock(writer, "if (base.runtextpos < pos)")) @@ -1942,7 +1942,7 @@ void EmitUpdateBumpalong(RegexNode node) // Emits code for a concatenation void EmitConcatenation(RegexNode node, RegexNode? subsequent, bool emitLengthChecksIfRequired) { - Debug.Assert(node.Type is RegexNode.Concatenate, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Concatenate, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() >= 2, $"Expected at least 2 children, found {node.ChildCount()}"); // Emit the code for each child one after the other. @@ -1979,13 +1979,13 @@ void WriteSingleCharChild(RegexNode child, bool includeDescription = true) } RegexNode child = node.Child(i); - if (child.Type is RegexNode.One or RegexNode.Notone or RegexNode.Set) + if (child.Kind is RegexNodeKind.One or RegexNodeKind.Notone or RegexNodeKind.Set) { WriteSingleCharChild(child); } - else if (child.Type is RegexNode.Oneloop or RegexNode.Onelazy or RegexNode.Oneloopatomic or - RegexNode.Setloop or RegexNode.Setlazy or RegexNode.Setloopatomic or - RegexNode.Notoneloop or RegexNode.Notonelazy or RegexNode.Notoneloopatomic && + else if (child.Kind is RegexNodeKind.Oneloop or RegexNodeKind.Onelazy or RegexNodeKind.Oneloopatomic or + RegexNodeKind.Setloop or RegexNodeKind.Setlazy or RegexNodeKind.Setloopatomic or + RegexNodeKind.Notoneloop or RegexNodeKind.Notonelazy or RegexNodeKind.Notoneloopatomic && child.M == child.N && child.M <= MaxUnrollSize) { @@ -2046,7 +2046,7 @@ RegexNode.Notoneloop or RegexNode.Notonelazy or RegexNode.Notoneloopatomic && for (int i = index + 1; i < childCount; i++) { RegexNode next = node.Child(i); - if (next.Type is not RegexNode.UpdateBumpalong) // skip node types that don't have a 
semantic impact + if (next.Kind is not RegexNodeKind.UpdateBumpalong) // skip node types that don't have a semantic impact { return next; } @@ -2059,7 +2059,7 @@ RegexNode.Notoneloop or RegexNode.Notonelazy or RegexNode.Notoneloopatomic && // Emits the code to handle a single-character match. void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, string? offset = null, bool clauseOnly = false) { - Debug.Assert(node.IsOneFamily || node.IsNotoneFamily || node.IsSetFamily, $"Unexpected type: {node.Type}"); + Debug.Assert(node.IsOneFamily || node.IsNotoneFamily || node.IsSetFamily, $"Unexpected type: {node.Kind}"); // This only emits a single check, but it's called from the looping constructs in a loop // to generate the code for a single check, so we map those looping constructs to the @@ -2095,13 +2095,13 @@ void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, string? offset // Emits the code to handle a boundary check on a character. void EmitBoundary(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Boundary or RegexNode.NonBoundary or RegexNode.ECMABoundary or RegexNode.NonECMABoundary, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Boundary or RegexNodeKind.NonBoundary or RegexNodeKind.ECMABoundary or RegexNodeKind.NonECMABoundary, $"Unexpected type: {node.Kind}"); - string call = node.Type switch + string call = node.Kind switch { - RegexNode.Boundary => "!base.IsBoundary", - RegexNode.NonBoundary => "base.IsBoundary", - RegexNode.ECMABoundary => "!base.IsECMABoundary", + RegexNodeKind.Boundary => "!base.IsBoundary", + RegexNodeKind.NonBoundary => "base.IsBoundary", + RegexNodeKind.ECMABoundary => "!base.IsECMABoundary", _ => "base.IsECMABoundary", }; @@ -2114,13 +2114,13 @@ void EmitBoundary(RegexNode node) // Emits the code to handle various anchors. 
void EmitAnchors(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Beginning or RegexNode.Start or RegexNode.Bol or RegexNode.End or RegexNode.EndZ or RegexNode.Eol, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Beginning or RegexNodeKind.Start or RegexNodeKind.Bol or RegexNodeKind.End or RegexNodeKind.EndZ or RegexNodeKind.Eol, $"Unexpected type: {node.Kind}"); Debug.Assert(sliceStaticPos >= 0); - switch (node.Type) + switch (node.Kind) { - case RegexNode.Beginning: - case RegexNode.Start: + case RegexNodeKind.Beginning: + case RegexNodeKind.Start: if (sliceStaticPos > 0) { // If we statically know we've already matched part of the regex, there's no way we're at the @@ -2129,15 +2129,15 @@ void EmitAnchors(RegexNode node) } else { - additionalDeclarations.Add(node.Type == RegexNode.Beginning ? "int beginning = base.runtextbeg;" : "int start = base.runtextstart;"); - using (EmitBlock(writer, node.Type == RegexNode.Beginning ? "if (pos != beginning)" : "if (pos != start)")) + additionalDeclarations.Add(node.Kind == RegexNodeKind.Beginning ? "int beginning = base.runtextbeg;" : "int start = base.runtextstart;"); + using (EmitBlock(writer, node.Kind == RegexNodeKind.Beginning ? 
"if (pos != beginning)" : "if (pos != start)")) { writer.WriteLine($"goto {doneLabel};"); } } break; - case RegexNode.Bol: + case RegexNodeKind.Bol: if (sliceStaticPos > 0) { using (EmitBlock(writer, $"if ({sliceSpan}[{sliceStaticPos - 1}] != '\\n')")) @@ -2156,14 +2156,14 @@ void EmitAnchors(RegexNode node) } break; - case RegexNode.End: + case RegexNodeKind.End: using (EmitBlock(writer, $"if ({IsSliceLengthGreaterThanSliceStaticPos()})")) { writer.WriteLine($"goto {doneLabel};"); } break; - case RegexNode.EndZ: + case RegexNodeKind.EndZ: writer.WriteLine($"if ({sliceSpan}.Length > {sliceStaticPos + 1} || ({IsSliceLengthGreaterThanSliceStaticPos()} && {sliceSpan}[{sliceStaticPos}] != '\\n'))"); using (EmitBlock(writer, null)) { @@ -2171,7 +2171,7 @@ void EmitAnchors(RegexNode node) } break; - case RegexNode.Eol: + case RegexNodeKind.Eol: using (EmitBlock(writer, $"if ({IsSliceLengthGreaterThanSliceStaticPos()} && {sliceSpan}[{sliceStaticPos}] != '\\n')")) { writer.WriteLine($"goto {doneLabel};"); @@ -2187,7 +2187,7 @@ string IsSliceLengthGreaterThanSliceStaticPos() => // Emits the code to handle a multiple-character match. void EmitMultiChar(RegexNode node, bool emitLengthCheck = true) { - Debug.Assert(node.Type is RegexNode.Multi, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Multi, $"Unexpected type: {node.Kind}"); bool caseInsensitive = IsCaseInsensitive(node); @@ -2297,7 +2297,7 @@ void EmitOr() void EmitSingleCharLoop(RegexNode node, RegexNode? subsequent = null, bool emitLengthChecksIfRequired = true) { - Debug.Assert(node.Type is RegexNode.Oneloop or RegexNode.Notoneloop or RegexNode.Setloop, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Oneloop or RegexNodeKind.Notoneloop or RegexNodeKind.Setloop, $"Unexpected type: {node.Kind}"); // If this is actually a repeater, emit that instead; no backtracking necessary. 
if (node.M == node.N) @@ -2382,7 +2382,7 @@ void EmitSingleCharLoop(RegexNode node, RegexNode? subsequent = null, bool emitL void EmitSingleCharLazy(RegexNode node, RegexNode? subsequent = null, bool emitLengthChecksIfRequired = true) { - Debug.Assert(node.Type is RegexNode.Onelazy or RegexNode.Notonelazy or RegexNode.Setlazy, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy or RegexNodeKind.Setlazy, $"Unexpected type: {node.Kind}"); // Emit the min iterations as a repeater. Any failures here don't necessitate backtracking, // as the lazy itself failed to match, and there's no backtracking possible by the individual @@ -2471,7 +2471,7 @@ void EmitSingleCharLazy(RegexNode node, RegexNode? subsequent = null, bool emitL // Now that we've appropriately advanced by one character and are set for what comes after the loop, // see if we can skip ahead more iterations by doing a search for a following literal. if (iterationCount is null && - node.Type is RegexNode.Notonelazy && + node.Kind is RegexNodeKind.Notonelazy && !IsCaseInsensitive(node) && subsequent?.FindStartingCharacterOrString() is ValueTuple literal && (literal.Item2?[0] ?? 
literal.Item1) != node.Ch) @@ -2489,7 +2489,7 @@ node.Type is RegexNode.Notonelazy && SliceInputSpan(writer); } else if (iterationCount is null && - node.Type is RegexNode.Setlazy && + node.Kind is RegexNodeKind.Setlazy && node.Str == RegexCharClass.AnyClass && subsequent?.FindStartingCharacterOrString() is ValueTuple literal2) { @@ -2559,7 +2559,7 @@ node.Type is RegexNode.Setlazy && void EmitLazy(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Lazyloop, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Lazyloop, $"Unexpected type: {node.Kind}"); Debug.Assert(node.M < int.MaxValue, $"Unexpected M={node.M}"); Debug.Assert(node.N >= node.M, $"Unexpected M={node.M}, N={node.N}"); Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}"); @@ -2744,7 +2744,7 @@ void EmitLazy(RegexNode node) // RegexNode.M is used for the number of iterations; RegexNode.N is ignored. void EmitSingleCharFixedRepeater(RegexNode node, bool emitLengthCheck = true) { - Debug.Assert(node.IsOneFamily || node.IsNotoneFamily || node.IsSetFamily, $"Unexpected type: {node.Type}"); + Debug.Assert(node.IsOneFamily || node.IsNotoneFamily || node.IsSetFamily, $"Unexpected type: {node.Kind}"); int iterations = node.M; if (iterations == 0) @@ -2810,7 +2810,7 @@ void EmitSingleCharFixedRepeater(RegexNode node, bool emitLengthCheck = true) // Emits the code to handle a non-backtracking, variable-length loop around a single character comparison. 
void EmitSingleCharAtomicLoop(RegexNode node, bool emitLengthChecksIfRequired = true) { - Debug.Assert(node.Type is RegexNode.Oneloop or RegexNode.Oneloopatomic or RegexNode.Notoneloop or RegexNode.Notoneloopatomic or RegexNode.Setloop or RegexNode.Setloopatomic, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic, $"Unexpected type: {node.Kind}"); // If this is actually a repeater, emit that instead. if (node.M == node.N) @@ -2953,7 +2953,7 @@ void EmitSingleCharAtomicLoop(RegexNode node, bool emitLengthChecksIfRequired = // Emits the code to handle a non-backtracking optional zero-or-one loop. void EmitAtomicSingleCharZeroOrOne(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Oneloop or RegexNode.Oneloopatomic or RegexNode.Notoneloop or RegexNode.Notoneloopatomic or RegexNode.Setloop or RegexNode.Setloopatomic, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic, $"Unexpected type: {node.Kind}"); Debug.Assert(node.M == 0 && node.N == 1); string expr = $"{sliceSpan}[{sliceStaticPos}]"; @@ -2977,7 +2977,7 @@ void EmitAtomicSingleCharZeroOrOne(RegexNode node) void EmitLoop(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Loop or RegexNode.Lazyloop, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Loop or RegexNodeKind.Lazyloop, $"Unexpected type: {node.Kind}"); Debug.Assert(node.M < int.MaxValue, $"Unexpected M={node.M}"); Debug.Assert(node.N >= node.M, $"Unexpected M={node.M}, N={node.N}"); Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}"); @@ -3565,41 +3565,41 @@ private static string Literal(RegexOptions options) /// Gets a textual 
description of the node fit for rendering in a comment in source. private static string DescribeNode(RegexNode node, RegexCode regexCode) => - node.Type switch - { - RegexNode.Alternate => $"Match with {node.ChildCount()} alternative expressions{(node.IsAtomicByParent() ? ", atomically" : "")}.", - RegexNode.Atomic => $"Atomic group.", - RegexNode.Beginning => "Match if at the beginning of the string.", - RegexNode.Bol => "Match if at the beginning of a line.", - RegexNode.Boundary => $"Match if at a word boundary.", - RegexNode.Capture when node.M == -1 && node.N != -1 => $"Non-capturing balancing group. Uncaptures the {DescribeCapture(node.N, regexCode)}.", - RegexNode.Capture when node.N != -1 => $"Balancing group. Captures the {DescribeCapture(node.M, regexCode)} and uncaptures the {DescribeCapture(node.N, regexCode)}.", - RegexNode.Capture when node.N == -1 => $"{DescribeCapture(node.M, regexCode)}.", - RegexNode.Concatenate => "Match a sequence of expressions.", - RegexNode.ECMABoundary => $"Match if at a word boundary (according to ECMAScript rules).", - RegexNode.Empty => $"Match an empty string.", - RegexNode.End => "Match if at the end of the string.", - RegexNode.EndZ => "Match if at the end of the string or if before an ending newline.", - RegexNode.Eol => "Match if at the end of a line.", - RegexNode.Loop or RegexNode.Lazyloop => node.M == 0 && node.N == 1 ? $"Optional ({(node.Type is RegexNode.Loop ? "greedy" : "lazy")})." 
: $"Loop {DescribeLoop(node)}.", - RegexNode.Multi => $"Match the string {Literal(node.Str!)}.", - RegexNode.NonBoundary => $"Match if at anything other than a word boundary.", - RegexNode.NonECMABoundary => $"Match if at anything other than a word boundary (according to ECMAScript rules).", - RegexNode.Nothing => $"Fail to match.", - RegexNode.Notone => $"Match any character other than {Literal(node.Ch)}.", - RegexNode.Notoneloop or RegexNode.Notoneloopatomic or RegexNode.Notonelazy => $"Match a character other than {Literal(node.Ch)} {DescribeLoop(node)}.", - RegexNode.One => $"Match {Literal(node.Ch)}.", - RegexNode.Oneloop or RegexNode.Oneloopatomic or RegexNode.Onelazy => $"Match {Literal(node.Ch)} {DescribeLoop(node)}.", - RegexNode.Prevent => $"Zero-width negative lookahead assertion.", - RegexNode.Ref => $"Match the same text as matched by the {DescribeCapture(node.M, regexCode)}.", - RegexNode.Require => $"Zero-width positive lookahead assertion.", - RegexNode.Set => $"Match {DescribeSet(node.Str!)}.", - RegexNode.Setloop or RegexNode.Setloopatomic or RegexNode.Setlazy => $"Match {DescribeSet(node.Str!)} {DescribeLoop(node)}.", - RegexNode.Start => "Match if at the start position.", - RegexNode.Testgroup => $"Conditionally match one of two expressions depending on whether an initial expression matches.", - RegexNode.Testref => $"Conditionally match one of two expressions depending on whether the {DescribeCapture(node.M, regexCode)} matched.", - RegexNode.UpdateBumpalong => $"Advance the next matching position.", - _ => $"Unknown node type {node.Type}", + node.Kind switch + { + RegexNodeKind.Alternate => $"Match with {node.ChildCount()} alternative expressions{(node.IsAtomicByParent() ? 
", atomically" : "")}.", + RegexNodeKind.Atomic => $"Atomic group.", + RegexNodeKind.Beginning => "Match if at the beginning of the string.", + RegexNodeKind.Bol => "Match if at the beginning of a line.", + RegexNodeKind.Boundary => $"Match if at a word boundary.", + RegexNodeKind.Capture when node.M == -1 && node.N != -1 => $"Non-capturing balancing group. Uncaptures the {DescribeCapture(node.N, regexCode)}.", + RegexNodeKind.Capture when node.N != -1 => $"Balancing group. Captures the {DescribeCapture(node.M, regexCode)} and uncaptures the {DescribeCapture(node.N, regexCode)}.", + RegexNodeKind.Capture when node.N == -1 => $"{DescribeCapture(node.M, regexCode)}.", + RegexNodeKind.Concatenate => "Match a sequence of expressions.", + RegexNodeKind.ECMABoundary => $"Match if at a word boundary (according to ECMAScript rules).", + RegexNodeKind.Empty => $"Match an empty string.", + RegexNodeKind.End => "Match if at the end of the string.", + RegexNodeKind.EndZ => "Match if at the end of the string or if before an ending newline.", + RegexNodeKind.Eol => "Match if at the end of a line.", + RegexNodeKind.Loop or RegexNodeKind.Lazyloop => node.M == 0 && node.N == 1 ? $"Optional ({(node.Kind is RegexNodeKind.Loop ? "greedy" : "lazy")})." 
: $"Loop {DescribeLoop(node)}.", + RegexNodeKind.Multi => $"Match the string {Literal(node.Str!)}.", + RegexNodeKind.NonBoundary => $"Match if at anything other than a word boundary.", + RegexNodeKind.NonECMABoundary => $"Match if at anything other than a word boundary (according to ECMAScript rules).", + RegexNodeKind.Nothing => $"Fail to match.", + RegexNodeKind.Notone => $"Match any character other than {Literal(node.Ch)}.", + RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Notonelazy => $"Match a character other than {Literal(node.Ch)} {DescribeLoop(node)}.", + RegexNodeKind.One => $"Match {Literal(node.Ch)}.", + RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy => $"Match {Literal(node.Ch)} {DescribeLoop(node)}.", + RegexNodeKind.NegativeLookaround => $"Zero-width negative lookahead assertion.", + RegexNodeKind.Backreference => $"Match the same text as matched by the {DescribeCapture(node.M, regexCode)}.", + RegexNodeKind.PositiveLookaround => $"Zero-width positive lookahead assertion.", + RegexNodeKind.Set => $"Match {DescribeSet(node.Str!)}.", + RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy => $"Match {DescribeSet(node.Str!)} {DescribeLoop(node)}.", + RegexNodeKind.Start => "Match if at the start position.", + RegexNodeKind.ExpressionConditional => $"Conditionally match one of two expressions depending on whether an initial expression matches.", + RegexNodeKind.BackreferenceConditional => $"Conditionally match one of two expressions depending on whether the {DescribeCapture(node.M, regexCode)} matched.", + RegexNodeKind.UpdateBumpalong => $"Advance the next matching position.", + _ => $"Unknown node type {node.Kind}", }; /// Gets an identifer to describe a capture group. @@ -3657,14 +3657,14 @@ private static string DescribeSet(string charClass) => /// The depth of the current node. 
private static void DescribeExpression(TextWriter writer, RegexNode node, string prefix, RegexCode regexCode, int depth = 0) { - bool skip = node.Type switch + bool skip = node.Kind switch { // For concatenations, flatten the contents into the parent, but only if the parent isn't a form of alternation, // where each branch is considered to be independent rather than a concatenation. - RegexNode.Concatenate when node.Next is not { Type: RegexNode.Alternate or RegexNode.Testref or RegexNode.Testgroup } => true, + RegexNodeKind.Concatenate when node.Parent is not { Kind: RegexNodeKind.Alternate or RegexNodeKind.BackreferenceConditional or RegexNodeKind.ExpressionConditional } => true, // For atomic, skip the node if we'll instead render the atomic label as part of rendering the child. - RegexNode.Atomic when node.Child(0).Type is RegexNode.Loop or RegexNode.Lazyloop or RegexNode.Alternate => true, + RegexNodeKind.Atomic when node.Child(0).Kind is RegexNodeKind.Loop or RegexNodeKind.Lazyloop or RegexNodeKind.Alternate => true, // Don't skip anything else. 
_ => false, @@ -3672,14 +3672,14 @@ RegexNode.Atomic when node.Child(0).Type is RegexNode.Loop or RegexNode.Lazyloop if (!skip) { - string tag = node.Next?.Type switch + string tag = node.Parent?.Kind switch { - RegexNode.Testgroup when node.Next.Child(0) == node => "Condition: ", - RegexNode.Testgroup when node.Next.Child(1) == node => "Matched: ", - RegexNode.Testgroup when node.Next.Child(2) == node => "Not Matched: ", + RegexNodeKind.ExpressionConditional when node.Parent.Child(0) == node => "Condition: ", + RegexNodeKind.ExpressionConditional when node.Parent.Child(1) == node => "Matched: ", + RegexNodeKind.ExpressionConditional when node.Parent.Child(2) == node => "Not Matched: ", - RegexNode.Testref when node.Next.Child(0) == node => "Matched: ", - RegexNode.Testref when node.Next.Child(1) == node => "Not Matched: ", + RegexNodeKind.BackreferenceConditional when node.Parent.Child(0) == node => "Matched: ", + RegexNodeKind.BackreferenceConditional when node.Parent.Child(1) == node => "Not Matched: ", _ => "", }; @@ -3701,13 +3701,13 @@ RegexNode.Testref when node.Next.Child(1) == node => "Not Matched: ", /// Gets a textual description of a loop's style and bounds. private static string DescribeLoop(RegexNode node) { - string style = node.Type switch + string style = node.Kind switch { _ when node.M == node.N => "exactly", - RegexNode.Oneloopatomic or RegexNode.Notoneloopatomic or RegexNode.Setloopatomic => "atomically", - RegexNode.Oneloop or RegexNode.Notoneloop or RegexNode.Setloop => "greedily", - RegexNode.Onelazy or RegexNode.Notonelazy or RegexNode.Setlazy => "lazily", - RegexNode.Loop => node.IsAtomicByParent() ? 
"greedily and atomically" : "greedily", + RegexNodeKind.Oneloopatomic or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Setloopatomic => "atomically", + RegexNodeKind.Oneloop or RegexNodeKind.Notoneloop or RegexNodeKind.Setloop => "greedily", + RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy or RegexNodeKind.Setlazy => "lazily", + RegexNodeKind.Loop => node.IsAtomicByParent() ? "greedily and atomically" : "greedily", _ /* RegexNode.Lazy */ => node.IsAtomicByParent() ? "lazily and atomically" : "lazily", }; diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx b/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx index 2ce09c60fb6e59..72dedb53b0b910 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx @@ -289,9 +289,6 @@ Alternation has a reference to undefined group. - - Unexpected opcode in regular expression generation: {0}. - Unknown property '{0}'. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf index 3907e73e7ca2ba..670114d466a8f9 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf @@ -292,11 +292,6 @@ Znak \\ na konci vzorku je neplatný. - - Unexpected opcode in regular expression generation: {0}. - Při generování regulárního výrazu byl nalezen neočekávaný operační kód: {0}. - - Unrecognized control character. Nerozpoznaný řídicí znak. 
diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf index 67beed526576d2..decb3d41eecab9 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf @@ -292,11 +292,6 @@ Unzulässiger \\ am Ende des Musters. - - Unexpected opcode in regular expression generation: {0}. - Unerwarteter "opcode" bei Generierung von regulärem Ausdruck: {0}. - - Unrecognized control character. Unbekanntes Steuerzeichen. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf index e97ec8feaf1b2a..dfcb4b46eb8cbb 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf @@ -292,11 +292,6 @@ \\ no válido al final del patrón. - - Unexpected opcode in regular expression generation: {0}. - Código de operación inesperado en la generación de expresión regular: {0}. - - Unrecognized control character. Carácter de control no reconocido. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf index 16efb2d977b0e9..0413c2f3697287 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf @@ -292,11 +292,6 @@ \\ non conforme à la fin du modèle. - - Unexpected opcode in regular expression generation: {0}. - opcode inattendu dans la génération d'expressions régulières : {0}. - - Unrecognized control character. Caractère de contrôle non reconnu. 
diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf index f7d730755b8462..2f32980d35f71f 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf @@ -292,11 +292,6 @@ Carattere \\ non valido alla fine del criterio. - - Unexpected opcode in regular expression generation: {0}. - Codice operativo imprevisto nella generazione dell'espressione regolare: {0}. - - Unrecognized control character. Carattere di controllo non riconosciuto. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf index f14f085a8f42ce..8dddebb2d887ec 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf @@ -292,11 +292,6 @@ パターンの末尾に無効な \\ があります。 - - Unexpected opcode in regular expression generation: {0}. - 正規表現の生成中に予期しない Opcode: {0} が発生しました。 - - Unrecognized control character. 認識されない制御文字です。 diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf index 618b1067b0ebab..369483d521ad69 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf @@ -292,11 +292,6 @@ 패턴 끝에 잘못된 \\가 있습니다. - - Unexpected opcode in regular expression generation: {0}. - 정규식 생성에 예기치 않은 opcode가 있습니다. {0} - - Unrecognized control character. 인식할 수 없는 제어 문자입니다. 
diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf index f8cde42177c715..01efb83445bb5e 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf @@ -292,11 +292,6 @@ Niedozwolony znak \\ na końcu wzorca. - - Unexpected opcode in regular expression generation: {0}. - Nieoczekiwany kod operacji podczas generowania wyrażenia regularnego: {0}. - - Unrecognized control character. Nierozpoznany znak kontrolny. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf index 6991eccb5b9cf6..9f039335387e4a 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf @@ -292,11 +292,6 @@ \\ incorreto no final do padrão. - - Unexpected opcode in regular expression generation: {0}. - Opcode inesperado na geração de expressão regular: {0}. - - Unrecognized control character. Caractere de controle não reconhecido. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf index 4f777ba7d0ccdb..5adb41132357ce 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf @@ -292,11 +292,6 @@ Недопустимая обратная косая черта \\ в конце образца. - - Unexpected opcode in regular expression generation: {0}. - Неожиданный код операции при создании регулярного выражения: {0}. - - Unrecognized control character. Нераспознанный управляющий знак. 
diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf index c1492d873c5f57..9622b266daee22 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf @@ -292,11 +292,6 @@ Desenin sonunda geçersiz \\. - - Unexpected opcode in regular expression generation: {0}. - Normal ifade oluşturmada beklenmeyen işlem kodu: {0}. - - Unrecognized control character. Tanınmayan denetim karakteri. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf index 8bc7e9656e5be4..6b007977907ab5 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf @@ -292,11 +292,6 @@ \\ 在模式末尾非法。 - - Unexpected opcode in regular expression generation: {0}. - 生成正则表达式时出现意外的操作码: {0}。 - - Unrecognized control character. 无法识别的控制字符。 diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf index 4e598383d8266e..7972b6b990e627 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf @@ -292,11 +292,6 @@ 模式結尾有不合法的 \\。 - - Unexpected opcode in regular expression generation: {0}. - 在規則運算式產生中發生未預期的作業碼: {0}。 - - Unrecognized control character. 
無法識別的控制字元。 diff --git a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj index 56f7899f081db8..8f24e872e22660 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj +++ b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj @@ -35,6 +35,7 @@ + diff --git a/src/libraries/System.Text.RegularExpressions/src/Resources/Strings.resx b/src/libraries/System.Text.RegularExpressions/src/Resources/Strings.resx index ba430db54c5a3b..52b6616648dd28 100644 --- a/src/libraries/System.Text.RegularExpressions/src/Resources/Strings.resx +++ b/src/libraries/System.Text.RegularExpressions/src/Resources/Strings.resx @@ -200,9 +200,6 @@ Alternation has a reference to undefined group. - - Unexpected opcode in regular expression generation: {0}. - Unknown property '{0}'. 
diff --git a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj index 8537fd70de5276..acda3098b795ce 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj +++ b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj @@ -32,6 +32,7 @@ + diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs index 6f6c8cd8f8852f..d51ca826fd71c6 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs @@ -210,7 +210,8 @@ public static int OpcodeSize(int opcode) return 3; default: - throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, opcode.ToString())); + Debug.Fail($"Unexpected opcode: {opcode}"); + return 0; } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index 07a3d1b927cd33..2145a5dec334d2 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -300,8 +300,8 @@ private void Mvfldloc(FieldInfo ft, LocalBuilder lt) /// Returned a rented local to the pool. 
private struct RentedLocalBuilder : IDisposable { - private Stack _pool; - private LocalBuilder _local; + private readonly Stack _pool; + private readonly LocalBuilder _local; internal RentedLocalBuilder(Stack pool, LocalBuilder local) { @@ -773,7 +773,7 @@ void EmitFixedSet_LeftToRight() // if (!CharInClass(slice[i + 1], prefix[1], "...")) continue; // if (!CharInClass(slice[i + 2], prefix[2], "...")) continue; // ... - Debug.Assert(setIndex == 0 || setIndex == 1); + Debug.Assert(setIndex is 0 or 1); for ( ; setIndex < sets.Count; setIndex++) { Debug.Assert(needLoop); @@ -834,7 +834,7 @@ void EmitLiteralAfterAtomicLoop() Debug.Assert(_code.FindOptimizations.LiteralAfterLoop is not null); (RegexNode LoopNode, (char Char, string? String, char[]? Chars) Literal) target = _code.FindOptimizations.LiteralAfterLoop.Value; - Debug.Assert(target.LoopNode.Type is RegexNode.Setloop or RegexNode.Setlazy or RegexNode.Setloopatomic); + Debug.Assert(target.LoopNode.Kind is RegexNodeKind.Setloop or RegexNodeKind.Setlazy or RegexNodeKind.Setloopatomic); Debug.Assert(target.LoopNode.N == int.MaxValue); // while (true) @@ -1003,7 +1003,7 @@ protected void EmitGo() // Get the root Capture node of the tree. RegexNode node = _code.Tree.Root; - Debug.Assert(node.Type == RegexNode.Capture, "Every generated tree should begin with a capture node"); + Debug.Assert(node.Kind == RegexNodeKind.Capture, "Every generated tree should begin with a capture node"); Debug.Assert(node.ChildCount() == 1, "Capture nodes should have one child"); // Skip the Capture node. We handle the implicit root capture specially. @@ -1012,9 +1012,9 @@ protected void EmitGo() // In some limited cases, FindFirstChar will only return true if it successfully matched the whole expression. // We can special case these to do essentially nothing in Go other than emit the capture. 
- switch (node.Type) + switch (node.Kind) { - case RegexNode.Multi or RegexNode.Notone or RegexNode.One or RegexNode.Set when !IsCaseInsensitive(node): + case RegexNodeKind.Multi or RegexNodeKind.Notone or RegexNodeKind.One or RegexNodeKind.Set when !IsCaseInsensitive(node): // This is the case for single and multiple characters, though the whole thing is only guaranteed // to have been validated in FindFirstChar when doing case-sensitive comparison. // base.Capture(0, base.runtextpos, base.runtextpos + node.Str.Length); @@ -1025,11 +1025,11 @@ protected void EmitGo() Ldc(0); Ldthisfld(s_runtextposField); Dup(); - Ldc(node.Type == RegexNode.Multi ? node.Str!.Length : 1); + Ldc(node.Kind == RegexNodeKind.Multi ? node.Str!.Length : 1); Add(); Call(s_captureMethod); Ldthisfld(s_runtextposField); - Ldc(node.Type == RegexNode.Multi ? node.Str!.Length : 1); + Ldc(node.Kind == RegexNodeKind.Multi ? node.Str!.Length : 1); Add(); Stfld(s_runtextposField); Ret(); @@ -1227,7 +1227,7 @@ void TransferSliceStaticPosToPos() // Emits the code for an alternation. void EmitAlternation(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Alternate, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Alternate, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() >= 2, $"Expected at least 2 children, found {node.ChildCount()}"); int childCount = node.ChildCount(); @@ -1238,7 +1238,7 @@ void EmitAlternation(RegexNode node) // Both atomic and non-atomic are supported. While a parent RegexNode.Atomic node will itself // successfully prevent backtracking into this child node, we can emit better / cheaper code // for an Alternate when it is atomic, so we still take it into account here. - Debug.Assert(node.Next is not null); + Debug.Assert(node.Parent is not null); bool isAtomic = node.IsAtomicByParent(); // Label to jump to when any branch completes successfully. 
@@ -1404,7 +1404,7 @@ void EmitAlternation(RegexNode node) // Emits the code to handle a backreference. void EmitBackreference(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Ref, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Backreference, $"Unexpected type: {node.Kind}"); int capnum = RegexParser.MapCaptureNumber(node.M, _code!.Caps); @@ -1496,7 +1496,7 @@ void EmitBackreference(RegexNode node) // Emits the code for an if(backreference)-then-else conditional. void EmitBackreferenceConditional(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Testref, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.BackreferenceConditional, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() == 2, $"Expected 2 children, found {node.ChildCount()}"); bool isAtomic = node.IsAtomicByParent(); @@ -1510,7 +1510,7 @@ void EmitBackreferenceConditional(RegexNode node) // Get the "yes" branch and the "no" branch. The "no" branch is optional in syntax and is thus // somewhat likely to be Empty. RegexNode yesBranch = node.Child(0); - RegexNode? noBranch = node.Child(1) is { Type: not RegexNode.Empty } childNo ? childNo : null; + RegexNode? noBranch = node.Child(1) is { Kind: not RegexNodeKind.Empty } childNo ? childNo : null; Label originalDoneLabel = doneLabel; Label refNotMatched = DefineLabel(); @@ -1642,7 +1642,7 @@ void EmitBackreferenceConditional(RegexNode node) // Emits the code for an if(expression)-then-else conditional. void EmitExpressionConditional(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Testgroup, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.ExpressionConditional, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() == 3, $"Expected 3 children, found {node.ChildCount()}"); bool isAtomic = node.IsAtomicByParent(); @@ -1658,7 +1658,7 @@ void EmitExpressionConditional(RegexNode node) // Get the "yes" branch and the "no" branch. 
The "no" branch is optional in syntax and is thus // somewhat likely to be Empty. RegexNode yesBranch = node.Child(1); - RegexNode? noBranch = node.Child(2) is { Type: not RegexNode.Empty } childNo ? childNo : null; + RegexNode? noBranch = node.Child(2) is { Kind: not RegexNodeKind.Empty } childNo ? childNo : null; Label originalDoneLabel = doneLabel; Label expressionNotMatched = DefineLabel(); @@ -1822,7 +1822,7 @@ void EmitExpressionConditional(RegexNode node) // Emits the code for a Capture node. void EmitCapture(RegexNode node, RegexNode? subsequent = null) { - Debug.Assert(node.Type is RegexNode.Capture, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Capture, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}"); int capnum = RegexParser.MapCaptureNumber(node.M, _code!.Caps); @@ -1939,7 +1939,7 @@ void EmitUncaptureUntil(LocalBuilder startingCapturePos) // Emits the code to handle a positive lookahead assertion. void EmitPositiveLookaheadAssertion(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Require, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.PositiveLookaround, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}"); // Lookarounds are implicitly atomic. Store the original done label to reset at the end. @@ -1970,7 +1970,7 @@ void EmitPositiveLookaheadAssertion(RegexNode node) // Emits the code to handle a negative lookahead assertion. void EmitNegativeLookaheadAssertion(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Prevent, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.NegativeLookaround, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}"); // Lookarounds are implicitly atomic. Store the original done label to reset at the end. 
@@ -2020,110 +2020,110 @@ void EmitNode(RegexNode node, RegexNode? subsequent = null, bool emitLengthCheck return; } - switch (node.Type) + switch (node.Kind) { - case RegexNode.Beginning: - case RegexNode.Start: - case RegexNode.Bol: - case RegexNode.Eol: - case RegexNode.End: - case RegexNode.EndZ: + case RegexNodeKind.Beginning: + case RegexNodeKind.Start: + case RegexNodeKind.Bol: + case RegexNodeKind.Eol: + case RegexNodeKind.End: + case RegexNodeKind.EndZ: EmitAnchors(node); break; - case RegexNode.Boundary: - case RegexNode.NonBoundary: - case RegexNode.ECMABoundary: - case RegexNode.NonECMABoundary: + case RegexNodeKind.Boundary: + case RegexNodeKind.NonBoundary: + case RegexNodeKind.ECMABoundary: + case RegexNodeKind.NonECMABoundary: EmitBoundary(node); break; - case RegexNode.Multi: + case RegexNodeKind.Multi: EmitMultiChar(node, emitLengthChecksIfRequired); break; - case RegexNode.One: - case RegexNode.Notone: - case RegexNode.Set: + case RegexNodeKind.One: + case RegexNodeKind.Notone: + case RegexNodeKind.Set: EmitSingleChar(node, emitLengthChecksIfRequired); break; - case RegexNode.Oneloop: - case RegexNode.Notoneloop: - case RegexNode.Setloop: + case RegexNodeKind.Oneloop: + case RegexNodeKind.Notoneloop: + case RegexNodeKind.Setloop: EmitSingleCharLoop(node, subsequent, emitLengthChecksIfRequired); break; - case RegexNode.Onelazy: - case RegexNode.Notonelazy: - case RegexNode.Setlazy: + case RegexNodeKind.Onelazy: + case RegexNodeKind.Notonelazy: + case RegexNodeKind.Setlazy: EmitSingleCharLazy(node, subsequent, emitLengthChecksIfRequired); break; - case RegexNode.Oneloopatomic: - case RegexNode.Notoneloopatomic: - case RegexNode.Setloopatomic: + case RegexNodeKind.Oneloopatomic: + case RegexNodeKind.Notoneloopatomic: + case RegexNodeKind.Setloopatomic: EmitSingleCharAtomicLoop(node); break; - case RegexNode.Loop: + case RegexNodeKind.Loop: EmitLoop(node); break; - case RegexNode.Lazyloop: + case RegexNodeKind.Lazyloop: EmitLazy(node); break; - case 
RegexNode.Alternate: + case RegexNodeKind.Alternate: EmitAlternation(node); break; - case RegexNode.Concatenate: + case RegexNodeKind.Concatenate: EmitConcatenation(node, subsequent, emitLengthChecksIfRequired); break; - case RegexNode.Atomic: + case RegexNodeKind.Atomic: EmitAtomic(node, subsequent); break; - case RegexNode.Ref: + case RegexNodeKind.Backreference: EmitBackreference(node); break; - case RegexNode.Testref: + case RegexNodeKind.BackreferenceConditional: EmitBackreferenceConditional(node); break; - case RegexNode.Testgroup: + case RegexNodeKind.ExpressionConditional: EmitExpressionConditional(node); break; - case RegexNode.Capture: + case RegexNodeKind.Capture: EmitCapture(node, subsequent); break; - case RegexNode.Require: + case RegexNodeKind.PositiveLookaround: EmitPositiveLookaheadAssertion(node); break; - case RegexNode.Prevent: + case RegexNodeKind.NegativeLookaround: EmitNegativeLookaheadAssertion(node); break; - case RegexNode.Nothing: + case RegexNodeKind.Nothing: BrFar(doneLabel); break; - case RegexNode.Empty: + case RegexNodeKind.Empty: // Emit nothing. break; - case RegexNode.UpdateBumpalong: + case RegexNodeKind.UpdateBumpalong: EmitUpdateBumpalong(node); break; default: - Debug.Fail($"Unexpected node type: {node.Type}"); + Debug.Fail($"Unexpected node type: {node.Kind}"); break; } } @@ -2131,7 +2131,7 @@ void EmitNode(RegexNode node, RegexNode? subsequent = null, bool emitLengthCheck // Emits the node for an atomic. void EmitAtomic(RegexNode node, RegexNode? subsequent) { - Debug.Assert(node.Type is RegexNode.Atomic, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Atomic, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}"); // Atomic simply outputs the code for the child, but it ensures that any done label left @@ -2147,7 +2147,7 @@ void EmitAtomic(RegexNode node, RegexNode? 
subsequent) // it should bump from this location rather than from the original location. void EmitUpdateBumpalong(RegexNode node) { - Debug.Assert(node.Type is RegexNode.UpdateBumpalong, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.UpdateBumpalong, $"Unexpected type: {node.Kind}"); // if (base.runtextpos < pos) // { @@ -2167,7 +2167,7 @@ void EmitUpdateBumpalong(RegexNode node) // Emits code for a concatenation void EmitConcatenation(RegexNode node, RegexNode? subsequent, bool emitLengthChecksIfRequired) { - Debug.Assert(node.Type is RegexNode.Concatenate, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Concatenate, $"Unexpected type: {node.Kind}"); Debug.Assert(node.ChildCount() >= 2, $"Expected at least 2 children, found {node.ChildCount()}"); // Emit the code for each child one after the other. @@ -2198,7 +2198,7 @@ void EmitConcatenation(RegexNode node, RegexNode? subsequent, bool emitLengthChe for (int i = index + 1; i < childCount; i++) { RegexNode next = node.Child(i); - if (next.Type is not RegexNode.UpdateBumpalong) // skip node types that don't have a semantic impact + if (next.Kind is not RegexNodeKind.UpdateBumpalong) // skip node types that don't have a semantic impact { return next; } @@ -2211,7 +2211,7 @@ void EmitConcatenation(RegexNode node, RegexNode? subsequent, bool emitLengthChe // Emits the code to handle a single-character match. void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, LocalBuilder? 
offset = null) { - Debug.Assert(node.IsOneFamily || node.IsNotoneFamily || node.IsSetFamily, $"Unexpected type: {node.Type}"); + Debug.Assert(node.IsOneFamily || node.IsNotoneFamily || node.IsSetFamily, $"Unexpected type: {node.Kind}"); // This only emits a single check, but it's called from the looping constructs in a loop // to generate the code for a single check, so we check for each "family" (one, notone, set) @@ -2254,7 +2254,7 @@ void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, LocalBuilder? o // Emits the code to handle a boundary check on a character. void EmitBoundary(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Boundary or RegexNode.NonBoundary or RegexNode.ECMABoundary or RegexNode.NonECMABoundary, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Boundary or RegexNodeKind.NonBoundary or RegexNodeKind.ECMABoundary or RegexNodeKind.NonECMABoundary, $"Unexpected type: {node.Kind}"); // if (!IsBoundary(pos + sliceStaticPos, base.runtextbeg, end)) goto doneLabel; Ldthis(); @@ -2266,25 +2266,25 @@ void EmitBoundary(RegexNode node) } Ldthisfld(s_runtextbegField); Ldloc(end); - switch (node.Type) + switch (node.Kind) { - case RegexNode.Boundary: + case RegexNodeKind.Boundary: Call(s_isBoundaryMethod); BrfalseFar(doneLabel); break; - case RegexNode.NonBoundary: + case RegexNodeKind.NonBoundary: Call(s_isBoundaryMethod); BrtrueFar(doneLabel); break; - case RegexNode.ECMABoundary: + case RegexNodeKind.ECMABoundary: Call(s_isECMABoundaryMethod); BrfalseFar(doneLabel); break; default: - Debug.Assert(node.Type == RegexNode.NonECMABoundary); + Debug.Assert(node.Kind == RegexNodeKind.NonECMABoundary); Call(s_isECMABoundaryMethod); BrtrueFar(doneLabel); break; @@ -2294,13 +2294,13 @@ void EmitBoundary(RegexNode node) // Emits the code to handle various anchors. 
void EmitAnchors(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Beginning or RegexNode.Start or RegexNode.Bol or RegexNode.End or RegexNode.EndZ or RegexNode.Eol, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Beginning or RegexNodeKind.Start or RegexNodeKind.Bol or RegexNodeKind.End or RegexNodeKind.EndZ or RegexNodeKind.Eol, $"Unexpected type: {node.Kind}"); Debug.Assert(sliceStaticPos >= 0); - switch (node.Type) + switch (node.Kind) { - case RegexNode.Beginning: - case RegexNode.Start: + case RegexNodeKind.Beginning: + case RegexNodeKind.Start: if (sliceStaticPos > 0) { // If we statically know we've already matched part of the regex, there's no way we're at the @@ -2311,12 +2311,12 @@ void EmitAnchors(RegexNode node) { // if (pos > base.runtextbeg/start) goto doneLabel; Ldloc(pos); - Ldthisfld(node.Type == RegexNode.Beginning ? s_runtextbegField : s_runtextstartField); + Ldthisfld(node.Kind == RegexNodeKind.Beginning ? s_runtextbegField : s_runtextstartField); BneFar(doneLabel); } break; - case RegexNode.Bol: + case RegexNodeKind.Bol: if (sliceStaticPos > 0) { // if (slice[sliceStaticPos - 1] != '\n') goto doneLabel; @@ -2347,7 +2347,7 @@ void EmitAnchors(RegexNode node) } break; - case RegexNode.End: + case RegexNodeKind.End: // if (sliceStaticPos < slice.Length) goto doneLabel; Ldc(sliceStaticPos); Ldloca(slice); @@ -2355,7 +2355,7 @@ void EmitAnchors(RegexNode node) BltUnFar(doneLabel); break; - case RegexNode.EndZ: + case RegexNodeKind.EndZ: // if (sliceStaticPos < slice.Length - 1) goto doneLabel; Ldc(sliceStaticPos); Ldloca(slice); @@ -2363,9 +2363,9 @@ void EmitAnchors(RegexNode node) Ldc(1); Sub(); BltFar(doneLabel); - goto case RegexNode.Eol; + goto case RegexNodeKind.Eol; - case RegexNode.Eol: + case RegexNodeKind.Eol: // if (sliceStaticPos < slice.Length && slice[sliceStaticPos] != '\n') goto doneLabel; { Label success = DefineLabel(); @@ -2388,7 +2388,7 @@ void EmitAnchors(RegexNode node) // Emits the code to 
handle a multiple-character match. void EmitMultiChar(RegexNode node, bool emitLengthCheck = true) { - Debug.Assert(node.Type is RegexNode.Multi, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Multi, $"Unexpected type: {node.Kind}"); bool caseInsensitive = IsCaseInsensitive(node); @@ -2485,7 +2485,7 @@ void EmitMultiChar(RegexNode node, bool emitLengthCheck = true) // Emits the code to handle a backtracking, single-character loop. void EmitSingleCharLoop(RegexNode node, RegexNode? subsequent = null, bool emitLengthChecksIfRequired = true) { - Debug.Assert(node.Type is RegexNode.Oneloop or RegexNode.Notoneloop or RegexNode.Setloop, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Oneloop or RegexNodeKind.Notoneloop or RegexNodeKind.Setloop, $"Unexpected type: {node.Kind}"); // If this is actually a repeater, emit that instead; no backtracking necessary. if (node.M == node.N) @@ -2646,7 +2646,7 @@ void EmitSingleCharLoop(RegexNode node, RegexNode? subsequent = null, bool emitL void EmitSingleCharLazy(RegexNode node, RegexNode? subsequent = null, bool emitLengthChecksIfRequired = true) { - Debug.Assert(node.Type is RegexNode.Onelazy or RegexNode.Notonelazy or RegexNode.Setlazy, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy or RegexNodeKind.Setlazy, $"Unexpected type: {node.Kind}"); // Emit the min iterations as a repeater. Any failures here don't necessitate backtracking, // as the lazy itself failed to match, and there's no backtracking possible by the individual @@ -2742,7 +2742,7 @@ void EmitSingleCharLazy(RegexNode node, RegexNode? subsequent = null, bool emitL // Now that we've appropriately advanced by one character and are set for what comes after the loop, // see if we can skip ahead more iterations by doing a search for a following literal. 
if (iterationCount is null && - node.Type is RegexNode.Notonelazy && + node.Kind is RegexNodeKind.Notonelazy && !IsCaseInsensitive(node) && subsequent?.FindStartingCharacterOrString() is ValueTuple literal && (literal.Item2?[0] ?? literal.Item1) != node.Ch) @@ -2782,7 +2782,7 @@ node.Type is RegexNode.Notonelazy && SliceInputSpan(); } else if (iterationCount is null && - node.Type is RegexNode.Setlazy && + node.Kind is RegexNodeKind.Setlazy && node.Str == RegexCharClass.AnyClass && subsequent?.FindStartingCharacterOrString() is ValueTuple literal2) { @@ -2889,7 +2889,7 @@ node.Type is RegexNode.Setlazy && void EmitLazy(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Lazyloop, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Lazyloop, $"Unexpected type: {node.Kind}"); Debug.Assert(node.M < int.MaxValue, $"Unexpected M={node.M}"); Debug.Assert(node.N >= node.M, $"Unexpected M={node.M}, N={node.N}"); Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}"); @@ -3134,7 +3134,7 @@ void EmitLazy(RegexNode node) // RegexNode.M is used for the number of iterations; RegexNode.N is ignored. void EmitSingleCharFixedRepeater(RegexNode node, bool emitLengthChecksIfRequired = true) { - Debug.Assert(node.IsOneFamily || node.IsNotoneFamily || node.IsSetFamily, $"Unexpected type: {node.Type}"); + Debug.Assert(node.IsOneFamily || node.IsNotoneFamily || node.IsSetFamily, $"Unexpected type: {node.Kind}"); int iterations = node.M; if (iterations == 0) @@ -3218,7 +3218,7 @@ void EmitSingleCharFixedRepeater(RegexNode node, bool emitLengthChecksIfRequired // Emits the code to handle a non-backtracking, variable-length loop around a single character comparison. 
void EmitSingleCharAtomicLoop(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Oneloop or RegexNode.Oneloopatomic or RegexNode.Notoneloop or RegexNode.Notoneloopatomic or RegexNode.Setloop or RegexNode.Setloopatomic, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic, $"Unexpected type: {node.Kind}"); // If this is actually a repeater, emit that instead. if (node.M == node.N) @@ -3460,7 +3460,7 @@ void EmitSingleCharAtomicLoop(RegexNode node) // Emits the code to handle a non-backtracking optional zero-or-one loop. void EmitAtomicSingleCharZeroOrOne(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Oneloop or RegexNode.Oneloopatomic or RegexNode.Notoneloop or RegexNode.Notoneloopatomic or RegexNode.Setloop or RegexNode.Setloopatomic, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic, $"Unexpected type: {node.Kind}"); Debug.Assert(node.M == 0 && node.N == 1); Label skipUpdatesLabel = DefineLabel(); @@ -3515,7 +3515,7 @@ void EmitAtomicSingleCharZeroOrOne(RegexNode node) void EmitLoop(RegexNode node) { - Debug.Assert(node.Type is RegexNode.Loop or RegexNode.Lazyloop, $"Unexpected type: {node.Type}"); + Debug.Assert(node.Kind is RegexNodeKind.Loop or RegexNodeKind.Lazyloop, $"Unexpected type: {node.Kind}"); Debug.Assert(node.M < int.MaxValue, $"Unexpected M={node.M}"); Debug.Assert(node.N >= node.M, $"Unexpected M={node.M}, N={node.N}"); Debug.Assert(node.ChildCount() == 1, $"Expected 1 child, found {node.ChildCount()}"); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs 
b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs index 7f8191e51752cb..f3c5552b8404af 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs @@ -626,7 +626,7 @@ public bool TryFindNextStartingPosition(ReadOnlySpan textSpan, ref int pos Debug.Assert(LiteralAfterLoop is not null); (RegexNode loopNode, (char Char, string? String, char[]? Chars) literal) = LiteralAfterLoop.GetValueOrDefault(); - Debug.Assert(loopNode.Type is RegexNode.Setloop or RegexNode.Setlazy or RegexNode.Setloopatomic); + Debug.Assert(loopNode.Kind is RegexNodeKind.Setloop or RegexNodeKind.Setlazy or RegexNodeKind.Setloopatomic); Debug.Assert(loopNode.N == int.MaxValue); int startingPos = pos; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index 121b9d1e4225ba..ad760e95e39142 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -1,43 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// This RegexNode class is internal to the Regex package. -// It is built into a parsed tree for a regular expression. - -// Implementation notes: -// -// Since the node tree is a temporary data structure only used -// during compilation of the regexp to integer codes, it's -// designed for clarity and convenience rather than -// space efficiency. -// -// RegexNodes are built into a tree, linked by the _children list. 
-// Each node also has a _parent and _ichild member indicating -// its parent and which child # it is in its parent's list. -// -// RegexNodes come in as many types as there are constructs in -// a regular expression, for example, "concatenate", "alternate", -// "one", "rept", "group". There are also node types for basic -// peephole optimizations, e.g., "onerep", "notsetrep", etc. -// -// Because perl 5 allows "lookback" groups that scan backwards, -// each node also gets a "direction". Normally the value of -// boolean _backward = false. -// -// During parsing, top-level nodes are also stacked onto a parse -// stack (a stack of trees). For this purpose we have a _next -// pointer. [Note that to save a few bytes, we could overload the -// _parent pointer instead.] -// -// On the parse stack, each tree has a "role" - basically, the -// nonterminal in the grammar that the parser has currently -// assigned to the tree. That code is stored in _role. -// -// Finally, some of the different kinds of nodes have data. -// Two integers (for the looping constructs) are stored in -// _operands, an object (either a string or a set) -// is stored in _data - using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; @@ -46,107 +9,75 @@ namespace System.Text.RegularExpressions { + /// Represents a regex subexpression. internal sealed class RegexNode { - // RegexNode types - - // The following are leaves, and correspond to primitive operations - - public const int Oneloop = RegexCode.Oneloop; // c,n a* - public const int Notoneloop = RegexCode.Notoneloop; // c,n .* - public const int Setloop = RegexCode.Setloop; // set,n \d* - - public const int Onelazy = RegexCode.Onelazy; // c,n a*? - public const int Notonelazy = RegexCode.Notonelazy; // c,n .*? - public const int Setlazy = RegexCode.Setlazy; // set,n \d*? - - public const int One = RegexCode.One; // char a - public const int Notone = RegexCode.Notone; // char . 
[^a] - public const int Set = RegexCode.Set; // set [a-z] \w \s \d - - public const int Multi = RegexCode.Multi; // string abcdef - public const int Ref = RegexCode.Ref; // index \1 - - public const int Bol = RegexCode.Bol; // ^ - public const int Eol = RegexCode.Eol; // $ - public const int Boundary = RegexCode.Boundary; // \b - public const int NonBoundary = RegexCode.NonBoundary; // \B - public const int ECMABoundary = RegexCode.ECMABoundary; // \b - public const int NonECMABoundary = RegexCode.NonECMABoundary; // \B - public const int Beginning = RegexCode.Beginning; // \A - public const int Start = RegexCode.Start; // \G - public const int EndZ = RegexCode.EndZ; // \Z - public const int End = RegexCode.End; // \z - - public const int Oneloopatomic = RegexCode.Oneloopatomic; // c,n (?> a*) - public const int Notoneloopatomic = RegexCode.Notoneloopatomic; // c,n (?> .*) - public const int Setloopatomic = RegexCode.Setloopatomic; // set,n (?> \d*) - public const int UpdateBumpalong = RegexCode.UpdateBumpalong; - - // Interior nodes do not correspond to primitive operations, but - // control structures compositing other operations - - // Concat and alternate take n children, and can run forward or backwards - - public const int Nothing = 22; // [] - public const int Empty = 23; // () - - public const int Alternate = 24; // a|b - public const int Concatenate = 25; // ab - - public const int Loop = 26; // m,x * + ? {,} - public const int Lazyloop = 27; // m,x *? +? ?? {,}? - - public const int Capture = 28; // n () - capturing group - public const int Group = 29; // (?:) - noncapturing group - public const int Require = 30; // (?=) (?<=) - lookahead and lookbehind assertions - public const int Prevent = 31; // (?!) (?) - atomic subexpression - public const int Testref = 33; // (?(n) | ) - alternation, reference - public const int Testgroup = 34; // (?(...) 
| )- alternation, expression - /// empty bit from the node's options to store data on whether a node contains captures internal const RegexOptions HasCapturesFlag = (RegexOptions)(1 << 31); + /// The node's children. + /// null if no children, a if one child, or a if multiple children. private object? Children; - public int Type { get; private set; } + + /// The kind of expression represented by this node. + public RegexNodeKind Kind { get; private set; } + + /// A string associated with the node. + /// For a , this is the string from the expression. For an node, this is the character class string from . public string? Str { get; private set; } + + /// The character associated with the node. + /// For a or node, the character from the expression. public char Ch { get; private set; } + + /// The minimum number of iterations for a loop, or the capture group number for a capture or backreference. + /// No minimum is represented by 0. No capture group is represented by -1. public int M { get; private set; } + + /// The maximum number of iterations for a loop, or the uncapture group number for a balancing group. + /// No upper bound is represented by . No capture group is represented by -1. public int N { get; private set; } + + /// The options associated with the node. public RegexOptions Options; - public RegexNode? Next; - public RegexNode(int type, RegexOptions options) + /// The node's parent node in the tree. + /// + /// During parsing, top-level nodes are also stacked onto a parse stack (a stack of trees) using . + /// After parsing, is the node in the tree that has this node as or in . + /// + public RegexNode? 
Parent; + + public RegexNode(RegexNodeKind kind, RegexOptions options) { - Type = type; + Kind = kind; Options = options; } - public RegexNode(int type, RegexOptions options, char ch) + public RegexNode(RegexNodeKind kind, RegexOptions options, char ch) { - Type = type; + Kind = kind; Options = options; Ch = ch; } - public RegexNode(int type, RegexOptions options, string str) + public RegexNode(RegexNodeKind kind, RegexOptions options, string str) { - Type = type; + Kind = kind; Options = options; Str = str; } - public RegexNode(int type, RegexOptions options, int m) + public RegexNode(RegexNodeKind kind, RegexOptions options, int m) { - Type = type; + Kind = kind; Options = options; M = m; } - public RegexNode(int type, RegexOptions options, int m, int n) + public RegexNode(RegexNodeKind kind, RegexOptions options, int m, int n) { - Type = type; + Kind = kind; Options = options; M = m; N = n; @@ -168,7 +99,7 @@ public static RegexNode CreateOneWithCaseConversion(char ch, RegexOptions option // we can simply strip out the IgnoreCase option and make the node case-sensitive. if (!RegexCharClass.ParticipatesInCaseConversion(ch)) { - return new RegexNode(One, options & ~RegexOptions.IgnoreCase, ch); + return new RegexNode(RegexNodeKind.One, options & ~RegexOptions.IgnoreCase, ch); } // Create a set for the character, trying to include all case-insensitive equivalent characters. 
@@ -177,7 +108,7 @@ public static RegexNode CreateOneWithCaseConversion(char ch, RegexOptions option string stringSet = RegexCharClass.OneToStringClass(ch, culture, out bool resultIsCaseInsensitive); if (!resultIsCaseInsensitive) { - return new RegexNode(Set, options & ~RegexOptions.IgnoreCase, stringSet); + return new RegexNode(RegexNodeKind.Set, options & ~RegexOptions.IgnoreCase, stringSet); } // Otherwise, until we can get rid of ToLower usage at match time entirely (https://github.com/dotnet/runtime/issues/61048), @@ -186,14 +117,14 @@ public static RegexNode CreateOneWithCaseConversion(char ch, RegexOptions option } // Create a One node for the character. - return new RegexNode(One, options, ch); + return new RegexNode(RegexNodeKind.One, options, ch); } /// Reverses all children of a concatenation when in RightToLeft mode. public RegexNode ReverseConcatenationIfRightToLeft() { if ((Options & RegexOptions.RightToLeft) != 0 && - Type == Concatenate && + Kind == RegexNodeKind.Concatenate && ChildCount() > 1) { ((List)Children!).Reverse(); @@ -205,38 +136,38 @@ public RegexNode ReverseConcatenationIfRightToLeft() /// /// Pass type as OneLazy or OneLoop /// - private void MakeRep(int type, int min, int max) + private void MakeRep(RegexNodeKind kind, int min, int max) { - Type += type - One; + Kind += kind - RegexNodeKind.One; M = min; N = max; } private void MakeLoopAtomic() { - switch (Type) + switch (Kind) { - case Oneloop or Notoneloop or Setloop: + case RegexNodeKind.Oneloop or RegexNodeKind.Notoneloop or RegexNodeKind.Setloop: // For loops, we simply change the Type to the atomic variant. // Atomic greedy loops should consume as many values as they can. 
- Type += Oneloopatomic - Oneloop; + Kind += RegexNodeKind.Oneloopatomic - RegexNodeKind.Oneloop; break; - case Onelazy or Notonelazy or Setlazy: + case RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy or RegexNodeKind.Setlazy: // For lazy, we not only change the Type, we also lower the max number of iterations // to the minimum number of iterations, as they should end up matching as little as possible. - Type += Oneloopatomic - Onelazy; + Kind += RegexNodeKind.Oneloopatomic - RegexNodeKind.Onelazy; N = M; if (N == 0) { - Type = Empty; + Kind = RegexNodeKind.Empty; Str = null; Ch = '\0'; } break; default: - Debug.Fail($"Unexpected type: {Type}"); + Debug.Fail($"Unexpected type: {Kind}"); break; } } @@ -246,7 +177,7 @@ private void MakeLoopAtomic() [Conditional("DEBUG")] private void ValidateFinalTreeInvariants() { - Debug.Assert(Type == Capture, "Every generated tree should begin with a capture node"); + Debug.Assert(Kind == RegexNodeKind.Capture, "Every generated tree should begin with a capture node"); var toExamine = new Stack(); toExamine.Push(this); @@ -259,90 +190,90 @@ private void ValidateFinalTreeInvariants() for (int i = 0; i < childCount; i++) { RegexNode child = node.Child(i); - Debug.Assert(child.Next == node, $"{child.Description()} missing reference to parent {node.Description()}"); + Debug.Assert(child.Parent == node, $"{child.Description()} missing reference to parent {node.Description()}"); toExamine.Push(child); } // Validate that we never see certain node types. - Debug.Assert(Type != Group, "All Group nodes should have been removed."); + Debug.Assert(Kind != RegexNodeKind.Group, "All Group nodes should have been removed."); // Validate node types and expected child counts. 
- switch (node.Type) + switch (node.Kind) { - case Group: + case RegexNodeKind.Group: Debug.Fail("All Group nodes should have been removed."); break; - case Beginning: - case Bol: - case Boundary: - case ECMABoundary: - case Empty: - case End: - case EndZ: - case Eol: - case Multi: - case NonBoundary: - case NonECMABoundary: - case Nothing: - case Notone: - case Notonelazy: - case Notoneloop: - case Notoneloopatomic: - case One: - case Onelazy: - case Oneloop: - case Oneloopatomic: - case Ref: - case Set: - case Setlazy: - case Setloop: - case Setloopatomic: - case Start: - case UpdateBumpalong: + case RegexNodeKind.Beginning: + case RegexNodeKind.Bol: + case RegexNodeKind.Boundary: + case RegexNodeKind.ECMABoundary: + case RegexNodeKind.Empty: + case RegexNodeKind.End: + case RegexNodeKind.EndZ: + case RegexNodeKind.Eol: + case RegexNodeKind.Multi: + case RegexNodeKind.NonBoundary: + case RegexNodeKind.NonECMABoundary: + case RegexNodeKind.Nothing: + case RegexNodeKind.Notone: + case RegexNodeKind.Notonelazy: + case RegexNodeKind.Notoneloop: + case RegexNodeKind.Notoneloopatomic: + case RegexNodeKind.One: + case RegexNodeKind.Onelazy: + case RegexNodeKind.Oneloop: + case RegexNodeKind.Oneloopatomic: + case RegexNodeKind.Backreference: + case RegexNodeKind.Set: + case RegexNodeKind.Setlazy: + case RegexNodeKind.Setloop: + case RegexNodeKind.Setloopatomic: + case RegexNodeKind.Start: + case RegexNodeKind.UpdateBumpalong: Debug.Assert(childCount == 0, $"Expected zero children for {node.TypeName}, got {childCount}."); break; - case Atomic: - case Capture: - case Lazyloop: - case Loop: - case Prevent: - case Require: + case RegexNodeKind.Atomic: + case RegexNodeKind.Capture: + case RegexNodeKind.Lazyloop: + case RegexNodeKind.Loop: + case RegexNodeKind.NegativeLookaround: + case RegexNodeKind.PositiveLookaround: Debug.Assert(childCount == 1, $"Expected one and only one child for {node.TypeName}, got {childCount}."); break; - case Testref: + case 
RegexNodeKind.BackreferenceConditional: Debug.Assert(childCount == 2, $"Expected two children for {node.TypeName}, got {childCount}"); break; - case Testgroup: + case RegexNodeKind.ExpressionConditional: Debug.Assert(childCount == 3, $"Expected three children for {node.TypeName}, got {childCount}"); break; - case Concatenate: - case Alternate: + case RegexNodeKind.Concatenate: + case RegexNodeKind.Alternate: Debug.Assert(childCount >= 2, $"Expected at least two children for {node.TypeName}, got {childCount}."); break; default: - Debug.Fail($"Unexpected node type: {node.Type}"); + Debug.Fail($"Unexpected node type: {node.Kind}"); break; } // Validate node configuration. - switch (node.Type) + switch (node.Kind) { - case Multi: + case RegexNodeKind.Multi: Debug.Assert(node.Str is not null, "Expect non-null multi string"); Debug.Assert(node.Str.Length >= 2, $"Expected {node.Str} to be at least two characters"); break; - case Set: - case Setloop: - case Setloopatomic: - case Setlazy: + case RegexNodeKind.Set: + case RegexNodeKind.Setloop: + case RegexNodeKind.Setloopatomic: + case RegexNodeKind.Setlazy: Debug.Assert(!string.IsNullOrEmpty(node.Str), $"Expected non-null, non-empty string for {node.TypeName}."); break; @@ -364,8 +295,8 @@ private void ValidateFinalTreeInvariants() internal RegexNode FinalOptimize() { RegexNode rootNode = this; - Debug.Assert(rootNode.Type == Capture); - Debug.Assert(rootNode.Next is null); + Debug.Assert(rootNode.Kind == RegexNodeKind.Capture); + Debug.Assert(rootNode.Parent is null); Debug.Assert(rootNode.ChildCount() == 1); // Only apply optimization when LTR to avoid needing additional code for the much rarer RTL case. 
@@ -404,19 +335,19 @@ internal RegexNode FinalOptimize() RegexNode node = rootNode.Child(0); // skip implicit root capture node while (true) { - switch (node.Type) + switch (node.Kind) { - case Atomic: - case Concatenate: + case RegexNodeKind.Atomic: + case RegexNodeKind.Concatenate: node = node.Child(0); continue; - case Oneloop or Oneloopatomic or Notoneloop or Notoneloopatomic or Setloop or Setloopatomic when node.N == int.MaxValue: - case Onelazy or Notonelazy or Setlazy when node.N == int.MaxValue && !node.IsAtomicByParent(): - RegexNode? parent = node.Next; - if (parent != null && parent.Type == Concatenate) + case RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic when node.N == int.MaxValue: + case RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy or RegexNodeKind.Setlazy when node.N == int.MaxValue && !node.IsAtomicByParent(): + RegexNode? parent = node.Parent; + if (parent != null && parent.Kind == RegexNodeKind.Concatenate) { - parent.InsertChild(1, new RegexNode(UpdateBumpalong, node.Options)); + parent.InsertChild(1, new RegexNode(RegexNodeKind.UpdateBumpalong, node.Options)); } break; } @@ -454,19 +385,19 @@ private void EliminateEndingBacktracking() RegexNode node = this; while (true) { - switch (node.Type) + switch (node.Kind) { // {One/Notone/Set}loops can be upgraded to {One/Notone/Set}loopatomic nodes, e.g. [abc]* => (?>[abc]*). // And {One/Notone/Set}lazys can similarly be upgraded to be atomic, which really makes them into repeaters // or even empty nodes. - case Oneloop or Notoneloop or Setloop: - case Onelazy or Notonelazy or Setlazy: + case RegexNodeKind.Oneloop or RegexNodeKind.Notoneloop or RegexNodeKind.Setloop: + case RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy or RegexNodeKind.Setlazy: node.MakeLoopAtomic(); break; // Just because a particular node is atomic doesn't mean all its descendants are. 
// Process them as well. - case Atomic: + case RegexNodeKind.Atomic: node = node.Child(0); continue; @@ -476,13 +407,13 @@ private void EliminateEndingBacktracking() // node is atomic based on its parent or grandparent, we don't bother wrapping such a node in // an Atomic one if its grandparent is already Atomic. // e.g. [xyz](?:abc|def) => [xyz](?>abc|def) - case Capture: - case Concatenate: + case RegexNodeKind.Capture: + case RegexNodeKind.Concatenate: RegexNode existingChild = node.Child(node.ChildCount() - 1); - if ((existingChild.Type is Alternate or Testref or Testgroup or Loop or Lazyloop) && - (node.Next is null || node.Next.Type != Atomic)) // validate grandparent isn't atomic + if ((existingChild.Kind is RegexNodeKind.Alternate or RegexNodeKind.BackreferenceConditional or RegexNodeKind.ExpressionConditional or RegexNodeKind.Loop or RegexNodeKind.Lazyloop) && + (node.Parent is null || node.Parent.Kind != RegexNodeKind.Atomic)) // validate grandparent isn't atomic { - var atomic = new RegexNode(Atomic, existingChild.Options); + var atomic = new RegexNode(RegexNodeKind.Atomic, existingChild.Options); atomic.AddChild(existingChild); node.ReplaceChild(node.ChildCount() - 1, atomic); } @@ -492,9 +423,9 @@ private void EliminateEndingBacktracking() // For alternate, we can recur into each branch separately. We use this iteration for the first branch. // Conditionals are just like alternations in this regard. // e.g. 
abc*|def* => ab(?>c*)|de(?>f*) - case Alternate: - case Testref: - case Testgroup: + case RegexNodeKind.Alternate: + case RegexNodeKind.BackreferenceConditional: + case RegexNodeKind.ExpressionConditional: { int branches = node.ChildCount(); for (int i = 1; i < branches; i++) @@ -502,7 +433,7 @@ private void EliminateEndingBacktracking() node.Child(i).EliminateEndingBacktracking(); } - if (node.Type != Testgroup) // ReduceTestgroup will have already applied ending backtracking removal + if (node.Kind != RegexNodeKind.ExpressionConditional) // ReduceTestgroup will have already applied ending backtracking removal { node = node.Child(0); continue; @@ -517,10 +448,10 @@ private void EliminateEndingBacktracking() // repeater, which results in better code generation. // e.g. (?:abc*)* => (?:ab(?>c*))* // e.g. (abc*?)+? => (ab){1} - case Lazyloop: + case RegexNodeKind.Lazyloop: node.N = node.M; - goto case Loop; - case Loop: + goto case RegexNodeKind.Loop; + case RegexNodeKind.Loop: { if (node.N == 1) { @@ -554,31 +485,31 @@ public bool IsAtomicByParent() { // Walk up the parent hierarchy. RegexNode child = this; - for (RegexNode? parent = child.Next; parent is not null; child = parent, parent = child.Next) + for (RegexNode? parent = child.Parent; parent is not null; child = parent, parent = child.Parent) { - switch (parent.Type) + switch (parent.Kind) { - case Atomic: - case Prevent: - case Require: + case RegexNodeKind.Atomic: + case RegexNodeKind.NegativeLookaround: + case RegexNodeKind.PositiveLookaround: // If the parent is atomic, so is the child. That's the whole purpose // of the Atomic node, and lookarounds are also implicitly atomic. return true; - case Alternate: - case Testref: + case RegexNodeKind.Alternate: + case RegexNodeKind.BackreferenceConditional: // Skip alternations. Each branch is considered independently, // so any atomicity applied to the alternation also applies to // each individual branch. 
This is true as well for conditional // backreferences, where each of the yes/no branches are independent. - case Testgroup when parent.Child(0) != child: + case RegexNodeKind.ExpressionConditional when parent.Child(0) != child: // As with alternations, each yes/no branch of an expression conditional // are independent from each other, but the conditional expression itself // can be backtracked into from each of the branches, so we can't make // it atomic just because the whole conditional is. - case Capture: + case RegexNodeKind.Capture: // Skip captures. They don't affect atomicity. - case Concatenate when parent.Child(parent.ChildCount() - 1) == child: + case RegexNodeKind.Concatenate when parent.Child(parent.ChildCount() - 1) == child: // If the parent is a concatenation and this is the last node, // any atomicity applying to the concatenation applies to this // node, too. @@ -598,18 +529,18 @@ public bool IsAtomicByParent() /// Removes redundant nodes from the subtree, and returns an optimized subtree. 
/// internal RegexNode Reduce() => - Type switch - { - Alternate => ReduceAlternation(), - Atomic => ReduceAtomic(), - Concatenate => ReduceConcatenation(), - Group => ReduceGroup(), - Loop or Lazyloop => ReduceLoops(), - Prevent => ReducePrevent(), - Require => ReduceRequire(), - Set or Setloop or Setloopatomic or Setlazy => ReduceSet(), - Testgroup => ReduceTestgroup(), - Testref => ReduceTestref(), + Kind switch + { + RegexNodeKind.Alternate => ReduceAlternation(), + RegexNodeKind.Atomic => ReduceAtomic(), + RegexNodeKind.Concatenate => ReduceConcatenation(), + RegexNodeKind.Group => ReduceGroup(), + RegexNodeKind.Loop or RegexNodeKind.Lazyloop => ReduceLoops(), + RegexNodeKind.NegativeLookaround => ReducePrevent(), + RegexNodeKind.PositiveLookaround => ReduceRequire(), + RegexNodeKind.Set or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy => ReduceSet(), + RegexNodeKind.ExpressionConditional => ReduceTestgroup(), + RegexNodeKind.BackreferenceConditional => ReduceTestref(), _ => this, }; @@ -621,10 +552,10 @@ internal RegexNode Reduce() => /// private RegexNode ReplaceNodeIfUnnecessary() { - Debug.Assert(Type is Alternate or Concatenate); + Debug.Assert(Kind is RegexNodeKind.Alternate or RegexNodeKind.Concatenate); return ChildCount() switch { - 0 => new RegexNode(Type == Alternate ? Nothing : Empty, Options), + 0 => new RegexNode(Kind == RegexNodeKind.Alternate ? 
RegexNodeKind.Nothing : RegexNodeKind.Empty, Options), 1 => Child(0), _ => this, }; @@ -638,10 +569,10 @@ private RegexNode ReplaceNodeIfUnnecessary() /// private RegexNode ReduceGroup() { - Debug.Assert(Type == Group); + Debug.Assert(Kind == RegexNodeKind.Group); RegexNode u = this; - while (u.Type == Group) + while (u.Kind == RegexNodeKind.Group) { Debug.Assert(u.ChildCount() == 1); u = u.Child(0); @@ -666,45 +597,45 @@ private RegexNode ReduceAtomic() return this; } - Debug.Assert(Type == Atomic); + Debug.Assert(Kind == RegexNodeKind.Atomic); Debug.Assert(ChildCount() == 1); RegexNode atomic = this; RegexNode child = Child(0); - while (child.Type == Atomic) + while (child.Kind == RegexNodeKind.Atomic) { atomic = child; child = atomic.Child(0); } - switch (child.Type) + switch (child.Kind) { // If the child is empty/nothing, there's nothing to be made atomic so the Atomic // node can simply be removed. - case Empty: - case Nothing: + case RegexNodeKind.Empty: + case RegexNodeKind.Nothing: return child; // If the child is already atomic, we can just remove the atomic node. - case Oneloopatomic: - case Notoneloopatomic: - case Setloopatomic: + case RegexNodeKind.Oneloopatomic: + case RegexNodeKind.Notoneloopatomic: + case RegexNodeKind.Setloopatomic: return child; // If an atomic subexpression contains only a {one/notone/set}{loop/lazy}, // change it to be an {one/notone/set}loopatomic and remove the atomic node. - case Oneloop: - case Notoneloop: - case Setloop: - case Onelazy: - case Notonelazy: - case Setlazy: + case RegexNodeKind.Oneloop: + case RegexNodeKind.Notoneloop: + case RegexNodeKind.Setloop: + case RegexNodeKind.Onelazy: + case RegexNodeKind.Notonelazy: + case RegexNodeKind.Setlazy: child.MakeLoopAtomic(); return child; // Alternations have a variety of possible optimizations that can be applied // iff they're atomic. - case Alternate: + case RegexNodeKind.Alternate: if ((Options & RegexOptions.RightToLeft) == 0) { List? 
branches = child.Children as List; @@ -713,9 +644,9 @@ private RegexNode ReduceAtomic() // If an alternation is atomic and its first branch is Empty, the whole thing // is a nop, as Empty will match everything trivially, and no backtracking // into the node will be performed, making the remaining branches irrelevant. - if (branches[0].Type == Empty) + if (branches[0].Kind == RegexNodeKind.Empty) { - return new RegexNode(Empty, child.Options); + return new RegexNode(RegexNodeKind.Empty, child.Options); } // Similarly, we can trim off any branches after an Empty, as they'll never be used. @@ -724,7 +655,7 @@ private RegexNode ReduceAtomic() // but if the alternation is atomic, such backtracking won't happen. for (int i = 1; i < branches.Count - 1; i++) { - if (branches[i].Type == Empty) + if (branches[i].Kind == RegexNodeKind.Empty) { branches.RemoveRange(i + 1, branches.Count - (i + 1)); break; @@ -828,10 +759,10 @@ private RegexNode ReduceAtomic() /// private RegexNode ReduceLoops() { - Debug.Assert(Type == Loop || Type == Lazyloop); + Debug.Assert(Kind is RegexNodeKind.Loop or RegexNodeKind.Lazyloop); RegexNode u = this; - int type = Type; + RegexNodeKind kind = Kind; int min = M; int max = N; @@ -841,30 +772,30 @@ private RegexNode ReduceLoops() RegexNode child = u.Child(0); // multiply reps of the same type only - if (child.Type != type) + if (child.Kind != kind) { bool valid = false; - if (type == Loop) + if (kind == RegexNodeKind.Loop) { - switch (child.Type) + switch (child.Kind) { - case Oneloop: - case Oneloopatomic: - case Notoneloop: - case Notoneloopatomic: - case Setloop: - case Setloopatomic: + case RegexNodeKind.Oneloop: + case RegexNodeKind.Oneloopatomic: + case RegexNodeKind.Notoneloop: + case RegexNodeKind.Notoneloopatomic: + case RegexNodeKind.Setloop: + case RegexNodeKind.Setloopatomic: valid = true; break; } } else // type == Lazyloop { - switch (child.Type) + switch (child.Kind) { - case Onelazy: - case Notonelazy: - case Setlazy: + case 
RegexNodeKind.Onelazy: + case RegexNodeKind.Notonelazy: + case RegexNodeKind.Setlazy: valid = true; break; } @@ -898,7 +829,7 @@ private RegexNode ReduceLoops() if (min == int.MaxValue) { - return new RegexNode(Nothing, Options); + return new RegexNode(RegexNodeKind.Nothing, Options); } // If the Loop or Lazyloop now only has one child node and its a Set, One, or Notone, @@ -908,12 +839,12 @@ private RegexNode ReduceLoops() if (u.ChildCount() == 1) { RegexNode child = u.Child(0); - switch (child.Type) + switch (child.Kind) { - case One: - case Notone: - case Set: - child.MakeRep(u.Type == Lazyloop ? Onelazy : Oneloop, u.M, u.N); + case RegexNodeKind.One: + case RegexNodeKind.Notone: + case RegexNodeKind.Set: + child.MakeRep(u.Kind == RegexNodeKind.Lazyloop ? RegexNodeKind.Onelazy : RegexNodeKind.Oneloop, u.M, u.N); u = child; break; } @@ -937,33 +868,33 @@ private RegexNode ReduceLoops() private RegexNode ReduceSet() { // Extract empty-set, one, and not-one case as special - Debug.Assert(Type == Set || Type == Setloop || Type == Setloopatomic || Type == Setlazy); + Debug.Assert(Kind is RegexNodeKind.Set or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy); Debug.Assert(!string.IsNullOrEmpty(Str)); if (RegexCharClass.IsEmpty(Str)) { - Type = Nothing; + Kind = RegexNodeKind.Nothing; Str = null; } else if (RegexCharClass.IsSingleton(Str)) { Ch = RegexCharClass.SingletonChar(Str); Str = null; - Type = - Type == Set ? One : - Type == Setloop ? Oneloop : - Type == Setloopatomic ? Oneloopatomic : - Onelazy; + Kind = + Kind == RegexNodeKind.Set ? RegexNodeKind.One : + Kind == RegexNodeKind.Setloop ? RegexNodeKind.Oneloop : + Kind == RegexNodeKind.Setloopatomic ? RegexNodeKind.Oneloopatomic : + RegexNodeKind.Onelazy; } else if (RegexCharClass.IsSingletonInverse(Str)) { Ch = RegexCharClass.SingletonChar(Str); Str = null; - Type = - Type == Set ? Notone : - Type == Setloop ? Notoneloop : - Type == Setloopatomic ? 
Notoneloopatomic : - Notonelazy; + Kind = + Kind == RegexNodeKind.Set ? RegexNodeKind.Notone : + Kind == RegexNodeKind.Setloop ? RegexNodeKind.Notoneloop : + Kind == RegexNodeKind.Setloopatomic ? RegexNodeKind.Notoneloopatomic : + RegexNodeKind.Notonelazy; } return this; @@ -972,12 +903,12 @@ private RegexNode ReduceSet() /// Optimize an alternation. private RegexNode ReduceAlternation() { - Debug.Assert(Type == Alternate); + Debug.Assert(Kind == RegexNodeKind.Alternate); switch (ChildCount()) { case 0: - return new RegexNode(Nothing, Options); + return new RegexNode(RegexNodeKind.Nothing, Options); case 1: return Child(0); @@ -985,13 +916,13 @@ private RegexNode ReduceAlternation() default: ReduceSingleLetterAndNestedAlternations(); RegexNode node = ReplaceNodeIfUnnecessary(); - if (node.Type == Alternate) + if (node.Kind == RegexNodeKind.Alternate) { node = ExtractCommonPrefixText(node); - if (node.Type == Alternate) + if (node.Kind == RegexNodeKind.Alternate) { node = ExtractCommonPrefixOneNotoneSet(node); - if (node.Type == Alternate) + if (node.Kind == RegexNodeKind.Alternate) { node = RemoveRedundantEmptiesAndNothings(node); } @@ -1026,30 +957,30 @@ void ReduceSingleLetterAndNestedAlternations() while (true) { - if (at.Type == Alternate) + if (at.Kind == RegexNodeKind.Alternate) { if (at.Children is List atChildren) { for (int k = 0; k < atChildren.Count; k++) { - atChildren[k].Next = this; + atChildren[k].Parent = this; } children.InsertRange(i + 1, atChildren); } else { RegexNode atChild = (RegexNode)at.Children!; - atChild.Next = this; + atChild.Parent = this; children.Insert(i + 1, atChild); } j--; } - else if (at.Type == Set || at.Type == One) + else if (at.Kind is RegexNodeKind.Set or RegexNodeKind.One) { // Cannot merge sets if L or I options differ, or if either are negated. 
optionsAt = at.Options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase); - if (at.Type == Set) + if (at.Kind == RegexNodeKind.Set) { if (!wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge || !RegexCharClass.IsMergeable(at.Str!)) { @@ -1073,7 +1004,7 @@ void ReduceSingleLetterAndNestedAlternations() prev = children[j]; RegexCharClass prevCharClass; - if (prev.Type == One) + if (prev.Kind == RegexNodeKind.One) { prevCharClass = new RegexCharClass(); prevCharClass.AddChar(prev.Ch); @@ -1083,7 +1014,7 @@ void ReduceSingleLetterAndNestedAlternations() prevCharClass = RegexCharClass.Parse(prev.Str!); } - if (at.Type == One) + if (at.Kind == RegexNodeKind.One) { prevCharClass.AddChar(at.Ch); } @@ -1093,7 +1024,7 @@ void ReduceSingleLetterAndNestedAlternations() prevCharClass.AddCharClass(atCharClass); } - prev.Type = Set; + prev.Kind = RegexNodeKind.Set; prev.Str = prevCharClass.ToStringClass(Options); if ((prev.Options & RegexOptions.IgnoreCase) != 0 && RegexCharClass.MakeCaseSensitiveIfPossible(prev.Str, RegexParser.GetTargetCulture(prev.Options)) is string newSetString) @@ -1102,7 +1033,7 @@ void ReduceSingleLetterAndNestedAlternations() prev.Options &= ~RegexOptions.IgnoreCase; } } - else if (at.Type == Nothing) + else if (at.Kind == RegexNodeKind.Nothing) { j--; } @@ -1126,7 +1057,7 @@ void ReduceSingleLetterAndNestedAlternations() // e.g. 
\w12|\d34|\d56|\w78|\w90 => \w12|\d(?:34|56)|\w(?:78|90) static RegexNode ExtractCommonPrefixOneNotoneSet(RegexNode alternation) { - Debug.Assert(alternation.Type == Alternate); + Debug.Assert(alternation.Kind == RegexNodeKind.Alternate); Debug.Assert(alternation.Children is List { Count: >= 2 }); var children = (List)alternation.Children; @@ -1139,7 +1070,7 @@ static RegexNode ExtractCommonPrefixOneNotoneSet(RegexNode alternation) // Only handle the case where each branch is a concatenation foreach (RegexNode child in children) { - if (child.Type != Concatenate || child.ChildCount() < 2) + if (child.Kind != RegexNodeKind.Concatenate || child.ChildCount() < 2) { return alternation; } @@ -1154,11 +1085,11 @@ static RegexNode ExtractCommonPrefixOneNotoneSet(RegexNode alternation) // it for non-atomic variable length loops could change behavior as each branch could otherwise have a // different number of characters consumed by the loop based on what's after it. RegexNode required = children[startingIndex].Child(0); - switch (required.Type) + switch (required.Kind) { - case One or Notone or Set: - case Oneloopatomic or Notoneloopatomic or Setloopatomic: - case Oneloop or Notoneloop or Setloop or Onelazy or Notonelazy or Setlazy when required.M == required.N: + case RegexNodeKind.One or RegexNodeKind.Notone or RegexNodeKind.Set: + case RegexNodeKind.Oneloopatomic or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Setloopatomic: + case RegexNodeKind.Oneloop or RegexNodeKind.Notoneloop or RegexNodeKind.Setloop or RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy or RegexNodeKind.Setlazy when required.M == required.N: break; default: @@ -1170,7 +1101,7 @@ static RegexNode ExtractCommonPrefixOneNotoneSet(RegexNode alternation) for (; endingIndex < children.Count; endingIndex++) { RegexNode other = children[endingIndex].Child(0); - if (required.Type != other.Type || + if (required.Kind != other.Kind || required.Options != other.Options || required.M != other.M || required.N 
!= other.N || @@ -1188,7 +1119,7 @@ static RegexNode ExtractCommonPrefixOneNotoneSet(RegexNode alternation) } // Remove the prefix node from every branch, adding it to a new alternation - var newAlternate = new RegexNode(Alternate, alternation.Options); + var newAlternate = new RegexNode(RegexNodeKind.Alternate, alternation.Options); for (int i = startingIndex; i < endingIndex; i++) { ((List)children[i].Children!).RemoveAt(0); @@ -1196,16 +1127,16 @@ static RegexNode ExtractCommonPrefixOneNotoneSet(RegexNode alternation) } // If this alternation is wrapped as atomic, we need to do the same for the new alternation. - if (alternation.Next is RegexNode parent && parent.Type == Atomic) + if (alternation.Parent is RegexNode { Kind: RegexNodeKind.Atomic } parent) { - var atomic = new RegexNode(Atomic, alternation.Options); + var atomic = new RegexNode(RegexNodeKind.Atomic, alternation.Options); atomic.AddChild(newAlternate); newAlternate = atomic; } // Now create a concatenation of the prefix node with the new alternation for the combined // branches, and replace all of the branches in this alternation with that new concatenation. - var newConcat = new RegexNode(Concatenate, alternation.Options); + var newConcat = new RegexNode(RegexNodeKind.Concatenate, alternation.Options); newConcat.AddChild(required); newConcat.AddChild(newAlternate); alternation.ReplaceChild(startingIndex, newConcat); @@ -1221,7 +1152,7 @@ static RegexNode ExtractCommonPrefixOneNotoneSet(RegexNode alternation) // alternation, and while we don't check for all duplicates, checking for empty is easy. 
static RegexNode RemoveRedundantEmptiesAndNothings(RegexNode node) { - Debug.Assert(node.Type == Alternate); + Debug.Assert(node.Kind == RegexNodeKind.Alternate); Debug.Assert(node.ChildCount() >= 2); var children = (List)node.Children!; @@ -1230,14 +1161,14 @@ static RegexNode RemoveRedundantEmptiesAndNothings(RegexNode node) while (i < children.Count) { RegexNode child = children[i]; - switch (child.Type) + switch (child.Kind) { - case Empty when !seenEmpty: + case RegexNodeKind.Empty when !seenEmpty: seenEmpty = true; goto default; - case Empty: - case Nothing: + case RegexNodeKind.Empty: + case RegexNodeKind.Nothing: i++; break; @@ -1264,7 +1195,7 @@ static RegexNode RemoveRedundantEmptiesAndNothings(RegexNode node) // e.g. abc|ade => a(?bc|de) static RegexNode ExtractCommonPrefixText(RegexNode alternation) { - Debug.Assert(alternation.Type == Alternate); + Debug.Assert(alternation.Kind == RegexNodeKind.Alternate); Debug.Assert(alternation.Children is List { Count: >= 2 }); var children = (List)alternation.Children; @@ -1292,7 +1223,7 @@ static RegexNode ExtractCommonPrefixText(RegexNode alternation) RegexOptions startingNodeOptions = startingNode.Options; startingSpan = startingNode.Str.AsSpan(); - if (startingNode.Type == One) + if (startingNode.Kind == RegexNodeKind.One) { scratchChar[0] = startingNode.Ch; startingSpan = scratchChar; @@ -1312,7 +1243,7 @@ static RegexNode ExtractCommonPrefixText(RegexNode alternation) // See if the new branch's prefix has a shared prefix with the current one. // If it does, shorten to that; if it doesn't, bail. 
- if (startingNode.Type == One) + if (startingNode.Kind == RegexNodeKind.One) { if (startingSpan[0] != startingNode.Ch) { @@ -1326,7 +1257,7 @@ static RegexNode ExtractCommonPrefixText(RegexNode alternation) } else { - Debug.Assert(startingNode.Type == Multi); + Debug.Assert(startingNode.Kind == RegexNodeKind.Multi); Debug.Assert(startingNode.Str!.Length > 0); int minLength = Math.Min(startingSpan.Length, startingNode.Str.Length); @@ -1357,13 +1288,13 @@ static RegexNode ExtractCommonPrefixText(RegexNode alternation) // that replaces all these branches in this alternation. var prefix = startingSpan.Length == 1 ? - new RegexNode(One, startingNodeOptions, startingSpan[0]) : - new RegexNode(Multi, startingNodeOptions, startingSpan.ToString()); - var newAlternate = new RegexNode(Alternate, startingNodeOptions); + new RegexNode(RegexNodeKind.One, startingNodeOptions, startingSpan[0]) : + new RegexNode(RegexNodeKind.Multi, startingNodeOptions, startingSpan.ToString()); + var newAlternate = new RegexNode(RegexNodeKind.Alternate, startingNodeOptions); for (int i = startingIndex; i < endingIndex; i++) { RegexNode branch = children[i]; - ProcessOneOrMulti(branch.Type == Concatenate ? branch.Child(0) : branch, startingSpan); + ProcessOneOrMulti(branch.Kind == RegexNodeKind.Concatenate ? branch.Child(0) : branch, startingSpan); branch = branch.Reduce(); newAlternate.AddChild(branch); @@ -1371,25 +1302,25 @@ static RegexNode ExtractCommonPrefixText(RegexNode alternation) // the type of the node to be Empty if the starting text matches the node's full value. 
static void ProcessOneOrMulti(RegexNode node, ReadOnlySpan startingSpan) { - if (node.Type == One) + if (node.Kind == RegexNodeKind.One) { Debug.Assert(startingSpan.Length == 1); Debug.Assert(startingSpan[0] == node.Ch); - node.Type = Empty; + node.Kind = RegexNodeKind.Empty; node.Ch = '\0'; } else { - Debug.Assert(node.Type == Multi); + Debug.Assert(node.Kind == RegexNodeKind.Multi); Debug.Assert(node.Str.AsSpan().StartsWith(startingSpan, StringComparison.Ordinal)); if (node.Str!.Length == startingSpan.Length) { - node.Type = Empty; + node.Kind = RegexNodeKind.Empty; node.Str = null; } else if (node.Str.Length - 1 == startingSpan.Length) { - node.Type = One; + node.Kind = RegexNodeKind.One; node.Ch = node.Str[node.Str.Length - 1]; node.Str = null; } @@ -1401,14 +1332,14 @@ static void ProcessOneOrMulti(RegexNode node, ReadOnlySpan startingSpan) } } - if (alternation.Next is RegexNode parent && parent.Type == Atomic) + if (alternation.Parent is RegexNode parent && parent.Kind == RegexNodeKind.Atomic) { - var atomic = new RegexNode(Atomic, startingNodeOptions); + var atomic = new RegexNode(RegexNodeKind.Atomic, startingNodeOptions); atomic.AddChild(newAlternate); newAlternate = atomic; } - var newConcat = new RegexNode(Concatenate, startingNodeOptions); + var newConcat = new RegexNode(RegexNodeKind.Concatenate, startingNodeOptions); newConcat.AddChild(prefix); newConcat.AddChild(newAlternate); alternation.ReplaceChild(startingIndex, newConcat); @@ -1428,23 +1359,23 @@ static void ProcessOneOrMulti(RegexNode node, ReadOnlySpan startingSpan) /// public RegexNode? FindBranchOneOrMultiStart() { - RegexNode branch = Type == Concatenate ? Child(0) : this; - return branch.Type is One or Multi ? branch : null; + RegexNode branch = Kind == RegexNodeKind.Concatenate ? Child(0) : this; + return branch.Kind is RegexNodeKind.One or RegexNodeKind.Multi ? branch : null; } /// Same as but also for Sets. public RegexNode? 
FindBranchOneMultiOrSetStart() { - RegexNode branch = Type == Concatenate ? Child(0) : this; - return branch.Type is One or Multi or Set ? branch : null; + RegexNode branch = Kind == RegexNodeKind.Concatenate ? Child(0) : this; + return branch.Kind is RegexNodeKind.One or RegexNodeKind.Multi or RegexNodeKind.Set ? branch : null; } /// Gets the character that begins a One or Multi. public char FirstCharOfOneOrMulti() { - Debug.Assert(Type is One or Multi); + Debug.Assert(Kind is RegexNodeKind.One or RegexNodeKind.Multi); Debug.Assert((Options & RegexOptions.RightToLeft) == 0); - return Type == One ? Ch : Str![0]; + return Kind == RegexNodeKind.One ? Ch : Str![0]; } /// Finds the guaranteed beginning character of the node, or null if none exists. @@ -1455,29 +1386,29 @@ public char FirstCharOfOneOrMulti() { if (node is not null && (node.Options & RegexOptions.RightToLeft) == 0) { - switch (node.Type) + switch (node.Kind) { - case One: - case Oneloop or Oneloopatomic or Onelazy when node.M > 0: + case RegexNodeKind.One: + case RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy when node.M > 0: if ((node.Options & RegexOptions.IgnoreCase) == 0 || !RegexCharClass.ParticipatesInCaseConversion(node.Ch)) { return (node.Ch, null); } break; - case Multi: + case RegexNodeKind.Multi: if ((node.Options & RegexOptions.IgnoreCase) == 0 || !RegexCharClass.ParticipatesInCaseConversion(node.Str.AsSpan())) { return ('\0', node.Str); } break; - case Atomic: - case Concatenate: - case Capture: - case Group: - case Loop or Lazyloop when node.M > 0: - case Require: + case RegexNodeKind.Atomic: + case RegexNodeKind.Concatenate: + case RegexNodeKind.Capture: + case RegexNodeKind.Group: + case RegexNodeKind.Loop or RegexNodeKind.Lazyloop when node.M > 0: + case RegexNodeKind.PositiveLookaround: node = node.Child(0); continue; } @@ -1494,13 +1425,13 @@ public char FirstCharOfOneOrMulti() /// private RegexNode ReduceConcatenation() { - Debug.Assert(Type == 
Concatenate); + Debug.Assert(Kind == RegexNodeKind.Concatenate); // If the concat node has zero or only one child, get rid of the concat. switch (ChildCount()) { case 0: - return new RegexNode(Empty, Options); + return new RegexNode(RegexNodeKind.Empty, Options); case 1: return Child(0); } @@ -1523,7 +1454,7 @@ private RegexNode ReduceConcatenation() /// private void ReduceConcatenationWithAdjacentStrings() { - Debug.Assert(Type == Concatenate); + Debug.Assert(Kind == RegexNodeKind.Concatenate); Debug.Assert(Children is List); bool wasLastString = false; @@ -1540,26 +1471,26 @@ private void ReduceConcatenationWithAdjacentStrings() children[j] = at; } - if (at.Type == Concatenate && + if (at.Kind == RegexNodeKind.Concatenate && ((at.Options & RegexOptions.RightToLeft) == (Options & RegexOptions.RightToLeft))) { if (at.Children is List atChildren) { for (int k = 0; k < atChildren.Count; k++) { - atChildren[k].Next = this; + atChildren[k].Parent = this; } children.InsertRange(i + 1, atChildren); } else { RegexNode atChild = (RegexNode)at.Children!; - atChild.Next = this; + atChild.Parent = this; children.Insert(i + 1, atChild); } j--; } - else if (at.Type == Multi || at.Type == One) + else if (at.Kind is RegexNodeKind.Multi or RegexNodeKind.One) { // Cannot merge strings if L or I options differ RegexOptions optionsAt = at.Options & (RegexOptions.RightToLeft | RegexOptions.IgnoreCase); @@ -1573,22 +1504,22 @@ private void ReduceConcatenationWithAdjacentStrings() RegexNode prev = children[--j]; - if (prev.Type == One) + if (prev.Kind == RegexNodeKind.One) { - prev.Type = Multi; + prev.Kind = RegexNodeKind.Multi; prev.Str = prev.Ch.ToString(); } if ((optionsAt & RegexOptions.RightToLeft) == 0) { - prev.Str = (at.Type == One) ? $"{prev.Str}{at.Ch}" : prev.Str + at.Str; + prev.Str = (at.Kind == RegexNodeKind.One) ? $"{prev.Str}{at.Ch}" : prev.Str + at.Str; } else { - prev.Str = (at.Type == One) ? 
$"{at.Ch}{prev.Str}" : at.Str + prev.Str; + prev.Str = (at.Kind == RegexNodeKind.One) ? $"{at.Ch}{prev.Str}" : at.Str + prev.Str; } } - else if (at.Type == Empty) + else if (at.Kind == RegexNodeKind.Empty) { j--; } @@ -1610,7 +1541,7 @@ private void ReduceConcatenationWithAdjacentStrings() /// private void ReduceConcatenationWithAdjacentLoops() { - Debug.Assert(Type == Concatenate); + Debug.Assert(Kind == RegexNodeKind.Concatenate); Debug.Assert(Children is List); var children = (List)Children!; @@ -1645,11 +1576,11 @@ static bool CanCombineCounts(int nodeMin, int nodeMax, int nextMin, int nextMax) return true; } - switch (currentNode.Type) + switch (currentNode.Kind) { // Coalescing a loop with its same type - case Oneloop or Oneloopatomic or Onelazy or Notoneloop or Notoneloopatomic or Notonelazy when nextNode.Type == currentNode.Type && currentNode.Ch == nextNode.Ch: - case Setloop or Setloopatomic or Setlazy when nextNode.Type == currentNode.Type && currentNode.Str == nextNode.Str: + case RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Notonelazy when nextNode.Kind == currentNode.Kind && currentNode.Ch == nextNode.Ch: + case RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy when nextNode.Kind == currentNode.Kind && currentNode.Str == nextNode.Str: if (CanCombineCounts(currentNode.M, currentNode.N, nextNode.M, nextNode.N)) { currentNode.M += nextNode.M; @@ -1663,9 +1594,9 @@ static bool CanCombineCounts(int nodeMin, int nodeMax, int nextMin, int nextMax) break; // Coalescing a loop with an additional item of the same type - case Oneloop or Oneloopatomic or Onelazy when nextNode.Type == One && currentNode.Ch == nextNode.Ch: - case Notoneloop or Notoneloopatomic or Notonelazy when nextNode.Type == Notone && currentNode.Ch == nextNode.Ch: - case Setloop or Setloopatomic or Setlazy when nextNode.Type == Set && currentNode.Str == 
nextNode.Str: + case RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy when nextNode.Kind == RegexNodeKind.One && currentNode.Ch == nextNode.Ch: + case RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Notonelazy when nextNode.Kind == RegexNodeKind.Notone && currentNode.Ch == nextNode.Ch: + case RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy when nextNode.Kind == RegexNodeKind.Set && currentNode.Str == nextNode.Str: if (CanCombineCounts(currentNode.M, currentNode.N, 1, 1)) { currentNode.M++; @@ -1679,12 +1610,12 @@ static bool CanCombineCounts(int nodeMin, int nodeMax, int nextMin, int nextMax) break; // Coalescing an individual item with a loop. - case One when (nextNode.Type == Oneloop || nextNode.Type == Oneloopatomic || nextNode.Type == Onelazy) && currentNode.Ch == nextNode.Ch: - case Notone when (nextNode.Type == Notoneloop || nextNode.Type == Notoneloopatomic || nextNode.Type == Notonelazy) && currentNode.Ch == nextNode.Ch: - case Set when (nextNode.Type == Setloop || nextNode.Type == Setloopatomic || nextNode.Type == Setlazy) && currentNode.Str == nextNode.Str: + case RegexNodeKind.One when (nextNode.Kind is RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy) && currentNode.Ch == nextNode.Ch: + case RegexNodeKind.Notone when (nextNode.Kind is RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Notonelazy) && currentNode.Ch == nextNode.Ch: + case RegexNodeKind.Set when (nextNode.Kind is RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy) && currentNode.Str == nextNode.Str: if (CanCombineCounts(1, 1, nextNode.M, nextNode.N)) { - currentNode.Type = nextNode.Type; + currentNode.Kind = nextNode.Kind; currentNode.M = nextNode.M + 1; currentNode.N = nextNode.N == int.MaxValue ? 
int.MaxValue : nextNode.N + 1; next++; @@ -1693,9 +1624,9 @@ static bool CanCombineCounts(int nodeMin, int nodeMax, int nextMin, int nextMax) break; // Coalescing an individual item with another individual item. - case One or Notone when nextNode.Type == currentNode.Type && currentNode.Ch == nextNode.Ch: - case Set when nextNode.Type == Set && currentNode.Str == nextNode.Str: - currentNode.MakeRep(Oneloop, 2, 2); + case RegexNodeKind.One or RegexNodeKind.Notone when nextNode.Kind == currentNode.Kind && currentNode.Ch == nextNode.Ch: + case RegexNodeKind.Set when nextNode.Kind == RegexNodeKind.Set && currentNode.Str == nextNode.Str: + currentNode.MakeRep(RegexNodeKind.Oneloop, 2, 2); next++; continue; } @@ -1744,7 +1675,7 @@ private void FindAndMakeLoopsAtomic() } // If this isn't a concatenation, nothing more to do. - if (Type is not Concatenate) + if (Kind is not RegexNodeKind.Concatenate) { return; } @@ -1769,7 +1700,7 @@ static void ProcessNode(RegexNode node, RegexNode subsequent) while (true) { // We can always recur into captures and into the last node of concatenations. - if (node.Type == Capture || node.Type == Concatenate) + if (node.Kind is RegexNodeKind.Capture or RegexNodeKind.Concatenate) { node = node.Child(node.ChildCount() - 1); continue; @@ -1779,7 +1710,7 @@ static void ProcessNode(RegexNode node, RegexNode subsequent) // we need to be careful not to just always do so; the ending node of a loop can only // be made atomic if what comes after the loop but also the beginning of the loop are // compatible for the optimization. - if (node.Type == Loop) + if (node.Kind == RegexNodeKind.Loop) { RegexNode? loopDescendent = node.FindLastExpressionInLoopForAutoAtomic(); if (loopDescendent != null) @@ -1794,16 +1725,13 @@ static void ProcessNode(RegexNode node, RegexNode subsequent) } // If the node can be changed to atomic based on what comes after it, do so. 
- switch (node.Type) + switch (node.Kind) { - case Oneloop when CanBeMadeAtomic(node, subsequent, allowSubsequentIteration: true): - case Notoneloop when CanBeMadeAtomic(node, subsequent, allowSubsequentIteration: true): - case Setloop when CanBeMadeAtomic(node, subsequent, allowSubsequentIteration: true): + case RegexNodeKind.Oneloop or RegexNodeKind.Notoneloop or RegexNodeKind.Setloop when CanBeMadeAtomic(node, subsequent, allowSubsequentIteration: true): node.MakeLoopAtomic(); break; - case Alternate: - case Testref: - case Testgroup: + + case RegexNodeKind.Alternate or RegexNodeKind.BackreferenceConditional or RegexNodeKind.ExpressionConditional: // In the case of alternation, we can't change the alternation node itself // based on what comes after it (at least not with more complicated analysis // that factors in all branches together), but we can look at each individual @@ -1814,7 +1742,7 @@ static void ProcessNode(RegexNode node, RegexNode subsequent) // itself, as it's already considered atomic and handled as part of ReduceTestgroup. { int alternateBranches = node.ChildCount(); - for (int b = node.Type == Testgroup ? 1 : 0; b < alternateBranches; b++) + for (int b = node.Kind == RegexNodeKind.ExpressionConditional ? 1 : 0; b < alternateBranches; b++) { ProcessNode(node.Child(b), subsequent); } @@ -1834,13 +1762,13 @@ static void ProcessNode(RegexNode node, RegexNode subsequent) { RegexNode node = this; - Debug.Assert(node.Type is Loop or Lazyloop); + Debug.Assert(node.Kind is RegexNodeKind.Loop or RegexNodeKind.Lazyloop); // Start by looking at the loop's sole child. node = node.Child(0); // Skip past captures. - while (node.Type == Capture) + while (node.Kind == RegexNodeKind.Capture) { node = node.Child(0); } @@ -1851,7 +1779,7 @@ static void ProcessNode(RegexNode node, RegexNode subsequent) // example, in the expression (a+[def])*, the last child is [def] and the first is // a+, which can't possibly overlap with [def]. 
In contrast, if we had (a+[ade])*, // [ade] could potentially match the starting 'a'. - if (node.Type == Concatenate) + if (node.Kind == RegexNodeKind.Concatenate) { int concatCount = node.ChildCount(); RegexNode lastConcatChild = node.Child(concatCount - 1); @@ -1868,7 +1796,7 @@ static void ProcessNode(RegexNode node, RegexNode subsequent) /// Optimizations for positive lookaheads/behinds. private RegexNode ReduceRequire() { - Debug.Assert(Type == Require); + Debug.Assert(Kind == RegexNodeKind.PositiveLookaround); Debug.Assert(ChildCount() == 1); // A positive lookaround is a zero-width atomic assertion. @@ -1879,9 +1807,9 @@ private RegexNode ReduceRequire() // A positive lookaround wrapped around an empty is a nop, and can just // be made into an empty. A developer typically doesn't write this, but // rather it evolves due to optimizations resulting in empty. - if (Child(0).Type == Empty) + if (Child(0).Kind == RegexNodeKind.Empty) { - Type = Empty; + Kind = RegexNodeKind.Empty; Children = null; } @@ -1891,15 +1819,15 @@ private RegexNode ReduceRequire() /// Optimizations for negative lookaheads/behinds. private RegexNode ReducePrevent() { - Debug.Assert(Type == Prevent); + Debug.Assert(Kind == RegexNodeKind.NegativeLookaround); Debug.Assert(ChildCount() == 1); // A negative lookaround wrapped around an empty child, i.e. (?!), is // sometimes used as a way to insert a guaranteed no-match into the expression. // We can reduce it to simply Nothing. - if (Child(0).Type == Empty) + if (Child(0).Kind == RegexNodeKind.Empty) { - Type = Nothing; + Kind = RegexNodeKind.Nothing; Children = null; } @@ -1909,7 +1837,7 @@ private RegexNode ReducePrevent() /// Optimizations for backreference conditionals. private RegexNode ReduceTestref() { - Debug.Assert(Type == Testref); + Debug.Assert(Kind == RegexNodeKind.BackreferenceConditional); Debug.Assert(ChildCount() is 1 or 2); // This isn't so much an optimization as it is changing the tree for consistency. 
@@ -1918,7 +1846,7 @@ private RegexNode ReduceTestref() // we add one that will match empty. if (ChildCount() == 1) { - AddChild(new RegexNode(Empty, Options)); + AddChild(new RegexNode(RegexNodeKind.Empty, Options)); } return this; @@ -1927,7 +1855,7 @@ private RegexNode ReduceTestref() /// Optimizations for expression conditionals. private RegexNode ReduceTestgroup() { - Debug.Assert(Type == Testgroup); + Debug.Assert(Kind == RegexNodeKind.ExpressionConditional); Debug.Assert(ChildCount() is 2 or 3); // This isn't so much an optimization as it is changing the tree for consistency. @@ -1936,7 +1864,7 @@ private RegexNode ReduceTestgroup() // we add one that will match empty. if (ChildCount() == 2) { - AddChild(new RegexNode(Empty, Options)); + AddChild(new RegexNode(RegexNodeKind.Empty, Options)); } // It's common for the condition to be an explicit positive lookahead, as specifying @@ -1945,7 +1873,7 @@ private RegexNode ReduceTestgroup() // there's no ambiguity, and we can remove an extra level of positive lookahead, as the // engines need to treat the condition as a zero-width positive, atomic assertion regardless. 
RegexNode condition = Child(0); - if (condition.Type == Require && (condition.Options & RegexOptions.RightToLeft) == 0) + if (condition.Kind == RegexNodeKind.PositiveLookaround && (condition.Options & RegexOptions.RightToLeft) == 0) { ReplaceChild(0, condition.Child(0)); } @@ -1980,14 +1908,14 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool a int childCount; while ((childCount = subsequent.ChildCount()) > 0) { - Debug.Assert(subsequent.Type != Group); - switch (subsequent.Type) + Debug.Assert(subsequent.Kind != RegexNodeKind.Group); + switch (subsequent.Kind) { - case Concatenate: - case Capture: - case Atomic: - case Require when (subsequent.Options & RegexOptions.RightToLeft) == 0: // only lookaheads, not lookbehinds (represented as RTL Require nodes) - case Loop or Lazyloop when subsequent.M > 0: + case RegexNodeKind.Concatenate: + case RegexNodeKind.Capture: + case RegexNodeKind.Atomic: + case RegexNodeKind.PositiveLookaround when (subsequent.Options & RegexOptions.RightToLeft) == 0: // only lookaheads, not lookbehinds (represented as RTL PositiveLookaround nodes) + case RegexNodeKind.Loop or RegexNodeKind.Lazyloop when subsequent.M > 0: subsequent = subsequent.Child(0); continue; } @@ -2008,10 +1936,10 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool a // only a yes branch, we'd need to also check whatever comes after the conditional). It doesn't apply to // backreference conditionals, as the condition itself is unknown statically and could overlap with the // loop being considered for atomicity. 
- switch (subsequent.Type) + switch (subsequent.Kind) { - case Alternate: - case Testgroup when childCount == 3: // condition, yes, and no branch + case RegexNodeKind.Alternate: + case RegexNodeKind.ExpressionConditional when childCount == 3: // condition, yes, and no branch for (int i = 0; i < childCount; i++) { if (!CanBeMadeAtomic(node, subsequent.Child(i), allowSubsequentIteration)) @@ -2025,29 +1953,29 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool a // If this node is a {one/notone/set}loop, see if it overlaps with its successor in the concatenation. // If it doesn't, then we can upgrade it to being a {one/notone/set}loopatomic. // Doing so avoids unnecessary backtracking. - switch (node.Type) + switch (node.Kind) { - case Oneloop: - switch (subsequent.Type) + case RegexNodeKind.Oneloop: + switch (subsequent.Kind) { - case One when node.Ch != subsequent.Ch: - case Notone when node.Ch == subsequent.Ch: - case Set when !RegexCharClass.CharInClass(node.Ch, subsequent.Str!): - case Onelazy or Oneloop or Oneloopatomic when subsequent.M > 0 && node.Ch != subsequent.Ch: - case Notonelazy or Notoneloop or Notoneloopatomic when subsequent.M > 0 && node.Ch == subsequent.Ch: - case Setlazy or Setloop or Setloopatomic when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!): - case Multi when node.Ch != subsequent.Str![0]: - case End: - case EndZ or Eol when node.Ch != '\n': - case Boundary when RegexCharClass.IsBoundaryWordChar(node.Ch): - case NonBoundary when !RegexCharClass.IsBoundaryWordChar(node.Ch): - case ECMABoundary when RegexCharClass.IsECMAWordChar(node.Ch): - case NonECMABoundary when !RegexCharClass.IsECMAWordChar(node.Ch): + case RegexNodeKind.One when node.Ch != subsequent.Ch: + case RegexNodeKind.Notone when node.Ch == subsequent.Ch: + case RegexNodeKind.Set when !RegexCharClass.CharInClass(node.Ch, subsequent.Str!): + case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or 
RegexNodeKind.Oneloopatomic when subsequent.M > 0 && node.Ch != subsequent.Ch: + case RegexNodeKind.Notonelazy or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic when subsequent.M > 0 && node.Ch == subsequent.Ch: + case RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!): + case RegexNodeKind.Multi when node.Ch != subsequent.Str![0]: + case RegexNodeKind.End: + case RegexNodeKind.EndZ or RegexNodeKind.Eol when node.Ch != '\n': + case RegexNodeKind.Boundary when RegexCharClass.IsBoundaryWordChar(node.Ch): + case RegexNodeKind.NonBoundary when !RegexCharClass.IsBoundaryWordChar(node.Ch): + case RegexNodeKind.ECMABoundary when RegexCharClass.IsECMAWordChar(node.Ch): + case RegexNodeKind.NonECMABoundary when !RegexCharClass.IsECMAWordChar(node.Ch): return true; - case Onelazy or Oneloop or Oneloopatomic when subsequent.M == 0 && node.Ch != subsequent.Ch: - case Notonelazy or Notoneloop or Notoneloopatomic when subsequent.M == 0 && node.Ch == subsequent.Ch: - case Setlazy or Setloop or Setloopatomic when subsequent.M == 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!): + case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic when subsequent.M == 0 && node.Ch != subsequent.Ch: + case RegexNodeKind.Notonelazy or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic when subsequent.M == 0 && node.Ch == subsequent.Ch: + case RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic when subsequent.M == 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!): // The loop can be made atomic based on this subsequent node, but we'll need to evaluate the next one as well. 
break; @@ -2056,16 +1984,16 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool a } break; - case Notoneloop: - switch (subsequent.Type) + case RegexNodeKind.Notoneloop: + switch (subsequent.Kind) { - case One when node.Ch == subsequent.Ch: - case Onelazy or Oneloop or Oneloopatomic when subsequent.M > 0 && node.Ch == subsequent.Ch: - case Multi when node.Ch == subsequent.Str![0]: - case End: + case RegexNodeKind.One when node.Ch == subsequent.Ch: + case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic when subsequent.M > 0 && node.Ch == subsequent.Ch: + case RegexNodeKind.Multi when node.Ch == subsequent.Str![0]: + case RegexNodeKind.End: return true; - case Onelazy or Oneloop or Oneloopatomic when subsequent.M == 0 && node.Ch == subsequent.Ch: + case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic when subsequent.M == 0 && node.Ch == subsequent.Ch: // The loop can be made atomic based on this subsequent node, but we'll need to evaluate the next one as well. 
break; @@ -2074,24 +2002,24 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool a } break; - case Setloop: - switch (subsequent.Type) + case RegexNodeKind.Setloop: + switch (subsequent.Kind) { - case One when !RegexCharClass.CharInClass(subsequent.Ch, node.Str!): - case Set when !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!): - case Onelazy or Oneloop or Oneloopatomic when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!): - case Setlazy or Setloop or Setloopatomic when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!): - case Multi when !RegexCharClass.CharInClass(subsequent.Str![0], node.Str!): - case End: - case EndZ or Eol when !RegexCharClass.CharInClass('\n', node.Str!): - case Boundary when node.Str == RegexCharClass.WordClass || node.Str == RegexCharClass.DigitClass: - case NonBoundary when node.Str == RegexCharClass.NotWordClass || node.Str == RegexCharClass.NotDigitClass: - case ECMABoundary when node.Str == RegexCharClass.ECMAWordClass || node.Str == RegexCharClass.ECMADigitClass: - case NonECMABoundary when node.Str == RegexCharClass.NotECMAWordClass || node.Str == RegexCharClass.NotDigitClass: + case RegexNodeKind.One when !RegexCharClass.CharInClass(subsequent.Ch, node.Str!): + case RegexNodeKind.Set when !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!): + case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!): + case RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!): + case RegexNodeKind.Multi when !RegexCharClass.CharInClass(subsequent.Str![0], node.Str!): + case RegexNodeKind.End: + case RegexNodeKind.EndZ or RegexNodeKind.Eol when !RegexCharClass.CharInClass('\n', node.Str!): + case RegexNodeKind.Boundary when node.Str is 
RegexCharClass.WordClass or RegexCharClass.DigitClass: + case RegexNodeKind.NonBoundary when node.Str is RegexCharClass.NotWordClass or RegexCharClass.NotDigitClass: + case RegexNodeKind.ECMABoundary when node.Str is RegexCharClass.ECMAWordClass or RegexCharClass.ECMADigitClass: + case RegexNodeKind.NonECMABoundary when node.Str is RegexCharClass.NotECMAWordClass or RegexCharClass.NotDigitClass: return true; - case Onelazy or Oneloop or Oneloopatomic when subsequent.M == 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!): - case Setlazy or Setloop or Setloopatomic when subsequent.M == 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!): + case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic when subsequent.M == 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!): + case RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic when subsequent.M == 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!): // The loop can be made atomic based on this subsequent node, but we'll need to evaluate the next one as well. break; @@ -2106,7 +2034,7 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool a // We only get here if the node could be made atomic based on subsequent but subsequent has a lower bound of zero // and thus we need to move subsequent to be the next node in sequence and loop around to try again. 
- Debug.Assert(subsequent.Type is Oneloop or Oneloopatomic or Onelazy or Notoneloop or Notoneloopatomic or Notonelazy or Setloop or Setloopatomic or Setlazy); + Debug.Assert(subsequent.Kind is RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Notonelazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy); Debug.Assert(subsequent.M == 0); if (!allowSubsequentIteration) { @@ -2118,16 +2046,16 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool a // which point subsequent becomes whatever node is next in that concatenation. while (true) { - RegexNode? parent = subsequent.Next; - switch (parent?.Type) + RegexNode? parent = subsequent.Parent; + switch (parent?.Kind) { - case Atomic: - case Alternate: - case Capture: + case RegexNodeKind.Atomic: + case RegexNodeKind.Alternate: + case RegexNodeKind.Capture: subsequent = parent; continue; - case Concatenate: + case RegexNodeKind.Concatenate: var peers = (List)parent.Children!; int currentIndex = peers.IndexOf(subsequent); Debug.Assert(currentIndex >= 0, "Node should have been in its parent's child list"); @@ -2170,36 +2098,36 @@ public int ComputeMinLength() return 0; } - switch (Type) + switch (Kind) { - case One: - case Notone: - case Set: + case RegexNodeKind.One: + case RegexNodeKind.Notone: + case RegexNodeKind.Set: // Single character. return 1; - case Multi: + case RegexNodeKind.Multi: // Every character in the string needs to match. 
return Str!.Length; - case Notonelazy: - case Notoneloop: - case Notoneloopatomic: - case Onelazy: - case Oneloop: - case Oneloopatomic: - case Setlazy: - case Setloop: - case Setloopatomic: + case RegexNodeKind.Notonelazy: + case RegexNodeKind.Notoneloop: + case RegexNodeKind.Notoneloopatomic: + case RegexNodeKind.Onelazy: + case RegexNodeKind.Oneloop: + case RegexNodeKind.Oneloopatomic: + case RegexNodeKind.Setlazy: + case RegexNodeKind.Setloop: + case RegexNodeKind.Setloopatomic: // One character repeated at least M times. return M; - case Lazyloop: - case Loop: + case RegexNodeKind.Lazyloop: + case RegexNodeKind.Loop: // A node graph repeated at least M times. return (int)Math.Min(int.MaxValue, (long)M * Child(0).ComputeMinLength()); - case Alternate: + case RegexNodeKind.Alternate: // The minimum required length for any of the alternation's branches. { int childCount = ChildCount(); @@ -2212,15 +2140,15 @@ public int ComputeMinLength() return min; } - case Testref: + case RegexNodeKind.BackreferenceConditional: // Minimum of its yes and no branches. The backreference doesn't add to the length. return Math.Min(Child(0).ComputeMinLength(), Child(1).ComputeMinLength()); - case Testgroup: + case RegexNodeKind.ExpressionConditional: // Minimum of its yes and no branches. The condition is a zero-width assertion. return Math.Min(Child(1).ComputeMinLength(), Child(2).ComputeMinLength()); - case Concatenate: + case RegexNodeKind.Concatenate: // The sum of all of the concatenation's children. { long sum = 0; @@ -2232,36 +2160,36 @@ public int ComputeMinLength() return (int)Math.Min(int.MaxValue, sum); } - case Atomic: - case Capture: - case Group: + case RegexNodeKind.Atomic: + case RegexNodeKind.Capture: + case RegexNodeKind.Group: // For groups, we just delegate to the sole child. 
Debug.Assert(ChildCount() == 1); return Child(0).ComputeMinLength(); - case Empty: - case Nothing: - case UpdateBumpalong: + case RegexNodeKind.Empty: + case RegexNodeKind.Nothing: + case RegexNodeKind.UpdateBumpalong: // Nothing to match. In the future, we could potentially use Nothing to say that the min length // is infinite, but that would require a different structure, as that would only apply if the // Nothing match is required in all cases (rather than, say, as one branch of an alternation). - case Beginning: - case Bol: - case Boundary: - case ECMABoundary: - case End: - case EndZ: - case Eol: - case NonBoundary: - case NonECMABoundary: - case Start: + case RegexNodeKind.Beginning: + case RegexNodeKind.Bol: + case RegexNodeKind.Boundary: + case RegexNodeKind.ECMABoundary: + case RegexNodeKind.End: + case RegexNodeKind.EndZ: + case RegexNodeKind.Eol: + case RegexNodeKind.NonBoundary: + case RegexNodeKind.NonECMABoundary: + case RegexNodeKind.Start: // Difficult to glean anything meaningful from boundaries or results only known at run time. - case Prevent: - case Require: + case RegexNodeKind.NegativeLookaround: + case RegexNodeKind.PositiveLookaround: // Lookaheads/behinds could potentially be included in the future, but that will require // a different structure, as they can't be added as part of a concatenation, since they overlap // with what comes after. - case Ref: + case RegexNodeKind.Backreference: // Constructs requiring data at runtime from the matching pattern can't influence min length. 
return 0; @@ -2269,7 +2197,7 @@ public int ComputeMinLength() #if DEBUG Debug.Fail($"Unknown node: {TypeName}"); #endif - goto case Empty; + goto case RegexNodeKind.Empty; } } @@ -2292,13 +2220,14 @@ public int ComputeMinLength() /// public bool TryGetJoinableLengthCheckChildRange(int childIndex, out int requiredLength, out int exclusiveEnd) { - static bool CanJoinLengthCheck(RegexNode node) => node.Type switch - { - One or Notone or Set => true, - Multi => true, - Oneloop or Onelazy or Oneloopatomic or - Notoneloop or Notonelazy or Notoneloopatomic or - Setloop or Setlazy or Setloopatomic when node.M == node.N => true, + static bool CanJoinLengthCheck(RegexNode node) => node.Kind switch + { + RegexNodeKind.One or RegexNodeKind.Notone or RegexNodeKind.Set => true, + RegexNodeKind.Multi => true, + RegexNodeKind.Oneloop or RegexNodeKind.Onelazy or RegexNodeKind.Oneloopatomic or + RegexNodeKind.Notoneloop or RegexNodeKind.Notonelazy or RegexNodeKind.Notoneloopatomic or + RegexNodeKind.Setloop or RegexNodeKind.Setlazy or RegexNodeKind.Setloopatomic + when node.M == node.N => true, _ => false, }; @@ -2333,21 +2262,21 @@ Notoneloop or Notonelazy or Notoneloopatomic or public RegexNode MakeQuantifier(bool lazy, int min, int max) { if (min == 0 && max == 0) - return new RegexNode(Empty, Options); + return new RegexNode(RegexNodeKind.Empty, Options); if (min == 1 && max == 1) return this; - switch (Type) + switch (Kind) { - case One: - case Notone: - case Set: - MakeRep(lazy ? Onelazy : Oneloop, min, max); + case RegexNodeKind.One: + case RegexNodeKind.Notone: + case RegexNodeKind.Set: + MakeRep(lazy ? RegexNodeKind.Onelazy : RegexNodeKind.Oneloop, min, max); return this; default: - var result = new RegexNode(lazy ? Lazyloop : Loop, Options, min, max); + var result = new RegexNode(lazy ? 
RegexNodeKind.Lazyloop : RegexNodeKind.Loop, Options, min, max); result.AddChild(this); return result; } @@ -2355,9 +2284,9 @@ public RegexNode MakeQuantifier(bool lazy, int min, int max) public void AddChild(RegexNode newChild) { - newChild.Next = this; // so that the child can see its parent while being reduced + newChild.Parent = this; // so that the child can see its parent while being reduced newChild = newChild.Reduce(); - newChild.Next = this; // in case Reduce returns a different node that needs to be reparented + newChild.Parent = this; // in case Reduce returns a different node that needs to be reparented if (Children is null) { @@ -2377,9 +2306,9 @@ public void InsertChild(int index, RegexNode newChild) { Debug.Assert(Children is List); - newChild.Next = this; // so that the child can see its parent while being reduced + newChild.Parent = this; // so that the child can see its parent while being reduced newChild = newChild.Reduce(); - newChild.Next = this; // in case Reduce returns a different node that needs to be reparented + newChild.Parent = this; // in case Reduce returns a different node that needs to be reparented ((List)Children).Insert(index, newChild); } @@ -2389,9 +2318,9 @@ public void ReplaceChild(int index, RegexNode newChild) Debug.Assert(Children != null); Debug.Assert(index < ChildCount()); - newChild.Next = this; // so that the child can see its parent while being reduced + newChild.Parent = this; // so that the child can see its parent while being reduced newChild = newChild.Reduce(); - newChild.Next = this; // in case Reduce returns a different node that needs to be reparented + newChild.Parent = this; // in case Reduce returns a different node that needs to be reparented if (Children is RegexNode) { @@ -2403,15 +2332,7 @@ public void ReplaceChild(int index, RegexNode newChild) } } - public RegexNode Child(int i) - { - if (Children is RegexNode child) - { - return child; - } - - return ((List)Children!)[i]; - } + public RegexNode 
Child(int i) => Children is RegexNode child ? child : ((List)Children!)[i]; public int ChildCount() { @@ -2458,10 +2379,10 @@ internal bool SupportsCompilation() // TODO: This should be moved somewhere else, to a pass somewhere where we explicitly // annotate the tree, potentially as part of the final optimization pass. It doesn't // belong in this check. - if (Type == Capture) + if (Kind == RegexNodeKind.Capture) { // If we've found a supported capture, mark all of the nodes in its parent hierarchy as containing a capture. - for (RegexNode? parent = this; parent != null && (parent.Options & HasCapturesFlag) == 0; parent = parent.Next) + for (RegexNode? parent = this; parent != null && (parent.Options & HasCapturesFlag) == 0; parent = parent.Parent) { parent.Options |= HasCapturesFlag; } @@ -2472,20 +2393,20 @@ internal bool SupportsCompilation() } /// Gets whether the node is a Set/Setloop/Setloopatomic/Setlazy node. - public bool IsSetFamily => Type is Set or Setloop or Setloopatomic or Setlazy; + public bool IsSetFamily => Kind is RegexNodeKind.Set or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy; /// Gets whether the node is a One/Oneloop/Oneloopatomic/Onelazy node. - public bool IsOneFamily => Type is One or Oneloop or Oneloopatomic or Onelazy; + public bool IsOneFamily => Kind is RegexNodeKind.One or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy; /// Gets whether the node is a Notone/Notoneloop/Notoneloopatomic/Notonelazy node. - public bool IsNotoneFamily => Type is Notone or Notoneloop or Notoneloopatomic or Notonelazy; + public bool IsNotoneFamily => Kind is RegexNodeKind.Notone or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Notonelazy; /// Gets whether this node is contained inside of a loop. public bool IsInLoop() { - for (RegexNode? parent = Next; parent is not null; parent = parent.Next) + for (RegexNode? 
parent = Parent; parent is not null; parent = parent.Parent) { - if (parent.Type is Loop or Lazyloop) + if (parent.Kind is RegexNodeKind.Loop or RegexNodeKind.Lazyloop) { return true; } @@ -2495,49 +2416,7 @@ public bool IsInLoop() } #if DEBUG - private string TypeName => - Type switch - { - Oneloop => nameof(Oneloop), - Notoneloop => nameof(Notoneloop), - Setloop => nameof(Setloop), - Onelazy => nameof(Onelazy), - Notonelazy => nameof(Notonelazy), - Setlazy => nameof(Setlazy), - One => nameof(One), - Notone => nameof(Notone), - Set => nameof(Set), - Multi => nameof(Multi), - Ref => nameof(Ref), - Bol => nameof(Bol), - Eol => nameof(Eol), - Boundary => nameof(Boundary), - NonBoundary => nameof(NonBoundary), - ECMABoundary => nameof(ECMABoundary), - NonECMABoundary => nameof(NonECMABoundary), - Beginning => nameof(Beginning), - Start => nameof(Start), - EndZ => nameof(EndZ), - End => nameof(End), - Oneloopatomic => nameof(Oneloopatomic), - Notoneloopatomic => nameof(Notoneloopatomic), - Setloopatomic => nameof(Setloopatomic), - Nothing => nameof(Nothing), - Empty => nameof(Empty), - Alternate => nameof(Alternate), - Concatenate => nameof(Concatenate), - Loop => nameof(Loop), - Lazyloop => nameof(Lazyloop), - Capture => nameof(Capture), - Group => nameof(Group), - Require => nameof(Require), - Prevent => nameof(Prevent), - Atomic => nameof(Atomic), - Testref => nameof(Testref), - Testgroup => nameof(Testgroup), - UpdateBumpalong => nameof(UpdateBumpalong), - _ => $"(unknown {Type})" - }; + private string TypeName => Kind.ToString(); [ExcludeFromCodeCoverage] public string Description() @@ -2552,53 +2431,53 @@ public string Description() if ((Options & RegexOptions.IgnorePatternWhitespace) != 0) sb.Append("-X"); if ((Options & RegexOptions.ECMAScript) != 0) sb.Append("-E"); - switch (Type) + switch (Kind) { - case Oneloop: - case Oneloopatomic: - case Notoneloop: - case Notoneloopatomic: - case Onelazy: - case Notonelazy: - case One: - case Notone: + case 
RegexNodeKind.Oneloop: + case RegexNodeKind.Oneloopatomic: + case RegexNodeKind.Notoneloop: + case RegexNodeKind.Notoneloopatomic: + case RegexNodeKind.Onelazy: + case RegexNodeKind.Notonelazy: + case RegexNodeKind.One: + case RegexNodeKind.Notone: sb.Append(" '").Append(RegexCharClass.CharDescription(Ch)).Append('\''); break; - case Capture: + case RegexNodeKind.Capture: sb.Append(' ').Append($"index = {M}"); if (N != -1) { sb.Append($", unindex = {N}"); } break; - case Ref: - case Testref: + case RegexNodeKind.Backreference: + case RegexNodeKind.BackreferenceConditional: sb.Append(' ').Append($"index = {M}"); break; - case Multi: + case RegexNodeKind.Multi: sb.Append(" \"").Append(Str).Append('"'); break; - case Set: - case Setloop: - case Setloopatomic: - case Setlazy: + case RegexNodeKind.Set: + case RegexNodeKind.Setloop: + case RegexNodeKind.Setloopatomic: + case RegexNodeKind.Setlazy: sb.Append(' ').Append(RegexCharClass.SetDescription(Str!)); break; } - switch (Type) - { - case Oneloop: - case Oneloopatomic: - case Notoneloop: - case Notoneloopatomic: - case Onelazy: - case Notonelazy: - case Setloop: - case Setloopatomic: - case Setlazy: - case Loop: - case Lazyloop: + switch (Kind) + { + case RegexNodeKind.Oneloop: + case RegexNodeKind.Oneloopatomic: + case RegexNodeKind.Notoneloop: + case RegexNodeKind.Notoneloopatomic: + case RegexNodeKind.Onelazy: + case RegexNodeKind.Notonelazy: + case RegexNodeKind.Setloop: + case RegexNodeKind.Setloopatomic: + case RegexNodeKind.Setlazy: + case RegexNodeKind.Loop: + case RegexNodeKind.Lazyloop: sb.Append( (M == 0 && N == int.MaxValue) ? "*" : (M == 0 && N == 1) ? "?" 
: @@ -2641,7 +2520,7 @@ public override string ToString() curChild = stack[stack.Count - 1]; stack.RemoveAt(stack.Count - 1); - curNode = curNode.Next; + curNode = curNode.Parent; } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNodeKind.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNodeKind.cs new file mode 100644 index 00000000000000..ab4ea881087c5c --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNodeKind.cs @@ -0,0 +1,182 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Globalization; +using System.Threading; + +namespace System.Text.RegularExpressions +{ + /// Specifies the kind of a . + internal enum RegexNodeKind + { + // The following are leaves (no children) and correspond to primitive operations in the regular expression. + + /// A specific character, e.g. `a`. + /// The character is specified in . + One = RegexCode.One, + /// Anything other than a specific character, e.g. `.` when not in mode, or `[^a]`. + /// The character is specified in . + Notone = RegexCode.Notone, + /// A character class / set, e.g. `[a-z1-9]` or `\w`. + /// The set string is specified in . + Set = RegexCode.Set, + + /// A sequence of at least two specific characters, e.g. `abc`. + /// The characters are specified in . This is purely a representational optimization, equivalent to multiple nodes concatenated together. + Multi = RegexCode.Multi, + + /// A loop around a specific character, e.g. `a*`. + /// + /// The character is specified in , the minimum number of iterations in , and the maximum number of iterations in . + /// This is purely a representational optimization, equivalent to a wrapped around a . 
+ /// + Oneloop = RegexCode.Oneloop, + /// A loop around anything other than a specific character, e.g. `.*` when not in mode, or `[^a]*`. + /// The character is specified in , the minimum number of iterations in , and the maximum number of iterations in . + /// This is purely a representational optimization, equivalent to a wrapped around a . + Notoneloop = RegexCode.Notoneloop, + /// A loop around a character class / set, e.g. `[a-z1-9]*` or `\w*`. + /// The set string is specified in , the minimum number of iterations in , and the maximum number of iterations in . + /// This is purely a representational optimization, equivalent to a wrapped around a . + Setloop = RegexCode.Setloop, + + /// A lazy loop around a specific character, e.g. `a*?`. + /// The character is specified in , the minimum number of iterations in , and the maximum number of iterations in . + /// This is purely a representational optimization, equivalent to a wrapped around a . + Onelazy = RegexCode.Onelazy, + /// A lazy loop around anything other than a specific character, e.g. `.*?` when not in mode, or `[^a]*?`. + /// The character is specified in , the minimum number of iterations in , and the maximum number of iterations in . + /// This is purely a representational optimization, equivalent to a wrapped around a . + Notonelazy = RegexCode.Notonelazy, + /// A lazy loop around a character class / set, e.g. `[a-z1-9]*?` or `\w*?`. + /// The set string is specified in , the minimum number of iterations in , and the maximum number of iterations in . + /// This is purely a representational optimization, equivalent to a wrapped around a . + Setlazy = RegexCode.Setlazy, + + /// An atomic loop around a specific character, e.g. `(?>a*)`. + /// + /// The character is specified in , the minimum number of iterations in , and the maximum number of iterations in . + /// This is purely a representational optimization, equivalent to a wrapped around a .
+ /// + Oneloopatomic = RegexCode.Oneloopatomic, + /// An atomic loop around anything other than a specific character, e.g. `(?>.*)` when not in mode. + /// + /// The character is specified in , the minimum number of iterations in , and the maximum number of iterations in . + /// This is purely a representational optimization, equivalent to a wrapped around a . + /// + Notoneloopatomic = RegexCode.Notoneloopatomic, + /// An atomic loop around a character class / set, e.g. `(?>\d*)`. + /// + /// The set string is specified in , the minimum number of iterations in , and the maximum number of iterations in . + /// This is purely a representational optimization, equivalent to a wrapped around a . + /// + Setloopatomic = RegexCode.Setloopatomic, + + /// A backreference, e.g. `\1`. + /// The capture group number referenced is stored in . + Backreference = RegexCode.Ref, + + /// A beginning-of-line anchor, e.g. `^` in mode. + Bol = RegexCode.Bol, + /// An end-of-line anchor, e.g. `$` in mode. + Eol = RegexCode.Eol, + /// A word boundary anchor, e.g. `\b`. + Boundary = RegexCode.Boundary, + /// Not a word boundary anchor, e.g. `\B`. + NonBoundary = RegexCode.NonBoundary, + /// A word boundary anchor, e.g. `\b` in mode. + ECMABoundary = RegexCode.ECMABoundary, + /// Not a word boundary anchor, e.g. `\B` in mode.. + NonECMABoundary = RegexCode.NonECMABoundary, + /// A beginning-of-string anchor, e.g. `\A`, or `^` when not in mode. + Beginning = RegexCode.Beginning, + /// A start anchor, e.g. `\G`. + Start = RegexCode.Start, + /// A end-of-string-or-before-ending-newline anchor, e.g. `\Z`, or `$` when not in mode. + EndZ = RegexCode.EndZ, + /// A end-of-string-only anchor, e.g. `\z`. + End = RegexCode.End, + + /// A fabricated node injected during analyses to signal a location in the matching where the engine may set the next bumpalong position to the current position. + UpdateBumpalong = RegexCode.UpdateBumpalong, + + /// Fails when matching an empty string, e.g. `(?!)`. 
+ Nothing = 22, + /// Matches the empty string, e.g. ``. + Empty = 23, + + // The following are interior nodes (have at least one child) and correspond to control structures composing other operations. + + /// An alternation between branches, e.g. `ab|cd`. + /// + /// Each child represents one branch, in lexical order. A valid alternation contains at + /// least two children: if an alternation contains only a single child, it can be replaced + /// by that child, and if an alternation has no children, it can be replaced by . + /// + Alternate = 24, + /// A sequence / concatenation of nodes, e.g. a[bc]. + /// + /// Each child represents one node in the sequence, in lexical order. A valid concatenation contains at + /// least two children: if a concatenation contains only a single child, it can be replaced + /// by that child, and if a concatenation has no children, it can be replaced by . + /// + Concatenate = 25, + + /// A loop around an arbitrary , e.g. `(ab|cd)*`. + /// + /// One and only one child, the expression in the loop. The minimum number of iterations is in , + /// and the maximum number of iterations is in . + /// + Loop = 26, // m,x * + ? {,} + /// A lazy loop around an arbitrary , e.g. `(ab|cd)*?`. + /// + /// One and only one child, the expression in the loop. The minimum number of iterations is in , + /// and the maximum number of iterations is in . + /// + Lazyloop = 27, + + /// A capture group, e.g. `(\w*)`. + /// + /// One and only one child, the expression in the capture. is the number of the capture, and if a balancing + /// group, is the uncapture. + /// + Capture = 28, + /// A non-capturing group, e.g. `(?:ab|cd)`. + /// + /// One and only one child, the expression in the group. Groups are irrelevant after parsing and can be replaced entirely by their child. + /// These should not be in a valid tree returned from the parsing / reduction phases of processing. + /// + Group = 29, + /// An atomic group, e.g. `(?>ab|cd)`. 
+ /// One and only one child, the expression in the group. + Atomic = 32, + + /// + /// A positive lookaround assertion: lookahead if is not set and lookbehind if + /// is set, e.g. `(?=abc)` or `(?<=abc)`. + /// One and only one child, the expression in the assertion. + PositiveLookaround = 30, + /// + /// A negative lookaround assertion: lookahead if is not set and lookbehind if + /// is set, e.g. `(?!abc)` or `(?<!abc)`. + /// One and only one child, the expression in the assertion. + NegativeLookaround = 31, + + /// A backreference conditional, e.g. `(?(1)abc|def)`. + /// + /// Two children, the first to use if the reference capture group matched and the second to use if it didn't. + /// The referenced capture group number is stored in . + /// + BackreferenceConditional = 33, + /// An expression conditional, e.g. `(?(\d{3})123456|abc)`. + /// + /// Three children. The first is the expression to evaluate as a positive lookahead assertion, the second is + /// the expression to match if the positive lookahead assertion was successful, and the third is the expression + /// to match if the positive lookahead assertion was unsuccessful. 
+ /// + ExpressionConditional = 34, + } +} diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs index 02b4f1408e6790..9c7f8bdb3cc94b 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs @@ -255,7 +255,7 @@ private RegexNode ScanRegex() char ch; bool isQuantifier = false; - StartGroup(new RegexNode(RegexNode.Capture, _options, 0, -1)); + StartGroup(new RegexNode(RegexNodeKind.Capture, _options, 0, -1)); while (CharsRight() > 0) { @@ -326,8 +326,8 @@ private RegexNode ScanRegex() { string setString = ScanCharClass(UseOptionI(), scanOnly: false)!.ToStringClass(_options); _unit = UseOptionI() && RegexCharClass.MakeCaseSensitiveIfPossible(setString, _culture) is string newSetString ? - new RegexNode(RegexNode.Set, _options & ~RegexOptions.IgnoreCase, newSetString) : - new RegexNode(RegexNode.Set, _options, setString); + new RegexNode(RegexNodeKind.Set, _options & ~RegexOptions.IgnoreCase, newSetString) : + new RegexNode(RegexNodeKind.Set, _options, setString); } break; @@ -379,17 +379,17 @@ private RegexNode ScanRegex() break; case '^': - AddUnitType(UseOptionM() ? RegexNode.Bol : RegexNode.Beginning); + AddUnitType(UseOptionM() ? RegexNodeKind.Bol : RegexNodeKind.Beginning); break; case '$': - AddUnitType(UseOptionM() ? RegexNode.Eol : RegexNode.EndZ); + AddUnitType(UseOptionM() ? RegexNodeKind.Eol : RegexNodeKind.EndZ); break; case '.': _unit = UseOptionS() ? 
- new RegexNode(RegexNode.Set, _options & ~RegexOptions.IgnoreCase, RegexCharClass.AnyClass) : - new RegexNode(RegexNode.Notone, _options & ~RegexOptions.IgnoreCase, '\n'); + new RegexNode(RegexNodeKind.Set, _options & ~RegexOptions.IgnoreCase, RegexCharClass.AnyClass) : + new RegexNode(RegexNodeKind.Notone, _options & ~RegexOptions.IgnoreCase, '\n'); break; case '{': @@ -451,10 +451,7 @@ private RegexNode ScanRegex() if (CharsRight() > 0 && RightChar() == ',') { MoveRight(); - if (CharsRight() == 0 || RightChar() == '}') - max = int.MaxValue; - else - max = ScanDecimal(); + max = CharsRight() == 0 || RightChar() == '}' ? int.MaxValue : ScanDecimal(); } } @@ -511,7 +508,7 @@ private RegexNode ScanRegex() */ private RegexNode ScanReplacement() { - _concatenation = new RegexNode(RegexNode.Concatenate, _options); + _concatenation = new RegexNode(RegexNodeKind.Concatenate, _options); while (true) { @@ -541,7 +538,7 @@ private RegexNode ScanReplacement() // groups are unsupported. However, the replacement patterns that refer to the left/right portion // or all of the input as well as referring to group 0 (i.e. the whole match) are supported. 
if ((_options & RegexOptions.NonBacktracking) != 0 && - node.Type == RegexNode.Ref && + node.Kind == RegexNodeKind.Backreference && node.M is not (0 or RegexReplacement.LeftPortion or RegexReplacement.RightPortion or RegexReplacement.WholeString)) { throw new NotSupportedException(SR.NotSupported_NonBacktrackingAndReplacementsWithSubstitutionsOfGroups); @@ -788,11 +785,11 @@ node.M is not (0 or RegexReplacement.LeftPortion or RegexReplacement.RightPortio if (UseOptionN() || _ignoreNextParen) { _ignoreNextParen = false; - return new RegexNode(RegexNode.Group, _options); + return new RegexNode(RegexNodeKind.Group, _options); } else { - return new RegexNode(RegexNode.Capture, _options, _autocap++, -1); + return new RegexNode(RegexNodeKind.Capture, _options, _autocap++, -1); } } @@ -805,31 +802,31 @@ node.M is not (0 or RegexReplacement.LeftPortion or RegexReplacement.RightPortio break; } - int nodeType; + RegexNodeKind nodeType; char close = '>'; char ch = RightCharMoveRight(); switch (ch) { case ':': // noncapturing group - nodeType = RegexNode.Group; + nodeType = RegexNodeKind.Group; break; case '=': // lookahead assertion _options &= ~RegexOptions.RightToLeft; - nodeType = RegexNode.Require; + nodeType = RegexNodeKind.PositiveLookaround; break; case '!': // negative lookahead assertion _options &= ~RegexOptions.RightToLeft; - nodeType = RegexNode.Prevent; + nodeType = RegexNodeKind.NegativeLookaround; break; case '>': // atomic subexpression - nodeType = RegexNode.Atomic; + nodeType = RegexNodeKind.Atomic; break; case '\'': @@ -852,7 +849,7 @@ node.M is not (0 or RegexReplacement.LeftPortion or RegexReplacement.RightPortio // lookbehind assertion _options |= RegexOptions.RightToLeft; - nodeType = RegexNode.Require; + nodeType = RegexNodeKind.PositiveLookaround; break; case '!': @@ -863,7 +860,7 @@ node.M is not (0 or RegexReplacement.LeftPortion or RegexReplacement.RightPortio // negative lookbehind assertion _options |= RegexOptions.RightToLeft; - nodeType = 
RegexNode.Prevent; + nodeType = RegexNodeKind.NegativeLookaround; break; default: @@ -921,7 +918,7 @@ node.M is not (0 or RegexReplacement.LeftPortion or RegexReplacement.RightPortio // grab part after - if any - if ((capnum != -1 || proceed == true) && CharsRight() > 1 && RightChar() == '-') + if ((capnum != -1 || proceed) && CharsRight() > 1 && RightChar() == '-') { MoveRight(); ch = RightChar(); @@ -971,7 +968,7 @@ node.M is not (0 or RegexReplacement.LeftPortion or RegexReplacement.RightPortio if ((capnum != -1 || uncapnum != -1) && CharsRight() > 0 && RightCharMoveRight() == close) { - return new RegexNode(RegexNode.Capture, _options, capnum, uncapnum); + return new RegexNode(RegexNodeKind.Capture, _options, capnum, uncapnum); } goto BreakRecognize; } @@ -993,7 +990,7 @@ node.M is not (0 or RegexReplacement.LeftPortion or RegexReplacement.RightPortio { if (IsCaptureSlot(capnum)) { - return new RegexNode(RegexNode.Testref, _options, capnum); + return new RegexNode(RegexNodeKind.BackreferenceConditional, _options, capnum); } throw MakeException(RegexParseError.AlternationHasUndefinedReference, SR.Format(SR.AlternationHasUndefinedReference, capnum.ToString())); @@ -1007,12 +1004,12 @@ node.M is not (0 or RegexReplacement.LeftPortion or RegexReplacement.RightPortio if (IsCaptureName(capname) && CharsRight() > 0 && RightCharMoveRight() == ')') { - return new RegexNode(RegexNode.Testref, _options, CaptureSlotFromName(capname)); + return new RegexNode(RegexNodeKind.BackreferenceConditional, _options, CaptureSlotFromName(capname)); } } } // not a backref - nodeType = RegexNode.Testgroup; + nodeType = RegexNodeKind.ExpressionConditional; Textto(parenPos - 1); // jump to the start of the parentheses _ignoreNextParen = true; // but make sure we don't try to capture the insides @@ -1044,9 +1041,9 @@ node.M is not (0 or RegexReplacement.LeftPortion or RegexReplacement.RightPortio default: MoveLeft(); - nodeType = RegexNode.Group; + nodeType = RegexNodeKind.Group; // 
Disallow options in the children of a testgroup node - if (_group!.Type != RegexNode.Testgroup) + if (_group!.Kind != RegexNodeKind.ExpressionConditional) { ScanOptions(); } @@ -1173,32 +1170,32 @@ private void ScanBlank() case 'w': MoveRight(); return scanOnly ? null : - new RegexNode(RegexNode.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.ECMAWordClass : RegexCharClass.WordClass); + new RegexNode(RegexNodeKind.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.ECMAWordClass : RegexCharClass.WordClass); case 'W': MoveRight(); return scanOnly ? null : - new RegexNode(RegexNode.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.NotECMAWordClass : RegexCharClass.NotWordClass); + new RegexNode(RegexNodeKind.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.NotECMAWordClass : RegexCharClass.NotWordClass); case 's': MoveRight(); return scanOnly ? null : - new RegexNode(RegexNode.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.ECMASpaceClass : RegexCharClass.SpaceClass); + new RegexNode(RegexNodeKind.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.ECMASpaceClass : RegexCharClass.SpaceClass); case 'S': MoveRight(); return scanOnly ? null : - new RegexNode(RegexNode.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.NotECMASpaceClass : RegexCharClass.NotSpaceClass); + new RegexNode(RegexNodeKind.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.NotECMASpaceClass : RegexCharClass.NotSpaceClass); case 'd': MoveRight(); return scanOnly ? null : - new RegexNode(RegexNode.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.ECMADigitClass : RegexCharClass.DigitClass); + new RegexNode(RegexNodeKind.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.ECMADigitClass : RegexCharClass.DigitClass); case 'D': MoveRight(); return scanOnly ? 
null : - new RegexNode(RegexNode.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.NotECMADigitClass : RegexCharClass.NotDigitClass); + new RegexNode(RegexNodeKind.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.NotECMADigitClass : RegexCharClass.NotDigitClass); case 'p': case 'P': @@ -1215,7 +1212,7 @@ private void ScanBlank() cc.AddLowercase(_culture); } - return new RegexNode(RegexNode.Set, _options, cc.ToStringClass(_options)); + return new RegexNode(RegexNodeKind.Set, _options, cc.ToStringClass(_options)); default: return ScanBasicBackslash(scanOnly); @@ -1259,7 +1256,7 @@ static RegexOptions RemoveIgnoreCaseIfNotEcma(RegexOptions options) { MoveRight(); ch = RightCharMoveRight(); - if (ch == '<' || ch == '\'') + if (ch is '<' or '\'') { angled = true; close = (ch == '\'') ? '\'' : '>'; @@ -1294,7 +1291,7 @@ static RegexOptions RemoveIgnoreCaseIfNotEcma(RegexOptions options) { return scanOnly ? null : - IsCaptureSlot(capnum) ? new RegexNode(RegexNode.Ref, _options, capnum) : + IsCaptureSlot(capnum) ? new RegexNode(RegexNodeKind.Backreference, _options, capnum) : throw MakeException(RegexParseError.UndefinedNumberedReference, SR.Format(SR.UndefinedNumberedReference, capnum.ToString())); } } @@ -1326,7 +1323,7 @@ static RegexOptions RemoveIgnoreCaseIfNotEcma(RegexOptions options) if (capnum >= 0) { - return scanOnly ? null : new RegexNode(RegexNode.Ref, _options, capnum); + return scanOnly ? null : new RegexNode(RegexNodeKind.Backreference, _options, capnum); } } else @@ -1340,7 +1337,7 @@ static RegexOptions RemoveIgnoreCaseIfNotEcma(RegexOptions options) if (IsCaptureSlot(capnum)) { - return new RegexNode(RegexNode.Ref, _options, capnum); + return new RegexNode(RegexNodeKind.Backreference, _options, capnum); } if (capnum <= 9) @@ -1360,7 +1357,7 @@ static RegexOptions RemoveIgnoreCaseIfNotEcma(RegexOptions options) { return scanOnly ? null : - IsCaptureName(capname) ? 
new RegexNode(RegexNode.Ref, _options, CaptureSlotFromName(capname)) : + IsCaptureName(capname) ? new RegexNode(RegexNodeKind.Backreference, _options, CaptureSlotFromName(capname)) : throw MakeException(RegexParseError.UndefinedNamedReference, SR.Format(SR.UndefinedNamedReference, capname)); } } @@ -1440,7 +1437,7 @@ private RegexNode ScanDollar() Textto(lastEndPos); if (capnum >= 0) { - return new RegexNode(RegexNode.Ref, _options, capnum); + return new RegexNode(RegexNodeKind.Backreference, _options, capnum); } } else @@ -1451,7 +1448,7 @@ private RegexNode ScanDollar() CheckUnsupportedNonBacktrackingNumericRef(capnum); if (IsCaptureSlot(capnum)) { - return new RegexNode(RegexNode.Ref, _options, capnum); + return new RegexNode(RegexNodeKind.Backreference, _options, capnum); } } } @@ -1470,7 +1467,7 @@ private RegexNode ScanDollar() if (IsCaptureName(capname)) { - return new RegexNode(RegexNode.Ref, _options, CaptureSlotFromName(capname)); + return new RegexNode(RegexNodeKind.Backreference, _options, CaptureSlotFromName(capname)); } } } @@ -1508,7 +1505,7 @@ private RegexNode ScanDollar() if (capnum != 1) { MoveRight(); - return new RegexNode(RegexNode.Ref, _options, capnum); + return new RegexNode(RegexNodeKind.Backreference, _options, capnum); } } @@ -1787,16 +1784,16 @@ private string ParseProperty() } /// Returns ReNode type for zero-length assertions with a \ code. - private int TypeFromCode(char ch) => + private RegexNodeKind TypeFromCode(char ch) => ch switch { - 'b' => UseOptionE() ? RegexNode.ECMABoundary : RegexNode.Boundary, - 'B' => UseOptionE() ? RegexNode.NonECMABoundary : RegexNode.NonBoundary, - 'A' => RegexNode.Beginning, - 'G' => RegexNode.Start, - 'Z' => RegexNode.EndZ, - 'z' => RegexNode.End, - _ => RegexNode.Nothing, + 'b' => UseOptionE() ? RegexNodeKind.ECMABoundary : RegexNodeKind.Boundary, + 'B' => UseOptionE() ? 
RegexNodeKind.NonECMABoundary : RegexNodeKind.NonBoundary, + 'A' => RegexNodeKind.Beginning, + 'G' => RegexNodeKind.Start, + 'Z' => RegexNodeKind.EndZ, + 'z' => RegexNodeKind.End, + _ => RegexNodeKind.Nothing, }; /// Returns option bit from single-char (?cimsx) code. @@ -2183,7 +2180,7 @@ private void AddConcatenate(int pos, int cch, bool isReplacement) break; case > 1 when !UseOptionI() || isReplacement || !RegexCharClass.ParticipatesInCaseConversion(_pattern.AsSpan(pos, cch)): - _concatenation!.AddChild(new RegexNode(RegexNode.Multi, _options & ~RegexOptions.IgnoreCase, _pattern.Substring(pos, cch))); + _concatenation!.AddChild(new RegexNode(RegexNodeKind.Multi, _options & ~RegexOptions.IgnoreCase, _pattern.Substring(pos, cch))); break; default: @@ -2198,9 +2195,9 @@ private void AddConcatenate(int pos, int cch, bool isReplacement) /// Push the parser state (in response to an open paren) private void PushGroup() { - _group!.Next = _stack; - _alternation!.Next = _group; - _concatenation!.Next = _alternation; + _group!.Parent = _stack; + _alternation!.Parent = _group; + _concatenation!.Parent = _alternation; _stack = _concatenation; } @@ -2208,12 +2205,12 @@ private void PushGroup() private void PopGroup() { _concatenation = _stack; - _alternation = _concatenation!.Next; - _group = _alternation!.Next; - _stack = _group!.Next; + _alternation = _concatenation!.Parent; + _group = _alternation!.Parent; + _stack = _group!.Parent; // The first () inside a Testgroup group goes directly to the group - if (_group.Type == RegexNode.Testgroup && _group.ChildCount() == 0) + if (_group.Kind == RegexNodeKind.ExpressionConditional && _group.ChildCount() == 0) { if (_unit == null) { @@ -2232,8 +2229,8 @@ private void PopGroup() private void StartGroup(RegexNode openGroup) { _group = openGroup; - _alternation = new RegexNode(RegexNode.Alternate, _options); - _concatenation = new RegexNode(RegexNode.Concatenate, _options); + _alternation = new RegexNode(RegexNodeKind.Alternate, 
_options); + _concatenation = new RegexNode(RegexNodeKind.Concatenate, _options); } /// Finish the current concatenation (in response to a |) @@ -2241,7 +2238,7 @@ private void AddAlternate() { // The | parts inside a Testgroup group go directly to the group - if (_group!.Type == RegexNode.Testgroup || _group.Type == RegexNode.Testref) + if (_group!.Kind is RegexNodeKind.ExpressionConditional or RegexNodeKind.BackreferenceConditional) { _group.AddChild(_concatenation!.ReverseConcatenationIfRightToLeft()); } @@ -2250,7 +2247,7 @@ private void AddAlternate() _alternation!.AddChild(_concatenation!.ReverseConcatenationIfRightToLeft()); } - _concatenation = new RegexNode(RegexNode.Concatenate, _options); + _concatenation = new RegexNode(RegexNodeKind.Concatenate, _options); } /// Finish the current quantifiable (when a quantifier is not found or is not possible) @@ -2279,16 +2276,16 @@ private void AddConcatenate(bool lazy, int min, int max) private void AddUnitNode(RegexNode node) => _unit = node; /// Sets the current unit to an assertion of the specified type - private void AddUnitType(int type) => _unit = new RegexNode(type, _options); + private void AddUnitType(RegexNodeKind type) => _unit = new RegexNode(type, _options); /// Finish the current group (in response to a ')' or end) private void AddGroup() { - if (_group!.Type == RegexNode.Testgroup || _group.Type == RegexNode.Testref) + if (_group!.Kind is RegexNodeKind.ExpressionConditional or RegexNodeKind.BackreferenceConditional) { _group.AddChild(_concatenation!.ReverseConcatenationIfRightToLeft()); - if (_group.Type == RegexNode.Testref && _group.ChildCount() > 2 || _group.ChildCount() > 3) + if (_group.Kind == RegexNodeKind.BackreferenceConditional && _group.ChildCount() > 2 || _group.ChildCount() > 3) { throw MakeException(RegexParseError.AlternationHasTooManyConditions, SR.AlternationHasTooManyConditions); } diff --git 
a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs index ceabbef99af6c8..869b5cc5aa285a 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs @@ -14,8 +14,8 @@ namespace System.Text.RegularExpressions internal ref struct RegexPrefixAnalyzer { private const int StackBufferSize = 32; - private const int BeforeChild = 64; - private const int AfterChild = 128; + private const RegexNodeKind BeforeChild = (RegexNodeKind)64; + private const RegexNodeKind AfterChild = (RegexNodeKind)128; // where the regex can be pegged public const int Beginning = 0x0001; @@ -33,6 +33,14 @@ internal ref struct RegexPrefixAnalyzer private bool _skipchild; // don't process the current child. private bool _failed; +#if DEBUG + static RegexPrefixAnalyzer() + { + Debug.Assert(!Enum.IsDefined(typeof(RegexNodeKind), BeforeChild)); + Debug.Assert(!Enum.IsDefined(typeof(RegexNodeKind), AfterChild)); + } +#endif + private RegexPrefixAnalyzer(Span intStack) { _fcStack = new List(StackBufferSize); @@ -63,10 +71,10 @@ static bool Process(RegexNode node, ref ValueStringBuilder vsb) // when handling RightToLeft. 
bool rtl = (node.Options & RegexOptions.RightToLeft) != 0; - switch (node.Type) + switch (node.Kind) { // Concatenation - case RegexNode.Concatenate: + case RegexNodeKind.Concatenate: { int childCount = node.ChildCount(); for (int i = 0; i < childCount; i++) @@ -80,24 +88,24 @@ static bool Process(RegexNode node, ref ValueStringBuilder vsb) } // One character - case RegexNode.One when (node.Options & RegexOptions.IgnoreCase) == 0: + case RegexNodeKind.One when (node.Options & RegexOptions.IgnoreCase) == 0: vsb.Append(node.Ch); return !rtl; // Multiple characters - case RegexNode.Multi when (node.Options & RegexOptions.IgnoreCase) == 0: + case RegexNodeKind.Multi when (node.Options & RegexOptions.IgnoreCase) == 0: vsb.Append(node.Str); return !rtl; // Loop of one character - case RegexNode.Oneloop or RegexNode.Oneloopatomic or RegexNode.Onelazy when node.M > 0 && (node.Options & RegexOptions.IgnoreCase) == 0: + case RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy when node.M > 0 && (node.Options & RegexOptions.IgnoreCase) == 0: const int SingleCharIterationLimit = 32; // arbitrary cut-off to avoid creating super long strings unnecessarily int count = Math.Min(node.M, SingleCharIterationLimit); vsb.Append(node.Ch, count); return count == node.N && !rtl; // Loop of a node - case RegexNode.Loop or RegexNode.Lazyloop when node.M > 0: + case RegexNodeKind.Loop or RegexNodeKind.Lazyloop when node.M > 0: { const int NodeIterationLimit = 4; // arbitrary cut-off to avoid creating super long strings unnecessarily int limit = Math.Min(node.M, NodeIterationLimit); @@ -112,25 +120,25 @@ static bool Process(RegexNode node, ref ValueStringBuilder vsb) } // Grouping nodes for which we only care about their single child - case RegexNode.Atomic: - case RegexNode.Capture: + case RegexNodeKind.Atomic: + case RegexNodeKind.Capture: return Process(node.Child(0), ref vsb); // Zero-width anchors and assertions - case RegexNode.Bol: - case RegexNode.Eol: - case 
RegexNode.Boundary: - case RegexNode.ECMABoundary: - case RegexNode.NonBoundary: - case RegexNode.NonECMABoundary: - case RegexNode.Beginning: - case RegexNode.Start: - case RegexNode.EndZ: - case RegexNode.End: - case RegexNode.Empty: - case RegexNode.UpdateBumpalong: - case RegexNode.Require: - case RegexNode.Prevent: + case RegexNodeKind.Bol: + case RegexNodeKind.Eol: + case RegexNodeKind.Boundary: + case RegexNodeKind.ECMABoundary: + case RegexNodeKind.NonBoundary: + case RegexNodeKind.NonECMABoundary: + case RegexNodeKind.Beginning: + case RegexNodeKind.Start: + case RegexNodeKind.EndZ: + case RegexNodeKind.End: + case RegexNodeKind.Empty: + case RegexNodeKind.UpdateBumpalong: + case RegexNodeKind.PositiveLookaround: + case RegexNodeKind.NegativeLookaround: return true; // Give up for anything else @@ -293,9 +301,9 @@ static bool TryFindFixedSets(RegexNode node, List<(char[]? Chars, string Set, in bool caseInsensitive = (node.Options & RegexOptions.IgnoreCase) != 0; - switch (node.Type) + switch (node.Kind) { - case RegexNode.One: + case RegexNodeKind.One: if (results.Count < MaxFixedResults) { string setString = RegexCharClass.OneToStringClass(node.Ch, caseInsensitive ? culture : null, out bool resultIsCaseInsensitive); @@ -304,7 +312,7 @@ static bool TryFindFixedSets(RegexNode node, List<(char[]? Chars, string Set, in } return false; - case RegexNode.Onelazy or RegexNode.Oneloop or RegexNode.Oneloopatomic when node.M > 0: + case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic when node.M > 0: { string setString = RegexCharClass.OneToStringClass(node.Ch, caseInsensitive ? culture : null, out bool resultIsCaseInsensitive); int minIterations = Math.Min(node.M, MaxLoopExpansion); @@ -316,7 +324,7 @@ static bool TryFindFixedSets(RegexNode node, List<(char[]? 
Chars, string Set, in return i == node.M && i == node.N; } - case RegexNode.Multi: + case RegexNodeKind.Multi: { string s = node.Str!; int i = 0; @@ -328,7 +336,7 @@ static bool TryFindFixedSets(RegexNode node, List<(char[]? Chars, string Set, in return i == s.Length; } - case RegexNode.Set: + case RegexNodeKind.Set: if (results.Count < MaxFixedResults) { results.Add((null, node.Str!, distance++, caseInsensitive)); @@ -336,7 +344,7 @@ static bool TryFindFixedSets(RegexNode node, List<(char[]? Chars, string Set, in } return false; - case RegexNode.Setlazy or RegexNode.Setloop or RegexNode.Setloopatomic when node.M > 0: + case RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic when node.M > 0: { int minIterations = Math.Min(node.M, MaxLoopExpansion); int i = 0; @@ -347,41 +355,41 @@ static bool TryFindFixedSets(RegexNode node, List<(char[]? Chars, string Set, in return i == node.M && i == node.N; } - case RegexNode.Notone: + case RegexNodeKind.Notone: // We could create a set out of Notone, but it will be of little value in helping to improve // the speed of finding the first place to match, as almost every character will match it. distance++; return true; - case RegexNode.Notonelazy or RegexNode.Notoneloop or RegexNode.Notoneloopatomic when node.M == node.N: + case RegexNodeKind.Notonelazy or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic when node.M == node.N: distance += node.M; return true; - case RegexNode.Beginning: - case RegexNode.Bol: - case RegexNode.Boundary: - case RegexNode.ECMABoundary: - case RegexNode.Empty: - case RegexNode.End: - case RegexNode.EndZ: - case RegexNode.Eol: - case RegexNode.NonBoundary: - case RegexNode.NonECMABoundary: - case RegexNode.UpdateBumpalong: - case RegexNode.Start: - case RegexNode.Prevent: - case RegexNode.Require: - // Zero-width anchors and assertions. 
In theory for Prevent and Require we could also investigate - // them and use the learned knowledge to impact the generated sets, at least for lookaheads. + case RegexNodeKind.Beginning: + case RegexNodeKind.Bol: + case RegexNodeKind.Boundary: + case RegexNodeKind.ECMABoundary: + case RegexNodeKind.Empty: + case RegexNodeKind.End: + case RegexNodeKind.EndZ: + case RegexNodeKind.Eol: + case RegexNodeKind.NonBoundary: + case RegexNodeKind.NonECMABoundary: + case RegexNodeKind.UpdateBumpalong: + case RegexNodeKind.Start: + case RegexNodeKind.NegativeLookaround: + case RegexNodeKind.PositiveLookaround: + // Zero-width anchors and assertions. In theory, for PositiveLookaround and NegativeLookaround we could also + // investigate them and use the learned knowledge to impact the generated sets, at least for lookaheads. // For now, we don't bother. return true; - case RegexNode.Atomic: - case RegexNode.Group: - case RegexNode.Capture: + case RegexNodeKind.Atomic: + case RegexNodeKind.Group: + case RegexNodeKind.Capture: return TryFindFixedSets(node.Child(0), results, ref distance, culture, thorough); - case RegexNode.Lazyloop or RegexNode.Loop when node.M > 0: + case RegexNodeKind.Lazyloop or RegexNodeKind.Loop when node.M > 0: // This effectively only iterates the loop once. If deemed valuable, // it could be updated in the future to duplicate the found results // (updated to incorporate distance from previous iterations) and @@ -391,7 +399,7 @@ static bool TryFindFixedSets(RegexNode node, List<(char[]? Chars, string Set, in TryFindFixedSets(node.Child(0), results, ref distance, culture, thorough); return false; - case RegexNode.Concatenate: + case RegexNodeKind.Concatenate: { int childCount = node.ChildCount(); for (int i = 0; i < childCount; i++) @@ -404,7 +412,7 @@ static bool TryFindFixedSets(RegexNode node, List<(char[]? 
Chars, string Set, in return true; } - case RegexNode.Alternate when thorough: + case RegexNodeKind.Alternate when thorough: { int childCount = node.ChildCount(); bool allSameSize = true; @@ -523,11 +531,11 @@ public static (RegexNode LoopNode, (char Char, string? String, char[]? Chars) Li } // Find the first concatenation. - while ((node.Type is RegexNode.Atomic or RegexNode.Capture) || (node.Type is RegexNode.Loop or RegexNode.Lazyloop && node.M > 0)) + while ((node.Kind is RegexNodeKind.Atomic or RegexNodeKind.Capture) || (node.Kind is RegexNodeKind.Loop or RegexNodeKind.Lazyloop && node.M > 0)) { node = node.Child(0); } - if (node.Type != RegexNode.Concatenate) + if (node.Kind != RegexNodeKind.Concatenate) { return null; } @@ -540,7 +548,7 @@ public static (RegexNode LoopNode, (char Char, string? String, char[]? Chars) Li // could also be made to support Oneloopatomic and Notoneloopatomic, but the scenarios for that are rare. Debug.Assert(node.ChildCount() >= 2); RegexNode firstChild = node.Child(0); - if (firstChild.Type is not (RegexNode.Setloop or RegexNode.Setloopatomic or RegexNode.Setlazy) || firstChild.N != int.MaxValue) + if (firstChild.Kind is not (RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy) || firstChild.N != int.MaxValue) { return null; } @@ -548,7 +556,7 @@ public static (RegexNode LoopNode, (char Char, string? String, char[]? Chars) Li // Get the subsequent node. An UpdateBumpalong may have been added as an optimization, but it doesn't have an // impact on semantics and we can skip it. RegexNode nextChild = node.Child(1); - if (nextChild.Type == RegexNode.UpdateBumpalong) + if (nextChild.Kind == RegexNodeKind.UpdateBumpalong) { if (node.ChildCount() == 2) { @@ -562,15 +570,15 @@ public static (RegexNode LoopNode, (char Char, string? String, char[]? Chars) Li // and they're both case-sensitive, we have a winner. 
if (((firstChild.Options | nextChild.Options) & RegexOptions.IgnoreCase) == 0) { - switch (nextChild.Type) + switch (nextChild.Kind) { - case RegexNode.One when !RegexCharClass.CharInClass(nextChild.Ch, firstChild.Str!): + case RegexNodeKind.One when !RegexCharClass.CharInClass(nextChild.Ch, firstChild.Str!): return (firstChild, (nextChild.Ch, null, null)); - case RegexNode.Multi when !RegexCharClass.CharInClass(nextChild.Str![0], firstChild.Str!): + case RegexNodeKind.Multi when !RegexCharClass.CharInClass(nextChild.Str![0], firstChild.Str!): return (firstChild, ('\0', nextChild.Str, null)); - case RegexNode.Set when !RegexCharClass.IsNegated(nextChild.Str!): + case RegexNodeKind.Set when !RegexCharClass.IsNegated(nextChild.Str!): Span chars = stackalloc char[5]; // maximum number of chars optimized by IndexOfAny chars = chars.Slice(0, RegexCharClass.GetSetChars(nextChild.Str!, chars)); if (!chars.IsEmpty) @@ -602,33 +610,33 @@ public static int FindLeadingAnchor(RegexTree tree) while (true) { - switch (curNode.Type) + switch (curNode.Kind) { - case RegexNode.Bol: + case RegexNodeKind.Bol: return Bol; - case RegexNode.Eol: + case RegexNodeKind.Eol: return Eol; - case RegexNode.Boundary: + case RegexNodeKind.Boundary: return Boundary; - case RegexNode.ECMABoundary: + case RegexNodeKind.ECMABoundary: return ECMABoundary; - case RegexNode.Beginning: + case RegexNodeKind.Beginning: return Beginning; - case RegexNode.Start: + case RegexNodeKind.Start: return Start; - case RegexNode.EndZ: + case RegexNodeKind.EndZ: return EndZ; - case RegexNode.End: + case RegexNodeKind.End: return End; - case RegexNode.Concatenate: + case RegexNodeKind.Concatenate: if (curNode.ChildCount() > 0) { concatNode = curNode; @@ -636,15 +644,15 @@ public static int FindLeadingAnchor(RegexTree tree) } break; - case RegexNode.Atomic: - case RegexNode.Capture: + case RegexNodeKind.Atomic: + case RegexNodeKind.Capture: curNode = curNode.Child(0); concatNode = null; continue; - case 
RegexNode.Empty: - case RegexNode.Require: - case RegexNode.Prevent: + case RegexNodeKind.Empty: + case RegexNodeKind.PositiveLookaround: + case RegexNodeKind.NegativeLookaround: break; default: @@ -727,12 +735,12 @@ private RegexFC PopFC() if (curNodeChildCount == 0) { // This is a leaf node - CalculateFC(curNode.Type, curNode, 0); + CalculateFC(curNode.Kind, curNode, 0); } else if (curChild < curNodeChildCount && !_skipAllChildren) { // This is an interior node, and we have more children to analyze - CalculateFC(curNode.Type | BeforeChild, curNode, curChild); + CalculateFC(curNode.Kind | BeforeChild, curNode, curChild); if (!_skipchild) { @@ -757,9 +765,9 @@ private RegexFC PopFC() break; curChild = PopInt(); - curNode = curNode.Next; + curNode = curNode.Parent; - CalculateFC(curNode!.Type | AfterChild, curNode, curChild); + CalculateFC(curNode!.Kind | AfterChild, curNode, curChild); if (_failed) return null; @@ -780,30 +788,30 @@ private RegexFC PopFC() /// /// FC computation and shortcut cases for each node type /// - private void CalculateFC(int NodeType, RegexNode node, int CurIndex) + private void CalculateFC(RegexNodeKind nodeType, RegexNode node, int CurIndex) { bool ci = (node.Options & RegexOptions.IgnoreCase) != 0; bool rtl = (node.Options & RegexOptions.RightToLeft) != 0; - switch (NodeType) + switch (nodeType) { - case RegexNode.Concatenate | BeforeChild: - case RegexNode.Alternate | BeforeChild: - case RegexNode.Testref | BeforeChild: - case RegexNode.Loop | BeforeChild: - case RegexNode.Lazyloop | BeforeChild: + case RegexNodeKind.Concatenate | BeforeChild: + case RegexNodeKind.Alternate | BeforeChild: + case RegexNodeKind.BackreferenceConditional | BeforeChild: + case RegexNodeKind.Loop | BeforeChild: + case RegexNodeKind.Lazyloop | BeforeChild: break; - case RegexNode.Testgroup | BeforeChild: + case RegexNodeKind.ExpressionConditional | BeforeChild: if (CurIndex == 0) SkipChild(); break; - case RegexNode.Empty: + case RegexNodeKind.Empty: 
PushFC(new RegexFC(true)); break; - case RegexNode.Concatenate | AfterChild: + case RegexNodeKind.Concatenate | AfterChild: if (CurIndex != 0) { RegexFC child = PopFC(); @@ -816,7 +824,7 @@ private void CalculateFC(int NodeType, RegexNode node, int CurIndex) _skipAllChildren = true; break; - case RegexNode.Testgroup | AfterChild: + case RegexNodeKind.ExpressionConditional | AfterChild: if (CurIndex > 1) { RegexFC child = PopFC(); @@ -826,8 +834,8 @@ private void CalculateFC(int NodeType, RegexNode node, int CurIndex) } break; - case RegexNode.Alternate | AfterChild: - case RegexNode.Testref | AfterChild: + case RegexNodeKind.Alternate | AfterChild: + case RegexNodeKind.BackreferenceConditional | AfterChild: if (CurIndex != 0) { RegexFC child = PopFC(); @@ -837,48 +845,48 @@ private void CalculateFC(int NodeType, RegexNode node, int CurIndex) } break; - case RegexNode.Loop | AfterChild: - case RegexNode.Lazyloop | AfterChild: + case RegexNodeKind.Loop | AfterChild: + case RegexNodeKind.Lazyloop | AfterChild: if (node.M == 0) TopFC()._nullable = true; break; - case RegexNode.Group | BeforeChild: - case RegexNode.Group | AfterChild: - case RegexNode.Capture | BeforeChild: - case RegexNode.Capture | AfterChild: - case RegexNode.Atomic | BeforeChild: - case RegexNode.Atomic | AfterChild: + case RegexNodeKind.Group | BeforeChild: + case RegexNodeKind.Group | AfterChild: + case RegexNodeKind.Capture | BeforeChild: + case RegexNodeKind.Capture | AfterChild: + case RegexNodeKind.Atomic | BeforeChild: + case RegexNodeKind.Atomic | AfterChild: break; - case RegexNode.Require | BeforeChild: - case RegexNode.Prevent | BeforeChild: + case RegexNodeKind.PositiveLookaround | BeforeChild: + case RegexNodeKind.NegativeLookaround | BeforeChild: SkipChild(); PushFC(new RegexFC(true)); break; - case RegexNode.Require | AfterChild: - case RegexNode.Prevent | AfterChild: + case RegexNodeKind.PositiveLookaround | AfterChild: + case RegexNodeKind.NegativeLookaround | AfterChild: break; - 
case RegexNode.One: - case RegexNode.Notone: - PushFC(new RegexFC(node.Ch, NodeType == RegexNode.Notone, false, ci)); + case RegexNodeKind.One: + case RegexNodeKind.Notone: + PushFC(new RegexFC(node.Ch, nodeType == RegexNodeKind.Notone, false, ci)); break; - case RegexNode.Oneloop: - case RegexNode.Oneloopatomic: - case RegexNode.Onelazy: + case RegexNodeKind.Oneloop: + case RegexNodeKind.Oneloopatomic: + case RegexNodeKind.Onelazy: PushFC(new RegexFC(node.Ch, false, node.M == 0, ci)); break; - case RegexNode.Notoneloop: - case RegexNode.Notoneloopatomic: - case RegexNode.Notonelazy: + case RegexNodeKind.Notoneloop: + case RegexNodeKind.Notoneloopatomic: + case RegexNodeKind.Notonelazy: PushFC(new RegexFC(node.Ch, true, node.M == 0, ci)); break; - case RegexNode.Multi: + case RegexNodeKind.Multi: if (node.Str!.Length == 0) PushFC(new RegexFC(true)); else if (!rtl) @@ -887,37 +895,38 @@ private void CalculateFC(int NodeType, RegexNode node, int CurIndex) PushFC(new RegexFC(node.Str[node.Str.Length - 1], false, false, ci)); break; - case RegexNode.Set: + case RegexNodeKind.Set: PushFC(new RegexFC(node.Str!, false, ci)); break; - case RegexNode.Setloop: - case RegexNode.Setloopatomic: - case RegexNode.Setlazy: + case RegexNodeKind.Setloop: + case RegexNodeKind.Setloopatomic: + case RegexNodeKind.Setlazy: PushFC(new RegexFC(node.Str!, node.M == 0, ci)); break; - case RegexNode.Ref: + case RegexNodeKind.Backreference: PushFC(new RegexFC(RegexCharClass.AnyClass, true, false)); break; - case RegexNode.Nothing: - case RegexNode.Bol: - case RegexNode.Eol: - case RegexNode.Boundary: - case RegexNode.NonBoundary: - case RegexNode.ECMABoundary: - case RegexNode.NonECMABoundary: - case RegexNode.Beginning: - case RegexNode.Start: - case RegexNode.EndZ: - case RegexNode.End: - case RegexNode.UpdateBumpalong: + case RegexNodeKind.Nothing: + case RegexNodeKind.Bol: + case RegexNodeKind.Eol: + case RegexNodeKind.Boundary: + case RegexNodeKind.NonBoundary: + case 
RegexNodeKind.ECMABoundary: + case RegexNodeKind.NonECMABoundary: + case RegexNodeKind.Beginning: + case RegexNodeKind.Start: + case RegexNodeKind.EndZ: + case RegexNodeKind.End: + case RegexNodeKind.UpdateBumpalong: PushFC(new RegexFC(true)); break; default: - throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, NodeType.ToString(CultureInfo.CurrentCulture))); + Debug.Fail($"Unexpected node: {nodeType}"); + break; } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs index 814f05e5aeb119..2027763780d2c1 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs @@ -31,7 +31,7 @@ internal sealed class RegexReplacement /// public RegexReplacement(string rep, RegexNode concat, Hashtable _caps) { - if (concat.Type != RegexNode.Concatenate) + if (concat.Kind != RegexNodeKind.Concatenate) { throw ThrowHelper.CreateArgumentException(ExceptionResource.ReplacementError); } @@ -47,17 +47,17 @@ public RegexReplacement(string rep, RegexNode concat, Hashtable _caps) { RegexNode child = concat.Child(i); - switch (child.Type) + switch (child.Kind) { - case RegexNode.Multi: + case RegexNodeKind.Multi: vsb.Append(child.Str!); break; - case RegexNode.One: + case RegexNodeKind.One: vsb.Append(child.Ch); break; - case RegexNode.Ref: + case RegexNodeKind.Backreference: if (vsb.Length > 0) { rules.Append(strings.Length); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs index d3caec254a94fb..3a1da357d804a9 100644 --- 
a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs @@ -3,6 +3,7 @@ using System.Collections; using System.Collections.Generic; +using System.Diagnostics; using System.Globalization; using System.Runtime.InteropServices; @@ -12,8 +13,8 @@ namespace System.Text.RegularExpressions internal ref struct RegexWriter { // These must be unused RegexNode type bits. - private const int BeforeChild = 64; - private const int AfterChild = 128; + private const RegexNodeKind BeforeChild = (RegexNodeKind)64; + private const RegexNodeKind AfterChild = (RegexNodeKind)128; // Distribution of common patterns indicates an average amount of 56 op codes. Since we're stackalloc'ing, // we can afford to make it a bit higher and a power of two for simplicity. @@ -26,6 +27,14 @@ internal ref struct RegexWriter private Hashtable? _caps; private int _trackCount; +#if DEBUG + static RegexWriter() + { + Debug.Assert(!Enum.IsDefined(typeof(RegexNodeKind), BeforeChild)); + Debug.Assert(!Enum.IsDefined(typeof(RegexNodeKind), AfterChild)); + } +#endif + private RegexWriter(Span emittedSpan, Span intStackSpan) { _emitted = new ValueListBuilder(emittedSpan); @@ -103,11 +112,11 @@ public RegexCode RegexCodeFromRegexTree(RegexTree tree, CultureInfo culture) int curNodeChildCount = curNode.ChildCount(); if (curNodeChildCount == 0) { - EmitFragment(curNode.Type, curNode, 0); + EmitFragment(curNode.Kind, curNode, 0); } else if (curChild < curNodeChildCount) { - EmitFragment(curNode.Type | BeforeChild, curNode, curChild); + EmitFragment(curNode.Kind | BeforeChild, curNode, curChild); curNode = curNode.Child(curChild); _intStack.Append(curChild); @@ -121,9 +130,9 @@ public RegexCode RegexCodeFromRegexTree(RegexTree tree, CultureInfo culture) } curChild = _intStack.Pop(); - curNode = curNode.Next!; + curNode = curNode.Parent!; - EmitFragment(curNode.Type | 
AfterChild, curNode, curChild); + EmitFragment(curNode.Kind | AfterChild, curNode, curChild); curChild++; } @@ -219,7 +228,7 @@ private int StringCode(string str) /// through the tree and calls EmitFragment to emits code before /// and after each child of an interior node, and at each leaf. /// - private void EmitFragment(int nodetype, RegexNode node, int curIndex) + private void EmitFragment(RegexNodeKind nodeType, RegexNode node, int curIndex) { int bits = 0; if ((node.Options & RegexOptions.RightToLeft) != 0) @@ -231,14 +240,14 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) bits |= RegexCode.Ci; } - switch (nodetype) + switch (nodeType) { - case RegexNode.Concatenate | BeforeChild: - case RegexNode.Concatenate | AfterChild: - case RegexNode.Empty: + case RegexNodeKind.Concatenate | BeforeChild: + case RegexNodeKind.Concatenate | AfterChild: + case RegexNodeKind.Empty: break; - case RegexNode.Alternate | BeforeChild: + case RegexNodeKind.Alternate | BeforeChild: if (curIndex < node.ChildCount() - 1) { _intStack.Append(_emitted.Length); @@ -246,7 +255,7 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) } break; - case RegexNode.Alternate | AfterChild: + case RegexNodeKind.Alternate | AfterChild: { if (curIndex < node.ChildCount() - 1) { @@ -265,7 +274,7 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) break; } - case RegexNode.Testref | BeforeChild: + case RegexNodeKind.BackreferenceConditional | BeforeChild: switch (curIndex) { case 0: @@ -278,7 +287,7 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) } break; - case RegexNode.Testref | AfterChild: + case RegexNodeKind.BackreferenceConditional | AfterChild: switch (curIndex) { case 0: @@ -296,7 +305,7 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) } break; - case RegexNode.Testgroup | BeforeChild: + case RegexNodeKind.ExpressionConditional | BeforeChild: switch (curIndex) { case 0: @@ -308,7 
+317,7 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) } break; - case RegexNode.Testgroup | AfterChild: + case RegexNodeKind.ExpressionConditional | AfterChild: switch (curIndex) { case 0: @@ -329,8 +338,8 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) } break; - case RegexNode.Loop | BeforeChild: - case RegexNode.Lazyloop | BeforeChild: + case RegexNodeKind.Loop | BeforeChild: + case RegexNodeKind.Lazyloop | BeforeChild: if (node.N < int.MaxValue || node.M > 1) Emit(node.M == 0 ? RegexCode.Nullcount : RegexCode.Setcount, node.M == 0 ? 0 : 1 - node.M); @@ -345,11 +354,11 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) _intStack.Append(_emitted.Length); break; - case RegexNode.Loop | AfterChild: - case RegexNode.Lazyloop | AfterChild: + case RegexNodeKind.Loop | AfterChild: + case RegexNodeKind.Lazyloop | AfterChild: { int StartJumpPos = _emitted.Length; - int Lazy = (nodetype - (RegexNode.Loop | AfterChild)); + int Lazy = (nodeType - (RegexNodeKind.Loop | AfterChild)); if (node.N < int.MaxValue || node.M > 1) Emit(RegexCode.Branchcount + Lazy, _intStack.Pop(), node.N == int.MaxValue ? 
int.MaxValue : node.N - node.M); @@ -361,73 +370,73 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) } break; - case RegexNode.Group | BeforeChild: - case RegexNode.Group | AfterChild: + case RegexNodeKind.Group | BeforeChild: + case RegexNodeKind.Group | AfterChild: break; - case RegexNode.Capture | BeforeChild: + case RegexNodeKind.Capture | BeforeChild: Emit(RegexCode.Setmark); break; - case RegexNode.Capture | AfterChild: + case RegexNodeKind.Capture | AfterChild: Emit(RegexCode.Capturemark, RegexParser.MapCaptureNumber(node.M, _caps), RegexParser.MapCaptureNumber(node.N, _caps)); break; - case RegexNode.Require | BeforeChild: + case RegexNodeKind.PositiveLookaround | BeforeChild: Emit(RegexCode.Setjump); // causes lookahead/lookbehind to be non-backtracking Emit(RegexCode.Setmark); break; - case RegexNode.Require | AfterChild: + case RegexNodeKind.PositiveLookaround | AfterChild: Emit(RegexCode.Getmark); Emit(RegexCode.Forejump); // causes lookahead/lookbehind to be non-backtracking break; - case RegexNode.Prevent | BeforeChild: + case RegexNodeKind.NegativeLookaround | BeforeChild: Emit(RegexCode.Setjump); _intStack.Append(_emitted.Length); Emit(RegexCode.Lazybranch, 0); break; - case RegexNode.Prevent | AfterChild: + case RegexNodeKind.NegativeLookaround | AfterChild: Emit(RegexCode.Backjump); PatchJump(_intStack.Pop(), _emitted.Length); Emit(RegexCode.Forejump); break; - case RegexNode.Atomic | BeforeChild: + case RegexNodeKind.Atomic | BeforeChild: Emit(RegexCode.Setjump); break; - case RegexNode.Atomic | AfterChild: + case RegexNodeKind.Atomic | AfterChild: Emit(RegexCode.Forejump); break; - case RegexNode.One: - case RegexNode.Notone: - Emit(node.Type | bits, node.Ch); + case RegexNodeKind.One: + case RegexNodeKind.Notone: + Emit((int)node.Kind | bits, node.Ch); break; - case RegexNode.Notoneloop: - case RegexNode.Notoneloopatomic: - case RegexNode.Notonelazy: - case RegexNode.Oneloop: - case RegexNode.Oneloopatomic: - case 
RegexNode.Onelazy: + case RegexNodeKind.Notoneloop: + case RegexNodeKind.Notoneloopatomic: + case RegexNodeKind.Notonelazy: + case RegexNodeKind.Oneloop: + case RegexNodeKind.Oneloopatomic: + case RegexNodeKind.Onelazy: if (node.M > 0) { - Emit(((node.Type == RegexNode.Oneloop || node.Type == RegexNode.Oneloopatomic || node.Type == RegexNode.Onelazy) ? + Emit(((node.Kind is RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy) ? RegexCode.Onerep : RegexCode.Notonerep) | bits, node.Ch, node.M); } if (node.N > node.M) { - Emit(node.Type | bits, node.Ch, node.N == int.MaxValue ? int.MaxValue : node.N - node.M); + Emit((int)node.Kind | bits, node.Ch, node.N == int.MaxValue ? int.MaxValue : node.N - node.M); } break; - case RegexNode.Setloop: - case RegexNode.Setloopatomic: - case RegexNode.Setlazy: + case RegexNodeKind.Setloop: + case RegexNodeKind.Setloopatomic: + case RegexNodeKind.Setlazy: { int stringCode = StringCode(node.Str!); if (node.M > 0) @@ -436,40 +445,41 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) } if (node.N > node.M) { - Emit(node.Type | bits, stringCode, (node.N == int.MaxValue) ? int.MaxValue : node.N - node.M); + Emit((int)node.Kind | bits, stringCode, (node.N == int.MaxValue) ? 
int.MaxValue : node.N - node.M); } } break; - case RegexNode.Multi: - Emit(node.Type | bits, StringCode(node.Str!)); + case RegexNodeKind.Multi: + Emit((int)node.Kind | bits, StringCode(node.Str!)); break; - case RegexNode.Set: - Emit(node.Type | bits, StringCode(node.Str!)); + case RegexNodeKind.Set: + Emit((int)node.Kind | bits, StringCode(node.Str!)); break; - case RegexNode.Ref: - Emit(node.Type | bits, RegexParser.MapCaptureNumber(node.M, _caps)); + case RegexNodeKind.Backreference: + Emit((int)node.Kind | bits, RegexParser.MapCaptureNumber(node.M, _caps)); break; - case RegexNode.Nothing: - case RegexNode.Bol: - case RegexNode.Eol: - case RegexNode.Boundary: - case RegexNode.NonBoundary: - case RegexNode.ECMABoundary: - case RegexNode.NonECMABoundary: - case RegexNode.Beginning: - case RegexNode.Start: - case RegexNode.EndZ: - case RegexNode.End: - case RegexNode.UpdateBumpalong: - Emit(node.Type); + case RegexNodeKind.Nothing: + case RegexNodeKind.Bol: + case RegexNodeKind.Eol: + case RegexNodeKind.Boundary: + case RegexNodeKind.NonBoundary: + case RegexNodeKind.ECMABoundary: + case RegexNodeKind.NonECMABoundary: + case RegexNodeKind.Beginning: + case RegexNodeKind.Start: + case RegexNodeKind.EndZ: + case RegexNodeKind.End: + case RegexNodeKind.UpdateBumpalong: + Emit((int)node.Kind); break; default: - throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, nodetype.ToString())); + Debug.Fail($"Unexpected node: {nodeType}"); + break; } } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/RegexNodeToSymbolicConverter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/RegexNodeToSymbolicConverter.cs index 6c299578983d7c..cdafb9114882aa 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/RegexNodeToSymbolicConverter.cs +++ 
b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/RegexNodeToSymbolicConverter.cs @@ -4,7 +4,6 @@ using System.Collections.Generic; using System.Diagnostics; using System.Globalization; -using System.Runtime.CompilerServices; using System.Threading; namespace System.Text.RegularExpressions.Symbolic @@ -212,9 +211,9 @@ public SymbolicRegexNode Convert(RegexNode node, bool topLevel) return StackHelper.CallOnEmptyStack(Convert, node, topLevel); } - switch (node.Type) + switch (node.Kind) { - case RegexNode.Alternate: + case RegexNodeKind.Alternate: { var nested = new SymbolicRegexNode[node.ChildCount()]; for (int i = 0; i < nested.Length; i++) @@ -224,17 +223,17 @@ public SymbolicRegexNode Convert(RegexNode node, bool topLevel) return _builder.MkOr(nested); } - case RegexNode.Beginning: + case RegexNodeKind.Beginning: return _builder._startAnchor; - case RegexNode.Bol: + case RegexNodeKind.Bol: EnsureNewlinePredicateInitialized(); return _builder._bolAnchor; - case RegexNode.Capture when node.N == -1: + case RegexNodeKind.Capture when node.N == -1: return Convert(node.Child(0), topLevel); // treat as non-capturing group (...) 
- case RegexNode.Concatenate: + case RegexNodeKind.Concatenate: { List nested = FlattenNestedConcatenations(node); var converted = new SymbolicRegexNode[nested.Count]; @@ -245,68 +244,68 @@ public SymbolicRegexNode Convert(RegexNode node, bool topLevel) return _builder.MkConcat(converted, topLevel); } - case RegexNode.Empty: - case RegexNode.UpdateBumpalong: // optional directive that behaves the same as Empty + case RegexNodeKind.Empty: + case RegexNodeKind.UpdateBumpalong: // optional directive that behaves the same as Empty return _builder._epsilon; - case RegexNode.End: // \z anchor + case RegexNodeKind.End: // \z anchor return _builder._endAnchor; - case RegexNode.EndZ: // \Z anchor + case RegexNodeKind.EndZ: // \Z anchor EnsureNewlinePredicateInitialized(); return _builder._endAnchorZ; - case RegexNode.Eol: + case RegexNodeKind.Eol: EnsureNewlinePredicateInitialized(); return _builder._eolAnchor; - case RegexNode.Loop: + case RegexNodeKind.Loop: return _builder.MkLoop(Convert(node.Child(0), topLevel: false), isLazy: false, node.M, node.N); - case RegexNode.Lazyloop: + case RegexNodeKind.Lazyloop: return _builder.MkLoop(Convert(node.Child(0), topLevel: false), isLazy: true, node.M, node.N); - case RegexNode.Multi: + case RegexNodeKind.Multi: return ConvertMulti(node, topLevel); - case RegexNode.Notone: + case RegexNodeKind.Notone: return _builder.MkSingleton(Solver.Not(Solver.CharConstraint(node.Ch, (node.Options & RegexOptions.IgnoreCase) != 0, _culture.Name))); - case RegexNode.Notoneloop: - case RegexNode.Notonelazy: - return ConvertNotoneloop(node, node.Type == RegexNode.Notonelazy); + case RegexNodeKind.Notoneloop: + case RegexNodeKind.Notonelazy: + return ConvertNotoneloop(node, node.Kind == RegexNodeKind.Notonelazy); - case RegexNode.One: + case RegexNodeKind.One: return _builder.MkSingleton(Solver.CharConstraint(node.Ch, (node.Options & RegexOptions.IgnoreCase) != 0, _culture.Name)); - case RegexNode.Oneloop: - case RegexNode.Onelazy: - return 
ConvertOneloop(node, node.Type == RegexNode.Onelazy); + case RegexNodeKind.Oneloop: + case RegexNodeKind.Onelazy: + return ConvertOneloop(node, node.Kind == RegexNodeKind.Onelazy); - case RegexNode.Set: + case RegexNodeKind.Set: return ConvertSet(node); - case RegexNode.Setloop: - case RegexNode.Setlazy: - return ConvertSetloop(node, node.Type == RegexNode.Setlazy); + case RegexNodeKind.Setloop: + case RegexNodeKind.Setlazy: + return ConvertSetloop(node, node.Kind == RegexNodeKind.Setlazy); // TBD: ECMA case intersect predicate with ascii range ? - case RegexNode.Boundary: - case RegexNode.ECMABoundary: + case RegexNodeKind.Boundary: + case RegexNodeKind.ECMABoundary: EnsureWordLetterPredicateInitialized(); return _builder._wbAnchor; // TBD: ECMA case intersect predicate with ascii range ? - case RegexNode.NonBoundary: - case RegexNode.NonECMABoundary: + case RegexNodeKind.NonBoundary: + case RegexNodeKind.NonECMABoundary: EnsureWordLetterPredicateInitialized(); return _builder._nwbAnchor; - case RegexNode.Nothing: + case RegexNodeKind.Nothing: return _builder._nothing; #if DEBUG - case RegexNode.Testgroup: + case RegexNodeKind.ExpressionConditional: // Try to extract the special case representing complement or intersection if (IsComplementedNode(node)) { @@ -327,19 +326,19 @@ public SymbolicRegexNode Convert(RegexNode node, bool topLevel) #endif default: - throw new NotSupportedException(SR.Format(SR.NotSupported_NonBacktrackingConflictingExpression, node.Type switch + throw new NotSupportedException(SR.Format(SR.NotSupported_NonBacktrackingConflictingExpression, node.Kind switch { - RegexNode.Capture => SR.ExpressionDescription_BalancingGroup, - RegexNode.Testgroup => SR.ExpressionDescription_IfThenElse, - RegexNode.Ref => SR.ExpressionDescription_Backreference, - RegexNode.Testref => SR.ExpressionDescription_Conditional, - RegexNode.Require => SR.ExpressionDescription_PositiveLookaround, - RegexNode.Prevent => SR.ExpressionDescription_NegativeLookaround, - 
RegexNode.Start => SR.ExpressionDescription_ContiguousMatches, - RegexNode.Atomic or - RegexNode.Setloopatomic or - RegexNode.Oneloopatomic or - RegexNode.Notoneloopatomic => SR.ExpressionDescription_AtomicSubexpressions, + RegexNodeKind.Capture => SR.ExpressionDescription_BalancingGroup, + RegexNodeKind.ExpressionConditional => SR.ExpressionDescription_IfThenElse, + RegexNodeKind.Backreference => SR.ExpressionDescription_Backreference, + RegexNodeKind.BackreferenceConditional => SR.ExpressionDescription_Conditional, + RegexNodeKind.PositiveLookaround => SR.ExpressionDescription_PositiveLookaround, + RegexNodeKind.NegativeLookaround => SR.ExpressionDescription_NegativeLookaround, + RegexNodeKind.Start => SR.ExpressionDescription_ContiguousMatches, + RegexNodeKind.Atomic or + RegexNodeKind.Setloopatomic or + RegexNodeKind.Oneloopatomic or + RegexNodeKind.Notoneloopatomic => SR.ExpressionDescription_AtomicSubexpressions, _ => UnexpectedNodeType(node) })); @@ -347,7 +346,7 @@ static string UnexpectedNodeType(RegexNode node) { // The default should never arise, since other node types are either supported // or have been removed (e.g. Group) from the final parse tree. - string description = $"Unexpected node type ({nameof(RegexNode)}:{node.Type})"; + string description = $"Unexpected node type ({nameof(RegexNode)}:{node.Kind})"; Debug.Fail(description); return description; } @@ -381,7 +380,7 @@ List FlattenNestedConcatenations(RegexNode concat) while (todo.TryPop(out RegexNode? 
node)) { - if (node.Type == RegexNode.Concatenate) + if (node.Kind == RegexNodeKind.Concatenate) { // Flatten nested concatenations for (int i = node.ChildCount() - 1; i >= 0; i--) @@ -389,7 +388,7 @@ List FlattenNestedConcatenations(RegexNode concat) todo.Push(node.Child(i)); } } - else if (node.Type == RegexNode.Capture) + else if (node.Kind == RegexNodeKind.Capture) { if (node.N == -1) { @@ -413,7 +412,7 @@ List FlattenNestedConcatenations(RegexNode concat) SymbolicRegexNode ConvertMulti(RegexNode node, bool topLevel) { - Debug.Assert(node.Type == RegexNode.Multi); + Debug.Assert(node.Kind == RegexNodeKind.Multi); string? sequence = node.Str; Debug.Assert(sequence is not null); @@ -431,7 +430,7 @@ SymbolicRegexNode ConvertMulti(RegexNode node, bool topLevel) SymbolicRegexNode ConvertOneloop(RegexNode node, bool isLazy) { - Debug.Assert(node.Type is RegexNode.Oneloop or RegexNode.Onelazy); + Debug.Assert(node.Kind is RegexNodeKind.Oneloop or RegexNodeKind.Onelazy); bool ignoreCase = (node.Options & RegexOptions.IgnoreCase) != 0; BDD cond = Solver.CharConstraint(node.Ch, ignoreCase, _culture.Name); @@ -443,7 +442,7 @@ SymbolicRegexNode ConvertOneloop(RegexNode node, bool isLazy) SymbolicRegexNode ConvertNotoneloop(RegexNode node, bool isLazy) { - Debug.Assert(node.Type is RegexNode.Notoneloop or RegexNode.Notonelazy); + Debug.Assert(node.Kind is RegexNodeKind.Notoneloop or RegexNodeKind.Notonelazy); bool ignoreCase = (node.Options & RegexOptions.IgnoreCase) != 0; BDD cond = Solver.Not(Solver.CharConstraint(node.Ch, ignoreCase, _culture.Name)); @@ -455,7 +454,7 @@ SymbolicRegexNode ConvertNotoneloop(RegexNode node, bool isLazy) SymbolicRegexNode ConvertSet(RegexNode node) { - Debug.Assert(node.Type == RegexNode.Set); + Debug.Assert(node.Kind == RegexNodeKind.Set); string? 
set = node.Str; Debug.Assert(set is not null); @@ -467,7 +466,7 @@ SymbolicRegexNode ConvertSet(RegexNode node) SymbolicRegexNode ConvertSetloop(RegexNode node, bool isLazy) { - Debug.Assert(node.Type is RegexNode.Setloop or RegexNode.Setlazy); + Debug.Assert(node.Kind is RegexNodeKind.Setloop or RegexNodeKind.Setlazy); string? set = node.Str; Debug.Assert(set is not null); @@ -480,11 +479,11 @@ SymbolicRegexNode ConvertSetloop(RegexNode node, bool isLazy) #if DEBUG // TODO-NONBACKTRACKING: recognizing strictly only [] (RegexNode.Nothing), for example [0-[0]] would not be recognized - bool IsNothing(RegexNode node) => node.Type == RegexNode.Nothing || (node.Type == RegexNode.Set && ConvertSet(node).IsNothing); + bool IsNothing(RegexNode node) => node.Kind == RegexNodeKind.Nothing || (node.Kind == RegexNodeKind.Set && ConvertSet(node).IsNothing); - bool IsDotStar(RegexNode node) => node.Type == RegexNode.Setloop && Convert(node, topLevel: false).IsAnyStar; + bool IsDotStar(RegexNode node) => node.Kind == RegexNodeKind.Setloop && Convert(node, topLevel: false).IsAnyStar; - bool IsIntersect(RegexNode node) => node.Type == RegexNode.Testgroup && IsNothing(node.Child(2)); + bool IsIntersect(RegexNode node) => node.Kind == RegexNodeKind.ExpressionConditional && IsNothing(node.Child(2)); bool TryGetIntersection(RegexNode node, [Diagnostics.CodeAnalysis.NotNullWhen(true)] out List? conjuncts) {