From 47eabf7fbab732b4b9c0cdc7b48f1edae14ca807 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Thu, 23 Dec 2021 20:27:45 -0500 Subject: [PATCH 1/7] Do addition for EndZ matching at compile time --- .../gen/RegexGenerator.Emitter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index b95387ba4eceed..b328a801c18298 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -1999,7 +1999,7 @@ void EmitAnchors(RegexNode node) break; case RegexNode.EndZ: - writer.WriteLine($"if ({sliceSpan}.Length - 1 > {sliceStaticPos} || ({IsSliceLengthGreaterThanSliceStaticPos()} && {sliceSpan}[{sliceStaticPos}] != '\\n'))"); + writer.WriteLine($"if ({sliceSpan}.Length > {sliceStaticPos + 1} || ({IsSliceLengthGreaterThanSliceStaticPos()} && {sliceSpan}[{sliceStaticPos}] != '\\n'))"); using (EmitBlock(writer, null)) { writer.WriteLine($"goto {doneLabel};"); From a3108ee35d75dda186d0a2cfd4fe85311fa74db1 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Thu, 23 Dec 2021 20:44:00 -0500 Subject: [PATCH 2/7] Tweak rendering of optional loops to say "Optional" rather than "Loop optionally" --- .../gen/RegexGenerator.Emitter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index b328a801c18298..52929c7f130593 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -3385,7 +3385,7 @@ private static string DescribeNode(RegexNode node) => RegexNode.End => "Match if at the end of the string.", RegexNode.EndZ => "Match if at the end 
of the string or if before an ending newline.", RegexNode.Eol => "Match if at the end of a line.", - RegexNode.Loop or RegexNode.Lazyloop => $"Loop {DescribeLoop(node)}.", + RegexNode.Loop or RegexNode.Lazyloop => node.M == 0 && node.N == 1 ? $"Optional ({(node.Type is RegexNode.Loop ? "greedy" : "lazy")})." : $"Loop {DescribeLoop(node)}.", RegexNode.Multi => $"Match the string {Literal(node.Str!)}.", RegexNode.NonBoundary => $"Match if at anything other than a word boundary.", RegexNode.NonECMABoundary => $"Match if at anything other than a word boundary (according to ECMAScript rules).", From 33101544ad85b0d5983ae1aa668dc60f74238227 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Thu, 23 Dec 2021 20:45:33 -0500 Subject: [PATCH 3/7] Remove "at least X" from loop description when X is 0 --- .../System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 52929c7f130593..09c1c7ba9f6f44 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -3488,6 +3488,7 @@ private static string DescribeLoop(RegexNode node) (2, int.MaxValue) => " at least twice", (_, int.MaxValue) => $" at least {node.M} times", (0, 1) => ", optionally", + (0, _) => $" at most {node.N} times", _ => $" at least {node.M} and at most {node.N} times" }; From 264da061797c2c1cab557643a9f015d418da8413 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 24 Dec 2021 06:25:49 -0500 Subject: [PATCH 4/7] Add a missing blank line at the beginning of a back reference --- .../gen/RegexGenerator.Emitter.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs 
b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 09c1c7ba9f6f44..539281f260ac57 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -1128,7 +1128,11 @@ void EmitBackreference(RegexNode node) int capnum = RegexParser.MapCaptureNumber(node.M, rm.Code.Caps); - TransferSliceStaticPosToPos(); + if (sliceStaticPos > 0) + { + TransferSliceStaticPosToPos(); + writer.WriteLine(); + } // If the specified capture hasn't yet captured anything, fail to match... except when using RegexOptions.ECMAScript, // in which case per ECMA 262 section 21.2.2.9 the backreference should succeed. From 215e7a67f9dff9c43182c1345f4e59ca6b47b464 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 24 Dec 2021 06:29:38 -0500 Subject: [PATCH 5/7] Rename ReturnFalse to NoStartingPositionFound --- .../gen/RegexGenerator.Emitter.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 539281f260ac57..e5af86a23d2249 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -377,7 +377,7 @@ private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writ writer.WriteLine(); writer.WriteLine("// No starting position found"); - writer.WriteLine("ReturnFalse:"); + writer.WriteLine("NoStartingPositionFound:"); writer.WriteLine("base.runtextpos = end;"); writer.WriteLine("return false;"); @@ -399,7 +399,7 @@ bool EmitAnchors() additionalDeclarations.Add("int beginning = base.runtextbeg;"); using (EmitBlock(writer, "if (pos > beginning)")) { - writer.WriteLine("goto ReturnFalse;"); + writer.WriteLine("goto NoStartingPositionFound;"); } 
writer.WriteLine("return true;"); return true; @@ -408,7 +408,7 @@ bool EmitAnchors() writer.WriteLine("// Start \\G anchor"); using (EmitBlock(writer, "if (pos > base.runtextstart)")) { - writer.WriteLine("goto ReturnFalse;"); + writer.WriteLine("goto NoStartingPositionFound;"); } writer.WriteLine("return true;"); return true; @@ -444,7 +444,7 @@ bool EmitAnchors() writer.WriteLine("int newlinePos = global::System.MemoryExtensions.IndexOf(inputSpan.Slice(pos), '\\n');"); using (EmitBlock(writer, "if (newlinePos < 0 || newlinePos + pos + 1 > end)")) { - writer.WriteLine("goto ReturnFalse;"); + writer.WriteLine("goto NoStartingPositionFound;"); } writer.WriteLine("pos = newlinePos + pos + 1;"); } @@ -517,7 +517,7 @@ void EmitFixedSet() writer.WriteLine($"int indexOfPos = {indexOf};"); using (EmitBlock(writer, "if (indexOfPos < 0)")) { - writer.WriteLine("goto ReturnFalse;"); + writer.WriteLine("goto NoStartingPositionFound;"); } writer.WriteLine("i += indexOfPos;"); writer.WriteLine(); @@ -526,7 +526,7 @@ void EmitFixedSet() { using (EmitBlock(writer, $"if (i >= span.Length - {minRequiredLength - 1})")) { - writer.WriteLine("goto ReturnFalse;"); + writer.WriteLine("goto NoStartingPositionFound;"); } writer.WriteLine(); } From 9c2a58063d0693e71093f56a11a5ff2afabb4db1 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Wed, 29 Dec 2021 06:51:07 -0500 Subject: [PATCH 6/7] Delete stale comments --- .../gen/RegexGenerator.Emitter.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index e5af86a23d2249..14fb04c3207375 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -3408,8 +3408,6 @@ private static string DescribeNode(RegexNode node) => RegexNode.Testref => $"Conditionally match {(node.ChildCount() == 
1 ? "an expression" : "one of two expressions")} depending on whether the {DescribeNonNegative(node.M)} capture group matched.", RegexNode.UpdateBumpalong => $"Advance the next matching position.", _ => $"Unknown node type {node.Type}", - - // Concatenation }; /// Writes a textual description of the node tree fit for rending in source. From 11e10e74bae0c6405e28b01f6cff4d4428e767b4 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 3 Jan 2022 16:44:19 -0500 Subject: [PATCH 7/7] Address PR feedback --- .../gen/RegexGenerator.Emitter.cs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 14fb04c3207375..ab108fd3ac664c 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -376,8 +376,9 @@ private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writ } writer.WriteLine(); + const string NoStartingPositionFound = "NoStartingPositionFound"; writer.WriteLine("// No starting position found"); - writer.WriteLine("NoStartingPositionFound:"); + writer.WriteLine($"{NoStartingPositionFound}:"); writer.WriteLine("base.runtextpos = end;"); writer.WriteLine("return false;"); @@ -399,7 +400,7 @@ bool EmitAnchors() additionalDeclarations.Add("int beginning = base.runtextbeg;"); using (EmitBlock(writer, "if (pos > beginning)")) { - writer.WriteLine("goto NoStartingPositionFound;"); + writer.WriteLine($"goto {NoStartingPositionFound};"); } writer.WriteLine("return true;"); return true; @@ -408,7 +409,7 @@ bool EmitAnchors() writer.WriteLine("// Start \\G anchor"); using (EmitBlock(writer, "if (pos > base.runtextstart)")) { - writer.WriteLine("goto NoStartingPositionFound;"); + writer.WriteLine($"goto {NoStartingPositionFound};"); } writer.WriteLine("return true;"); 
return true; @@ -444,7 +445,7 @@ bool EmitAnchors() writer.WriteLine("int newlinePos = global::System.MemoryExtensions.IndexOf(inputSpan.Slice(pos), '\\n');"); using (EmitBlock(writer, "if (newlinePos < 0 || newlinePos + pos + 1 > end)")) { - writer.WriteLine("goto NoStartingPositionFound;"); + writer.WriteLine($"goto {NoStartingPositionFound};"); } writer.WriteLine("pos = newlinePos + pos + 1;"); } @@ -517,7 +518,7 @@ void EmitFixedSet() writer.WriteLine($"int indexOfPos = {indexOf};"); using (EmitBlock(writer, "if (indexOfPos < 0)")) { - writer.WriteLine("goto NoStartingPositionFound;"); + writer.WriteLine($"goto {NoStartingPositionFound};"); } writer.WriteLine("i += indexOfPos;"); writer.WriteLine(); @@ -526,7 +527,7 @@ void EmitFixedSet() { using (EmitBlock(writer, $"if (i >= span.Length - {minRequiredLength - 1})")) { - writer.WriteLine("goto NoStartingPositionFound;"); + writer.WriteLine($"goto {NoStartingPositionFound};"); } writer.WriteLine(); }