-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Add Match and isMatch ReadOnlySpan<char> overloads as well as Regex protected methods which expose ReadOnlySpan<char> to avoid allocations. #62509
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c1c0fa3
2d3aef3
2960e99
d16b8e9
444c017
cc92623
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -222,7 +222,15 @@ private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter wri | |
| DescribeExpression(writer, rm.Code.Tree.Root.Child(0), " // ", rm.Code); // skip implicit root capture | ||
| writer.WriteLine(); | ||
|
|
||
| writer.WriteLine($" protected override bool FindFirstChar()"); | ||
| writer.WriteLine($" protected override void Scan(global::System.ReadOnlySpan<char> text)"); | ||
| writer.WriteLine($" {{"); | ||
| writer.Indent += 4; | ||
| EmitScan(writer, rm, id); | ||
| writer.Indent -= 4; | ||
| writer.WriteLine($" }}"); | ||
| writer.WriteLine(); | ||
|
|
||
| writer.WriteLine($" private bool FindFirstChar(global::System.ReadOnlySpan<char> inputSpan)"); | ||
| writer.WriteLine($" {{"); | ||
| writer.Indent += 4; | ||
| RequiredHelperFunctions requiredHelpers = EmitFindFirstChar(writer, rm, id); | ||
|
|
@@ -233,7 +241,7 @@ private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter wri | |
| { | ||
| writer.WriteLine($" [global::System.Runtime.CompilerServices.SkipLocalsInit]"); | ||
| } | ||
| writer.WriteLine($" protected override void Go()"); | ||
| writer.WriteLine($" private bool Go(global::System.ReadOnlySpan<char> inputSpan)"); | ||
| writer.WriteLine($" {{"); | ||
| writer.Indent += 4; | ||
| requiredHelpers |= EmitGo(writer, rm, id); | ||
|
|
@@ -299,6 +307,43 @@ static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht) | |
| } | ||
| } | ||
|
|
||
| private static void EmitScan(IndentedTextWriter writer, RegexMethod rm, string id) | ||
| { | ||
| using (EmitBlock(writer, "while (true)")) | ||
| { | ||
| using (EmitBlock(writer, "if (FindFirstChar(text))")) | ||
| { | ||
| if (rm.MatchTimeout != Timeout.Infinite) | ||
| { | ||
| writer.WriteLine("base.CheckTimeout();"); | ||
| writer.WriteLine(); | ||
| } | ||
|
|
||
| writer.WriteLine("// If we got a match, we're done."); | ||
| using (EmitBlock(writer, "if (Go(text))")) | ||
stephentoub marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| { | ||
| writer.WriteLine("return;"); | ||
| } | ||
| writer.WriteLine(); | ||
|
|
||
| writer.WriteLine("// Reset state for another iteration."); | ||
| writer.WriteLine("base.runtrackpos = base.runtrack!.Length;"); | ||
| writer.WriteLine("base.runstackpos = base.runstack!.Length;"); | ||
| writer.WriteLine("base.runcrawlpos = base.runcrawl!.Length;"); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are any of these actually necessary? runcrawlpos should be 0 because if there were captures we would have uncaptured all the way back to 0 on a failed match. We don't use runtrack at all. I'd have expected runtrackpos to also end up at runtrack.Length as well, as we use it as a backtracking stack which should get fully unwound. |
||
| } | ||
| writer.WriteLine(); | ||
|
|
||
| writer.WriteLine("// We failed to find a match. If we're at the end of the input, then we are done."); | ||
| using (EmitBlock(writer, "if (base.runtextpos == text.Length)")) | ||
| { | ||
| writer.WriteLine("return;"); | ||
| } | ||
| writer.WriteLine(); | ||
|
|
||
| writer.WriteLine("base.runtextpos++;"); | ||
| } | ||
| } | ||
|
|
||
| /// <summary>Emits the body of the FindFirstChar override.</summary> | ||
| private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writer, RegexMethod rm, string id) | ||
| { | ||
|
|
@@ -347,7 +392,6 @@ private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writ | |
| { | ||
| case FindNextStartingPositionMode.LeadingPrefix_LeftToRight_CaseSensitive: | ||
| Debug.Assert(!string.IsNullOrEmpty(code.FindOptimizations.LeadingCaseSensitivePrefix)); | ||
| additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;"); | ||
| EmitIndexOf(code.FindOptimizations.LeadingCaseSensitivePrefix); | ||
| break; | ||
|
|
||
|
|
@@ -356,13 +400,11 @@ private static RequiredHelperFunctions EmitFindFirstChar(IndentedTextWriter writ | |
| case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive: | ||
| case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive: | ||
| Debug.Assert(code.FindOptimizations.FixedDistanceSets is { Count: > 0 }); | ||
| additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;"); | ||
| EmitFixedSet(); | ||
| break; | ||
|
|
||
| case FindNextStartingPositionMode.LiteralAfterLoop_LeftToRight_CaseSensitive: | ||
| Debug.Assert(code.FindOptimizations.LiteralAfterLoop is not null); | ||
| additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;"); | ||
| EmitLiteralAfterAtomicLoop(); | ||
| break; | ||
|
|
||
|
|
@@ -463,7 +505,6 @@ bool EmitAnchors() | |
| // the other anchors, which all skip all subsequent processing if found, with BOL we just use it | ||
| // to boost our position to the next line, and then continue normally with any searches. | ||
| writer.WriteLine("// Beginning-of-line anchor"); | ||
| additionalDeclarations.Add("global::System.ReadOnlySpan<char> inputSpan = base.runtext;"); | ||
| additionalDeclarations.Add("int beginning = base.runtextbeg;"); | ||
| using (EmitBlock(writer, "if (pos > beginning && inputSpan[pos - 1] != '\\n')")) | ||
| { | ||
|
|
@@ -763,13 +804,15 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe | |
| writer.WriteLine($"int end = start + {(node.Kind == RegexNodeKind.Multi ? node.Str!.Length : 1)};"); | ||
| writer.WriteLine("base.Capture(0, start, end);"); | ||
| writer.WriteLine("base.runtextpos = end;"); | ||
| writer.WriteLine("return true;"); | ||
| return requiredHelpers; | ||
|
|
||
| case RegexNodeKind.Empty: | ||
| // This case isn't common in production, but it's very common when first getting started with the | ||
| // source generator and seeing what happens as you add more to expressions. When approaching | ||
| // it from a learning perspective, this is very common, as it's the empty string you start with. | ||
| writer.WriteLine("base.Capture(0, base.runtextpos, base.runtextpos);"); | ||
| writer.WriteLine("return true;"); | ||
| return requiredHelpers; | ||
| } | ||
|
|
||
|
|
@@ -781,7 +824,6 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe | |
|
|
||
| // Declare some locals. | ||
| string sliceSpan = "slice"; | ||
| writer.WriteLine("global::System.ReadOnlySpan<char> inputSpan = base.runtext;"); | ||
| writer.WriteLine("int pos = base.runtextpos, end = base.runtextend;"); | ||
| writer.WriteLine($"int original_pos = pos;"); | ||
| bool hasTimeout = EmitLoopTimeoutCounterIfNeeded(writer, rm); | ||
|
|
@@ -826,7 +868,7 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe | |
| } | ||
| writer.WriteLine("base.runtextpos = pos;"); | ||
| writer.WriteLine("base.Capture(0, original_pos, pos);"); | ||
| writer.WriteLine("return;"); | ||
| writer.WriteLine("return true;"); | ||
| writer.WriteLine(); | ||
|
|
||
| // We only get here in the code if the whole expression fails to match and jumps to | ||
|
|
@@ -837,6 +879,7 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe | |
| { | ||
| EmitUncaptureUntil("0"); | ||
| } | ||
| writer.WriteLine("return false;"); | ||
|
|
||
| // We're done with the match. | ||
|
|
||
|
|
@@ -846,8 +889,6 @@ private static RequiredHelperFunctions EmitGo(IndentedTextWriter writer, RegexMe | |
| // And emit any required helpers. | ||
| if (additionalLocalFunctions.Count != 0) | ||
| { | ||
| writer.WriteLine("return;"); // not strictly necessary, just for readability | ||
|
|
||
| foreach (KeyValuePair<string, string[]> localFunctions in additionalLocalFunctions.OrderBy(k => k.Key)) | ||
| { | ||
| writer.WriteLine(); | ||
|
|
@@ -2148,7 +2189,7 @@ void EmitBoundary(RegexNode node) | |
| _ => "base.IsECMABoundary", | ||
| }; | ||
|
|
||
| using (EmitBlock(writer, $"if ({call}(pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}, base.runtextbeg, end))")) | ||
| using (EmitBlock(writer, $"if ({call}(inputSpan, pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}, base.runtextbeg, end))")) | ||
| { | ||
| writer.WriteLine($"goto {doneLabel};"); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -171,6 +171,10 @@ public static void CompileToAssembly(System.Text.RegularExpressions.RegexCompila | |
| public string GroupNameFromNumber(int i) { throw null; } | ||
| public int GroupNumberFromName(string name) { throw null; } | ||
| protected void InitializeReferences() { } | ||
| public bool IsMatch(System.ReadOnlySpan<char> input) { throw null; } | ||
| public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex)] string pattern) { throw null; } | ||
| public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options) { throw null; } | ||
| public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; } | ||
| public bool IsMatch(string input) { throw null; } | ||
| public bool IsMatch(string input, int startat) { throw null; } | ||
| public static bool IsMatch(string input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex)] string pattern) { throw null; } | ||
|
|
@@ -330,17 +334,20 @@ protected void DoubleCrawl() { } | |
| protected void DoubleStack() { } | ||
| protected void DoubleTrack() { } | ||
| protected void EnsureStorage() { } | ||
| protected abstract bool FindFirstChar(); | ||
| protected abstract void Go(); | ||
| protected abstract void InitTrackCount(); | ||
| protected virtual bool FindFirstChar() { throw null; } | ||
| protected virtual void Go() { throw null; } | ||
| protected virtual void InitTrackCount() { throw null; } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we delete our generated override of InitTrackCount in the source generator? What breaks if runtrackcount remains 0? I think that'll just mean that we end up with the default minimums enforced by InitializeForGo (or whatever it's now called). We should probably delete the override and then confirm the defaults make sense, and/or determine whether there's a different number we could be computing that would help presize this better. If memory serves, right now it's yielding the number of opcodes produced by RegexWriter that are backtracking... but that's no longer particularly relevant to how we use runtrack, and we don't use runstack at all... so there's a fair bit of waste here to be cleaned up. |
||
| protected bool IsBoundary(int index, int startpos, int endpos) { throw null; } | ||
| protected bool IsBoundary(System.ReadOnlySpan<char> inputSpan, int index, int startpos, int endpos) { throw null; } // -> This is just temporary on the prototype. Method will be emitted by the generator engines | ||
| protected bool IsECMABoundary(int index, int startpos, int endpos) { throw null; } | ||
| protected bool IsECMABoundary(System.ReadOnlySpan<char> inputSpan, int index, int startpos, int endpos) { throw null; } // -> This is just temporary on the prototype. Method will be emitted by the generator engines | ||
| protected bool IsMatched(int cap) { throw null; } | ||
| protected int MatchIndex(int cap) { throw null; } | ||
| protected int MatchLength(int cap) { throw null; } | ||
| protected int Popcrawl() { throw null; } | ||
| protected internal System.Text.RegularExpressions.Match? Scan(System.Text.RegularExpressions.Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick) { throw null; } | ||
| protected internal System.Text.RegularExpressions.Match? Scan(System.Text.RegularExpressions.Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick, System.TimeSpan timeout) { throw null; } | ||
| protected internal virtual void Scan(System.ReadOnlySpan<char> text) { throw null; } | ||
| protected void TransferCapture(int capnum, int uncapnum, int start, int end) { } | ||
| protected void Uncapture() { } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,25 +7,21 @@ namespace System.Text.RegularExpressions | |
| { | ||
| internal sealed class CompiledRegexRunnerFactory : RegexRunnerFactory | ||
| { | ||
| private readonly DynamicMethod _goMethod; | ||
| private readonly DynamicMethod _findFirstCharMethod; | ||
| private readonly DynamicMethod _scanMethod; | ||
| private readonly int _trackcount; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As with the source generator, it'd be nice to see whether we can just delete _trackcount, the setting of it, etc. That'll contribute to #62450. |
||
|
|
||
| // Delegates are lazily created to avoid forcing JIT'ing until the regex is actually executed. | ||
| private Action<RegexRunner>? _go; | ||
| private Func<RegexRunner, bool>? _findFirstChar; | ||
| private CompiledRegexRunner.ScanDelegate? _scan; | ||
|
|
||
| public CompiledRegexRunnerFactory(DynamicMethod goMethod, DynamicMethod findFirstCharMethod, int trackcount) | ||
| public CompiledRegexRunnerFactory(DynamicMethod scanMethod, int trackcount) | ||
| { | ||
| _goMethod = goMethod; | ||
| _findFirstCharMethod = findFirstCharMethod; | ||
| _scanMethod = scanMethod; | ||
| _trackcount = trackcount; | ||
| } | ||
|
|
||
| protected internal override RegexRunner CreateInstance() => | ||
| new CompiledRegexRunner( | ||
| _go ??= _goMethod.CreateDelegate<Action<RegexRunner>>(), | ||
| _findFirstChar ??= _findFirstCharMethod.CreateDelegate<Func<RegexRunner, bool>>(), | ||
| _scan ??= _scanMethod.CreateDelegate<CompiledRegexRunner.ScanDelegate>(), | ||
| _trackcount); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.