-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Vectorize RegexInterpreter opcode loops for Oneloop, Onerep, Notonerep, and MatchString #124628
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
8e0b5a0
22e1768
1fe43b5
6d7b8c6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -210,7 +210,6 @@ private char Forwardcharnext(ReadOnlySpan<char> inputSpan) | |
| private bool MatchString(string str, ReadOnlySpan<char> inputSpan) | ||
| { | ||
| int c = str.Length; | ||
| int pos; | ||
|
|
||
| if (!_rightToLeft) | ||
| { | ||
|
|
@@ -219,7 +218,13 @@ private bool MatchString(string str, ReadOnlySpan<char> inputSpan) | |
| return false; | ||
| } | ||
|
|
||
| pos = runtextpos + c; | ||
| if (!inputSpan.Slice(runtextpos, c).SequenceEqual(str.AsSpan())) | ||
| { | ||
| return false; | ||
| } | ||
|
|
||
| runtextpos += c; | ||
| return true; | ||
| } | ||
| else | ||
| { | ||
|
|
@@ -228,25 +233,18 @@ private bool MatchString(string str, ReadOnlySpan<char> inputSpan) | |
| return false; | ||
| } | ||
|
|
||
| pos = runtextpos; | ||
| } | ||
|
|
||
| while (c != 0) | ||
| { | ||
| if (str[--c] != inputSpan[--pos]) | ||
| int pos = runtextpos; | ||
| while (c != 0) | ||
| { | ||
| return false; | ||
| if (str[--c] != inputSpan[--pos]) | ||
| { | ||
| return false; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| if (!_rightToLeft) | ||
| { | ||
| pos += str.Length; | ||
| runtextpos = pos; | ||
| return true; | ||
| } | ||
|
|
||
| runtextpos = pos; | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
| private bool MatchRef(int index, int length, ReadOnlySpan<char> inputSpan, bool caseInsensitive) | ||
|
|
@@ -896,12 +894,23 @@ private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan) | |
| } | ||
|
|
||
| char ch = (char)Operand(0); | ||
| while (c-- > 0) | ||
| if (!_rightToLeft) | ||
| { | ||
| if (Forwardcharnext(inputSpan) != ch) | ||
| if (inputSpan.Slice(runtextpos, c).ContainsAnyExcept(ch)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this one actually beneficial? It's pretty rare to have long runs of a single specific character. |
||
| { | ||
| goto BreakBackward; | ||
| } | ||
| runtextpos += c; | ||
| } | ||
| else | ||
| { | ||
| while (c-- > 0) | ||
| { | ||
| if (Forwardcharnext(inputSpan) != ch) | ||
| { | ||
| goto BreakBackward; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| advance = 2; | ||
|
|
@@ -916,12 +925,23 @@ private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan) | |
| } | ||
|
|
||
| char ch = (char)Operand(0); | ||
| while (c-- > 0) | ||
| if (!_rightToLeft) | ||
| { | ||
| if (Forwardcharnext(inputSpan) == ch) | ||
| if (inputSpan.Slice(runtextpos, c).Contains(ch)) | ||
| { | ||
| goto BreakBackward; | ||
| } | ||
| runtextpos += c; | ||
| } | ||
| else | ||
| { | ||
| while (c-- > 0) | ||
| { | ||
| if (Forwardcharnext(inputSpan) == ch) | ||
| { | ||
| goto BreakBackward; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| advance = 2; | ||
|
|
@@ -957,12 +977,31 @@ private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan) | |
| char ch = (char)Operand(0); | ||
| int i; | ||
|
|
||
| for (i = len; i > 0; i--) | ||
| if (!_rightToLeft) | ||
| { | ||
| if (Forwardcharnext(inputSpan) != ch) | ||
| // We're left-to-right, so we can employ the vectorized IndexOfAnyExcept | ||
| // to search for any character that isn't the target. | ||
| i = inputSpan.Slice(runtextpos, len).IndexOfAnyExcept(ch); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same question. If these onerep/loop/loopatomic actually improve performance on real regexes rather than microbenchmarks targeting these cases, great. Otherwise, though, I'd rather avoid adding more specialized code paths for things that only help with fake scenarios. |
||
| if (i == -1) | ||
| { | ||
| Backwardnext(); | ||
| break; | ||
| runtextpos += len; | ||
| i = 0; | ||
| } | ||
| else | ||
| { | ||
| runtextpos += i; | ||
| i = len - i; | ||
| } | ||
| } | ||
| else | ||
| { | ||
| for (i = len; i > 0; i--) | ||
| { | ||
| if (Forwardcharnext(inputSpan) != ch) | ||
| { | ||
| Backwardnext(); | ||
| break; | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can/should this also be a SequenceEqual or EndsWith or equivalent?