From 8e0b5a09ce49c030bef355dfe5b411454ac37658 Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Thu, 19 Feb 2026 22:23:55 -0700 Subject: [PATCH 1/4] Use vectorized IndexOfAnyExcept for Oneloop/Oneloopatomic in interpreter Replace the per-character loop in the Oneloop/Oneloopatomic opcode handler with a vectorized IndexOfAnyExcept call for left-to-right matching. This mirrors the existing optimization already applied to Notoneloop (which uses IndexOf), enabling SIMD-accelerated scanning when matching repeated occurrences of a single character (e.g. a+ or a{3,}). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../RegularExpressions/RegexInterpreter.cs | 27 ++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs index e477f7c742babc..bbb2eebfc0156c 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs @@ -957,12 +957,31 @@ private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan) char ch = (char)Operand(0); int i; - for (i = len; i > 0; i--) + if (!_rightToLeft) { - if (Forwardcharnext(inputSpan) != ch) + // We're left-to-right, so we can employ the vectorized IndexOfAnyExcept + // to search for any character that isn't the target. 
+ i = inputSpan.Slice(runtextpos, len).IndexOfAnyExcept(ch); + if (i == -1) { - Backwardnext(); - break; + runtextpos += len; + i = 0; + } + else + { + runtextpos += i; + i = len - i; + } + } + else + { + for (i = len; i > 0; i--) + { + if (Forwardcharnext(inputSpan) != ch) + { + Backwardnext(); + break; + } } } From 22e17684180a116b85fbdc66fd94c871067f262d Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Thu, 19 Feb 2026 22:25:24 -0700 Subject: [PATCH 2/4] Use vectorized ContainsAnyExcept for Onerep in interpreter Replace the per-character loop in the Onerep opcode handler with a vectorized ContainsAnyExcept call for left-to-right matching. This enables SIMD-accelerated verification when matching a fixed number of occurrences of a single character (e.g. the minimum repetitions of a{5,}). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Text/RegularExpressions/RegexInterpreter.cs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs index bbb2eebfc0156c..33abc941eb7007 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs @@ -896,12 +896,23 @@ private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan) } char ch = (char)Operand(0); - while (c-- > 0) + if (!_rightToLeft) { - if (Forwardcharnext(inputSpan) != ch) + if (inputSpan.Slice(runtextpos, c).ContainsAnyExcept(ch)) { goto BreakBackward; } + runtextpos += c; + } + else + { + while (c-- > 0) + { + if (Forwardcharnext(inputSpan) != ch) + { + goto BreakBackward; + } + } } } advance = 2; From 1fe43b55e3757c0e905d4289f174579ac799d1bc Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Thu, 19 Feb 
2026 22:25:36 -0700 Subject: [PATCH 3/4] Use vectorized Contains for Notonerep in interpreter Replace the per-character loop in the Notonerep opcode handler with a vectorized Contains call for left-to-right matching. This enables SIMD-accelerated verification when matching a fixed number of characters that must not be a specific character (e.g. [^a]{5}). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Text/RegularExpressions/RegexInterpreter.cs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs index 33abc941eb7007..a9ba32d0107762 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs @@ -927,12 +927,23 @@ private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan) } char ch = (char)Operand(0); - while (c-- > 0) + if (!_rightToLeft) { - if (Forwardcharnext(inputSpan) == ch) + if (inputSpan.Slice(runtextpos, c).Contains(ch)) { goto BreakBackward; } + runtextpos += c; + } + else + { + while (c-- > 0) + { + if (Forwardcharnext(inputSpan) == ch) + { + goto BreakBackward; + } + } } } advance = 2; From 6d7b8c67ba6482657bf8dfceeb248359cf173487 Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Thu, 19 Feb 2026 22:26:13 -0700 Subject: [PATCH 4/4] Use vectorized SequenceEqual for MatchString in interpreter Replace the per-character backwards comparison loop in MatchString with a vectorized SequenceEqual call for left-to-right matching. This enables SIMD-accelerated string comparison when matching literal multi-character strings within regex patterns. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../RegularExpressions/RegexInterpreter.cs | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs index a9ba32d0107762..00715b9086a22a 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs @@ -210,7 +210,6 @@ private char Forwardcharnext(ReadOnlySpan<char> inputSpan) private bool MatchString(string str, ReadOnlySpan<char> inputSpan) { int c = str.Length; - int pos; if (!_rightToLeft) { @@ -219,7 +218,13 @@ private bool MatchString(string str, ReadOnlySpan<char> inputSpan) return false; } - pos = runtextpos + c; + if (!inputSpan.Slice(runtextpos, c).SequenceEqual(str.AsSpan())) + { + return false; + } + + runtextpos += c; + return true; } else { @@ -228,25 +233,18 @@ private bool MatchString(string str, ReadOnlySpan<char> inputSpan) return false; } - pos = runtextpos; - } - - while (c != 0) - { - if (str[--c] != inputSpan[--pos]) + int pos = runtextpos; + while (c != 0) { - return false; + if (str[--c] != inputSpan[--pos]) + { + return false; + } } - } - if (!_rightToLeft) - { - pos += str.Length; + runtextpos = pos; + return true; } - - runtextpos = pos; - - return true; } private bool MatchRef(int index, int length, ReadOnlySpan<char> inputSpan, bool caseInsensitive)