Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2326,11 +2326,17 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool i
return true;
}

// If this node is a loop (e.g. a {one/notone/set}loop), see if it overlaps with its successor in the concatenation.
// If it doesn't, then we can upgrade it to being atomic (e.g. a {one/notone/set}loopatomic).
// Doing so avoids unnecessary backtracking.
switch (node.Kind)
{
case RegexNodeKind when iterateNullableSubsequent && subsequent.Kind is RegexNodeKind.PositiveLookaround:
if (!CanBeMadeAtomic(node, subsequent.Child(0), iterateNullableSubsequent: false, allowLazy: allowLazy))
{
return false;
}
break;

case RegexNodeKind.Oneloop:
case RegexNodeKind.Onelazy when allowLazy:
switch (subsequent.Kind)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,55 @@ public static IEnumerable<object[]> Match_MemberData()
yield return (@"(?:(?!(b)b)\1a)*", "babababa", RegexOptions.None, 0, 8, true, string.Empty);
yield return (@"(.*?)a(?!(a+)b\2c)\2(.*)", "baaabaac", RegexOptions.None, 0, 8, false, string.Empty);
yield return (@"(?!(abc))+\w\w\w", "abcdef", RegexOptions.None, 0, 6, true, "bcd");
yield return (@"a+(?!c)", "aaab", RegexOptions.None, 0, 4, true, "aaa");
yield return (@"a+(?!c)", "aaac", RegexOptions.None, 0, 4, true, "aa");
yield return (@"a*(?!c)", "aaab", RegexOptions.None, 0, 4, true, "aaa");
yield return (@"a{2,5}(?!c)", "aaaaac", RegexOptions.None, 0, 6, true, "aaaa");
yield return (@"a+?(?!c)", "aaab", RegexOptions.None, 0, 4, true, "a");
yield return (@"a*?(?!c)", "aaab", RegexOptions.None, 0, 4, true, "");
yield return (@"a{2,5}?(?!c)", "aaaaab", RegexOptions.None, 0, 6, true, "aa");
yield return (@"[ab]*(?!x)", "ababc", RegexOptions.None, 0, 5, true, "abab");
yield return (@"a+(?=b)(?!c)", "aabx", RegexOptions.None, 0, 4, true, "aa");
yield return (@"a+?(?=b)(?!c)", "aabx", RegexOptions.None, 0, 4, true, "aa");

// Zero-width positive lookahead assertion
yield return (@"(?=(abc))?\1", "abc", RegexOptions.None, 0, 3, true, "abc");
yield return (@"(?=(abc))+\1", "abc", RegexOptions.None, 0, 3, true, "abc");
yield return (@"(?=(abc))*\1", "abc", RegexOptions.None, 0, 3, true, "abc");
yield return (@"^.*?(?=.)b", "ab", RegexOptions.None, 0, 2, true, "ab");
yield return (@".*?(?=.)b", "ab", RegexOptions.None, 0, 2, true, "ab");
yield return (@"^(?>.*?)(?=.)b", "ab", RegexOptions.None, 0, 2, false, "");
yield return (@"(?>.*?)(?=.)b", "ab", RegexOptions.None, 0, 2, true, "b");
yield return (@"a+(?=b)", "aaab", RegexOptions.None, 0, 4, true, "aaa");
yield return (@"a+(?=b)", "aaabc", RegexOptions.None, 0, 5, true, "aaa");
yield return (@"a*(?=b)", "aaab", RegexOptions.None, 0, 4, true, "aaa");
yield return (@"a{2,5}(?=b)", "aaaaab", RegexOptions.None, 0, 6, true, "aaaaa");
yield return (@"a+?(?=b)", "aaab", RegexOptions.None, 0, 4, true, "aaa");
yield return (@"a*?(?=b)", "aaab", RegexOptions.None, 0, 4, true, "aaa");
yield return (@"a{2,5}?(?=b)", "aaaaab", RegexOptions.None, 0, 6, true, "aaaaa");
yield return (@"a+b+(?=c)", "aabbbc", RegexOptions.None, 0, 6, true, "aabbb");
yield return (@"a+?b+(?=c)", "aabbbc", RegexOptions.None, 0, 6, true, "aabbb");
yield return (@"a+b+?(?=c)", "aabbbc", RegexOptions.None, 0, 6, true, "aabbb");
yield return (@"[ab]+(?=c)", "ababc", RegexOptions.None, 0, 5, true, "abab");
yield return (@"[ab]+?(?=c)", "ababc", RegexOptions.None, 0, 5, true, "abab");
yield return (@"\w+(?=\b)", "hello world", RegexOptions.None, 0, 11, true, "hello");
yield return (@"\w+?(?=\b)", "hello world", RegexOptions.None, 0, 11, true, "hello");
yield return (@"(?>a+)(?=b)", "aaab", RegexOptions.None, 0, 4, true, "aaa");
yield return (@"(?>a*)(?=b)", "aaab", RegexOptions.None, 0, 4, true, "aaa");
yield return (@"(?>a{2,5})(?=b)", "aaaaab", RegexOptions.None, 0, 6, true, "aaaaa");
yield return (@"a*(?=a)", "aaa", RegexOptions.None, 0, 3, true, "aa");
yield return (@"a*?(?=a)", "aaa", RegexOptions.None, 0, 3, true, "");
yield return (@"a+(?=a*b)ab", "aaaab", RegexOptions.None, 0, 5, true, "aaaab");
yield return (@"a+?(?=a*b)ab", "aaaab", RegexOptions.None, 0, 5, true, "aaaab");
yield return (@"(a+)+(?=b)", "aaab", RegexOptions.None, 0, 4, true, "aaa");
yield return (@"(a+?)+(?=b)", "aaab", RegexOptions.None, 0, 4, true, "aaa");
yield return (@"(a+)+?(?=b)", "aaab", RegexOptions.None, 0, 4, true, "aaa");
yield return (@"(a+|b+)(?=c)", "aaac", RegexOptions.None, 0, 4, true, "aaa");
yield return (@"(a+?|b+?)(?=c)", "aaac", RegexOptions.None, 0, 4, true, "aaa");
yield return (@"(a+)(?=\1b)", "aaaaaab", RegexOptions.None, 0, 7, true, "aaa");
yield return (@"(a+?)(?=\1b)", "aaaaaab", RegexOptions.None, 0, 7, true, "aaa");
yield return (@"[A-Z]+(?=b)", "AAAb", RegexOptions.IgnoreCase, 0, 4, true, "AAA");
yield return (@"[A-Z]+?(?=b)", "AAAb", RegexOptions.IgnoreCase, 0, 4, true, "AAA");

// Zero-width positive lookbehind assertion
yield return (@"(\w){6}(?<=XXX)def", "abcXXXdef", RegexOptions.None, 0, 9, true, "abcXXXdef");
Expand Down Expand Up @@ -136,13 +182,21 @@ public static IEnumerable<object[]> Match_MemberData()
yield return (@"(?<=(abc)+?)", "123abc", RegexOptions.None, 0, 6, true, "");
yield return (@"(?<=(abc)+?)", "123ab", RegexOptions.None, 0, 5, false, "");
yield return (@"(?<=(abc)+?123)", "abcabc123", RegexOptions.None, 0, 9, true, "");
yield return (@"a+(?!c)(?<=y)", "yaab", RegexOptions.None, 0, 4, false, "");
yield return (@"(?<=a{2,4})b+", "aaabbb", RegexOptions.None, 0, 6, true, "bbb");
yield return (@"(?<=a+)b+?", "aaabbb", RegexOptions.None, 0, 6, true, "b");

// Zero-width negative lookbehind assertion: Actual - "(\\w){6}(?<!XXX)def"
yield return (@"(\w){6}(?<!XXX)def", "XXXabcdef", RegexOptions.None, 0, 9, true, "XXXabcdef");
yield return (@"123(?<!$) abcdef", "123 abcdef", RegexOptions.None, 0, 10, true, "123 abcdef");
yield return (@"(abc)\w(?<!(?(1)e|d))", "abcdabc", RegexOptions.None, 0, 7, true, "abcd");
yield return (@"(abc)\w(?<!(?(cd)e|d))", "abcdabc", RegexOptions.None, 0, 7, true, "abcd");
yield return (@"(?<!(b)a)\1", "bb", RegexOptions.None, 0, 2, false, string.Empty); // negative assertion should not capture
yield return (@"(?<=a)b+c", "abbbbc", RegexOptions.None, 0, 6, true, "bbbbc");
yield return (@"(?<=a+)bc", "aaabc", RegexOptions.None, 0, 5, true, "bc");
yield return (@"(?<!x)a+b", "yaab", RegexOptions.None, 0, 4, true, "aab");
yield return (@"(?<!x)a+b", "xaab", RegexOptions.None, 0, 4, true, "ab");
yield return (@"a+(?=b)(?<!x)", "yaab", RegexOptions.None, 0, 4, true, "aa");

// Nonbacktracking subexpression: Actual - "[^0-9]+(?>[0-9]+)3"
// The last 3 causes the match to fail, since the non backtracking subexpression does not give up the last digit it matched
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ public class RegexReductionTests
[InlineData(@"abc(?=\Z)", @"abc\Z")]
[InlineData(@"abc(?=\A)", @"abc\A")]
[InlineData(@"abc(?=$)", @"abc$")]
[InlineData(@"a*(?=b)bcd", @"(?>a*)(?=b)bcd")]
// Alternation reduction
[InlineData("a|b", "[ab]")]
[InlineData("a|b|c|d|e|g|h|z", "[a-eghz]")]
Expand Down Expand Up @@ -574,6 +575,10 @@ public void PatternsReduceIdentically(string actual, string expected)
[InlineData("(?=(abc))", "(?=abc)")]
[InlineData("(?=a(b*)c)", "(?=ab*c)")]
[InlineData("(?=a((((b))))c)", "(?=abc)")]
[InlineData(@"a*(?=a)", @"(?>a*)(?=a)")]
[InlineData(@"a*(?!b)b", @"(?>a*)(?!b)b")]
[InlineData(@"a*(?<!b)cde", @"(?>a*)(?<!b)cde")]
[InlineData(@"a*(?<=b)cde", @"(?>a*)(?<=b)cde")]
// Loops inside alternation constructs
[InlineData("(abc*|def)chi", "(ab(?>c*)|def)chi")]
[InlineData("(abc|def*)fhi", "(abc|de(?>f*))fhi")]
Expand Down
Loading