Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -349,58 +349,6 @@ internal RegexNode FinalOptimize()
break;
}
}

// Optimization: implicit anchoring.
// If the expression begins with a .* loop, add an anchor to the beginning:
// - If Singleline is set such that '.' eats anything, the .* will zip to the end of the string and then backtrack through
// the whole thing looking for a match; since it will have examined everything, there's no benefit to examining it all
// again, and we can anchor to beginning.
// - If Singleline is not set, then '.' eats anything up until a '\n' and backtracks from there, so we can similarly avoid
// re-examining that content and anchor to the beginning of lines.
// We are currently very conservative here, only examining concat nodes. This could be loosened in the future, e.g. to
// explore captures (but think through any implications of there being a back ref to that capture), to explore loops and
// lazy loops with a positive minimum (but the anchor shouldn't be part of the loop), to explore alternations and support adding
// an anchor if all of them begin with appropriate star loops (though this could also be accomplished by factoring out the
// loops to be before the alternation), etc.
{
RegexNode node = rootNode.Child(0); // skip implicit root capture node
while (true)
{
bool singleline = (node.Options & RegexOptions.Singleline) != 0;
switch (node.Type)
{
case Concatenate:
node = node.Child(0);
continue;

case Setloop when singleline && node.N == int.MaxValue && node.Str == RegexCharClass.AnyClass:
case Setloopatomic when singleline && node.N == int.MaxValue && node.Str == RegexCharClass.AnyClass:
case Notoneloop when !singleline && node.N == int.MaxValue && node.Ch == '\n':
case Notoneloopatomic when !singleline && node.N == int.MaxValue && node.Ch == '\n':
RegexNode? parent = node.Next;
var anchor = new RegexNode(singleline ? Beginning : Bol, node.Options);
Debug.Assert(parent != null);
if (parent.Type == Concatenate)
{
Debug.Assert(parent.ChildCount() >= 2);
Debug.Assert(parent.Children is List<RegexNode>);
anchor.Next = parent;
((List<RegexNode>)parent.Children).Insert(0, anchor);
}
else
{
Debug.Assert(parent.Type == Capture && parent.Next is null, "Only valid capture is the implicit root capture");
var concat = new RegexNode(Concatenate, parent.Options);
concat.AddChild(anchor);
concat.AddChild(node);
parent.ReplaceChild(0, concat);
}
break;
}

break;
}
}
}

// Optimization: Unnecessary root atomic.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,13 @@ public static IEnumerable<object[]> Match_Basic_TestData()
yield return new object[] { @"(?>\w+)(?<!a)", "aa", RegexOptions.None, 0, 2, false, string.Empty };
yield return new object[] { @".+a", "baa", RegexOptions.None, 0, 3, true, "baa" };
yield return new object[] { @"[ab]+a", "cacbaac", RegexOptions.None, 0, 7, true, "baa" };
foreach (RegexOptions lineOption in new[] { RegexOptions.None, RegexOptions.Singleline, RegexOptions.Multiline })
{
yield return new object[] { @".*", "abc", lineOption, 1, 2, true, "bc" };
yield return new object[] { @".*c", "abc", lineOption, 1, 2, true, "bc" };
yield return new object[] { @"b.*", "abc", lineOption, 1, 2, true, "bc" };
yield return new object[] { @".*", "abc", lineOption, 2, 1, true, "c" };
}

// Using beginning/end of string chars \A, \Z: Actual - "\\Aaaa\\w+zzz\\Z"
yield return new object[] { @"\Aaaa\w+zzz\Z", "aaaasdfajsdlfjzzz", RegexOptions.IgnoreCase, 0, 17, true, "aaaasdfajsdlfjzzz" };
Expand Down Expand Up @@ -852,18 +859,21 @@ public void Match_Advanced(string pattern, string input, RegexOptions options, i
Assert.True(Regex.IsMatch(input, pattern));
}

// Note: this block will fail if any inputs attempt to look for anchors or lookbehinds at the initial position,
// as there is a difference between Match(input, beginning) and Match(input, beginning, input.Length - beginning)
// in that the former doesn't modify from 0 what the engine sees as the beginning of the input whereas the latter
// is equivalent to taking a substring and then matching on that. However, as we currently don't have any such inputs,
// it's currently a viable way to test the additional overload. Same goes for the similar case below with options.
if (beginning + length == input.Length)
{
// Use Match(string, int)
VerifyMatch(r.Match(input, beginning), true, expected);

Assert.True(r.IsMatch(input, beginning));
}
else
{
// Use Match(string, int, int)
VerifyMatch(r.Match(input, beginning, length), true, expected);
}

// Use Match(string, int, int)
VerifyMatch(r.Match(input, beginning, length), true, expected);
}

r = new Regex(pattern, options);
Expand All @@ -890,6 +900,36 @@ public void Match_Advanced(string pattern, string input, RegexOptions options, i
}
}

public static IEnumerable<object[]> Match_StartatDiffersFromBeginning_MemberData()
{
    // Table of scenarios. Each entry supplies, in order: pattern, input, startat,
    // expectedSuccessStartAt and expectedSuccessBeginning. Every scenario is emitted
    // once per line-related option below, with the option inserted after the input.
    var scenarios = new (string Pattern, string Input, int Startat, bool SuccessStartAt, bool SuccessBeginning)[]
    {
        // Anchors
        (@"^.*", "abc", 0, true, true),
        (@"^.*", "abc", 1, false, true),

        // Positive Lookbehinds
        (@"(?<=abc)def", "abcdef", 3, true, false),

        // Negative Lookbehinds
        (@"(?<!abc)def", "abcdef", 3, false, true),
    };

    RegexOptions[] lineOptions = { RegexOptions.None, RegexOptions.Singleline, RegexOptions.Multiline };

    for (int optionIndex = 0; optionIndex < lineOptions.Length; optionIndex++)
    {
        RegexOptions options = lineOptions[optionIndex];

        for (int scenarioIndex = 0; scenarioIndex < scenarios.Length; scenarioIndex++)
        {
            var scenario = scenarios[scenarioIndex];
            yield return new object[]
            {
                scenario.Pattern,
                scenario.Input,
                options,
                scenario.Startat,
                scenario.SuccessStartAt,
                scenario.SuccessBeginning
            };
        }
    }
}

[Theory]
[MemberData(nameof(Match_StartatDiffersFromBeginning_MemberData))]
[MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Match_StartatDiffersFromBeginning_MemberData), 2, MemberType = typeof(RegexCompilationHelper))]
public void Match_StartatDiffersFromBeginning(string pattern, string input, RegexOptions options, int startat, bool expectedSuccessStartAt, bool expectedSuccessBeginning)
{
    // Checks two groups of calls against separately supplied expectations:
    // the overloads given only a start position, and the calls that operate
    // on the tail of the input (an explicit substring, or a start plus length).
    Regex regex = new Regex(pattern, options);

    // Overloads that receive only a start position.
    bool isMatchFromStartat = regex.IsMatch(input, startat);
    Assert.Equal(expectedSuccessStartAt, isMatchFromStartat);

    Match matchFromStartat = regex.Match(input, startat);
    Assert.Equal(expectedSuccessStartAt, matchFromStartat.Success);

    // Matching against an explicit substring that starts at startat.
    string tail = input.Substring(startat);
    Match matchOnTail = regex.Match(tail);
    Assert.Equal(expectedSuccessBeginning, matchOnTail.Success);

    // Overload that receives both the start position and the remaining length.
    Match matchOnRange = regex.Match(input, startat, input.Length - startat);
    Assert.Equal(expectedSuccessBeginning, matchOnRange.Success);
}

private static void VerifyMatch(Match match, bool expectedSuccess, CaptureData[] expected)
{
Assert.Equal(expectedSuccess, match.Success);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,16 @@ public static IEnumerable<object[]> Matches_TestData()
}
};

yield return new object[]
{
".*", "abc", RegexOptions.None,
new[]
{
new CaptureData("abc", 0, 3),
new CaptureData("", 3, 0)
}
};

if (!PlatformDetection.IsNetFramework)
{
// .NET Framework missing fix in https://github.com/dotnet/runtime/pull/1075
Expand Down