Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ internal abstract class RegexCompiler
// MethodInfo for ReadOnlySpan<char>.Slice(int, int), looked up by reflection on first access
// and cached in the property's backing field (`field ??=`). The `!` assumes the lookup succeeds.
private static MethodInfo SpanSliceIntIntMethod => field ??= typeof(ReadOnlySpan<char>).GetMethod("Slice", [typeof(int), typeof(int)])!;
// MethodInfo for the generic MemoryExtensions.StartsWith overload taking two ReadOnlySpan<T> arguments,
// closed over T = char. The open overload is selected by describing both parameters as
// ReadOnlySpan<> of generic method parameter 0; the result is cached in the backing field.
private static MethodInfo SpanStartsWithSpanMethod => field ??= typeof(MemoryExtensions).GetMethod("StartsWith", [typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0))])!.MakeGenericMethod(typeof(char));
// MethodInfo for the non-generic MemoryExtensions.StartsWith overload taking
// (ReadOnlySpan<char>, ReadOnlySpan<char>, StringComparison); resolved once and cached in the backing field.
private static MethodInfo SpanStartsWithSpanComparisonMethod => field ??= typeof(MemoryExtensions).GetMethod("StartsWith", [typeof(ReadOnlySpan<char>), typeof(ReadOnlySpan<char>), typeof(StringComparison)])!;
// MethodInfo for the generic MemoryExtensions.SequenceEqual overload taking two ReadOnlySpan<T> arguments,
// closed over T = char. Uses the same open-generic signature lookup as SpanStartsWithSpanMethod and is
// cached in the backing field after the first access.
private static MethodInfo SpanSequenceEqualSpanMethod => field ??= typeof(MemoryExtensions).GetMethod("SequenceEqual", [typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0))])!.MakeGenericMethod(typeof(char));
// MethodInfo for MemoryExtensions.AsSpan(string); resolved once and cached in the backing field.
private static MethodInfo StringAsSpanMethod => field ??= typeof(MemoryExtensions).GetMethod("AsSpan", [typeof(string)])!;
// MethodInfo for string's "get_Chars" accessor taking an int (the getter behind the string indexer);
// resolved once and cached in the backing field.
private static MethodInfo StringGetCharsMethod => field ??= typeof(string).GetMethod("get_Chars", [typeof(int)])!;
// MethodInfo for the generic Array.Resize method closed over int; the lookup is by name only.
// Resolved once and cached in the backing field.
private static MethodInfo ArrayResizeMethod => field ??= typeof(Array).GetMethod("Resize")!.MakeGenericMethod(typeof(int));
Expand Down Expand Up @@ -2112,8 +2113,6 @@ void EmitBackreference(RegexNode node)
BrfalseFar((node.Options & RegexOptions.ECMAScript) == 0 ? doneLabel : backreferenceEnd);

using RentedLocalBuilder matchLength = RentInt32Local();
using RentedLocalBuilder matchIndex = RentInt32Local();
using RentedLocalBuilder i = RentInt32Local();

// int matchLength = base.MatchLength(capnum);
Ldthis();
Expand All @@ -2135,118 +2134,135 @@ void EmitBackreference(RegexNode node)
Ldloc(matchLength);
BltFar(doneLabel);

// int matchIndex = base.MatchIndex(capnum);
Ldthis();
Ldc(capnum);
Call(MatchIndexMethod);
Stloc(matchIndex);
if ((node.Options & RegexOptions.IgnoreCase) != 0)
{
// For case-insensitive, we need to compare character-by-character with case equivalence checks.
using RentedLocalBuilder matchIndex = RentInt32Local();
using RentedLocalBuilder i = RentInt32Local();

Label condition = DefineLabel();
Label body = DefineLabel();
Label charactersMatched = DefineLabel();
LocalBuilder backreferenceCharacter = _ilg!.DeclareLocal(typeof(char));
LocalBuilder currentCharacter = _ilg.DeclareLocal(typeof(char));
// int matchIndex = base.MatchIndex(capnum);
Ldthis();
Ldc(capnum);
Call(MatchIndexMethod);
Stloc(matchIndex);

// for (int i = 0; ...)
Ldc(0);
Stloc(i);
Br(condition);
Label condition = DefineLabel();
Label body = DefineLabel();
Label charactersMatched = DefineLabel();
LocalBuilder backreferenceCharacter = _ilg!.DeclareLocal(typeof(char));
LocalBuilder currentCharacter = _ilg.DeclareLocal(typeof(char));

MarkLabel(body);
// for (int i = 0; ...)
Ldc(0);
Stloc(i);
Br(condition);

// char backreferenceChar = inputSpan[matchIndex + i];
Ldloca(inputSpan);
Ldloc(matchIndex);
Ldloc(i);
Add();
Call(SpanGetItemMethod);
LdindU2();
Stloc(backreferenceCharacter);
if (!rtl)
{
// char currentChar = slice[i];
Ldloca(slice);
Ldloc(i);
}
else
{
// char currentChar = inputSpan[pos - matchLength + i];
MarkLabel(body);

// char backreferenceChar = inputSpan[matchIndex + i];
Ldloca(inputSpan);
Ldloc(pos);
Ldloc(matchLength);
Sub();
Ldloc(matchIndex);
Ldloc(i);
Add();
}
Call(SpanGetItemMethod);
LdindU2();
Stloc(currentCharacter);
Call(SpanGetItemMethod);
LdindU2();
Stloc(backreferenceCharacter);
if (!rtl)
{
// char currentChar = slice[i];
Ldloca(slice);
Ldloc(i);
}
else
{
// char currentChar = inputSpan[pos - matchLength + i];
Ldloca(inputSpan);
Ldloc(pos);
Ldloc(matchLength);
Sub();
Ldloc(i);
Add();
}
Call(SpanGetItemMethod);
LdindU2();
Stloc(currentCharacter);

if ((node.Options & RegexOptions.IgnoreCase) != 0)
{
LocalBuilder caseEquivalences = DeclareReadOnlySpanChar();

// if (backreferenceChar != currentChar)
// if (backreferenceChar == currentChar) goto charactersMatched;
Ldloc(backreferenceCharacter);
Ldloc(currentCharacter);
Ceq();
BrtrueFar(charactersMatched);

// if (RegexCaseEquivalences.TryFindCaseEquivalencesForCharWithIBehavior(backreferenceChar, _culture, ref _caseBehavior, out ReadOnlySpan<char> equivalences))
// if (!RegexCaseEquivalences.TryFindCaseEquivalencesForCharWithIBehavior(backreferenceChar, _culture, ref _caseBehavior, out ReadOnlySpan<char> equivalences)) goto doneLabel;
Ldloc(backreferenceCharacter);
Ldthisfld(CultureField);
Ldthisflda(CaseBehaviorField);
Ldloca(caseEquivalences);
Call(RegexCaseEquivalencesTryFindCaseEquivalencesForCharWithIBehaviorMethod);
BrfalseFar(doneLabel);

// if (equivalences.IndexOf(slice[i]) < 0) // Or if (equivalences.IndexOf(inputSpan[pos - matchLength + i]) < 0) when rtl
// if (equivalences.IndexOf(currentCharacter) < 0) goto doneLabel;
Ldloc(caseEquivalences);
Ldloc(currentCharacter);
Call(SpanIndexOfCharMethod);
Ldc(0);
BltFar(doneLabel);

MarkLabel(charactersMatched);

// for (...; ...; i++)
Ldloc(i);
Ldc(1);
Add();
Stloc(i);

// for (...; i < matchLength; ...)
MarkLabel(condition);
Ldloc(i);
Ldloc(matchLength);
Blt(body);
}
else
{
// For case-sensitive, we can use SequenceEqual for efficient comparison.
// if (!inputSpan.Slice(base.MatchIndex(capnum), matchLength).SequenceEqual(slice.Slice(0, matchLength))) goto doneLabel;
// or for RTL:
// if (!inputSpan.Slice(base.MatchIndex(capnum), matchLength).SequenceEqual(inputSpan.Slice(pos - matchLength, matchLength))) goto doneLabel;

// inputSpan.Slice(base.MatchIndex(capnum), matchLength)
Ldloca(inputSpan);
Ldthis();
Ldc(capnum);
Call(MatchIndexMethod);
Ldloc(matchLength);
Call(SpanSliceIntIntMethod);

if (!rtl)
{
// slice.Slice(0, matchLength)
Ldloca(slice);
Ldloc(i);
Ldc(0);
Ldloc(matchLength);
Call(SpanSliceIntIntMethod);
}
else
{
// inputSpan.Slice(pos - matchLength, matchLength)
Ldloca(inputSpan);
Ldloc(pos);
Ldloc(matchLength);
Sub();
Ldloc(i);
Add();
Ldloc(matchLength);
Call(SpanSliceIntIntMethod);
}
Call(SpanGetItemMethod);
LdindU2();
Call(SpanIndexOfCharMethod);
Ldc(0);
// return false; // input didn't match.
BltFar(doneLabel);
}
else
{
// if (backreferenceCharacter != currentCharacter)
Ldloc(backreferenceCharacter);
Ldloc(currentCharacter);
Ceq();
// return false; // input didn't match.

// .SequenceEqual(...)
Call(SpanSequenceEqualSpanMethod);
BrfalseFar(doneLabel);
}

MarkLabel(charactersMatched);

// for (...; ...; i++)
Ldloc(i);
Ldc(1);
Add();
Stloc(i);

// for (...; i < matchLength; ...)
MarkLabel(condition);
Ldloc(i);
Ldloc(matchLength);
Blt(body);

// pos += matchLength; // or -= for rtl
Ldloc(pos);
Ldloc(matchLength);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1940,6 +1940,72 @@ public static IEnumerable<object[]> Match_Advanced_TestData()
}
};

// Backreferences with RightToLeft
// Note: For RTL, the pattern is processed right-to-left, so the group must come
// AFTER the backreference in the pattern (i.e., to the right of \1)
yield return new object[]
{
engine,
@"\1(\w)", "aa", RegexOptions.RightToLeft, 2, 2,
new CaptureData[]
{
new CaptureData("aa", 0, 2),
new CaptureData("a", 1, 1),
}
};
yield return new object[]
{
engine,
@"\1(\w+)", "abcabc", RegexOptions.RightToLeft, 6, 6,
new CaptureData[]
{
new CaptureData("abcabc", 0, 6),
new CaptureData("abc", 3, 3),
}
};
yield return new object[]
{
engine,
@"\1(\w)", "abba", RegexOptions.RightToLeft, 4, 4,
new CaptureData[]
{
new CaptureData("bb", 1, 2),
new CaptureData("b", 2, 1),
}
};

// Backreferences with RightToLeft and IgnoreCase
yield return new object[]
{
engine,
@"\1(\w)", "aA", RegexOptions.RightToLeft | RegexOptions.IgnoreCase, 2, 2,
new CaptureData[]
{
new CaptureData("aA", 0, 2),
new CaptureData("A", 1, 1),
}
};
yield return new object[]
{
engine,
@"\1(\w+)", "abcABC", RegexOptions.RightToLeft | RegexOptions.IgnoreCase, 6, 6,
new CaptureData[]
{
new CaptureData("abcABC", 0, 6),
new CaptureData("ABC", 3, 3),
}
};
yield return new object[]
{
engine,
@"\1(\w)", "aBBa", RegexOptions.RightToLeft | RegexOptions.IgnoreCase, 4, 4,
new CaptureData[]
{
new CaptureData("BB", 1, 2),
new CaptureData("B", 2, 1),
}
};

// Actual - "(?<1>\\d+)abc(?(1)222|111)"
yield return new object[]
{
Expand Down
Loading