Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter wri
writer.WriteLine();

writer.WriteLine(" // Description:");
DescribeExpression(writer, rm.Code.Tree.Root.Child(0), " // "); // skip implicit root capture
DescribeExpression(writer, rm.Code.Tree.Root.Child(0), " // ", rm.Code); // skip implicit root capture
writer.WriteLine();

writer.WriteLine($" protected override bool FindFirstChar()");
Expand Down Expand Up @@ -903,11 +903,11 @@ void EmitSwitchedBranches()
sliceStaticPos = startingSliceStaticPos;

RegexNode child = node.Child(i);
Debug.Assert(child.Type is RegexNode.One or RegexNode.Multi or RegexNode.Concatenate, DescribeNode(child));
Debug.Assert(child.Type is RegexNode.One or RegexNode.Multi or RegexNode.Concatenate, DescribeNode(child, rm.Code));
Debug.Assert(child.Type is not RegexNode.Concatenate || (child.ChildCount() >= 2 && child.Child(0).Type is RegexNode.One or RegexNode.Multi));

RegexNode? childStart = child.FindBranchOneOrMultiStart();
Debug.Assert(childStart is not null, DescribeNode(child));
Debug.Assert(childStart is not null, DescribeNode(child, rm.Code));

writer.WriteLine($"case {Literal(childStart.FirstCharOfOneOrMulti())}:");
writer.Indent++;
Expand Down Expand Up @@ -1140,15 +1140,15 @@ void EmitBackreference(RegexNode node)
// in which case per ECMA 262 section 21.2.2.9 the backreference should succeed.
if ((node.Options & RegexOptions.ECMAScript) != 0)
{
writer.WriteLine($"// If the {DescribeNonNegative(node.M)} capture hasn't matched, the backreference matches with RegexOptions.ECMAScript rules.");
writer.WriteLine($"// If the {DescribeCapture(node.M, rm.Code)} hasn't matched, the backreference matches with RegexOptions.ECMAScript rules.");
using (EmitBlock(writer, $"if (base.IsMatched({capnum}))"))
{
EmitWhenHasCapture();
}
}
else
{
writer.WriteLine($"// If the {DescribeNonNegative(node.M)} capture hasn't matched, the backreference doesn't match.");
writer.WriteLine($"// If the {DescribeCapture(node.M, rm.Code)} hasn't matched, the backreference doesn't match.");
using (EmitBlock(writer, $"if (!base.IsMatched({capnum}))"))
{
writer.WriteLine($"goto {doneLabel};");
Expand Down Expand Up @@ -1225,7 +1225,7 @@ void EmitBackreferenceConditional(RegexNode node)
{
using (EmitBlock(writer, $"if (base.IsMatched({capnum}))"))
{
writer.WriteLine($"// The {DescribeNonNegative(node.M)} capture group captured a value. Match the first branch.");
writer.WriteLine($"// The {DescribeCapture(node.M, rm.Code)} captured a value. Match the first branch.");
EmitNode(yesBranch);
writer.WriteLine();
TransferSliceStaticPosToPos(); // make sure sliceStaticPos is 0 after each branch
Expand Down Expand Up @@ -1702,7 +1702,7 @@ void EmitNode(RegexNode node, RegexNode? subsequent = null, bool emitLengthCheck

// Put the node's code into its own scope. If the node contains labels that may need to
// be visible outside of its scope, the scope is still emitted for clarity but is commented out.
using (EmitScope(writer, DescribeNode(node), faux: PossiblyBacktracks(node) && !node.IsAtomicByParent()))
using (EmitScope(writer, DescribeNode(node, rm.Code), faux: PossiblyBacktracks(node) && !node.IsAtomicByParent()))
{
switch (node.Type)
{
Expand Down Expand Up @@ -1857,7 +1857,7 @@ void WriteSingleCharChild(RegexNode child, bool includeDescription = true)
writer.Write("if (");
}
EmitSingleChar(child, emitLengthCheck: false, clauseOnly: true);
prevDescription = includeDescription ? DescribeNode(child) : null;
prevDescription = includeDescription ? DescribeNode(child, rm.Code) : null;
wroteClauses = true;
}

Expand Down Expand Up @@ -3395,16 +3395,17 @@ private static string Literal(RegexOptions options)
}

/// <summary>Gets a textual description of the node fit for rendering in a comment in source.</summary>
private static string DescribeNode(RegexNode node) =>
private static string DescribeNode(RegexNode node, RegexCode regexCode) =>
node.Type switch
{
RegexNode.Alternate => $"Match with {node.ChildCount()} alternative expressions{(node.IsAtomicByParent() ? ", atomically" : "")}.",
RegexNode.Atomic => $"Atomic group.",
RegexNode.Beginning => "Match if at the beginning of the string.",
RegexNode.Bol => "Match if at the beginning of a line.",
RegexNode.Boundary => $"Match if at a word boundary.",
RegexNode.Capture when node.N != -1 => $"{DescribeNonNegative(node.M)} capturing group. Uncaptures the {DescribeNonNegative(node.N)} capturing group.",
RegexNode.Capture when node.N == -1 => $"{DescribeNonNegative(node.M)} capturing group.",
RegexNode.Capture when node.M == -1 && node.N != -1 => $"Non-capturing balancing group. Uncaptures the {DescribeCapture(node.N, regexCode)}.",
RegexNode.Capture when node.N != -1 => $"Balancing group. Captures the {DescribeCapture(node.M, regexCode)} and uncaptures the {DescribeCapture(node.N, regexCode)}.",
RegexNode.Capture when node.N == -1 => $"{DescribeCapture(node.M, regexCode)}.",
RegexNode.Concatenate => "Match a sequence of expressions.",
RegexNode.ECMABoundary => $"Match if at a word boundary (according to ECMAScript rules).",
RegexNode.Empty => $"Match an empty string.",
Expand All @@ -3421,23 +3422,51 @@ private static string DescribeNode(RegexNode node) =>
RegexNode.One => $"Match {Literal(node.Ch)}.",
RegexNode.Oneloop or RegexNode.Oneloopatomic or RegexNode.Onelazy => $"Match {Literal(node.Ch)} {DescribeLoop(node)}.",
RegexNode.Prevent => $"Zero-width negative lookahead assertion.",
RegexNode.Ref => $"Match the same text as matched by the {DescribeNonNegative(node.M)} capture group.",
RegexNode.Ref => $"Match the same text as matched by the {DescribeCapture(node.M, regexCode)}.",
RegexNode.Require => $"Zero-width positive lookahead assertion.",
RegexNode.Set => $"Match a character in the set {RegexCharClass.SetDescription(node.Str!)}.",
RegexNode.Setloop or RegexNode.Setloopatomic or RegexNode.Setlazy => $"Match a character in the set {RegexCharClass.SetDescription(node.Str!)} {DescribeLoop(node)}.",
RegexNode.Start => "Match if at the start position.",
RegexNode.Testgroup => $"Conditionally match one of two expressions depending on whether an initial expression matches.",
RegexNode.Testref => $"Conditionally match one of two expressions depending on whether the {DescribeNonNegative(node.M)} capture group matched.",
RegexNode.Testref => $"Conditionally match one of two expressions depending on whether the {DescribeCapture(node.M, regexCode)} matched.",
RegexNode.UpdateBumpalong => $"Advance the next matching position.",
_ => $"Unknown node type {node.Type}",
};

/// <summary>Gets an identifier to describe a capture group.</summary>
/// <param name="capNum">The number of the capture group to describe.</param>
/// <param name="regexCode">The regex code whose capture tables are consulted when looking up a group name.</param>
/// <returns>
/// The group's name (rendered via <c>Literal</c>) when it has one that is not merely its own number,
/// otherwise an ordinal such as "3rd"; in both cases followed by " capture group".
/// </returns>
private static string DescribeCapture(int capNum, RegexCode regexCode)
{
    string groupName = RegexParser.GroupNameFromNumber(regexCode.Caps, regexCode.Tree.CapsList, regexCode.CapSize, capNum);

    // A name that simply spells out the group's own number carries no extra information,
    // so it is treated the same as having no name at all.
    bool nameIsJustTheNumber = int.TryParse(groupName, out int parsedNumber) && parsedNumber == capNum;

    string description;
    if (string.IsNullOrEmpty(groupName) || nameIsJustTheNumber)
    {
        // Fall back to an English ordinal: numbers ending in 1, 2, or 3 take "st", "nd", "rd",
        // except when the last two digits fall in 10..20 (11th, 12th, 13th), which take "th".
        int lastDigit = capNum % 10;
        int lastTwoDigits = capNum % 100;

        string ordinalSuffix = "th";
        if (lastTwoDigits is < 10 or > 20)
        {
            ordinalSuffix = lastDigit switch
            {
                1 => "st",
                2 => "nd",
                3 => "rd",
                _ => "th",
            };
        }

        description = $"{capNum}{ordinalSuffix}";
    }
    else
    {
        description = Literal(groupName);
    }

    return $"{description} capture group";
}

/// <summary>Writes a textual description of the node tree fit for rendering in source.</summary>
/// <param name="writer">The writer to which the description should be written.</param>
/// <param name="node">The node being written.</param>
/// <param name="prefix">The prefix to write at the beginning of every line, including a "//" for a comment.</param>
/// <param name="regexCode">The regex code passed through to <c>DescribeNode</c> when describing each node.</param>
/// <param name="depth">The depth of the current node.</param>
private static void DescribeExpression(TextWriter writer, RegexNode node, string prefix, int depth = 0)
private static void DescribeExpression(TextWriter writer, RegexNode node, string prefix, RegexCode regexCode, int depth = 0)
{
bool skip = node.Type switch
{
Expand Down Expand Up @@ -3468,35 +3497,16 @@ RegexNode.Testref when node.Next.Child(1) == node => "Not Matched: ",

// Write out the line for the node.
const char BulletPoint = '\u25CB';
writer.WriteLine($"{prefix}{new string(' ', depth * 4)}{BulletPoint} {tag}{DescribeNode(node)}");
writer.WriteLine($"{prefix}{new string(' ', depth * 4)}{BulletPoint} {tag}{DescribeNode(node, regexCode)}");
}

// Recur into each of its children.
int childCount = node.ChildCount();
for (int i = 0; i < childCount; i++)
{
int childDepth = skip ? depth : depth + 1;
DescribeExpression(writer, node.Child(i), prefix, childDepth);
}
}

/// <summary>Gets a textual description of a number, e.g. 3 => "3rd".</summary>
private static string DescribeNonNegative(int n)
{
if (n < 0)
{
return n.ToString(CultureInfo.InvariantCulture);
DescribeExpression(writer, node.Child(i), prefix, regexCode, childDepth);
}

int tens = n % 10;
return tens is >= 1 and <= 3 && n % 100 is < 10 or > 20 ? // Ends in 1, 2, 3 but not 11, 12, or 13
tens switch
{
1 => $"{n}st",
2 => $"{n}nd",
_ => $"{n}rd",
} :
$"{n}th";
}

/// <summary>Gets a textual description of a loop's style and bounds.</summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
<Compile Include="..\src\System\Text\RegularExpressions\RegexPrefixAnalyzer.cs" Link="Production\RegexPrefixAnalyzer.cs" />
<Compile Include="..\src\System\Text\RegularExpressions\RegexTree.cs" Link="Production\RegexTree.cs" />
<Compile Include="..\src\System\Text\RegularExpressions\RegexWriter.cs" Link="Production\RegexWriter.cs" />
<Compile Include="..\src\System\Collections\HashtableExtensions.cs" Link="Production\HashtableExtensions.cs" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -340,18 +340,7 @@ public int[] GetGroupNumbers()
/// </summary>
public string GroupNameFromNumber(int i)
{
    // The number-to-name lookup lives in RegexParser.GroupNameFromNumber so that it can be
    // shared with other callers; duplicating it here left the delegating call unreachable.
    // The helper returns string.Empty when the number does not correspond to a group.
    return RegexParser.GroupNameFromNumber(caps, capslist, capsize, i);
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2346,5 +2346,27 @@ private RegexParseException MakeException(RegexParseError error, string message)

/// <summary>Number of characters to the right of the current parsing position.</summary>
private int CharsRight() => _pattern.Length - _currentPos;

/// <summary>Gets a group's name from its number.</summary>
/// <param name="caps">Optional table used to translate a group number into an index into <paramref name="capslist"/>.</param>
/// <param name="capslist">Optional list of group names; when null, groups are identified by number alone.</param>
/// <param name="capsize">The number of groups, used to bounds-check <paramref name="i"/> when <paramref name="capslist"/> is null.</param>
/// <param name="i">The group number to look up.</param>
/// <returns>The group's name, or <see cref="string.Empty"/> if the number does not identify a group.</returns>
internal static string GroupNameFromNumber(Hashtable? caps, string[]? capslist, int capsize, int i)
{
    if (capslist is not null)
    {
        // When a translation table exists, the group number must first be mapped to its
        // slot in the names list; an unmapped number has no name.
        int slot = i;
        if (caps is not null && !caps.TryGetValue(i, out slot))
        {
            return string.Empty;
        }

        // The unsigned comparison rejects negative and too-large slots in a single check.
        return (uint)slot < (uint)capslist.Length ?
            capslist[slot] :
            string.Empty;
    }

    // No names list: an in-range group is named by its own number.
    return (uint)i < (uint)capsize ?
        ((uint)i).ToString() :
        string.Empty;
}
}
}