dotnet · danmoseley · Feb 26, 2026 · Feb 25, 2026 · Feb 26, 2026 · Feb 26, 2026
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
@@ -1193,24 +1193,16 @@ static RegexNode ExtractCommonPrefixNode(RegexNode alternation)
                     return alternation;
                 }
 
-                // Only handle the case where each branch is a concatenation
-                foreach (RegexNode child in children)
-                {
-                    if (child.Kind != RegexNodeKind.Concatenate || child.ChildCount() < 2)
-                    {
-                        return alternation;
-                    }
-                }
-
                 for (int startingIndex = 0; startingIndex < children.Count - 1; startingIndex++)
                 {
-                    Debug.Assert(children[startingIndex].Children is List<RegexNode> { Count: >= 2 });
-
                     // Only handle the case where each branch begins with the same One, Notone, Set (individual or loop), or Anchor.
                     // Note that while we can do this for individual characters, fixed length loops, and atomic loops, doing
                     // it for non-atomic variable length loops could change behavior as each branch could otherwise have a
                     // different number of characters consumed by the loop based on what's after it.
-                    RegexNode required = children[startingIndex].Child(0);
+                    // A branch may be either a Concatenate node (in which case its first child is compared) or a single
+                    // node (e.g., a Set that was reduced from a single-child Concatenate after prior prefix extraction).
+                    RegexNode startingNode = children[startingIndex];
+                    RegexNode required = startingNode.Kind == RegexNodeKind.Concatenate ? startingNode.Child(0) : startingNode;
                     switch (required.Kind)
                     {
                         case RegexNodeKind.One or RegexNodeKind.Notone or RegexNodeKind.Set:
@@ -1230,7 +1222,8 @@ or RegexNodeKind.Boundary or RegexNodeKind.ECMABoundary
                     int endingIndex = startingIndex + 1;
                     for (; endingIndex < children.Count; endingIndex++)
                     {
-                        RegexNode other = children[endingIndex].Child(0);
+                        RegexNode endingNode = children[endingIndex];
+                        RegexNode other = endingNode.Kind == RegexNodeKind.Concatenate ? endingNode.Child(0) : endingNode;
                         if (required.Kind != other.Kind ||
                             required.Options != other.Options ||
                             required.M != other.M ||
@@ -1252,8 +1245,16 @@ or RegexNodeKind.Boundary or RegexNodeKind.ECMABoundary
                     var newAlternate = new RegexNode(RegexNodeKind.Alternate, alternation.Options);
                     for (int i = startingIndex; i < endingIndex; i++)
                     {
-                        ((List<RegexNode>)children[i].Children!).RemoveAt(0);
-                        newAlternate.AddChild(children[i]);
+                        if (children[i].Kind == RegexNodeKind.Concatenate)
+                        {
+                            ((List<RegexNode>)children[i].Children!).RemoveAt(0);
+                            newAlternate.AddChild(children[i]);
+                        }
+                        else
+                        {
+                            // The entire branch was the extracted prefix; what remains is Empty.
+                            newAlternate.AddChild(new RegexNode(RegexNodeKind.Empty, children[i].Options));
+                        }
                     }
 
                     // If this alternation is wrapped as atomic, we need to do the same for the new alternation.

diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
@@ -849,6 +849,11 @@ public static IEnumerable<object[]> Match_MemberData()
                 yield return (@"a\wc|\wgh|de\w", upper, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, input.Length, true, upper);
                 yield return (@"a\wc|\wgh|de\w", upper, RegexOptions.None, 0, input.Length, false, "");
             }
+            // Alternation prefix extraction with IgnoreCase: correctness after single-node branch handling
+            yield return (@"(?:http|https)://foo", "HTTP://FOO", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 10, true, "HTTP://FOO");
+            yield return (@"(?:http|https)://foo", "HTTPS://FOO", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 11, true, "HTTPS://FOO");
+            yield return (@"(?:http|https)://foo", "ftp://foo", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 9, false, "");
+
             yield return ("[^a-z0-9]etag|[^a-z0-9]digest", "this string has .digest as a substring", RegexOptions.None, 16, 7, true, ".digest");
             yield return (@"(\w+|\d+)a+[ab]+", "123123aa", RegexOptions.None, 0, 8, true, "123123aa");
 

diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs
@@ -120,6 +120,25 @@ public void TrailingAnchor(string pattern, int options, int expectedMode, int ex
         [InlineData(@"(?<=cd)ab", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingString_RightToLeft, "ab")]
         [InlineData(@"\bab(?=\w)(?!=\d)c\b", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight, "abc")]
         [InlineData(@"\bab(?=\w)(?!=\d)c\b", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")]
+        // Alternation branches differing by one trailing character: prefix extraction should include all shared characters
+        [InlineData(@"(?:http|https)://foo", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "http")]
+        [InlineData(@"(?:http|https)://foo", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight, "http")]
+        // Alternation where shorter branch is just the shared prefix
+        [InlineData(@"(?:ab|abc)d", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "ab")]
+        // Alternation where branches differ by more than one character
+        [InlineData(@"(?:abc|abcdef)g", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")]
+        [InlineData(@"(?:abc|abcdef)g", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight, "abc")]
+        // Three-branch alternation with shared prefix and different lengths
+        [InlineData(@"(?:ab|abc|abcd)e", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "ab")]
+        [InlineData(@"(?:ab|abc|abcd)e", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight, "ab")]
+        // Three-branch alternation with shared prefix and different trailing characters
+        [InlineData(@"(?:ab|abc|abd)e", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "ab")]
+        [InlineData(@"(?:ab|abc|abd)e", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight, "ab")]
+        // Case-sensitive alternation whose branches differ by one trailing character (handled by ExtractCommonPrefixText rather than ExtractCommonPrefixNode; included to guard against regressions)
+        [InlineData(@"(?:ab|abc)d", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight, "ab")]
+        // Four-branch alternation mixing single-node and Concat branches after IgnoreCase prefix extraction
+        [InlineData(@"(?:abc|abcd|abce|abcfg)h", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")]
+        [InlineData(@"(?:abc|abcd|abce|abcfg)h", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight, "abc")]
         public void LeadingPrefix(string pattern, int options, int expectedMode, string expectedPrefix)
         {
             RegexFindOptimizations opts = ComputeOptimizations(pattern, (RegexOptions)options);
@@ -138,6 +157,12 @@ public void LeadingPrefix(string pattern, int options, int expectedMode, string
         [InlineData(@"ab|cd|ef|gh", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingSet_RightToLeft, "bdfh")]
         [InlineData(@"\bab(?=\w)(?!=\d)c\b", (int)(RegexOptions.IgnoreCase | RegexOptions.RightToLeft), (int)FindNextStartingPositionMode.LeadingSet_RightToLeft, "Cc")]
         [InlineData(@"ab|(abc)|(abcd)", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingSet_RightToLeft, "bcd")]
+        // Non-IgnoreCase Set-node branch: single-node branch after prefix extraction of character class
+        [InlineData(@"(?:[ab][0-9]|[ab])x", 0, (int)FindNextStartingPositionMode.LeadingSet_LeftToRight, "ab")]
+        // Single-node before Concat branch (reversed order)
+        [InlineData(@"(?:[ab]|[ab][0-9])x", 0, (int)FindNextStartingPositionMode.LeadingSet_LeftToRight, "ab")]
+        // IgnoreCase Set-node branch: prefix extraction across set-expanded branches
+        [InlineData(@"(?:a|ab)c", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingSet_LeftToRight, "Aa")]
         public void LeadingSet(string pattern, int options, int expectedMode, string expectedChars)
         {
             RegexFindOptimizations opts = ComputeOptimizations(pattern, (RegexOptions)options);

diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs
@@ -463,6 +463,18 @@ public class RegexReductionTests
         [InlineData("(?i)\\d", "\\d")]
         [InlineData("(?i).", ".")]
         [InlineData("(?i)\\$", "\\$")]
+        // IgnoreCase node prefix extraction with single-node branch handling
+        [InlineData("(?i)(?:ab|abc)d", "(?i)ab(?>c?)d")]
+        [InlineData("(?i)(?:http|https)://foo", "[Hh](?>[Tt]{2})[Pp](?>[Ss]?)://[Ff](?>[Oo]{2})")]
+        [InlineData("(?i)(?:abc|abcd|abce|abcfg)h", "(?i)abc(?:|[de]|fg)h")]
+        [InlineData("(?i)(?:ab|abc|abcd)e", "(?i)ab(?:c(?>d?))??e")]
+        // Non-IgnoreCase node prefix extraction with single-node branch handling
+        [InlineData("(?:[ab][0-9]|[ab])x", "[ab](?>[0-9]?)x")]
+        [InlineData("(?:\\w\\d|\\w)x", "\\w(?>\\d?)x")]
+        // Non-IgnoreCase text prefix extraction (regression guards)
+        [InlineData("(?:http|https)://foo", "http(?>s?)://foo")]
+        [InlineData("(?:ab|abc)d", "ab(?>c?)d")]
+        [InlineData("(?:abc|abcd|abce|abcfg)h", "abc(?:|[de]|fg)h")]
         public void PatternsReduceIdentically(string actual, string expected)
         {
             // NOTE: RegexNode.ToString is only compiled into debug builds, so DEBUG is currently set on the unit tests project.
@@ -643,6 +655,9 @@ public void PatternsReduceIdentically(string actual, string expected)
         [InlineData(@"\b\B", "\b")]
         [InlineData(@"^$", "^")]
         [InlineData(@"^$", "$")]
+        // After alternation prefix extraction the differing suffix stays optional, so these must not reduce identically to the non-optional pattern
+        [InlineData("(?i)(?:ab|abc)d", "(?i)abcd")]
+        [InlineData("(?:[ab][0-9]|[ab])x", "[ab][0-9]x")]
         public void PatternsReduceDifferently(string actual, string expected)
         {
             // NOTE: RegexNode.ToString is only compiled into debug builds, so DEBUG is currently set on the unit tests project.