From eadf1108c6367695ae73f7291d8f43d1fa432ccf Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Tue, 15 Sep 2020 15:41:21 -0700 Subject: [PATCH 01/12] Bug fix and unit test --- .../Text/RegularExpressions/RegexCharClass.cs | 16 +++++++++++++++- .../tests/Regex.Match.Tests.cs | 3 +++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index d4022f673e4ff7..9a094d325e3c81 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -555,7 +555,21 @@ public void AddLowercase(CultureInfo culture) } else { - AddLowercaseRange(range.First, range.Last); + char lower = culture.TextInfo.ToLower(range.First); + char upper = culture.TextInfo.ToLower(range.Last); + if (range.Last - range.First == upper - lower) + { + // Bug fix: https://github.com/dotnet/runtime/issues/36149 + AddLowercaseRange(range.First, range.Last); + } + else + { + for (int j = range.First; j <= range.Last; j++) + { + char lowerInRange = culture.TextInfo.ToLower((char)j); + AddRange(lowerInRange, lowerInRange); + } + } } } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index a7e73699f36e8e..3b62f1bf0a831b 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -376,6 +376,9 @@ public static IEnumerable Match_Basic_TestData() yield return new object[] { "\u05D0(?:\u05D1|\u05D2|\u05D3)", "\u05D0\u05D2", options, 0, 2, true, "\u05D0\u05D2" }; yield return new object[] { "\u05D0(?:\u05D1|\u05D2|\u05D3)", "\u05D0\u05D4", options, 0, 0, false, "" }; } + + // Edge case: Unicode symbol in range + yield return new object[] { @"^(?i:[\xD7-\xD8])$", @"\xF7", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; } [Theory] From e1c3219c603af2d1adf1cb6019d2a816f6bc7d6c Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Wed, 16 Sep 2020 11:21:27 -0700 Subject: [PATCH 02/12] Address comments --- .../src/System/Text/RegularExpressions/RegexCharClass.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 9a094d325e3c81..d84dbdff00c798 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -559,14 +559,15 @@ public void AddLowercase(CultureInfo culture) char upper = culture.TextInfo.ToLower(range.Last); if (range.Last - range.First == upper - lower) { - // Bug fix: https://github.com/dotnet/runtime/issues/36149 AddLowercaseRange(range.First, range.Last); } else { + // Bug fix: Unicode `Symbol`s sometimes exist in the middle of character ranges. char.ToLower(Symbol) returns Symbol. In these cases, we cannot use an offset to find the lowercase chars. For ex: https://github.com/dotnet/runtime/issues/36149 + TextInfo? cultureTextInfo = culture.TextInfo; for (int j = range.First; j <= range.Last; j++) { - char lowerInRange = culture.TextInfo.ToLower((char)j); + char lowerInRange = cultureTextInfo.ToLower((char)j); AddRange(lowerInRange, lowerInRange); } } From a7c8f9e76f44f9713949f74c6e5c19a1906e8b94 Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Thu, 17 Sep 2020 15:30:44 -0700 Subject: [PATCH 03/12] An elegant fix and unit tests --- .../Text/RegularExpressions/RegexCharClass.cs | 12 +++++++----- .../tests/Regex.Match.Tests.cs | 15 +++++++++++++-- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index d84dbdff00c798..1d064be0c7e3db 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -300,7 +300,8 @@ these intervals. It follows from the nature of the L on each interval. private static readonly LowerCaseMapping[] s_lcTable = new LowerCaseMapping[] { new LowerCaseMapping('\u0041', '\u005A', LowercaseAdd, 32), - new LowerCaseMapping('\u00C0', '\u00DE', LowercaseAdd, 32), + new LowerCaseMapping('\u00C0', '\u00D6', LowercaseAdd, 32), + new LowerCaseMapping('\u00D8', '\u00DE', LowercaseAdd, 32), new LowerCaseMapping('\u0100', '\u012E', LowercaseBor, 0), new LowerCaseMapping('\u0130', '\u0130', LowercaseSet, 0x0069), new LowerCaseMapping('\u0132', '\u0136', LowercaseBor, 0), @@ -349,7 +350,8 @@ these intervals. It follows from the nature of the L on each interval. new LowerCaseMapping('\u0388', '\u038A', LowercaseAdd, 37), new LowerCaseMapping('\u038C', '\u038C', LowercaseSet, 0x03CC), new LowerCaseMapping('\u038E', '\u038F', LowercaseAdd, 63), - new LowerCaseMapping('\u0391', '\u03AB', LowercaseAdd, 32), + new LowerCaseMapping('\u0391', '\u03A1', LowercaseAdd, 32), + new LowerCaseMapping('\u03A3', '\u03AB', LowercaseAdd, 32), new LowerCaseMapping('\u03E2', '\u03EE', LowercaseBor, 0), new LowerCaseMapping('\u0401', '\u040F', LowercaseAdd, 80), new LowerCaseMapping('\u0410', '\u042F', LowercaseAdd, 32), @@ -362,10 +364,10 @@ these intervals. It follows from the nature of the L on each interval. new LowerCaseMapping('\u04EE', '\u04F4', LowercaseBor, 0), new LowerCaseMapping('\u04F8', '\u04F8', LowercaseSet, 0x04F9), new LowerCaseMapping('\u0531', '\u0556', LowercaseAdd, 48), - new LowerCaseMapping('\u10A0', '\u10C5', LowercaseAdd, 48), + new LowerCaseMapping('\u10A0', '\u10C5', LowercaseAdd, 7264), new LowerCaseMapping('\u1E00', '\u1EF8', LowercaseBor, 0), new LowerCaseMapping('\u1F08', '\u1F0F', LowercaseAdd, -8), - new LowerCaseMapping('\u1F18', '\u1F1F', LowercaseAdd, -8), + new LowerCaseMapping('\u1F18', '\u1F1D', LowercaseAdd, -8), new LowerCaseMapping('\u1F28', '\u1F2F', LowercaseAdd, -8), new LowerCaseMapping('\u1F38', '\u1F3F', LowercaseAdd, -8), new LowerCaseMapping('\u1F48', '\u1F4D', LowercaseAdd, -8), @@ -391,7 +393,7 @@ these intervals. It follows from the nature of the L on each interval. new LowerCaseMapping('\u1FFA', '\u1FFB', LowercaseAdd, -126), new LowerCaseMapping('\u1FFC', '\u1FFC', LowercaseSet, 0x1FF3), new LowerCaseMapping('\u2160', '\u216F', LowercaseAdd, 16), - new LowerCaseMapping('\u24B6', '\u24D0', LowercaseAdd, 26), + new LowerCaseMapping('\u24B6', '\u24CF', LowercaseAdd, 26), new LowerCaseMapping('\uFF21', '\uFF3A', LowercaseAdd, 32), }; diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index 3b62f1bf0a831b..3690986e928948 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -377,8 +377,19 @@ public static IEnumerable Match_Basic_TestData() yield return new object[] { "\u05D0(?:\u05D1|\u05D2|\u05D3)", "\u05D0\u05D4", options, 0, 0, false, "" }; } - // Edge case: Unicode symbol in range - yield return new object[] { @"^(?i:[\xD7-\xD8])$", @"\xF7", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; + // Edge cases: Unicode symbols in character ranges. Cannot find the lowercase chars for these cases by using an offset + yield return new object[] { @"^(?i:[\u00C0 -\u00DE])$", @"\u00F7", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; + yield return new object[] { @"^(?i:[\u00C0 -\u00DE])$", ((char)('\u00C0' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u00C0' + 32)).ToString() }; + yield return new object[] { @"^(?i:[\u00C0 -\u00DE])$", ((char)('\u00DE' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u00DE' + 32)).ToString() }; + yield return new object[] { @"^(?i:[\u0391 -\u03AB])$", ((char)('\u03A2' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; + yield return new object[] { @"^(?i:[\u0391 -\u03AB])$", ((char)('\u0391' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u0391' + 32)).ToString() }; + yield return new object[] { @"^(?i:[\u0391 -\u03AB])$", ((char)('\u03AB' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u03AB' + 32)).ToString() }; + yield return new object[] { @"^(?i:[\u1F18 -\u1F1F])$", ((char)('\u1F1F' - 8)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; + yield return new object[] { @"^(?i:[\u1F18 -\u1F1F])$", ((char)('\u1F18' - 8)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u1F18' - 8)).ToString() }; + yield return new object[] { @"^(?i:[\u10A0 -\u10C5])$", ((char)('\u10A0' + 7264)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u10A0' + 7264)).ToString() }; + yield return new object[] { @"^(?i:[\u10A0 -\u10C5])$", ((char)('\u1F1F' + 48)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; + yield return new object[] { @"^(?i:[\u24B6 -\u24D0])$", ((char)('\u24D0' + 26)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; + yield return new object[] { @"^(?i:[\u24B6 -\u24D0])$", ((char)('\u24CF' + 26)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u24CF' + 26)).ToString() }; } [Theory] From db6b6971ec0b32bf435baa044f61ec937a23a074 Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Thu, 17 Sep 2020 17:31:50 -0700 Subject: [PATCH 04/12] Remove old code --- .../Text/RegularExpressions/RegexCharClass.cs | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 1d064be0c7e3db..cfad5a5b3b503a 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -557,22 +557,7 @@ public void AddLowercase(CultureInfo culture) } else { - char lower = culture.TextInfo.ToLower(range.First); - char upper = culture.TextInfo.ToLower(range.Last); - if (range.Last - range.First == upper - lower) - { - AddLowercaseRange(range.First, range.Last); - } - else - { - // Bug fix: Unicode `Symbol`s sometimes exist in the middle of character ranges. char.ToLower(Symbol) returns Symbol. In these cases, we cannot use an offset to find the lowercase chars. For ex: https://github.com/dotnet/runtime/issues/36149 - TextInfo? cultureTextInfo = culture.TextInfo; - for (int j = range.First; j <= range.Last; j++) - { - char lowerInRange = cultureTextInfo.ToLower((char)j); - AddRange(lowerInRange, lowerInRange); - } - } + AddLowercaseRange(range.First, range.Last); } } } @@ -586,7 +571,7 @@ private void AddLowercaseRange(char chMin, char chMax) { int i = 0; - for (int iMax = s_lcTable.Length; i < iMax; ) + for (int iMax = s_lcTable.Length; i < iMax;) { int iMid = (i + iMax) >> 1; if (s_lcTable[iMid].ChMax < chMin) From 6895ea7aed519c4f16d129f5498ac4c95da9e344 Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Fri, 18 Sep 2020 15:24:13 -0700 Subject: [PATCH 05/12] Fix the unit tests --- .../Text/RegularExpressions/RegexCharClass.cs | 18 ++++++++++++++ .../tests/Regex.Match.Tests.cs | 24 +++++++++---------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index cfad5a5b3b503a..d97fa31df6287e 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -414,6 +414,24 @@ static RegexCharClass() int len = s_propTable.Length; for (int i = 0; i < len - 1; i++) Debug.Assert(string.Compare(s_propTable[i][0], s_propTable[i + 1][0], StringComparison.Ordinal) < 0, $"RegexCharClass s_propTable is out of order at ({s_propTable[i][0]}, {s_propTable[i + 1][0]})"); + + CultureInfo culture = CultureInfo.InvariantCulture; + for (int k = 0; k < s_lcTable.Length; k++) + { + LowerCaseMapping loc = s_lcTable[k]; + if (loc.LcOp == 1) + { + // Validate only the LowercaseAdd cases + int offset = loc.Data; + for (int l = loc.ChMin; l <= loc.ChMax; l++) + { + if (culture.TextInfo.ToLower((char)l) != (char)(l + offset)) + { + Debug.Assert(false, $"The Unicode character range at index {k} in s_lcTable contains the character {(char)l} (decimal value: {l}) whose lowercase value cannot be obtained by using the specified offset."); + } + } + } + } } #endif diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index 3690986e928948..591f3b6dc6622e 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -378,18 +378,18 @@ public static IEnumerable Match_Basic_TestData() } // Edge cases: Unicode symbols in character ranges. Cannot find the lowercase chars for these cases by using an offset - yield return new object[] { @"^(?i:[\u00C0 -\u00DE])$", @"\u00F7", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; - yield return new object[] { @"^(?i:[\u00C0 -\u00DE])$", ((char)('\u00C0' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u00C0' + 32)).ToString() }; - yield return new object[] { @"^(?i:[\u00C0 -\u00DE])$", ((char)('\u00DE' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u00DE' + 32)).ToString() }; - yield return new object[] { @"^(?i:[\u0391 -\u03AB])$", ((char)('\u03A2' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; - yield return new object[] { @"^(?i:[\u0391 -\u03AB])$", ((char)('\u0391' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u0391' + 32)).ToString() }; - yield return new object[] { @"^(?i:[\u0391 -\u03AB])$", ((char)('\u03AB' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u03AB' + 32)).ToString() }; - yield return new object[] { @"^(?i:[\u1F18 -\u1F1F])$", ((char)('\u1F1F' - 8)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; - yield return new object[] { @"^(?i:[\u1F18 -\u1F1F])$", ((char)('\u1F18' - 8)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u1F18' - 8)).ToString() }; - yield return new object[] { @"^(?i:[\u10A0 -\u10C5])$", ((char)('\u10A0' + 7264)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u10A0' + 7264)).ToString() }; - yield return new object[] { @"^(?i:[\u10A0 -\u10C5])$", ((char)('\u1F1F' + 48)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; - yield return new object[] { @"^(?i:[\u24B6 -\u24D0])$", ((char)('\u24D0' + 26)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; - yield return new object[] { @"^(?i:[\u24B6 -\u24D0])$", ((char)('\u24CF' + 26)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u24CF' + 26)).ToString() }; + yield return new object[] { @"^(?i:[\u00C0-\u00DE])$", ((char)('\u00F7')).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; + yield return new object[] { @"^(?i:[\u00C0-\u00DE])$", ((char)('\u00C0' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u00C0' + 32)).ToString() }; + yield return new object[] { @"^(?i:[\u00C0-\u00DE])$", ((char)('\u00DE' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u00DE' + 32)).ToString() }; + yield return new object[] { @"^(?i:[\u0391-\u03AB])$", ((char)('\u03A2' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; + yield return new object[] { @"^(?i:[\u0391-\u03AB])$", ((char)('\u0391' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u0391' + 32)).ToString() }; + yield return new object[] { @"^(?i:[\u0391-\u03AB])$", ((char)('\u03AB' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u03AB' + 32)).ToString() }; + yield return new object[] { @"^(?i:[\u1F18-\u1F1F])$", ((char)('\u1F1F' - 8)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; + yield return new object[] { @"^(?i:[\u1F18-\u1F1F])$", ((char)('\u1F18' - 8)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u1F18' - 8)).ToString() }; + yield return new object[] { @"^(?i:[\u10A0-\u10C5])$", ((char)('\u10A0' + 7264)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u10A0' + 7264)).ToString() }; + yield return new object[] { @"^(?i:[\u10A0-\u10C5])$", ((char)('\u1F1F' + 48)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; + yield return new object[] { @"^(?i:[\u24B6-\u24D0])$", ((char)('\u24D0' + 26)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; + yield return new object[] { @"^(?i:[\u24B6-\u24D0])$", ((char)('\u24CF' + 26)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u24CF' + 26)).ToString() }; } [Theory] From 48fbce491c1993f6511f750d322dd904222fbdf6 Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Fri, 18 Sep 2020 15:26:35 -0700 Subject: [PATCH 06/12] sq --- .../System.Text.RegularExpressions/tests/Regex.Match.Tests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index 591f3b6dc6622e..b82dad2189c715 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -377,7 +377,7 @@ public static IEnumerable Match_Basic_TestData() yield return new object[] { "\u05D0(?:\u05D1|\u05D2|\u05D3)", "\u05D0\u05D4", options, 0, 0, false, "" }; } - // Edge cases: Unicode symbols in character ranges. Cannot find the lowercase chars for these cases by using an offset + // Unicode symbols in character ranges. Cannot find the lowercase chars for these cases by using an offset yield return new object[] { @"^(?i:[\u00C0-\u00DE])$", ((char)('\u00F7')).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; yield return new object[] { @"^(?i:[\u00C0-\u00DE])$", ((char)('\u00C0' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u00C0' + 32)).ToString() }; yield return new object[] { @"^(?i:[\u00C0-\u00DE])$", ((char)('\u00DE' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u00DE' + 32)).ToString() }; From 0a43316bb41228621e4a10575d10298118e0311d Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Fri, 18 Sep 2020 15:31:17 -0700 Subject: [PATCH 07/12] sq --- .../System.Text.RegularExpressions/tests/Regex.Match.Tests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index b82dad2189c715..89945c511fa859 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -377,8 +377,8 @@ public static IEnumerable Match_Basic_TestData() yield return new object[] { "\u05D0(?:\u05D1|\u05D2|\u05D3)", "\u05D0\u05D4", options, 0, 0, false, "" }; } - // Unicode symbols in character ranges. Cannot find the lowercase chars for these cases by using an offset - yield return new object[] { @"^(?i:[\u00C0-\u00DE])$", ((char)('\u00F7')).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; + // Unicode symbols in character ranges. These are chars whose lowercase values cannot be found by using the offsets specified in s_lcTable. + yield return new object[] { @"^(?i:[\u00C0-\u00DE])$", '\u00F7'.ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; yield return new object[] { @"^(?i:[\u00C0-\u00DE])$", ((char)('\u00C0' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u00C0' + 32)).ToString() }; yield return new object[] { @"^(?i:[\u00C0-\u00DE])$", ((char)('\u00DE' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u00DE' + 32)).ToString() }; yield return new object[] { @"^(?i:[\u0391-\u03AB])$", ((char)('\u03A2' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; From 165cbab16cd6dd2277528c14c75ed8ba17eb73dc Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Mon, 21 Sep 2020 23:27:35 -0700 Subject: [PATCH 08/12] Address comments and add test case --- .../Text/RegularExpressions/RegexCharClass.cs | 38 ++++++++++++++----- .../tests/Regex.Match.Tests.cs | 1 + 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index d97fa31df6287e..20f19b2412e3d9 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -303,7 +303,6 @@ these intervals. It follows from the nature of the L on each interval. new LowerCaseMapping('\u00C0', '\u00D6', LowercaseAdd, 32), new LowerCaseMapping('\u00D8', '\u00DE', LowercaseAdd, 32), new LowerCaseMapping('\u0100', '\u012E', LowercaseBor, 0), - new LowerCaseMapping('\u0130', '\u0130', LowercaseSet, 0x0069), new LowerCaseMapping('\u0132', '\u0136', LowercaseBor, 0), new LowerCaseMapping('\u0139', '\u0147', LowercaseBad, 0), new LowerCaseMapping('\u014A', '\u0176', LowercaseBor, 0), @@ -365,7 +364,8 @@ these intervals. It follows from the nature of the L on each interval. new LowerCaseMapping('\u04F8', '\u04F8', LowercaseSet, 0x04F9), new LowerCaseMapping('\u0531', '\u0556', LowercaseAdd, 48), new LowerCaseMapping('\u10A0', '\u10C5', LowercaseAdd, 7264), - new LowerCaseMapping('\u1E00', '\u1EF8', LowercaseBor, 0), + new LowerCaseMapping('\u1E00', '\u1E95', LowercaseBor, 0), + new LowerCaseMapping('\u1EA0', '\u1EF8', LowercaseBor, 0), new LowerCaseMapping('\u1F08', '\u1F0F', LowercaseAdd, -8), new LowerCaseMapping('\u1F18', '\u1F1D', LowercaseAdd, -8), new LowerCaseMapping('\u1F28', '\u1F2F', LowercaseAdd, -8), @@ -415,20 +415,40 @@ static RegexCharClass() for (int i = 0; i < len - 1; i++) Debug.Assert(string.Compare(s_propTable[i][0], s_propTable[i + 1][0], StringComparison.Ordinal) < 0, $"RegexCharClass s_propTable is out of order at ({s_propTable[i][0]}, {s_propTable[i + 1][0]})"); + // Make sure the s_lcTable's ranges are correctly populated. CultureInfo culture = CultureInfo.InvariantCulture; for (int k = 0; k < s_lcTable.Length; k++) { LowerCaseMapping loc = s_lcTable[k]; - if (loc.LcOp == 1) + if (loc.LcOp == LowercaseAdd) { - // Validate only the LowercaseAdd cases int offset = loc.Data; - for (int l = loc.ChMin; l <= loc.ChMax; l++) + for (char l = loc.ChMin; l <= loc.ChMax; l++) { - if (culture.TextInfo.ToLower((char)l) != (char)(l + offset)) - { - Debug.Assert(false, $"The Unicode character range at index {k} in s_lcTable contains the character {(char)l} (decimal value: {l}) whose lowercase value cannot be obtained by using the specified offset."); - } + Debug.Assert(culture.TextInfo.ToLower((char)l) == (char)(l + offset), $"The Unicode character range at index {k} in s_lcTable contains the character {(char)l} (decimal value: {l}). Its lowercase value cannot be obtained by using the specified offset."); + } + } + else if (loc.LcOp == LowercaseSet) + { + char lowercase = (char)loc.Data; + for (char l = loc.ChMin; l <= loc.ChMax; l++) + { + char uppercase = l; + Debug.Assert(culture.TextInfo.ToLower(uppercase) == lowercase, $"The Unicode character range at index {k} in s_lcTable contains the character {uppercase} (decimal value: {(int)uppercase}). Its lowercase value {culture.TextInfo.ToLower(uppercase)} is not the stored value {lowercase}."); + } + } + else if (loc.LcOp == LowercaseBor) + { + for (char l = loc.ChMin; l <= loc.ChMax; l++) + { + Debug.Assert(culture.TextInfo.ToLower((char)l) == (char)(l | (char)1), $"The Unicode character range at index {k} in s_lcTable contains the character {(char)l} (decimal value: {l}). Its lowercase value {culture.TextInfo.ToLower(l)} cannot be obtained by OR-ing with 1: {(char)(l | (char)1)}"); + } + } + else if (loc.LcOp == LowercaseBad) + { + for (char l = loc.ChMin; l <= loc.ChMax; l++) + { + Debug.Assert(culture.TextInfo.ToLower((char)l) == (char)(l + (l & 1)), $"The Unicode character range at index {k} in s_lcTable contains the character {(char)l} (decimal value: {l}). Its lowercase value cannot be obtained by AND-ing with 1."); } } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index 89945c511fa859..ec174720011a2a 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -378,6 +378,7 @@ public static IEnumerable Match_Basic_TestData() } // Unicode symbols in character ranges. These are chars whose lowercase values cannot be found by using the offsets specified in s_lcTable. + yield return new object[] { @"^(?i:[\u00D7-\u00D8])$", '\u00F7'.ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; yield return new object[] { @"^(?i:[\u00C0-\u00DE])$", '\u00F7'.ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; yield return new object[] { @"^(?i:[\u00C0-\u00DE])$", ((char)('\u00C0' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u00C0' + 32)).ToString() }; yield return new object[] { @"^(?i:[\u00C0-\u00DE])$", ((char)('\u00DE' + 32)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u00DE' + 32)).ToString() }; From 4d3e58d4e176e3095ed1d8958ef79024a9fecdcf Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Mon, 26 Oct 2020 11:51:26 -0700 Subject: [PATCH 09/12] Skip the unit tests on net framework --- .../tests/Regex.Match.Tests.cs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index ec174720011a2a..a8b319ad27deeb 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -376,7 +376,10 @@ public static IEnumerable Match_Basic_TestData() yield return new object[] { "\u05D0(?:\u05D1|\u05D2|\u05D3)", "\u05D0\u05D2", options, 0, 2, true, "\u05D0\u05D2" }; yield return new object[] { "\u05D0(?:\u05D1|\u05D2|\u05D3)", "\u05D0\u05D4", options, 0, 0, false, "" }; } + } + public static IEnumerable Match_Basic_TestData_NetCore() + { // Unicode symbols in character ranges. These are chars whose lowercase values cannot be found by using the offsets specified in s_lcTable. yield return new object[] { @"^(?i:[\u00D7-\u00D8])$", '\u00F7'.ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; yield return new object[] { @"^(?i:[\u00C0-\u00DE])$", '\u00F7'.ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, false, "" }; @@ -393,6 +396,14 @@ public static IEnumerable Match_Basic_TestData() yield return new object[] { @"^(?i:[\u24B6-\u24D0])$", ((char)('\u24CF' + 26)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u24CF' + 26)).ToString() }; } + [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)] + [Theory] + [MemberData(nameof(Match_Basic_TestData_NetCore))] + public void Match_NetCore(string pattern, string input, RegexOptions options, int beginning, int length, bool expectedSuccess, string expectedValue) + { + Match(pattern, input, options, beginning, length, expectedSuccess, expectedValue); + } + [Theory] [MemberData(nameof(Match_Basic_TestData))] [MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Match_Basic_TestData), 2, MemberType = typeof(RegexCompilationHelper))] From 1aa54b0262aa5e0aee0955f6f02e8a462e4a9e2d Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Mon, 26 Oct 2020 14:47:00 -0700 Subject: [PATCH 10/12] Debug commit for CI --- .../src/System/Text/RegularExpressions/RegexCharClass.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 20f19b2412e3d9..958af154b61e13 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -420,6 +420,10 @@ static RegexCharClass() for (int k = 0; k < s_lcTable.Length; k++) { LowerCaseMapping loc = s_lcTable[k]; + if (loc.ChMin == '\u01C4') + { + Debug.Assert(true); + } if (loc.LcOp == LowercaseAdd) { int offset = loc.Data; @@ -434,7 +438,7 @@ static RegexCharClass() for (char l = loc.ChMin; l <= loc.ChMax; l++) { char uppercase = l; - Debug.Assert(culture.TextInfo.ToLower(uppercase) == lowercase, $"The Unicode character range at index {k} in s_lcTable contains the character {uppercase} (decimal value: {(int)uppercase}). Its lowercase value {culture.TextInfo.ToLower(uppercase)} is not the stored value {lowercase}."); + Debug.Assert(culture.TextInfo.ToLower(uppercase) == lowercase, $"The Unicode character range at index {k} in s_lcTable contains the character {uppercase} (decimal value: {(int)uppercase}). Its lowercase value {culture.TextInfo.ToLower(uppercase).ToString()} is not the stored value {lowercase.ToString()}. CultureInfo: {culture.ToString()}"); } } else if (loc.LcOp == LowercaseBor) From 04da0a323b4dd2e48b98d2ca98ea37541249c0ca Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Tue, 27 Oct 2020 14:35:09 -0700 Subject: [PATCH 11/12] Try to get more error info --- .../src/System/Text/RegularExpressions/RegexCharClass.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 958af154b61e13..72f0140cdc29f0 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -438,7 +438,7 @@ static RegexCharClass() for (char l = loc.ChMin; l <= loc.ChMax; l++) { char uppercase = l; - Debug.Assert(culture.TextInfo.ToLower(uppercase) == lowercase, $"The Unicode character range at index {k} in s_lcTable contains the character {uppercase} (decimal value: {(int)uppercase}). Its lowercase value {culture.TextInfo.ToLower(uppercase).ToString()} is not the stored value {lowercase.ToString()}. CultureInfo: {culture.ToString()}"); + Debug.Assert(culture.TextInfo.ToLower(uppercase) == lowercase, $"The Unicode character range at index {k} in s_lcTable contains the character {uppercase} (decimal value: {(int)uppercase}, hex: {(int)uppercase:X}. Its lowercase value {culture.TextInfo.ToLower(uppercase).ToString()} (decimal value: {(int)culture.TextInfo.ToLower(uppercase)}, hex: {(int)culture.TextInfo.ToLower(uppercase):X} is not the stored value {lowercase} (decimal value: {(int)lowercase}, hex: {(int)lowercase:X}. CultureInfo: {culture.EnglishName}"); } } else if (loc.LcOp == LowercaseBor) From f42e2a0b9c7cacba5f37f16d35659be6b011376f Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Thu, 29 Oct 2020 15:08:14 -0700 Subject: [PATCH 12/12] Run validation as a unit test. Skip it on non-ICU environments --- .../src/System.Text.RegularExpressions.csproj | 13 +- .../RegexCharClass.MappingTable.cs | 162 +++++++++++++++ .../Text/RegularExpressions/RegexCharClass.cs | 195 +----------------- .../tests/RegexValidations.netcoreapp.cs | 53 +++++ ...ystem.Text.RegularExpressions.Tests.csproj | 8 +- 5 files changed, 225 insertions(+), 206 deletions(-) create mode 100644 src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.MappingTable.cs create mode 100644 src/libraries/System.Text.RegularExpressions/tests/RegexValidations.netcoreapp.cs diff --git a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj index 5313bf0d948138..77351ca146f7ae 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj +++ b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj @@ -7,6 +7,7 @@ + @@ -46,14 +47,10 @@ - - - - + + + + diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.MappingTable.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.MappingTable.cs new file mode 100644 index 00000000000000..b299a3b8e1e8b9 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.MappingTable.cs @@ -0,0 +1,162 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; + +namespace System.Text.RegularExpressions +{ + internal sealed partial class RegexCharClass + { + /************************************************************************** + Let U be the set of Unicode character values and let L be the lowercase + function, mapping from U to U. To perform case insensitive matching of + character sets, we need to be able to map an interval I in U, say + + I = [chMin, chMax] = { ch : chMin <= ch <= chMax } + + to a set A such that A contains L(I) and A is contained in the union of + I and L(I). + + The table below partitions U into intervals on which L is non-decreasing. + Thus, for any interval J = [a, b] contained in one of these intervals, + L(J) is contained in [L(a), L(b)]. + + It is also true that for any such J, [L(a), L(b)] is contained in the + union of J and L(J). This does not follow from L being non-decreasing on + these intervals. It follows from the nature of the L on each interval. + On each interval, L has one of the following forms: + + (1) L(ch) = constant (LowercaseSet) + (2) L(ch) = ch + offset (LowercaseAdd) + (3) L(ch) = ch | 1 (LowercaseBor) + (4) L(ch) = ch + (ch & 1) (LowercaseBad) + + It is easy to verify that for any of these forms [L(a), L(b)] is + contained in the union of [a, b] and L([a, b]). + ***************************************************************************/ + + internal const int LowercaseSet = 0; // Set to arg. + internal const int LowercaseAdd = 1; // Add arg. + internal const int LowercaseBor = 2; // Bitwise or with 1. + internal const int LowercaseBad = 3; // Bitwise and with 1 and add original. + + internal static readonly LowerCaseMapping[] s_lcTable = new LowerCaseMapping[] + { + new LowerCaseMapping('\u0041', '\u005A', LowercaseAdd, 32), + new LowerCaseMapping('\u00C0', '\u00D6', LowercaseAdd, 32), + new LowerCaseMapping('\u00D8', '\u00DE', LowercaseAdd, 32), + new LowerCaseMapping('\u0100', '\u012E', LowercaseBor, 0), + new LowerCaseMapping('\u0132', '\u0136', LowercaseBor, 0), + new LowerCaseMapping('\u0139', '\u0147', LowercaseBad, 0), + new LowerCaseMapping('\u014A', '\u0176', LowercaseBor, 0), + new LowerCaseMapping('\u0178', '\u0178', LowercaseSet, 0x00FF), + new LowerCaseMapping('\u0179', '\u017D', LowercaseBad, 0), + new LowerCaseMapping('\u0181', '\u0181', LowercaseSet, 0x0253), + new LowerCaseMapping('\u0182', '\u0184', LowercaseBor, 0), + new LowerCaseMapping('\u0186', '\u0186', LowercaseSet, 0x0254), + new LowerCaseMapping('\u0187', '\u0187', LowercaseSet, 0x0188), + new LowerCaseMapping('\u0189', '\u018A', LowercaseAdd, 205), + new LowerCaseMapping('\u018B', '\u018B', LowercaseSet, 0x018C), + new LowerCaseMapping('\u018E', '\u018E', LowercaseSet, 0x01DD), + new LowerCaseMapping('\u018F', '\u018F', LowercaseSet, 0x0259), + new LowerCaseMapping('\u0190', '\u0190', LowercaseSet, 0x025B), + new LowerCaseMapping('\u0191', '\u0191', LowercaseSet, 0x0192), + new LowerCaseMapping('\u0193', '\u0193', LowercaseSet, 0x0260), + new LowerCaseMapping('\u0194', '\u0194', LowercaseSet, 0x0263), + new LowerCaseMapping('\u0196', '\u0196', LowercaseSet, 0x0269), + new LowerCaseMapping('\u0197', '\u0197', LowercaseSet, 0x0268), + new LowerCaseMapping('\u0198', '\u0198', LowercaseSet, 0x0199), + new LowerCaseMapping('\u019C', '\u019C', LowercaseSet, 0x026F), + new LowerCaseMapping('\u019D', '\u019D', LowercaseSet, 0x0272), + new LowerCaseMapping('\u019F', '\u019F', LowercaseSet, 0x0275), + new LowerCaseMapping('\u01A0', '\u01A4', LowercaseBor, 0), + new LowerCaseMapping('\u01A7', '\u01A7', LowercaseSet, 0x01A8), + new LowerCaseMapping('\u01A9', '\u01A9', LowercaseSet, 0x0283), + new LowerCaseMapping('\u01AC', '\u01AC', LowercaseSet, 0x01AD), + new LowerCaseMapping('\u01AE', '\u01AE', LowercaseSet, 0x0288), + new LowerCaseMapping('\u01AF', '\u01AF', LowercaseSet, 0x01B0), + new LowerCaseMapping('\u01B1', '\u01B2', LowercaseAdd, 217), + new LowerCaseMapping('\u01B3', '\u01B5', LowercaseBad, 0), + new LowerCaseMapping('\u01B7', '\u01B7', LowercaseSet, 0x0292), + new LowerCaseMapping('\u01B8', '\u01B8', LowercaseSet, 0x01B9), + new LowerCaseMapping('\u01BC', '\u01BC', LowercaseSet, 0x01BD), + new LowerCaseMapping('\u01C4', '\u01C5', LowercaseSet, 0x01C6), + new LowerCaseMapping('\u01C7', '\u01C8', LowercaseSet, 0x01C9), + new LowerCaseMapping('\u01CA', '\u01CB', LowercaseSet, 0x01CC), + new LowerCaseMapping('\u01CD', '\u01DB', LowercaseBad, 0), + new LowerCaseMapping('\u01DE', '\u01EE', LowercaseBor, 0), + new LowerCaseMapping('\u01F1', '\u01F2', LowercaseSet, 0x01F3), + new LowerCaseMapping('\u01F4', '\u01F4', LowercaseSet, 0x01F5), + new LowerCaseMapping('\u01FA', '\u0216', LowercaseBor, 0), + new LowerCaseMapping('\u0386', '\u0386', LowercaseSet, 0x03AC), + new LowerCaseMapping('\u0388', '\u038A', LowercaseAdd, 37), + new LowerCaseMapping('\u038C', '\u038C', LowercaseSet, 0x03CC), + new LowerCaseMapping('\u038E', '\u038F', LowercaseAdd, 63), + new LowerCaseMapping('\u0391', '\u03A1', LowercaseAdd, 32), + new LowerCaseMapping('\u03A3', '\u03AB', LowercaseAdd, 32), + new LowerCaseMapping('\u03E2', '\u03EE', LowercaseBor, 0), + new LowerCaseMapping('\u0401', '\u040F', LowercaseAdd, 80), + new LowerCaseMapping('\u0410', '\u042F', LowercaseAdd, 32), + new LowerCaseMapping('\u0460', '\u0480', LowercaseBor, 0), + new LowerCaseMapping('\u0490', '\u04BE', LowercaseBor, 0), + new LowerCaseMapping('\u04C1', '\u04C3', LowercaseBad, 0), + new LowerCaseMapping('\u04C7', '\u04C7', LowercaseSet, 0x04C8), + new LowerCaseMapping('\u04CB', '\u04CB', LowercaseSet, 0x04CC), + new LowerCaseMapping('\u04D0', '\u04EA', LowercaseBor, 0), + new LowerCaseMapping('\u04EE', '\u04F4', LowercaseBor, 0), + new LowerCaseMapping('\u04F8', '\u04F8', LowercaseSet, 0x04F9), + new LowerCaseMapping('\u0531', '\u0556', LowercaseAdd, 48), + new LowerCaseMapping('\u10A0', '\u10C5', LowercaseAdd, 7264), + new LowerCaseMapping('\u1E00', '\u1E95', LowercaseBor, 0), + new LowerCaseMapping('\u1EA0', '\u1EF8', LowercaseBor, 0), + new LowerCaseMapping('\u1F08', '\u1F0F', LowercaseAdd, -8), + new LowerCaseMapping('\u1F18', '\u1F1D', LowercaseAdd, -8), + new LowerCaseMapping('\u1F28', '\u1F2F', LowercaseAdd, -8), + new LowerCaseMapping('\u1F38', '\u1F3F', LowercaseAdd, -8), + new LowerCaseMapping('\u1F48', '\u1F4D', LowercaseAdd, -8), + new LowerCaseMapping('\u1F59', '\u1F59', LowercaseSet, 0x1F51), + new LowerCaseMapping('\u1F5B', '\u1F5B', LowercaseSet, 0x1F53), + new LowerCaseMapping('\u1F5D', '\u1F5D', LowercaseSet, 0x1F55), + new LowerCaseMapping('\u1F5F', '\u1F5F', LowercaseSet, 0x1F57), + new LowerCaseMapping('\u1F68', '\u1F6F', LowercaseAdd, -8), + new LowerCaseMapping('\u1F88', '\u1F8F', LowercaseAdd, -8), + new LowerCaseMapping('\u1F98', '\u1F9F', LowercaseAdd, -8), + new LowerCaseMapping('\u1FA8', '\u1FAF', LowercaseAdd, -8), + new LowerCaseMapping('\u1FB8', '\u1FB9', LowercaseAdd, -8), + new LowerCaseMapping('\u1FBA', '\u1FBB', LowercaseAdd, -74), + new LowerCaseMapping('\u1FBC', '\u1FBC', LowercaseSet, 0x1FB3), + new LowerCaseMapping('\u1FC8', '\u1FCB', LowercaseAdd, -86), + new LowerCaseMapping('\u1FCC', '\u1FCC', LowercaseSet, 0x1FC3), + new LowerCaseMapping('\u1FD8', '\u1FD9', LowercaseAdd, -8), + new LowerCaseMapping('\u1FDA', '\u1FDB', LowercaseAdd, -100), + new LowerCaseMapping('\u1FE8', '\u1FE9', LowercaseAdd, -8), + new LowerCaseMapping('\u1FEA', '\u1FEB', LowercaseAdd, -112), + new LowerCaseMapping('\u1FEC', '\u1FEC', LowercaseSet, 0x1FE5), + new LowerCaseMapping('\u1FF8', '\u1FF9', LowercaseAdd, -128), + new LowerCaseMapping('\u1FFA', '\u1FFB', LowercaseAdd, -126), + new LowerCaseMapping('\u1FFC', '\u1FFC', LowercaseSet, 0x1FF3), + new LowerCaseMapping('\u2160', '\u216F', LowercaseAdd, 16), + new LowerCaseMapping('\u24B6', '\u24CF', LowercaseAdd, 26), + new LowerCaseMapping('\uFF21', '\uFF3A', LowercaseAdd, 32), + }; + + /// + /// Lower case mapping descriptor. + /// + internal readonly struct LowerCaseMapping + { + public readonly char ChMin; + public readonly char ChMax; + public readonly int LcOp; + public readonly int Data; + + internal LowerCaseMapping(char chMin, char chMax, int lcOp, int data) + { + ChMin = chMin; + ChMax = chMax; + LcOp = lcOp; + Data = data; + } + } + } +} diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 72f0140cdc29f0..3dc776fe604711 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -25,7 +25,7 @@ namespace System.Text.RegularExpressions // included in this class. /// Provides the "set of Unicode chars" functionality used by the regexp engine. - internal sealed class RegexCharClass + internal sealed partial class RegexCharClass { // Constants internal const int FlagsIndex = 0; @@ -264,139 +264,6 @@ internal sealed class RegexCharClass +"\u3041\u3097\u3099\u30A0\u30A1\u30FB\u30FC\u3100\u3105\u312D\u3131\u318F\u3190\u31B8\u31F0\u321D\u3220\u3244\u3251\u327C\u327F\u32CC\u32D0\u32FF\u3300\u3377\u337B\u33DE\u33E0\u33FF\u3400\u4DB6\u4E00\u9FA6\uA000\uA48D\uA490\uA4C7\uAC00\uD7A4\uF900\uFA2E\uFA30\uFA6B\uFB00\uFB07\uFB13\uFB18\uFB1D\uFB37\uFB38\uFB3D\uFB3E\uFB3F\uFB40\uFB42\uFB43\uFB45\uFB46\uFBB2\uFBD3\uFD3E\uFD50\uFD90\uFD92\uFDC8\uFDF0\uFDFD\uFE00\uFE10\uFE20\uFE24\uFE62\uFE63\uFE64\uFE67\uFE69\uFE6A\uFE70\uFE75\uFE76\uFEFD\uFF04\uFF05\uFF0B\uFF0C\uFF10\uFF1A\uFF1C\uFF1F\uFF21\uFF3B\uFF3E\uFF3F\uFF40\uFF5B\uFF5C\uFF5D\uFF5E\uFF5F\uFF66\uFFBF\uFFC2\uFFC8\uFFCA\uFFD0\uFFD2\uFFD8\uFFDA\uFFDD\uFFE0\uFFE7\uFFE8\uFFEF\uFFFC\uFFFE"}, }; - /************************************************************************** - Let U be the set of Unicode character values and let L be the lowercase - function, mapping from U to U. To perform case insensitive matching of - character sets, we need to be able to map an interval I in U, say - - I = [chMin, chMax] = { ch : chMin <= ch <= chMax } - - to a set A such that A contains L(I) and A is contained in the union of - I and L(I). - - The table below partitions U into intervals on which L is non-decreasing. - Thus, for any interval J = [a, b] contained in one of these intervals, - L(J) is contained in [L(a), L(b)]. - - It is also true that for any such J, [L(a), L(b)] is contained in the - union of J and L(J). This does not follow from L being non-decreasing on - these intervals. It follows from the nature of the L on each interval. - On each interval, L has one of the following forms: - - (1) L(ch) = constant (LowercaseSet) - (2) L(ch) = ch + offset (LowercaseAdd) - (3) L(ch) = ch | 1 (LowercaseBor) - (4) L(ch) = ch + (ch & 1) (LowercaseBad) - - It is easy to verify that for any of these forms [L(a), L(b)] is - contained in the union of [a, b] and L([a, b]). - ***************************************************************************/ - - private const int LowercaseSet = 0; // Set to arg. - private const int LowercaseAdd = 1; // Add arg. - private const int LowercaseBor = 2; // Bitwise or with 1. - private const int LowercaseBad = 3; // Bitwise and with 1 and add original. - - private static readonly LowerCaseMapping[] s_lcTable = new LowerCaseMapping[] - { - new LowerCaseMapping('\u0041', '\u005A', LowercaseAdd, 32), - new LowerCaseMapping('\u00C0', '\u00D6', LowercaseAdd, 32), - new LowerCaseMapping('\u00D8', '\u00DE', LowercaseAdd, 32), - new LowerCaseMapping('\u0100', '\u012E', LowercaseBor, 0), - new LowerCaseMapping('\u0132', '\u0136', LowercaseBor, 0), - new LowerCaseMapping('\u0139', '\u0147', LowercaseBad, 0), - new LowerCaseMapping('\u014A', '\u0176', LowercaseBor, 0), - new LowerCaseMapping('\u0178', '\u0178', LowercaseSet, 0x00FF), - new LowerCaseMapping('\u0179', '\u017D', LowercaseBad, 0), - new LowerCaseMapping('\u0181', '\u0181', LowercaseSet, 0x0253), - new LowerCaseMapping('\u0182', '\u0184', LowercaseBor, 0), - new LowerCaseMapping('\u0186', '\u0186', LowercaseSet, 0x0254), - new LowerCaseMapping('\u0187', '\u0187', LowercaseSet, 0x0188), - new LowerCaseMapping('\u0189', '\u018A', LowercaseAdd, 205), - new LowerCaseMapping('\u018B', '\u018B', LowercaseSet, 0x018C), - new LowerCaseMapping('\u018E', '\u018E', LowercaseSet, 0x01DD), - new LowerCaseMapping('\u018F', '\u018F', LowercaseSet, 0x0259), - new LowerCaseMapping('\u0190', '\u0190', LowercaseSet, 0x025B), - new LowerCaseMapping('\u0191', '\u0191', LowercaseSet, 0x0192), - new LowerCaseMapping('\u0193', '\u0193', LowercaseSet, 0x0260), - new LowerCaseMapping('\u0194', '\u0194', LowercaseSet, 0x0263), - new LowerCaseMapping('\u0196', '\u0196', LowercaseSet, 0x0269), - new LowerCaseMapping('\u0197', '\u0197', LowercaseSet, 0x0268), - new LowerCaseMapping('\u0198', '\u0198', LowercaseSet, 0x0199), - new LowerCaseMapping('\u019C', '\u019C', LowercaseSet, 0x026F), - new LowerCaseMapping('\u019D', '\u019D', LowercaseSet, 0x0272), - new LowerCaseMapping('\u019F', '\u019F', LowercaseSet, 0x0275), - new LowerCaseMapping('\u01A0', '\u01A4', LowercaseBor, 0), - new LowerCaseMapping('\u01A7', '\u01A7', LowercaseSet, 0x01A8), - new LowerCaseMapping('\u01A9', '\u01A9', LowercaseSet, 0x0283), - new LowerCaseMapping('\u01AC', '\u01AC', LowercaseSet, 0x01AD), - new LowerCaseMapping('\u01AE', '\u01AE', LowercaseSet, 0x0288), - new LowerCaseMapping('\u01AF', '\u01AF', LowercaseSet, 0x01B0), - new LowerCaseMapping('\u01B1', '\u01B2', LowercaseAdd, 217), - new LowerCaseMapping('\u01B3', '\u01B5', LowercaseBad, 0), - new LowerCaseMapping('\u01B7', '\u01B7', LowercaseSet, 0x0292), - new LowerCaseMapping('\u01B8', '\u01B8', LowercaseSet, 0x01B9), - new LowerCaseMapping('\u01BC', '\u01BC', LowercaseSet, 0x01BD), - new LowerCaseMapping('\u01C4', '\u01C5', LowercaseSet, 0x01C6), - new LowerCaseMapping('\u01C7', '\u01C8', LowercaseSet, 0x01C9), - new LowerCaseMapping('\u01CA', '\u01CB', LowercaseSet, 0x01CC), - new LowerCaseMapping('\u01CD', '\u01DB', LowercaseBad, 0), - new LowerCaseMapping('\u01DE', '\u01EE', LowercaseBor, 0), - new LowerCaseMapping('\u01F1', '\u01F2', LowercaseSet, 0x01F3), - new LowerCaseMapping('\u01F4', '\u01F4', LowercaseSet, 0x01F5), - new LowerCaseMapping('\u01FA', '\u0216', LowercaseBor, 0), - new LowerCaseMapping('\u0386', '\u0386', LowercaseSet, 0x03AC), - new LowerCaseMapping('\u0388', '\u038A', LowercaseAdd, 37), - new LowerCaseMapping('\u038C', '\u038C', LowercaseSet, 0x03CC), - new LowerCaseMapping('\u038E', '\u038F', LowercaseAdd, 63), - new LowerCaseMapping('\u0391', '\u03A1', LowercaseAdd, 32), - new LowerCaseMapping('\u03A3', '\u03AB', LowercaseAdd, 32), - new LowerCaseMapping('\u03E2', '\u03EE', LowercaseBor, 0), - new LowerCaseMapping('\u0401', '\u040F', LowercaseAdd, 80), - new LowerCaseMapping('\u0410', '\u042F', LowercaseAdd, 32), - new LowerCaseMapping('\u0460', '\u0480', LowercaseBor, 0), - new LowerCaseMapping('\u0490', '\u04BE', LowercaseBor, 0), - new LowerCaseMapping('\u04C1', '\u04C3', LowercaseBad, 0), - new LowerCaseMapping('\u04C7', '\u04C7', LowercaseSet, 0x04C8), - new LowerCaseMapping('\u04CB', '\u04CB', LowercaseSet, 0x04CC), - new LowerCaseMapping('\u04D0', '\u04EA', LowercaseBor, 0), - new LowerCaseMapping('\u04EE', '\u04F4', LowercaseBor, 0), - new LowerCaseMapping('\u04F8', '\u04F8', LowercaseSet, 0x04F9), - new LowerCaseMapping('\u0531', '\u0556', LowercaseAdd, 48), - new LowerCaseMapping('\u10A0', '\u10C5', LowercaseAdd, 7264), - new LowerCaseMapping('\u1E00', '\u1E95', LowercaseBor, 0), - new LowerCaseMapping('\u1EA0', '\u1EF8', LowercaseBor, 0), - new LowerCaseMapping('\u1F08', '\u1F0F', LowercaseAdd, -8), - new LowerCaseMapping('\u1F18', '\u1F1D', LowercaseAdd, -8), - new LowerCaseMapping('\u1F28', '\u1F2F', LowercaseAdd, -8), - new LowerCaseMapping('\u1F38', '\u1F3F', LowercaseAdd, -8), - new LowerCaseMapping('\u1F48', '\u1F4D', LowercaseAdd, -8), - new LowerCaseMapping('\u1F59', '\u1F59', LowercaseSet, 0x1F51), - new LowerCaseMapping('\u1F5B', '\u1F5B', LowercaseSet, 0x1F53), - new LowerCaseMapping('\u1F5D', '\u1F5D', LowercaseSet, 0x1F55), - new LowerCaseMapping('\u1F5F', '\u1F5F', LowercaseSet, 0x1F57), - new LowerCaseMapping('\u1F68', '\u1F6F', LowercaseAdd, -8), - new LowerCaseMapping('\u1F88', '\u1F8F', LowercaseAdd, -8), - new LowerCaseMapping('\u1F98', '\u1F9F', LowercaseAdd, -8), - new LowerCaseMapping('\u1FA8', '\u1FAF', LowercaseAdd, -8), - new LowerCaseMapping('\u1FB8', '\u1FB9', LowercaseAdd, -8), - new LowerCaseMapping('\u1FBA', '\u1FBB', LowercaseAdd, -74), - new LowerCaseMapping('\u1FBC', '\u1FBC', LowercaseSet, 0x1FB3), - new LowerCaseMapping('\u1FC8', '\u1FCB', LowercaseAdd, -86), - new LowerCaseMapping('\u1FCC', '\u1FCC', LowercaseSet, 0x1FC3), - new LowerCaseMapping('\u1FD8', '\u1FD9', LowercaseAdd, -8), - new LowerCaseMapping('\u1FDA', '\u1FDB', LowercaseAdd, -100), - new LowerCaseMapping('\u1FE8', '\u1FE9', LowercaseAdd, -8), - new LowerCaseMapping('\u1FEA', '\u1FEB', LowercaseAdd, -112), - new LowerCaseMapping('\u1FEC', '\u1FEC', LowercaseSet, 0x1FE5), - new LowerCaseMapping('\u1FF8', '\u1FF9', LowercaseAdd, -128), - new LowerCaseMapping('\u1FFA', '\u1FFB', LowercaseAdd, -126), - new LowerCaseMapping('\u1FFC', '\u1FFC', LowercaseSet, 0x1FF3), - new LowerCaseMapping('\u2160', '\u216F', LowercaseAdd, 16), - new LowerCaseMapping('\u24B6', '\u24CF', LowercaseAdd, 26), - new LowerCaseMapping('\uFF21', '\uFF3A', LowercaseAdd, 32), - }; - private List? _rangelist; private StringBuilder? _categories; private RegexCharClass? _subtractor; @@ -415,47 +282,6 @@ static RegexCharClass() for (int i = 0; i < len - 1; i++) Debug.Assert(string.Compare(s_propTable[i][0], s_propTable[i + 1][0], StringComparison.Ordinal) < 0, $"RegexCharClass s_propTable is out of order at ({s_propTable[i][0]}, {s_propTable[i + 1][0]})"); - // Make sure the s_lcTable's ranges are correctly populated. - CultureInfo culture = CultureInfo.InvariantCulture; - for (int k = 0; k < s_lcTable.Length; k++) - { - LowerCaseMapping loc = s_lcTable[k]; - if (loc.ChMin == '\u01C4') - { - Debug.Assert(true); - } - if (loc.LcOp == LowercaseAdd) - { - int offset = loc.Data; - for (char l = loc.ChMin; l <= loc.ChMax; l++) - { - Debug.Assert(culture.TextInfo.ToLower((char)l) == (char)(l + offset), $"The Unicode character range at index {k} in s_lcTable contains the character {(char)l} (decimal value: {l}). Its lowercase value cannot be obtained by using the specified offset."); - } - } - else if (loc.LcOp == LowercaseSet) - { - char lowercase = (char)loc.Data; - for (char l = loc.ChMin; l <= loc.ChMax; l++) - { - char uppercase = l; - Debug.Assert(culture.TextInfo.ToLower(uppercase) == lowercase, $"The Unicode character range at index {k} in s_lcTable contains the character {uppercase} (decimal value: {(int)uppercase}, hex: {(int)uppercase:X}. Its lowercase value {culture.TextInfo.ToLower(uppercase).ToString()} (decimal value: {(int)culture.TextInfo.ToLower(uppercase)}, hex: {(int)culture.TextInfo.ToLower(uppercase):X} is not the stored value {lowercase} (decimal value: {(int)lowercase}, hex: {(int)lowercase:X}. CultureInfo: {culture.EnglishName}"); - } - } - else if (loc.LcOp == LowercaseBor) - { - for (char l = loc.ChMin; l <= loc.ChMax; l++) - { - Debug.Assert(culture.TextInfo.ToLower((char)l) == (char)(l | (char)1), $"The Unicode character range at index {k} in s_lcTable contains the character {(char)l} (decimal value: {l}). Its lowercase value {culture.TextInfo.ToLower(l)} cannot be obtained by OR-ing with 1: {(char)(l | (char)1)}"); - } - } - else if (loc.LcOp == LowercaseBad) - { - for (char l = loc.ChMin; l <= loc.ChMax; l++) - { - Debug.Assert(culture.TextInfo.ToLower((char)l) == (char)(l + (l & 1)), $"The Unicode character range at index {k} in s_lcTable contains the character {(char)l} (decimal value: {l}). Its lowercase value cannot be obtained by AND-ing with 1."); - } - } - } } #endif @@ -1736,25 +1562,6 @@ private static string CategoryDescription(char ch) } #endif - /// - /// Lower case mapping descriptor. - /// - private readonly struct LowerCaseMapping - { - public readonly char ChMin; - public readonly char ChMax; - public readonly int LcOp; - public readonly int Data; - - internal LowerCaseMapping(char chMin, char chMax, int lcOp, int data) - { - ChMin = chMin; - ChMax = chMax; - LcOp = lcOp; - Data = data; - } - } - /// /// A first/last pair representing a single range of characters. /// diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexValidations.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexValidations.netcoreapp.cs new file mode 100644 index 00000000000000..9f4cf807d5c9db --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/tests/RegexValidations.netcoreapp.cs @@ -0,0 +1,53 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Globalization; +using Xunit; + +namespace System.Text.RegularExpressions.Tests +{ + public class RegexValidations + { + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))] + public void ValidateLowercaseMapTableInRegexCharClass() + { + CultureInfo culture = CultureInfo.InvariantCulture; + for (int k = 0; k < RegexCharClass.s_lcTable.Length; k++) + { + RegexCharClass.LowerCaseMapping loc = RegexCharClass.s_lcTable[k]; + if (loc.LcOp == RegexCharClass.LowercaseAdd) + { + int offset = loc.Data; + for (char l = loc.ChMin; l <= loc.ChMax; l++) + { + Assert.True(culture.TextInfo.ToLower((char)l) == (char)(l + offset), $"The Unicode character range at index {k} in s_lcTable contains the character {(char)l} (decimal value: {l}). Its lowercase value cannot be obtained by using the specified offset."); + } + } + else if (loc.LcOp == RegexCharClass.LowercaseSet) + { + char lowercase = (char)loc.Data; + for (char l = loc.ChMin; l <= loc.ChMax; l++) + { + char uppercase = l; + Assert.True(culture.TextInfo.ToLower(uppercase) == lowercase, $"The Unicode character range at index {k} in s_lcTable contains the character {uppercase} (decimal value: {(int)uppercase}, hex: {(int)uppercase:X}). Its lowercase value {culture.TextInfo.ToLower(uppercase).ToString()} (decimal value: {(int)culture.TextInfo.ToLower(uppercase)}, hex: {(int)culture.TextInfo.ToLower(uppercase):X}) is not the stored value {lowercase} (decimal value: {(int)lowercase}, hex: {(int)lowercase:X})."); + } + } + else if (loc.LcOp == RegexCharClass.LowercaseBor) + { + for (char l = loc.ChMin; l <= loc.ChMax; l++) + { + Assert.True(culture.TextInfo.ToLower((char)l) == (char)(l | (char)1), $"The Unicode character range at index {k} in s_lcTable contains the character {(char)l} (decimal value: {l}). Its lowercase value {culture.TextInfo.ToLower(l)} cannot be obtained by OR-ing with 1: {(char)(l | (char)1)}"); + } + } + else if (loc.LcOp == RegexCharClass.LowercaseBad) + { + for (char l = loc.ChMin; l <= loc.ChMax; l++) + { + Assert.True(culture.TextInfo.ToLower((char)l) == (char)(l + (l & 1)), $"The Unicode character range at index {k} in s_lcTable contains the character {(char)l} (decimal value: {l}). Its lowercase value cannot be obtained by AND-ing with 1."); + } + } + } + } + } +} diff --git a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj index fade03bcb99e88..45fe74f6287690 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj +++ b/src/libraries/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj @@ -32,12 +32,12 @@ - + + @@ -45,7 +45,7 @@ - + +