From 03e524f391fb10ac062f1562f1e4526b7c0c5f16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20M=C3=BCller?= Date: Fri, 28 Mar 2025 20:31:13 +0100 Subject: [PATCH 1/2] `<regex>`: Do not reset matched capture groups in POSIX regexes --- stl/inc/regex | 10 +++++--- .../std/tests/VSO_0000000_regex_use/test.cpp | 25 +++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/stl/inc/regex b/stl/inc/regex index 17fd2ee72c5..e7ac8c6b8af 100644 --- a/stl/inc/regex +++ b/stl/inc/regex @@ -3622,9 +3622,13 @@ bool _Matcher<_BidIt, _Elem, _RxTraits, _It>::_Match_pat(_Node_base* _Nx) { // c { // record current position _Node_capture* _Node = static_cast<_Node_capture*>(_Nx); _Tgt_state._Grps[_Node->_Idx]._Begin = _Tgt_state._Cur; - // CodeQL [SM02323] Comparing unchanging unsigned int _Node->_Idx to decreasing size_t _Idx is safe. - for (size_t _Idx = _Tgt_state._Grp_valid.size(); _Node->_Idx < _Idx;) { - _Tgt_state._Grp_valid[--_Idx] = false; + if (!(_Sflags + & (regex_constants::basic | regex_constants::extended | regex_constants::grep + | regex_constants::egrep | regex_constants::awk))) { + // CodeQL [SM02323] Comparing unchanging unsigned int _Node->_Idx to decreasing size_t _Idx is safe. 
+ for (size_t _Idx = _Tgt_state._Grp_valid.size(); _Node->_Idx < _Idx;) { + _Tgt_state._Grp_valid[--_Idx] = false; + } } break; diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp index ba9c575940a..b69df97ca88 100644 --- a/tests/std/tests/VSO_0000000_regex_use/test.cpp +++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp @@ -1171,6 +1171,30 @@ void test_gh_5253() { g_regexTester.should_not_match("a", "()*"); } +void test_gh_5377() { + for (syntax_option_type option : {extended, egrep}) { + test_regex abcd_regex(&g_regexTester, R"(^((a)|(b)|(c)|(d))+$)", option); + abcd_regex.should_search_match_capture_groups( + "abcd", "abcd", match_default, {{3, 4}, {0, 1}, {1, 2}, {2, 3}, {3, 4}}); + abcd_regex.should_search_match_capture_groups( + "acbd", "acbd", match_default, {{3, 4}, {0, 1}, {2, 3}, {1, 2}, {3, 4}}); + abcd_regex.should_search_match_capture_groups( + "dcba", "dcba", match_default, {{3, 4}, {3, 4}, {2, 3}, {1, 2}, {0, 1}}); + } + + for (syntax_option_type option : {basic, grep}) { + test_regex abcd_regex(&g_regexTester, R"(^\(\(a\)*\(b\)*\(c\)*\(d\)*\)*$)", option); + abcd_regex.should_search_match_capture_groups( + "abcd", "abcd", match_default, {{0, 4}, {0, 1}, {1, 2}, {2, 3}, {3, 4}}); + abcd_regex.should_search_match_capture_groups( + "acbd", "acbd", match_default, {{2, 4}, {0, 1}, {2, 3}, {1, 2}, {3, 4}}); + abcd_regex.should_search_match_capture_groups( + "dcba", "dcba", match_default, {{3, 4}, {3, 4}, {2, 3}, {1, 2}, {0, 1}}); + + test_regex backref_regex(&g_regexTester, R"(^\(\(a\)\{0,1\}\(\2b\)\{0,1\}\)*)", option); + backref_regex.should_search_match_capture_groups("aaababb", "aaabab", match_default, {{4, 6}, {1, 2}, {4, 6}}); + } +} int main() { test_dev10_449367_case_insensitivity_should_work(); test_dev11_462743_regex_collate_should_not_disable_regex_icase(); @@ -1208,6 +1232,7 @@ int main() { test_gh_5192(); test_gh_5214(); test_gh_5253(); + test_gh_5377(); return g_regexTester.result(); 
} From 15666eab3ccf32b7b89eb69760b4e1a34593c03f Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 3 Apr 2025 14:25:00 -0700 Subject: [PATCH 2/2] Code review feedback. --- .../std/tests/VSO_0000000_regex_use/test.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp index b69df97ca88..042d5e834d1 100644 --- a/tests/std/tests/VSO_0000000_regex_use/test.cpp +++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp @@ -1172,7 +1172,8 @@ void test_gh_5253() { } void test_gh_5377() { - for (syntax_option_type option : {extended, egrep}) { + // GH-5377 <regex>: Do not reset matched capture groups in POSIX regexes + for (syntax_option_type option : {extended, awk, egrep}) { test_regex abcd_regex(&g_regexTester, R"(^((a)|(b)|(c)|(d))+$)", option); abcd_regex.should_search_match_capture_groups( "abcd", "abcd", match_default, {{3, 4}, {0, 1}, {1, 2}, {2, 3}, {3, 4}}); @@ -1194,7 +1195,23 @@ void test_gh_5377() { test_regex backref_regex(&g_regexTester, R"(^\(\(a\)\{0,1\}\(\2b\)\{0,1\}\)*)", option); backref_regex.should_search_match_capture_groups("aaababb", "aaabab", match_default, {{4, 6}, {1, 2}, {4, 6}}); } + + { + // ECMAScript's behavior is different: + test_regex abcd_regex(&g_regexTester, R"(^((a)|(b)|(c)|(d))+$)", ECMAScript); + abcd_regex.should_search_match_capture_groups( + "abcd", "abcd", match_default, {{3, 4}, {-1, -1}, {-1, -1}, {-1, -1}, {3, 4}}); + abcd_regex.should_search_match_capture_groups( + "acbd", "acbd", match_default, {{3, 4}, {-1, -1}, {-1, -1}, {-1, -1}, {3, 4}}); + abcd_regex.should_search_match_capture_groups( + "dcba", "dcba", match_default, {{3, 4}, {3, 4}, {-1, -1}, {-1, -1}, {-1, -1}}); + + test_regex backref_regex(&g_regexTester, R"(^((a){0,1}(\2b){0,1})*)", ECMAScript); + backref_regex.should_search_match_capture_groups( + "aaababb", "aaababb", match_default, {{6, 7}, {-1, -1}, {6, 7}}); + } } + int main() { 
test_dev10_449367_case_insensitivity_should_work(); test_dev11_462743_regex_collate_should_not_disable_regex_icase();