From 1ef8a05170294b06a6009d55fbe16bd9f139b9b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Julian=20M=C3=BCller?= <julian.mueller@uni-konstanz.de>
Date: Fri, 23 May 2025 19:43:26 +0200
Subject: [PATCH 1/6] `<regex>`: Add multiline option and make non-multiline
 mode the default

---
 benchmarks/src/regex_search.cpp               |   1 +
 stl/inc/regex                                 |  76 ++++--
 tests/libcxx/expected_results.txt             |   6 -
 tests/std/test.lst                            |   1 +
 .../env.lst                                   |   4 +
 .../test.cpp                                  | 107 ++++++++
 .../VSO_0000000_regex_interface/test.cpp      |   4 +-
 .../std/tests/VSO_0000000_regex_use/test.cpp  | 242 ++++++++++++++----
 tests/tr1/tests/regex2/test.cpp               |   2 +-
 9 files changed, 373 insertions(+), 70 deletions(-)
 create mode 100644 tests/std/tests/GH_000073_regex_multiline_escape_hatch/env.lst
 create mode 100644 tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp
diff --git a/benchmarks/src/regex_search.cpp b/benchmarks/src/regex_search.cpp
index 8a96a98e77f..28e88c4b313 100644
--- a/benchmarks/src/regex_search.cpp
+++ b/benchmarks/src/regex_search.cpp
@@ -31,6 +31,7 @@ void bm_lorem_search(benchmark::State& state, const char* pattern) {
     }
 }
 
+BENCHMARK_CAPTURE(bm_lorem_search, "^bibe", "^bibe")->Arg(2)->Arg(3)->Arg(4);
 BENCHMARK_CAPTURE(bm_lorem_search, "bibe", "bibe")->Arg(2)->Arg(3)->Arg(4);
 BENCHMARK_CAPTURE(bm_lorem_search, "(bibe)", "(bibe)")->Arg(2)->Arg(3)->Arg(4);
 BENCHMARK_CAPTURE(bm_lorem_search, "(bibe)+", "(bibe)+")->Arg(2)->Arg(3)->Arg(4);
diff --git a/stl/inc/regex b/stl/inc/regex
index 4c04da603b8..bb38105d6f6 100644
--- a/stl/inc/regex
+++ b/stl/inc/regex
@@ -121,10 +121,11 @@ namespace regex_constants {
         _Gmask     = 0x3F,
         _Any_posix = basic | extended | grep | egrep | awk,
 
-        icase    = 0x0100,
-        nosubs   = 0x0200,
-        optimize = 0x0400,
-        collate  = 0x0800
+        icase     = 0x0100,
+        nosubs    = 0x0200,
+        optimize  = 0x0400,
+        collate   = 0x0800,
+        multiline = 0x1000
     };
 
     _BITMASK_OPS(_EXPORT_STD, syntax_option_type)
@@ -1666,6 +1667,15 @@ public:
         if (_Re->_Flags & _Fl_begin_needs_d) {
             _Char_class_d = _Lookup_char_class(static_cast<_Elem>('D'));
         }
+
+// sanitize multiline mode setting
+#ifdef _REGEX_MAKE_MULTILINE_MODE_DEFAULT
+        _Sflags |= regex_constants::multiline; // old matcher applied multiline mode for all grammars
+#else // ^^^ defined(_REGEX_MAKE_MULTILINE_MODE_DEFAULT) / !defined(_REGEX_MAKE_MULTILINE_MODE_DEFAULT) vvv
+        if (_Sflags & regex_constants::_Any_posix) { // multiline mode is ECMAScript-only
+            _Sflags &= ~regex_constants::multiline;
+        }
+#endif // ^^^ !defined(_REGEX_MAKE_MULTILINE_MODE_DEFAULT) ^^^
     }
 
     void _Setf(regex_constants::match_flag_type _Mf) { // set specified flags
@@ -1920,6 +1930,7 @@ public:
     static constexpr flag_type awk        = regex_constants::awk;
     static constexpr flag_type grep       = regex_constants::grep;
     static constexpr flag_type egrep      = regex_constants::egrep;
+    static constexpr flag_type multiline  = regex_constants::multiline;
 
     basic_regex() = default; // construct empty object
 
@@ -3833,6 +3844,11 @@ typename _RxTraits::char_class_type _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Al
     return _Traits.lookup_classname(_Ptr, _Ptr + 1, (_Sflags & regex_constants::icase) != 0);
 }
 
+template <class _Elem>
+bool _Is_ecmascript_line_terminator(_Elem _Ch) { // newline, carriage return, line separator, or paragraph separator
+    return _Ch == _Meta_nl || _Ch == _Meta_cr || _Ch == _Meta_ls || _Ch == _Meta_ps;
+}
+
 template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
 bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _Nx) { // check for match
     if (0 < _Max_stack_count && --_Max_stack_count <= 0) {
@@ -3852,18 +3868,19 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
         case _N_bol:
             if ((_Mflags & regex_constants::match_prev_avail)
                 || _Tgt_state._Cur != _Begin) { // if --_Cur is valid, check for preceding newline
-                _Failed = *_Prev_iter(_Tgt_state._Cur) != _Meta_nl;
+                _Failed = !(_Sflags & regex_constants::multiline)
+                       || !_STD _Is_ecmascript_line_terminator(*_STD _Prev_iter(_Tgt_state._Cur));
             } else {
                 _Failed = (_Mflags & regex_constants::match_not_bol) != 0;
             }
-
             break;
 
         case _N_eol:
             if (_Tgt_state._Cur == _End) {
                 _Failed = (_Mflags & regex_constants::match_not_eol) != 0;
             } else {
-                _Failed = *_Tgt_state._Cur != _Meta_nl;
+                _Failed =
+                    !(_Sflags & regex_constants::multiline) || !_STD _Is_ecmascript_line_terminator(*_Tgt_state._Cur);
             }
 
             break;
@@ -3881,7 +3898,7 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
                     if (_Ch == _Elem()) {
                         _Failed = true;
                     }
-                } else if (_Ch == _Meta_nl || _Ch == _Meta_cr || _Ch == _Meta_ls || _Ch == _Meta_ps) { // ECMAScript
+                } else if (_STD _Is_ecmascript_line_terminator(_Ch)) {
                     _Failed = true;
                 }
 
@@ -4054,30 +4071,55 @@ template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
 _BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg, _BidIt _Last, _Node_base* _Node_arg) {
     // skip until possible match
     // assumes --_First_arg is valid
-    _Node_base* _Nx = _Node_arg ? _Node_arg : _Rep;
+    constexpr char _Line_terminators_char[]       = {static_cast<char>(_Meta_cr), static_cast<char>(_Meta_nl)};
+    constexpr wchar_t _Line_terminators_wchar_t[] = {static_cast<wchar_t>(_Meta_cr), static_cast<wchar_t>(_Meta_nl),
+        static_cast<wchar_t>(_Meta_ls), static_cast<wchar_t>(_Meta_ps)};
+    _Node_base* _Nx                               = _Node_arg ? _Node_arg : _Rep;
 
     while (_First_arg != _Last && _Nx) { // check current node
         switch (_Nx->_Kind) { // handle current node's type
         case _N_nop:
             break;
 
-        case _N_bol:
-            { // check for embedded newline
-              // return iterator to character just after the newline; for input like "\nabc"
-              // matching "^abc", _First_arg could be pointing at 'a', so we need to check
-              // --_First_arg for '\n'
-                if (*_Prev_iter(_First_arg) != _Meta_nl) {
-                    _First_arg = _STD find(_First_arg, _Last, _Meta_nl);
+        case _N_bol: // check for beginning anchor
+            if (_Sflags & regex_constants::multiline) {
+                // multiline mode: check for embedded line terminator
+                // return iterator to character just after the newline; for input like "\nabc"
+                // matching "^abc", _First_arg could be pointing at 'a', so we need to check
+                // --_First_arg for '\n'
+                if (!_STD _Is_ecmascript_line_terminator(*_STD _Prev_iter(_First_arg))) {
+                    if constexpr (sizeof(_Elem) == 1) {
+                        _First_arg = _STD find_first_of(
+                            _First_arg, _Last, _Line_terminators_char, _STD end(_Line_terminators_char));
+                    } else {
+                        _First_arg = _STD find_first_of(
+                            _First_arg, _Last, _Line_terminators_wchar_t, _STD end(_Line_terminators_wchar_t));
+                    }
+
                     if (_First_arg != _Last) {
                         ++_First_arg;
                     }
                 }
 
                 return _First_arg;
+            } else {
+                // non-multiline mode: never matches because --_First_arg is valid
+                return _Last;
             }
 
         case _N_eol:
-            return _STD find(_First_arg, _Last, _Meta_nl);
+            if (_Sflags & regex_constants::multiline) {
+                // multiline mode: matches at next line terminator or end of input
+                if constexpr (sizeof(_Elem) == 1) {
+                    return _STD find_first_of(
+                        _First_arg, _Last, _Line_terminators_char, _STD end(_Line_terminators_char));
+                } else {
+                    return _STD find_first_of(
+                        _First_arg, _Last, _Line_terminators_wchar_t, _STD end(_Line_terminators_wchar_t));
+                }
+            } else {
+                return _Last; // non-multiline mode: matches at end of input or not at all
+            }
 
         case _N_str:
             { // check for string match
diff --git a/tests/libcxx/expected_results.txt b/tests/libcxx/expected_results.txt
index ed1781f9b1a..d5f9274662e 100644
--- a/tests/libcxx/expected_results.txt
+++ b/tests/libcxx/expected_results.txt
@@ -575,12 +575,6 @@ std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.pass.cpp FAIL
 
 
 # *** MISSING LWG ISSUE RESOLUTIONS ***
-# LWG-2503 "multiline option should be added to syntax_option_type"
-std/re/re.alg/re.alg.search/no_update_pos.pass.cpp FAIL
-std/re/re.const/re.matchflag/match_multiline.pass.cpp FAIL
-std/re/re.const/re.matchflag/match_not_eol.pass.cpp FAIL
-std/re/re.const/re.synopt/syntax_option_type.pass.cpp FAIL
-
 # LWG-2532 "Satisfying a promise at thread exit" (Open)
 std/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp FAIL
 std/thread/futures/futures.promise/set_lvalue_at_thread_exit.pass.cpp FAIL
diff --git a/tests/std/test.lst b/tests/std/test.lst
index 4c7c40603d1..46be77e6b8b 100644
--- a/tests/std/test.lst
+++ b/tests/std/test.lst
@@ -154,6 +154,7 @@ tests\Dev11_1140665_unique_ptr_array_conversions
 tests\Dev11_1150223_shared_mutex
 tests\Dev11_1158803_regex_thread_safety
 tests\Dev11_1180290_filesystem_error_code
+tests\GH_000073_regex_multiline_escape_hatch
 tests\GH_000140_adl_proof_comparison
 tests\GH_000140_adl_proof_construction
 tests\GH_000140_adl_proof_views
diff --git a/tests/std/tests/GH_000073_regex_multiline_escape_hatch/env.lst b/tests/std/tests/GH_000073_regex_multiline_escape_hatch/env.lst
new file mode 100644
index 00000000000..19f025bd0e6
--- /dev/null
+++ b/tests/std/tests/GH_000073_regex_multiline_escape_hatch/env.lst
@@ -0,0 +1,4 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+RUNALL_INCLUDE ..\usual_matrix.lst
diff --git a/tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp b/tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp
new file mode 100644
index 00000000000..ea66b886982
--- /dev/null
+++ b/tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp
@@ -0,0 +1,107 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#define _REGEX_MAKE_MULTILINE_MODE_DEFAULT
+
+#include <cstddef>
+#include <cstdio>
+#include <regex>
+#include <string>
+
+#include <test_regex_support.hpp>
+
+using namespace std;
+using namespace std::regex_constants;
+
+regex_fixture g_regexTester;
+
+void test_VSO_225160_match_bol_flag() {
+    // Old caret anchor tests: the escape hatch macro defined above makes every grammar match in multiline mode
+    for (syntax_option_type syntax : {syntax_option_type{}, ECMAScript, basic, grep, extended, egrep, awk}) {
+        const test_regex emptyAnchor(&g_regexTester, R"(^)", syntax);
+        emptyAnchor.should_search_match("", "");
+        emptyAnchor.should_search_fail("", match_not_bol);
+        emptyAnchor.should_search_match("\n", "");
+        emptyAnchor.should_search_match("\n", "", match_not_bol); // not_bol only blocks position 0; ^ matches after '\n'
+
+        const test_regex beginCd(&g_regexTester, R"(^cd)", syntax);
+        beginCd.should_search_match("ab\ncdefg", "cd");
+        beginCd.should_search_match("ab\ncdefg", "cd", match_not_bol);
+
+        beginCd.should_search_match("cdefg", "cd");
+        beginCd.should_search_fail("cdefg", match_not_bol);
+        beginCd.should_search_match("\ncdefg", "cd");
+        beginCd.should_search_match("\ncdefg", "cd", match_not_bol);
+
+        beginCd.should_search_fail("ab\nxcdefg"); // "cd" is present but not at the start of a line
+        beginCd.should_search_fail("ab\nxcdefg", match_not_bol);
+    }
+}
+
+void test_VSO_225160_match_eol_flag() {
+    // Old dollar anchor tests: the escape hatch macro defined above makes every grammar match in multiline mode
+    for (syntax_option_type syntax : {syntax_option_type{}, ECMAScript, basic, grep, extended, egrep, awk}) {
+        const test_regex emptyAnchor(&g_regexTester, R"($)", syntax);
+        emptyAnchor.should_search_match("", "");
+        emptyAnchor.should_search_fail("", match_not_eol);
+        emptyAnchor.should_search_match("\n", "");
+        emptyAnchor.should_search_match("\n", "", match_not_eol); // not_eol only blocks the end; $ matches before '\n'
+
+        const test_regex cdEnd(&g_regexTester, R"(cd$)", syntax);
+        cdEnd.should_search_match("abcd\nefg", "cd");
+        cdEnd.should_search_match("abcd\nefg", "cd", match_not_eol);
+
+        cdEnd.should_search_match("abcd", "cd");
+        cdEnd.should_search_fail("abcd", match_not_eol);
+        cdEnd.should_search_match("abcd\n", "cd");
+        cdEnd.should_search_match("abcd\n", "cd", match_not_eol);
+
+        cdEnd.should_search_fail("abcdx\nefg"); // "cd" is present but not at the end of a line
+        cdEnd.should_search_fail("abcdx\nefg", match_not_eol);
+    }
+}
+
+void test_gh_73() { // escape hatch: ^ and $ also match at embedded newlines, whatever the grammar
+    for (syntax_option_type syntax : {syntax_option_type{}, ECMAScript, basic, grep, extended, egrep, awk}) {
+        {
+            test_regex a_anchored_on_both_sides(&g_regexTester, "^a$", syntax);
+            a_anchored_on_both_sides.should_search_match("a", "a");
+            a_anchored_on_both_sides.should_search_match("b\na", "a");
+            a_anchored_on_both_sides.should_search_match("a\nb", "a");
+            a_anchored_on_both_sides.should_search_fail("a\nb", match_not_bol); // the only 'a' is at position 0
+            a_anchored_on_both_sides.should_search_fail("b\na", match_not_eol); // the only 'a' is at the end of input
+        }
+
+        {
+            test_regex a_anchored_front(&g_regexTester, "^a", syntax);
+            a_anchored_front.should_search_match("a", "a");
+            a_anchored_front.should_search_match("a\n", "a");
+            a_anchored_front.should_search_match("a\nb", "a");
+            a_anchored_front.should_search_match("b\na", "a");
+            a_anchored_front.should_search_match("\na", "a");
+            a_anchored_front.should_search_fail("a", match_not_bol);
+            a_anchored_front.should_search_match("\na", "a", match_not_bol);
+            a_anchored_front.should_search_match("b\na", "a", match_not_bol);
+        }
+
+        {
+            test_regex a_anchored_back(&g_regexTester, "a$", syntax);
+            a_anchored_back.should_search_match("a", "a");
+            a_anchored_back.should_search_match("\na", "a");
+            a_anchored_back.should_search_match("b\na", "a");
+            a_anchored_back.should_search_match("a\nb", "a");
+            a_anchored_back.should_search_match("a\n", "a");
+            a_anchored_back.should_search_fail("a", match_not_eol);
+            a_anchored_back.should_search_match("a\n", "a", match_not_eol);
+            a_anchored_back.should_search_match("a\nb", "a", match_not_eol);
+        }
+    }
+}
+
+int main() { // runs the anchor tests with multiline mode forced on for all grammars
+    test_VSO_225160_match_bol_flag();
+    test_VSO_225160_match_eol_flag();
+    test_gh_73();
+
+    return g_regexTester.result(); // presumably nonzero if any expectation failed -- TODO confirm in test_regex_support.hpp
+}
diff --git a/tests/std/tests/VSO_0000000_regex_interface/test.cpp b/tests/std/tests/VSO_0000000_regex_interface/test.cpp
index c057696eb3a..3fd281f4b85 100644
--- a/tests/std/tests/VSO_0000000_regex_interface/test.cpp
+++ b/tests/std/tests/VSO_0000000_regex_interface/test.cpp
@@ -387,9 +387,9 @@ void test_VSO_180466_regex_search_missing_Unchecked_call() {
 }
 
 void test_VSO_226914_match_prev_avail() {
-    // N.B. assumes our nonstandard multiline behavior. See also: LWG-2343, LWG-2503
+    // test assumes multiline mode
     const char bol_haystack[] = {'\n', 'a'};
-    const regex bol_anchor(R"(^a)");
+    const regex bol_anchor(R"(^a)", regex_constants::multiline);
     assert(regex_match(bol_haystack + 1, end(bol_haystack), bol_anchor));
     assert(!regex_match(bol_haystack + 1, end(bol_haystack), bol_anchor, match_not_bol));
     assert(regex_match(bol_haystack + 1, end(bol_haystack), bol_anchor, match_prev_avail));
diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp
index 7160c9007fc..066a2f957e9 100644
--- a/tests/std/tests/VSO_0000000_regex_use/test.cpp
+++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp
@@ -453,48 +453,95 @@ void test_VSO_208146_regex_smoke_test_rewritten_explicit_quantifier() {
 }
 
 void test_VSO_225160_match_bol_flag() {
-    // Note that this tests that we are consistent about the ECMAScript "multiline" setting being
-    // true, but the standard currently appears to mandate that that is false. We don't want to
-    // break existing customers, but we should at least be consistently multiline.
-    // See also: LWG-2343, LWG-2503
-    const test_regex emptyAnchor(&g_regexTester, R"(^)");
-    emptyAnchor.should_search_match("", "");
-    emptyAnchor.should_search_fail("", match_not_bol);
-    emptyAnchor.should_search_match("\n", "");
-    emptyAnchor.should_search_match("\n", "", match_not_bol);
-
-    const test_regex beginCd(&g_regexTester, R"(^cd)");
-    beginCd.should_search_match("ab\ncdefg", "cd");
-    beginCd.should_search_match("ab\ncdefg", "cd", match_not_bol);
-
-    beginCd.should_search_match("cdefg", "cd");
-    beginCd.should_search_fail("cdefg", match_not_bol);
-    beginCd.should_search_match("\ncdefg", "cd");
-    beginCd.should_search_match("\ncdefg", "cd", match_not_bol);
-
-    beginCd.should_search_fail("ab\nxcdefg");
-    beginCd.should_search_fail("ab\nxcdefg", match_not_bol);
+    // After implementation of LWG-2503/GH-73: These tests make sure that
+    // we consistently implement "multiline" option for ECMAScript
+    // (whether the ECMAScript flag is included or not)
+    for (syntax_option_type syntax : {multiline, ECMAScript | multiline}) {
+        for (string line_terminator : {"\n", "\r"}) {
+            const test_regex emptyAnchor(&g_regexTester, R"(^)", syntax);
+            emptyAnchor.should_search_match("", "");
+            emptyAnchor.should_search_fail("", match_not_bol);
+            emptyAnchor.should_search_match(line_terminator, "");
+            emptyAnchor.should_search_match(line_terminator, "", match_not_bol); // ^ matches after the terminator
+
+            const test_regex beginCd(&g_regexTester, R"(^cd)", syntax);
+            beginCd.should_search_match("ab" + line_terminator + "cdefg", "cd");
+            beginCd.should_search_match("ab" + line_terminator + "cdefg", "cd", match_not_bol);
+
+            beginCd.should_search_match("cdefg", "cd");
+            beginCd.should_search_fail("cdefg", match_not_bol);
+            beginCd.should_search_match(line_terminator + "cdefg", "cd");
+            beginCd.should_search_match(line_terminator + "cdefg", "cd", match_not_bol);
+
+            beginCd.should_search_fail("ab" + line_terminator + "xcdefg"); // "cd" is not at the start of a line
+            beginCd.should_search_fail("ab" + line_terminator + "xcdefg", match_not_bol);
+        }
+
+        for (wstring line_terminator : {L"\u2028", L"\u2029"}) { // U+2028 LINE SEPARATOR, U+2029 PARAGRAPH SEPARATOR
+            const test_wregex emptyAnchor(&g_regexTester, LR"(^)", syntax);
+            emptyAnchor.should_search_match(L"", L"");
+            emptyAnchor.should_search_fail(L"", match_not_bol);
+            emptyAnchor.should_search_match(line_terminator, L"");
+            emptyAnchor.should_search_match(line_terminator, L"", match_not_bol); // ^ matches after the terminator
+
+            const test_wregex beginCd(&g_regexTester, LR"(^cd)", syntax);
+            beginCd.should_search_match(L"ab" + line_terminator + L"cdefg", L"cd");
+            beginCd.should_search_match(L"ab" + line_terminator + L"cdefg", L"cd", match_not_bol);
+
+            beginCd.should_search_match(L"cdefg", L"cd");
+            beginCd.should_search_fail(L"cdefg", match_not_bol);
+            beginCd.should_search_match(line_terminator + L"cdefg", L"cd");
+            beginCd.should_search_match(line_terminator + L"cdefg", L"cd", match_not_bol);
+
+            beginCd.should_search_fail(L"ab" + line_terminator + L"xcdefg"); // "cd" is not at the start of a line
+            beginCd.should_search_fail(L"ab" + line_terminator + L"xcdefg", match_not_bol);
+        }
+    }
 }
 
 void test_VSO_225160_match_eol_flag() {
     // Ditto multiline comment
-    const test_regex emptyAnchor(&g_regexTester, R"($)");
-    emptyAnchor.should_search_match("", "");
-    emptyAnchor.should_search_fail("", match_not_eol);
-    emptyAnchor.should_search_match("\n", "");
-    emptyAnchor.should_search_match("\n", "", match_not_eol);
-
-    const test_regex cdEnd(&g_regexTester, R"(cd$)");
-    cdEnd.should_search_match("abcd\nefg", "cd");
-    cdEnd.should_search_match("abcd\nefg", "cd", match_not_eol);
-
-    cdEnd.should_search_match("abcd", "cd");
-    cdEnd.should_search_fail("abcd", match_not_eol);
-    cdEnd.should_search_match("abcd\n", "cd");
-    cdEnd.should_search_match("abcd\n", "cd", match_not_eol);
-
-    cdEnd.should_search_fail("abcdx\nefg");
-    cdEnd.should_search_fail("abcdx\nefg", match_not_eol);
+    for (syntax_option_type syntax : {multiline, ECMAScript | multiline}) {
+        for (string line_terminator : {"\n", "\r"}) {
+            const test_regex emptyAnchor(&g_regexTester, R"($)", syntax);
+            emptyAnchor.should_search_match("", "");
+            emptyAnchor.should_search_fail("", match_not_eol);
+            emptyAnchor.should_search_match(line_terminator, "");
+            emptyAnchor.should_search_match(line_terminator, "", match_not_eol); // $ matches before the terminator
+
+            const test_regex cdEnd(&g_regexTester, R"(cd$)", syntax);
+            cdEnd.should_search_match("abcd" + line_terminator + "efg", "cd");
+            cdEnd.should_search_match("abcd" + line_terminator + "efg", "cd", match_not_eol);
+
+            cdEnd.should_search_match("abcd", "cd");
+            cdEnd.should_search_fail("abcd", match_not_eol);
+            cdEnd.should_search_match("abcd" + line_terminator, "cd");
+            cdEnd.should_search_match("abcd" + line_terminator, "cd", match_not_eol);
+
+            cdEnd.should_search_fail("abcdx" + line_terminator + "efg"); // "cd" is not at the end of a line
+            cdEnd.should_search_fail("abcdx" + line_terminator + "efg", match_not_eol);
+        }
+
+        for (wstring line_terminator : {L"\u2028", L"\u2029"}) { // U+2028 LINE SEPARATOR, U+2029 PARAGRAPH SEPARATOR
+            const test_wregex emptyAnchor(&g_regexTester, LR"($)", syntax);
+            emptyAnchor.should_search_match(L"", L"");
+            emptyAnchor.should_search_fail(L"", match_not_eol);
+            emptyAnchor.should_search_match(line_terminator, L"");
+            emptyAnchor.should_search_match(line_terminator, L"", match_not_eol); // $ matches before the terminator
+
+            const test_wregex cdEnd(&g_regexTester, LR"(cd$)", syntax);
+            cdEnd.should_search_match(L"abcd" + line_terminator + L"efg", L"cd");
+            cdEnd.should_search_match(L"abcd" + line_terminator + L"efg", L"cd", match_not_eol);
+
+            cdEnd.should_search_match(L"abcd", L"cd");
+            cdEnd.should_search_fail(L"abcd", match_not_eol);
+            cdEnd.should_search_match(L"abcd" + line_terminator, L"cd");
+            cdEnd.should_search_match(L"abcd" + line_terminator, L"cd", match_not_eol);
+
+            cdEnd.should_search_fail(L"abcdx" + line_terminator + L"efg"); // "cd" is not at the end of a line
+            cdEnd.should_search_fail(L"abcdx" + line_terminator + L"efg", match_not_eol);
+        }
+    }
 }
 
 void test_VSO_226914_word_boundaries() {
@@ -558,8 +605,102 @@ void test_construction_from_nullptr_and_zero() {
     }
 }
 
+void test_gh_73() {
+    // GH-73: LWG-2503 multiline option should be added to syntax_option_type
+    for (syntax_option_type grammar : {basic, grep, extended, egrep, awk}) {
+        for (syntax_option_type multiline_mode : {syntax_option_type{}, multiline}) { // no effect on POSIX grammars
+            {
+                test_regex a_anchored_on_both_sides(&g_regexTester, "^a$", grammar | multiline_mode);
+                a_anchored_on_both_sides.should_search_match("a", "a");
+                a_anchored_on_both_sides.should_search_fail("b\na");
+                a_anchored_on_both_sides.should_search_fail("a\nb");
+            }
+
+            {
+                test_regex a_anchored_front(&g_regexTester, "^a", grammar | multiline_mode);
+                a_anchored_front.should_search_match("a", "a");
+                a_anchored_front.should_search_match("a\n", "a");
+                a_anchored_front.should_search_match("a\nb", "a");
+                a_anchored_front.should_search_fail("b\na");
+                a_anchored_front.should_search_fail("\na");
+            }
+
+            {
+                test_regex a_anchored_back(&g_regexTester, "a$", grammar | multiline_mode);
+                a_anchored_back.should_search_match("a", "a");
+                a_anchored_back.should_search_match("\na", "a");
+                a_anchored_back.should_search_match("b\na", "a");
+                a_anchored_back.should_search_fail("a\nb");
+                a_anchored_back.should_search_fail("a\n");
+            }
+        }
+    }
+
+    for (syntax_option_type grammar : {syntax_option_type{}, ECMAScript}) { // non-multiline: anchors ignore newlines
+        {
+            test_regex a_anchored_on_both_sides(&g_regexTester, "^a$", grammar);
+            a_anchored_on_both_sides.should_search_match("a", "a");
+            a_anchored_on_both_sides.should_search_fail("b\na");
+            a_anchored_on_both_sides.should_search_fail("a\nb");
+        }
+
+        {
+            test_regex a_anchored_front(&g_regexTester, "^a", grammar);
+            a_anchored_front.should_search_match("a", "a");
+            a_anchored_front.should_search_match("a\n", "a");
+            a_anchored_front.should_search_match("a\nb", "a");
+            a_anchored_front.should_search_fail("b\na");
+            a_anchored_front.should_search_fail("\na");
+        }
+
+        {
+            test_regex a_anchored_back(&g_regexTester, "a$", grammar);
+            a_anchored_back.should_search_match("a", "a");
+            a_anchored_back.should_search_match("\na", "a");
+            a_anchored_back.should_search_match("b\na", "a");
+            a_anchored_back.should_search_fail("a\nb");
+            a_anchored_back.should_search_fail("a\n");
+        }
+    }
+
+    for (syntax_option_type syntax : {multiline, ECMAScript | multiline}) { // multiline: anchors match at newlines
+        {
+            test_regex a_anchored_on_both_sides(&g_regexTester, "^a$", syntax);
+            a_anchored_on_both_sides.should_search_match("a", "a");
+            a_anchored_on_both_sides.should_search_match("b\na", "a");
+            a_anchored_on_both_sides.should_search_match("a\nb", "a");
+            a_anchored_on_both_sides.should_search_fail("a\nb", match_not_bol);
+            a_anchored_on_both_sides.should_search_fail("b\na", match_not_eol);
+        }
+
+        {
+            test_regex a_anchored_front(&g_regexTester, "^a", syntax);
+            a_anchored_front.should_search_match("a", "a");
+            a_anchored_front.should_search_match("a\n", "a");
+            a_anchored_front.should_search_match("a\nb", "a");
+            a_anchored_front.should_search_match("b\na", "a");
+            a_anchored_front.should_search_match("\na", "a");
+            a_anchored_front.should_search_fail("a", match_not_bol);
+            a_anchored_front.should_search_match("\na", "a", match_not_bol);
+            a_anchored_front.should_search_match("b\na", "a", match_not_bol);
+        }
+
+        {
+            test_regex a_anchored_back(&g_regexTester, "a$", syntax);
+            a_anchored_back.should_search_match("a", "a");
+            a_anchored_back.should_search_match("\na", "a");
+            a_anchored_back.should_search_match("b\na", "a");
+            a_anchored_back.should_search_match("a\nb", "a");
+            a_anchored_back.should_search_match("a\n", "a");
+            a_anchored_back.should_search_fail("a", match_not_eol);
+            a_anchored_back.should_search_match("a\n", "a", match_not_eol);
+            a_anchored_back.should_search_match("a\nb", "a", match_not_eol);
+        }
+    }
+}
+
 void test_gh_731() {
-    // GH-731 <regex>: Incorrect behavior for capture groups
+    // GH-731: <regex>: Incorrect behavior for capture groups
     // GH-996: regex_search behaves incorrectly when the regex contains R"(\[)"
 
     // Several bugs were fixed in ECMAScript (depth-first) and POSIX (leftmost-longest) matching rules.
@@ -1533,7 +1674,7 @@ void test_gh_5362_grep() {
     {
         const test_regex middle_nl_with_dollar(&g_regexTester, "a$\nb$", grep);
         middle_nl_with_dollar.should_search_match("a$\nb", "b");
-        middle_nl_with_dollar.should_search_match("a\nb", "a");
+        middle_nl_with_dollar.should_search_match("a\nb", "b");
         middle_nl_with_dollar.should_search_match("ba", "a");
         middle_nl_with_dollar.should_search_match("a", "a");
         middle_nl_with_dollar.should_search_match("b", "b");
@@ -1913,16 +2054,28 @@ void test_gh_5509() {
     }
 
     {
-        test_regex anchored_string_plus_regex(&g_regexTester, "((?:^aw)+)");
-        anchored_string_plus_regex.should_search_match_capture_groups(
+        test_regex anchored_string_plus_regex_multi(&g_regexTester, "((?:^aw)+)", multiline);
+        anchored_string_plus_regex_multi.should_search_match_capture_groups(
             "blwerofa\nawaweraf", "aw", match_default, {{9, 11}});
+        anchored_string_plus_regex_multi.should_search_fail("blwerof\naerwaf");
+    }
+
+    {
+        test_regex anchored_string_plus_regex(&g_regexTester, "((?:^aw)+)");
+        anchored_string_plus_regex.should_search_fail("blwerofa\nawaweraf");
         anchored_string_plus_regex.should_search_fail("blwerof\naerwaf");
     }
 
     {
-        test_regex anchored_string_plus_regex(&g_regexTester, "((?:$\naw)+)");
-        anchored_string_plus_regex.should_search_match_capture_groups(
+        test_regex anchored_string_plus_regex_multi(&g_regexTester, "((?:$\naw)+)", multiline);
+        anchored_string_plus_regex_multi.should_search_match_capture_groups(
             "blwerofa\nawaweraf", "\naw", match_default, {{8, 11}});
+        anchored_string_plus_regex_multi.should_search_fail("blwerof\naerwaf");
+    }
+
+    {
+        test_regex anchored_string_plus_regex(&g_regexTester, "((?:$\naw)+)");
+        anchored_string_plus_regex.should_search_fail("blwerofa\nawaweraf");
         anchored_string_plus_regex.should_search_fail("blwerof\naerwaf");
     }
 
@@ -1964,6 +2117,7 @@ int main() {
     test_VSO_225160_match_eol_flag();
     test_VSO_226914_word_boundaries();
     test_construction_from_nullptr_and_zero();
+    test_gh_73();
     test_gh_731();
     test_gh_992();
     test_gh_993();
diff --git a/tests/tr1/tests/regex2/test.cpp b/tests/tr1/tests/regex2/test.cpp
index 83706bb66bc..c720774d3c2 100644
--- a/tests/tr1/tests/regex2/test.cpp
+++ b/tests/tr1/tests/regex2/test.cpp
@@ -132,7 +132,7 @@ static const regex_test tests[] = {
     {__LINE__, T("a$"), T("ba"), "1 1 2", ALL},
     {__LINE__, T("a$"), T("ab"), "0", ALL},
 
-    {__LINE__, T("^a$"), T("b\na"), "1 2 3", ALL},
+    {__LINE__, T("^a$"), T("b\na"), "0", ALL},
 
     {__LINE__, T("\\b"), T("a"), "1 0 0", ECMA},
     {__LINE__, T("\\b"), T(""), "-1", BASIC | GREP | EXTENDED | EGREP},

From ec646527b5db77ecc476ea3b7ac4943d2a29f700 Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej" <stl@microsoft.com>
Date: Mon, 9 Jun 2025 12:58:46 -0700
Subject: [PATCH 2/6] Update comments.

---
 tests/std/tests/VSO_0000000_regex_interface/test.cpp | 2 +-
 tests/std/tests/VSO_0000000_regex_use/test.cpp       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/std/tests/VSO_0000000_regex_interface/test.cpp b/tests/std/tests/VSO_0000000_regex_interface/test.cpp
index 3fd281f4b85..c05c4aa4173 100644
--- a/tests/std/tests/VSO_0000000_regex_interface/test.cpp
+++ b/tests/std/tests/VSO_0000000_regex_interface/test.cpp
@@ -387,7 +387,7 @@ void test_VSO_180466_regex_search_missing_Unchecked_call() {
 }
 
 void test_VSO_226914_match_prev_avail() {
-    // test assumes multiline mode
+    // test exercises multiline mode
     const char bol_haystack[] = {'\n', 'a'};
     const regex bol_anchor(R"(^a)", regex_constants::multiline);
     assert(regex_match(bol_haystack + 1, end(bol_haystack), bol_anchor));
diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp
index 31da2d9685b..282c64cbafe 100644
--- a/tests/std/tests/VSO_0000000_regex_use/test.cpp
+++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp
@@ -454,7 +454,7 @@ void test_VSO_208146_regex_smoke_test_rewritten_explicit_quantifier() {
 
 void test_VSO_225160_match_bol_flag() {
     // After implementation of LWG-2503/GH-73: These tests make sure that
-    // we consistently implement "multiline" option for ECMAScript
+    // we consistently implement the "multiline" option for ECMAScript
     // (whether the ECMAScript flag is included or not)
     for (syntax_option_type syntax : {multiline, ECMAScript | multiline}) {
         for (string line_terminator : {"\n", "\r"}) {

From 34fde65b3956940933a7ea4934aff0ec13222ce0 Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej" <stl@microsoft.com>
Date: Mon, 9 Jun 2025 13:16:13 -0700
Subject: [PATCH 3/6] Use static constexpr for arrays.

---
 stl/inc/regex | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/stl/inc/regex b/stl/inc/regex
index 5cec65f3870..3187a9e45ca 100644
--- a/stl/inc/regex
+++ b/stl/inc/regex
@@ -4071,10 +4071,10 @@ template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
 _BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg, _BidIt _Last, _Node_base* _Node_arg) {
     // skip until possible match
     // assumes --_First_arg is valid
-    constexpr char _Line_terminators_char[]       = {static_cast<char>(_Meta_cr), static_cast<char>(_Meta_nl)};
-    constexpr wchar_t _Line_terminators_wchar_t[] = {static_cast<wchar_t>(_Meta_cr), static_cast<wchar_t>(_Meta_nl),
-        static_cast<wchar_t>(_Meta_ls), static_cast<wchar_t>(_Meta_ps)};
-    _Node_base* _Nx                               = _Node_arg ? _Node_arg : _Rep;
+    static constexpr char _Line_terminators_char[]       = {static_cast<char>(_Meta_cr), static_cast<char>(_Meta_nl)};
+    static constexpr wchar_t _Line_terminators_wchar_t[] = {static_cast<wchar_t>(_Meta_cr),
+        static_cast<wchar_t>(_Meta_nl), static_cast<wchar_t>(_Meta_ls), static_cast<wchar_t>(_Meta_ps)};
+    _Node_base* _Nx                                      = _Node_arg ? _Node_arg : _Rep;
 
     while (_First_arg != _Last && _Nx) { // check current node
         switch (_Nx->_Kind) { // handle current node's type

From 776a7c57c4893f8bc0127d6095bab636ade08515 Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej" <stl@microsoft.com>
Date: Mon, 9 Jun 2025 14:20:11 -0700
Subject: [PATCH 4/6] Test wide CR and LF as line terminators.

---
 tests/std/tests/VSO_0000000_regex_use/test.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp
index 282c64cbafe..92fe235892f 100644
--- a/tests/std/tests/VSO_0000000_regex_use/test.cpp
+++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp
@@ -477,7 +477,8 @@ void test_VSO_225160_match_bol_flag() {
             beginCd.should_search_fail("ab" + line_terminator + "xcdefg", match_not_bol);
         }
 
-        for (wstring line_terminator : {L"\u2028", L"\u2029"}) { // U+2028 LINE SEPARATOR, U+2029 PARAGRAPH SEPARATOR
+        for (wstring line_terminator :
+            {L"\n", L"\r", L"\u2028", L"\u2029"}) { // U+2028 LINE SEPARATOR, U+2029 PARAGRAPH SEPARATOR
             const test_wregex emptyAnchor(&g_regexTester, LR"(^)", syntax);
             emptyAnchor.should_search_match(L"", L"");
             emptyAnchor.should_search_fail(L"", match_not_bol);
@@ -522,7 +523,8 @@ void test_VSO_225160_match_eol_flag() {
             cdEnd.should_search_fail("abcdx" + line_terminator + "efg", match_not_eol);
         }
 
-        for (wstring line_terminator : {L"\u2028", L"\u2029"}) { // U+2028 LINE SEPARATOR, U+2029 PARAGRAPH SEPARATOR
+        for (wstring line_terminator :
+            {L"\n", L"\r", L"\u2028", L"\u2029"}) { // U+2028 LINE SEPARATOR, U+2029 PARAGRAPH SEPARATOR
             const test_wregex emptyAnchor(&g_regexTester, LR"($)", syntax);
             emptyAnchor.should_search_match(L"", L"");
             emptyAnchor.should_search_fail(L"", match_not_eol);

From dd1c7a19699c77d01aad7ed007914d529d0a11d2 Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej" <stl@microsoft.com>
Date: Mon, 9 Jun 2025 15:12:18 -0700
Subject: [PATCH 5/6] Make the option always defined to 0 or 1, add a detailed
 comment.

---
 stl/inc/regex                                    | 16 +++++++++++++---
 .../test.cpp                                     |  2 +-
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/stl/inc/regex b/stl/inc/regex
index 3187a9e45ca..bba27650ded 100644
--- a/stl/inc/regex
+++ b/stl/inc/regex
@@ -33,6 +33,16 @@ _STL_DISABLE_CLANG_WARNINGS
 #pragma push_macro("new")
 #undef new
 
+// Controls whether LWG-2503 "multiline option should be added to syntax_option_type" is implemented.
+// Defining this to 0 requests Standard behavior:
+// * For ECMAScript, matching is non-multiline by default, but regex_constants::multiline can be requested.
+// * For POSIX grammars, matching is non-multiline, and regex_constants::multiline is ignored (N5008 [tab:re.synopt]).
+// Defining this to 1 requests legacy behavior:
+// * For all grammars, matching is multiline, and regex_constants::multiline is redundant.
+#ifndef _REGEX_MAKE_MULTILINE_MODE_DEFAULT
+#define _REGEX_MAKE_MULTILINE_MODE_DEFAULT 0
+#endif
+
 #ifndef _REGEX_MAX_COMPLEXITY_COUNT
 #define _REGEX_MAX_COMPLEXITY_COUNT 10000000L // set to 0 to disable
 #endif // !defined(_REGEX_MAX_COMPLEXITY_COUNT)
@@ -1669,13 +1679,13 @@ public:
         }
 
 // sanitize multiline mode setting
-#ifdef _REGEX_MAKE_MULTILINE_MODE_DEFAULT
+#if _REGEX_MAKE_MULTILINE_MODE_DEFAULT
         _Sflags |= regex_constants::multiline; // old matcher applied multiline mode for all grammars
-#else // ^^^ defined(_REGEX_MAKE_MULTILINE_MODE_DEFAULT) / !defined(_REGEX_MAKE_MULTILINE_MODE_DEFAULT) vvv
+#else // ^^^ _REGEX_MAKE_MULTILINE_MODE_DEFAULT / !_REGEX_MAKE_MULTILINE_MODE_DEFAULT vvv
         if (_Sflags & regex_constants::_Any_posix) { // multiline mode is ECMAScript-only
             _Sflags &= ~regex_constants::multiline;
         }
-#endif // ^^^ !defined(_REGEX_MAKE_MULTILINE_MODE_DEFAULT) ^^^
+#endif // ^^^ !_REGEX_MAKE_MULTILINE_MODE_DEFAULT ^^^
     }
 
     void _Setf(regex_constants::match_flag_type _Mf) { // set specified flags
diff --git a/tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp b/tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp
index ea66b886982..b494174dfcc 100644
--- a/tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp
+++ b/tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-#define _REGEX_MAKE_MULTILINE_MODE_DEFAULT
+#define _REGEX_MAKE_MULTILINE_MODE_DEFAULT 1
 
 #include <cstddef>
 #include <cstdio>

From 549d1cedcc84e074cc4d8a39ddd71bb6031e5754 Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej" <stl@microsoft.com>
Date: Mon, 9 Jun 2025 15:14:46 -0700
Subject: [PATCH 6/6] Rename to `_REGEX_LEGACY_MULTILINE_MODE`.

---
 stl/inc/regex                                          | 10 +++++-----
 .../GH_000073_regex_multiline_escape_hatch/test.cpp    |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/stl/inc/regex b/stl/inc/regex
index bba27650ded..88ec4f59821 100644
--- a/stl/inc/regex
+++ b/stl/inc/regex
@@ -39,8 +39,8 @@ _STL_DISABLE_CLANG_WARNINGS
 // * For POSIX grammars, matching is non-multiline, and regex_constants::multiline is ignored (N5008 [tab:re.synopt]).
 // Defining this to 1 requests legacy behavior:
 // * For all grammars, matching is multiline, and regex_constants::multiline is redundant.
-#ifndef _REGEX_MAKE_MULTILINE_MODE_DEFAULT
-#define _REGEX_MAKE_MULTILINE_MODE_DEFAULT 0
+#ifndef _REGEX_LEGACY_MULTILINE_MODE
+#define _REGEX_LEGACY_MULTILINE_MODE 0
 #endif
 
 #ifndef _REGEX_MAX_COMPLEXITY_COUNT
@@ -1679,13 +1679,13 @@ public:
         }
 
 // sanitize multiline mode setting
-#if _REGEX_MAKE_MULTILINE_MODE_DEFAULT
+#if _REGEX_LEGACY_MULTILINE_MODE
         _Sflags |= regex_constants::multiline; // old matcher applied multiline mode for all grammars
-#else // ^^^ _REGEX_MAKE_MULTILINE_MODE_DEFAULT / !_REGEX_MAKE_MULTILINE_MODE_DEFAULT vvv
+#else // ^^^ _REGEX_LEGACY_MULTILINE_MODE / !_REGEX_LEGACY_MULTILINE_MODE vvv
         if (_Sflags & regex_constants::_Any_posix) { // multiline mode is ECMAScript-only
             _Sflags &= ~regex_constants::multiline;
         }
-#endif // ^^^ !_REGEX_MAKE_MULTILINE_MODE_DEFAULT ^^^
+#endif // ^^^ !_REGEX_LEGACY_MULTILINE_MODE ^^^
     }
 
     void _Setf(regex_constants::match_flag_type _Mf) { // set specified flags
diff --git a/tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp b/tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp
index b494174dfcc..31968afa26c 100644
--- a/tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp
+++ b/tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-#define _REGEX_MAKE_MULTILINE_MODE_DEFAULT 1
+#define _REGEX_LEGACY_MULTILINE_MODE 1
 
 #include <cstddef>
 #include <cstdio>