From d0434f61230b68a16608bfeaa48eaa4477046f2a Mon Sep 17 00:00:00 2001 From: Erik Corry Date: Mon, 5 May 2025 15:49:27 +0200 Subject: [PATCH 1/4] deps: V8: cherry-pick df20105ccf36 Original commit message: [regexp] Rename "greedy loops" to "fixed length". Some greedy quantifiers are code-generated in a more efficient way, but far from all greedy quantifiers are generated in this way. This change renames the specially optimized loops from "greedy loops" to "fixed length loops", which should be clearer. Going forward, we can probably reuse much of the code for fixed length loops when code-generating simple possessive quantifiers. Change-Id: I13b9d14beac430e2d05d0feaf887fc0566bc4103 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6508846 Reviewed-by: Patrick Thier Commit-Queue: Erik Corry Cr-Commit-Position: refs/heads/main@{#100062} Refs: https://github.com/v8/v8/commit/df20105ccf366215e7a10d8e161f257fc860a8c3 --- common.gypi | 2 +- .../regexp/arm/regexp-macro-assembler-arm.cc | 2 +- .../regexp/arm/regexp-macro-assembler-arm.h | 4 +- .../arm64/regexp-macro-assembler-arm64.cc | 2 +- .../arm64/regexp-macro-assembler-arm64.h | 4 +- .../ia32/regexp-macro-assembler-ia32.cc | 2 +- .../regexp/ia32/regexp-macro-assembler-ia32.h | 4 +- .../loong64/regexp-macro-assembler-loong64.cc | 2 +- .../loong64/regexp-macro-assembler-loong64.h | 4 +- .../mips64/regexp-macro-assembler-mips64.cc | 2 +- .../mips64/regexp-macro-assembler-mips64.h | 4 +- .../regexp/ppc/regexp-macro-assembler-ppc.cc | 2 +- .../regexp/ppc/regexp-macro-assembler-ppc.h | 4 +- .../src/regexp/regexp-bytecode-generator.cc | 4 +- .../v8/src/regexp/regexp-bytecode-generator.h | 2 +- deps/v8/src/regexp/regexp-bytecodes.h | 68 ++++++------- deps/v8/src/regexp/regexp-compiler.cc | 96 ++++++++++--------- deps/v8/src/regexp/regexp-compiler.h | 4 +- deps/v8/src/regexp/regexp-interpreter.cc | 4 +- .../regexp/regexp-macro-assembler-tracer.cc | 8 +- .../regexp/regexp-macro-assembler-tracer.h | 2 +- deps/v8/src/regexp/regexp-macro-assembler.h | 2 +- deps/v8/src/regexp/regexp-nodes.h | 27 +++--- .../riscv/regexp-macro-assembler-riscv.cc | 2 +- .../riscv/regexp-macro-assembler-riscv.h | 4 +- .../s390/regexp-macro-assembler-s390.cc | 2 +- .../regexp/s390/regexp-macro-assembler-s390.h | 4 +- .../regexp/x64/regexp-macro-assembler-x64.cc | 2 +- .../regexp/x64/regexp-macro-assembler-x64.h | 4 +- .../test/unittests/regexp/regexp-unittest.cc | 2 +- 30 files changed, 139 insertions(+), 136 deletions(-) diff --git a/common.gypi b/common.gypi index 62c52c27b7fc21..1ebdc4636f1734 100644 --- a/common.gypi +++ b/common.gypi @@ -38,7 +38,7 @@ # Reset this number to 0 on major V8 upgrades. # Increment by one for each non-official patch applied to deps/v8. - 'v8_embedder_string': '-node.33', + 'v8_embedder_string': '-node.34', ##### V8 defaults for Node.js ##### diff --git a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc index 7ef74c6d504593..68d6b84ddb2b4f 100644 --- a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc +++ b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc @@ -217,7 +217,7 @@ void RegExpMacroAssemblerARM::CheckCharacterLT(base::uc16 limit, BranchOrBacktrack(lt, on_less); } -void RegExpMacroAssemblerARM::CheckGreedyLoop(Label* on_equal) { +void RegExpMacroAssemblerARM::CheckFixedLengthLoop(Label* on_equal) { __ ldr(r0, MemOperand(backtrack_stackpointer(), 0)); __ cmp(current_input_offset(), r0); __ add(backtrack_stackpointer(), backtrack_stackpointer(), diff --git a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h index 20677a255e016c..79b270d228cd08 100644 --- a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h +++ b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h @@ -29,9 +29,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM Label* on_equal) override; void CheckCharacterGT(base::uc16 limit, Label* on_greater) override; void CheckCharacterLT(base::uc16 limit, Label* on_less) override; - // A "greedy loop" is a loop that is both greedy and with a simple + // A "fixed length loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. - void CheckGreedyLoop(Label* on_tos_equals_current_position) override; + void CheckFixedLengthLoop(Label* on_tos_equals_current_position) override; void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override; void CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) override; diff --git a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc index 64b635115608da..ad1be627d4ca3f 100644 --- a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc +++ b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc @@ -283,7 +283,7 @@ void RegExpMacroAssemblerARM64::CheckCharacters( } } -void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) { +void RegExpMacroAssemblerARM64::CheckFixedLengthLoop(Label* on_equal) { __ Ldr(w10, MemOperand(backtrack_stackpointer())); __ Cmp(current_input_offset(), w10); __ Cset(x11, eq); diff --git a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h index f336d8d8c5f2e4..cff18dc9db6364 100644 --- a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h +++ b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h @@ -33,9 +33,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM64 void CheckCharacterLT(base::uc16 limit, Label* on_less) override; void CheckCharacters(base::Vector str, int cp_offset, Label* on_failure, bool check_end_of_string); - // A "greedy loop" is a loop that is both greedy and with a simple + // A "fixed length loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. - void CheckGreedyLoop(Label* on_tos_equals_current_position) override; + void CheckFixedLengthLoop(Label* on_tos_equals_current_position) override; void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override; void CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) override; diff --git a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc index 8da66442df94fc..5496ad7227819e 100644 --- a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc +++ b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc @@ -196,7 +196,7 @@ void RegExpMacroAssemblerIA32::CheckCharacterLT(base::uc16 limit, BranchOrBacktrack(less, on_less); } -void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) { +void RegExpMacroAssemblerIA32::CheckFixedLengthLoop(Label* on_equal) { Label fallthrough; __ cmp(edi, Operand(backtrack_stackpointer(), 0)); __ j(not_equal, &fallthrough); diff --git a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h index bda7e2b2c126ba..eed8f1e63702db 100644 --- a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h +++ b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h @@ -30,9 +30,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerIA32 Label* on_equal) override; void CheckCharacterGT(base::uc16 limit, Label* on_greater) override; void CheckCharacterLT(base::uc16 limit, Label* on_less) override; - // A "greedy loop" is a loop that is both greedy and with a simple + // A "fixed length loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. - void CheckGreedyLoop(Label* on_tos_equals_current_position) override; + void CheckFixedLengthLoop(Label* on_tos_equals_current_position) override; void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override; void CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) override; diff --git a/deps/v8/src/regexp/loong64/regexp-macro-assembler-loong64.cc b/deps/v8/src/regexp/loong64/regexp-macro-assembler-loong64.cc index f76e10395b36ac..50865c1473ec96 100644 --- a/deps/v8/src/regexp/loong64/regexp-macro-assembler-loong64.cc +++ b/deps/v8/src/regexp/loong64/regexp-macro-assembler-loong64.cc @@ -203,7 +203,7 @@ void RegExpMacroAssemblerLOONG64::CheckCharacterLT(base::uc16 limit, BranchOrBacktrack(on_less, lt, current_character(), Operand(limit)); } -void RegExpMacroAssemblerLOONG64::CheckGreedyLoop(Label* on_equal) { +void RegExpMacroAssemblerLOONG64::CheckFixedLengthLoop(Label* on_equal) { Label backtrack_non_equal; __ Ld_w(a0, MemOperand(backtrack_stackpointer(), 0)); __ Branch(&backtrack_non_equal, ne, current_input_offset(), Operand(a0)); diff --git a/deps/v8/src/regexp/loong64/regexp-macro-assembler-loong64.h b/deps/v8/src/regexp/loong64/regexp-macro-assembler-loong64.h index 283d396ebebb8e..70c09e3c0aac4b 100644 --- a/deps/v8/src/regexp/loong64/regexp-macro-assembler-loong64.h +++ b/deps/v8/src/regexp/loong64/regexp-macro-assembler-loong64.h @@ -28,9 +28,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerLOONG64 Label* on_equal) override; void CheckCharacterGT(base::uc16 limit, Label* on_greater) override; void CheckCharacterLT(base::uc16 limit, Label* on_less) override; - // A "greedy loop" is a loop that is both greedy and with a simple + // A "fixed length loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. - void CheckGreedyLoop(Label* on_tos_equals_current_position) override; + void CheckFixedLengthLoop(Label* on_tos_equals_current_position) override; void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override; void CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) override; diff --git a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc index ae6a574c4f77fe..906c7754292954 100644 --- a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc +++ b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc @@ -245,7 +245,7 @@ void RegExpMacroAssemblerMIPS::CheckCharacterLT(base::uc16 limit, BranchOrBacktrack(on_less, lt, current_character(), Operand(limit)); } -void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) { +void RegExpMacroAssemblerMIPS::CheckFixedLengthLoop(Label* on_equal) { Label backtrack_non_equal; __ Lw(a0, MemOperand(backtrack_stackpointer(), 0)); __ Branch(&backtrack_non_equal, ne, current_input_offset(), Operand(a0)); diff --git a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h index 8c4b3ca7d8c255..e01801488336d5 100644 --- a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h +++ b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h @@ -28,9 +28,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS Label* on_equal) override; void CheckCharacterGT(base::uc16 limit, Label* on_greater) override; void CheckCharacterLT(base::uc16 limit, Label* on_less) override; - // A "greedy loop" is a loop that is both greedy and with a simple + // A "fixed length loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. - void CheckGreedyLoop(Label* on_tos_equals_current_position) override; + void CheckFixedLengthLoop(Label* on_tos_equals_current_position) override; void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override; void CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) override; diff --git a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc index 027f0f8dab10c5..748dc2d980e5d4 100644 --- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc +++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc @@ -234,7 +234,7 @@ void RegExpMacroAssemblerPPC::CheckCharacterLT(base::uc16 limit, BranchOrBacktrack(lt, on_less); } -void RegExpMacroAssemblerPPC::CheckGreedyLoop(Label* on_equal) { +void RegExpMacroAssemblerPPC::CheckFixedLengthLoop(Label* on_equal) { Label backtrack_non_equal; __ LoadU64(r3, MemOperand(backtrack_stackpointer(), 0)); __ CmpS64(current_input_offset(), r3); diff --git a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h index 917b7897a1d4fa..fca28c9c2e3551 100644 --- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h +++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h @@ -29,9 +29,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerPPC void CheckCharacterGT(base::uc16 limit, Label* on_greater) override; void CheckCharacterLT(base::uc16 limit, Label* on_less) override; - // A "greedy loop" is a loop that is both greedy and with a simple + // A "fixed length loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. - void CheckGreedyLoop(Label* on_tos_equals_current_position) override; + void CheckFixedLengthLoop(Label* on_tos_equals_current_position) override; void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override; void CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) override; diff --git a/deps/v8/src/regexp/regexp-bytecode-generator.cc b/deps/v8/src/regexp/regexp-bytecode-generator.cc index 3aa7027df5cd29..a3a548079e11e2 100644 --- a/deps/v8/src/regexp/regexp-bytecode-generator.cc +++ b/deps/v8/src/regexp/regexp-bytecode-generator.cc @@ -174,9 +174,9 @@ void RegExpBytecodeGenerator::AdvanceCurrentPosition(int by) { advance_current_end_ = pc_; } -void RegExpBytecodeGenerator::CheckGreedyLoop( +void RegExpBytecodeGenerator::CheckFixedLengthLoop( Label* on_tos_equals_current_position) { - Emit(BC_CHECK_GREEDY, 0); + Emit(BC_CHECK_FIXED_LENGTH, 0); EmitOrLink(on_tos_equals_current_position); } diff --git a/deps/v8/src/regexp/regexp-bytecode-generator.h b/deps/v8/src/regexp/regexp-bytecode-generator.h index 2b70ba47b6d1f7..c6c522c3cbe79d 100644 --- a/deps/v8/src/regexp/regexp-bytecode-generator.h +++ b/deps/v8/src/regexp/regexp-bytecode-generator.h @@ -56,7 +56,7 @@ class V8_EXPORT_PRIVATE RegExpBytecodeGenerator : public RegExpMacroAssembler { Label* on_equal) override; void CheckCharacterGT(base::uc16 limit, Label* on_greater) override; void CheckCharacterLT(base::uc16 limit, Label* on_less) override; - void CheckGreedyLoop(Label* on_tos_equals_current_position) override; + void CheckFixedLengthLoop(Label* on_tos_equals_current_position) override; void CheckAtStart(int cp_offset, Label* on_at_start) override; void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override; void CheckNotCharacter(unsigned c, Label* on_not_equal) override; diff --git a/deps/v8/src/regexp/regexp-bytecodes.h b/deps/v8/src/regexp/regexp-bytecodes.h index 3d51f116f277b6..a0f0315457fda2 100644 --- a/deps/v8/src/regexp/regexp-bytecodes.h +++ b/deps/v8/src/regexp/regexp-bytecodes.h @@ -53,7 +53,7 @@ static_assert(1 << BYTECODE_SHIFT > BYTECODE_MASK); /* 0x00 - 0x07: 0x10 (fixed) Bytecode */ \ /* 0x08 - 0x1F: 0x00 (unused) Padding */ \ /* 0x20 - 0x3F: Address of bytecode to jump to */ \ - V(GOTO, 16, 8) /* bc8 pad24 addr32 */ \ + V(GOTO, 16, 8) /* bc8 pad24 addr32 */ \ /* Check if offset is in range and load character at given offset. */ \ /* Bit Layout: */ \ /* 0x00 - 0x07: 0x11 (fixed) Bytecode */ \ @@ -64,22 +64,22 @@ static_assert(1 << BYTECODE_SHIFT > BYTECODE_MASK); /* Bit Layout: */ \ /* 0x00 - 0x07: 0x12 (fixed) Bytecode */ \ /* 0x08 - 0x1F: Offset from current position */ \ - V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 4) /* bc8 offset24 */ \ - V(LOAD_2_CURRENT_CHARS, 19, 8) /* bc8 offset24 addr32 */ \ - V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 4) /* bc8 offset24 */ \ - V(LOAD_4_CURRENT_CHARS, 21, 8) /* bc8 offset24 addr32 */ \ - V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 4) /* bc8 offset24 */ \ - V(CHECK_4_CHARS, 23, 12) /* bc8 pad24 uint32 addr32 */ \ + V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 4) /* bc8 offset24 */ \ + V(LOAD_2_CURRENT_CHARS, 19, 8) /* bc8 offset24 addr32 */ \ + V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 4) /* bc8 offset24 */ \ + V(LOAD_4_CURRENT_CHARS, 21, 8) /* bc8 offset24 addr32 */ \ + V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 4) /* bc8 offset24 */ \ + V(CHECK_4_CHARS, 23, 12) /* bc8 pad24 uint32 addr32 */ \ /* Check if current character is equal to a given character */ \ /* Bit Layout: */ \ /* 0x00 - 0x07: 0x19 (fixed) Bytecode */ \ /* 0x08 - 0x0F: 0x00 (unused) Padding */ \ /* 0x10 - 0x1F: Character to check */ \ /* 0x20 - 0x3F: Address of bytecode when matched */ \ - V(CHECK_CHAR, 24, 8) /* bc8 pad8 uint16 addr32 */ \ - V(CHECK_NOT_4_CHARS, 25, 12) /* bc8 pad24 uint32 addr32 */ \ - V(CHECK_NOT_CHAR, 26, 8) /* bc8 pad8 uint16 addr32 */ \ - V(AND_CHECK_4_CHARS, 27, 16) /* bc8 pad24 uint32 uint32 addr32 */ \ + V(CHECK_CHAR, 24, 8) /* bc8 pad8 uint16 addr32 */ \ + V(CHECK_NOT_4_CHARS, 25, 12) /* bc8 pad24 uint32 addr32 */ \ + V(CHECK_NOT_CHAR, 26, 8) /* bc8 pad8 uint16 addr32 */ \ + V(AND_CHECK_4_CHARS, 27, 16) /* bc8 pad24 uint32 uint32 addr32 */ \ /* Checks if the current character combined with mask (bitwise and) */ \ /* matches a character (e.g. used when two characters in a disjunction */ \ /* differ by only a single bit */ \ @@ -89,14 +89,14 @@ static_assert(1 << BYTECODE_SHIFT > BYTECODE_MASK); /* 0x10 - 0x1F: Character to match against (after mask aplied) */ \ /* 0x20 - 0x3F: Bitmask bitwise and combined with current character */ \ /* 0x40 - 0x5F: Address of bytecode when matched */ \ - V(AND_CHECK_CHAR, 28, 12) /* bc8 pad8 uint16 uint32 addr32 */ \ - V(AND_CHECK_NOT_4_CHARS, 29, 16) /* bc8 pad24 uint32 uint32 addr32 */ \ - V(AND_CHECK_NOT_CHAR, 30, 12) /* bc8 pad8 uint16 uint32 addr32 */ \ + V(AND_CHECK_CHAR, 28, 12) /* bc8 pad8 uint16 uint32 addr32 */ \ + V(AND_CHECK_NOT_4_CHARS, 29, 16) /* bc8 pad24 uint32 uint32 addr32 */ \ + V(AND_CHECK_NOT_CHAR, 30, 12) /* bc8 pad8 uint16 uint32 addr32 */ \ V(MINUS_AND_CHECK_NOT_CHAR, 31, \ - 12) /* bc8 pad8 base::uc16 base::uc16 base::uc16 addr32 */ \ - V(CHECK_CHAR_IN_RANGE, 32, 12) /* bc8 pad24 base::uc16 base::uc16 addr32 */ \ + 12) /* bc8 pad8 base::uc16 base::uc16 base::uc16 addr32 */ \ + V(CHECK_CHAR_IN_RANGE, 32, 12) /* bc8 pad24 base::uc16 base::uc16 addr32 */ \ V(CHECK_CHAR_NOT_IN_RANGE, 33, \ - 12) /* bc8 pad24 base::uc16 base::uc16 addr32 */ \ + 12) /* bc8 pad24 base::uc16 base::uc16 addr32 */ \ /* Checks if the current character matches any of the characters encoded */ \ /* in a bit table. Similar to/inspired by boyer moore string search */ \ /* Bit Layout: */ \ @@ -104,40 +104,40 @@ static_assert(1 << BYTECODE_SHIFT > BYTECODE_MASK); /* 0x08 - 0x1F: 0x00 (unused) Padding */ \ /* 0x20 - 0x3F: Address of bytecode when bit is set */ \ /* 0x40 - 0xBF: Bit table */ \ - V(CHECK_BIT_IN_TABLE, 34, 24) /* bc8 pad24 addr32 bits128 */ \ - V(CHECK_LT, 35, 8) /* bc8 pad8 base::uc16 addr32 */ \ - V(CHECK_GT, 36, 8) /* bc8 pad8 base::uc16 addr32 */ \ - V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \ - V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \ + V(CHECK_BIT_IN_TABLE, 34, 24) /* bc8 pad24 addr32 bits128 */ \ + V(CHECK_LT, 35, 8) /* bc8 pad8 base::uc16 addr32 */ \ + V(CHECK_GT, 36, 8) /* bc8 pad8 base::uc16 addr32 */ \ + V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \ + V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \ V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 39, 8) \ - V(CHECK_NOT_BACK_REF_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \ - V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 41, 8) /* bc8 reg_idx24 addr32 */ \ + V(CHECK_NOT_BACK_REF_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \ + V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 41, 8) /* bc8 reg_idx24 addr32 */ \ V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, 42, 8) \ - V(CHECK_NOT_REGS_EQUAL, 43, 12) /* bc8 regidx24 reg_idx32 addr32 */ \ - V(CHECK_REGISTER_LT, 44, 12) /* bc8 reg_idx24 value32 addr32 */ \ - V(CHECK_REGISTER_GE, 45, 12) /* bc8 reg_idx24 value32 addr32 */ \ - V(CHECK_REGISTER_EQ_POS, 46, 8) /* bc8 reg_idx24 addr32 */ \ - V(CHECK_AT_START, 47, 8) /* bc8 pad24 addr32 */ \ - V(CHECK_NOT_AT_START, 48, 8) /* bc8 offset24 addr32 */ \ + V(CHECK_NOT_REGS_EQUAL, 43, 12) /* bc8 regidx24 reg_idx32 addr32 */ \ + V(CHECK_REGISTER_LT, 44, 12) /* bc8 reg_idx24 value32 addr32 */ \ + V(CHECK_REGISTER_GE, 45, 12) /* bc8 reg_idx24 value32 addr32 */ \ + V(CHECK_REGISTER_EQ_POS, 46, 8) /* bc8 reg_idx24 addr32 */ \ + V(CHECK_AT_START, 47, 8) /* bc8 pad24 addr32 */ \ + V(CHECK_NOT_AT_START, 48, 8) /* bc8 offset24 addr32 */ \ /* Checks if the current position matches top of backtrack stack */ \ /* Bit Layout: */ \ /* 0x00 - 0x07: 0x31 (fixed) Bytecode */ \ /* 0x08 - 0x1F: 0x00 (unused) Padding */ \ /* 0x20 - 0x3F: Address of bytecode when current matches tos */ \ - V(CHECK_GREEDY, 49, 8) /* bc8 pad24 addr32 */ \ + V(CHECK_FIXED_LENGTH, 49, 8) /* bc8 pad24 addr32 */ \ /* Advance character pointer by given offset and jump to another bytecode.*/ \ /* Bit Layout: */ \ /* 0x00 - 0x07: 0x32 (fixed) Bytecode */ \ /* 0x08 - 0x1F: Number of characters to advance */ \ /* 0x20 - 0x3F: Address of bytecode to jump to */ \ - V(ADVANCE_CP_AND_GOTO, 50, 8) /* bc8 offset24 addr32 */ \ - V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */ \ + V(ADVANCE_CP_AND_GOTO, 50, 8) /* bc8 offset24 addr32 */ \ + V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */ \ /* Checks if current position + given offset is in range. */ \ /* Bit Layout: */ \ /* 0x00 - 0x07: 0x34 (fixed) Bytecode */ \ /* 0x08 - 0x1F: Offset from current position */ \ /* 0x20 - 0x3F: Address of bytecode when position is out of range */ \ - V(CHECK_CURRENT_POSITION, 52, 8) /* bc8 idx24 addr32 */ \ + V(CHECK_CURRENT_POSITION, 52, 8) /* bc8 idx24 addr32 */ \ /* Combination of: */ \ /* LOAD_CURRENT_CHAR, CHECK_BIT_IN_TABLE and ADVANCE_CP_AND_GOTO */ \ /* Emitted by RegExpBytecodePeepholeOptimization. */ \ diff --git a/deps/v8/src/regexp/regexp-compiler.cc b/deps/v8/src/regexp/regexp-compiler.cc index d9761c9277e588..e36335fc8c5830 100644 --- a/deps/v8/src/regexp/regexp-compiler.cc +++ b/deps/v8/src/regexp/regexp-compiler.cc @@ -126,7 +126,8 @@ using namespace regexp_compiler_constants; // NOLINT(build/namespaces) // A choice node looks at the following character and eliminates some of // the choices immediately based on that character. This is not yet // implemented. -// * Simple greedy loops store reduced backtracking information. +// * Simple greedy loops store reduced backtracking information. We call +// these fixed length loops // A quantifier like /.*foo/m will greedily match the whole input. It will // then need to backtrack to a point where it can match "foo". The naive // implementation of this would push each character position onto the @@ -1354,7 +1355,8 @@ RegExpNode::~RegExpNode() = default; RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, Trace* trace) { - // If we are generating a greedy loop then don't stop and don't reuse code. + // If we are generating a fixed length loop then don't stop and don't reuse + // code. if (trace->stop_node() != nullptr) { return CONTINUE; } @@ -2643,7 +2645,7 @@ void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte, } } -int TextNode::GreedyLoopTextLength() { return Length(); } +int TextNode::FixedLengthLoopLength() { return Length(); } RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( RegExpCompiler* compiler) { @@ -2665,8 +2667,8 @@ RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( // Finds the fixed match length of a sequence of nodes that goes from // this alternative and back to this choice node. If there are variable // length nodes or other complications in the way then return a sentinel -// value indicating that a greedy loop cannot be constructed. -int ChoiceNode::GreedyLoopTextLengthForAlternative( +// value indicating that a fixed length loop cannot be constructed. +int ChoiceNode::FixedLengthLoopLengthForAlternative( GuardedAlternative* alternative) { int length = 0; RegExpNode* node = alternative->node(); @@ -2675,11 +2677,11 @@ int ChoiceNode::GreedyLoopTextLengthForAlternative( int recursion_depth = 0; while (node != this) { if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { - return kNodeIsTooComplexForGreedyLoops; + return kNodeIsTooComplexForFixedLengthLoops; } - int node_length = node->GreedyLoopTextLength(); - if (node_length == kNodeIsTooComplexForGreedyLoops) { - return kNodeIsTooComplexForGreedyLoops; + int node_length = node->FixedLengthLoopLength(); + if (node_length == kNodeIsTooComplexForFixedLengthLoops) { + return kNodeIsTooComplexForFixedLengthLoops; } length += node_length; node = node->AsSeqRegExpNode()->on_success(); @@ -2688,10 +2690,10 @@ int ChoiceNode::GreedyLoopTextLengthForAlternative( length = -length; } // Check that we can jump by the whole text length. If not, return sentinel - // to indicate the we can't construct a greedy loop. + // to indicate the we can't construct a fixed length loop. if (length < RegExpMacroAssembler::kMinCPOffset || length > RegExpMacroAssembler::kMaxCPOffset) { - return kNodeIsTooComplexForGreedyLoops; + return kNodeIsTooComplexForFixedLengthLoops; } return length; } @@ -2711,12 +2713,12 @@ void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); if (trace->stop_node() == this) { - // Back edge of greedy optimized loop node graph. + // Back edge of fixed length optimized loop node graph. int text_length = - GreedyLoopTextLengthForAlternative(&(alternatives_->at(0))); - DCHECK_NE(kNodeIsTooComplexForGreedyLoops, text_length); + FixedLengthLoopLengthForAlternative(&(alternatives_->at(0))); + DCHECK_NE(kNodeIsTooComplexForFixedLengthLoops, text_length); // Update the counter-based backtracking info on the stack. This is an - // optimization for greedy loops (see below). + // optimization for fixed length loops (see below). DCHECK(trace->cp_offset() == text_length); macro_assembler->AdvanceCurrentPosition(text_length); macro_assembler->GoTo(trace->loop_label()); @@ -3129,11 +3131,12 @@ void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { * \ F V * \-----S4 * - * For greedy loops we push the current position, then generate the code that - * eats the input specially in EmitGreedyLoop. The other choice (the + * For fixed length loops we push the current position, then generate the code + * that eats the input specially in EmitFixedLengthLoop. The other choice (the * continuation) is generated by the normal code in EmitChoices, and steps back * in the input to the starting position when it fails to match. The loop code - * looks like this (U is the unwind code that steps back in the greedy loop). + * looks like this (U is the unwind code that steps back in the fixed length + * loop). * * _____ * / \ @@ -3153,7 +3156,7 @@ void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { * S2--/ */ -GreedyLoopState::GreedyLoopState(bool not_at_start) { +FixedLengthLoopState::FixedLengthLoopState(bool not_at_start) { counter_backtrack_trace_.set_backtrack(&label_); if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE); } @@ -3212,14 +3215,14 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { PreloadState preload; preload.init(); - GreedyLoopState greedy_loop_state(not_at_start()); + FixedLengthLoopState fixed_length_loop_state(not_at_start()); - int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0)); + int text_length = FixedLengthLoopLengthForAlternative(&alternatives_->at(0)); AlternativeGenerationList alt_gens(choice_count, zone()); - if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { - trace = EmitGreedyLoop(compiler, trace, &alt_gens, &preload, - &greedy_loop_state, text_length); + if (choice_count > 1 && text_length != kNodeIsTooComplexForFixedLengthLoops) { + trace = EmitFixedLengthLoop(compiler, trace, &alt_gens, &preload, + &fixed_length_loop_state, text_length); } else { preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace); @@ -3247,42 +3250,41 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { } } -Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace, - AlternativeGenerationList* alt_gens, - PreloadState* preload, - GreedyLoopState* greedy_loop_state, - int text_length) { +Trace* ChoiceNode::EmitFixedLengthLoop( + RegExpCompiler* compiler, Trace* trace, AlternativeGenerationList* alt_gens, + PreloadState* preload, FixedLengthLoopState* fixed_length_loop_state, + int text_length) { RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); // Here we have special handling for greedy loops containing only text nodes - // and other simple nodes. These are handled by pushing the current - // position on the stack and then incrementing the current position each - // time around the switch. On backtrack we decrement the current position - // and check it against the pushed value. This avoids pushing backtrack - // information for each iteration of the loop, which could take up a lot of - // space. + // and other simple nodes. We call these fixed length loops. These are + // handled by pushing the current position on the stack and then incrementing + // the current position each time around the switch. On backtrack we + // decrement the current position and check it against the pushed value. + // This avoids pushing backtrack information for each iteration of the loop, + // which could take up a lot of space. DCHECK(trace->stop_node() == nullptr); macro_assembler->PushCurrentPosition(); - Label greedy_match_failed; - Trace greedy_match_trace; - if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE); - greedy_match_trace.set_backtrack(&greedy_match_failed); + Label fixed_length_match_failed; + Trace fixed_length_match_trace; + if (not_at_start()) fixed_length_match_trace.set_at_start(Trace::FALSE_VALUE); + fixed_length_match_trace.set_backtrack(&fixed_length_match_failed); Label loop_label; macro_assembler->Bind(&loop_label); - greedy_match_trace.set_stop_node(this); - greedy_match_trace.set_loop_label(&loop_label); - alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace); - macro_assembler->Bind(&greedy_match_failed); + fixed_length_match_trace.set_stop_node(this); + fixed_length_match_trace.set_loop_label(&loop_label); + alternatives_->at(0).node()->Emit(compiler, &fixed_length_match_trace); + macro_assembler->Bind(&fixed_length_match_failed); - Label second_choice; // For use in greedy matches. + Label second_choice; // For use in fixed length matches. macro_assembler->Bind(&second_choice); - Trace* new_trace = greedy_loop_state->counter_backtrack_trace(); + Trace* new_trace = fixed_length_loop_state->counter_backtrack_trace(); EmitChoices(compiler, alt_gens, 1, new_trace, preload); - macro_assembler->Bind(greedy_loop_state->label()); + macro_assembler->Bind(fixed_length_loop_state->label()); // If we have unwound to the bottom then backtrack. - macro_assembler->CheckGreedyLoop(trace->backtrack()); + macro_assembler->CheckFixedLengthLoop(trace->backtrack()); // Otherwise try the second priority at an earlier position. macro_assembler->AdvanceCurrentPosition(-text_length); macro_assembler->GoTo(&second_choice); diff --git a/deps/v8/src/regexp/regexp-compiler.h b/deps/v8/src/regexp/regexp-compiler.h index f599c50371f3d0..e68c87e43a03f8 100644 --- a/deps/v8/src/regexp/regexp-compiler.h +++ b/deps/v8/src/regexp/regexp-compiler.h @@ -379,9 +379,9 @@ class Trace { TriBool at_start_; }; -class GreedyLoopState { +class FixedLengthLoopState { public: - explicit GreedyLoopState(bool not_at_start); + explicit FixedLengthLoopState(bool not_at_start); Label* label() { return &label_; } Trace* counter_backtrack_trace() { return &counter_backtrack_trace_; } diff --git a/deps/v8/src/regexp/regexp-interpreter.cc b/deps/v8/src/regexp/regexp-interpreter.cc index f5d0dfc7cc0ee8..fc08173412954a 100644 --- a/deps/v8/src/regexp/regexp-interpreter.cc +++ b/deps/v8/src/regexp/regexp-interpreter.cc @@ -578,12 +578,12 @@ IrregexpInterpreter::Result RawMatch( ADVANCE_CURRENT_POSITION(LoadPacked24Signed(insn)); DISPATCH(); } - BYTECODE(CHECK_GREEDY) { + BYTECODE(CHECK_FIXED_LENGTH) { if (current == backtrack_stack.peek()) { SET_PC_FROM_OFFSET(Load32Aligned(pc + 4)); backtrack_stack.pop(); } else { - ADVANCE(CHECK_GREEDY); + ADVANCE(CHECK_FIXED_LENGTH); } DISPATCH(); } diff --git a/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc b/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc index 0e229a08350f12..22a3b1181e6e57 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc +++ b/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc @@ -43,13 +43,11 @@ void RegExpMacroAssemblerTracer::AdvanceCurrentPosition(int by) { assembler_->AdvanceCurrentPosition(by); } - -void RegExpMacroAssemblerTracer::CheckGreedyLoop(Label* label) { - PrintF(" CheckGreedyLoop(label[%08x]);\n\n", LabelToInt(label)); - assembler_->CheckGreedyLoop(label); +void RegExpMacroAssemblerTracer::CheckFixedLengthLoop(Label* label) { + PrintF(" CheckFixedLengthLoop(label[%08x]);\n\n", LabelToInt(label)); + assembler_->CheckFixedLengthLoop(label); } - void RegExpMacroAssemblerTracer::PopCurrentPosition() { PrintF(" PopCurrentPosition();\n"); assembler_->PopCurrentPosition(); diff --git a/deps/v8/src/regexp/regexp-macro-assembler-tracer.h b/deps/v8/src/regexp/regexp-macro-assembler-tracer.h index e6ec33f0aa86e7..0f039fb6c5bc56 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler-tracer.h +++ b/deps/v8/src/regexp/regexp-macro-assembler-tracer.h @@ -32,7 +32,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler { Label* on_equal) override; void CheckCharacterGT(base::uc16 limit, Label* on_greater) override; void CheckCharacterLT(base::uc16 limit, Label* on_less) override; - void CheckGreedyLoop(Label* on_tos_equals_current_position) override; + void CheckFixedLengthLoop(Label* on_tos_equals_current_position) override; void CheckAtStart(int cp_offset, Label* on_at_start) override; void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override; void CheckNotBackReference(int start_reg, bool read_backward, diff --git a/deps/v8/src/regexp/regexp-macro-assembler.h b/deps/v8/src/regexp/regexp-macro-assembler.h index 16ffdd916ee4aa..451229e53f611d 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler.h +++ b/deps/v8/src/regexp/regexp-macro-assembler.h @@ -72,7 +72,7 @@ class RegExpMacroAssembler { Label* on_equal) = 0; virtual void CheckCharacterGT(base::uc16 limit, Label* on_greater) = 0; virtual void CheckCharacterLT(base::uc16 limit, Label* on_less) = 0; - virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0; + virtual void CheckFixedLengthLoop(Label* on_tos_equals_current_position) = 0; virtual void CheckAtStart(int cp_offset, Label* on_at_start) = 0; virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start) = 0; virtual void CheckNotBackReference(int start_reg, bool read_backward, diff --git a/deps/v8/src/regexp/regexp-nodes.h b/deps/v8/src/regexp/regexp-nodes.h index fc81bf79408375..b2bf746163c836 100644 --- a/deps/v8/src/regexp/regexp-nodes.h +++ b/deps/v8/src/regexp/regexp-nodes.h @@ -14,7 +14,7 @@ namespace internal { class AlternativeGenerationList; class BoyerMooreLookahead; -class GreedyLoopState; +class FixedLengthLoopState; class NodeVisitor; class QuickCheckDetails; class RegExpCompiler; @@ -185,8 +185,10 @@ class RegExpNode : public ZoneObject { RegExpCompiler* compiler, int characters_filled_in, bool not_at_start); - static const int kNodeIsTooComplexForGreedyLoops = kMinInt; - virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } + static const int kNodeIsTooComplexForFixedLengthLoops = kMinInt; + virtual int FixedLengthLoopLength() { + return kNodeIsTooComplexForFixedLengthLoops; + } // Only returns the successor for a text node of length 1 that matches any // character and that has no guards on it. virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( @@ -366,9 +368,9 @@ class ActionNode : public SeqRegExpNode { void FillInBMInfo(Isolate* isolate, int offset, int budget, BoyerMooreLookahead* bm, bool not_at_start) override; ActionType action_type() const { return action_type_; } - // TODO(erikcorry): We should allow some action nodes in greedy loops. - int GreedyLoopTextLength() override { - return kNodeIsTooComplexForGreedyLoops; + // TODO(erikcorry): We should allow some action nodes in fixed length loops. + int FixedLengthLoopLength() override { + return kNodeIsTooComplexForFixedLengthLoops; } RegExpFlags flags() const { DCHECK_EQ(action_type(), MODIFY_FLAGS); @@ -458,7 +460,7 @@ class TextNode : public SeqRegExpNode { bool read_backward() { return read_backward_; } void MakeCaseIndependent(Isolate* isolate, bool is_one_byte, RegExpFlags flags); - int GreedyLoopTextLength() override; + int FixedLengthLoopLength() override; RegExpNode* GetSuccessorOfOmnivorousTextNode( RegExpCompiler* compiler) override; void FillInBMInfo(Isolate* isolate, int offset, int budget, @@ -660,7 +662,7 @@ class ChoiceNode : public RegExpNode { virtual bool read_backward() { return false; } protected: - int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); + int FixedLengthLoopLengthForAlternative(GuardedAlternative* alternative); ZoneList* alternatives_; private: @@ -679,10 +681,11 @@ class ChoiceNode : public RegExpNode { PreloadState* preloads); void AssertGuardsMentionRegisters(Trace* trace); int EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, Trace* trace); - Trace* EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace, - AlternativeGenerationList* alt_gens, - PreloadState* preloads, - GreedyLoopState* greedy_loop_state, int text_length); + Trace* EmitFixedLengthLoop(RegExpCompiler* compiler, Trace* trace, + AlternativeGenerationList* alt_gens, + PreloadState* preloads, + FixedLengthLoopState* fixed_length_loop_state, + int text_length); void EmitChoices(RegExpCompiler* compiler, AlternativeGenerationList* alt_gens, int first_choice, Trace* trace, PreloadState* preloads); diff --git a/deps/v8/src/regexp/riscv/regexp-macro-assembler-riscv.cc b/deps/v8/src/regexp/riscv/regexp-macro-assembler-riscv.cc index 7984b7a1b0574d..01b1bcc61482d3 100644 --- a/deps/v8/src/regexp/riscv/regexp-macro-assembler-riscv.cc +++ b/deps/v8/src/regexp/riscv/regexp-macro-assembler-riscv.cc @@ -202,7 +202,7 @@ void RegExpMacroAssemblerRISCV::CheckCharacterLT(base::uc16 limit, BranchOrBacktrack(on_less, lt, current_character(), Operand(limit)); } -void RegExpMacroAssemblerRISCV::CheckGreedyLoop(Label* on_equal) { +void RegExpMacroAssemblerRISCV::CheckFixedLengthLoop(Label* on_equal) { Label backtrack_non_equal; __ Lw(a0, MemOperand(backtrack_stackpointer(), 0)); __ BranchShort(&backtrack_non_equal, ne, current_input_offset(), Operand(a0)); diff --git a/deps/v8/src/regexp/riscv/regexp-macro-assembler-riscv.h b/deps/v8/src/regexp/riscv/regexp-macro-assembler-riscv.h index 432fb7745ca1ee..6f37cd0de816ee 100644 --- a/deps/v8/src/regexp/riscv/regexp-macro-assembler-riscv.h +++ b/deps/v8/src/regexp/riscv/regexp-macro-assembler-riscv.h @@ -30,9 +30,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerRISCV Label* on_equal) override; void CheckCharacterGT(base::uc16 limit, Label* on_greater) override; void CheckCharacterLT(base::uc16 limit, Label* on_less) override; - // A "greedy loop" is a loop that is both greedy and with a simple + // A "fixed length loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. - void CheckGreedyLoop(Label* on_tos_equals_current_position) override; + void CheckFixedLengthLoop(Label* on_tos_equals_current_position) override; void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override; void CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) override; diff --git a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc index e6dfecad1459aa..6e865671e725c5 100644 --- a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc +++ b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc @@ -228,7 +228,7 @@ void RegExpMacroAssemblerS390::CheckCharacterLT(base::uc16 limit, BranchOrBacktrack(lt, on_less); } -void RegExpMacroAssemblerS390::CheckGreedyLoop(Label* on_equal) { +void RegExpMacroAssemblerS390::CheckFixedLengthLoop(Label* on_equal) { Label backtrack_non_equal; __ CmpS64(current_input_offset(), MemOperand(backtrack_stackpointer(), 0)); __ bne(&backtrack_non_equal); diff --git a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h index bbb6e61a230c21..09bcc2da8ea091 100644 --- a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h +++ b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h @@ -29,9 +29,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerS390 void CheckCharacterGT(base::uc16 limit, Label* on_greater) override; void CheckCharacterLT(base::uc16 limit, Label* on_less) override; - // A "greedy loop" is a loop that is both greedy and with a simple + // A "fixed length loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. - void CheckGreedyLoop(Label* on_tos_equals_current_position) override; + void CheckFixedLengthLoop(Label* on_tos_equals_current_position) override; void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override; void CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) override; diff --git a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc index 5b153e715573be..61d4392f336422 100644 --- a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc +++ b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc @@ -214,7 +214,7 @@ void RegExpMacroAssemblerX64::CheckCharacterLT(base::uc16 limit, BranchOrBacktrack(less, on_less); } -void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) { +void RegExpMacroAssemblerX64::CheckFixedLengthLoop(Label* on_equal) { Label fallthrough; __ cmpl(rdi, Operand(backtrack_stackpointer(), 0)); __ j(not_equal, &fallthrough); diff --git a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h index e04a805f441764..d17dea90f0a33f 100644 --- a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h +++ b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h @@ -29,9 +29,9 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerX64 Label* on_equal) override; void CheckCharacterGT(base::uc16 limit, Label* on_greater) override; void CheckCharacterLT(base::uc16 limit, Label* on_less) override; - // A "greedy loop" is a loop that is both greedy and with a simple + // A "fixed length loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. - void CheckGreedyLoop(Label* on_tos_equals_current_position) override; + void CheckFixedLengthLoop(Label* on_tos_equals_current_position) override; void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override; void CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) override; diff --git a/deps/v8/test/unittests/regexp/regexp-unittest.cc b/deps/v8/test/unittests/regexp/regexp-unittest.cc index edefe2fc7a667b..e9bae2f54c311a 100644 --- a/deps/v8/test/unittests/regexp/regexp-unittest.cc +++ b/deps/v8/test/unittests/regexp/regexp-unittest.cc @@ -1121,7 +1121,7 @@ TEST_F(RegExpTest, MacroAssemblerNativeRegisters) { m.ReadCurrentPositionFromRegister(out3); m.Bind(&loop3); m.AdvanceCurrentPosition(1); - m.CheckGreedyLoop(&exit_loop3); + m.CheckFixedLengthLoop(&exit_loop3); m.GoTo(&loop3); m.Bind(&exit_loop3); m.PopCurrentPosition(); From 974300dd40a7f9ffc1c631719224e5698f7d8635 Mon Sep 17 00:00:00 2001 From: Erik Corry Date: Tue, 6 May 2025 13:40:45 +0200 Subject: [PATCH 2/4] deps: V8: cherry-pick 0dd2318b5237 Original commit message: [regexp] Remove DeferredAction class. We can just chain up the traces and use the regular ActionNodes that we already have to represent the deferred actions. Also simplifies the ActionNodes a little. No functional change intended. Reduce the size of the Trace from 128->120 bytes. This is a stack allocated struct so to avoid stack overflows after adding the next_ field I am reducing it back down to 120 bytes by rearranging and shrinking fields. Change-Id: I6dca9946e035e9b22798e160b8fadaeca61f4955 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6512931 Reviewed-by: Patrick Thier Commit-Queue: Erik Corry Cr-Commit-Position: refs/heads/main@{#100092} Refs: https://github.com/v8/v8/commit/0dd2318b52379894e2d1754303e4a3bc3f660cb8 --- common.gypi | 2 +- deps/v8/src/regexp/regexp-compiler-tonode.cc | 6 +- deps/v8/src/regexp/regexp-compiler.cc | 150 +++++++---------- deps/v8/src/regexp/regexp-compiler.h | 160 +++++++++---------- deps/v8/src/regexp/regexp-dotprinter.cc | 22 +-- deps/v8/src/regexp/regexp-nodes.h | 56 +++++-- 6 files changed, 190 insertions(+), 206 deletions(-) diff --git a/common.gypi b/common.gypi index 1ebdc4636f1734..0bf6c606f9eba5 100644 --- a/common.gypi +++ b/common.gypi @@ -38,7 +38,7 @@ # Reset this number to 0 on major V8 upgrades. # Increment by one for each non-official patch applied to deps/v8. - 'v8_embedder_string': '-node.34', + 'v8_embedder_string': '-node.35', ##### V8 defaults for Node.js ##### diff --git a/deps/v8/src/regexp/regexp-compiler-tonode.cc b/deps/v8/src/regexp/regexp-compiler-tonode.cc index a71b6112e5c76c..d021885646f302 100644 --- a/deps/v8/src/regexp/regexp-compiler-tonode.cc +++ b/deps/v8/src/regexp/regexp-compiler-tonode.cc @@ -1248,9 +1248,9 @@ RegExpNode* RegExpCapture::ToNode(RegExpTree* body, int index, int start_reg = RegExpCapture::StartRegister(index); int end_reg = RegExpCapture::EndRegister(index); if (compiler->read_backward()) std::swap(start_reg, end_reg); - RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); + RegExpNode* store_end = ActionNode::ClearPosition(end_reg, on_success); RegExpNode* body_node = body->ToNode(compiler, store_end); - return ActionNode::StorePosition(start_reg, true, body_node); + return ActionNode::ClearPosition(start_reg, body_node); } namespace { @@ -2039,7 +2039,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min, int max, bool is_greedy, if (body_can_be_empty) { // If the body can be empty we need to store the start position // so we can bail out if it was empty. - body_node = ActionNode::StorePosition(body_start_reg, false, body_node); + body_node = ActionNode::RestorePosition(body_start_reg, body_node); } if (needs_capture_clearing) { // Before entering the body of this loop we need to clear captures. diff --git a/deps/v8/src/regexp/regexp-compiler.cc b/deps/v8/src/regexp/regexp-compiler.cc index e36335fc8c5830..d995a9a46f4838 100644 --- a/deps/v8/src/regexp/regexp-compiler.cc +++ b/deps/v8/src/regexp/regexp-compiler.cc @@ -294,30 +294,20 @@ RegExpCompiler::CompilationResult RegExpCompiler::Assemble( return {code, next_register_}; } -bool Trace::DeferredAction::Mentions(int that) { - if (action_type() == ActionNode::CLEAR_CAPTURES) { - Interval range = static_cast(this)->range(); - return range.Contains(that); - } else { - return reg() == that; - } -} - -bool Trace::mentions_reg(int reg) { - for (DeferredAction* action = actions_; action != nullptr; - action = action->next()) { - if (action->Mentions(reg)) return true; +bool Trace::mentions_reg(int reg) const { + for (auto trace : *this) { + if (trace->has_action() && trace->action()->Mentions(reg)) return true; } return false; } -bool Trace::GetStoredPosition(int reg, int* cp_offset) { +bool Trace::GetStoredPosition(int reg, int* cp_offset) const { DCHECK_EQ(0, *cp_offset); - for (DeferredAction* action = actions_; action != nullptr; - action = action->next()) { - if (action->Mentions(reg)) { - if (action->action_type() == ActionNode::STORE_POSITION) { - *cp_offset = static_cast(action)->cp_offset(); + for (auto trace : *this) { + if (trace->has_action() && trace->action()->Mentions(reg)) { + if (trace->action_->action_type() == ActionNode::CLEAR_POSITION || + trace->action_->action_type() == ActionNode::RESTORE_POSITION) { + *cp_offset = trace->next_->cp_offset(); return true; } else { return false; @@ -363,16 +353,13 @@ class DynamicBitSet : public ZoneObject { int Trace::FindAffectedRegisters(DynamicBitSet* affected_registers, Zone* zone) { int max_register = RegExpCompiler::kNoRegister; - for (DeferredAction* action = actions_; action != nullptr; - action = action->next()) { - if (action->action_type() == ActionNode::CLEAR_CAPTURES) { - Interval range = static_cast(action)->range(); - for (int i = range.from(); i <= range.to(); i++) + for (auto trace : *this) { + if (ActionNode* action = trace->action_) { + int to = action->register_to(); + for (int i = action->register_from(); i <= to; i++) { affected_registers->Set(i, zone); - if (range.to() > max_register) max_register = range.to(); - } else { - affected_registers->Set(action->reg(), zone); - if (action->reg() > max_register) max_register = action->reg(); + } + if (to > max_register) max_register = to; } } return max_register; @@ -420,15 +407,14 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, int store_position = kNoStore; // This is a little tricky because we are scanning the actions in reverse // historical order (newest first). - for (DeferredAction* action = actions_; action != nullptr; - action = action->next()) { + for (auto trace : *this) { + ActionNode* action = trace->action_; + if (!action) continue; if (action->Mentions(reg)) { switch (action->action_type()) { case ActionNode::SET_REGISTER_FOR_LOOP: { - Trace::DeferredSetRegisterForLoop* psr = - static_cast(action); if (!absolute) { - value += psr->value(); + value += action->value(); absolute = true; } // SET_REGISTER_FOR_LOOP is only used for newly introduced loop @@ -449,11 +435,10 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, DCHECK(!clear); undo_action = RESTORE; break; - case ActionNode::STORE_POSITION: { - Trace::DeferredCapture* pc = - static_cast(action); + case ActionNode::CLEAR_POSITION: + case ActionNode::RESTORE_POSITION: { if (!clear && store_position == kNoStore) { - store_position = pc->cp_offset(); + store_position = trace->next()->cp_offset(); } // For captures we know that stores and clears alternate. @@ -466,7 +451,11 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, // will set it again or fail. undo_action = IGNORE; } else { - undo_action = pc->is_capture() ? CLEAR : RESTORE; + if (action->action_type() == ActionNode::CLEAR_POSITION) { + undo_action = CLEAR; + } else { + undo_action = RESTORE; + } } DCHECK(!absolute); DCHECK_EQ(value, 0); @@ -527,7 +516,7 @@ void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { DCHECK(!is_trivial()); - if (actions_ == nullptr && backtrack() == nullptr) { + if (!has_any_actions() && backtrack() == nullptr) { // Here we just have some deferred cp advances to fix and we are back to // a normal situation. We may also have to forget some information gained // through a quick check that was already performed. @@ -638,35 +627,26 @@ void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { ActionNode* ActionNode::SetRegisterForLoop(int reg, int val, RegExpNode* on_success) { - ActionNode* result = - on_success->zone()->New(SET_REGISTER_FOR_LOOP, on_success); - result->data_.u_store_register.reg = reg; - result->data_.u_store_register.value = val; - return result; + return on_success->zone()->New(SET_REGISTER_FOR_LOOP, on_success, + reg, reg, val); } ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) { - ActionNode* result = - on_success->zone()->New(INCREMENT_REGISTER, on_success); - result->data_.u_increment_register.reg = reg; - return result; + return on_success->zone()->New(INCREMENT_REGISTER, on_success, + reg); } -ActionNode* ActionNode::StorePosition(int reg, bool is_capture, - RegExpNode* on_success) { - ActionNode* result = - on_success->zone()->New(STORE_POSITION, on_success); - result->data_.u_position_register.reg = reg; - result->data_.u_position_register.is_capture = is_capture; - return result; +ActionNode* ActionNode::ClearPosition(int reg, RegExpNode* on_success) { + return on_success->zone()->New(CLEAR_POSITION, on_success, reg); +} + +ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) { + return on_success->zone()->New(RESTORE_POSITION, on_success, reg); } ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) { - ActionNode* result = - on_success->zone()->New(CLEAR_CAPTURES, on_success); - result->data_.u_clear_captures.range_from = range.from(); - result->data_.u_clear_captures.range_to = range.to(); - return result; + return on_success->zone()->New(CLEAR_CAPTURES, on_success, + range.from(), range.to()); } ActionNode* ActionNode::BeginPositiveSubmatch(int stack_reg, int position_reg, @@ -2373,7 +2353,7 @@ void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { namespace { -bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) { +bool DeterminedAlready(const QuickCheckDetails* quick_check, int offset) { if (quick_check == nullptr) return false; if (offset >= quick_check->characters()) return false; return quick_check->positions(offset)->determines_perfectly; @@ -2423,7 +2403,7 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass, Isolate* isolate = assembler->isolate(); bool one_byte = compiler->one_byte(); Label* backtrack = trace->backtrack(); - QuickCheckDetails* quick_check = trace->quick_check_performed(); + const QuickCheckDetails* quick_check = trace->quick_check_performed(); int element_count = elements()->length(); int backward_offset = read_backward() ? -Length() : 0; for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { @@ -3206,7 +3186,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for // other choice nodes we only flush if we are out of code size budget. - if (trace->flush_budget() == 0 && trace->actions() != nullptr) { + if (trace->flush_budget() == 0 && trace->has_any_actions()) { trace->Flush(compiler, this); return; } @@ -3239,7 +3219,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { // If there are actions to be flushed we have to limit how many times // they are flushed. Take the budget of the parent trace and distribute // it fairly amongst the children. - if (new_trace.actions() != nullptr) { + if (new_trace.has_any_actions()) { new_trace.set_flush_budget(new_flush_budget); } bool next_expects_preload = @@ -3407,7 +3387,7 @@ void ChoiceNode::EmitChoices(RegExpCompiler* compiler, generate_full_check_inline = true; } if (generate_full_check_inline) { - if (new_trace.actions() != nullptr) { + if (new_trace.has_any_actions()) { new_trace.set_flush_budget(new_flush_budget); } for (int j = 0; j < guard_count; j++) { @@ -3468,42 +3448,26 @@ void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { RecursionCheck rc(compiler); switch (action_type_) { - case STORE_POSITION: { - Trace::DeferredCapture new_capture(data_.u_position_register.reg, - data_.u_position_register.is_capture, - trace); - Trace new_trace = *trace; - new_trace.add_action(&new_capture); - on_success()->Emit(compiler, &new_trace); - break; - } - case INCREMENT_REGISTER: { - Trace::DeferredIncrementRegister new_increment( - data_.u_increment_register.reg); - Trace new_trace = *trace; - new_trace.add_action(&new_increment); - on_success()->Emit(compiler, &new_trace); - break; - } - case SET_REGISTER_FOR_LOOP: { - Trace::DeferredSetRegisterForLoop new_set(data_.u_store_register.reg, - data_.u_store_register.value); - Trace new_trace = *trace; - new_trace.add_action(&new_set); - on_success()->Emit(compiler, &new_trace); - break; - } + // Start with the actions we know how to defer. These are just recorded in + // the new trace, no code is emitted right now. (If we backtrack then we + // don't have to perform and undo these actions.) + case CLEAR_POSITION: + case RESTORE_POSITION: + case INCREMENT_REGISTER: + case SET_REGISTER_FOR_LOOP: case CLEAR_CAPTURES: { - Trace::DeferredClearCaptures new_capture(Interval( - data_.u_clear_captures.range_from, data_.u_clear_captures.range_to)); Trace new_trace = *trace; - new_trace.add_action(&new_capture); + new_trace.add_action(this); on_success()->Emit(compiler, &new_trace); break; } + // We don't yet have the ability to defer these. case BEGIN_POSITIVE_SUBMATCH: case BEGIN_NEGATIVE_SUBMATCH: if (!trace->is_trivial()) { + // Complex situation: Flush the trace state to the assembler and + // generate a generic version of this action. This call will + // recurse back to the else clause here. trace->Flush(compiler, this); } else { assembler->WriteCurrentPositionToRegister( diff --git a/deps/v8/src/regexp/regexp-compiler.h b/deps/v8/src/regexp/regexp-compiler.h index e68c87e43a03f8..d90959bf9b792c 100644 --- a/deps/v8/src/regexp/regexp-compiler.h +++ b/deps/v8/src/regexp/regexp-compiler.h @@ -78,13 +78,18 @@ class QuickCheckDetails { base::uc32 value; bool determines_perfectly; }; - int characters() { return characters_; } + int characters() const { return characters_; } void set_characters(int characters) { characters_ = characters; } Position* positions(int index) { DCHECK_LE(0, index); DCHECK_GT(characters_, index); return positions_ + index; } + const Position* positions(int index) const { + DCHECK_LE(0, index); + DCHECK_GT(characters_, index); + return positions_ + index; + } uint32_t mask() { return mask_; } uint32_t value() { return value_; } @@ -234,82 +239,43 @@ class Trace { // or not known. enum TriBool { UNKNOWN = -1, FALSE_VALUE = 0, TRUE_VALUE = 1 }; - class DeferredAction { - public: - DeferredAction(ActionNode::ActionType action_type, int reg) - : action_type_(action_type), reg_(reg), next_(nullptr) {} - DeferredAction* next() { return next_; } - bool Mentions(int reg); - int reg() { return reg_; } - ActionNode::ActionType action_type() { return action_type_; } - - private: - ActionNode::ActionType action_type_; - int reg_; - DeferredAction* next_; - friend class Trace; - }; - - class DeferredCapture : public DeferredAction { - public: - DeferredCapture(int reg, bool is_capture, Trace* trace) - : DeferredAction(ActionNode::STORE_POSITION, reg), - cp_offset_(trace->cp_offset()), - is_capture_(is_capture) {} - int cp_offset() { return cp_offset_; } - bool is_capture() { return is_capture_; } - - private: - int cp_offset_; - bool is_capture_; - void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; } - }; - - class DeferredSetRegisterForLoop : public DeferredAction { - public: - DeferredSetRegisterForLoop(int reg, int value) - : DeferredAction(ActionNode::SET_REGISTER_FOR_LOOP, reg), - value_(value) {} - int value() { return value_; } - - private: - int value_; - }; - - class DeferredClearCaptures : public DeferredAction { - public: - explicit DeferredClearCaptures(Interval range) - : DeferredAction(ActionNode::CLEAR_CAPTURES, -1), range_(range) {} - Interval range() { return range_; } - - private: - Interval range_; - }; - - class DeferredIncrementRegister : public DeferredAction { - public: - explicit DeferredIncrementRegister(int reg) - : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) {} - }; - Trace() : cp_offset_(0), - actions_(nullptr), + flush_budget_(100), // Note: this is a 16 bit field. + at_start_(UNKNOWN), + has_any_actions_(false), + action_(nullptr), backtrack_(nullptr), stop_node_(nullptr), loop_label_(nullptr), characters_preloaded_(0), bound_checked_up_to_(0), - flush_budget_(100), - at_start_(UNKNOWN) {} + next_(nullptr) {} + + Trace(const Trace& other) V8_NOEXCEPT + : cp_offset_(other.cp_offset_), + flush_budget_(other.flush_budget_), + at_start_(other.at_start_), + has_any_actions_(other.has_any_actions_), + action_(nullptr), + backtrack_(other.backtrack_), + stop_node_(other.stop_node_), + loop_label_(other.loop_label_), + characters_preloaded_(other.characters_preloaded_), + bound_checked_up_to_(other.bound_checked_up_to_), + next_(&other) {} // End the trace. This involves flushing the deferred actions in the trace // and pushing a backtrack location onto the backtrack stack. Once this is // done we can start a new trace or go to one that has already been // generated. void Flush(RegExpCompiler* compiler, RegExpNode* successor); - int cp_offset() { return cp_offset_; } - DeferredAction* actions() { return actions_; } + int cp_offset() const { return cp_offset_; } + // Does any trace in the chain have an action? + bool has_any_actions() const { return has_any_actions_; } + // Does this particular trace object have an action? + bool has_action() const { return action_ != nullptr; } + ActionNode* action() const { return action_; } // A trivial trace is one that has no deferred actions or other state that // affects the assumptions used when generating code. There is no recorded // backtrack location in a trivial trace, so with a trivial trace we will @@ -320,43 +286,69 @@ class Trace { // actions in the trace. The location of the code generated for a node using // a trivial trace is recorded in a label in the node so that gotos can be // generated to that code. - bool is_trivial() { - return backtrack_ == nullptr && actions_ == nullptr && cp_offset_ == 0 && + bool is_trivial() const { + return backtrack_ == nullptr && !has_any_actions_ && cp_offset_ == 0 && characters_preloaded_ == 0 && bound_checked_up_to_ == 0 && quick_check_performed_.characters() == 0 && at_start_ == UNKNOWN; } - TriBool at_start() { return at_start_; } + TriBool at_start() const { return at_start_; } void set_at_start(TriBool at_start) { at_start_ = at_start; } - Label* backtrack() { return backtrack_; } - Label* loop_label() { return loop_label_; } - RegExpNode* stop_node() { return stop_node_; } - int characters_preloaded() { return characters_preloaded_; } - int bound_checked_up_to() { return bound_checked_up_to_; } - int flush_budget() { return flush_budget_; } + Label* backtrack() const { return backtrack_; } + Label* loop_label() const { return loop_label_; } + RegExpNode* stop_node() const { return stop_node_; } + int characters_preloaded() const { return characters_preloaded_; } + int bound_checked_up_to() const { return bound_checked_up_to_; } + int flush_budget() const { return flush_budget_; } QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; } - bool mentions_reg(int reg); + bool mentions_reg(int reg) const; // Returns true if a deferred position store exists to the specified // register and stores the offset in the out-parameter. Otherwise // returns false. - bool GetStoredPosition(int reg, int* cp_offset); + bool GetStoredPosition(int reg, int* cp_offset) const; // These set methods and AdvanceCurrentPositionInTrace should be used only on // new traces - the intention is that traces are immutable after creation. - void add_action(DeferredAction* new_action) { - DCHECK(new_action->next_ == nullptr); - new_action->next_ = actions_; - actions_ = new_action; + void add_action(ActionNode* new_action) { + DCHECK(action_ == nullptr); // Otherwise we lose an action. + action_ = new_action; + has_any_actions_ = true; } void set_backtrack(Label* backtrack) { backtrack_ = backtrack; } void set_stop_node(RegExpNode* node) { stop_node_ = node; } void set_loop_label(Label* label) { loop_label_ = label; } void set_characters_preloaded(int count) { characters_preloaded_ = count; } void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; } - void set_flush_budget(int to) { flush_budget_ = to; } + void set_flush_budget(int to) { + DCHECK(to <= UINT16_MAX); // Flush-budget is 16 bit. + flush_budget_ = to; + } void set_quick_check_performed(QuickCheckDetails* d) { quick_check_performed_ = *d; } void InvalidateCurrentCharacter(); void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler); + const Trace* next() const { return next_; } + + class ConstIterator final { + public: + ConstIterator& operator++() { + trace_ = trace_->next(); + return *this; + } + bool operator==(const ConstIterator& other) const { + return trace_ == other.trace_; + } + const Trace* operator*() const { return trace_; } + + private: + explicit ConstIterator(const Trace* trace) : trace_(trace) {} + + const Trace* trace_; + + friend class Trace; + }; + + ConstIterator begin() const { return ConstIterator(this); } + ConstIterator end() const { return ConstIterator(nullptr); } private: int FindAffectedRegisters(DynamicBitSet* affected_registers, Zone* zone); @@ -368,15 +360,17 @@ class Trace { const DynamicBitSet& registers_to_pop, const DynamicBitSet& registers_to_clear); int cp_offset_; - DeferredAction* actions_; + uint16_t flush_budget_; + TriBool at_start_ : 8; // Whether we are at the start of the string. + bool has_any_actions_ : 8; // Whether any trace in the chain has an action. + ActionNode* action_; Label* backtrack_; RegExpNode* stop_node_; Label* loop_label_; int characters_preloaded_; int bound_checked_up_to_; QuickCheckDetails quick_check_performed_; - int flush_budget_; - TriBool at_start_; + const Trace* next_; }; class FixedLengthLoopState { diff --git a/deps/v8/src/regexp/regexp-dotprinter.cc b/deps/v8/src/regexp/regexp-dotprinter.cc index 8917eab403bd57..c5eac05768d3ea 100644 --- a/deps/v8/src/regexp/regexp-dotprinter.cc +++ b/deps/v8/src/regexp/regexp-dotprinter.cc @@ -200,16 +200,19 @@ void DotPrinterImpl::VisitAction(ActionNode* that) { os_ << " n" << that << " ["; switch (that->action_type_) { case ActionNode::SET_REGISTER_FOR_LOOP: - os_ << "label=\"$" << that->data_.u_store_register.reg - << ":=" << that->data_.u_store_register.value << "\", shape=octagon"; + os_ << "label=\"$" << that->register_from() << ":=" << that->value() + << "\", shape=octagon"; break; case ActionNode::INCREMENT_REGISTER: - os_ << "label=\"$" << that->data_.u_increment_register.reg - << "++\", shape=octagon"; + os_ << "label=\"$" << that->register_from() << "++\", shape=octagon"; break; - case ActionNode::STORE_POSITION: - os_ << "label=\"$" << that->data_.u_position_register.reg - << ":=$pos\", shape=octagon"; + case ActionNode::CLEAR_POSITION: + os_ << "label=\"$" << that->register_from() + << ":=$pos c\", shape=octagon"; + break; + case ActionNode::RESTORE_POSITION: + os_ << "label=\"$" << that->register_from() + << ":=$pos r\", shape=octagon"; break; case ActionNode::BEGIN_POSITIVE_SUBMATCH: os_ << "label=\"$" << that->data_.u_submatch.current_position_register @@ -229,9 +232,8 @@ void DotPrinterImpl::VisitAction(ActionNode* that) { << "?\", shape=septagon"; break; case ActionNode::CLEAR_CAPTURES: { - os_ << "label=\"clear $" << that->data_.u_clear_captures.range_from - << " to $" << that->data_.u_clear_captures.range_to - << "\", shape=septagon"; + os_ << "label=\"clear $" << that->register_from() << " to $" + << that->register_to() << "\", shape=septagon"; break; } case ActionNode::MODIFY_FLAGS: { diff --git a/deps/v8/src/regexp/regexp-nodes.h b/deps/v8/src/regexp/regexp-nodes.h index b2bf746163c836..ecc0ee7c4fcb63 100644 --- a/deps/v8/src/regexp/regexp-nodes.h +++ b/deps/v8/src/regexp/regexp-nodes.h @@ -329,7 +329,8 @@ class ActionNode : public SeqRegExpNode { enum ActionType { SET_REGISTER_FOR_LOOP, INCREMENT_REGISTER, - STORE_POSITION, + CLEAR_POSITION, + RESTORE_POSITION, BEGIN_POSITIVE_SUBMATCH, BEGIN_NEGATIVE_SUBMATCH, POSITIVE_SUBMATCH_SUCCESS, @@ -340,8 +341,8 @@ class ActionNode : public SeqRegExpNode { static ActionNode* SetRegisterForLoop(int reg, int val, RegExpNode* on_success); static ActionNode* IncrementRegister(int reg, RegExpNode* on_success); - static ActionNode* StorePosition(int reg, bool is_capture, - RegExpNode* on_success); + static ActionNode* ClearPosition(int reg, RegExpNode* on_success); + static ActionNode* RestorePosition(int reg, RegExpNode* on_success); static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success); static ActionNode* BeginPositiveSubmatch(int stack_pointer_reg, int position_reg, RegExpNode* body, @@ -381,23 +382,50 @@ class ActionNode : public SeqRegExpNode { return data_.u_submatch.success_node; } + bool Mentions(int reg) const { + return base::IsInRange(reg, register_from(), register_to()); + } + + int value() const { + DCHECK(action_type() == SET_REGISTER_FOR_LOOP); + return data_.u_simple.value; + } + + bool IsSimpleAction() const { + return action_type() == CLEAR_POSITION || + action_type() == RESTORE_POSITION || + action_type() == INCREMENT_REGISTER || + action_type() == SET_REGISTER_FOR_LOOP || + action_type() == CLEAR_CAPTURES; + } + + int register_from() const { + DCHECK(IsSimpleAction()); + return data_.u_simple.register_from; + } + + int register_to() const { return data_.u_simple.register_to; } + protected: ActionNode(ActionType action_type, RegExpNode* on_success) : SeqRegExpNode(on_success), action_type_(action_type) {} + ActionNode(ActionType action_type, RegExpNode* on_success, int from, + int to = -1, int value = 0) + : SeqRegExpNode(on_success), action_type_(action_type) { + data_.u_simple.register_from = from; + data_.u_simple.register_to = to == -1 ? from : to; + data_.u_simple.value = value; + DCHECK(IsSimpleAction()); + } + private: union { struct { - int reg; + int register_from; + int register_to; int value; - } u_store_register; - struct { - int reg; - } u_increment_register; - struct { - int reg; - bool is_capture; - } u_position_register; + } u_simple; struct { int stack_pointer_register; int current_position_register; @@ -410,10 +438,6 @@ class ActionNode : public SeqRegExpNode { int repetition_register; int repetition_limit; } u_empty_match_check; - struct { - int range_from; - int range_to; - } u_clear_captures; struct { int flags; } u_modify_flags; From 615ef5a0701dd187e7ccc080ac583e91ffc1bddb Mon Sep 17 00:00:00 2001 From: Erik Corry Date: Wed, 7 May 2025 13:11:07 +0200 Subject: [PATCH 3/4] deps: V8: cherry-pick 6bb32bd2c194 Original commit message: [regexp] Clean up state for fixed length loop. Also reduces the on-stack Trace size by one word, adds some comments, renames some variables for more clarity. Change-Id: I9ec105cd9cebbaba65e9801c47dd0574cc81f967 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6512896 Reviewed-by: Patrick Thier Commit-Queue: Erik Corry Cr-Commit-Position: refs/heads/main@{#100117} Refs: https://github.com/v8/v8/commit/6bb32bd2c194f67778188014d3cf8b8be9467b27 --- common.gypi | 2 +- deps/v8/src/regexp/regexp-compiler.cc | 60 ++++++++++++++++++--------- deps/v8/src/regexp/regexp-compiler.h | 32 ++++++++------ 3 files changed, 60 insertions(+), 34 deletions(-) diff --git a/common.gypi b/common.gypi index 0bf6c606f9eba5..292aa3daa524e6 100644 --- a/common.gypi +++ b/common.gypi @@ -38,7 +38,7 @@ # Reset this number to 0 on major V8 upgrades. # Increment by one for each non-official patch applied to deps/v8. - 'v8_embedder_string': '-node.35', + 'v8_embedder_string': '-node.36', ##### V8 defaults for Node.js ##### diff --git a/deps/v8/src/regexp/regexp-compiler.cc b/deps/v8/src/regexp/regexp-compiler.cc index d995a9a46f4838..23095db552a2eb 100644 --- a/deps/v8/src/regexp/regexp-compiler.cc +++ b/deps/v8/src/regexp/regexp-compiler.cc @@ -1337,7 +1337,7 @@ RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, Trace* trace) { // If we are generating a fixed length loop then don't stop and don't reuse // code. - if (trace->stop_node() != nullptr) { + if (trace->fixed_length_loop_state() != nullptr) { return CONTINUE; } @@ -2692,7 +2692,8 @@ void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); - if (trace->stop_node() == this) { + if (trace->fixed_length_loop_state() != nullptr && + trace->fixed_length_loop_state()->loop_choice_node() == this) { // Back edge of fixed length optimized loop node graph. int text_length = FixedLengthLoopLengthForAlternative(&(alternatives_->at(0))); @@ -2701,10 +2702,10 @@ void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { // optimization for fixed length loops (see below). DCHECK(trace->cp_offset() == text_length); macro_assembler->AdvanceCurrentPosition(text_length); - macro_assembler->GoTo(trace->loop_label()); + trace->fixed_length_loop_state()->GoToLoopTopLabel(macro_assembler); return; } - DCHECK_NULL(trace->stop_node()); + DCHECK_NULL(trace->fixed_length_loop_state()); if (!trace->is_trivial()) { trace->Flush(compiler, this); return; @@ -3136,11 +3137,28 @@ void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { * S2--/ */ -FixedLengthLoopState::FixedLengthLoopState(bool not_at_start) { - counter_backtrack_trace_.set_backtrack(&label_); +FixedLengthLoopState::FixedLengthLoopState(bool not_at_start, + ChoiceNode* loop_choice_node) + : loop_choice_node_(loop_choice_node) { + counter_backtrack_trace_.set_backtrack(&step_backwards_label_); if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE); } +void FixedLengthLoopState::BindStepBackwardsLabel( + RegExpMacroAssembler* macro_assembler) { + macro_assembler->Bind(&step_backwards_label_); +} + +void FixedLengthLoopState::BindLoopTopLabel( + RegExpMacroAssembler* macro_assembler) { + macro_assembler->Bind(&loop_top_label_); +} + +void FixedLengthLoopState::GoToLoopTopLabel( + RegExpMacroAssembler* macro_assembler) { + macro_assembler->GoTo(&loop_top_label_); +} + void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { #ifdef DEBUG int choice_count = alternatives_->length(); @@ -3195,7 +3213,9 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { PreloadState preload; preload.init(); - FixedLengthLoopState fixed_length_loop_state(not_at_start()); + // This must be outside the 'if' because the trace we use for what + // comes after the fixed_length_loop is inside it and needs the lifetime. + FixedLengthLoopState fixed_length_loop_state(not_at_start(), this); int text_length = FixedLengthLoopLengthForAlternative(&alternatives_->at(0)); AlternativeGenerationList alt_gens(choice_count, zone()); @@ -3242,32 +3262,32 @@ Trace* ChoiceNode::EmitFixedLengthLoop( // decrement the current position and check it against the pushed value. // This avoids pushing backtrack information for each iteration of the loop, // which could take up a lot of space. - DCHECK(trace->stop_node() == nullptr); + DCHECK(trace->fixed_length_loop_state() == nullptr); macro_assembler->PushCurrentPosition(); - Label fixed_length_match_failed; + // This is the label for trying to match what comes after the greedy + // quantifier, either because the body of the quantifier failed, or because + // we have stepped back to try again with one iteration fewer. + Label after_body_match_attempt; Trace fixed_length_match_trace; if (not_at_start()) fixed_length_match_trace.set_at_start(Trace::FALSE_VALUE); - fixed_length_match_trace.set_backtrack(&fixed_length_match_failed); - Label loop_label; - macro_assembler->Bind(&loop_label); - fixed_length_match_trace.set_stop_node(this); - fixed_length_match_trace.set_loop_label(&loop_label); + fixed_length_match_trace.set_backtrack(&after_body_match_attempt); + fixed_length_loop_state->BindLoopTopLabel(macro_assembler); + fixed_length_match_trace.set_fixed_length_loop_state(fixed_length_loop_state); alternatives_->at(0).node()->Emit(compiler, &fixed_length_match_trace); - macro_assembler->Bind(&fixed_length_match_failed); - - Label second_choice; // For use in fixed length matches. - macro_assembler->Bind(&second_choice); + macro_assembler->Bind(&after_body_match_attempt); Trace* new_trace = fixed_length_loop_state->counter_backtrack_trace(); + // In a fixed length loop there is only one other choice, which is what + // comes after the greedy quantifer. Try to match that now. EmitChoices(compiler, alt_gens, 1, new_trace, preload); - macro_assembler->Bind(fixed_length_loop_state->label()); + fixed_length_loop_state->BindStepBackwardsLabel(macro_assembler); // If we have unwound to the bottom then backtrack. macro_assembler->CheckFixedLengthLoop(trace->backtrack()); // Otherwise try the second priority at an earlier position. macro_assembler->AdvanceCurrentPosition(-text_length); - macro_assembler->GoTo(&second_choice); + macro_assembler->GoTo(&after_body_match_attempt); return new_trace; } diff --git a/deps/v8/src/regexp/regexp-compiler.h b/deps/v8/src/regexp/regexp-compiler.h index d90959bf9b792c..a0fc3352292e59 100644 --- a/deps/v8/src/regexp/regexp-compiler.h +++ b/deps/v8/src/regexp/regexp-compiler.h @@ -17,6 +17,7 @@ namespace internal { class DynamicBitSet; class Isolate; +class FixedLengthLoopState; namespace regexp_compiler_constants { @@ -246,8 +247,7 @@ class Trace { has_any_actions_(false), action_(nullptr), backtrack_(nullptr), - stop_node_(nullptr), - loop_label_(nullptr), + fixed_length_loop_state_(nullptr), characters_preloaded_(0), bound_checked_up_to_(0), next_(nullptr) {} @@ -259,8 +259,7 @@ class Trace { has_any_actions_(other.has_any_actions_), action_(nullptr), backtrack_(other.backtrack_), - stop_node_(other.stop_node_), - loop_label_(other.loop_label_), + fixed_length_loop_state_(other.fixed_length_loop_state_), characters_preloaded_(other.characters_preloaded_), bound_checked_up_to_(other.bound_checked_up_to_), next_(&other) {} @@ -294,8 +293,9 @@ class Trace { TriBool at_start() const { return at_start_; } void set_at_start(TriBool at_start) { at_start_ = at_start; } Label* backtrack() const { return backtrack_; } - Label* loop_label() const { return loop_label_; } - RegExpNode* stop_node() const { return stop_node_; } + FixedLengthLoopState* fixed_length_loop_state() const { + return fixed_length_loop_state_; + } int characters_preloaded() const { return characters_preloaded_; } int bound_checked_up_to() const { return bound_checked_up_to_; } int flush_budget() const { return flush_budget_; } @@ -313,8 +313,9 @@ class Trace { has_any_actions_ = true; } void set_backtrack(Label* backtrack) { backtrack_ = backtrack; } - void set_stop_node(RegExpNode* node) { stop_node_ = node; } - void set_loop_label(Label* label) { loop_label_ = label; } + void set_fixed_length_loop_state(FixedLengthLoopState* state) { + fixed_length_loop_state_ = state; + } void set_characters_preloaded(int count) { characters_preloaded_ = count; } void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; } void set_flush_budget(int to) { @@ -365,8 +366,7 @@ class Trace { bool has_any_actions_ : 8; // Whether any trace in the chain has an action. ActionNode* action_; Label* backtrack_; - RegExpNode* stop_node_; - Label* loop_label_; + FixedLengthLoopState* fixed_length_loop_state_; int characters_preloaded_; int bound_checked_up_to_; QuickCheckDetails quick_check_performed_; @@ -375,13 +375,19 @@ class Trace { class FixedLengthLoopState { public: - explicit FixedLengthLoopState(bool not_at_start); + explicit FixedLengthLoopState(bool not_at_start, + ChoiceNode* loop_choice_node); - Label* label() { return &label_; } + void BindStepBackwardsLabel(RegExpMacroAssembler* macro_assembler); + void BindLoopTopLabel(RegExpMacroAssembler* macro_assembler); + void GoToLoopTopLabel(RegExpMacroAssembler* macro_assembler); + ChoiceNode* loop_choice_node() const { return loop_choice_node_; } Trace* counter_backtrack_trace() { return &counter_backtrack_trace_; } private: - Label label_; + Label step_backwards_label_; + Label loop_top_label_; + ChoiceNode* loop_choice_node_; Trace counter_backtrack_trace_; }; From 5b26741d19de60fb7a9d81d9bb46a4be5e9f70b0 Mon Sep 17 00:00:00 2001 From: pthier Date: Mon, 29 Sep 2025 17:32:45 +0200 Subject: [PATCH 4/4] deps: V8: cherry-pick 72b0e27bd936 Original commit message: [regexp] Fix modifiers for ChoiceNodes Each alternative might modify flags when their sub-graph is emitted. We need to restore flags to the value at the beginning of a ChoiceNode for each alternative. Drive-by: Move regexp-modifiers test out of harmony/ Fixed: 447583670 Change-Id: I9f41e51f34df7659461da0a4fcd28b7e157f52e1 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6995181 Reviewed-by: Jakob Linke Commit-Queue: Patrick Thier Cr-Commit-Position: refs/heads/main@{#102838} Refs: https://github.com/v8/v8/commit/72b0e27bd936432d4998e4d1762e7a8fe6d347f5 Fixes: https://github.com/nodejs/node/issues/60030 --- common.gypi | 2 +- deps/v8/src/regexp/regexp-compiler.cc | 16 +++++++++++----- deps/v8/src/regexp/regexp-nodes.h | 4 ++-- .../mjsunit/{harmony => }/regexp-modifiers.js | 5 +++++ 4 files changed, 19 insertions(+), 8 deletions(-) rename deps/v8/test/mjsunit/{harmony => }/regexp-modifiers.js (91%) diff --git a/common.gypi b/common.gypi index 292aa3daa524e6..db1625378697d9 100644 --- a/common.gypi +++ b/common.gypi @@ -38,7 +38,7 @@ # Reset this number to 0 on major V8 upgrades. # Increment by one for each non-official patch applied to deps/v8. - 'v8_embedder_string': '-node.36', + 'v8_embedder_string': '-node.37', ##### V8 defaults for Node.js ##### diff --git a/deps/v8/src/regexp/regexp-compiler.cc b/deps/v8/src/regexp/regexp-compiler.cc index 23095db552a2eb..0bd775dfba1926 100644 --- a/deps/v8/src/regexp/regexp-compiler.cc +++ b/deps/v8/src/regexp/regexp-compiler.cc @@ -3220,13 +3220,17 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { int text_length = FixedLengthLoopLengthForAlternative(&alternatives_->at(0)); AlternativeGenerationList alt_gens(choice_count, zone()); + // Flags need to be reset to the state of the ChoiceNode at the beginning + // of each alternative (in-line and out-of-line), as flags might be modified + // when emitting an alternative. + RegExpFlags flags = compiler->flags(); if (choice_count > 1 && text_length != kNodeIsTooComplexForFixedLengthLoops) { trace = EmitFixedLengthLoop(compiler, trace, &alt_gens, &preload, - &fixed_length_loop_state, text_length); + &fixed_length_loop_state, text_length, flags); } else { preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace); - EmitChoices(compiler, &alt_gens, 0, trace, &preload); + EmitChoices(compiler, &alt_gens, 0, trace, &preload, flags); } // At this point we need to generate slow checks for the alternatives where @@ -3234,6 +3238,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { // label was bound. int new_flush_budget = trace->flush_budget() / choice_count; for (int i = 0; i < choice_count; i++) { + compiler->set_flags(flags); AlternativeGeneration* alt_gen = alt_gens.at(i); Trace new_trace(*trace); // If there are actions to be flushed we have to limit how many times @@ -3253,7 +3258,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { Trace* ChoiceNode::EmitFixedLengthLoop( RegExpCompiler* compiler, Trace* trace, AlternativeGenerationList* alt_gens, PreloadState* preload, FixedLengthLoopState* fixed_length_loop_state, - int text_length) { + int text_length, RegExpFlags flags) { RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); // Here we have special handling for greedy loops containing only text nodes // and other simple nodes. We call these fixed length loops. These are @@ -3280,7 +3285,7 @@ Trace* ChoiceNode::EmitFixedLengthLoop( // In a fixed length loop there is only one other choice, which is what // comes after the greedy quantifer. Try to match that now. - EmitChoices(compiler, alt_gens, 1, new_trace, preload); + EmitChoices(compiler, alt_gens, 1, new_trace, preload, flags); fixed_length_loop_state->BindStepBackwardsLabel(macro_assembler); // If we have unwound to the bottom then backtrack. @@ -3340,7 +3345,7 @@ int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, void ChoiceNode::EmitChoices(RegExpCompiler* compiler, AlternativeGenerationList* alt_gens, int first_choice, Trace* trace, - PreloadState* preload) { + PreloadState* preload, RegExpFlags flags) { RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); SetUpPreLoad(compiler, trace, preload); @@ -3351,6 +3356,7 @@ void ChoiceNode::EmitChoices(RegExpCompiler* compiler, int new_flush_budget = trace->flush_budget() / choice_count; for (int i = first_choice; i < choice_count; i++) { + compiler->set_flags(flags); bool is_last = i == choice_count - 1; bool fall_through_on_failure = !is_last; GuardedAlternative alternative = alternatives_->at(i); diff --git a/deps/v8/src/regexp/regexp-nodes.h b/deps/v8/src/regexp/regexp-nodes.h index ecc0ee7c4fcb63..34c228d0e81861 100644 --- a/deps/v8/src/regexp/regexp-nodes.h +++ b/deps/v8/src/regexp/regexp-nodes.h @@ -709,10 +709,10 @@ class ChoiceNode : public RegExpNode { AlternativeGenerationList* alt_gens, PreloadState* preloads, FixedLengthLoopState* fixed_length_loop_state, - int text_length); + int text_length, RegExpFlags flags); void EmitChoices(RegExpCompiler* compiler, AlternativeGenerationList* alt_gens, int first_choice, - Trace* trace, PreloadState* preloads); + Trace* trace, PreloadState* preloads, RegExpFlags flags); // If true, this node is never checked at the start of the input. // Allows a new trace to start with at_start() set to false. diff --git a/deps/v8/test/mjsunit/harmony/regexp-modifiers.js b/deps/v8/test/mjsunit/regexp-modifiers.js similarity index 91% rename from deps/v8/test/mjsunit/harmony/regexp-modifiers.js rename to deps/v8/test/mjsunit/regexp-modifiers.js index f065f18df5929a..e012dc8d406c28 100644 --- a/deps/v8/test/mjsunit/harmony/regexp-modifiers.js +++ b/deps/v8/test/mjsunit/regexp-modifiers.js @@ -51,6 +51,11 @@ test(/F(?i:oo(?-i:b)a)r/, ['Foobar', 'FoObAr'], ['FooBar', 'FoobaR']); test(/F(?i:oo(?i:b)a)r/, ['Foobar', 'FoObAr', 'FOOBAr'], ['FoobaR']); test(/^[a-z](?-i:[a-z])$/i, ['ab', 'Ab'], ['aB']); test(/^(?i:[a-z])[a-z]$/, ['ab', 'Ab'], ['aB']); +test(/(?i:foo|bar)/, ['FOO', 'FOo', 'Foo', 'fOO', 'BAR', 'BAr', 'Bar', 'bAR']); +test(/(?i:foo|bar|baz)/, [ + 'FOO', 'FOo', 'Foo', 'fOO', 'BAR', 'BAr', 'Bar', 'bAR', 'BAZ', 'BAz', 'Baz', + 'bAZ' +]); test( /Foo(?i:B[\q{ĀĂĄ|AaA}--\q{āăą}])r/v, ['FooBaaar', 'FoobAAAr'], ['FooBĀĂĄr', 'FooBaaaR']);