From 8f7cb54445f8ee868ead6943f34ed59d2fb9d704 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 2 Feb 2024 16:15:35 +0100 Subject: [PATCH 01/13] Implement insertion mode algorithm --- .../class-wp-html-processor-state.php | 108 ++++++++++++++++++ .../html-api/class-wp-html-processor.php | 80 +++++++++++++ 2 files changed, 188 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor-state.php b/src/wp-includes/html-api/class-wp-html-processor-state.php index 9cf10c344107a..f72b6ebd765c9 100644 --- a/src/wp-includes/html-api/class-wp-html-processor-state.php +++ b/src/wp-includes/html-api/class-wp-html-processor-state.php @@ -59,6 +59,114 @@ class WP_HTML_Processor_State { */ const INSERTION_MODE_IN_BODY = 'insertion-mode-in-body'; + /** + * In select insertion mode for full HTML parser. + * + * @since 6.5.0 + * + * @see https://html.spec.whatwg.org/#parsing-main-inselect + * @see WP_HTML_Processor_State::$insertion_mode + * + * @var string + */ + const INSERTION_MODE_IN_SELECT = 'insertion-mode-in-select'; + + /** + * In select in table insertion mode for full HTML parser. + * + * @since 6.5.0 + * + * @see https://html.spec.whatwg.org/#parsing-main-inselectintable + * @see WP_HTML_Processor_State::$insertion_mode + * + * @var string + */ + const INSERTION_MODE_IN_SELECT_IN_TABLE = 'insertion-mode-in-select-in-table'; + + /** + * In table insertion mode for full HTML parser. + * + * @since 6.5.0 + * + * @see https://html.spec.whatwg.org/#parsing-main-intable + * @see WP_HTML_Processor_State::$insertion_mode + * + * @var string + */ + const INSERTION_MODE_IN_TABLE = 'insertion-mode-in-table'; + + /** + * In caption insertion mode for full HTML parser. + * + * @since 6.5.0 + * + * @see https://html.spec.whatwg.org/#parsing-main-incaption + * @see WP_HTML_Processor_State::$insertion_mode + * + * @var string + */ + const INSERTION_MODE_IN_CAPTION = 'insertion-mode-in-caption'; + + /** + * In table body insertion mode for full HTML parser. + * + * @since 6.5.0 + * + * @see https://html.spec.whatwg.org/#parsing-main-intablebody + * @see WP_HTML_Processor_State::$insertion_mode + * + * @var string + */ + const INSERTION_MODE_IN_TABLE_BODY = 'insertion-mode-in-table-body'; + + /** + * In row insertion mode for full HTML parser. + * + * @since 6.5.0 + * + * @see https://html.spec.whatwg.org/#parsing-main-inrow + * @see WP_HTML_Processor_State::$insertion_mode + * + * @var string + */ + const INSERTION_MODE_IN_ROW = 'insertion-mode-in-row'; + + /** + * In cell insertion mode for full HTML parser. + * + * @since 6.5.0 + * + * @see https://html.spec.whatwg.org/#parsing-main-incell + * @see WP_HTML_Processor_State::$insertion_mode + * + * @var string + */ + const INSERTION_MODE_IN_CELL = 'insertion-mode-in-cell'; + + /** + * In column group insertion mode for full HTML parser. + * + * @since 6.5.0 + * + * @see https://html.spec.whatwg.org/#parsing-main-incolumngroup + * @see WP_HTML_Processor_State::$insertion_mode + * + * @var string + */ + const INSERTION_MODE_IN_COLUMN_GROUP = 'insertion-mode-in-column-group'; + + /** + * In frameset insertion mode for full HTML parser. + * + * @since 6.5.0 + * + * @see https://html.spec.whatwg.org/#parsing-main-inframeset + * @see WP_HTML_Processor_State::$insertion_mode + * + * @var string + */ + const INSERTION_MODE_IN_FRAMESET = 'insertion-mode-in-frameset'; + /** * Tracks open elements while scanning HTML. * diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 29f1c7ac6d4cc..d597be34d12fa 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2129,6 +2129,86 @@ private function reconstruct_active_formatting_elements() { throw new WP_HTML_Unsupported_Exception( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' ); } + /** + * Runs the reset the insertion mode appropriately algorithm. + * + * @since 6.5.0 + * + * @throws WP_HTML_Unsupported_Exception When encoutering unsupported nodes. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately + */ + private function reset_insertion_mode() { + foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { + switch ( $node->node_name ) { + case 'SELECT': + foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $ancestor ) { + switch ( $ancestor ) { + // > If ancestor is a table node, switch the insertion mode to "in select in table" and return. + case 'TABLE': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE; + return; + // > If ancestor is a template node, jump to the step below labeled done. + case 'TEMPLATE': + break 2; + } + } + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT; + return; + + case 'TD': + case 'TH': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL; + return; + + case 'TR': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; + return; + + case 'TBODY': + case 'THEAD': + case 'TFOOT': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; + return; + + case 'CAPTION': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION; + return; + + case 'COLGROUP': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; + return; + + case 'TABLE': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; + return; + + case 'TEMPLATE': + $this->last_error = self::ERROR_UNSUPPORTED; + throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at TEMPLATE node.' ); + + case 'HEAD': + $this->last_error = self::ERROR_UNSUPPORTED; + throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at HEAD node.' ); + + case 'BODY': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; + return; + + case 'FRAMESET': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET; + return; + + case 'HTML': + $this->last_error = self::ERROR_UNSUPPORTED; + throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at HTML node.' ); + } + } + + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; + return; + } + /** * Runs the adoption agency algorithm. * From 91c6bf7ba6b76bf3e775785d0d5acf73bb6cb0e9 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 28 Jun 2024 17:04:21 +0200 Subject: [PATCH 02/13] Document complete algorithm, fix last handling --- .../html-api/class-wp-html-processor.php | 107 ++++++++++++++++-- 1 file changed, 97 insertions(+), 10 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index d597be34d12fa..06bd7b30dedf4 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2132,81 +2132,168 @@ private function reconstruct_active_formatting_elements() { /** * Runs the reset the insertion mode appropriately algorithm. * - * @since 6.5.0 + * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encoutering unsupported nodes. * * @see https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately */ - private function reset_insertion_mode() { - foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { + private function reset_insertion_mode(): void { + + /* + * > 1. Let _last_ be false. + * > 2. Let _node_ be the last node in the stack of open elements. + * > 3. _Loop_: If _node_ is the first node in the stack of open elements, then set _last_ + * > to true, and, if the parser was created as part of the HTML fragment parsing + * > algorithm (fragment case), set node to the context element passed to + * > that algorithm. + * > … + */ + $last = false; + $last_index = $this->state->stack_of_open_elements->count() - 1; + foreach ( $this->state->stack_of_open_elements->walk_up() as $i => $node ) { + // todo No idea if this is correct. + if ( $i === $last_index ) { + $last = true; + } switch ( $node->node_name ) { + /* + * > 4. If node is a `select` element, run these substeps: + * > 1. If _last_ is true, jump to the step below labeled done. + * > 2. Let _ancestor_ be _node_. + * > 3. _Loop_: If _ancestor_ is the first node in the stack of open elements, + * > jump to the step below labeled done. + * > 4. Let ancestor be the node before ancestor in the stack of open elements. + * > … + * > 7. Jump back to the step labeled _loop_. + * > 8. _Done_: Switch the insertion mode to "in select" and return. + */ case 'SELECT': foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $ancestor ) { switch ( $ancestor ) { - // > If ancestor is a table node, switch the insertion mode to "in select in table" and return. + /* + * > 5. If _ancestor_ is a `template` node, jump to the step below + * > labeled _done_. + */ + case 'TEMPLATE': + break 2; + + /* + * > 6. If _ancestor_ is a `table` node, switch the insertion mode to + * > "in select in table" and return. + */ case 'TABLE': $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE; return; - // > If ancestor is a template node, jump to the step below labeled done. - case 'TEMPLATE': - break 2; } } $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT; return; + /* + * > 5. If _node_ is a `td` or `th` element and _last_ is false, then switch the + * > insertion mode to "in cell" and return. + */ case 'TD': case 'TH': - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL; - return; + if ( ! $last ) { + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL; + return; + } + /* + * > 6. If _node_ is a `tr` element, then switch the insertion mode to "in row" + * > and return. + */ case 'TR': $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; return; + /* + * > 7. If _node_ is a `tbody`, `thead`, or `tfoot` element, then switch the + * > insertion mode to "in table body" and return. + */ case 'TBODY': case 'THEAD': case 'TFOOT': $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return; + /* + * > 8. If _node_ is a `caption` element, then switch the insertion mode to + * > "in caption" and return. + */ case 'CAPTION': $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION; return; + /* + * > 9. If _node_ is a `colgroup` element, then switch the insertion mode to + * > "in column group" and return. + */ case 'COLGROUP': $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; return; + /* + * > 10. If _node_ is a `table` element, then switch the insertion mode to + * > "in table" and return. + */ case 'TABLE': $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; return; + /* + * > 11. If _node_ is a `template` element, then switch the insertion mode to the + * > current template insertion mode and return. + */ case 'TEMPLATE': $this->last_error = self::ERROR_UNSUPPORTED; throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at TEMPLATE node.' ); + /* + * > 12. If _node_ is a `head` element and _last_ is false, then switch the + * > insertion mode to "in head" and return. + */ case 'HEAD': $this->last_error = self::ERROR_UNSUPPORTED; throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at HEAD node.' ); + /* + * > 13. If _node_ is a `body` element, then switch the insertion mode to "in body" + * > and return. + */ case 'BODY': $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; return; + /* + * > 14. If _node_ is a `frameset` element, then switch the insertion mode to + * > "in frameset" and return. (fragment case) + */ case 'FRAMESET': $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET; return; + /* + * > 15. If _node_ is an `html` element, run these substeps: + * > 1. If the head element pointer is null, switch the insertion mode to + * > "before head" and return. (fragment case) + * > 2. Otherwise, the head element pointer is not null, switch the insertion mode to "after head" and return. + */ case 'HTML': $this->last_error = self::ERROR_UNSUPPORTED; throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at HTML node.' ); } } + /* + * > 16. If _last_ is true, then switch the insertion mode to "in body" + * > and return. (fragment case) + * + * `$last` will always be true here, we've reached the end of the stack. + */ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; - return; } /** From 2f246a81a42eef97553dbfa452456d2409a1c913 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 2 Jul 2024 18:31:49 +0200 Subject: [PATCH 03/13] Add tests --- .../tests/html-api/wpHtmlProcessorState.php | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 tests/phpunit/tests/html-api/wpHtmlProcessorState.php diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorState.php b/tests/phpunit/tests/html-api/wpHtmlProcessorState.php new file mode 100644 index 0000000000000..56d85a5520cf1 --- /dev/null +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorState.php @@ -0,0 +1,70 @@ +context_node = array( 'BODY', array() ); + + foreach ( $stack_of_open_elements as $i => $tag_name ) { + if ( ! ctype_upper( $tag_name ) ) { + throw new Error( 'Expected upper case tag names.' ); + } + $state->stack_of_open_elements->push( new WP_HTML_Token( $i, $tag_name, false ) ); + } + $state->reset_insertion_mode(); + + $this->assertSame( $expected_insertion_mode, $state->insertion_mode ); + } + + /** + * Data provider. + * + * @return array{ 0: array, 1: string, 2: string } + */ + public static function data_insertion_mode_cases() { + return array( + 'SELECT last element' => array( array( 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), + 'SELECT' => array( array( 'HTML', 'BODY', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), + 'SELECT in table' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TD', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE ), + 'SELECT in template in table' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TD', 'TEMPLATE', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), + 'SELECT > OPTION' => array( array( 'HTML', 'BODY', 'SELECT', 'OPTION' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), + 'SELECT > OPTGROUP > OPTION' => array( array( 'HTML', 'BODY', 'SELECT', 'OPTGROUP', 'OPTION' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), + 'TD' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_CELL ), + 'TH' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TH' ), WP_HTML_Processor_State::INSERTION_MODE_IN_CELL ), + 'TR' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR' ), WP_HTML_Processor_State::INSERTION_MODE_IN_ROW ), + 'TBODY' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY ), + 'THEAD' => array( array( 'HTML', 'BODY', 'TABLE', 'THEAD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY ), + 'TFOOT' => array( array( 'HTML', 'BODY', 'TABLE', 'TFOOT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY ), + 'CAPTION' => array( array( 'HTML', 'BODY', 'TABLE', 'CAPTION' ), WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION ), + 'COLGROUP' => array( array( 'HTML', 'BODY', 'TABLE', 'COLGROUP' ), WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP ), + 'TABLE' => array( array( 'HTML', 'BODY', 'TABLE' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE ), + 'BODY' => array( array( 'HTML', 'BODY' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ), + 'FRAMESET' => array( array( 'HTML', 'BODY', 'FRAMESET' ), WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET ), + 'Last element (DIV)' => array( array( 'DIV' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ), + ); + } +} From 180f54c90a262118a88e706bc5fa292180d8ee88 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 2 Jul 2024 18:32:37 +0200 Subject: [PATCH 04/13] Move reset insertion mode to state class This allows it to be better tested via public properties. --- .../class-wp-html-processor-state.php | 164 +++++++++++++++++ .../html-api/class-wp-html-processor.php | 167 ------------------ 2 files changed, 164 insertions(+), 167 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor-state.php b/src/wp-includes/html-api/class-wp-html-processor-state.php index f72b6ebd765c9..609b390ba4612 100644 --- a/src/wp-includes/html-api/class-wp-html-processor-state.php +++ b/src/wp-includes/html-api/class-wp-html-processor-state.php @@ -248,4 +248,168 @@ public function __construct() { $this->stack_of_open_elements = new WP_HTML_Open_Elements(); $this->active_formatting_elements = new WP_HTML_Active_Formatting_Elements(); } + + /** + * Runs the reset the insertion mode appropriately algorithm. + * + * @since 6.7.0 + * + * @throws WP_HTML_Unsupported_Exception When encoutering unsupported nodes. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately + */ + public function reset_insertion_mode(): void { + /* + * > 1. Let _last_ be false. + * > 2. Let _node_ be the last node in the stack of open elements. + * > 3. _Loop_: If _node_ is the first node in the stack of open elements, then set _last_ + * > to true, and, if the parser was created as part of the HTML fragment parsing + * > algorithm (fragment case), set node to the context element passed to + * > that algorithm. + * > … + */ + $last = false; + $last_index = $this->stack_of_open_elements->count() - 1; + foreach ( $this->stack_of_open_elements->walk_up() as $i => $node ) { + if ( $i === $last_index ) { + $last = true; + } + switch ( $node->node_name ) { + /* + * > 4. If node is a `select` element, run these substeps: + * > 1. If _last_ is true, jump to the step below labeled done. + * > 2. Let _ancestor_ be _node_. + * > 3. _Loop_: If _ancestor_ is the first node in the stack of open elements, + * > jump to the step below labeled done. + * > 4. Let ancestor be the node before ancestor in the stack of open elements. + * > … + * > 7. Jump back to the step labeled _loop_. + * > 8. _Done_: Switch the insertion mode to "in select" and return. + */ + case 'SELECT': + if ( ! $last ) { + foreach ( $this->stack_of_open_elements->walk_up( $node ) as $ancestor ) { + switch ( $ancestor->node_name ) { + /* + * > 5. If _ancestor_ is a `template` node, jump to the step below + * > labeled _done_. + */ + case 'TEMPLATE': + break 2; + + /* + * > 6. If _ancestor_ is a `table` node, switch the insertion mode to + * > "in select in table" and return. + */ + case 'TABLE': + $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE; + return; + } + } + } + $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT; + return; + + /* + * > 5. If _node_ is a `td` or `th` element and _last_ is false, then switch the + * > insertion mode to "in cell" and return. + */ + case 'TD': + case 'TH': + if ( ! $last ) { + $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL; + return; + } + + /* + * > 6. If _node_ is a `tr` element, then switch the insertion mode to "in row" + * > and return. + */ + case 'TR': + $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; + return; + + /* + * > 7. If _node_ is a `tbody`, `thead`, or `tfoot` element, then switch the + * > insertion mode to "in table body" and return. + */ + case 'TBODY': + case 'THEAD': + case 'TFOOT': + $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; + return; + + /* + * > 8. If _node_ is a `caption` element, then switch the insertion mode to + * > "in caption" and return. + */ + case 'CAPTION': + $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION; + return; + + /* + * > 9. If _node_ is a `colgroup` element, then switch the insertion mode to + * > "in column group" and return. + */ + case 'COLGROUP': + $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; + return; + + /* + * > 10. If _node_ is a `table` element, then switch the insertion mode to + * > "in table" and return. + */ + case 'TABLE': + $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; + return; + + /* + * > 11. If _node_ is a `template` element, then switch the insertion mode to the + * > current template insertion mode and return. + */ + case 'TEMPLATE': + throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at TEMPLATE node.' ); + + /* + * > 12. If _node_ is a `head` element and _last_ is false, then switch the + * > insertion mode to "in head" and return. + */ + case 'HEAD': + throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at HEAD node.' ); + + /* + * > 13. If _node_ is a `body` element, then switch the insertion mode to "in body" + * > and return. + */ + case 'BODY': + $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; + return; + + /* + * > 14. If _node_ is a `frameset` element, then switch the insertion mode to + * > "in frameset" and return. (fragment case) + */ + case 'FRAMESET': + $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET; + return; + + /* + * > 15. If _node_ is an `html` element, run these substeps: + * > 1. If the head element pointer is null, switch the insertion mode to + * > "before head" and return. (fragment case) + * > 2. Otherwise, the head element pointer is not null, switch the insertion mode to "after head" and return. + */ + case 'HTML': + throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at HTML node.' ); + } + } + + /* + * > 16. If _last_ is true, then switch the insertion mode to "in body" + * > and return. (fragment case) + * + * `$last` will always be true here, we've reached the end of the stack. + */ + $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; + } } diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 06bd7b30dedf4..29f1c7ac6d4cc 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2129,173 +2129,6 @@ private function reconstruct_active_formatting_elements() { throw new WP_HTML_Unsupported_Exception( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' ); } - /** - * Runs the reset the insertion mode appropriately algorithm. - * - * @since 6.7.0 - * - * @throws WP_HTML_Unsupported_Exception When encoutering unsupported nodes. - * - * @see https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately - */ - private function reset_insertion_mode(): void { - - /* - * > 1. Let _last_ be false. - * > 2. Let _node_ be the last node in the stack of open elements. - * > 3. _Loop_: If _node_ is the first node in the stack of open elements, then set _last_ - * > to true, and, if the parser was created as part of the HTML fragment parsing - * > algorithm (fragment case), set node to the context element passed to - * > that algorithm. - * > … - */ - $last = false; - $last_index = $this->state->stack_of_open_elements->count() - 1; - foreach ( $this->state->stack_of_open_elements->walk_up() as $i => $node ) { - // todo No idea if this is correct. - if ( $i === $last_index ) { - $last = true; - } - switch ( $node->node_name ) { - /* - * > 4. If node is a `select` element, run these substeps: - * > 1. If _last_ is true, jump to the step below labeled done. - * > 2. Let _ancestor_ be _node_. - * > 3. _Loop_: If _ancestor_ is the first node in the stack of open elements, - * > jump to the step below labeled done. - * > 4. Let ancestor be the node before ancestor in the stack of open elements. - * > … - * > 7. Jump back to the step labeled _loop_. - * > 8. _Done_: Switch the insertion mode to "in select" and return. - */ - case 'SELECT': - foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $ancestor ) { - switch ( $ancestor ) { - /* - * > 5. If _ancestor_ is a `template` node, jump to the step below - * > labeled _done_. - */ - case 'TEMPLATE': - break 2; - - /* - * > 6. If _ancestor_ is a `table` node, switch the insertion mode to - * > "in select in table" and return. - */ - case 'TABLE': - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE; - return; - } - } - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT; - return; - - /* - * > 5. If _node_ is a `td` or `th` element and _last_ is false, then switch the - * > insertion mode to "in cell" and return. - */ - case 'TD': - case 'TH': - if ( ! $last ) { - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL; - return; - } - - /* - * > 6. If _node_ is a `tr` element, then switch the insertion mode to "in row" - * > and return. - */ - case 'TR': - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; - return; - - /* - * > 7. If _node_ is a `tbody`, `thead`, or `tfoot` element, then switch the - * > insertion mode to "in table body" and return. - */ - case 'TBODY': - case 'THEAD': - case 'TFOOT': - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; - return; - - /* - * > 8. If _node_ is a `caption` element, then switch the insertion mode to - * > "in caption" and return. - */ - case 'CAPTION': - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION; - return; - - /* - * > 9. If _node_ is a `colgroup` element, then switch the insertion mode to - * > "in column group" and return. - */ - case 'COLGROUP': - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; - return; - - /* - * > 10. If _node_ is a `table` element, then switch the insertion mode to - * > "in table" and return. - */ - case 'TABLE': - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; - return; - - /* - * > 11. If _node_ is a `template` element, then switch the insertion mode to the - * > current template insertion mode and return. - */ - case 'TEMPLATE': - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at TEMPLATE node.' ); - - /* - * > 12. If _node_ is a `head` element and _last_ is false, then switch the - * > insertion mode to "in head" and return. - */ - case 'HEAD': - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at HEAD node.' ); - - /* - * > 13. If _node_ is a `body` element, then switch the insertion mode to "in body" - * > and return. - */ - case 'BODY': - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; - return; - - /* - * > 14. If _node_ is a `frameset` element, then switch the insertion mode to - * > "in frameset" and return. (fragment case) - */ - case 'FRAMESET': - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET; - return; - - /* - * > 15. If _node_ is an `html` element, run these substeps: - * > 1. If the head element pointer is null, switch the insertion mode to - * > "before head" and return. (fragment case) - * > 2. Otherwise, the head element pointer is not null, switch the insertion mode to "after head" and return. - */ - case 'HTML': - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at HTML node.' ); - } - } - - /* - * > 16. If _last_ is true, then switch the insertion mode to "in body" - * > and return. (fragment case) - * - * `$last` will always be true here, we've reached the end of the stack. - */ - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; - } - /** * Runs the adoption agency algorithm. * From 17935b904a1951a1e76cd98cc76fcac4f390e2cf Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 2 Jul 2024 18:41:31 +0200 Subject: [PATCH 05/13] Add unsupported tests --- .../tests/html-api/wpHtmlProcessorState.php | 45 ++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorState.php b/tests/phpunit/tests/html-api/wpHtmlProcessorState.php index 56d85a5520cf1..f751d7f433831 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorState.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorState.php @@ -18,14 +18,12 @@ class Tests_HtmlApi_WpHtmlProcessorState extends WP_UnitTestCase { * @ticket TBD * * @param array $stack_of_open_elements Stack of open elements. - * @param string $insertion_mode Initial insertion mode. * @param string $expected_insertion_mode Expected insertion mode after running the algorithm. */ public function test_reset_insertion_mode( array $stack_of_open_elements, string $expected_insertion_mode - ) { - + ): void { $state = new WP_HTML_Processor_State(); $state->context_node = array( 'BODY', array() ); @@ -43,9 +41,9 @@ public function test_reset_insertion_mode( /** * Data provider. * - * @return array{ 0: array, 1: string, 2: string } + * @return array[] */ - public static function data_insertion_mode_cases() { + public static function data_insertion_mode_cases(): array { return array( 'SELECT last element' => array( array( 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), 'SELECT' => array( array( 'HTML', 'BODY', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), @@ -67,4 +65,41 @@ public static function data_insertion_mode_cases() { 'Last element (DIV)' => array( array( 'DIV' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ), ); } + + /** + * @dataProvider data_insertion_mode_unsupported + * + * @ticket TBD + * + * @param array $stack_of_open_elements Stack of open elements. + */ + public function test_reset_insertion_mode_unsupported( array $stack_of_open_elements ): void { + $this->expectException( WP_HTML_Unsupported_Exception::class ); + + $state = new WP_HTML_Processor_State(); + $state->context_node = array( 'BODY', array() ); + + foreach ( $stack_of_open_elements as $i => $tag_name ) { + if ( ! ctype_upper( $tag_name ) ) { + throw new Error( 'Expected upper case tag names.' ); + } + $state->stack_of_open_elements->push( new WP_HTML_Token( $i, $tag_name, false ) ); + } + $state->reset_insertion_mode(); + } + + /** + * Data provider. + * + * These tests should be migrated to the supported tests as support for more elements is added. + * + * @return array[] + */ + public static function data_insertion_mode_unsupported(): array { + return array( + 'TEMPLATE requires template insertion mode stack' => array( array( 'HTML', 'BODY', 'TEMPLATE' ) ), + 'HEAD requires more insertion modes' => array( array( 'HTML', 'HEAD' ) ), + 'HTML requires head pointer and insertion modes' => array( array( 'HTML' ) ), + ); + } } From af2d9ba264c3c188da3f4aec9641ab5834226f77 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 2 Jul 2024 19:10:31 +0200 Subject: [PATCH 06/13] Update ticket annotations --- tests/phpunit/tests/html-api/wpHtmlProcessorState.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorState.php b/tests/phpunit/tests/html-api/wpHtmlProcessorState.php index f751d7f433831..5a3b3e534f998 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorState.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorState.php @@ -15,7 +15,7 @@ class Tests_HtmlApi_WpHtmlProcessorState extends WP_UnitTestCase { /** * @dataProvider data_insertion_mode_cases * - * @ticket TBD + * @ticket 61549 * * @param array $stack_of_open_elements Stack of open elements. * @param string $expected_insertion_mode Expected insertion mode after running the algorithm. @@ -69,7 +69,7 @@ public static function data_insertion_mode_cases(): array { /** * @dataProvider data_insertion_mode_unsupported * - * @ticket TBD + * @ticket 61549 * * @param array $stack_of_open_elements Stack of open elements. */ From 599e39addd2d68254c8e1093ac9db5376228e579 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 2 Jul 2024 19:16:33 +0200 Subject: [PATCH 07/13] Add tests and fix TD/TH last element --- src/wp-includes/html-api/class-wp-html-processor-state.php | 1 + tests/phpunit/tests/html-api/wpHtmlProcessorState.php | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor-state.php b/src/wp-includes/html-api/class-wp-html-processor-state.php index 609b390ba4612..025e2b34da3e3 100644 --- a/src/wp-includes/html-api/class-wp-html-processor-state.php +++ b/src/wp-includes/html-api/class-wp-html-processor-state.php @@ -320,6 +320,7 @@ public function reset_insertion_mode(): void { $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL; return; } + break; /* * > 6. If _node_ is a `tr` element, then switch the insertion mode to "in row" diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorState.php b/tests/phpunit/tests/html-api/wpHtmlProcessorState.php index 5a3b3e534f998..01e9047e7aa3f 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorState.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorState.php @@ -52,7 +52,9 @@ public static function data_insertion_mode_cases(): array { 'SELECT > OPTION' => array( array( 'HTML', 'BODY', 'SELECT', 'OPTION' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), 'SELECT > OPTGROUP > OPTION' => array( array( 'HTML', 'BODY', 'SELECT', 'OPTGROUP', 'OPTION' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), 'TD' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_CELL ), + 'TD (last element)' => array( array( 'TD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ), 'TH' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TH' ), WP_HTML_Processor_State::INSERTION_MODE_IN_CELL ), + 'TH (last element)' => array( array( 'TH' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ), 'TR' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR' ), WP_HTML_Processor_State::INSERTION_MODE_IN_ROW ), 'TBODY' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY ), 'THEAD' => array( array( 'HTML', 'BODY', 'TABLE', 'THEAD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY ), From 8d34a3a3d135232c49f99de5ad64a348f71b6ac9 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 2 Jul 2024 19:47:09 +0200 Subject: [PATCH 08/13] Add more modes and tests --- .../class-wp-html-processor-state.php | 109 +++++++++++++++--- .../tests/html-api/wpHtmlProcessorState.php | 60 +++++++--- 2 files changed, 136 insertions(+), 33 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor-state.php b/src/wp-includes/html-api/class-wp-html-processor-state.php index 025e2b34da3e3..c933a6bf85f9a 100644 --- a/src/wp-includes/html-api/class-wp-html-processor-state.php +++ b/src/wp-includes/html-api/class-wp-html-processor-state.php @@ -62,7 +62,7 @@ class WP_HTML_Processor_State { /** * In select insertion mode for full HTML parser. * - * @since 6.5.0 + * @since 6.7.0 * * @see https://html.spec.whatwg.org/#parsing-main-inselect * @see WP_HTML_Processor_State::$insertion_mode @@ -74,7 +74,7 @@ class WP_HTML_Processor_State { /** * In select in table insertion mode for full HTML parser. * - * @since 6.5.0 + * @since 6.7.0 * * @see https://html.spec.whatwg.org/#parsing-main-inselectintable * @see WP_HTML_Processor_State::$insertion_mode @@ -86,7 +86,7 @@ class WP_HTML_Processor_State { /** * In table insertion mode for full HTML parser. * - * @since 6.5.0 + * @since 6.7.0 * * @see https://html.spec.whatwg.org/#parsing-main-intable * @see WP_HTML_Processor_State::$insertion_mode @@ -98,7 +98,7 @@ class WP_HTML_Processor_State { /** * In caption insertion mode for full HTML parser. * - * @since 6.5.0 + * @since 6.7.0 * * @see https://html.spec.whatwg.org/#parsing-main-incaption * @see WP_HTML_Processor_State::$insertion_mode @@ -110,7 +110,7 @@ class WP_HTML_Processor_State { /** * In table body insertion mode for full HTML parser. * - * @since 6.5.0 + * @since 6.7.0 * * @see https://html.spec.whatwg.org/#parsing-main-intablebody * @see WP_HTML_Processor_State::$insertion_mode @@ -122,7 +122,7 @@ class WP_HTML_Processor_State { /** * In row insertion mode for full HTML parser. * - * @since 6.5.0 + * @since 6.7.0 * * @see https://html.spec.whatwg.org/#parsing-main-inrow * @see WP_HTML_Processor_State::$insertion_mode @@ -134,7 +134,7 @@ class WP_HTML_Processor_State { /** * In cell insertion mode for full HTML parser. * - * @since 6.5.0 + * @since 6.7.0 * * @see https://html.spec.whatwg.org/#parsing-main-incell * @see WP_HTML_Processor_State::$insertion_mode @@ -146,7 +146,7 @@ class WP_HTML_Processor_State { /** * In column group insertion mode for full HTML parser. * - * @since 6.5.0 + * @since 6.7.0 * * @see https://html.spec.whatwg.org/#parsing-main-incolumngroup * @see WP_HTML_Processor_State::$insertion_mode @@ -158,7 +158,7 @@ class WP_HTML_Processor_State { /** * In frameset insertion mode for full HTML parser. * - * @since 6.5.0 + * @since 6.7.0 * * @see https://html.spec.whatwg.org/#parsing-main-inframeset * @see WP_HTML_Processor_State::$insertion_mode @@ -167,6 +167,65 @@ class WP_HTML_Processor_State { */ const INSERTION_MODE_IN_FRAMESET = 'insertion-mode-in-frameset'; + /** + * In head insertion mode for full HTML parser. + * + * @since 6.7.0 + * + * @see https://html.spec.whatwg.org/#parsing-main-inhead + * @see WP_HTML_Processor_State::$insertion_mode + * + * @var string + */ + const INSERTION_MODE_IN_HEAD = 'insertion-mode-in-head'; + + /** + * Before head insertion mode for full HTML parser. + * + * @since 6.7.0 + * + * @see https://html.spec.whatwg.org/#parsing-main-beforehead + * @see WP_HTML_Processor_State::$insertion_mode + * + * @var string + */ + const INSERTION_MODE_BEFORE_HEAD = 'insertion-mode-before-head'; + + /** + * After head insertion mode for full HTML parser. + * + * @since 6.7.0 + * + * @see https://html.spec.whatwg.org/#parsing-main-afterhead + * @see WP_HTML_Processor_State::$insertion_mode + * + * @var string + */ + const INSERTION_MODE_AFTER_HEAD = 'insertion-mode-after-head'; + + /** + * In template insertion mode for full HTML parser. + * + * @since 6.7.0 + * + * @see https://html.spec.whatwg.org/#parsing-main-intemplate + * @see WP_HTML_Processor_State::$insertion_mode + * + * @var string + */ + const INSERTION_MODE_IN_TEMPLATE = 'insertion-mode-in-template'; + + /** + * The stack of template insertion modes. + * + * @since 6.7.0 + * + * @see https://html.spec.whatwg.org/#the-insertion-mode:stack-of-template-insertion-modes + * + * @var array + */ + public $stack_of_template_insertion_modes = array(); + /** * Tracks open elements while scanning HTML. * @@ -224,6 +283,17 @@ class WP_HTML_Processor_State { */ public $context_node = null; + /** + * HEAD element pointer. + * + * @since 6.7.0 + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#head-element-pointer + * + * @var WP_HTML_Token|null + */ + public $head_element = null; + /** * The frameset-ok flag indicates if a `FRAMESET` element is allowed in the current state. * @@ -254,8 +324,6 @@ public function __construct() { * * @since 6.7.0 * - * @throws WP_HTML_Unsupported_Exception When encoutering unsupported nodes. - * * @see https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately */ public function reset_insertion_mode(): void { @@ -369,14 +437,19 @@ public function reset_insertion_mode(): void { * > current template insertion mode and return. */ case 'TEMPLATE': - throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at TEMPLATE node.' ); + $this->insertion_mode = end( $this->stack_of_template_insertion_modes ); + return; /* * > 12. If _node_ is a `head` element and _last_ is false, then switch the * > insertion mode to "in head" and return. */ case 'HEAD': - throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at HEAD node.' ); + if ( ! $last ) { + $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD; + return; + } + break; /* * > 13. If _node_ is a `body` element, then switch the insertion mode to "in body" @@ -398,10 +471,16 @@ public function reset_insertion_mode(): void { * > 15. If _node_ is an `html` element, run these substeps: * > 1. If the head element pointer is null, switch the insertion mode to * > "before head" and return. (fragment case) - * > 2. Otherwise, the head element pointer is not null, switch the insertion mode to "after head" and return. + * > 2. Otherwise, the head element pointer is not null, switch the insertion + * > mode to "after head" and return. */ case 'HTML': - throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at HTML node.' ); + if ( null === $this->head_element ) { + $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD; + } else { + $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD; + } + return; } } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorState.php b/tests/phpunit/tests/html-api/wpHtmlProcessorState.php index 01e9047e7aa3f..81a6bae9fd87d 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorState.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorState.php @@ -62,6 +62,8 @@ public static function data_insertion_mode_cases(): array { 'CAPTION' => array( array( 'HTML', 'BODY', 'TABLE', 'CAPTION' ), WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION ), 'COLGROUP' => array( array( 'HTML', 'BODY', 'TABLE', 'COLGROUP' ), WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP ), 'TABLE' => array( array( 'HTML', 'BODY', 'TABLE' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE ), + 'HEAD' => array( array( 'HTML', 'HEAD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD ), + 'HEAD (last element)' => array( array( 'HEAD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ), 'BODY' => array( array( 'HTML', 'BODY' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ), 'FRAMESET' => array( array( 'HTML', 'BODY', 'FRAMESET' ), WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET ), 'Last element (DIV)' => array( array( 'DIV' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ), @@ -69,39 +71,61 @@ public static function data_insertion_mode_cases(): array { } /** - * @dataProvider data_insertion_mode_unsupported - * * @ticket 61549 - * - * @param array $stack_of_open_elements Stack of open elements. */ - public function test_reset_insertion_mode_unsupported( array $stack_of_open_elements ): void { - $this->expectException( WP_HTML_Unsupported_Exception::class ); - + public function test_template_insertion_mode_reset(): void { $state = new WP_HTML_Processor_State(); $state->context_node = array( 'BODY', array() ); + array_push( + $state->stack_of_template_insertion_modes, + WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE, + WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY, + WP_HTML_Processor_State::INSERTION_MODE_IN_ROW, + WP_HTML_Processor_State::INSERTION_MODE_IN_CELL, + WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE + ); - foreach ( $stack_of_open_elements as $i => $tag_name ) { + foreach ( array( 'TABLE', 'TBODY', 'TR', 'TD', 'SELECT', 'TEMPLATE' ) as $i => $tag_name ) { if ( ! ctype_upper( $tag_name ) ) { throw new Error( 'Expected upper case tag names.' ); } $state->stack_of_open_elements->push( new WP_HTML_Token( $i, $tag_name, false ) ); } $state->reset_insertion_mode(); + $this->assertSame( + WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE, + $state->insertion_mode + ); } /** - * Data provider. - * - * These tests should be migrated to the supported tests as support for more elements is added. - * - * @return array[] + * @ticket 61549 */ - public static function data_insertion_mode_unsupported(): array { - return array( - 'TEMPLATE requires template insertion mode stack' => array( array( 'HTML', 'BODY', 'TEMPLATE' ) ), - 'HEAD requires more insertion modes' => array( array( 'HTML', 'HEAD' ) ), - 'HTML requires head pointer and insertion modes' => array( array( 'HTML' ) ), + public function test_html_reset_insertion_mode_before_head(): void { + $state = new WP_HTML_Processor_State(); + $state->context_node = array( 'BODY', array() ); + + $state->stack_of_open_elements->push( new WP_HTML_Token( 0, 'HTML', false ) ); + $state->reset_insertion_mode(); + $this->assertSame( + WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD, + $state->insertion_mode + ); + } + + /** + * @ticket 61549 + */ + public function test_html_reset_insertion_mode_after_head(): void { + $state = new WP_HTML_Processor_State(); + $state->context_node = array( 'BODY', array() ); + $state->head_element = new WP_HTML_Token( 'head', 'HEAD', false ); + + $state->stack_of_open_elements->push( new WP_HTML_Token( 0, 'HTML', false ) ); + $state->reset_insertion_mode(); + $this->assertSame( + WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD, + $state->insertion_mode ); } } From 479d6f4549a10273b75b0b6deedb2b08139f3048 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 2 Jul 2024 13:45:16 -0700 Subject: [PATCH 09/13] Add test docs, rearrange setup code. --- ...e.php => wpHtmlProcessorInsertionMode.php} | 111 ++++++++++++++---- 1 file changed, 85 insertions(+), 26 deletions(-) rename tests/phpunit/tests/html-api/{wpHtmlProcessorState.php => wpHtmlProcessorInsertionMode.php} (56%) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorState.php b/tests/phpunit/tests/html-api/wpHtmlProcessorInsertionMode.php similarity index 56% rename from tests/phpunit/tests/html-api/wpHtmlProcessorState.php rename to tests/phpunit/tests/html-api/wpHtmlProcessorInsertionMode.php index 81a6bae9fd87d..8815b9ef049bb 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorState.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorInsertionMode.php @@ -1,6 +1,12 @@ context_node = array( 'BODY', array() ); - foreach ( $stack_of_open_elements as $i => $tag_name ) { - if ( ! ctype_upper( $tag_name ) ) { - throw new Error( 'Expected upper case tag names.' ); - } - $state->stack_of_open_elements->push( new WP_HTML_Token( $i, $tag_name, false ) ); + // Set up the stack of open elements in a specific configuration. Bypass internal rules. + foreach ( $stack_of_open_elements as $bookmark_name => $tag_name ) { + $this->assertTrue( + self::is_tag_name( $tag_name ), + "Expected a tag name in test setup, but given '{$tag_name}' instead: check test data provider." + ); + $state->stack_of_open_elements->stack[] = new WP_HTML_Token( $bookmark_name, $tag_name, false ); } + $state->reset_insertion_mode(); - $this->assertSame( $expected_insertion_mode, $state->insertion_mode ); + $this->assertSame( + $expected_insertion_mode, + $state->insertion_mode, + 'Failed to reset the insertion mode into the expected mode.' + ); } /** @@ -45,7 +60,7 @@ public function test_reset_insertion_mode( */ public static function data_insertion_mode_cases(): array { return array( - 'SELECT last element' => array( array( 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), + 'SELECT last element' => array( array( 'HTML', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), 'SELECT' => array( array( 'HTML', 'BODY', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), 'SELECT in table' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TD', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE ), 'SELECT in template in table' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TD', 'TEMPLATE', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), @@ -71,49 +86,70 @@ public static function data_insertion_mode_cases(): array { } /** + * Ensures that the "reset the insertion mode appropriately" algorithm properly + * takes into account any open stack of template insertion modes. + * * @ticket 61549 */ public function test_template_insertion_mode_reset(): void { $state = new WP_HTML_Processor_State(); $state->context_node = array( 'BODY', array() ); + + // Set up the stack of template insertion modes in a specific configuration. Bypass internal rules. + $newest_template_insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE; + array_push( $state->stack_of_template_insertion_modes, WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE, WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY, WP_HTML_Processor_State::INSERTION_MODE_IN_ROW, WP_HTML_Processor_State::INSERTION_MODE_IN_CELL, - WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE + $newest_template_insertion_mode ); - foreach ( array( 'TABLE', 'TBODY', 'TR', 'TD', 'SELECT', 'TEMPLATE' ) as $i => $tag_name ) { - if ( ! ctype_upper( $tag_name ) ) { - throw new Error( 'Expected upper case tag names.' ); - } - $state->stack_of_open_elements->push( new WP_HTML_Token( $i, $tag_name, false ) ); + foreach ( array( 'TABLE', 'TBODY', 'TR', 'TD', 'SELECT', 'TEMPLATE' ) as $bookmark_name => $tag_name ) { + $this->assertTrue( + self::is_tag_name( $tag_name ), + "Expected a tag name in test setup, but given '{$tag_name}' instead: check test data provider." + ); + $state->stack_of_open_elements->stack[] = new WP_HTML_Token( $bookmark_name, $tag_name, false ); } + $state->reset_insertion_mode(); + $this->assertSame( - WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE, - $state->insertion_mode + $newest_template_insertion_mode, + $state->insertion_mode, + 'Failed to reset insertion mode to the newest item in the template insertion mode stack.' ); } /** + * Ensures that the "reset the insertion mode appropriately" algorithm properly + * resets for the fresh fragment parser state with BODY as the context node. + * * @ticket 61549 */ public function test_html_reset_insertion_mode_before_head(): void { $state = new WP_HTML_Processor_State(); $state->context_node = array( 'BODY', array() ); - $state->stack_of_open_elements->push( new WP_HTML_Token( 0, 'HTML', false ) ); + // Set up the stack of open elements in a specific configuration. Bypass internal rules. + $state->stack_of_open_elements->stack[] = new WP_HTML_Token( 0, 'HTML', false ); + $state->reset_insertion_mode(); + $this->assertSame( WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD, - $state->insertion_mode + $state->insertion_mode, + 'Failed to properly reset insertion mode.' ); } /** + * Ensures that the "reset the insertion mode appropriately" algorithm properly + * resets to AFTER HEAD when a head element is set. + * * @ticket 61549 */ public function test_html_reset_insertion_mode_after_head(): void { @@ -121,11 +157,34 @@ public function test_html_reset_insertion_mode_after_head(): void { $state->context_node = array( 'BODY', array() ); $state->head_element = new WP_HTML_Token( 'head', 'HEAD', false ); - $state->stack_of_open_elements->push( new WP_HTML_Token( 0, 'HTML', false ) ); + // Set up the stack of open elements in a specific configuration. Bypass internal rules. + $state->stack_of_open_elements->stack[] = new WP_HTML_Token( 0, 'HTML', false ); + $state->reset_insertion_mode(); + $this->assertSame( WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD, - $state->insertion_mode + $state->insertion_mode, + 'Failed to properly reset insertion mode.' ); } + + // Test helper methods. + + /** + * Indicates if a given node name represents a tag name, vs. a comment, + * HTML doctype declaration, text node, etc… + * + * Example: + * + * false === is_tag_name( '#text' ); + * false === is_tag_name( 'html' ); // This is a DOCTYPE declaration. + * true === is_tag_name( 'HTML' ); + * + * @param string $node_name Node name as returned from the stack of open elements. + * @return bool + */ + private function is_tag_name( string $node_name ) { + return ctype_upper( $node_name ); + } } From 5bcff17dc81978a080eb989b89b684b8b169f1c8 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 2 Jul 2024 13:45:35 -0700 Subject: [PATCH 10/13] Adjust for context node in reset insertion mode algorithm. --- .../class-wp-html-processor-state.php | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor-state.php b/src/wp-includes/html-api/class-wp-html-processor-state.php index c933a6bf85f9a..55b92f10f1d65 100644 --- a/src/wp-includes/html-api/class-wp-html-processor-state.php +++ b/src/wp-includes/html-api/class-wp-html-processor-state.php @@ -327,21 +327,32 @@ public function __construct() { * @see https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately */ public function reset_insertion_mode(): void { + // Set the first node. + $first_node = null; + foreach ( $this->stack_of_open_elements->walk_down() as $first_node ) { + break; + } + /* * > 1. Let _last_ be false. - * > 2. Let _node_ be the last node in the stack of open elements. - * > 3. _Loop_: If _node_ is the first node in the stack of open elements, then set _last_ - * > to true, and, if the parser was created as part of the HTML fragment parsing - * > algorithm (fragment case), set node to the context element passed to - * > that algorithm. - * > … */ - $last = false; - $last_index = $this->stack_of_open_elements->count() - 1; - foreach ( $this->stack_of_open_elements->walk_up() as $i => $node ) { - if ( $i === $last_index ) { + $last = false; + foreach ( $this->stack_of_open_elements->walk_up() as $node ) { + /* + * > 2. Let _node_ be the last node in the stack of open elements. + * > 3. _Loop_: If _node_ is the first node in the stack of open elements, then set _last_ + * > to true, and, if the parser was created as part of the HTML fragment parsing + * > algorithm (fragment case), set node to the context element passed to + * > that algorithm. + * > … + */ + if ( $node === $first_node ) { $last = true; + if ( isset( $this->context_node ) ) { + $node = new WP_HTML_Token( 'context-node', $this->context_node[0], false ); + } } + switch ( $node->node_name ) { /* * > 4. If node is a `select` element, run these substeps: From 631cfb1d046c6e971099017639e18e8121c498fc Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 3 Jul 2024 08:43:18 -0700 Subject: [PATCH 11/13] Move reset algorithm into HTML Processor --- .../class-wp-html-processor-state.php | 185 ------------------ .../html-api/class-wp-html-processor.php | 183 +++++++++++++++++ 2 files changed, 183 insertions(+), 185 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor-state.php b/src/wp-includes/html-api/class-wp-html-processor-state.php index 55b92f10f1d65..f6e3721665402 100644 --- a/src/wp-includes/html-api/class-wp-html-processor-state.php +++ b/src/wp-includes/html-api/class-wp-html-processor-state.php @@ -318,189 +318,4 @@ public function __construct() { $this->stack_of_open_elements = new WP_HTML_Open_Elements(); $this->active_formatting_elements = new WP_HTML_Active_Formatting_Elements(); } - - /** - * Runs the reset the insertion mode appropriately algorithm. - * - * @since 6.7.0 - * - * @see https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately - */ - public function reset_insertion_mode(): void { - // Set the first node. - $first_node = null; - foreach ( $this->stack_of_open_elements->walk_down() as $first_node ) { - break; - } - - /* - * > 1. Let _last_ be false. - */ - $last = false; - foreach ( $this->stack_of_open_elements->walk_up() as $node ) { - /* - * > 2. Let _node_ be the last node in the stack of open elements. - * > 3. _Loop_: If _node_ is the first node in the stack of open elements, then set _last_ - * > to true, and, if the parser was created as part of the HTML fragment parsing - * > algorithm (fragment case), set node to the context element passed to - * > that algorithm. - * > … - */ - if ( $node === $first_node ) { - $last = true; - if ( isset( $this->context_node ) ) { - $node = new WP_HTML_Token( 'context-node', $this->context_node[0], false ); - } - } - - switch ( $node->node_name ) { - /* - * > 4. If node is a `select` element, run these substeps: - * > 1. If _last_ is true, jump to the step below labeled done. - * > 2. Let _ancestor_ be _node_. - * > 3. _Loop_: If _ancestor_ is the first node in the stack of open elements, - * > jump to the step below labeled done. - * > 4. Let ancestor be the node before ancestor in the stack of open elements. - * > … - * > 7. Jump back to the step labeled _loop_. - * > 8. _Done_: Switch the insertion mode to "in select" and return. - */ - case 'SELECT': - if ( ! $last ) { - foreach ( $this->stack_of_open_elements->walk_up( $node ) as $ancestor ) { - switch ( $ancestor->node_name ) { - /* - * > 5. If _ancestor_ is a `template` node, jump to the step below - * > labeled _done_. - */ - case 'TEMPLATE': - break 2; - - /* - * > 6. If _ancestor_ is a `table` node, switch the insertion mode to - * > "in select in table" and return. - */ - case 'TABLE': - $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE; - return; - } - } - } - $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT; - return; - - /* - * > 5. If _node_ is a `td` or `th` element and _last_ is false, then switch the - * > insertion mode to "in cell" and return. - */ - case 'TD': - case 'TH': - if ( ! $last ) { - $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL; - return; - } - break; - - /* - * > 6. If _node_ is a `tr` element, then switch the insertion mode to "in row" - * > and return. - */ - case 'TR': - $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; - return; - - /* - * > 7. If _node_ is a `tbody`, `thead`, or `tfoot` element, then switch the - * > insertion mode to "in table body" and return. - */ - case 'TBODY': - case 'THEAD': - case 'TFOOT': - $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; - return; - - /* - * > 8. If _node_ is a `caption` element, then switch the insertion mode to - * > "in caption" and return. - */ - case 'CAPTION': - $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION; - return; - - /* - * > 9. If _node_ is a `colgroup` element, then switch the insertion mode to - * > "in column group" and return. - */ - case 'COLGROUP': - $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; - return; - - /* - * > 10. If _node_ is a `table` element, then switch the insertion mode to - * > "in table" and return. - */ - case 'TABLE': - $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; - return; - - /* - * > 11. If _node_ is a `template` element, then switch the insertion mode to the - * > current template insertion mode and return. - */ - case 'TEMPLATE': - $this->insertion_mode = end( $this->stack_of_template_insertion_modes ); - return; - - /* - * > 12. If _node_ is a `head` element and _last_ is false, then switch the - * > insertion mode to "in head" and return. - */ - case 'HEAD': - if ( ! $last ) { - $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD; - return; - } - break; - - /* - * > 13. If _node_ is a `body` element, then switch the insertion mode to "in body" - * > and return. - */ - case 'BODY': - $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; - return; - - /* - * > 14. If _node_ is a `frameset` element, then switch the insertion mode to - * > "in frameset" and return. (fragment case) - */ - case 'FRAMESET': - $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET; - return; - - /* - * > 15. If _node_ is an `html` element, run these substeps: - * > 1. If the head element pointer is null, switch the insertion mode to - * > "before head" and return. (fragment case) - * > 2. Otherwise, the head element pointer is not null, switch the insertion - * > mode to "after head" and return. - */ - case 'HTML': - if ( null === $this->head_element ) { - $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD; - } else { - $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD; - } - return; - } - } - - /* - * > 16. If _last_ is true, then switch the insertion mode to "in body" - * > and return. (fragment case) - * - * `$last` will always be true here, we've reached the end of the stack. - */ - $this->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; - } } diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 29f1c7ac6d4cc..86f210c0fa834 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2129,6 +2129,189 @@ private function reconstruct_active_formatting_elements() { throw new WP_HTML_Unsupported_Exception( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' ); } + /** + * Runs the reset the insertion mode appropriately algorithm. + * + * @since 6.7.0 + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately + */ + public function reset_insertion_mode(): void { + // Set the first node. + $first_node = null; + foreach ( $this->state->stack_of_open_elements->walk_down() as $first_node ) { + break; + } + + /* + * > 1. Let _last_ be false. + */ + $last = false; + foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { + /* + * > 2. Let _node_ be the last node in the stack of open elements. + * > 3. _Loop_: If _node_ is the first node in the stack of open elements, then set _last_ + * > to true, and, if the parser was created as part of the HTML fragment parsing + * > algorithm (fragment case), set node to the context element passed to + * > that algorithm. + * > … + */ + if ( $node === $first_node ) { + $last = true; + if ( isset( $this->context_node ) ) { + $node = new WP_HTML_Token( 'context-node', $this->context_node[0], false ); + } + } + + switch ( $node->node_name ) { + /* + * > 4. If node is a `select` element, run these substeps: + * > 1. If _last_ is true, jump to the step below labeled done. + * > 2. Let _ancestor_ be _node_. + * > 3. _Loop_: If _ancestor_ is the first node in the stack of open elements, + * > jump to the step below labeled done. + * > 4. Let ancestor be the node before ancestor in the stack of open elements. + * > … + * > 7. Jump back to the step labeled _loop_. + * > 8. _Done_: Switch the insertion mode to "in select" and return. + */ + case 'SELECT': + if ( ! $last ) { + foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $ancestor ) { + switch ( $ancestor->node_name ) { + /* + * > 5. If _ancestor_ is a `template` node, jump to the step below + * > labeled _done_. + */ + case 'TEMPLATE': + break 2; + + /* + * > 6. If _ancestor_ is a `table` node, switch the insertion mode to + * > "in select in table" and return. + */ + case 'TABLE': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE; + return; + } + } + } + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT; + return; + + /* + * > 5. If _node_ is a `td` or `th` element and _last_ is false, then switch the + * > insertion mode to "in cell" and return. + */ + case 'TD': + case 'TH': + if ( ! $last ) { + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL; + return; + } + break; + + /* + * > 6. If _node_ is a `tr` element, then switch the insertion mode to "in row" + * > and return. + */ + case 'TR': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; + return; + + /* + * > 7. If _node_ is a `tbody`, `thead`, or `tfoot` element, then switch the + * > insertion mode to "in table body" and return. + */ + case 'TBODY': + case 'THEAD': + case 'TFOOT': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; + return; + + /* + * > 8. If _node_ is a `caption` element, then switch the insertion mode to + * > "in caption" and return. + */ + case 'CAPTION': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION; + return; + + /* + * > 9. If _node_ is a `colgroup` element, then switch the insertion mode to + * > "in column group" and return. + */ + case 'COLGROUP': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; + return; + + /* + * > 10. If _node_ is a `table` element, then switch the insertion mode to + * > "in table" and return. + */ + case 'TABLE': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; + return; + + /* + * > 11. If _node_ is a `template` element, then switch the insertion mode to the + * > current template insertion mode and return. + */ + case 'TEMPLATE': + $this->state->insertion_mode = end( $this->state->stack_of_template_insertion_modes ); + return; + + /* + * > 12. If _node_ is a `head` element and _last_ is false, then switch the + * > insertion mode to "in head" and return. + */ + case 'HEAD': + if ( ! $last ) { + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD; + return; + } + break; + + /* + * > 13. If _node_ is a `body` element, then switch the insertion mode to "in body" + * > and return. + */ + case 'BODY': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; + return; + + /* + * > 14. If _node_ is a `frameset` element, then switch the insertion mode to + * > "in frameset" and return. (fragment case) + */ + case 'FRAMESET': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET; + return; + + /* + * > 15. If _node_ is an `html` element, run these substeps: + * > 1. If the head element pointer is null, switch the insertion mode to + * > "before head" and return. (fragment case) + * > 2. Otherwise, the head element pointer is not null, switch the insertion + * > mode to "after head" and return. + */ + case 'HTML': + $this->state->insertion_mode = isset( $this->state->head_element ) + ? WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD + : WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD; + return; + } + } + + /* + * > 16. If _last_ is true, then switch the insertion mode to "in body" + * > and return. (fragment case) + * + * `$last` will always be true here, we've reached the end of the stack. + */ + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; + } + /** * Runs the adoption agency algorithm. * From b2c1ef496e27c83ee14ce7dc3f4cd3697fc0f1a9 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 3 Jul 2024 09:24:28 -0700 Subject: [PATCH 12/13] Remove tests and directly reference context node. --- .../html-api/class-wp-html-processor.php | 2 +- .../html-api/wpHtmlProcessorInsertionMode.php | 190 ------------------ 2 files changed, 1 insertion(+), 191 deletions(-) delete mode 100644 tests/phpunit/tests/html-api/wpHtmlProcessorInsertionMode.php diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 86f210c0fa834..9b800f6baf627 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2159,7 +2159,7 @@ public function reset_insertion_mode(): void { if ( $node === $first_node ) { $last = true; if ( isset( $this->context_node ) ) { - $node = new WP_HTML_Token( 'context-node', $this->context_node[0], false ); + $node = $this->context_node; } } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorInsertionMode.php b/tests/phpunit/tests/html-api/wpHtmlProcessorInsertionMode.php deleted file mode 100644 index 8815b9ef049bb..0000000000000 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorInsertionMode.php +++ /dev/null @@ -1,190 +0,0 @@ -context_node = array( 'BODY', array() ); - - // Set up the stack of open elements in a specific configuration. Bypass internal rules. - foreach ( $stack_of_open_elements as $bookmark_name => $tag_name ) { - $this->assertTrue( - self::is_tag_name( $tag_name ), - "Expected a tag name in test setup, but given '{$tag_name}' instead: check test data provider." - ); - $state->stack_of_open_elements->stack[] = new WP_HTML_Token( $bookmark_name, $tag_name, false ); - } - - $state->reset_insertion_mode(); - - $this->assertSame( - $expected_insertion_mode, - $state->insertion_mode, - 'Failed to reset the insertion mode into the expected mode.' - ); - } - - /** - * Data provider. - * - * @return array[] - */ - public static function data_insertion_mode_cases(): array { - return array( - 'SELECT last element' => array( array( 'HTML', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), - 'SELECT' => array( array( 'HTML', 'BODY', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), - 'SELECT in table' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TD', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE ), - 'SELECT in template in table' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TD', 'TEMPLATE', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), - 'SELECT > OPTION' => array( array( 'HTML', 'BODY', 'SELECT', 'OPTION' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), - 'SELECT > OPTGROUP > OPTION' => array( array( 'HTML', 'BODY', 'SELECT', 'OPTGROUP', 'OPTION' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ), - 'TD' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_CELL ), - 'TD (last element)' => array( array( 'TD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ), - 'TH' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TH' ), WP_HTML_Processor_State::INSERTION_MODE_IN_CELL ), - 'TH (last element)' => array( array( 'TH' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ), - 'TR' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR' ), WP_HTML_Processor_State::INSERTION_MODE_IN_ROW ), - 'TBODY' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY ), - 'THEAD' => array( array( 'HTML', 'BODY', 'TABLE', 'THEAD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY ), - 'TFOOT' => array( array( 'HTML', 'BODY', 'TABLE', 'TFOOT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY ), - 'CAPTION' => array( array( 'HTML', 'BODY', 'TABLE', 'CAPTION' ), WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION ), - 'COLGROUP' => array( array( 'HTML', 'BODY', 'TABLE', 'COLGROUP' ), WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP ), - 'TABLE' => array( array( 'HTML', 'BODY', 'TABLE' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE ), - 'HEAD' => array( array( 'HTML', 'HEAD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD ), - 'HEAD (last element)' => array( array( 'HEAD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ), - 'BODY' => array( array( 'HTML', 'BODY' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ), - 'FRAMESET' => array( array( 'HTML', 'BODY', 'FRAMESET' ), WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET ), - 'Last element (DIV)' => array( array( 'DIV' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ), - ); - } - - /** - * Ensures that the "reset the insertion mode appropriately" algorithm properly - * takes into account any open stack of template insertion modes. - * - * @ticket 61549 - */ - public function test_template_insertion_mode_reset(): void { - $state = new WP_HTML_Processor_State(); - $state->context_node = array( 'BODY', array() ); - - // Set up the stack of template insertion modes in a specific configuration. Bypass internal rules. - $newest_template_insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE; - - array_push( - $state->stack_of_template_insertion_modes, - WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE, - WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY, - WP_HTML_Processor_State::INSERTION_MODE_IN_ROW, - WP_HTML_Processor_State::INSERTION_MODE_IN_CELL, - $newest_template_insertion_mode - ); - - foreach ( array( 'TABLE', 'TBODY', 'TR', 'TD', 'SELECT', 'TEMPLATE' ) as $bookmark_name => $tag_name ) { - $this->assertTrue( - self::is_tag_name( $tag_name ), - "Expected a tag name in test setup, but given '{$tag_name}' instead: check test data provider." - ); - $state->stack_of_open_elements->stack[] = new WP_HTML_Token( $bookmark_name, $tag_name, false ); - } - - $state->reset_insertion_mode(); - - $this->assertSame( - $newest_template_insertion_mode, - $state->insertion_mode, - 'Failed to reset insertion mode to the newest item in the template insertion mode stack.' - ); - } - - /** - * Ensures that the "reset the insertion mode appropriately" algorithm properly - * resets for the fresh fragment parser state with BODY as the context node. - * - * @ticket 61549 - */ - public function test_html_reset_insertion_mode_before_head(): void { - $state = new WP_HTML_Processor_State(); - $state->context_node = array( 'BODY', array() ); - - // Set up the stack of open elements in a specific configuration. Bypass internal rules. - $state->stack_of_open_elements->stack[] = new WP_HTML_Token( 0, 'HTML', false ); - - $state->reset_insertion_mode(); - - $this->assertSame( - WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD, - $state->insertion_mode, - 'Failed to properly reset insertion mode.' - ); - } - - /** - * Ensures that the "reset the insertion mode appropriately" algorithm properly - * resets to AFTER HEAD when a head element is set. - * - * @ticket 61549 - */ - public function test_html_reset_insertion_mode_after_head(): void { - $state = new WP_HTML_Processor_State(); - $state->context_node = array( 'BODY', array() ); - $state->head_element = new WP_HTML_Token( 'head', 'HEAD', false ); - - // Set up the stack of open elements in a specific configuration. Bypass internal rules. - $state->stack_of_open_elements->stack[] = new WP_HTML_Token( 0, 'HTML', false ); - - $state->reset_insertion_mode(); - - $this->assertSame( - WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD, - $state->insertion_mode, - 'Failed to properly reset insertion mode.' - ); - } - - // Test helper methods. - - /** - * Indicates if a given node name represents a tag name, vs. a comment, - * HTML doctype declaration, text node, etc… - * - * Example: - * - * false === is_tag_name( '#text' ); - * false === is_tag_name( 'html' ); // This is a DOCTYPE declaration. - * true === is_tag_name( 'HTML' ); - * - * @param string $node_name Node name as returned from the stack of open elements. - * @return bool - */ - private function is_tag_name( string $node_name ) { - return ctype_upper( $node_name ); - } -} From 8b125b4f116259c287a14b90491a80f1536dc791 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 3 Jul 2024 09:45:25 -0700 Subject: [PATCH 13/13] Remove personality --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 9b800f6baf627..32800218f6404 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2307,7 +2307,7 @@ public function reset_insertion_mode(): void { * > 16. If _last_ is true, then switch the insertion mode to "in body" * > and return. (fragment case) * - * `$last` will always be true here, we've reached the end of the stack. + * This is only reachable if `$last` is true, as per the fragment parsing case. */ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; }