diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 6e87aa0d66041..03728fd37870f 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1553,12 +1553,7 @@ private function run_adoption_agency_algorithm() { return; } - $outer_loop_counter = 0; - while ( $budget-- > 0 ) { - if ( $outer_loop_counter++ >= 8 ) { - return; - } - + for ( $outer_loop_counter = 0; $outer_loop_counter < 8; $outer_loop_counter++ ) { /* * > Let formatting element be the last element in the list of active formatting elements that: * > - is between the end of the list and the last marker in the list, @@ -1579,8 +1574,35 @@ private function run_adoption_agency_algorithm() { // > If there is no such element, then return and instead act as described in the "any other end tag" entry above. if ( null === $formatting_element ) { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( 'Cannot run adoption agency when "any other end tag" is required.' ); + /* + * > Any other end tag + */ + + /* + * Find the corresponding tag opener in the stack of open elements, if + * it exists before reaching a special element, which provides a kind + * of boundary in the stack. For example, a `` should not + * close anything beyond its containing `P` or `DIV` element. + */ + foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { + if ( $subject === $node->node_name ) { + break; + } + + if ( self::is_special( $node->node_name ) ) { + // This is a parse error, ignore the token. 
+ return; + } + } + + $this->generate_implied_end_tags( $subject ); + + foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { + $this->state->stack_of_open_elements->pop(); + if ( $node === $item ) { + return; + } + } } // > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return. @@ -1594,6 +1616,10 @@ private function run_adoption_agency_algorithm() { return; } + /* + * > If formatting element is not the current node, this is a parse error. (But do not return.) + */ + /* * > Let furthest block be the topmost node in the stack of open elements that is lower in the stack * > than formatting element, and is an element in the special category. There might not be one. @@ -1601,7 +1627,7 @@ private function run_adoption_agency_algorithm() { $is_above_formatting_element = true; $furthest_block = null; foreach ( $this->state->stack_of_open_elements->walk_down() as $item ) { - if ( $is_above_formatting_element && $formatting_element->bookmark_name !== $item->bookmark_name ) { + if ( $is_above_formatting_element && $formatting_element !== $item ) { continue; } @@ -1625,19 +1651,87 @@ private function run_adoption_agency_algorithm() { foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { $this->state->stack_of_open_elements->pop(); - if ( $formatting_element->bookmark_name === $item->bookmark_name ) { + if ( $formatting_element === $item ) { $this->state->active_formatting_elements->remove_node( $formatting_element ); return; } } } + /* + * > Let common ancestor be the element immediately above formatting element in the stack of open elements. 
+ */ + $common_ancestor = null; + foreach ( $this->state->stack_of_open_elements->walk_up( $formatting_element ) as $item ) { + $common_ancestor = $item; + break; + } + + /* + * Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list. + */ + $formatting_element_index = 0; + foreach ( $this->state->active_formatting_elements->walk_down() as $item ) { + if ( $formatting_element === $item ) { + break; + } + + ++$formatting_element_index; + } + + /* + * > Let node and last node be furthest block. + */ + $node = $furthest_block; + $last_node = $furthest_block; + + $inner_loop_counter = 0; + while ( $budget-- > 0 ) { + ++$inner_loop_counter; + + if ( $this->state->stack_of_open_elements->contains_node( $node ) ) { + foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) { + $node = $item; + break; + } + } else { + $this->last_error = self::ERROR_UNSUPPORTED; + throw new WP_HTML_Unsupported_Exception( 'Cannot adjust node pointer above removed node.' ); + } + + if ( $formatting_element === $node ) { + break; + } + + if ( $inner_loop_counter > 3 && $this->state->active_formatting_elements->contains_node( $node ) ) { + $this->state->active_formatting_elements->remove_node( $node ); + } + + if ( ! $this->state->active_formatting_elements->contains_node( $node ) ) { + $this->state->stack_of_open_elements->remove_node( $node ); + continue; + } + + /* + * > Create an element for the token for which the element node was created, + * in the HTML namespace, with common ancestor as the intended parent; + * replace the entry for node in the list of active formatting elements + * with an entry for the new element, replace the entry for node in the + * stack of open elements with an entry for the new element, and let node + * be the new element. 
+ */ + $this->last_error = self::ERROR_UNSUPPORTED; + throw new WP_HTML_Unsupported_Exception( 'Cannot create and reference new element for which no token exists.' ); + } + + /* + * > Insert whatever last node ended up being in the previous step at the appropriate + * > place for inserting a node, but using common ancestor as the override target. + */ + $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( 'Cannot extract common ancestor in adoption agency algorithm.' ); + throw new WP_HTML_Unsupported_Exception( 'Cannot create and reference new element for which no token exists.' ); } - - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( 'Cannot run adoption agency when looping required.' ); } /** diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 411b3a1ad7d27..4f02ff6a2599c 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -125,11 +125,16 @@ public function test_clear_to_navigate_after_seeking() { * * @covers WP_HTML_Processor::reconstruct_active_formatting_elements */ - public function test_fails_to_reconstruct_formatting_elements() { + public function test_reconstructs_formatting_elements() { $p = WP_HTML_Processor::create_fragment( '
One Two Three Four' );
$this->assertTrue( $p->next_tag( 'EM' ), 'Could not find first EM.' );
- $this->assertFalse( $p->next_tag( 'EM' ), 'Should have aborted before finding second EM as it required reconstructing the first EM.' );
+ $this->assertTrue( $p->next_tag( 'EM' ), 'Should have found second EM.' );
+ $this->assertSame(
+ array( 'HTML', 'BODY', 'P', 'EM', 'EM' ),
+ $p->get_breadcrumbs(),
+ 'Should have reconstructed the previous EM before finding the second.'
+ );
}
/**
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
index cbb45977f2256..b3ce6490ba3e0 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
@@ -251,11 +251,6 @@ public function test_fails_when_encountering_unsupported_markup( $html, $descrip
*/
public function data_unsupported_markup() {
return array(
- 'A with formatting following unclosed A' => array(
- 'Click Here',
- 'Unclosed formatting requires complicated reconstruction.',
- ),
-
'A after unclosed A inside DIV' => array(
'',
'A is a formatting element, which requires more complicated reconstruction.',
@@ -340,6 +335,13 @@ public function data_html_target_with_breadcrumbs() {
'P after closed P' => array( ' something This one ', array( 'HTML', 'BODY', 'P', 'A' ), 2 ),
+
+ 'A with formatting following unclosed A' => array(
+ 'Click Here',
+ array( 'HTML', 'BODY', 'STRONG', 'A', 'BIG' ),
+ 1
+ ),
+
// This one adds a test at a deep stack depth to ensure things work for situations beyond short test docs.
'Large HTML document with deep P' => array(
'