Skip to content

Commit

Permalink
Add some support for adoption agency algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
dmsnell committed Jan 15, 2024
1 parent ac69f5d commit 5b9b256
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 21 deletions.
122 changes: 108 additions & 14 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -1553,12 +1553,7 @@ private function run_adoption_agency_algorithm() {
return;
}

$outer_loop_counter = 0;
while ( $budget-- > 0 ) {
if ( $outer_loop_counter++ >= 8 ) {
return;
}

for ( $outer_loop_counter = 0; $outer_loop_counter < 8; $outer_loop_counter++ ) {
/*
* > Let formatting element be the last element in the list of active formatting elements that:
* > - is between the end of the list and the last marker in the list,
Expand All @@ -1579,8 +1574,35 @@ private function run_adoption_agency_algorithm() {

// > If there is no such element, then return and instead act as described in the "any other end tag" entry above.
if ( null === $formatting_element ) {
$this->last_error = self::ERROR_UNSUPPORTED;
throw new WP_HTML_Unsupported_Exception( 'Cannot run adoption agency when "any other end tag" is required.' );
/*
* > Any other end tag
*/

/*
* Find the corresponding tag opener in the stack of open elements, if
* it exists before reaching a special element, which provides a kind
* of boundary in the stack. For example, a `</custom-tag>` should not
* close anything beyond its containing `P` or `DIV` element.
*/
foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
if ( $subject === $node->node_name ) {
break;
}

if ( self::is_special( $node->node_name ) ) {
// This is a parse error, ignore the token.
return;
}
}

$this->generate_implied_end_tags( $subject );

foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
$this->state->stack_of_open_elements->pop();
if ( $node === $item ) {
return;
}
}
}

// > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return.
Expand All @@ -1594,14 +1616,18 @@ private function run_adoption_agency_algorithm() {
return;
}

/*
* > If formatting element is not the current node, this is a parse error. (But do not return.)
*/

/*
* > Let furthest block be the topmost node in the stack of open elements that is lower in the stack
* > than formatting element, and is an element in the special category. There might not be one.
*/
$is_above_formatting_element = true;
$furthest_block = null;
foreach ( $this->state->stack_of_open_elements->walk_down() as $item ) {
if ( $is_above_formatting_element && $formatting_element->bookmark_name !== $item->bookmark_name ) {
if ( $is_above_formatting_element && $formatting_element !== $item ) {
continue;
}

Expand All @@ -1625,19 +1651,87 @@ private function run_adoption_agency_algorithm() {
foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
$this->state->stack_of_open_elements->pop();

if ( $formatting_element->bookmark_name === $item->bookmark_name ) {
if ( $formatting_element === $item ) {
$this->state->active_formatting_elements->remove_node( $formatting_element );
return;
}
}
}

/*
* > Let common ancestor be the element immediately above formatting element in the stack of open elements.
*/
$common_ancestor = null;
foreach ( $this->state->stack_of_open_elements->walk_up( $formatting_element ) as $item ) {
$common_ancestor = $item;
break;
}

/*
* > Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
*/
$formatting_element_index = 0;
foreach ( $this->state->active_formatting_elements->walk_down() as $item ) {
if ( $formatting_element === $item ) {
break;
}

++$formatting_element_index;
}

/*
* > Let node and last node be furthest block.
*/
$node = $furthest_block;
$last_node = $furthest_block;

$inner_loop_counter = 0;
while ( $budget-- > 0 ) {
++$inner_loop_counter;

if ( $this->state->stack_of_open_elements->contains_node( $node ) ) {
foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) {
$node = $item;
break;
}
} else {
$this->last_error = self::ERROR_UNSUPPORTED;
throw new WP_HTML_Unsupported_Exception( 'Cannot adjust node pointer above removed node.' );
}

if ( $formatting_element === $node ) {
break;
}

if ( $inner_loop_counter > 3 && $this->state->active_formatting_elements->contains_node( $node ) ) {
$this->state->active_formatting_elements->remove_node( $node );
}

if ( ! $this->state->active_formatting_elements->contains_node( $node ) ) {
$this->state->stack_of_open_elements->remove_node( $node );
continue;
}

/*
* > Create an element for the token for which the element node was created,
* > in the HTML namespace, with common ancestor as the intended parent;
* > replace the entry for node in the list of active formatting elements
* > with an entry for the new element, replace the entry for node in the
* > stack of open elements with an entry for the new element, and let node
* > be the new element.
*/
$this->last_error = self::ERROR_UNSUPPORTED;
throw new WP_HTML_Unsupported_Exception( 'Cannot create and reference new element for which no token exists.' );
}

/*
* > Insert whatever last node ended up being in the previous step at the appropriate
* > place for inserting a node, but using common ancestor as the override target.
*/

$this->last_error = self::ERROR_UNSUPPORTED;
throw new WP_HTML_Unsupported_Exception( 'Cannot extract common ancestor in adoption agency algorithm.' );
throw new WP_HTML_Unsupported_Exception( 'Cannot create and reference new element for which no token exists.' );
}

$this->last_error = self::ERROR_UNSUPPORTED;
throw new WP_HTML_Unsupported_Exception( 'Cannot run adoption agency when looping required.' );
}

/**
Expand Down
9 changes: 7 additions & 2 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,16 @@ public function test_clear_to_navigate_after_seeking() {
*
* @covers WP_HTML_Processor::reconstruct_active_formatting_elements
*/
public function test_fails_to_reconstruct_formatting_elements() {
public function test_reconstructs_formatting_elements() {
$p = WP_HTML_Processor::create_fragment( '<p><em>One<p><em>Two<p><em>Three<p><em>Four' );

$this->assertTrue( $p->next_tag( 'EM' ), 'Could not find first EM.' );
$this->assertFalse( $p->next_tag( 'EM' ), 'Should have aborted before finding second EM as it required reconstructing the first EM.' );
$this->assertTrue( $p->next_tag( 'EM' ), 'Should have found second EM.' );
$this->assertSame(
array( 'HTML', 'BODY', 'P', 'EM', 'EM' ),
$p->get_breadcrumbs(),
'Should have reconstructed the previous EM before finding the second.'
);
}

/**
Expand Down
12 changes: 7 additions & 5 deletions tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
Original file line number Diff line number Diff line change
Expand Up @@ -251,11 +251,6 @@ public function test_fails_when_encountering_unsupported_markup( $html, $descrip
*/
public function data_unsupported_markup() {
return array(
'A with formatting following unclosed A' => array(
'<a><strong>Click <a supported><big unsupported>Here</big></a></strong></a>',
'Unclosed formatting requires complicated reconstruction.',
),

'A after unclosed A inside DIV' => array(
'<a><div supported><a unsupported></div></a>',
'A is a formatting element, which requires more complicated reconstruction.',
Expand Down Expand Up @@ -340,6 +335,13 @@ public function data_html_target_with_breadcrumbs() {
'P after closed P' => array( '<p><i>something</i></p><p target>This one</p>', array( 'HTML', 'BODY', 'P' ), 2 ),
'A after unclosed A' => array( '<a><a target>', array( 'HTML', 'BODY', 'A' ), 2 ),
'A after unclosed A, after a P' => array( '<p><a><a target>', array( 'HTML', 'BODY', 'P', 'A' ), 2 ),

'A with formatting following unclosed A' => array(
'<a><strong>Click <a><big target>Here</big></a></strong></a>',
array( 'HTML', 'BODY', 'STRONG', 'A', 'BIG' ),
1
),

// This one adds a test at a deep stack depth to ensure things work for situations beyond short test docs.
'Large HTML document with deep P' => array(
'<div><div><div><div><div><div><div><div><p></p><p></p><p><div><strong><em><code></code></em></strong></div></p></div></div></div></div></div></div></div></div><div><div><div><div><div><div><div><div><p></p><p></p><p><div><strong><em><code target></code></em></strong></div></p></div></div></div></div></div></div></div></div>',
Expand Down

0 comments on commit 5b9b256

Please sign in to comment.