Skip to content

Commit

Permalink
Remove support for Processing Instructions
Browse files Browse the repository at this point in the history
Attempting to parse processing instructions conflicts with parsing bogus
comments when a document may be incomplete, which might create a
divergence in the HTML API from browser behavior.
  • Loading branch information
dmsnell committed Jan 11, 2024
1 parent d58fd67 commit 2b6352a
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 81 deletions.
34 changes: 1 addition & 33 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,6 @@
*
* And there are non-elements which are atomic in nature but have no modifiable text.
* - `DOCTYPE` nodes like `<!DOCTYPE html>` which have no closing tag.
* - XML Processing instruction nodes like `<?xml charset="utf8"?>`.
* - The empty end tag `</>` which is ignored in the browser and DOM but exposed
* to the HTML API.
*
Expand Down Expand Up @@ -483,7 +482,6 @@ class WP_HTML_Tag_Processor {
* | *Matched tag* | Found an HTML tag; it's possible to modify its attributes. |
* | *Text node* | Found a #text node; this is plaintext and modifiable. |
* | *CDATA node* | Found a CDATA section; this is modifiable. |
* | *PI node* | Found a Processing Instruction; this is modifiable. |
* | *Comment* | Found a comment or bogus comment; this is modifiable. |
* | *Presumptuous* | Found an empty tag closer: `</>`. |
* | *Funky comment* | Found a tag closer with an invalid tag name; this is modifiable. |
Expand All @@ -496,7 +494,6 @@ class WP_HTML_Tag_Processor {
* @see WP_HTML_Tag_Processor::STATE_MATCHED_TAG
* @see WP_HTML_Tag_Processor::STATE_TEXT_NODE
* @see WP_HTML_Tag_Processor::STATE_CDATA_NODE
* @see WP_HTML_Tag_Processor::STATE_PI_NODE
* @see WP_HTML_Tag_Processor::STATE_COMMENT
* @see WP_HTML_Tag_Processor::STATE_DOCTYPE
* @see WP_HTML_Tag_Processor::STATE_PRESUMPTUOUS_TAG
Expand Down Expand Up @@ -806,7 +803,6 @@ public function next_tag( $query = null ) {
* - a text node - the plaintext inside tags.
* - an HTML comment.
* - a DOCTYPE declaration.
* - a processing instruction, e.g. `<?xml version="1.0" ?>`.
*
* The Tag Processor currently only supports the tag token.
*
Expand Down Expand Up @@ -1723,9 +1719,6 @@ private function parse_next_tag() {
/*
* <? transitions to a bogus comment state – skip to the nearest >
* See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
*
* Although this becomes a DOM comment, the Tag Processor is going to treat
* it as a processing instruction to be able to treat it as the raw syntax.
*/
if ( '?' === $html[ $at + 1 ] ) {
$closer_at = strpos( $html, '>', $at + 2 );
Expand All @@ -1735,7 +1728,7 @@ private function parse_next_tag() {
return false;
}

$this->parser_state = self::STATE_PI_NODE;
$this->parser_state = self::STATE_COMMENT;
$this->token_length = $closer_at + 1 - $this->token_starts_at;
$this->text_starts_at = $this->token_starts_at + 2;
$this->text_length = $closer_at - $this->text_starts_at;
Expand Down Expand Up @@ -2532,7 +2525,6 @@ public function is_tag_closer() {
* - `#tag` when matched on a tag.
* - `#text` when matched on a text node.
* - `#cdata-section` when matched on a CDATA node.
* - `#processing-instruction` when matched on a processing instruction.
* - `#comment` when matched on a comment.
* - `#presumptuous-tag` when matched on an empty tag closer.
* - `#funky-comment` when matched on a funky comment.
Expand All @@ -2549,9 +2541,6 @@ public function get_token_type() {
case self::STATE_DOCTYPE:
return '#doctype';

case self::STATE_PI_NODE:
return '#processing-instruction';

default:
return $this->get_token_name();
}
Expand All @@ -2566,7 +2555,6 @@ public function get_token_type() {
*
* Dynamic names:
* - Uppercase tag name for tag matches.
* - Tag name for processing instructions.
* - `html` for DOCTYPE declarations.
*
* Note that if the Tag Processor is not matched on a token
Expand All @@ -2589,10 +2577,6 @@ public function get_token_name() {
case self::STATE_CDATA_NODE:
return '#cdata-section';

case self::STATE_PI_NODE:
// @todo add the PI tag.
return '?';

case self::STATE_COMMENT:
return '#comment';

Expand Down Expand Up @@ -3197,22 +3181,6 @@ private function matches() {
*/
const STATE_CDATA_NODE = 'STATE_CDATA_NODE';

/**
* Parser Processing Instruction State.
*
* Indicates that the parser has found a Processing Instruction and
* it's possible to read and modify its modifiable text. Note that in
* HTML there are no Processing Instruction nodes and they are treated
* as HTML comments. Nonetheless, the Tag Processor still recognizes
* them as they appear in the HTML stream and exposes them for
* inspection and modification.
*
* @since 6.5.0
*
* @access private
*/
const STATE_PI_NODE = 'STATE_PI_NODE';

/**
* Indicates that the parser has found an HTML comment and it's
* possible to read and modify its modifiable text.
Expand Down
54 changes: 6 additions & 48 deletions tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public function test_basic_assertion_element() {
);

$attributes = $processor->get_attribute_names_with_prefix( '' );
$attribute_list = array_map( array( 'self', 'quoted' ), $attributes );
$attribute_list = array_map( 'Tests_HtmlApi_WpHtmlProcessor_Token_Scanning::quoted', $attributes );
$this->assertSame(
array( 'id', 'inert' ),
$attributes,
Expand Down Expand Up @@ -127,7 +127,7 @@ public function test_basic_assertion_script_element() {
);

$attributes = $processor->get_attribute_names_with_prefix( '' );
$attribute_list = array_map( array( 'self', 'quoted' ), $attributes );
$attribute_list = array_map( 'Tests_HtmlApi_WpHtmlProcessor_Token_Scanning::quoted', $attributes );
$this->assertSame(
array( 'type' ),
$attributes,
Expand Down Expand Up @@ -178,7 +178,7 @@ public function test_basic_assertion_textarea_element() {
);

$attributes = $processor->get_attribute_names_with_prefix( '' );
$attribute_list = array_map( array( 'self', 'quoted' ), $attributes );
$attribute_list = array_map( 'Tests_HtmlApi_WpHtmlProcessor_Token_Scanning::quoted', $attributes );
$this->assertSame(
array( 'rows', 'cols' ),
$attributes,
Expand Down Expand Up @@ -224,7 +224,7 @@ public function test_basic_assertion_title_element() {
);

$attributes = $processor->get_attribute_names_with_prefix( '' );
$attribute_list = array_map( array( 'self', 'quoted' ), $attributes );
$attribute_list = array_map( 'Tests_HtmlApi_WpHtmlProcessor_Token_Scanning::quoted', $attributes );
$this->assertSame(
array( 'class' ),
$attributes,
Expand Down Expand Up @@ -273,7 +273,7 @@ public function test_basic_assertion_rawtext_elements( $tag_name ) {
);

$attributes = $processor->get_attribute_names_with_prefix( '' );
$attribute_list = array_map( array( 'self', 'quoted' ), $attributes );
$attribute_list = array_map( 'Tests_HtmlApi_WpHtmlProcessor_Token_Scanning::quoted', $attributes );
$this->assertSame(
array( 'class' ),
$attributes,
Expand Down Expand Up @@ -338,48 +338,6 @@ public function test_basic_assertion_cdata_section() {
);
}

/**
* Ensures that normative Processing Instruction nodes are properly parsed.
*
* @ticket 60170
*
* @since 6.5.0
*
* @covers WP_HTML_Tag_Processor::next_token
*/
public function test_basic_assertion_processing_instruction() {
$processor = WP_HTML_Processor::create_fragment( '<?xml charset="utf-8">' );
$processor->next_token();

$this->assertSame(
'#processing-instruction',
$processor->get_token_type(),
"Should have found PI node but found {$processor->get_token_type()} instead."
);

$this->assertSame(
'xml',
$processor->get_token_name(),
"Should have found PI tag as name but found {$processor->get_token_name()} instead."
);

$this->assertNull(
$processor->get_tag(),
'Should not have been able to query tag name on non-element token.'
);

$this->assertNull(
$processor->get_attribute( 'type' ),
'Should not have been able to query attributes on non-element token.'
);

$this->assertSame(
' charset="utf-8"',
$processor->get_modifiable_text(),
'Found incorrect modifiable text.'
);
}

/**
* Ensures that common comments are properly parsed.
*
Expand Down Expand Up @@ -436,7 +394,7 @@ public function data_common_comments() {
return array(
'Shortest comment' => array( '<!-->', '' ),
'Short comment' => array( '<!--->', '' ),
'Invalid PI node' => array( '<? missing>', ' missing' ),
'Invalid PI node' => array( '<?/missing/>', '/missing/' ),
'Invalid ! directive' => array( '<!something else>', 'something else' ),
);
}
Expand Down

0 comments on commit 2b6352a

Please sign in to comment.