Skip to content

Commit

Permalink
PR Feedback
Browse files Browse the repository at this point in the history
Co-authored-by: Jon Surrell <[email protected]>
  • Loading branch information
dmsnell and sirreal committed Jan 12, 2024
1 parent 4639ff8 commit 9f29920
Showing 1 changed file with 43 additions and 17 deletions.
60 changes: 43 additions & 17 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -1174,7 +1174,10 @@ public function has_class( $wanted_class ) {
*/
public function set_bookmark( $name ) {
// It only makes sense to set a bookmark if the parser has paused on a concrete token.
if ( self::STATE_INCOMPLETE === $this->parser_state ) {
if (
self::STATE_COMPLETE === $this->parser_state ||
self::STATE_INCOMPLETE === $this->parser_state
) {
return false;
}

Expand Down Expand Up @@ -1555,12 +1558,12 @@ private function parse_next_tag() {
}

/*
* <! transitions to markup declaration open state
* `<!` transitions to markup declaration open state
* https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
*/
if ( '!' === $html[ $at + 1 ] ) {
/*
* <!-- transitions to a bogus comment state – skip to the nearest -->
* `<!--` transitions to a comment state – apply further comment rules.
* https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
*/
if (
Expand All @@ -1579,7 +1582,14 @@ private function parse_next_tag() {
// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
$span_of_dashes = strspn( $html, '-', $closer_at );
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
// @todo This could go wrong if the closer is shorter than `<!---->` because there's no inside content.
/*
* @todo When implementing `set_modifiable_text()` ensure that updates to this token
* don't break the syntax for short comments, e.g. `<!--->`. Unlike other comment
* and bogus comment syntax, these leave no clear insertion point for text and
* they need to be modified specially in order to contain text. E.g. to store
* `?` as the modifiable text, the `<!--->` needs to become `<!--?-->`, which
* involves inserting an additional `-` into the token after the modifiable text.
*/
$this->parser_state = self::STATE_COMMENT;
$this->token_length = $closer_at + $span_of_dashes + 1 - $this->token_starts_at;
$this->text_starts_at = $this->token_starts_at + 4;
Expand Down Expand Up @@ -1628,7 +1638,7 @@ private function parse_next_tag() {
}

/*
* <!DOCTYPE transitions to DOCTYPE state – skip to the nearest >
* `<!DOCTYPE` transitions to DOCTYPE state – skip to the nearest >
* These are ASCII-case-insensitive.
* https://html.spec.whatwg.org/multipage/parsing.html#doctype-state
*/
Expand Down Expand Up @@ -1726,7 +1736,7 @@ private function parse_next_tag() {
}

/*
* <? transitions to a bogus comment state – skip to the nearest >
* `<?` transitions to a bogus comment state – skip to the nearest >
* See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
*/
if ( '?' === $html[ $at + 1 ] ) {
Expand Down Expand Up @@ -1789,6 +1799,9 @@ private function parse_next_tag() {
* If a non-alpha starts the tag name in a tag closer it's a comment.
* Find the first `>`, which closes the comment.
*
* This parser classifies these particular comments as special "funky comments"
* which are made available for further processing.
*
* See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
*/
if ( $this->is_closing_tag ) {
Expand Down Expand Up @@ -2576,6 +2589,7 @@ public function is_tag_closer() {
* - `#cdata-section` when matched on a CDATA node.
* - `#processing-instruction` when matched on a processing instruction.
* - `#comment` when matched on a comment.
* - `#doctype` when matched on a DOCTYPE declaration.
* - `#presumptuous-tag` when matched on an empty tag closer.
* - `#funky-comment` when matched on a funky comment.
*
Expand Down Expand Up @@ -2667,20 +2681,25 @@ public function get_token_name() {
* @return string
*/
public function get_modifiable_text() {
$at = $this->text_starts_at;
$length = $this->text_length;
$text = substr( $this->html, $at, $length );
if ( null === $this->text_starts_at ) {
return '';
}

$text = substr( $this->html, $this->text_starts_at, $this->text_length );

if (
self::STATE_CDATA_NODE === $this->parser_state ||
self::STATE_PI_NODE === $this->parser_state
self::STATE_COMMENT === $this->parser_state ||
self::STATE_DOCTYPE === $this->parser_state ||
self::STATE_PI_NODE === $this->parser_state ||
self::STATE_FUNKY_COMMENT === $this->parser_state
) {
return $text;
}

$text = html_entity_decode( $text, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE );
$decoded = html_entity_decode( $text, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE );

if ( empty( $text ) ) {
if ( empty( $decoded ) ) {
return '';
}

Expand All @@ -2694,14 +2713,14 @@ public function get_modifiable_text() {
switch ( $this->get_tag() ) {
case 'PRE':
case 'TEXTAREA':
if ( "\n" === $text[0] ) {
return substr( $text, 1 );
if ( "\n" === $decoded[0] ) {
return substr( $decoded, 1 );
}
break;
}
}

return $text;
return $decoded;
}

/**
Expand Down Expand Up @@ -3286,7 +3305,8 @@ private function matches() {
const STATE_DOCTYPE = 'STATE_DOCTYPE';

/**
* Indicates that the parser has found an empty tag closer.
* Indicates that the parser has found an empty tag closer `</>`.
*
* Note that in HTML there are no empty tag closers, and they
* are ignored. Nonetheless, the Tag Processor still
* recognizes them as they appear in the HTML stream.
Expand All @@ -3305,8 +3325,14 @@ private function matches() {
* Indicates that the parser has found a "funky comment"
* and it's possible to read and modify its modifiable text.
*
* Examples:
*
* </%url>
* </{"wp-bit":"query/post-author"}>
* </2>
*
* Funky comments are tag closers with invalid tag names. Note
* that in HTML these are treated as HTML comments. Nonetheless,
* that in HTML these are turned into bogus comments. Nonetheless,
* the Tag Processor recognizes them in a stream of HTML and
* exposes them for inspection and modification.
*
Expand Down

0 comments on commit 9f29920

Please sign in to comment.