Skip to content

Commit

Permalink
Fix handling of ID attributes during sanitizer conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
westonruter committed Nov 23, 2021
1 parent eaefc1e commit 263501f
Show file tree
Hide file tree
Showing 10 changed files with 111 additions and 20 deletions.
4 changes: 4 additions & 0 deletions includes/sanitizers/class-amp-audio-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
*/

use AmpProject\AmpWP\ValidationExemption;
use AmpProject\Attribute;
use AmpProject\DevMode;

/**
Expand Down Expand Up @@ -90,6 +91,9 @@ public function sanitize() {
$sources[] = $new_attributes['src'];
}

// Remove the ID from the original node so that PHP DOM doesn't fail to set it on the replacement element.
$node->removeAttribute( Attribute::ID );

/**
* Original node.
*
Expand Down
9 changes: 7 additions & 2 deletions includes/sanitizers/class-amp-bento-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,14 @@ public function sanitize() {
}

$amp_element = $this->dom->createElement( $amp_name );
foreach ( $bento_element->attributes as $attribute ) {
while ( $bento_element->attributes->length ) {
/** @var DOMAttr $attribute */
$amp_element->setAttribute( $attribute->nodeName, $attribute->nodeValue );
$attribute = $bento_element->attributes->item( 0 );

// Essential for unique attributes like ID, or else PHP DOM will keep it referencing the old element.
$bento_element->removeAttributeNode( $attribute );

$amp_element->setAttributeNode( $attribute );
}

while ( $bento_element->firstChild instanceof DOMNode ) {
Expand Down
3 changes: 3 additions & 0 deletions includes/sanitizers/class-amp-iframe-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,9 @@ public function sanitize() {
$this->add_or_append_attribute( $normalized_attributes, 'class', 'amp-wp-enforced-sizes' );
}

// Remove the ID from the original node so that PHP DOM doesn't fail to set it on the replacement element.
$node->removeAttribute( Attribute::ID );

$new_node = AMP_DOM_Utils::create_node( $this->dom, 'amp-iframe', $normalized_attributes );

// Find existing placeholder/overflow.
Expand Down
5 changes: 5 additions & 0 deletions includes/sanitizers/class-amp-img-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,11 @@ private function adjust_and_replace_node( DOMElement $node ) {
$new_tag = 'amp-img';
}

// Remove ID since it would be a duplicate and because if it is not removed before replacing the element with
// another element that has the same ID, the removed element would still get returned by getElementById even
// when it is no longer in the Document.
$node->removeAttribute( Attribute::ID );

$img_node = AMP_DOM_Utils::create_node( $this->dom, $new_tag, $new_attributes );
$node->parentNode->replaceChild( $img_node, $node );

Expand Down
11 changes: 9 additions & 2 deletions includes/sanitizers/class-amp-o2-player-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* @package AMP
*/

use AmpProject\Attribute;
use AmpProject\Dom\Document;

/**
Expand Down Expand Up @@ -104,10 +105,16 @@ private function create_amp_o2_player( Document $dom, DOMElement $node ) {
]
);

$amp_o2_player = AMP_DOM_Utils::create_node( $dom, self::$amp_tag, $component_attributes );

$parent_node = $node->parentNode;

// Remove the ID from the original node so that PHP DOM doesn't fail to set it on the replacement element.
if ( $parent_node->hasAttribute( Attribute::ID ) ) {
$component_attributes['id'] = $parent_node->getAttribute( Attribute::ID );
$parent_node->removeAttribute( Attribute::ID );
}

$amp_o2_player = AMP_DOM_Utils::create_node( $dom, self::$amp_tag, $component_attributes );

// replaces the wrapper that contains the script with amp-o2-player element.
$parent_node->parentNode->replaceChild( $amp_o2_player, $parent_node );

Expand Down
8 changes: 6 additions & 2 deletions includes/sanitizers/class-amp-object-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,12 @@ public function sanitize_pdf( Element $element ) {

$attributes_to_copy = [ Attribute::ID, Attribute::CLASS_ ];
foreach ( $attributes_to_copy as $attribute_name ) {
if ( $element->hasAttribute( $attribute_name ) ) {
$attributes[ $attribute_name ] = $element->getAttribute( $attribute_name );
$attribute = $element->getAttributeNode( $attribute_name );
if ( $attribute instanceof DOMAttr ) {
// Remove the attribute from the original node so that PHP DOM doesn't fail to set it on the replacement element (as happens with ID).
$element->removeAttributeNode( $attribute );

$attributes[ $attribute_name ] = $attribute->value;
}
}

Expand Down
8 changes: 8 additions & 0 deletions includes/sanitizers/class-amp-playbuzz-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
* @package AMP
*/

use AmpProject\Attribute;
use AmpProject\Dom\Element;

/**
* Class AMP_Playbuzz_Sanitizer
*
Expand Down Expand Up @@ -67,6 +70,7 @@ public function sanitize() {
}

for ( $i = $num_nodes - 1; $i >= 0; $i-- ) {
/** @var Element $node */
$node = $nodes->item( $i );

if ( self::$pb_class !== $node->getAttribute( 'class' ) ) {
Expand All @@ -81,6 +85,9 @@ public function sanitize() {
continue;
}

// Remove the ID from the original node so that PHP DOM doesn't fail to set it on the replacement element.
$node->removeAttribute( Attribute::ID );

$new_node = AMP_DOM_Utils::create_node( $this->dom, 'amp-playbuzz', $new_attributes );

$node->parentNode->replaceChild( $new_node, $node );
Expand Down Expand Up @@ -132,6 +139,7 @@ private function filter_attributes( $attributes ) {
case 'data-game-info':
case 'data-comments':
case 'class':
case 'id':
$out[ $name ] = $value;
break;

Expand Down
3 changes: 3 additions & 0 deletions includes/sanitizers/class-amp-style-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,9 @@ public function sanitize() {
}

// Prevent selectors like `amp-img img` getting deleted since `img` does not occur in the DOM.
// @todo This should be strictly limited to sanitizers that convert elements and have a light shadow DOM.
// It should apply to amp-img but not to the Bento sanitizer, as right doing `bento-accordion foo`
// will result in the selector never being tree-shaken even though the `foo` tag is not in the DOM.
$this->args['dynamic_element_selectors'] = array_merge(
$this->args['dynamic_element_selectors'],
$this->selector_mappings[ $html_selectors ]
Expand Down
3 changes: 3 additions & 0 deletions includes/sanitizers/class-amp-video-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,9 @@ public function sanitize() {
}
$new_attributes = $this->set_layout( $new_attributes );

// Remove the ID from the original node so that PHP DOM doesn't fail to set it on the replacement element.
$node->removeAttribute( Attribute::ID );

/**
* Original node.
*
Expand Down
77 changes: 63 additions & 14 deletions tests/php/test-amp-style-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -1583,13 +1583,56 @@ public function get_attribute_selector_data() {
'.stateful' => false,
],
],
'bento_accordion_with_id' => [
'
<bento-accordion id="my-accordion">
<section>
<h2>Section 1</h2>
<div>Content in section 1.</div>
</section>
</bento-accordion>
',
[
'#my-accordion' => true,
'amp-accordion#my-accordion' => true,
],
],
'amp_replacement_elements_with_id' => [
'
<img id="my-img" src="https://example.com/foo.png" width="100" height="100">
<audio id="my-audio" src="https://example.com/foo.mp3" width="100" height="100"></audio>
<video id="my-video" src="https://example.com/foo.mp4" width="100" height="100"></video>
<iframe id="my-iframe" src="https://example.com/foo.mp4" width="100" height="100"></iframe>
<object id="my-object" data="https://example.com/foo.pd4" type="application/pdf"></object>
<div id="my-playbuzz" class="pb_feed" data-item="226dd4c0-ef13-4fee-850b-7be32bf6d121"></div>
<div id="my-o2" class="vdb_player"><script type="text/javascript" src="//delivery.vidible.tv/jsonp/pid=59521379f3bdc970c5c9d75e/vid=5b11a50d0239e257abfdf16a/59521191e9399f3a7d7de88f.js?m.embeded=cms_video_plugin_chromeExtension"></script></div>
',
[
'#my-img' => true,
'amp-img#my-img' => true,
'#my-audio' => true,
'amp-audio#my-audio' => true,
'#my-video' => true,
'amp-video#my-video' => true,
'#my-iframe' => true,
'amp-iframe#my-iframe' => true,
'#my-object' => true,
'amp-google-document-embed#my-object' => true,
'#my-playbuzz' => true,
'amp-playbuzz#my-playbuzz' => true,
'#my-o2' => true,
'amp-o2-player#my-o2' => true,
// @todo Embeds?
],
],
];
}

/**
* Test attribute selector tree shaking.
*
* @dataProvider get_attribute_selector_data
* @covers AMP_Base_Sanitizer::get_selector_conversion_mapping
*
* @param string $markup Source HTML markup.
* @param array $selectors Mapping of selectors to whether they are expected.
Expand All @@ -1605,24 +1648,30 @@ static function ( $selector ) {
)
);

$html = "<html amp><head><meta charset=utf-8><style amp-custom>$style</style></head><body>$markup</body></html>";
$dom = Document::fromHtml( $html, Options::DEFAULTS );
// The toggling of the 'add_noscript_fallback' arg is to catch a bizzare PHP DOM issue whereby if you replace
// an element in a Document, and that replaced element had an ID, the element will still be returned by
// getElementById even though it is no longer inside of the document. When add_noscript_fallback is false,
// then the original img (for example) will not be inside of the document (?).
foreach ( [ true, false ] as $add_noscript_fallback ) {
$html = "<html amp><head><meta charset=utf-8><style amp-custom>$style</style></head><body>$markup</body></html>";
$dom = Document::fromHtml( $html, Options::DEFAULTS );

$sanitizer_classes = amp_get_content_sanitizers();
$sanitized = AMP_Content_Sanitizer::sanitize_document(
$dom,
amp_get_content_sanitizers(),
array_merge(
compact( 'add_noscript_fallback' ),
[ 'use_document_element' => true ]
)
);

$sanitized = AMP_Content_Sanitizer::sanitize_document(
$dom,
$sanitizer_classes,
[
'use_document_element' => true,
]
);
$stylesheets = array_values( $sanitized['stylesheets'] );

$stylesheets = array_values( $sanitized['stylesheets'] );
$actual_selectors = array_values( array_filter( preg_split( '/{.+?}/s', $stylesheets[0] ) ) );
$expected_selectors = array_keys( array_filter( $selectors ) );

$actual_selectors = array_values( array_filter( preg_split( '/{.+?}/s', $stylesheets[0] ) ) );
$expected_selectors = array_keys( array_filter( $selectors ) );
$this->assertEqualSets( $expected_selectors, $actual_selectors );
$this->assertEqualSets( $expected_selectors, $actual_selectors, sprintf( 'add_noscript_fallback is %s', wp_json_encode( $add_noscript_fallback ) ) );
}
}

/**
Expand Down

0 comments on commit 263501f

Please sign in to comment.