Skip to content

Commit

Permalink
Sanitize Tumblr embeds
Browse files Browse the repository at this point in the history
  • Loading branch information
pierlon committed May 21, 2020
1 parent c955f45 commit 21ae657
Show file tree
Hide file tree
Showing 2 changed files with 177 additions and 30 deletions.
94 changes: 64 additions & 30 deletions includes/embeds/class-amp-tumblr-embed-handler.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,54 +6,88 @@
* @since 0.7
*/

use AmpProject\Dom\Document;

/**
* Class AMP_Tumblr_Embed_Handler
*/
class AMP_Tumblr_Embed_Handler extends AMP_Base_Embed_Handler {

/**
* Register embed.
* Default width.
*
* Tumblr embeds for web have a fixed width of 540px.
* See <https://tumblr.zendesk.com/hc/en-us/articles/226261028-Embed-pro-tips>.
*
* @var int
*/
public function register_embed() {
add_filter( 'embed_oembed_html', [ $this, 'filter_embed_oembed_html' ], 10, 2 );
}
protected $DEFAULT_WIDTH = 540;

/**
* Base URL used for identifying embeds.
*
* @var string
*/
protected $base_embed_url = 'https://embed.tumblr.com/embed/post/';

/**
* Unregister embed.
* Get all raw embeds from the DOM.
*
* @param Document $dom Document.
* @return DOMNodeList A list of DOMElement nodes.
*/
public function unregister_embed() {
remove_filter( 'embed_oembed_html', [ $this, 'filter_embed_oembed_html' ], 10 );
protected function get_raw_embed_nodes( Document $dom ) {
	/*
	 * Match `tumblr-post` as a whitespace-delimited class token instead of
	 * comparing the whole class attribute for equality. With plain equality,
	 * a placeholder div that carries any additional class name (or stray
	 * whitespace in the attribute) would not be selected and would be left
	 * unconverted. Padding the normalized attribute with spaces and searching
	 * for " tumblr-post " selects the div whenever the token is present,
	 * while still rejecting longer names such as `tumblr-post-wrapper`.
	 *
	 * The data-href prefix check keeps the selection limited to placeholders
	 * that point at the base embed URL configured on this handler.
	 */
	$expression = sprintf(
		'//div[ contains( concat( " ", normalize-space( @class ), " " ), " tumblr-post " ) and starts-with( @data-href, "%s" ) ]',
		$this->base_embed_url
	);

	return $dom->xpath->query( $expression );
}

/**
* Filter oEmbed HTML for Tumblr to prepare it for AMP.
* Make embed AMP compatible.
*
* @param string $cache Cache for oEmbed.
* @param string $url Embed URL.
* @return string Embed.
* @param DOMElement $node DOM element.
*/
public function filter_embed_oembed_html( $cache, $url ) {
$parsed_url = wp_parse_url( $url );
if ( false === strpos( $parsed_url['host'], 'tumblr.com' ) ) {
return $cache;
}
protected function sanitize_raw_embed( DOMElement $node ) {
$dom = Document::fromNode( $node );
$iframe_src = $node->getAttribute( 'data-href' );

$attributes = [
'src' => $iframe_src,
'layout' => 'responsive',
'width' => $this->args['width'],
'height' => $this->args['height'],
'resizable' => '',
'sandbox' => 'allow-scripts allow-popups allow-same-origin'
];

// @todo The iframe will not get sized properly.
if ( preg_match( '#data-href="(?P<href>https://embed.tumblr.com/embed/post/\w+/\w+)"#', $cache, $matches ) ) {
$cache = AMP_HTML_Utils::build_tag(
'amp-iframe',
[
'width' => $this->args['width'],
'height' => $this->args['height'],
'layout' => 'responsive',
'sandbox' => 'allow-scripts allow-popups', // The allow-scripts is needed to allow the iframe to render; allow-popups needed to allow clicking.
'src' => $matches['href'],
],
sprintf( '<a placeholder href="%s">Tumblr</a>', $url )
);
$amp_node = AMP_DOM_Utils::create_node(
$dom,
$this->amp_tag,
$attributes
);

// Add an overflow node to allow the amp-iframe to resize.
$overflow_node = AMP_DOM_Utils::create_node(
$dom,
'div',
[
'overflow' => '',
'tabindex' => 0,
'role' => 'button',
'aria-label' => esc_attr__( 'See more', 'amp' ),
]
);
$overflow_node->textContent = esc_html__( 'See more', 'amp' );
$amp_node->appendChild( $overflow_node );

// Append the original link as a placeholder node.
if ( $node->firstChild instanceof DOMElement && 'a' === $node->firstChild->nodeName ) {
$placeholder_node = $node->firstChild;
$placeholder_node->setAttribute( 'placeholder', '' );
$amp_node->appendChild( $placeholder_node );
}

return $cache;
$this->maybe_remove_script_sibling( $node, 'assets.tumblr.com/post.js' );

$node->parentNode->replaceChild( $amp_node, $node );
}
}

113 changes: 113 additions & 0 deletions tests/php/test-amp-tumblr-embed-handler.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
<?php

/**
 * Tests for AMP_Tumblr_Embed_Handler.
 *
 * The Tumblr oEmbed HTTP request is intercepted with a canned response so the
 * conversion can be exercised without network access.
 */
class AMP_Tumblr_Embed_Handler_Test extends WP_UnitTestCase {

	/**
	 * Install the HTTP interceptor before each test.
	 */
	public function setUp() {
		parent::setUp();

		// Short-circuit outgoing requests through mock_http_request().
		add_filter( 'pre_http_request', [ $this, 'mock_http_request' ], 10, 3 );
	}

	/**
	 * Remove the HTTP interceptor after each test.
	 */
	public function tearDown() {
		remove_filter( 'pre_http_request', [ $this, 'mock_http_request' ] );
		parent::tearDown();
	}

	/**
	 * Supply a canned Tumblr oEmbed response in place of a real HTTP request.
	 *
	 * The incoming value is passed through untouched when the test run was
	 * started with the `external-http` argument, or when the requested URL
	 * does not contain `tumblr.com`.
	 *
	 * @param mixed  $pre Whether to preempt an HTTP request's return value. Default false.
	 * @param mixed  $r   HTTP request arguments.
	 * @param string $url The request URL.
	 * @return array|mixed Mocked response data, or `$pre` when the request is not intercepted.
	 */
	public function mock_http_request( $pre, $r, $url ) {
		$allow_real_requests = in_array( 'external-http', $_SERVER['argv'], true );
		$is_tumblr_request   = false !== strpos( $url, 'tumblr.com' );

		if ( $allow_real_requests || ! $is_tumblr_request ) {
			return $pre;
		}

		$status = [
			'code'    => 200,
			'message' => 'OK',
		];

		return [
			'body'     => '{"cache_age":3600,"url":"https:\/\/ifpaintingscouldtext.tumblr.com\/post\/92003045635\/grant-wood-american-gothic-1930","provider_url":"https:\/\/www.tumblr.com","provider_name":"Tumblr","author_name":"If Paintings Could Text","version":"1.0","author_url":"https:\/\/ifpaintingscouldtext.tumblr.com\/","type":"rich","html":"\u003Cdiv class=\u0022tumblr-post\u0022 data-href=\u0022https:\/\/embed.tumblr.com\/embed\/post\/2JT2XTaiTxO08wh21dqQrw\/92003045635\u0022 data-did=\u00227ce4825965cbd8bfd208f6aae43de7a528859aee\u0022 \u003E\u003Ca href=\u0022https:\/\/ifpaintingscouldtext.tumblr.com\/post\/92003045635\/grant-wood-american-gothic-1930\u0022\u003Ehttps:\/\/ifpaintingscouldtext.tumblr.com\/post\/92003045635\/grant-wood-american-gothic-1930\u003C\/a\u003E\u003C\/div\u003E\u003Cscript async src=\u0022https:\/\/assets.tumblr.com\/post.js\u0022\u003E\u003C\/script\u003E","height":null,"width":540}',
			'response' => $status,
		];
	}

	/**
	 * Run content through `the_content`, parse it, and let the handler sanitize it.
	 *
	 * @param AMP_Tumblr_Embed_Handler $handler Handler under test.
	 * @param string                   $source  Raw post content.
	 * @return mixed Document returned by AMP_DOM_Utils::get_dom_from_content(), after sanitization.
	 */
	private function get_sanitized_dom( AMP_Tumblr_Embed_Handler $handler, $source ) {
		$rendered = apply_filters( 'the_content', $source );
		$document = AMP_DOM_Utils::get_dom_from_content( $rendered );
		$handler->sanitize_raw_embeds( $document );

		return $document;
	}

	/**
	 * Data for test__conversion().
	 *
	 * @return array[] Named data sets of source content and expected markup.
	 */
	public function get_conversion_data() {
		return [
			'no_embed'   => [
				'<p>Hello world.</p>',
				'<p>Hello world.</p>' . PHP_EOL,
			],

			'url_simple' => [
				'https://ifpaintingscouldtext.tumblr.com/post/92003045635/grant-wood-american-gothic-1930' . PHP_EOL,
				'<amp-iframe src="https://embed.tumblr.com/embed/post/2JT2XTaiTxO08wh21dqQrw/92003045635" layout="responsive" width="540" height="480" resizable="" sandbox="allow-scripts allow-popups allow-same-origin"><div overflow="" tabindex="0" role="button" aria-label="See more">See more</div><a href="https://ifpaintingscouldtext.tumblr.com/post/92003045635/grant-wood-american-gothic-1930" placeholder="">https://ifpaintingscouldtext.tumblr.com/post/92003045635/grant-wood-american-gothic-1930</a></amp-iframe>' . PHP_EOL . PHP_EOL,
			],
		];
	}

	/**
	 * The serialized content after sanitization matches the expected markup.
	 *
	 * @dataProvider get_conversion_data
	 *
	 * @param string $source   Raw post content.
	 * @param string $expected Expected markup.
	 */
	public function test__conversion( $source, $expected ) {
		$handler  = new AMP_Tumblr_Embed_Handler();
		$document = $this->get_sanitized_dom( $handler, $source );

		$this->assertEquals(
			$expected,
			AMP_DOM_Utils::get_content_from_dom( $document )
		);
	}

	/**
	 * Data for test__get_scripts().
	 *
	 * @return array[] Named data sets of source content and expected scripts.
	 */
	public function get_scripts_data() {
		return [
			'not_converted' => [
				'<p>Hello World.</p>',
				[],
			],
			'converted'     => [
				'https://ifpaintingscouldtext.tumblr.com/post/92003045635/grant-wood-american-gothic-1930' . PHP_EOL,
				[ 'amp-iframe' => true ],
			],
		];
	}

	/**
	 * The scripts reported by the handler and the tag-and-attribute sanitizer,
	 * merged together, match the expected set.
	 *
	 * @dataProvider get_scripts_data
	 *
	 * @param string $source   Raw post content.
	 * @param array  $expected Expected merged scripts.
	 */
	public function test__get_scripts( $source, $expected ) {
		$handler  = new AMP_Tumblr_Embed_Handler();
		$document = $this->get_sanitized_dom( $handler, $source );

		$tag_sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $document );
		$tag_sanitizer->sanitize();

		// Handler scripts first, then sanitizer scripts, as array_merge() arguments.
		$collected = array_merge(
			$handler->get_scripts(),
			$tag_sanitizer->get_scripts()
		);

		$this->assertEquals( $expected, $collected );
	}
}

0 comments on commit 21ae657

Please sign in to comment.