From 82c274b0a84228205757e4e78b041f9fc07b776c Mon Sep 17 00:00:00 2001 From: ThierryA Date: Fri, 19 Jan 2018 00:33:34 +0100 Subject: [PATCH 1/7] Bumped version to 0.6-rc2 --- amp.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/amp.php b/amp.php index 68cbdb4b8db..580c8cec7b7 100644 --- a/amp.php +++ b/amp.php @@ -5,7 +5,7 @@ * Plugin URI: https://github.com/automattic/amp-wp * Author: Automattic * Author URI: https://automattic.com - * Version: 0.6-rc1 + * Version: 0.6-rc2 * Text Domain: amp * Domain Path: /languages/ * License: GPLv2 or later @@ -15,7 +15,7 @@ define( 'AMP__FILE__', __FILE__ ); define( 'AMP__DIR__', dirname( __FILE__ ) ); -define( 'AMP__VERSION', '0.6-rc1' ); +define( 'AMP__VERSION', '0.6-rc2' ); require_once AMP__DIR__ . '/includes/class-amp-autoloader.php'; AMP_Autoloader::register(); From 91ef3867dc1f523dad09a735119fb7140b8f439d Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Fri, 19 Jan 2018 10:59:27 -0800 Subject: [PATCH 2/7] Remove erroneous additional allowance of script[type=text/javascript] Issue introduced in #828 via 5c1fa2086 --- includes/sanitizers/class-amp-rule-spec.php | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/includes/sanitizers/class-amp-rule-spec.php b/includes/sanitizers/class-amp-rule-spec.php index 694b7dcf7df..6bec8aa167a 100644 --- a/includes/sanitizers/class-amp-rule-spec.php +++ b/includes/sanitizers/class-amp-rule-spec.php @@ -87,22 +87,18 @@ abstract class AMP_Rule_Spec { */ public static $additional_allowed_tags = array( - /** - * An experimental tag with no protoascii - */ + // An experimental tag with no protoascii. 'amp-share-tracking' => array( 'attr_spec_list' => array(), 'tag_spec' => array(), ), - /** - * Needed for some tags such as analytics - */ + // Needed for some tags such as analytics. 'script' => array( 'attr_spec_list' => array( 'type' => array( 'mandatory' => true, - 'value_casei' => 'text/javascript', + 'value_casei' => 'application/json', ), ), 'tag_spec' => array(), From a6662896c66699ea2c726ecb25e9369918fd497d Mon Sep 17 00:00:00 2001 From: David Cramer Date: Wed, 17 Jan 2018 09:31:12 +0200 Subject: [PATCH 3/7] Make add `get_dom` method to AMP_DOM_Utils --- includes/utils/class-amp-dom-utils.php | 47 +++++++++++++++++++------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/includes/utils/class-amp-dom-utils.php b/includes/utils/class-amp-dom-utils.php index d5f54bcbbf4..87122347667 100644 --- a/includes/utils/class-amp-dom-utils.php +++ b/includes/utils/class-amp-dom-utils.php @@ -13,17 +13,15 @@ class AMP_DOM_Utils { /** - * Return a valid DOMDocument representing arbitrary HTML content passed as a parameter. - * - * @see Reciprocal function get_content_from_dom() + * Return a valid DOMDocument representing HTML document passed as a parameter. * - * @since 0.2 + * @since 0.7 * - * @param string $content Valid HTML content to be represented by a DOMDocument. + * @param string $document Valid HTML document to be represented by a DOMDocument. * * @return DOMDocument|false Returns DOMDocument, or false if conversion failed. */ - public static function get_dom_from_content( $content ) { + public static function get_dom( $document ) { $libxml_previous_state = libxml_use_internal_errors( true ); $dom = new DOMDocument(); @@ -34,13 +32,7 @@ public static function get_dom_from_content( $content ) { * We can later use this to extract our nodes. * Add charset so loadHTML does not have problems parsing it. */ - $result = $dom->loadHTML( - sprintf( - '%s', - get_bloginfo( 'charset' ), - $content - ) - ); + $result = $dom->loadHTML( $document ); libxml_clear_errors(); libxml_use_internal_errors( $libxml_previous_state ); @@ -52,6 +44,35 @@ public static function get_dom_from_content( $content ) { return $dom; } + /** + * Return a valid DOMDocument representing arbitrary HTML content passed as a parameter. + * + * @see Reciprocal function get_content_from_dom() + * + * @since 0.2 + * + * @param string $content Valid HTML content to be represented by a DOMDocument. + * + * @return DOMDocument|false Returns DOMDocument, or false if conversion failed. + */ + public static function get_dom_from_content( $content ) { + /* + * Wrap in dummy tags, since XML needs one parent node. + * It also makes it easier to loop through nodes. + * We can later use this to extract our nodes. + * Add utf-8 charset so loadHTML does not have problems parsing it. + * See: http://php.net/manual/en/domdocument.loadhtml.php#78243 + */ + $document = sprintf( + '%s', + get_bloginfo( 'charset' ), + $content + ); + + return self::get_dom( $document ); + + } + /** * Return valid HTML content extracted from the DOMDocument passed as a parameter. * From 70a444e10f23aa8bf6b1d500edda9648548150c9 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Mon, 22 Jan 2018 19:22:09 -0800 Subject: [PATCH 4/7] Sanitize the output-buffered HTML document response --- includes/class-amp-theme-support.php | 47 +++++++++--------- .../templates/class-amp-content-sanitizer.php | 17 +++++-- includes/utils/class-amp-dom-utils.php | 2 + tests/test-class-amp-theme-support.php | 49 +++++++++++++++++++ 4 files changed, 87 insertions(+), 28 deletions(-) diff --git a/includes/class-amp-theme-support.php b/includes/class-amp-theme-support.php index c5100e90128..0dc51a7a7d1 100644 --- a/includes/class-amp-theme-support.php +++ b/includes/class-amp-theme-support.php @@ -191,8 +191,6 @@ public static function register_hooks() { */ add_action( 'template_redirect', array( __CLASS__, 'start_output_buffering' ), 0 ); - add_filter( 'the_content', array( __CLASS__, 'filter_the_content' ), PHP_INT_MAX ); - // @todo Add character conversion. } @@ -422,32 +420,12 @@ public static function get_amp_custom_styles() { return $css; } - /** - * Filter the content to be valid AMP. - * - * @param string $content Content. - * @return string Amplified content. - */ - public static function filter_the_content( $content ) { - $args = array( - 'content_max_width' => ! empty( $content_width ) ? $content_width : AMP_Post_Template::CONTENT_MAX_WIDTH, // Back-compat. - ); - - list( $sanitized_content, $scripts, $styles ) = AMP_Content_Sanitizer::sanitize( $content, self::$sanitizer_classes, $args ); - - self::$amp_scripts = array_merge( self::$amp_scripts, $scripts ); - self::$amp_styles = array_merge( self::$amp_styles, $styles ); - - return $sanitized_content; - } - /** * Determine required AMP scripts. * - * @param string $html Output HTML. * @return string Scripts to inject into the HEAD. */ - public static function get_amp_component_scripts( $html ) { + public static function get_amp_component_scripts() { $amp_scripts = self::$amp_scripts; foreach ( self::$embed_handlers as $embed_handler ) { @@ -492,10 +470,32 @@ public static function start_output_buffering() { * Finish output buffering. * * @todo Do this in shutdown instead of output buffering callback? + * @global int $content_width * @param string $output Buffered output. * @return string Finalized output. */ public static function finish_output_buffering( $output ) { + global $content_width; + + $dom = AMP_DOM_Utils::get_dom( $output ); + $args = array( + 'content_max_width' => ! empty( $content_width ) ? $content_width : AMP_Post_Template::CONTENT_MAX_WIDTH, // Back-compat. + ); + + /* + * @todo The sanitize method needs to be updated to sanitize the entire HTML element and not just the BODY. + * This will require updating mandatory_parent_blacklist in amphtml-update.py to include elements that appear in the HEAD. + * This will ensure that the scripts and styles that plugins output via wp_head() will be sanitized as well. However, + * since the the old paired mode is sending content from the *body* we'll need to be able to filter out the elements + * from outside the body from being part of the whitelist sanitizer when it runs when theme support is not present, + * as otherwise elements from the HEAD could get added to the BODY. + */ + list( $sanitized_inner_body, $scripts, $styles ) = AMP_Content_Sanitizer::sanitize( $dom, self::$sanitizer_classes, $args ); + + self::$amp_scripts = array_merge( self::$amp_scripts, $scripts ); + self::$amp_styles = array_merge( self::$amp_styles, $styles ); + + $output = preg_replace( '#()(.+)()#si', '$1' . $sanitized_inner_body . '$3', $output ); // Inject required scripts. $output = preg_replace( @@ -513,7 +513,6 @@ public static function finish_output_buffering( $output ) { 1 ); - // @todo Add more validation checking and potentially the whitelist sanitizer. return $output; } } diff --git a/includes/templates/class-amp-content-sanitizer.php b/includes/templates/class-amp-content-sanitizer.php index dcbd62097ef..eabfefe74bf 100644 --- a/includes/templates/class-amp-content-sanitizer.php +++ b/includes/templates/class-amp-content-sanitizer.php @@ -13,16 +13,20 @@ class AMP_Content_Sanitizer { /** * Sanitize. * - * @param string $content Content. - * @param string[] $sanitizer_classes Sanitizer classes. - * @param array $global_args Global args. + * @param string|DOMDocument $content HTML content string or DOM document. + * @param string[] $sanitizer_classes Sanitizer classes. + * @param array $global_args Global args. * * @return array */ public static function sanitize( $content, array $sanitizer_classes, $global_args = array() ) { $scripts = array(); $styles = array(); - $dom = AMP_DOM_Utils::get_dom_from_content( $content ); + if ( $content instanceof DOMDocument ) { + $dom = $content; + } else { + $dom = AMP_DOM_Utils::get_dom_from_content( $content ); + } foreach ( $sanitizer_classes as $sanitizer_class => $args ) { if ( ! class_exists( $sanitizer_class ) ) { @@ -31,6 +35,11 @@ public static function sanitize( $content, array $sanitizer_classes, $global_arg continue; } + /** + * Sanitizer. + * + * @type AMP_Base_Sanitizer $sanitizer + */ $sanitizer = new $sanitizer_class( $dom, array_merge( $global_args, $args ) ); if ( ! is_subclass_of( $sanitizer, 'AMP_Base_Sanitizer' ) ) { diff --git a/includes/utils/class-amp-dom-utils.php b/includes/utils/class-amp-dom-utils.php index 87122347667..e6a8b96918f 100644 --- a/includes/utils/class-amp-dom-utils.php +++ b/includes/utils/class-amp-dom-utils.php @@ -88,6 +88,8 @@ public static function get_content_from_dom( $dom ) { /** * We only want children of the body tag, since we have a subset of HTML. + * + * @todo We will want to get the full HTML eventually. */ $body = $dom->getElementsByTagName( 'body' )->item( 0 ); diff --git a/tests/test-class-amp-theme-support.php b/tests/test-class-amp-theme-support.php index 5a10663bfb7..0b15e7f1165 100644 --- a/tests/test-class-amp-theme-support.php +++ b/tests/test-class-amp-theme-support.php @@ -67,4 +67,53 @@ public function test_is_paired_available() { $this->assertTrue( is_search() ); $this->assertFalse( AMP_Theme_Support::is_paired_available() ); } + + /** + * Test finish_output_buffering. + * + * @covers AMP_Theme_Support::finish_output_buffering() + */ + public function test_finish_output_buffering() { + add_theme_support( 'amp' ); + AMP_Theme_Support::init(); + ob_start(); + ?> + + > + + + + + + + + + + + + assertContains( '', $sanitized_html ); + $this->assertContains( '', $sanitized_html ); + $this->assertContains( '