From 82c274b0a84228205757e4e78b041f9fc07b776c Mon Sep 17 00:00:00 2001
From: ThierryA
Date: Fri, 19 Jan 2018 00:33:34 +0100
Subject: [PATCH 1/7] Bumped version to 0.6-rc2
---
amp.php | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/amp.php b/amp.php
index 68cbdb4b8db..580c8cec7b7 100644
--- a/amp.php
+++ b/amp.php
@@ -5,7 +5,7 @@
* Plugin URI: https://github.com/automattic/amp-wp
* Author: Automattic
* Author URI: https://automattic.com
- * Version: 0.6-rc1
+ * Version: 0.6-rc2
* Text Domain: amp
* Domain Path: /languages/
* License: GPLv2 or later
@@ -15,7 +15,7 @@
define( 'AMP__FILE__', __FILE__ );
define( 'AMP__DIR__', dirname( __FILE__ ) );
-define( 'AMP__VERSION', '0.6-rc1' );
+define( 'AMP__VERSION', '0.6-rc2' );
require_once AMP__DIR__ . '/includes/class-amp-autoloader.php';
AMP_Autoloader::register();
From 91ef3867dc1f523dad09a735119fb7140b8f439d Mon Sep 17 00:00:00 2001
From: Weston Ruter
Date: Fri, 19 Jan 2018 10:59:27 -0800
Subject: [PATCH 2/7] Remove erroneous additional allowance of
script[type=text/javascript]
Issue introduced in #828 via 5c1fa2086
---
includes/sanitizers/class-amp-rule-spec.php | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/includes/sanitizers/class-amp-rule-spec.php b/includes/sanitizers/class-amp-rule-spec.php
index 694b7dcf7df..6bec8aa167a 100644
--- a/includes/sanitizers/class-amp-rule-spec.php
+++ b/includes/sanitizers/class-amp-rule-spec.php
@@ -87,22 +87,18 @@ abstract class AMP_Rule_Spec {
*/
public static $additional_allowed_tags = array(
- /**
- * An experimental tag with no protoascii
- */
+ // An experimental tag with no protoascii.
'amp-share-tracking' => array(
'attr_spec_list' => array(),
'tag_spec' => array(),
),
- /**
- * Needed for some tags such as analytics
- */
+ // Needed for some tags such as analytics.
'script' => array(
'attr_spec_list' => array(
'type' => array(
'mandatory' => true,
- 'value_casei' => 'text/javascript',
+ 'value_casei' => 'application/json',
),
),
'tag_spec' => array(),
From a6662896c66699ea2c726ecb25e9369918fd497d Mon Sep 17 00:00:00 2001
From: David Cramer
Date: Wed, 17 Jan 2018 09:31:12 +0200
Subject: [PATCH 3/7] Make add `get_dom` method to AMP_DOM_Utils
---
includes/utils/class-amp-dom-utils.php | 47 +++++++++++++++++++-------
1 file changed, 34 insertions(+), 13 deletions(-)
diff --git a/includes/utils/class-amp-dom-utils.php b/includes/utils/class-amp-dom-utils.php
index d5f54bcbbf4..87122347667 100644
--- a/includes/utils/class-amp-dom-utils.php
+++ b/includes/utils/class-amp-dom-utils.php
@@ -13,17 +13,15 @@
class AMP_DOM_Utils {
/**
- * Return a valid DOMDocument representing arbitrary HTML content passed as a parameter.
- *
- * @see Reciprocal function get_content_from_dom()
+ * Return a valid DOMDocument representing HTML document passed as a parameter.
*
- * @since 0.2
+ * @since 0.7
*
- * @param string $content Valid HTML content to be represented by a DOMDocument.
+ * @param string $document Valid HTML document to be represented by a DOMDocument.
*
* @return DOMDocument|false Returns DOMDocument, or false if conversion failed.
*/
- public static function get_dom_from_content( $content ) {
+ public static function get_dom( $document ) {
$libxml_previous_state = libxml_use_internal_errors( true );
$dom = new DOMDocument();
@@ -34,13 +32,7 @@ public static function get_dom_from_content( $content ) {
* We can later use this to extract our nodes.
* Add charset so loadHTML does not have problems parsing it.
*/
- $result = $dom->loadHTML(
- sprintf(
- '%s',
- get_bloginfo( 'charset' ),
- $content
- )
- );
+ $result = $dom->loadHTML( $document );
libxml_clear_errors();
libxml_use_internal_errors( $libxml_previous_state );
@@ -52,6 +44,35 @@ public static function get_dom_from_content( $content ) {
return $dom;
}
+ /**
+ * Return a valid DOMDocument representing arbitrary HTML content passed as a parameter.
+ *
+ * @see Reciprocal function get_content_from_dom()
+ *
+ * @since 0.2
+ *
+ * @param string $content Valid HTML content to be represented by a DOMDocument.
+ *
+ * @return DOMDocument|false Returns DOMDocument, or false if conversion failed.
+ */
+ public static function get_dom_from_content( $content ) {
+ /*
+ * Wrap in dummy tags, since XML needs one parent node.
+ * It also makes it easier to loop through nodes.
+ * We can later use this to extract our nodes.
+ * Add utf-8 charset so loadHTML does not have problems parsing it.
+ * See: http://php.net/manual/en/domdocument.loadhtml.php#78243
+ */
+ $document = sprintf(
+ '%s',
+ get_bloginfo( 'charset' ),
+ $content
+ );
+
+ return self::get_dom( $document );
+
+ }
+
/**
* Return valid HTML content extracted from the DOMDocument passed as a parameter.
*
From 70a444e10f23aa8bf6b1d500edda9648548150c9 Mon Sep 17 00:00:00 2001
From: Weston Ruter
Date: Mon, 22 Jan 2018 19:22:09 -0800
Subject: [PATCH 4/7] Sanitize the output-buffered HTML document response
---
includes/class-amp-theme-support.php | 47 +++++++++---------
.../templates/class-amp-content-sanitizer.php | 17 +++++--
includes/utils/class-amp-dom-utils.php | 2 +
tests/test-class-amp-theme-support.php | 49 +++++++++++++++++++
4 files changed, 87 insertions(+), 28 deletions(-)
diff --git a/includes/class-amp-theme-support.php b/includes/class-amp-theme-support.php
index c5100e90128..0dc51a7a7d1 100644
--- a/includes/class-amp-theme-support.php
+++ b/includes/class-amp-theme-support.php
@@ -191,8 +191,6 @@ public static function register_hooks() {
*/
add_action( 'template_redirect', array( __CLASS__, 'start_output_buffering' ), 0 );
- add_filter( 'the_content', array( __CLASS__, 'filter_the_content' ), PHP_INT_MAX );
-
// @todo Add character conversion.
}
@@ -422,32 +420,12 @@ public static function get_amp_custom_styles() {
return $css;
}
- /**
- * Filter the content to be valid AMP.
- *
- * @param string $content Content.
- * @return string Amplified content.
- */
- public static function filter_the_content( $content ) {
- $args = array(
- 'content_max_width' => ! empty( $content_width ) ? $content_width : AMP_Post_Template::CONTENT_MAX_WIDTH, // Back-compat.
- );
-
- list( $sanitized_content, $scripts, $styles ) = AMP_Content_Sanitizer::sanitize( $content, self::$sanitizer_classes, $args );
-
- self::$amp_scripts = array_merge( self::$amp_scripts, $scripts );
- self::$amp_styles = array_merge( self::$amp_styles, $styles );
-
- return $sanitized_content;
- }
-
/**
* Determine required AMP scripts.
*
- * @param string $html Output HTML.
* @return string Scripts to inject into the HEAD.
*/
- public static function get_amp_component_scripts( $html ) {
+ public static function get_amp_component_scripts() {
$amp_scripts = self::$amp_scripts;
foreach ( self::$embed_handlers as $embed_handler ) {
@@ -492,10 +470,32 @@ public static function start_output_buffering() {
* Finish output buffering.
*
* @todo Do this in shutdown instead of output buffering callback?
+ * @global int $content_width
* @param string $output Buffered output.
* @return string Finalized output.
*/
public static function finish_output_buffering( $output ) {
+ global $content_width;
+
+ $dom = AMP_DOM_Utils::get_dom( $output );
+ $args = array(
+ 'content_max_width' => ! empty( $content_width ) ? $content_width : AMP_Post_Template::CONTENT_MAX_WIDTH, // Back-compat.
+ );
+
+ /*
+ * @todo The sanitize method needs to be updated to sanitize the entire HTML element and not just the BODY.
+ * This will require updating mandatory_parent_blacklist in amphtml-update.py to include elements that appear in the HEAD.
+ * This will ensure that the scripts and styles that plugins output via wp_head() will be sanitized as well. However,
+ * since the the old paired mode is sending content from the *body* we'll need to be able to filter out the elements
+ * from outside the body from being part of the whitelist sanitizer when it runs when theme support is not present,
+ * as otherwise elements from the HEAD could get added to the BODY.
+ */
+ list( $sanitized_inner_body, $scripts, $styles ) = AMP_Content_Sanitizer::sanitize( $dom, self::$sanitizer_classes, $args );
+
+ self::$amp_scripts = array_merge( self::$amp_scripts, $scripts );
+ self::$amp_styles = array_merge( self::$amp_styles, $styles );
+
+ $output = preg_replace( '#()(.+)(
+
+
+
+
+ )#si', '$1' . $sanitized_inner_body . '$3', $output );
// Inject required scripts.
$output = preg_replace(
@@ -513,7 +513,6 @@ public static function finish_output_buffering( $output ) {
1
);
- // @todo Add more validation checking and potentially the whitelist sanitizer.
return $output;
}
}
diff --git a/includes/templates/class-amp-content-sanitizer.php b/includes/templates/class-amp-content-sanitizer.php
index dcbd62097ef..eabfefe74bf 100644
--- a/includes/templates/class-amp-content-sanitizer.php
+++ b/includes/templates/class-amp-content-sanitizer.php
@@ -13,16 +13,20 @@ class AMP_Content_Sanitizer {
/**
* Sanitize.
*
- * @param string $content Content.
- * @param string[] $sanitizer_classes Sanitizer classes.
- * @param array $global_args Global args.
+ * @param string|DOMDocument $content HTML content string or DOM document.
+ * @param string[] $sanitizer_classes Sanitizer classes.
+ * @param array $global_args Global args.
*
* @return array
*/
public static function sanitize( $content, array $sanitizer_classes, $global_args = array() ) {
$scripts = array();
$styles = array();
- $dom = AMP_DOM_Utils::get_dom_from_content( $content );
+ if ( $content instanceof DOMDocument ) {
+ $dom = $content;
+ } else {
+ $dom = AMP_DOM_Utils::get_dom_from_content( $content );
+ }
foreach ( $sanitizer_classes as $sanitizer_class => $args ) {
if ( ! class_exists( $sanitizer_class ) ) {
@@ -31,6 +35,11 @@ public static function sanitize( $content, array $sanitizer_classes, $global_arg
continue;
}
+ /**
+ * Sanitizer.
+ *
+ * @type AMP_Base_Sanitizer $sanitizer
+ */
$sanitizer = new $sanitizer_class( $dom, array_merge( $global_args, $args ) );
if ( ! is_subclass_of( $sanitizer, 'AMP_Base_Sanitizer' ) ) {
diff --git a/includes/utils/class-amp-dom-utils.php b/includes/utils/class-amp-dom-utils.php
index 87122347667..e6a8b96918f 100644
--- a/includes/utils/class-amp-dom-utils.php
+++ b/includes/utils/class-amp-dom-utils.php
@@ -88,6 +88,8 @@ public static function get_content_from_dom( $dom ) {
/**
* We only want children of the body tag, since we have a subset of HTML.
+ *
+ * @todo We will want to get the full HTML eventually.
*/
$body = $dom->getElementsByTagName( 'body' )->item( 0 );
diff --git a/tests/test-class-amp-theme-support.php b/tests/test-class-amp-theme-support.php
index 5a10663bfb7..0b15e7f1165 100644
--- a/tests/test-class-amp-theme-support.php
+++ b/tests/test-class-amp-theme-support.php
@@ -67,4 +67,53 @@ public function test_is_paired_available() {
$this->assertTrue( is_search() );
$this->assertFalse( AMP_Theme_Support::is_paired_available() );
}
+
+ /**
+ * Test finish_output_buffering.
+ *
+ * @covers AMP_Theme_Support::finish_output_buffering()
+ */
+ public function test_finish_output_buffering() {
+ add_theme_support( 'amp' );
+ AMP_Theme_Support::init();
+ ob_start();
+ ?>
+
+ >
+