diff --git a/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php b/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php index b635d83e4aa..6786a213df1 100644 --- a/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php +++ b/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php @@ -1626,7 +1626,7 @@ private function check_attr_spec_rule_value_regex_casei( DOMElement $node, $attr private function check_attr_spec_rule_valid_url( DOMElement $node, $attr_name, $attr_spec_rule ) { if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ] ) && $node->hasAttribute( $attr_name ) ) { foreach ( $this->extract_attribute_urls( $node->getAttributeNode( $attr_name ) ) as $url ) { - $url = urldecode( $url ); + $url = $this->normalize_url_from_attribute_value( $url ); // Check whether the URL is parsable. $parts = wp_parse_url( $url ); @@ -1644,7 +1644,7 @@ private function check_attr_spec_rule_valid_url( DOMElement $node, $attr_name, $ } // Check if the host contains invalid chars (hostCharIsValid: https://github.com/ampproject/amphtml/blob/af1e3a550feeafd732226202b8d1f26dcefefa18/validator/engine/parse-url.js#L62-L103). - $host = wp_parse_url( $url, PHP_URL_HOST ); + $host = wp_parse_url( urldecode( $url ), PHP_URL_HOST ); if ( $host && preg_match( '/[!"#$%&\'()*+,\/:;<=>?@[\]^`{|}~\s]/', $host ) ) { return AMP_Rule_Spec::FAIL; } @@ -1672,6 +1672,16 @@ private function parse_protocol( $url ) { return null; } + /** + * Normalize a URL that appeared as a tag attribute. + * + * @param string $url The URL to normalize. + * @return string The normalized URL. + */ + private function normalize_url_from_attribute_value( $url ) { + return preg_replace( '/[\t\r\n]/', '', trim( $url ) ); + } + /** * Check if attribute has a protocol value rule determine if it matches. * @@ -1689,7 +1699,7 @@ private function check_attr_spec_rule_allowed_protocol( DOMElement $node, $attr_ if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOWED_PROTOCOL ] ) ) { if ( $node->hasAttribute( $attr_name ) ) { foreach ( $this->extract_attribute_urls( $node->getAttributeNode( $attr_name ) ) as $url ) { - $url_scheme = $this->parse_protocol( $url ); + $url_scheme = $this->parse_protocol( $this->normalize_url_from_attribute_value( $url ) ); if ( isset( $url_scheme ) && ! in_array( strtolower( $url_scheme ), $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOWED_PROTOCOL ], true ) ) { return AMP_Rule_Spec::FAIL; } @@ -1701,7 +1711,7 @@ private function check_attr_spec_rule_allowed_protocol( DOMElement $node, $attr_ foreach ( $attr_spec_rule[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] as $alternative_name ) { if ( $node->hasAttribute( $alternative_name ) ) { foreach ( $this->extract_attribute_urls( $node->getAttributeNode( $alternative_name ), $attr_name ) as $url ) { - $url_scheme = $this->parse_protocol( $url ); + $url_scheme = $this->parse_protocol( $this->normalize_url_from_attribute_value( $url ) ); if ( isset( $url_scheme ) && ! in_array( strtolower( $url_scheme ), $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOWED_PROTOCOL ], true ) ) { return AMP_Rule_Spec::FAIL; } diff --git a/tests/php/test-amp-tag-and-attribute-sanitizer-private-methods.php b/tests/php/test-amp-tag-and-attribute-sanitizer-private-methods.php index 79e90d603d9..6f825713a40 100644 --- a/tests/php/test-amp-tag-and-attribute-sanitizer-private-methods.php +++ b/tests/php/test-amp-tag-and-attribute-sanitizer-private-methods.php @@ -1413,6 +1413,66 @@ public function get_check_attr_spec_rule_blacklisted_value_regex() { ]; } + /** + * Gets the test data for test_check_attr_spec_rule_valid_url(). + * + * @return array The test data. + */ + public function get_check_attr_spec_rule_valid_url() { + return [ + 'no_attribute' => [ + '', + AMP_Rule_Spec::NOT_APPLICABLE, + ], + 'correct_url' => [ + '', + AMP_Rule_Spec::PASS, + ], + 'correct_url_leading_space' => [ + '', + AMP_Rule_Spec::PASS, + ], + 'non_parseable_url' => [ + '', + AMP_Rule_Spec::FAIL, + ], + 'wrong_protocol' => [ + '', + AMP_Rule_Spec::FAIL, + ], + 'wrong_host' => [ + '', + AMP_Rule_Spec::FAIL, + ], + ]; + } + + /** + * Tests check_attr_spec_rule_valid_url. + * + * @dataProvider get_check_attr_spec_rule_valid_url + * @group allowed-tags-private-methods + * @covers AMP_Tag_And_Attribute_Sanitizer::check_attr_spec_rule_valid_url() + * + * @param array $source The HTML source to test. + * @param string $expected The expected return value. + * @throws ReflectionException If it's not possible to create a reflection to call the private method. + */ + public function test_check_attr_spec_rule_valid_url( $source, $expected ) { + $node_tag_name = 'a'; + $dom = AMP_DOM_Utils::get_dom_from_content( $source ); + $sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom ); + $node = $dom->getElementsByTagName( $node_tag_name )->item( 0 ); + $attr_name = 'baz'; + $attr_spec_rule = [ 'value_url' => [] ]; + + $this->assertEquals( + $expected, + $this->call_private_method( $sanitizer, 'check_attr_spec_rule_valid_url', [ $node, $attr_name, $attr_spec_rule ] ), + sprintf( 'using source: %s', $source ) + ); + } + /** * @dataProvider get_check_attr_spec_rule_blacklisted_value_regex * @group allowed-tags-private-methods @@ -1429,7 +1489,7 @@ public function test_check_attr_spec_rule_blacklisted_value_regex( $data, $expec public function get_check_attr_spec_rule_allowed_protocol() { return [ - 'no_attributes' => [ + 'no_attributes' => [ [ 'source' => '
', 'node_tag_name' => 'div', @@ -1438,7 +1498,7 @@ public function get_check_attr_spec_rule_allowed_protocol() { ], AMP_Rule_Spec::NOT_APPLICABLE, ], - 'protocol_pass' => [ + 'protocol_pass' => [ [ 'source' => '
', 'node_tag_name' => 'div', @@ -1454,7 +1514,23 @@ public function get_check_attr_spec_rule_allowed_protocol() { ], AMP_Rule_Spec::PASS, ], - 'protocol_multiple_pass' => [ + 'protocol_pass_leading_space' => [ + [ + 'source' => '
', + 'node_tag_name' => 'div', + 'attr_name' => 'attribute1', + 'attr_spec_rule' => [ + 'value_url' => [ + 'protocol' => [ + 'http', + 'https', + ], + ], + ], + ], + AMP_Rule_Spec::PASS, + ], + 'protocol_multiple_pass' => [ [ 'source' => '
', 'node_tag_name' => 'div', @@ -1470,7 +1546,7 @@ public function get_check_attr_spec_rule_allowed_protocol() { ], AMP_Rule_Spec::PASS, ], - 'protocol_fail' => [ + 'protocol_fail' => [ [ 'source' => '
', 'node_tag_name' => 'div', @@ -1486,7 +1562,7 @@ public function get_check_attr_spec_rule_allowed_protocol() { ], AMP_Rule_Spec::FAIL, ], - 'protocol_multiple_fail' => [ + 'protocol_multiple_fail' => [ [ 'source' => '', 'node_tag_name' => 'img', @@ -1502,7 +1578,7 @@ public function get_check_attr_spec_rule_allowed_protocol() { ], AMP_Rule_Spec::FAIL, ], - 'protocol_alternative_pass' => [ + 'protocol_alternative_pass' => [ [ 'source' => '
', 'node_tag_name' => 'div', @@ -1521,7 +1597,7 @@ public function get_check_attr_spec_rule_allowed_protocol() { ], AMP_Rule_Spec::PASS, ], - 'protocol_alternative_fail' => [ + 'protocol_alternative_fail' => [ [ 'source' => '
', 'node_tag_name' => 'div', @@ -1543,6 +1619,142 @@ public function get_check_attr_spec_rule_allowed_protocol() { ]; } + /** + * Gets the test data for test_parse_protocol(). + * + * @return array The test data. + */ + public function get_parse_protocol_data() { + return [ + 'empty_string' => [ + '', + false, + ], + 'only_space' => [ + ' ', + false, + ], + 'traditional_https' => [ + 'https://example.com', + 'https', + ], + 'trailing_space' => [ + 'https://foo.com ', + 'https', + ], + 'no_colon' => [ + '//image.png ', + false, + ], + 'two_colons' => [ + 'foo:baz://image.png ', + 'foo:baz', + ], + ]; + } + + /** + * Tests parse_protocol. + * + * @dataProvider get_parse_protocol_data + * @group allowed-tags-private-methods + * @covers AMP_Tag_And_Attribute_Sanitizer::parse_protocol() + * + * @param array $url The URL to parse. + * @param string $expected The expected return value. + * @throws ReflectionException If it's not possible to create a reflection to call the private method. + */ + public function test_parse_protocol( $url, $expected ) { + $dom = AMP_DOM_Utils::get_dom_from_content( '' ); + $sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom ); + + $this->assertEquals( + $expected, + $this->call_private_method( $sanitizer, 'parse_protocol', [ $url ] ) + ); + } + + /** + * Gets the test data for test_normalize_url_from_attribute_value(). + * + * @return array The test data. + */ + public function get_normalize_url_data() { + $normalized_url = 'https://example.com'; + + return [ + 'nothing_to_remove' => [ + 'https://example.com', + ], + 'empty_string' => [ + '', + ], + 'only_space' => [ + ' ', + '', + ], + 'leading_space' => [ + ' https://example.com', + $normalized_url, + ], + 'leading_tab' => [ + "\thttps://example.com", + $normalized_url, + ], + 'trailing_linefeed' => [ + "https://example.com \n", + $normalized_url, + ], + 'trailing_space' => [ + 'https://example.com ', + $normalized_url, + ], + 'enclosed_in_spaces' => [ + ' https://example.com ', + $normalized_url, + ], + 'space_inside' => [ + ' https: //exam ple.com ', + 'https: //exam ple.com', + ], + 'tabs_inside' => [ + "https:\t//exam\tple.com ", + $normalized_url, + ], + 'leading_slashes' => [ + '//example.com', + ], + 'url_encoded_space_not_removed' => [ + 'https://example.com?foo=++baz', + ], + ]; + } + + /** + * Tests normalize_url_from_attribute_value. + * + * @dataProvider get_normalize_url_data + * @group allowed-tags-private-methods + * @covers AMP_Tag_And_Attribute_Sanitizer::normalize_url_from_attribute_value() + * + * @param array $url The URL to normalize. + * @param string|null $expected The expected return value. + * @throws ReflectionException If it's not possible to create a reflection to call the private method. + */ + public function test_normalize_url_from_attribute_value( $url, $expected = null ) { + if ( null === $expected ) { + $expected = $url; + } + + $dom = AMP_DOM_Utils::get_dom_from_content( '' ); + $sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom ); + + $this->assertEquals( + $expected, + $this->call_private_method( $sanitizer, 'normalize_url_from_attribute_value', [ $url ] ) + ); + } + /** * @dataProvider get_check_attr_spec_rule_allowed_protocol * @group allowed-tags-private-methods