Skip to content

Commit

Permalink
Merge pull request #4166 from ampproject/fix/href-validation
Browse files Browse the repository at this point in the history
Prevent validation errors for URLs with leading or trailing spaces
  • Loading branch information
westonruter committed Jan 24, 2020
2 parents a2763d8 + 92bb4e6 commit 277ba23
Show file tree
Hide file tree
Showing 2 changed files with 233 additions and 11 deletions.
18 changes: 14 additions & 4 deletions includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -1626,7 +1626,7 @@ private function check_attr_spec_rule_value_regex_casei( DOMElement $node, $attr
private function check_attr_spec_rule_valid_url( DOMElement $node, $attr_name, $attr_spec_rule ) {
if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ] ) && $node->hasAttribute( $attr_name ) ) {
foreach ( $this->extract_attribute_urls( $node->getAttributeNode( $attr_name ) ) as $url ) {
$url = urldecode( $url );
$url = $this->normalize_url_from_attribute_value( $url );

// Check whether the URL is parsable.
$parts = wp_parse_url( $url );
Expand All @@ -1644,7 +1644,7 @@ private function check_attr_spec_rule_valid_url( DOMElement $node, $attr_name, $
}

// Check if the host contains invalid chars (hostCharIsValid: https://github.com/ampproject/amphtml/blob/af1e3a550feeafd732226202b8d1f26dcefefa18/validator/engine/parse-url.js#L62-L103).
$host = wp_parse_url( $url, PHP_URL_HOST );
$host = wp_parse_url( urldecode( $url ), PHP_URL_HOST );
if ( $host && preg_match( '/[!"#$%&\'()*+,\/:;<=>?@[\]^`{|}~\s]/', $host ) ) {
return AMP_Rule_Spec::FAIL;
}
Expand Down Expand Up @@ -1672,6 +1672,16 @@ private function parse_protocol( $url ) {
return null;
}

/**
* Normalize a URL that appeared as a tag attribute.
*
* @param string $url The URL to normalize.
* @return string The normalized URL.
*/
private function normalize_url_from_attribute_value( $url ) {
return preg_replace( '/[\t\r\n]/', '', trim( $url ) );
}

/**
* Check if attribute has a protocol value rule determine if it matches.
*
Expand All @@ -1689,7 +1699,7 @@ private function check_attr_spec_rule_allowed_protocol( DOMElement $node, $attr_
if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOWED_PROTOCOL ] ) ) {
if ( $node->hasAttribute( $attr_name ) ) {
foreach ( $this->extract_attribute_urls( $node->getAttributeNode( $attr_name ) ) as $url ) {
$url_scheme = $this->parse_protocol( $url );
$url_scheme = $this->parse_protocol( $this->normalize_url_from_attribute_value( $url ) );
if ( isset( $url_scheme ) && ! in_array( strtolower( $url_scheme ), $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOWED_PROTOCOL ], true ) ) {
return AMP_Rule_Spec::FAIL;
}
Expand All @@ -1701,7 +1711,7 @@ private function check_attr_spec_rule_allowed_protocol( DOMElement $node, $attr_
foreach ( $attr_spec_rule[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] as $alternative_name ) {
if ( $node->hasAttribute( $alternative_name ) ) {
foreach ( $this->extract_attribute_urls( $node->getAttributeNode( $alternative_name ), $attr_name ) as $url ) {
$url_scheme = $this->parse_protocol( $url );
$url_scheme = $this->parse_protocol( $this->normalize_url_from_attribute_value( $url ) );
if ( isset( $url_scheme ) && ! in_array( strtolower( $url_scheme ), $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOWED_PROTOCOL ], true ) ) {
return AMP_Rule_Spec::FAIL;
}
Expand Down
226 changes: 219 additions & 7 deletions tests/php/test-amp-tag-and-attribute-sanitizer-private-methods.php
Original file line number Diff line number Diff line change
Expand Up @@ -1413,6 +1413,66 @@ public function get_check_attr_spec_rule_blacklisted_value_regex() {
];
}

/**
* Gets the test data for test_check_attr_spec_rule_valid_url().
*
* @return array The test data.
*/
public function get_check_attr_spec_rule_valid_url() {
return [
'no_attribute' => [
'<a></a>',
AMP_Rule_Spec::NOT_APPLICABLE,
],
'correct_url' => [
'<a baz="https://wp.org"></a>',
AMP_Rule_Spec::PASS,
],
'correct_url_leading_space' => [
'<a baz=" https://wp.org"></a>',
AMP_Rule_Spec::PASS,
],
'non_parseable_url' => [
'<a baz="//"></a>',
AMP_Rule_Spec::FAIL,
],
'wrong_protocol' => [
'<a baz="@:wp.org"></a>',
AMP_Rule_Spec::FAIL,
],
'wrong_host' => [
'<a baz="https://wp$camp.org"></a>',
AMP_Rule_Spec::FAIL,
],
];
}

/**
* Tests check_attr_spec_rule_valid_url.
*
* @dataProvider get_check_attr_spec_rule_valid_url
* @group allowed-tags-private-methods
* @covers AMP_Tag_And_Attribute_Sanitizer::check_attr_spec_rule_valid_url()
*
* @param array $source The HTML source to test.
* @param string $expected The expected return value.
* @throws ReflectionException If it's not possible to create a reflection to call the private method.
*/
public function test_check_attr_spec_rule_valid_url( $source, $expected ) {
$node_tag_name = 'a';
$dom = AMP_DOM_Utils::get_dom_from_content( $source );
$sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom );
$node = $dom->getElementsByTagName( $node_tag_name )->item( 0 );
$attr_name = 'baz';
$attr_spec_rule = [ 'value_url' => [] ];

$this->assertEquals(
$expected,
$this->call_private_method( $sanitizer, 'check_attr_spec_rule_valid_url', [ $node, $attr_name, $attr_spec_rule ] ),
sprintf( 'using source: %s', $source )
);
}

/**
* @dataProvider get_check_attr_spec_rule_blacklisted_value_regex
* @group allowed-tags-private-methods
Expand All @@ -1429,7 +1489,7 @@ public function test_check_attr_spec_rule_blacklisted_value_regex( $data, $expec

public function get_check_attr_spec_rule_allowed_protocol() {
return [
'no_attributes' => [
'no_attributes' => [
[
'source' => '<div></div>',
'node_tag_name' => 'div',
Expand All @@ -1438,7 +1498,7 @@ public function get_check_attr_spec_rule_allowed_protocol() {
],
AMP_Rule_Spec::NOT_APPLICABLE,
],
'protocol_pass' => [
'protocol_pass' => [
[
'source' => '<div attribute1="http://example.com"></div>',
'node_tag_name' => 'div',
Expand All @@ -1454,7 +1514,23 @@ public function get_check_attr_spec_rule_allowed_protocol() {
],
AMP_Rule_Spec::PASS,
],
'protocol_multiple_pass' => [
'protocol_pass_leading_space' => [
[
'source' => '<div attribute1=" http://example.com"></div>',
'node_tag_name' => 'div',
'attr_name' => 'attribute1',
'attr_spec_rule' => [
'value_url' => [
'protocol' => [
'http',
'https',
],
],
],
],
AMP_Rule_Spec::PASS,
],
'protocol_multiple_pass' => [
[
'source' => '<div attribute1="http://example.com, https://domain.com"></div>',
'node_tag_name' => 'div',
Expand All @@ -1470,7 +1546,7 @@ public function get_check_attr_spec_rule_allowed_protocol() {
],
AMP_Rule_Spec::PASS,
],
'protocol_fail' => [
'protocol_fail' => [
[
'source' => '<div attribute1="data://example.com"></div>',
'node_tag_name' => 'div',
Expand All @@ -1486,7 +1562,7 @@ public function get_check_attr_spec_rule_allowed_protocol() {
],
AMP_Rule_Spec::FAIL,
],
'protocol_multiple_fail' => [
'protocol_multiple_fail' => [
[
'source' => '<img srcset="http://example.com, data://domain.com">',
'node_tag_name' => 'img',
Expand All @@ -1502,7 +1578,7 @@ public function get_check_attr_spec_rule_allowed_protocol() {
],
AMP_Rule_Spec::FAIL,
],
'protocol_alternative_pass' => [
'protocol_alternative_pass' => [
[
'source' => '<div attribute1_alternative1="http://example.com"></div>',
'node_tag_name' => 'div',
Expand All @@ -1521,7 +1597,7 @@ public function get_check_attr_spec_rule_allowed_protocol() {
],
AMP_Rule_Spec::PASS,
],
'protocol_alternative_fail' => [
'protocol_alternative_fail' => [
[
'source' => '<div attribute1_alternative1="data://example.com"></div>',
'node_tag_name' => 'div',
Expand All @@ -1543,6 +1619,142 @@ public function get_check_attr_spec_rule_allowed_protocol() {
];
}

/**
* Gets the test data for test_parse_protocol().
*
* @return array The test data.
*/
public function get_parse_protocol_data() {
return [
'empty_string' => [
'',
false,
],
'only_space' => [
' ',
false,
],
'traditional_https' => [
'https://example.com',
'https',
],
'trailing_space' => [
'https://foo.com ',
'https',
],
'no_colon' => [
'//image.png ',
false,
],
'two_colons' => [
'foo:baz://image.png ',
'foo:baz',
],
];
}

/**
* Tests parse_protocol.
*
* @dataProvider get_parse_protocol_data
* @group allowed-tags-private-methods
* @covers AMP_Tag_And_Attribute_Sanitizer::parse_protocol()
*
* @param array $url The URL to parse.
* @param string $expected The expected return value.
* @throws ReflectionException If it's not possible to create a reflection to call the private method.
*/
public function test_parse_protocol( $url, $expected ) {
$dom = AMP_DOM_Utils::get_dom_from_content( '' );
$sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom );

$this->assertEquals(
$expected,
$this->call_private_method( $sanitizer, 'parse_protocol', [ $url ] )
);
}

/**
* Gets the test data for test_normalize_url_from_attribute_value().
*
* @return array The test data.
*/
public function get_normalize_url_data() {
$normalized_url = 'https://example.com';

return [
'nothing_to_remove' => [
'https://example.com',
],
'empty_string' => [
'',
],
'only_space' => [
' ',
'',
],
'leading_space' => [
' https://example.com',
$normalized_url,
],
'leading_tab' => [
"\thttps://example.com",
$normalized_url,
],
'trailing_linefeed' => [
"https://example.com \n",
$normalized_url,
],
'trailing_space' => [
'https://example.com ',
$normalized_url,
],
'enclosed_in_spaces' => [
' https://example.com ',
$normalized_url,
],
'space_inside' => [
' https: //exam ple.com ',
'https: //exam ple.com',
],
'tabs_inside' => [
"https:\t//exam\tple.com ",
$normalized_url,
],
'leading_slashes' => [
'//example.com',
],
'url_encoded_space_not_removed' => [
'https://example.com?foo=++baz',
],
];
}

/**
* Tests normalize_url_from_attribute_value.
*
* @dataProvider get_normalize_url_data
* @group allowed-tags-private-methods
* @covers AMP_Tag_And_Attribute_Sanitizer::normalize_url_from_attribute_value()
*
* @param array $url The URL to normalize.
* @param string|null $expected The expected return value.
* @throws ReflectionException If it's not possible to create a reflection to call the private method.
*/
public function test_normalize_url_from_attribute_value( $url, $expected = null ) {
if ( null === $expected ) {
$expected = $url;
}

$dom = AMP_DOM_Utils::get_dom_from_content( '' );
$sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom );

$this->assertEquals(
$expected,
$this->call_private_method( $sanitizer, 'normalize_url_from_attribute_value', [ $url ] )
);
}

/**
* @dataProvider get_check_attr_spec_rule_allowed_protocol
* @group allowed-tags-private-methods
Expand Down

0 comments on commit 277ba23

Please sign in to comment.