Skip to content

Commit

Permalink
fix: improve lazyload and replacement on json strings
Browse files Browse the repository at this point in the history
  • Loading branch information
selul committed Dec 14, 2019
1 parent 02df077 commit b7f67fd
Show file tree
Hide file tree
Showing 7 changed files with 171 additions and 214 deletions.
8 changes: 4 additions & 4 deletions inc/lazyload_replacer.php
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ public function lazyload_tag_replace( $new_tag, $original_url, $new_url, $optml_
if ( ! self::$is_lazyload_placeholder && ! $should_ignore_rescale ) {
$optml_args['quality'] = 'eco';
$optml_args['resize'] = [];
$low_url = apply_filters( 'optml_content_url', $is_slashed ? stripslashes( $original_url ) : $original_url, $optml_args );
$low_url = apply_filters( 'optml_content_url', $original_url, $optml_args );
$low_url = $is_slashed ? addcslashes( $low_url, '/' ) : $low_url;
} else {
$low_url = $this->get_svg_for(
Expand Down Expand Up @@ -221,12 +221,12 @@ public function lazyload_tag_replace( $new_tag, $original_url, $new_url, $optml_
);
$new_tag = preg_replace(
[
'/( src(?>=|"|\'|\s|\\\\)*)' . preg_quote( $original_url, '/' ) . '/m',
'/ src=/m',
'/((?:\s|\'|"){1,}src(?>=|"|\'|\s|\\\\)*)' . preg_quote( $original_url, '/' ) . '/m',
'/<img/im',
],
[
"$1$low_url",
$opt_src . ' src=',
'<img' . $opt_src,
],
$new_tag,
1
Expand Down
171 changes: 75 additions & 96 deletions inc/manager.php
Original file line number Diff line number Diff line change
Expand Up @@ -260,107 +260,13 @@ public function replace_meta( $metadata, $object_id, $meta_key, $single ) {
return $metadata;
}

return $this->process_urls_from_json( $current_meta );
return $this->replace_content( $current_meta );
}

// Return original if the check does not pass
return $metadata;
}

/**
 * Replace the image urls found inside a json string.
 *
 * The urls are first collected with extract_urls_from_json() and the
 * substitution itself is delegated to do_url_replacement().
 *
 * @param string $json Json string.
 *
 * @return string Processed string.
 */
public function process_urls_from_json( $json ) {
	return $this->do_url_replacement( $json, $this->extract_urls_from_json( $json ) );
}

/**
 * Collect the urls used as plain values inside a json string, i.e. urls that
 * are not prefixed by an `=` (or backslash) followed by a quote char.
 *
 * Only urls ending in one of the extensions listed as keys of
 * Optml_Config::$extensions are matched; an optional query string is kept.
 *
 * @param string $content Raw json string.
 *
 * @return array Normalized list of urls.
 */
public function extract_urls_from_json( $content ) {
	// Alternation of the supported file extensions, e.g. "jpg|png|...".
	$extensions = implode( '|', array_keys( Optml_Config::$extensions ) );

	$pattern = '/(?<!(=|\\\\)(?:"|\'|"))(?:http(?:s?):)(?:[\/\\\\|.|\w|\s|@|%|-])*\.(?:' . $extensions . ')(?:\??[\w|=|&|\-|\.|:]*)/';

	preg_match_all( $pattern, $content, $matches );

	// Index 0 holds the full matches, which are the urls themselves.
	return $this->normalize_urls( $matches[0] );
}

/**
 * Clean up a list of raw extracted urls.
 *
 * Every `&quot;` entity is removed, trailing backslash / quote / semicolon
 * chars are trimmed and duplicates are dropped.
 *
 * @param array $urls Raw urls extracted.
 *
 * @return array Normalized array, re-indexed from zero.
 */
private function normalize_urls( $urls ) {
	$clean_url = function ( $raw_url ) {
		$without_entities = str_replace( '&quot;', '', $raw_url );

		return rtrim( $without_entities, '\\";\'' );
	};

	return array_values( array_unique( array_map( $clean_url, $urls ) ) );
}

/**
 * Replace the given urls inside a string with their filtered counterpart.
 *
 * The url list goes through the `optml_extracted_urls` filter and every url
 * through the `optml_content_url` filter. Urls that start with the upload
 * content path are prefixed with the content host, and urls that were json
 * slashed get their slashes escaped again after being filtered.
 *
 * @param string $html String content.
 * @param array  $extracted_urls Urls to check.
 *
 * @return string Processed html.
 */
private function do_url_replacement( $html, $extracted_urls ) {
	$candidates = apply_filters( 'optml_extracted_urls', $extracted_urls );

	if ( empty( $candidates ) ) {
		return $html;
	}

	$upload_resource = $this->tag_replacer->get_upload_resource();

	// First pass: map each original url to the url that will take its place.
	$replacements = [];
	foreach ( array_combine( $candidates, $candidates ) as $original => $url ) {
		if ( strpos( $url, $upload_resource['content_path'] ) === 0 ) {
			$url = $upload_resource['content_host'] . $url;
		}

		// Remember the json escaping so it can be restored on the new url.
		$is_slashed = strpos( $url, '\/' ) !== false;
		$optimized  = apply_filters( 'optml_content_url', html_entity_decode( $url ) );

		$replacements[ $original ] = $is_slashed ? addcslashes( $optimized, '/' ) : $optimized;
	}

	// Second pass: swap the urls, skipping occurrences preceded by `/`, `|`, `:` or a word char.
	foreach ( $replacements as $original => $replacement ) {
		$html = preg_replace( '/(?<![\/|:|\\w])' . preg_quote( $original, '/' ) . '/m', $replacement, $html );
	}

	return $html;
}

/**
* Filter raw HTML content for urls.
*
Expand Down Expand Up @@ -444,7 +350,7 @@ public static function parse_images_from_html( $content ) {
$header_end = $header_start + strlen( $matches[0][0] );
}

if ( preg_match_all( '/(?:<a[^>]+?href=["|\'](?P<link_url>[^\s]+?)["|\'][^>]*?>\s*)?(?P<img_tag>(?:<noscript\s*>\s*)?<img[^>]*?\s+?(?:' . implode( '|', array_merge( [ 'src' ], Optml_Tag_Replacer::possible_src_attributes() ) ) . ')=\\\\?["|\'](?P<img_url>[^\s]+?)["|\'].*?>(?:\s*<\/noscript\s*>)?){1}(?:\s*<\/a>)?/ism', $content, $images, PREG_OFFSET_CAPTURE ) ) {
if ( preg_match_all( '/(?:<a[^>]+?href=["|\'](?P<link_url>[^\s]+?)["|\'][^>]*?>\s*)?(?P<img_tag>(?:<noscript\s*>\s*)?<img[^>]*?\s?(?:' . implode( '|', array_merge( [ 'src' ], Optml_Tag_Replacer::possible_src_attributes() ) ) . ')=\\\\?["|\'](?P<img_url>[^\s]+?)["|\'].*?>(?:\s*<\/noscript\s*>)?){1}(?:\s*<\/a>)?/ism', $content, $images, PREG_OFFSET_CAPTURE ) ) {

foreach ( $images as $key => $unused ) {
// Simplify the output as much as possible, mostly for confirming test results.
Expand Down Expand Up @@ -514,6 +420,79 @@ public function extract_image_urls_from_content( $content ) {
return $this->normalize_urls( $urls[1] );
}

/**
 * Normalize the urls extracted from the content.
 *
 * For each url the `&quot;` entities are stripped and any trailing
 * backslash, double quote, semicolon or single quote is removed. The
 * resulting list is de-duplicated and re-indexed.
 *
 * @param array $urls Raw urls extracted.
 *
 * @return array Normalized array.
 */
private function normalize_urls( $urls ) {
	$trimmed = array_map(
		function ( $candidate ) {
			$candidate = str_replace( '&quot;', '', $candidate );
			$candidate = rtrim( $candidate, '\\";\'' );

			return $candidate;
		},
		$urls
	);

	$distinct = array_unique( $trimmed );

	return array_values( $distinct );
}

/**
 * Process string content and replace possible urls.
 *
 * The url list goes through the `optml_extracted_urls` filter, urls that
 * already contain the service url (plain or json slashed) are dropped, and
 * each remaining url is swapped with the value returned by the
 * `optml_content_url` filter.
 *
 * @param string $html String content.
 * @param array  $extracted_urls Urls to check.
 *
 * @return string Processed html.
 */
private function do_url_replacement( $html, $extracted_urls ) {
	$extracted_urls = apply_filters( 'optml_extracted_urls', $extracted_urls );

	if ( empty( $extracted_urls ) ) {
		return $html;
	}

	$service_url    = Optml_Config::$service_url;
	$slashed_config = addcslashes( $service_url, '/' );

	// Skip the urls which already point to the service, in either plain or json slashed form.
	$extracted_urls = array_filter(
		$extracted_urls,
		function ( $value ) use ( $service_url, $slashed_config ) {
			return strpos( $value, $service_url ) === false && strpos( $value, $slashed_config ) === false;
		}
	);

	// Nothing left to replace once the already processed urls are removed.
	if ( empty( $extracted_urls ) ) {
		return $html;
	}

	$upload_resource = $this->tag_replacer->get_upload_resource();
	$urls            = array_combine( $extracted_urls, $extracted_urls );

	$urls = array_map(
		function ( $url ) use ( $upload_resource ) {
			$is_slashed = strpos( $url, '\/' ) !== false;

			// The content path has to be compared in the same (slashed or not) form as the url.
			$content_path = $is_slashed ?
				addcslashes( $upload_resource['content_path'], '/' ) :
				$upload_resource['content_path'];

			if ( strpos( $url, $content_path ) === 0 ) {
				$url = $upload_resource['content_host'] . $url;
			}

			return apply_filters( 'optml_content_url', $url );
		},
		$urls
	);

	foreach ( $urls as $origin => $replace ) {
		$pattern = '/(?<![\/|:|\\w])' . preg_quote( (string) $origin, '/' ) . '/m';
		$replace = (string) $replace;

		// A callback is used so the new url is inserted literally: passed as a
		// plain replacement string, any `$1` / `\1` sequence inside the url
		// would be interpreted as a backreference.
		$result = preg_replace_callback(
			$pattern,
			function () use ( $replace ) {
				return $replace;
			},
			$html
		);

		// On a PCRE error null is returned; keep the content we already have
		// instead of wiping it.
		if ( null !== $result ) {
			$html = $result;
		}
	}

	return $html;
}

/**
* Init html replacer handler.
*/
Expand Down
2 changes: 1 addition & 1 deletion inc/tag_replacer.php
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ public function process_image_tags( $content, $images = array() ) {

$is_slashed = strpos( $images['img_url'][ $index ], '\/' ) !== false;

$src = $tmp = $is_slashed ? stripslashes( $images['img_url'][ $index ] ) : $images['img_url'][ $index ];
$src = $tmp = $is_slashed ? $this->strip_slashes( $images['img_url'][ $index ] ) : $images['img_url'][ $index ];

if ( strpos( $src, $this->upload_resource['content_path'] ) === 0 ) {
$src = $tmp = untrailingslashit( $this->upload_resource['content_host'] ) . $src;
Expand Down
10 changes: 10 additions & 0 deletions inc/traits/normalizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@ public function to_boolean( $value ) {
return boolval( $value );
}

/**
 * Strip slashes on unicode encoded strings.
 *
 * Json style `\uXXXX` sequences are first turned into `&#xXXXX;` html
 * entities so they survive stripslashes(), then the entities are decoded
 * back once the slashes are removed.
 *
 * @param string $string Input string.
 *
 * @return string Decoded string.
 */
public function strip_slashes( $string ) {
	$with_entities = preg_replace( '/\\\u([\da-fA-F]{4})/', '&#x\1;', $string );
	$unslashed     = stripslashes( $with_entities );

	return html_entity_decode( $unslashed );
}
/**
* Normalize value to an integer within bounds.
*
Expand Down
3 changes: 1 addition & 2 deletions inc/url_replacer.php
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,7 @@ public function build_image_url(

// We do a little hack here, for json unicode chars we first replace them with html special chars,
// we then strip slashes to normalize the URL and last we convert html special chars back to get a clean URL
$url = $is_slashed ? html_entity_decode( stripslashes( preg_replace( '/\\\u([\da-fA-F]{4})/', '&#x\1;', $url ) ) ) : $url;

$url = $is_slashed ? html_entity_decode( stripslashes( preg_replace( '/\\\u([\da-fA-F]{4})/', '&#x\1;', $url ) ) ) : ( $url );
if ( strpos( $url, Optml_Config::$service_url ) !== false ) {
return $original_url;
}
Expand Down
22 changes: 22 additions & 0 deletions tests/test-lazyload.php
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,28 @@ public function test_lazyload_json_data_disabled() {
$this->assertEquals( 2, substr_count( $replaced_content2, '/http:\/\/example.org' ) );
}

/**
 * The replaced json content should stay valid json, should not keep any of
 * the original image urls and should preserve the unicode chars.
 *
 * NOTE(review): the expected counts depend on the fixture returned by
 * Test_Replacer::get_html_array() — confirm them against that fixture.
 */
public function test_json_lazyload_replacement() {
	$html = Test_Replacer::get_html_array();

	$replaced_content = Optml_Manager::instance()->replace_content( json_encode( $html ) );
	$this->assertContains( 'i.optimole.com', $replaced_content );
	$this->assertEquals( ( 6 + ( 3 * 48 ) ), substr_count( $replaced_content, 'i.optimole.com' ) );

	// Decode once and reuse the result for both the validity check and the unicode count.
	$replaced_html = json_decode( $replaced_content, true );
	$this->assertTrue( is_array( $replaced_html ) );

	$this->assertNotContains( "\"https:\/\/www.example.org\/wp-content", $replaced_content );
	$this->assertNotContains( "\"\/\/www.example.org\/wp-content", $replaced_content );
	$this->assertNotContains( "\"\/wp-content", $replaced_content );

	$count_unicode = 0;
	foreach ( $replaced_html as $value ) {
		$count_unicode += substr_count( $value, Test_Replacer::DECODED_UNICODE );
	}

	// Expected value goes first so a failure message reports the right sides.
	$this->assertEquals( ( ( 24 * 3 ) + 3 ), $count_unicode );
}

public function test_should_replace_query_string_url() {
$content = '<img src="https://example.org/photos/814499/pexels-photo-814499.jpeg?auto=compress&cs=tinysrgb&dpr=1&w=500" alt="">';
$replaced_content = Optml_Manager::instance()->replace_content( $content );
Expand Down
Loading

0 comments on commit b7f67fd

Please sign in to comment.