Avoid lossy htmlentities encode by setting charset

Sets the charset of the html passed into DomDocument to utf-8. Replaces the mb_convert_encoding call replacing utf-8 with html entities before handing off to DomDocument. This avoids the need to later convert back to utf-8 from html entities afterward. This secondary mb_convert_encoding call was converting not only the utf-8 we converted earlier but also entity encoding html stored inside data-* or other attributes of html elements. Fixes Automattic/wp-calypso#44897 Maintains the fix for WordPress#24445 (WordPress#24447)
lsl · Aug 18, 2020 · e623758 · simison · Aug 18, 2020 · andrewserong
1 parent c348eb6
commit e623758
Showing 1 changed file with 3 additions and 3 deletions.
diff --git a/lib/block-supports/index.php b/lib/block-supports/index.php
@@ -56,13 +56,13 @@ function gutenberg_apply_block_supports( $block_content, $block ) {
 
 	// Suppress warnings from this method from polluting the front-end.
 	// @codingStandardsIgnoreStart
-	if ( ! @$dom->loadHTML( mb_convert_encoding( $block_content, 'HTML-ENTITIES', 'UTF-8' ), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | LIBXML_COMPACT ) ) {
+	if ( ! @$dom->loadHTML( '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>' . $block_content . '</body></html>', LIBXML_HTML_NODEFDTD | LIBXML_COMPACT ) ) {
 	// @codingStandardsIgnoreEnd
 		return $block_content;
 	}
 
 	$xpath      = new DOMXPath( $dom );
-	$block_root = $xpath->query( '/*' )[0];
+	$block_root = $xpath->query( '/html/body/*' )[0];
 
 	if ( empty( $block_root ) ) {
 		return $block_content;
@@ -86,7 +86,7 @@ function gutenberg_apply_block_supports( $block_content, $block ) {
 		$block_root->setAttribute( 'style', esc_attr( implode( '; ', $new_styles ) . ';' ) );
 	}
 
-	return mb_convert_encoding( $dom->saveHtml(), 'UTF-8', 'HTML-ENTITIES' );
+	return preg_replace( array( '/^<html><head><meta http-equiv="Content-Type" content="text\/html; charset=utf-8"><\/head><body>/', '/<\/body><\/html>$/' ), '', $dom->saveHtml() );
 }
 add_filter( 'render_block', 'gutenberg_apply_block_supports', 10, 2 );