-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
Sets the charset of the html passed into DomDocument to utf-8. Replaces the mb_convert_encoding call replacing utf-8 with html entities before handing off to DomDocument. This avoids the need to later convert back to utf-8 from html entities afterward. This secondary mb_convert_encoding call was converting not only the utf-8 we converted earlier but also entity encoding html stored inside data-* or other attributes of html elements. Fixes Automattic/wp-calypso#44897 Maintains the fix for WordPress#24445 (WordPress#24447)
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -56,13 +56,13 @@ function gutenberg_apply_block_supports( $block_content, $block ) { | |
|
||
// Suppress warnings from this method from polluting the front-end. | ||
// @codingStandardsIgnoreStart | ||
if ( ! @$dom->loadHTML( mb_convert_encoding( $block_content, 'HTML-ENTITIES', 'UTF-8' ), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | LIBXML_COMPACT ) ) { | ||
if ( ! @$dom->loadHTML( '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>' . $block_content . '</body></html>', LIBXML_HTML_NODEFDTD | LIBXML_COMPACT ) ) { | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
andrewserong
|
||
// @codingStandardsIgnoreEnd | ||
return $block_content; | ||
} | ||
|
||
$xpath = new DOMXPath( $dom ); | ||
$block_root = $xpath->query( '/*' )[0]; | ||
$block_root = $xpath->query( '/html/body/*' )[0]; | ||
|
||
if ( empty( $block_root ) ) { | ||
return $block_content; | ||
|
@@ -86,7 +86,7 @@ function gutenberg_apply_block_supports( $block_content, $block ) { | |
$block_root->setAttribute( 'style', esc_attr( implode( '; ', $new_styles ) . ';' ) ); | ||
} | ||
|
||
return mb_convert_encoding( $dom->saveHtml(), 'UTF-8', 'HTML-ENTITIES' ); | ||
return preg_replace( array( '/^<html><head><meta http-equiv="Content-Type" content="text\/html; charset=utf-8"><\/head><body>/', '/<\/body><\/html>$/' ), '', $dom->saveHtml() ); | ||
This comment has been minimized.
Sorry, something went wrong.
ockham
|
||
} | ||
add_filter( 'render_block', 'gutenberg_apply_block_supports', 10, 2 ); | ||
|
||
|
3 comments
on commit e623758
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@lsl This is nice, thanks! Definitely good material for a PR 😄
When we were originally fixing this, we briefly had a fix that preprended <?xml version=“1.0” encoding=“utf-8”?>
to the block HTML (but ended up using the mb_convert_encoding
based approach since the <?xml version=“1.0” encoding=“utf-8”?>
was also present in the output).
If you want, you could give that approach another spin (basically since it just needs pre-pending rather than wrapping, and only one str_replace()
instead of two (or a preg_replace
).
Could you also add a unit test that captures the issue we were seeing in Jetpack with mb_convert_encoding
(but is fixed with this approach)? See my WordPress#24447 for where to put it 🙂
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since you were asking about hints for unit test coverage -- it might be enough to change this string to include some nested HTML/Gutenberg blocks. We basically need to mimick whatever Jetpack is injecting (Automattic/wp-calypso#44897).
This means however that we'll also need to change some of the logic in that file's get_attribute_from_block
and get_content_from_block
functions to locate HTML attributes and content wrapped in HTML elements.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the advice on test changes needed! 🙇
Re different approaches + cc @simison these are all the approaches I've been able to test so far: https://3v4l.org/EaZv9
Note the different handling / changes to utf output and altered whitespace in each case.
The last approach (this commit) is the only one that manages to preserve utf-8, whitespace, and html entities previously in the html.
I also dislike this wrapping/unwrapping so I'm more than happy to take a different approach if you can figure out something that avoids it.
would
<meta charset="utf-8">
instead of<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
?