diff --git a/CHANGELOG.md b/CHANGELOG.md
index 06d5b1f..0980576 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p
 ## Unreleased
 ### Fixed
 - Fixed an issue where the `retconAutoAlt` filter didn't work as intended [#71](https://github.com/mmikkel/Retcon-Craft/issues/71)
+- Fixed an issue where Retcon would convert UTF-8 non-breaking spaces to HTML entities, preventing filters like `retconRemoveEmpty` from working correctly in all cases [#70](https://github.com/mmikkel/Retcon-Craft/issues/70)
 
 ## 3.2.0 - 2024-06-28
 ### Improved
diff --git a/src/library/RetconDom.php b/src/library/RetconDom.php
index 5031641..d051525 100644
--- a/src/library/RetconDom.php
+++ b/src/library/RetconDom.php
@@ -43,16 +43,22 @@ class RetconDom
      */
     public function __construct($html)
     {
-        $html = (string)$html;
-        $libxmlUseInternalErrors = \libxml_use_internal_errors(true);
-        $content = \mb_convert_encoding($html, 'HTML-ENTITIES', Craft::$app->getView()->getTwig()->getCharset());
-        $this->doc = new \DOMDocument();
-        $this->doc->loadHTML("$content", LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
-        $this->crawler = new Crawler($this->doc);
-        $this->html5 = new HTML5([
-            'encode_entities' => false,
-        ]);
-        \libxml_use_internal_errors($libxmlUseInternalErrors);
+        // Replace UTF-8 non-breaking spaces (bytes C2 A0) with regular spaces, so they are not
+        // converted to HTML entities below
+        $html = str_replace("\xc2\xa0", ' ', (string)$html);
+        $libxmlUseInternalErrors = libxml_use_internal_errors(true);
+        try {
+            $content = mb_convert_encoding($html, 'HTML-ENTITIES', Craft::$app->getView()->getTwig()->getCharset());
+            $this->doc = new \DOMDocument();
+            $this->doc->loadHTML("$content", LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
+            $this->crawler = new Crawler($this->doc);
+            $this->html5 = new HTML5([
+                'encode_entities' => false,
+            ]);
+        } finally {
+            // Always restore the previous libxml error mode, even if loading throws
+            libxml_use_internal_errors($libxmlUseInternalErrors);
+        }
     }
 
     /**