diff --git a/CHANGELOG.md b/CHANGELOG.md index e974e17e4..917b9e2b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -98,6 +98,8 @@ [#437](https://github.com/nextcloud/cookbook/pull/437) @christianlupus - Corrected bugs in CI system [#447](https://github.com/nextcloud/cookbook/pull/447) @christianlupus +- Corrected encoding errors when importing UTF-8 encoded recipes + [#284](https://github.com/nextcloud/cookbook/issues/284) @cholletk ### Removed - Travis build system diff --git a/lib/Service/RecipeService.php b/lib/Service/RecipeService.php index 6e724f3a3..350481825 100755 --- a/lib/Service/RecipeService.php +++ b/lib/Service/RecipeService.php @@ -53,7 +53,7 @@ public function getRecipeById(int $id) { return $this->parseRecipeFile($file); } - + /** * Get a recipe's modification time by its folder id. * @@ -114,7 +114,7 @@ public function checkRecipe(array $json): array { if (!$json) { throw new Exception('Recipe array was null'); } - + if (empty($json['name'])) { throw new Exception('Field "name" is required'); } @@ -146,7 +146,7 @@ public function checkRecipe(array $json): array { if (empty($img)) { continue; } - + $image_matches = []; preg_match_all('!\d+!', $img, $image_matches); @@ -195,7 +195,7 @@ public function checkRecipe(array $json): array { $json['image'] .= '?' . 
$image_url['query']; } } - + // Make sure that "recipeCategory" is a string if (isset($json['recipeCategory'])) { @@ -210,7 +210,7 @@ public function checkRecipe(array $json): array { $json['recipeCategory'] = $this->cleanUpString($json['recipeCategory'], false, true); - + // Make sure that "recipeYield" is an integer which is at least 1 if (isset($json['recipeYield']) && $json['recipeYield']) { $regex_matches = []; @@ -388,7 +388,7 @@ public function checkRecipe(array $json): array { if (isset($duration_matches[1][0]) && !empty($duration_matches[1][0])) { $duration_hours = intval($duration_matches[1][0]); } - + if (isset($duration_matches[2][0]) && !empty($duration_matches[2][0])) { $duration_minutes = intval($duration_matches[2][0]); } @@ -409,7 +409,7 @@ public function checkRecipe(array $json): array { } else { $json['nutrition'] = []; } - + return $json; } @@ -426,6 +426,9 @@ private function parseRecipeHtml($url, $html) { // Make sure we don't have any encoded entities in the HTML string $html = html_entity_decode($html); + // Declare the markup as UTF-8 for the DOM parser; utf8_decode() would turn every non-Latin-1 character into '?' + $html = '<?xml encoding="UTF-8">' . $html; + // Start document parser $document = new \DOMDocument(); @@ -441,7 +444,7 @@ private function parseRecipeHtml($url, $html) { } finally { libxml_use_internal_errors($libxml_previous_state); } - + $xpath = new \DOMXPath($document); $json_ld_elements = $xpath->query("//*[@type='application/ld+json']"); @@ -489,7 +492,7 @@ private function parseRecipeHtml($url, $html) { // Parse HTML if JSON couldn't be found $json = []; - + $recipes = $xpath->query("//*[@itemtype='http://schema.org/Recipe']"); if (!isset($recipes[0])) { @@ -514,7 +517,7 @@ private function parseRecipeHtml($url, $html) { case 'images': case 'thumbnail': $prop = 'image'; - + if (!isset($json[$prop]) || !is_array($json[$prop])) { $json[$prop] = []; } @@ -533,7 +536,7 @@ private function parseRecipeHtml($url, $html) { case 'recipeIngredient': case 'ingredients': $prop = 'recipeIngredient'; - + if (!isset($json[$prop]) || 
!is_array($json[$prop])) { $json[$prop] = []; } @@ -546,7 +549,7 @@ private function parseRecipeHtml($url, $html) { } else { array_push($json[$prop], $prop_element->nodeValue); } - + break; case 'recipeInstructions': @@ -554,7 +557,7 @@ private function parseRecipeHtml($url, $html) { case 'steps': case 'guide': $prop = 'recipeInstructions'; - + if (!isset($json[$prop]) || !is_array($json[$prop])) { $json[$prop] = []; } @@ -590,7 +593,7 @@ private function parseRecipeHtml($url, $html) { // Make one final desparate attempt at getting the instructions if (!isset($json['recipeInstructions']) || !$json['recipeInstructions'] || sizeof($json['recipeInstructions']) < 1) { $json['recipeInstructions'] = []; - + $step_elements = $recipes[0]->getElementsByTagName('p'); foreach ($step_elements as $step_element) { @@ -601,23 +604,23 @@ private function parseRecipeHtml($url, $html) { array_push($json['recipeInstructions'], $step_element->nodeValue); } } - + return $this->checkRecipe($json); } private function display_libxml_errors($url, $errors) { $error_counter = []; $by_error_code = []; - + foreach ($errors as $error) { $count = array_key_exists($error->code, $error_counter) ? $error_counter[$error->code] : 0; $error_counter[$error->code] = $count + 1; $by_error_code[$error->code] = $error; } - + foreach ($error_counter as $code => $count) { $error = $by_error_code[$code]; - + switch ($error->level) { case LIBXML_ERR_WARNING: $error_message = "libxml: Warning $error->code "; @@ -634,7 +637,7 @@ private function display_libxml_errors($url, $errors) { $error_message .= "occurred " . $count . " times while parsing " . $url . ". Last time in line $error->line" . " and column $error->column: " . 
$error->message; - + $this->logger->warning($error_message); } } @@ -688,7 +691,7 @@ public function addRecipe($json) { if ($user_folder->nodeExists($json['name'])) { throw new Exception('Another recipe with that name already exists'); } - + $recipe_folder->move($new_path); } @@ -864,29 +867,29 @@ public function getRecipeFiles() { public function updateSearchIndex() { $this->migrateFolderStructure(); } - + private function migrateFolderStructure() { // Remove old cache folder if needed $legacy_cache_path = '/cookbook/cache'; - + if ($this->root->nodeExists($legacy_cache_path)) { $this->root->get($legacy_cache_path)->delete(); } - + // Restructure files if needed $user_folder = $this->getFolderForUser(); - + foreach ($user_folder->getDirectoryListing() as $node) { // Move JSON files from the user directory into its own folder if ($this->isRecipeFile($node)) { $recipe_name = str_replace('.json', '', $node->getName()); - + $node->move($node->getPath() . '_tmp'); - + $recipe_folder = $user_folder->newFolder($recipe_name); - + $node->move($recipe_folder->getPath() . '/recipe.json'); - + // Rename folders with .json extensions (this was likely caused by a migration bug) } elseif ($node instanceof Folder && strpos($node->getName(), '.json')) { $node->move(str_replace('.json', '', $node->getPath())); @@ -902,7 +905,7 @@ private function migrateFolderStructure() { public function getAllKeywordsInSearchIndex() { return $this->db->findAllKeywords($this->user_id); } - + /** * Gets all categories from the index * @@ -1198,7 +1201,7 @@ private function cleanUpString($str, $preserve_newlines = false, $remove_slashes } else { $str = str_replace(["\t", "\\"], '', $str); } - + $str = html_entity_decode($str); return $str;