diff --git a/CHANGELOG.md b/CHANGELOG.md index 13a77c254..e15bb1e12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,8 @@ [#1050](https://github.com/nextcloud/cookbook/pull/1050) @christianlupus - Migrated node-sass to dart sass [#1051](https://github.com/nextcloud/cookbook/pull/1051) @christianlupus +- Add the url as a parameter to allow for specialized parsers per website in the backend + [#1060](https://github.com/nextcloud/cookbook/pull/1060) @christianlupus ### Codebase maintenance - Removed codecov.io upload of intermediate merge commits during pull requests diff --git a/lib/Helper/HTMLParser/AbstractHtmlParser.php b/lib/Helper/HTMLParser/AbstractHtmlParser.php index 28aed502d..cc984a29e 100644 --- a/lib/Helper/HTMLParser/AbstractHtmlParser.php +++ b/lib/Helper/HTMLParser/AbstractHtmlParser.php @@ -19,8 +19,9 @@ public function __construct(IL10N $l10n) { * Extract the recipe from the given document. * * @param \DOMDocument $document The document to parse + * @param ?string $url The URL of the recipe to import * @return array The JSON content in the document as a PHP array * @throws HtmlParsingException If the parsing was not successful */ - abstract public function parse(\DOMDocument $document): array; + abstract public function parse(\DOMDocument $document, ?string $url): array; } diff --git a/lib/Helper/HTMLParser/HttpJsonLdParser.php b/lib/Helper/HTMLParser/HttpJsonLdParser.php index 7163265bd..0b9272f8c 100644 --- a/lib/Helper/HTMLParser/HttpJsonLdParser.php +++ b/lib/Helper/HTMLParser/HttpJsonLdParser.php @@ -22,7 +22,7 @@ public function __construct(IL10N $l10n, JsonService $jsonService) { $this->jsonService = $jsonService; } - public function parse(\DOMDocument $document): array { + public function parse(\DOMDocument $document, ?string $url): array { $xpath = new \DOMXPath($document); $json_ld_elements = $xpath->query("//*[@type='application/ld+json']"); diff --git a/lib/Helper/HTMLParser/HttpMicrodataParser.php 
b/lib/Helper/HTMLParser/HttpMicrodataParser.php index abeb374c9..212df4764 100644 --- a/lib/Helper/HTMLParser/HttpMicrodataParser.php +++ b/lib/Helper/HTMLParser/HttpMicrodataParser.php @@ -31,7 +31,7 @@ public function __construct(IL10N $l10n) { parent::__construct($l10n); } - public function parse(DOMDocument $document): array { + public function parse(DOMDocument $document, ?string $url): array { $this->xpath = new DOMXPath($document); $selectorHttp = "//*[@itemtype='http://schema.org/Recipe']"; diff --git a/lib/Service/RecipeExtractionService.php b/lib/Service/RecipeExtractionService.php index c99e053ae..086b517fb 100644 --- a/lib/Service/RecipeExtractionService.php +++ b/lib/Service/RecipeExtractionService.php @@ -29,14 +29,15 @@ public function __construct(HttpJsonLdParser $jsonParser, HttpMicrodataParser $m * Parse a DOM document using all registered parsers * * @param \DOMDocument $document The document to parse + * @param ?string $url The URL of the recipe to be parsed * @throws HtmlParsingException If no parser was able to successfully parse the document * @return array The data as returned from the parser */ - public function parse(\DOMDocument $document): array { + public function parse(\DOMDocument $document, ?string $url): array { /** @var $parser AbstractHtmlParser */ foreach ($this->parsers as $parser) { try { - return $parser->parse($document); + return $parser->parse($document, $url); } catch (HtmlParsingException $ex) { // Silently ignore failure as there might be other parsers better suited } diff --git a/lib/Service/RecipeService.php b/lib/Service/RecipeService.php index 769659476..f90def5f3 100755 --- a/lib/Service/RecipeService.php +++ b/lib/Service/RecipeService.php @@ -834,7 +834,7 @@ public function downloadRecipe(string $url): File { $this->htmlDownloadService->downloadRecipe($url); try { - $json = $this->recipeExtractionService->parse($this->htmlDownloadService->getDom()); + $json = 
$this->recipeExtractionService->parse($this->htmlDownloadService->getDom(), $url); } catch (HtmlParsingException $ex) { throw new ImportException($ex->getMessage(), null, $ex); } diff --git a/tests/Unit/Helper/HTMLParser/HttpJsonLdParserTest.php b/tests/Unit/Helper/HTMLParser/HttpJsonLdParserTest.php index bd0fe3d47..695a87cfe 100644 --- a/tests/Unit/Helper/HTMLParser/HttpJsonLdParserTest.php +++ b/tests/Unit/Helper/HTMLParser/HttpJsonLdParserTest.php @@ -74,7 +74,7 @@ public function testHTMLFile($file, $valid, $jsonFile): void { $document->loadHTML($content); try { - $res = $parser->parse($document); + $res = $parser->parse($document, 'http://example.com'); $jsonDest = file_get_contents(__DIR__ . "/res_JsonLd/$jsonFile"); $expected = json_decode($jsonDest, true); diff --git a/tests/Unit/Helper/HTMLParser/HttpMicrodataParserTest.php b/tests/Unit/Helper/HTMLParser/HttpMicrodataParserTest.php index b7e1353f9..cfab9d273 100644 --- a/tests/Unit/Helper/HTMLParser/HttpMicrodataParserTest.php +++ b/tests/Unit/Helper/HTMLParser/HttpMicrodataParserTest.php @@ -59,7 +59,7 @@ public function testHTMLFile($filename, $valid, $jsonFile): void { $document->loadHTML($content); try { - $res = $parser->parse($document); + $res = $parser->parse($document, 'http://example.com'); $jsonDest = file_get_contents(__DIR__ . "/res_Microdata/$jsonFile"); $expected = json_decode($jsonDest, true); @@ -145,7 +145,7 @@ private function finishTest($parser, $content, $jsonFile): void { $document->loadHTML($content); try { - $res = $parser->parse($document); + $res = $parser->parse($document, 'http://example.com'); $jsonDest = file_get_contents(__DIR__ . 
"/res_Microdata/$jsonFile"); $expected = json_decode($jsonDest, true); diff --git a/tests/Unit/Service/RecipeExtractionServiceTest.php b/tests/Unit/Service/RecipeExtractionServiceTest.php index 193c473ce..a29017f52 100644 --- a/tests/Unit/Service/RecipeExtractionServiceTest.php +++ b/tests/Unit/Service/RecipeExtractionServiceTest.php @@ -8,6 +8,7 @@ use OCA\Cookbook\Exception\HtmlParsingException; use OCP\IL10N; use OCA\Cookbook\Service\RecipeExtractionService; +use PHPUnit\Framework\MockObject\MockObject; class RecipeExtractionServiceTest extends TestCase { /** @@ -26,34 +27,37 @@ protected function setUp(): void { * @param bool $exceptionExpected */ public function testParsingDelegation($jsonSuccess, $microdataSuccess, $exceptionExpected): void { + /** @var HttpJsonLdParser|MockObject $jsonParser */ $jsonParser = $this->createMock(HttpJsonLdParser::class); + /** @var HttpMicrodataParser|MockObject $microdataParser */ $microdataParser = $this->createMock(HttpMicrodataParser::class); $document = $this->createStub(\DOMDocument::class); + $url = 'http://example.com'; $expectedObject = [new \stdClass()]; if ($jsonSuccess) { $jsonParser->expects($this->once()) ->method('parse') - ->with($document) + ->with($document, $url) ->willReturn($expectedObject); $microdataParser->expects($this->never())->method('parse'); } else { $jsonParser->expects($this->once()) ->method('parse') - ->with($document) + ->with($document, $url) ->willThrowException(new HtmlParsingException()); if ($microdataSuccess) { $microdataParser->expects($this->once()) ->method('parse') - ->with($document) + ->with($document, $url) ->willReturn($expectedObject); } else { $microdataParser->expects($this->once()) ->method('parse') - ->with($document) + ->with($document, $url) ->willThrowException(new HtmlParsingException()); } } @@ -61,7 +65,7 @@ public function testParsingDelegation($jsonSuccess, $microdataSuccess, $exceptio $sut = new RecipeExtractionService($jsonParser, $microdataParser, $this->l); try { 
- $ret = $sut->parse($document); + $ret = $sut->parse($document, $url); $this->assertEquals($expectedObject, $ret); } catch (HtmlParsingException $ex) {