diff --git a/packages/metascraper-lang/__snapshots__/index.js.snap-shot b/packages/metascraper-lang/__snapshots__/index.js.snap-shot index 8344fb705..05e5c77e9 100644 --- a/packages/metascraper-lang/__snapshots__/index.js.snap-shot +++ b/packages/metascraper-lang/__snapshots__/index.js.snap-shot @@ -2,7 +2,7 @@ exports['html lang property 1'] = { "lang": "pl", "author": "Jakub Majmurek", "title": "Churchill, bohater naszych czasów / Film / dwutygodnik.com", - "publisher": "Film | Dwutygodnik | Dwutygodnik", + "publisher": "Dwutygodnik", "image": "http://www.dwutygodnik.com/public/media/article/image_full/7615.png", "date": "2018-01-01T12:00:00.000Z", "description": "Wysyp filmów o Churchillu w pobrexitowej Brytanii, wydaje się czymś zrozumiałym. Uosabia on ostatni moment prawdziwej wielkości Zjednoczonego Królestwa – wspomnienie tej historycznej chwili pozwala oswoić traumy i lęki", diff --git a/packages/metascraper-publisher/index.js b/packages/metascraper-publisher/index.js index 5f032955f..c1989fcb1 100644 --- a/packages/metascraper-publisher/index.js +++ b/packages/metascraper-publisher/index.js @@ -22,7 +22,10 @@ const wrap = rule => ({ htmlDom }) => { const getFromTitle = (text, regex) => { const matches = regex.exec(text) - return matches ? matches[1] : false + if (!matches) return false + let result = matches[1] + while (regex.test(result)) result = regex.exec(result)[1] + return result } /** diff --git a/packages/metascraper-publisher/test/index.js b/packages/metascraper-publisher/test/index.js index 49316c449..40d127ba0 100644 --- a/packages/metascraper-publisher/test/index.js +++ b/packages/metascraper-publisher/test/index.js @@ -1,7 +1,6 @@ 'use strict' const should = require('should') - const metascraper = require('metascraper')([require('..')()]) const getHtml = title => ` @@ -20,13 +19,18 @@ const getHtml = title => ` describe('metascraper-publisher', () => { describe('from title', async () => { - ;['Murcia | Wikipedia', 'Murcia - Wikipedia', '| Wikipedia'].forEach( - title => - it(`${title} → Wikipedia`, async () => { - const url = 'https://en.wikipedia.org/wiki/Murcia' - const { publisher } = await metascraper({ html: getHtml(title), url }) - should(publisher).be.equal('Wikipedia') - }) + ;[ + 'Murcia | Wikipedia', + 'Murcia - Wikipedia', + '| Wikipedia', + 'San Antonio Spurs guard Manu Ginobili... - San Antonio Spurs | Wikipedia', + 'San Antonio Spurs guard Manu Ginobili... | San Antonio Spurs - Wikipedia' + ].forEach(title => + it(`${title} → Wikipedia`, async () => { + const url = 'https://en.wikipedia.org/wiki/Murcia' + const { publisher } = await metascraper({ html: getHtml(title), url }) + should(publisher).be.equal('Wikipedia') + }) ) }) })