From 07327ed43528bb334d65859416bc3b6e7e0ccabd Mon Sep 17 00:00:00 2001 From: Gordon Pedersen Date: Thu, 14 May 2020 20:32:14 +1000 Subject: [PATCH] Add a base element to the DOM with current URL This makes the DOM parse with a correct base URI instead of the extension's base URI, which fixes https://github.com/deathau/markdown-clipper/issues/1 --- background/background.js | 17 +++++++++++------ contentScript/pageScraper.js | 16 +++++++++++++--- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/background/background.js b/background/background.js index 3106ab5..7e41920 100644 --- a/background/background.js +++ b/background/background.js @@ -3,13 +3,13 @@ browser.runtime.onMessage.addListener(notify); // creates the readable article object from Readability function createReadableVersion(dom) { - var reader = new Readability(dom); + var reader = new Readability(dom, { debug: true }); var article = reader.parse(); return article; } // convert the article content to markdown using Turndown -function convertArticleToMarkdown(article, url) { +function convertArticleToMarkdown(article) { var turndownService = new TurndownService() var markdown = turndownService.turndown(article.content); @@ -21,9 +21,6 @@ function convertArticleToMarkdown(article, url) { markdown = "> " + article.excerpt + "\n\n" + markdown; } - //add url - markdown = url + "\n\n" + markdown; - return markdown; } @@ -63,14 +60,22 @@ function downloadMarkdown(markdown, title) { function notify(message) { // message for initial clipping of the dom if (message.type == "clip") { + + // parse the dom var parser = new DOMParser(); var dom = parser.parseFromString(message.dom, "text/html"); if (dom.documentElement.nodeName == "parsererror") { console.error("error while parsing"); } + // make markdown document from the dom var article = createReadableVersion(dom); - var markdown = convertArticleToMarkdown(article, message.url); + var markdown = convertArticleToMarkdown(article); + + // add url to the top of the markdown + markdown = dom.baseURI + "\n\n" + markdown; + + // send a message to display the markdown browser.runtime.sendMessage({ type: "display.md", markdown: markdown, article: article }); } // message for triggering download diff --git a/contentScript/pageScraper.js b/contentScript/pageScraper.js index d818d68..13d0473 100644 --- a/contentScript/pageScraper.js +++ b/contentScript/pageScraper.js @@ -1,7 +1,17 @@ function notifyExtension() { - //var serializer = new XMLSerializer(); - //var content = serializer.serializeToString(document); + // if the document doesn't have a "base" element make one + // this allows the DOM parser in future steps to fix relative uris + if (document.head.getElementsByTagName('base').length == 0) { + let baseEl = document.createElement('base'); + // use the current uri + baseEl.setAttribute('href', window.location.href); + document.head.append(baseEl); + } + + // get the content of the page as a string var content = document.documentElement.outerHTML; - browser.runtime.sendMessage({ type: "clip", dom: content, url:window.location.href}); + + // send a message that the content should be clipped + browser.runtime.sendMessage({ type: "clip", dom: content}); } notifyExtension(); \ No newline at end of file