-
Notifications
You must be signed in to change notification settings - Fork 0
/
automationWorkflow.js
57 lines (51 loc) · 1.66 KB
/
automationWorkflow.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
const { Readability } = require('@mozilla/readability');
const { JSDOM } = require('jsdom');
const TurndownService = require('turndown');
const fs = require('fs').promises;
/**
 * Downloads the resource at `url` and returns its body as text.
 *
 * Failures are logged rather than thrown: a network error, a body-read error,
 * or a non-2xx HTTP status all result in `undefined`, so callers must check
 * the result before using it.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<string|undefined>} The response body, or `undefined` on failure.
 */
async function fetchURLContent(url) {
  try {
    const response = await fetch(url);
    // fetch() only rejects on network-level failures; an HTTP error status
    // still resolves, so without this check an error page would be treated
    // as the article's HTML.
    if (!response.ok) {
      console.error(
        `Error fetching URL content: HTTP ${response.status} ${response.statusText}`
      );
      return undefined;
    }
    return await response.text();
  } catch (error) {
    console.error(`Error fetching URL content: ${error}`);
    return undefined;
  }
}
/**
 * Runs Readability over an HTML string to isolate the main article.
 *
 * The HTML is loaded into a JSDOM document created with `url` as its address,
 * and that document is handed to Readability. Any exception raised along the
 * way is logged and the function yields `undefined`.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} url - Address the HTML was obtained from.
 * @returns {object|null|undefined} Whatever `Readability#parse` returns, or
 *   `undefined` if an error was thrown.
 */
function extractReadableContent(html, url) {
  let parsed;
  try {
    const { window } = new JSDOM(html, { url });
    const reader = new Readability(window.document);
    parsed = reader.parse();
  } catch (error) {
    console.error(`Error extracting readable content: ${error}`);
  }
  return parsed;
}
/**
 * Converts an HTML fragment to Markdown using a fresh Turndown instance.
 *
 * If the conversion throws, the error is logged and `undefined` is returned.
 *
 * @param {string} html - HTML to convert.
 * @returns {string|undefined} The Markdown text, or `undefined` on error.
 */
function convertToMarkdown(html) {
  let result;
  try {
    result = new TurndownService().turndown(html);
  } catch (error) {
    console.error(`Error converting HTML to Markdown: ${error}`);
  }
  return result;
}
/**
 * Writes `content` to `filename` as UTF-8 text.
 *
 * Write errors are not caught here; the returned promise rejects with them.
 *
 * @param {string} filename - Destination path.
 * @param {string} content - Text to write.
 * @returns {Promise<void>} Resolves once the write has completed.
 */
async function saveAsMarkdownFile(filename, content) {
  const encoding = 'utf-8';
  await fs.writeFile(filename, content, encoding);
}
/**
 * Turns an article title into a string that is safe to use as a file name.
 *
 * Path separators, characters that are reserved on common file systems, and
 * control characters are replaced by spaces; leading dots are removed; runs
 * of whitespace are collapsed. Falls back to "Untitled" when nothing is left.
 *
 * @param {string} title - Title to sanitise.
 * @returns {string} A non-empty name without path separators.
 */
function toSafeFilename(title) {
  const cleaned = String(title ?? '')
    .replace(/[\\/:*?"<>|\x00-\x1f]/g, ' ')
    .replace(/^[.\s]+/, '')
    .replace(/\s+/g, ' ')
    .trim();
  return cleaned === '' ? 'Untitled' : cleaned;
}

/**
 * Fetches `url`, extracts the readable article, converts it to Markdown and
 * saves it as "<title>.md" in the current working directory.
 *
 * The saved document consists of a level-1 heading with the article title,
 * the converted body, and a "Source" link back to `url`. After saving, the
 * article's site name is printed. If no article can be extracted, an error
 * is logged and nothing is written. Errors from writing the file propagate
 * to the caller as a rejected promise.
 *
 * @param {string} url - Address of the page to convert.
 * @returns {Promise<void>}
 */
async function runWorkflow(url) {
  const html = await fetchURLContent(url);
  const article = extractReadableContent(html, url);
  if (!article) {
    console.error('Unable to extract readable content from the URL');
    return;
  }

  // Move a trailing <br> out of the <em> element before converting, so the
  // line break is not nested inside the emphasis markup.
  const markdown = convertToMarkdown(
    article.content.replaceAll('<br></em>', '</em><br>')
  );

  // A missing or empty title would otherwise produce "# null" / "null.md".
  const title = article.title || 'Untitled';
  const content = `# ${title}\n\n${markdown}\n\n[Source](${url})`;

  // The title comes from the remote page: sanitise it so it cannot contain
  // path separators or characters the file system rejects.
  const filename = `${toSafeFilename(title)}.md`;
  await saveAsMarkdownFile(filename, content);
  console.log(article.siteName);
}
// Script entry point: convert one hard-coded page when this file is executed.
const url = 'https://www.poetryfoundation.org/poetrymagazine/poems/155480/one-of-us';
// runWorkflow returns a promise and can reject (for example when the file
// cannot be written). Handle that explicitly instead of leaving the promise
// floating, and signal the failure through the process exit code.
runWorkflow(url).catch((error) => {
  console.error(`Workflow failed: ${error}`);
  process.exitCode = 1;
});