Skip to content

Commit

Permalink
marked puppeteer as old method
Browse files Browse the repository at this point in the history
  • Loading branch information
lostmypillow committed Aug 19, 2024
1 parent 46ea7bb commit c211932
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 30 deletions.
2 changes: 1 addition & 1 deletion app.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const cors = require("cors");
const scrapeContent = require("./lib/scrapeContent");
const app = express();
const port = 3001;
const getHTMLFetch = require("./lib/getHTMLFetch")
const getHTMLFetch = require("./lib/getHTML")
app.use((req, res, next) => {
res.setHeader(
"Access-Control-Allow-Origin",
Expand Down
17 changes: 2 additions & 15 deletions lib/getHTML.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,6 @@
const puppeteer = require("puppeteer");
const fetch = require('node-fetch-commonjs');
/**
 * Returns the HTML of `link` as a string.
 *
 * NOTE(review): this listing appears to carry both the Puppeteer body and the
 * fetch-based one-liner of the same function (likely the removed and added
 * sides of a diff shown together) — confirm which one is the intended body.
 * Read literally, the function returns the Puppeteer result at `return html;`
 * and the fetch line after it is never reached.
 *
 * @param {string} link - URL of the page to load.
 * @returns {Promise<string>} The page markup.
 */
async function getHTML(link) {
// Puppeteer variant: launch a browser with an explicit `headless: false`.
const browser = await puppeteer.launch({
headless: false,
defaultViewport: null,
});
const page = await browser.newPage();
// Navigation resolves on the `domcontentloaded` event.
await page.goto(link, {
waitUntil: "domcontentloaded",
});

const html = await page.content();
// Only reached when every await above resolved.
await browser.close();
return html;
// fetch variant: request `link` and read the response body as text.
return await (await fetch(link)).text()
}



module.exports = getHTML
10 changes: 0 additions & 10 deletions lib/getHTMLFetch.js

This file was deleted.

19 changes: 19 additions & 0 deletions lib/getHTMLOld.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
const puppeteer = require("puppeteer");
/**
 * Legacy HTML fetcher: opens `link` in a Puppeteer-launched browser and
 * returns the page's HTML.
 *
 * A new browser is launched on every call and is closed on every exit path,
 * including when opening the page, navigating, or reading the content fails.
 *
 * @param {string} link - URL to open.
 * @returns {Promise<string>} Result of `page.content()` after navigation
 *   has resolved on `domcontentloaded`.
 * @throws Propagates any error raised while launching, navigating, or reading
 *   the page content.
 */
async function getHTMLOld(link) {
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: null,
  });
  try {
    const page = await browser.newPage();
    await page.goto(link, {
      waitUntil: "domcontentloaded",
    });
    return await page.content();
  } finally {
    // Close in `finally` so a failed navigation does not leave a browser
    // process running behind the rejected promise.
    await browser.close();
  }
}



module.exports = getHTMLOld
9 changes: 5 additions & 4 deletions lib/scrapeContent.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
const getHTML = require("./getHTML");
const getHTMLFetch = require("./getHTMLFetch")
const getHTMLOld = require("./getHTMLOld");
const getHTML = require("./getHTML")
const cheerio = require("cheerio");

async function scrapeContent(link) {
var title = "";
var date_source_author = "";
var content;
let dateParts;
const $ = cheerio.load(await getHTMLFetch(link));
const $ = cheerio.load(await getHTML(link));

switch (true) {
///////ctee 、綜合外電
Expand All @@ -26,7 +26,7 @@ async function scrapeContent(link) {
});
break;
//////

////// ePrice bug related links
case link.includes("eprice"):
title = $("h1.title").text().trim();
Expand Down Expand Up @@ -405,6 +405,7 @@ content = []
.filter((item) => !item.startsWith("("));

break;
//////techbang bug!
case link.includes("techbang"):
title = $("h1.post-title").text();

Expand Down

0 comments on commit c211932

Please sign in to comment.