From 9d8776d0bbf97ddbfa2a614f94b5b50fde42e677 Mon Sep 17 00:00:00 2001
From: Dan Phiffer
Date: Fri, 18 Aug 2023 15:42:06 -0400
Subject: [PATCH] try/catch loading additional links

---
 src/collector.ts | 45 ++++++++++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/src/collector.ts b/src/collector.ts
index 0f86a9c..ccc9d66 100644
--- a/src/collector.ts
+++ b/src/collector.ts
@@ -268,33 +268,36 @@ export const collect = async (inUrl: string, args: CollectorOptions) => {
     const browse_links = sampleSize(subDomainLinks, args.numPages);
     output.browsing_history = [output.uri_dest].concat(browse_links.map(l => l.href));
     console.log('About to browse more links');
+    page_request.abort();
     pageIndex++;
 
-    // try {
-    for (let link of output.browsing_history.slice(1)) {
-        // link = 'https://www.npr.org/sections/food/';
-        await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for 1 second
-        logger.log('info', `browsing now to ${link}`, { type: 'Browser' });
-        if (didBrowserDisconnect) {
-            return {
-                status: 'failed',
-                page_response: 'Chrome crashed'
-            };
-        }
+    try {
+        for (let link of output.browsing_history.slice(1)) {
+            await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for 1 second
+            logger.log('info', `browsing now to ${link}`, { type: 'Browser' });
+            if (didBrowserDisconnect) {
+                return {
+                    status: 'failed',
+                    page_response: 'Chrome crashed'
+                };
+            }
 
-        await navigateWithTimeout(page, link);
-        await savePageContent(pageIndex, args.outDir, page, args.saveScreenshots);
+            await navigateWithTimeout(page, link);
+            await savePageContent(pageIndex, args.outDir, page, args.saveScreenshots);
 
-        console.log(`Interacting with page ${pageIndex}`);
-        await Promise.all([
-            autoScroll(page),
-            fillForms(page)
-        ]);
-        console.log(`Done interacting with page ${pageIndex}`);
+            console.log(`Interacting with page ${pageIndex}`);
+            await Promise.all([
+                autoScroll(page),
+                fillForms(page)
+            ]);
+            console.log(`Done interacting with page ${pageIndex}`);
 
-        pageIndex++;
-        duplicatedLinks = duplicatedLinks.concat(await getLinks(page));
+            pageIndex++;
+            duplicatedLinks = duplicatedLinks.concat(await getLinks(page));
+        }
+    } catch(error) {
+        console.log(`Error loading additional pages: ${error.message}`);
     }
 
     console.log('Saving cookies');