Skip to content

Commit 620d324

Browse files
committed
Revert "use promise.all on page interactions, isInteracting -> isDone"
This reverts commit adb6e39.
1 parent 09fdff4 commit 620d324

File tree

3 files changed

+94
-94
lines changed

3 files changed

+94
-94
lines changed

src/collector.ts

+26 -30
Original file line number | Diff line number | Diff line change
@@ -178,31 +178,34 @@ export const collect = async (inUrl: string, args: CollectorOptions) => {
178178
// Function to navigate to a page with a timeout guard
179179
const navigateWithTimeout = async (page: Page, url: string, timeout: number, waitUntil: PuppeteerLifeCycleEvent) => {
180180
try {
181-
console.log(`Going to ${url}`);
182181
page_response = await Promise.race([
183182
page.goto(url, {
184183
timeout: timeout,
185184
waitUntil: waitUntil
186185
}),
187186
new Promise((_, reject) =>
188187
setTimeout(() => {
189-
console.log(`Failed loading with ${waitUntil}`);
190-
reject(new Error(`Failed loading with ${waitUntil}`));
188+
console.log('First navigation attempt timeout');
189+
reject(new Error('First navigation attempt timeout'));
191190
}, 10000)
192191
)
193192
]);
194193
} catch (error) {
195-
console.log('Trying with domcontentloaded');
194+
console.log('First attempt failed, trying with domcontentloaded');
196195
page_response = await page.goto(url, {
197196
timeout: timeout,
198-
waitUntil: 'domcontentloaded'
197+
waitUntil: 'domcontentloaded' as PuppeteerLifeCycleEvent
199198
});
200199
}
200+
await savePageContent(pageIndex, args.outDir, page, args.saveScreenshots);
201201
};
202202

203203
// Go to the first url
204+
console.log('Going to the first url');
204205
await navigateWithTimeout(page, inUrl, args.defaultTimeout, args.defaultWaitUntil as PuppeteerLifeCycleEvent);
205-
await savePageContent(pageIndex, args.outDir, page, args.saveScreenshots);
206+
207+
pageIndex++;
208+
console.log('Saving first page response');
206209

207210
let duplicatedLinks = [];
208211
const outputLinks = {
@@ -244,13 +247,10 @@ export const collect = async (inUrl: string, args: CollectorOptions) => {
244247
}
245248
}
246249
}
247-
248-
console.log(`Interacting with page ${pageIndex}`);
249-
await Promise.all([
250-
autoScroll(page),
251-
fillForms(page)
252-
]);
253-
console.log(`Done interacting with page ${pageIndex}`);
250+
await fillForms(page);
251+
// console.log('... done with fillForms');
252+
await autoScroll(page);
253+
// console.log('... done with autoScroll');
254254

255255
let subDomainLinks = [];
256256
if (getSubdomain(output.uri_dest) !== 'www') {
@@ -264,12 +264,8 @@ export const collect = async (inUrl: string, args: CollectorOptions) => {
264264
output.browsing_history = [output.uri_dest].concat(browse_links.map(l => l.href));
265265
console.log('About to browse more links');
266266

267-
pageIndex++;
268-
269267
// try {
270-
for (let link of output.browsing_history.slice(1)) {
271-
// link = 'https://www.npr.org/sections/food/';
272-
await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for 1 second
268+
for (const link of output.browsing_history.slice(1)) {
273269
logger.log('info', `browsing now to ${link}`, { type: 'Browser' });
274270
if (didBrowserDisconnect) {
275271
return {
@@ -278,30 +274,30 @@ export const collect = async (inUrl: string, args: CollectorOptions) => {
278274
};
279275
}
280276

277+
console.log(`Browsing now to ${link}`);
281278
await navigateWithTimeout(page, link, args.defaultTimeout, args.defaultWaitUntil as PuppeteerLifeCycleEvent);
282-
await savePageContent(pageIndex, args.outDir, page, args.saveScreenshots);
283279

284-
console.log(`Interacting with page ${pageIndex}`);
285-
await Promise.all([
286-
autoScroll(page),
287-
fillForms(page)
288-
]);
289-
console.log(`Done interacting with page ${pageIndex}`);
280+
await fillForms(page);
281+
// console.log('... done with fillForms (2)');
290282

283+
await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for 1 second
291284
pageIndex++;
285+
292286
duplicatedLinks = duplicatedLinks.concat(await getLinks(page));
287+
await autoScroll(page);
288+
// console.log('... done with autoScroll (2)');
293289
}
294290

295-
console.log('Saving cookies');
291+
// console.log('saving cookies');
296292
await captureBrowserCookies(page, args.outDir);
297293
// console.log('... done saving cookies');
298294
if (args.captureHar) {
299-
console.log('Saving har');
295+
// console.log('saving har');
300296
await har.stop();
301297
// console.log('... done saving har');
302298
}
303299

304-
console.log('Closing browser');
300+
// console.log('closing browser');
305301
await browser.close();
306302
// console.log('... done closing browser');
307303
if (typeof userDataDir !== 'undefined') {
@@ -324,7 +320,7 @@ export const collect = async (inUrl: string, args: CollectorOptions) => {
324320
}
325321
}
326322
// generate report
327-
console.log('Generating report');
323+
// console.log('generating report');
328324
const fpRequests = Array.from(hosts.requests.first_party);
329325
const tpRequests = Array.from(hosts.requests.third_party);
330326
const incorrectTpAssignment = tpRequests.filter((f: string) => getDomain(f) === REDIRECTED_FIRST_PARTY.domain);
@@ -383,7 +379,7 @@ export const collect = async (inUrl: string, args: CollectorOptions) => {
383379
return acc;
384380
}, {});
385381

386-
console.log('Writing inspection.json');
382+
// console.log('writing inspection.json');
387383
const json_dump = JSON.stringify({ ...output, reports }, null, 2);
388384
writeFileSync(join(args.outDir, 'inspection.json'), json_dump);
389385
if (args.outDir.includes('bl-tmp')) {

src/pptr-utils/default.ts

-2
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,10 @@ const writeFile = promisify(fs.writeFile);
66

77
export const savePageContent = async (index, outDir, page: Page, screenshot = true) => {
88
try {
9-
console.log(`Saving ${index}.html`);
109
const html = await page.content();
1110
const outPath = path.join(outDir, `${index}.html`);
1211
await writeFile(outPath, html);
1312
if (screenshot) {
14-
console.log(`Saving ${index}.jpeg`);
1513
const outPathImg = path.join(outDir, `${index}.jpeg`);
1614
await page.screenshot({ path: outPathImg, type: 'jpeg', quality: 50 });
1715
}

src/pptr-utils/interaction-utils.ts

+68 -62
Original file line number | Diff line number | Diff line change
@@ -33,84 +33,93 @@ export const DEFAULT_INPUT_VALUES = {
3333
// ... [rest of the default input values]
3434
};
3535

36-
export const fillForms = async (page: Page, timeout = 30000) => {
37-
console.log('Filling out forms');
38-
let isDone = false;
36+
export const fillForms = async (page: Page, timeout = 6000) => {
37+
let isInteracting = false;
3938

4039
const timeoutPromise = new Promise(resolve => {
4140
setTimeout(() => {
42-
isDone = true;
43-
console.log('Timeout reached. Exiting fillForms().');
41+
if (isInteracting) {
42+
// console.log('Interaction ongoing. Waiting for safe exit.');
43+
return;
44+
}
45+
// console.log('Timeout reached. Exiting fillForms().');
4446
resolve('Timeout');
4547
}, timeout);
4648
});
4749

4850
const fillPromise = async () => {
51+
console.log('Entering fillPromise.');
4952
try {
50-
console.log('Checking for inputs on the page');
51-
if (isDone) {
52-
return;
53-
}
54-
const elements = await page.$$('input');
55-
console.log(`Found ${elements.length} input elements`);
56-
let count = 0;
57-
for (const el of elements) {
58-
if (isDone) {
59-
return;
60-
}
61-
if (count > 100) {
62-
break;
63-
}
64-
count += 1;
65-
const pHandle = await el.getProperty('type');
66-
const pValue = await pHandle.jsonValue();
67-
// console.log(`Input is type ${pValue}`);
53+
if (!page.isClosed()) {
54+
console.log('Checking for inputs on the page');
55+
const elements = await page.$$('input');
56+
console.log(`Found ${elements.length} input elements`);
57+
let count = 0;
58+
for (const el of elements) {
59+
if (!page.isClosed()) {
60+
isInteracting = true;
6861

69-
const autoCompleteHandle = await el.getProperty('autocomplete');
70-
const autoCompleteValue = (await autoCompleteHandle.jsonValue()) as string;
71-
// console.log(`Autocomplete attribute is: ${autoCompleteValue}`);
72-
let autoCompleteKeys = [];
62+
// console.log(`Inspecting element ${count}`);
63+
if (count > 100) {
64+
break;
65+
}
66+
count += 1;
7367

74-
// console.log('Checking autocomplete value');
75-
if (autoCompleteValue) {
76-
if (autoCompleteValue.includes('cc-name')) {
77-
// console.log('Autocomplete includes cc-name.');
78-
autoCompleteKeys = ['cc-name'];
68+
const pHandle = await el.getProperty('type');
69+
const pValue = await pHandle.jsonValue();
70+
// console.log(`Input is type ${pValue}`);
71+
72+
const autoCompleteHandle = await el.getProperty('autocomplete');
73+
const autoCompleteValue = (await autoCompleteHandle.jsonValue()) as string;
74+
// console.log(`Autocomplete attribute is: ${autoCompleteValue}`);
75+
let autoCompleteKeys = [];
76+
77+
// console.log('Checking autocomplete value');
78+
if (autoCompleteValue) {
79+
if (autoCompleteValue.includes('cc-name')) {
80+
// console.log('Autocomplete includes cc-name.');
81+
autoCompleteKeys = ['cc-name'];
82+
} else {
83+
// console.log('Autocomplete does not include cc-name.');
84+
autoCompleteKeys = Object.keys(DEFAULT_INPUT_VALUES).filter(k => (autoCompleteValue as string).includes(k));
85+
}
86+
}
87+
88+
if (pValue === 'submit' || pValue === 'hidden') {
89+
// console.log('Type is either submit or hidden.');
90+
continue;
91+
} else if (autoCompleteKeys.length > 0) {
92+
// console.log('Autocomplete keys > 0');
93+
await el.focus();
94+
await page.keyboard.press('Tab', {
95+
delay: 100
96+
});
97+
await el.press('Backspace');
98+
await page.keyboard.type(DEFAULT_INPUT_VALUES[autoCompleteKeys[0] as string]);
99+
} else if (Object.keys(DEFAULT_INPUT_VALUES).includes(pValue as string)) {
100+
// console.log('Default input values includes pValue');
101+
await el.focus();
102+
await page.keyboard.press('Tab', {
103+
delay: 100
104+
});
105+
await el.press('Backspace');
106+
await page.keyboard.type(DEFAULT_INPUT_VALUES[pValue as string]);
107+
// console.log(' ... done with test');
108+
}
109+
isInteracting = false;
79110
} else {
80-
// console.log('Autocomplete does not include cc-name.');
81-
autoCompleteKeys = Object.keys(DEFAULT_INPUT_VALUES).filter(k => (autoCompleteValue as string).includes(k));
111+
console.log('Page is closed. Exiting loop.');
112+
break;
82113
}
83114
}
84-
85-
if (isDone) {
86-
return;
87-
} else if (pValue === 'submit' || pValue === 'hidden') {
88-
// console.log('Type is either submit or hidden.');
89-
continue;
90-
} else if (autoCompleteKeys.length > 0) {
91-
// console.log('Autocomplete keys > 0');
92-
await el.focus();
93-
await page.keyboard.press('Tab', {
94-
delay: 100
95-
});
96-
await el.press('Backspace');
97-
await page.keyboard.type(DEFAULT_INPUT_VALUES[autoCompleteKeys[0] as string]);
98-
} else if (Object.keys(DEFAULT_INPUT_VALUES).includes(pValue as string)) {
99-
// console.log('Default input values includes pValue');
100-
await el.focus();
101-
await page.keyboard.press('Tab', {
102-
delay: 100
103-
});
104-
await el.press('Backspace');
105-
await page.keyboard.type(DEFAULT_INPUT_VALUES[pValue as string]);
106-
// console.log(' ... done with test');
107-
}
115+
} else {
116+
console.log('Page is closed. Exiting fillForms.');
108117
}
109118
} catch (error) {
110119
if (error.message.includes('Execution context was destroyed')) {
111120
console.log('Page navigated away while interacting. Continuing...');
112121
} else {
113-
console.log(`Error in fillForms: ${error.message}`);
122+
console.error(`Error in fillForms: ${error.message}`);
114123
}
115124
} finally {
116125
console.log('Done with fillForms');
@@ -121,7 +130,6 @@ export const fillForms = async (page: Page, timeout = 30000) => {
121130
};
122131

123132
export const autoScroll = async page => {
124-
console.log('Scrolling the page');
125133
await page.evaluate(async () => {
126134
return new Promise((resolve, reject) => {
127135
try {
@@ -136,12 +144,10 @@ export const autoScroll = async page => {
136144
count += 1;
137145
if (totalHeight >= scrollHeight || count > COUNT_MAX) {
138146
clearInterval(timer);
139-
console.log('Done scrolling the page');
140147
resolve(undefined);
141148
}
142149
}, 100);
143150
} catch (error) {
144-
console.log(`Error scrolling: ${error.message}`);
145151
reject(error);
146152
}
147153
});

0 commit comments

Comments
 (0)