Skip to content

Commit 1bb1cc8

Browse files
authored
Merge pull request #890 from RohitR311/persist-fix
fix(maxun-core): pagination data persistence for multiple actions
2 parents: f175b9f + 8171e51 — commit 1bb1cc8

File tree

2 files changed: 97 additions and 57 deletions.

maxun-core/src/interpret.ts

Lines changed: 67 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,8 @@ export default class Interpreter extends EventEmitter {
8282
scrapeSchema: {}
8383
};
8484

85+
private scrapeListCounter: number = 0;
86+
8587
constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>) {
8688
super();
8789
this.workflow = workflow.workflow;
@@ -484,7 +486,7 @@ export default class Interpreter extends EventEmitter {
484486
await this.options.serializableCallback(scrapeResults);
485487
},
486488

487-
scrapeSchema: async (schema: Record<string, { selector: string; tag: string, attribute: string; shadow: string}>) => {
489+
scrapeSchema: async (schema: Record<string, { selector: string; tag: string, attribute: string; shadow: string}>, actionName: string = "") => {
488490
if (this.isAborted) {
489491
this.log('Workflow aborted, stopping scrapeSchema', Level.WARN);
490492
return;
@@ -540,25 +542,25 @@ export default class Interpreter extends EventEmitter {
540542
}
541543

542544
const actionType = "scrapeSchema";
543-
const actionName = (schema as any).__name || "Texts";
545+
const name = actionName || "Texts";
544546

545547
if (!this.namedResults[actionType]) this.namedResults[actionType] = {};
546-
this.namedResults[actionType][actionName] = this.cumulativeResults;
548+
this.namedResults[actionType][name] = this.cumulativeResults;
547549

548550
if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {};
549-
if (!this.serializableDataByType[actionType][actionName]) {
550-
this.serializableDataByType[actionType][actionName] = [];
551+
if (!this.serializableDataByType[actionType][name]) {
552+
this.serializableDataByType[actionType][name] = [];
551553
}
552554

553-
this.serializableDataByType[actionType][actionName] = [...this.cumulativeResults];
555+
this.serializableDataByType[actionType][name] = [...this.cumulativeResults];
554556

555557
await this.options.serializableCallback({
556558
scrapeList: this.serializableDataByType.scrapeList,
557559
scrapeSchema: this.serializableDataByType.scrapeSchema
558560
});
559561
},
560562

561-
scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => {
563+
scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }, actionName: string = "") => {
562564
if (this.isAborted) {
563565
this.log('Workflow aborted, stopping scrapeList', Level.WARN);
564566
return;
@@ -575,12 +577,13 @@ export default class Interpreter extends EventEmitter {
575577

576578
try {
577579
await this.ensureScriptsLoaded(page);
578-
580+
579581
if (this.options.debugChannel?.incrementScrapeListIndex) {
580582
this.options.debugChannel.incrementScrapeListIndex();
581583
}
582584

583585
let scrapeResults = [];
586+
let paginationUsed = false;
584587

585588
if (!config.pagination) {
586589
scrapeResults = await page.evaluate((cfg) => {
@@ -592,38 +595,53 @@ export default class Interpreter extends EventEmitter {
592595
}
593596
}, config);
594597
} else {
595-
scrapeResults = await this.handlePagination(page, config);
598+
paginationUsed = true;
599+
scrapeResults = await this.handlePagination(page, config, actionName);
596600
}
597601

598602
if (!Array.isArray(scrapeResults)) {
599603
scrapeResults = [];
600604
}
601605

602-
const actionType = "scrapeList";
603-
const actionName = (config as any).__name || "List";
606+
console.log(`ScrapeList completed with ${scrapeResults.length} results`);
604607

605-
if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {};
606-
if (!this.serializableDataByType[actionType][actionName]) {
607-
this.serializableDataByType[actionType][actionName] = [];
608-
}
608+
if (!paginationUsed) {
609+
const actionType = "scrapeList";
610+
let name = actionName || "";
609611

610-
this.serializableDataByType[actionType][actionName].push(...scrapeResults);
612+
if (!name || name.trim() === "") {
613+
this.scrapeListCounter++;
614+
name = `List ${this.scrapeListCounter}`;
615+
}
611616

612-
await this.options.serializableCallback({
613-
scrapeList: this.serializableDataByType.scrapeList,
614-
scrapeSchema: this.serializableDataByType.scrapeSchema
615-
});
617+
if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {};
618+
if (!this.serializableDataByType[actionType][name]) {
619+
this.serializableDataByType[actionType][name] = [];
620+
}
621+
622+
this.serializableDataByType[actionType][name].push(...scrapeResults);
623+
624+
await this.options.serializableCallback({
625+
scrapeList: this.serializableDataByType.scrapeList,
626+
scrapeSchema: this.serializableDataByType.scrapeSchema
627+
});
628+
}
616629
} catch (error) {
617630
console.error('ScrapeList action failed completely:', error.message);
618-
631+
619632
const actionType = "scrapeList";
620-
const actionName = (config as any).__name || "List";
633+
let name = actionName || "";
634+
635+
if (!name || name.trim() === "") {
636+
this.scrapeListCounter++;
637+
name = `List ${this.scrapeListCounter}`;
638+
}
621639

622640
if (!this.namedResults[actionType]) this.namedResults[actionType] = {};
623-
this.namedResults[actionType][actionName] = [];
641+
this.namedResults[actionType][name] = [];
624642

625643
if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {};
626-
this.serializableDataByType[actionType][actionName] = [];
644+
this.serializableDataByType[actionType][name] = [];
627645

628646
await this.options.serializableCallback({
629647
scrapeList: this.serializableDataByType.scrapeList,
@@ -718,26 +736,7 @@ export default class Interpreter extends EventEmitter {
718736
debug.setActionType(String(step.action));
719737
}
720738

721-
if ((step as any)?.name) {
722-
stepName = (step as any).name;
723-
} else if (
724-
Array.isArray((step as any)?.args) &&
725-
(step as any).args.length > 0 &&
726-
typeof (step as any).args[0] === "object" &&
727-
"__name" in (step as any).args[0]
728-
) {
729-
stepName = (step as any).args[0].__name;
730-
} else if (
731-
typeof (step as any)?.args === "object" &&
732-
step?.args !== null &&
733-
"__name" in (step as any).args
734-
) {
735-
stepName = (step as any).args.__name;
736-
}
737-
738-
if (!stepName) {
739-
stepName = String(step.action);
740-
}
739+
stepName = (step as any)?.name || String(step.action);
741740

742741
if (debug && typeof (debug as any).setActionName === "function") {
743742
(debug as any).setActionName(stepName);
@@ -751,6 +750,9 @@ export default class Interpreter extends EventEmitter {
751750
const params = !step.args || Array.isArray(step.args) ? step.args : [step.args];
752751
if (step.action === 'screenshot') {
753752
await (wawActions.screenshot as any)(...(params ?? []), stepName ?? undefined);
753+
} else if (step.action === 'scrapeList' || step.action === 'scrapeSchema') {
754+
const actionName = (step as any).name || "";
755+
await wawActions[step.action as CustomFunctions](...(params ?? []), actionName);
754756
} else {
755757
await wawActions[step.action as CustomFunctions](...(params ?? []));
756758
}
@@ -812,26 +814,39 @@ export default class Interpreter extends EventEmitter {
812814
fields: any,
813815
limit?: number,
814816
pagination: any
815-
}) {
817+
}, providedActionName: string = "") {
816818
if (this.isAborted) {
817819
this.log('Workflow aborted, stopping pagination', Level.WARN);
818820
return [];
819821
}
820822

823+
const actionType = "scrapeList";
824+
let actionName = providedActionName || "";
825+
if (!actionName || actionName.trim() === "") {
826+
this.scrapeListCounter++;
827+
actionName = `List ${this.scrapeListCounter}`;
828+
}
829+
830+
if (!this.serializableDataByType[actionType]) {
831+
this.serializableDataByType[actionType] = {};
832+
}
833+
if (!this.serializableDataByType[actionType][actionName]) {
834+
this.serializableDataByType[actionType][actionName] = [];
835+
}
836+
821837
let allResults: Record<string, any>[] = [];
822838
let previousHeight = 0;
823839
let scrapedItems: Set<string> = new Set<string>();
824840
let visitedUrls: Set<string> = new Set<string>();
825841
const MAX_RETRIES = 3;
826-
const RETRY_DELAY = 1000; // 1 second delay between retries
842+
const RETRY_DELAY = 1000;
827843
const MAX_UNCHANGED_RESULTS = 5;
828844

829845
const debugLog = (message: string, ...args: any[]) => {
830846
console.log(`[Page ${visitedUrls.size}] [URL: ${page.url()}] ${message}`, ...args);
831847
};
832848

833849
const scrapeCurrentPage = async () => {
834-
// Check abort flag before scraping current page
835850
if (this.isAborted) {
836851
debugLog("Workflow aborted, stopping scrapeCurrentPage");
837852
return;
@@ -849,7 +864,6 @@ export default class Interpreter extends EventEmitter {
849864
debugLog(`Page evaluation failed: ${error.message}`);
850865
return;
851866
}
852-
853867
const newResults = results.filter(item => {
854868
const uniqueKey = JSON.stringify(item);
855869
if (scrapedItems.has(uniqueKey)) return false;
@@ -859,7 +873,11 @@ export default class Interpreter extends EventEmitter {
859873
allResults = allResults.concat(newResults);
860874
debugLog("Results collected:", allResults.length);
861875

862-
await this.options.serializableCallback(allResults);
876+
this.serializableDataByType[actionType][actionName] = [...allResults];
877+
await this.options.serializableCallback({
878+
scrapeList: this.serializableDataByType.scrapeList,
879+
scrapeSchema: this.serializableDataByType.scrapeSchema
880+
});
863881
};
864882

865883
const checkLimit = () => {

server/src/workflow-management/classes/Interpreter.ts

Lines changed: 30 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -562,14 +562,39 @@ export class WorkflowInterpreter {
562562
? data?.scrapeSchema
563563
: null;
564564

565-
if (!subtree) return;
565+
if (typeKey === "scrapeList" && data.scrapeList) {
566+
data = data.scrapeList;
567+
} else if (typeKey === "scrapeSchema" && data.scrapeSchema) {
568+
data = data.scrapeSchema;
569+
}
570+
571+
let actionName = "";
572+
if (typeKey === "scrapeList" && data && typeof data === "object" && !Array.isArray(data)) {
573+
const keys = Object.keys(data);
574+
if (keys.length === 1) {
575+
actionName = keys[0];
576+
data = data[actionName];
577+
} else if (keys.length > 1) {
578+
actionName = keys[keys.length - 1];
579+
data = data[actionName];
580+
}
581+
}
566582

567-
if (typeKey === "scrapeList") {
568-
actionName = this.getUniqueActionName(typeKey, actionName);
583+
if (!actionName) {
584+
actionName = this.currentActionName || "";
585+
if (typeKey === "scrapeList" && !actionName) {
586+
actionName = this.getUniqueActionName(typeKey, "");
587+
}
569588
}
570589

571-
const values = Object.values(subtree);
572-
const flattened = values.flat();
590+
const flattened = Array.isArray(data)
591+
? data
592+
: (
593+
data?.List ??
594+
(data && typeof data === "object"
595+
? Object.values(data).flat?.() ?? data
596+
: [])
597+
);
573598

574599
if (!this.serializableDataByType[typeKey]) {
575600
this.serializableDataByType[typeKey] = {};
@@ -586,9 +611,6 @@ export class WorkflowInterpreter {
586611
name: actionName,
587612
data: flattened,
588613
});
589-
590-
this.currentActionType = null;
591-
this.currentActionName = null;
592614
} catch (err: any) {
593615
logger.log("error", `serializableCallback failed: ${err.message}`);
594616
}

0 commit comments

Comments (0)