Skip to content

Commit 60176fe

Browse files
authored
Merge pull request #562 from getmaxun/all-record
feat: allow training multiple capture actions in one recording session
2 parents 2f4db4e + e09e794 commit 60176fe

File tree

18 files changed: 1789 additions and 586 deletions.

maxun-core/src/interpret.ts

Lines changed: 61 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,9 @@ interface InterpreterOptions {
4343
binaryCallback: (output: any, mimeType: string) => (void | Promise<void>);
4444
debug: boolean;
4545
debugChannel: Partial<{
46-
activeId: Function,
47-
debugMessage: Function,
46+
activeId: (id: number) => void,
47+
debugMessage: (msg: string) => void,
48+
setActionType: (type: string) => void,
4849
}>
4950
}
5051

@@ -377,12 +378,20 @@ export default class Interpreter extends EventEmitter {
377378
*/
378379
const wawActions: Record<CustomFunctions, (...args: any[]) => void> = {
379380
screenshot: async (params: PageScreenshotOptions) => {
381+
if (this.options.debugChannel?.setActionType) {
382+
this.options.debugChannel.setActionType('screenshot');
383+
}
384+
380385
const screenshotBuffer = await page.screenshot({
381386
...params, path: undefined,
382387
});
383388
await this.options.binaryCallback(screenshotBuffer, 'image/png');
384389
},
385390
enqueueLinks: async (selector: string) => {
391+
if (this.options.debugChannel?.setActionType) {
392+
this.options.debugChannel.setActionType('enqueueLinks');
393+
}
394+
386395
const links: string[] = await page.locator(selector)
387396
.evaluateAll(
388397
// @ts-ignore
@@ -409,55 +418,51 @@ export default class Interpreter extends EventEmitter {
409418
await page.close();
410419
},
411420
scrape: async (selector?: string) => {
421+
if (this.options.debugChannel?.setActionType) {
422+
this.options.debugChannel.setActionType('scrape');
423+
}
424+
412425
await this.ensureScriptsLoaded(page);
413426

414427
const scrapeResults: Record<string, string>[] = await page.evaluate((s) => window.scrape(s ?? null), selector);
415428
await this.options.serializableCallback(scrapeResults);
416429
},
417430

418431
scrapeSchema: async (schema: Record<string, { selector: string; tag: string, attribute: string; shadow: string}>) => {
432+
if (this.options.debugChannel?.setActionType) {
433+
this.options.debugChannel.setActionType('scrapeSchema');
434+
}
435+
419436
await this.ensureScriptsLoaded(page);
420437

421438
const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
422439

423-
const newResults = Array.isArray(scrapeResult) ? scrapeResult : [scrapeResult];
424-
newResults.forEach((result) => {
425-
Object.entries(result).forEach(([key, value]) => {
426-
const keyExists = this.cumulativeResults.some(
427-
(item) => key in item && item[key] !== undefined
428-
);
429-
430-
if (!keyExists) {
431-
this.cumulativeResults.push({ [key]: value });
432-
}
433-
});
440+
if (!this.cumulativeResults || !Array.isArray(this.cumulativeResults)) {
441+
this.cumulativeResults = [];
442+
}
443+
444+
if (this.cumulativeResults.length === 0) {
445+
this.cumulativeResults.push({});
446+
}
447+
448+
const mergedResult = this.cumulativeResults[0];
449+
const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
450+
451+
Object.entries(resultToProcess).forEach(([key, value]) => {
452+
if (value !== undefined) {
453+
mergedResult[key] = value;
454+
}
434455
});
435-
436-
const mergedResult: Record<string, string>[] = [
437-
Object.fromEntries(
438-
Object.entries(
439-
this.cumulativeResults.reduce((acc, curr) => {
440-
Object.entries(curr).forEach(([key, value]) => {
441-
// If the key doesn't exist or the current value is not undefined, add/update it
442-
if (value !== undefined) {
443-
acc[key] = value;
444-
}
445-
});
446-
return acc;
447-
}, {})
448-
)
449-
)
450-
];
451-
452-
// Log cumulative results after each action
453-
console.log("CUMULATIVE results:", this.cumulativeResults);
454-
console.log("MERGED results:", mergedResult);
455-
456-
await this.options.serializableCallback(mergedResult);
457-
// await this.options.serializableCallback(scrapeResult);
456+
457+
console.log("Updated merged result:", mergedResult);
458+
await this.options.serializableCallback([mergedResult]);
458459
},
459460

460461
scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => {
462+
if (this.options.debugChannel?.setActionType) {
463+
this.options.debugChannel.setActionType('scrapeList');
464+
}
465+
461466
await this.ensureScriptsLoaded(page);
462467
if (!config.pagination) {
463468
const scrapeResults: Record<string, any>[] = await page.evaluate((cfg) => window.scrapeList(cfg), config);
@@ -469,6 +474,10 @@ export default class Interpreter extends EventEmitter {
469474
},
470475

471476
scrapeListAuto: async (config: { listSelector: string }) => {
477+
if (this.options.debugChannel?.setActionType) {
478+
this.options.debugChannel.setActionType('scrapeListAuto');
479+
}
480+
472481
await this.ensureScriptsLoaded(page);
473482

474483
const scrapeResults: { selector: string, innerText: string }[] = await page.evaluate((listSelector) => {
@@ -479,6 +488,10 @@ export default class Interpreter extends EventEmitter {
479488
},
480489

481490
scroll: async (pages?: number) => {
491+
if (this.options.debugChannel?.setActionType) {
492+
this.options.debugChannel.setActionType('scroll');
493+
}
494+
482495
await page.evaluate(async (pagesInternal) => {
483496
for (let i = 1; i <= (pagesInternal ?? 1); i += 1) {
484497
// @ts-ignore
@@ -488,6 +501,10 @@ export default class Interpreter extends EventEmitter {
488501
},
489502

490503
script: async (code: string) => {
504+
if (this.options.debugChannel?.setActionType) {
505+
this.options.debugChannel.setActionType('script');
506+
}
507+
491508
const AsyncFunction: FunctionConstructor = Object.getPrototypeOf(
492509
async () => { },
493510
).constructor;
@@ -496,6 +513,10 @@ export default class Interpreter extends EventEmitter {
496513
},
497514

498515
flag: async () => new Promise((res) => {
516+
if (this.options.debugChannel?.setActionType) {
517+
this.options.debugChannel.setActionType('flag');
518+
}
519+
499520
this.emit('flag', page, res);
500521
}),
501522
};
@@ -526,6 +547,10 @@ export default class Interpreter extends EventEmitter {
526547
const params = !step.args || Array.isArray(step.args) ? step.args : [step.args];
527548
await wawActions[step.action as CustomFunctions](...(params ?? []));
528549
} else {
550+
if (this.options.debugChannel?.setActionType) {
551+
this.options.debugChannel.setActionType(String(step.action));
552+
}
553+
529554
// Implements the dot notation for the "method name" in the workflow
530555
const levels = String(step.action).split('.');
531556
const methodName = levels[levels.length - 1];

package.json

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,6 @@
5050
"lodash": "^4.17.21",
5151
"loglevel": "^1.8.0",
5252
"loglevel-plugin-remote": "^0.6.8",
53-
"maxun-core": "^0.0.15",
5453
"minio": "^8.0.1",
5554
"moment-timezone": "^0.5.45",
5655
"node-cron": "^3.0.3",

public/locales/de.json

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -535,20 +535,23 @@
535535
"output_data": "Ausgabedaten",
536536
"log": "Protokoll"
537537
},
538-
"empty_output": "Die Ausgabe ist leer.",
539-
"loading": "Ausführung läuft. Extrahierte Daten werden nach Abschluss des Durchlaufs hier angezeigt.",
538+
"buttons": {
539+
"stop": "Stoppen"
540+
},
541+
"loading": "Daten werden geladen...",
542+
"empty_output": "Keine Ausgabedaten verfügbar",
540543
"captured_data": {
541544
"title": "Erfasste Daten",
542-
"download_json": "Als JSON herunterladen",
543-
"download_csv": "Als CSV herunterladen"
545+
"download_csv": "CSV herunterladen",
546+
"view_full": "Vollständige Daten anzeigen",
547+
"items": "Elemente",
548+
"schema_title": "Erfasste Texte",
549+
"list_title": "Erfasste Listen"
544550
},
545551
"captured_screenshot": {
546-
"title": "Erfasster Screenshot",
547-
"download": "Screenshot herunterladen",
548-
"render_failed": "Das Bild konnte nicht gerendert werden"
549-
},
550-
"buttons": {
551-
"stop": "Stoppen"
552+
"title": "Erfasste Screenshots",
553+
"download": "Herunterladen",
554+
"render_failed": "Fehler beim Rendern des Screenshots"
552555
}
553556
},
554557
"navbar": {

public/locales/en.json

Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -177,6 +177,11 @@
177177
"pagination": "Select how the robot can capture the rest of the list",
178178
"limit": "Choose the number of items to extract",
179179
"complete": "Capture is complete"
180+
},
181+
"actions": {
182+
"text": "Capture Text",
183+
"list": "Capture List",
184+
"screenshot": "Capture Screenshot"
180185
}
181186
},
182187
"right_panel": {
@@ -543,20 +548,23 @@
543548
"output_data": "Output Data",
544549
"log": "Log"
545550
},
546-
"empty_output": "The output is empty.",
547-
"loading": "Run in progress. Extracted data will appear here once run completes.",
551+
"buttons": {
552+
"stop": "Stop"
553+
},
554+
"loading": "Loading data...",
555+
"empty_output": "No output data available",
548556
"captured_data": {
549557
"title": "Captured Data",
550-
"download_json": "Download as JSON",
551-
"download_csv": "Download as CSV"
558+
"download_csv": "Download CSV",
559+
"view_full": "View Full Data",
560+
"items": "items",
561+
"schema_title": "Captured Texts",
562+
"list_title": "Captured Lists"
552563
},
553564
"captured_screenshot": {
554-
"title": "Captured Screenshot",
555-
"download": "Download Screenshot",
556-
"render_failed": "The image failed to render"
557-
},
558-
"buttons": {
559-
"stop": "Stop"
565+
"title": "Captured Screenshots",
566+
"download": "Download",
567+
"render_failed": "Failed to render screenshot"
560568
}
561569
},
562570
"navbar": {

public/locales/es.json

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -536,20 +536,23 @@
536536
"output_data": "Datos de Salida",
537537
"log": "Registro"
538538
},
539-
"empty_output": "La salida está vacía.",
540-
"loading": "Ejecución en curso. Los datos extraídos aparecerán aquí una vez que se complete la ejecución.",
539+
"buttons": {
540+
"stop": "Detener"
541+
},
542+
"loading": "Cargando datos...",
543+
"empty_output": "No hay datos de salida disponibles",
541544
"captured_data": {
542-
"title": "Datos Capturados",
543-
"download_json": "Descargar como JSON",
544-
"download_csv": "Descargar como CSV"
545+
"title": "Datos capturados",
546+
"download_csv": "Descargar CSV",
547+
"view_full": "Ver datos completos",
548+
"items": "elementos",
549+
"schema_title": "Textos capturados",
550+
"list_title": "Listas capturadas"
545551
},
546552
"captured_screenshot": {
547-
"title": "Captura de Pantalla",
548-
"download": "Descargar Captura",
549-
"render_failed": "No se pudo renderizar la imagen"
550-
},
551-
"buttons": {
552-
"stop": "Detener"
553+
"title": "Capturas de pantalla",
554+
"download": "Descargar",
555+
"render_failed": "Error al renderizar la captura de pantalla"
553556
}
554557
},
555558
"navbar": {

public/locales/ja.json

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -536,20 +536,23 @@
536536
"output_data": "出力データ",
537537
"log": "ログ"
538538
},
539-
"empty_output": "出力は空です。",
540-
"loading": "実行中です。実行が完了すると、抽出されたデータがここに表示されます。",
539+
"buttons": {
540+
"stop": "停止"
541+
},
542+
"loading": "データを読み込み中...",
543+
"empty_output": "出力データがありません",
541544
"captured_data": {
542-
"title": "キャプチャされたデータ",
543-
"download_json": "JSONとしてダウンロード",
544-
"download_csv": "CSVとしてダウンロード"
545+
"title": "キャプチャしたデータ",
546+
"download_csv": "CSVをダウンロード",
547+
"view_full": "完全なデータを表示",
548+
"items": "アイテム",
549+
"schema_title": "キャプチャしたテキスト",
550+
"list_title": "キャプチャしたリスト"
545551
},
546552
"captured_screenshot": {
547-
"title": "キャプチャされたスクリーンショット",
548-
"download": "スクリーンショットをダウンロード",
549-
"render_failed": "画像のレンダリングに失敗しました"
550-
},
551-
"buttons": {
552-
"stop": "停止"
553+
"title": "キャプチャしたスクリーンショット",
554+
"download": "ダウンロード",
555+
"render_failed": "スクリーンショットのレンダリングに失敗しました"
553556
}
554557
},
555558
"navbar": {

public/locales/zh.json

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -536,20 +536,23 @@
536536
"output_data": "输出数据",
537537
"log": "日志"
538538
},
539-
"empty_output": "输出为空。",
540-
"loading": "运行中。运行完成后,提取的数据将显示在此处。",
539+
"buttons": {
540+
"stop": "停止"
541+
},
542+
"loading": "加载数据中...",
543+
"empty_output": "没有可用的输出数据",
541544
"captured_data": {
542-
"title": "捕获的数据",
543-
"download_json": "下载为JSON",
544-
"download_csv": "下载为CSV"
545+
"title": "已捕获的数据",
546+
"download_csv": "下载CSV",
547+
"view_full": "查看完整数据",
548+
"items": "项目",
549+
"schema_title": "已捕获的文本",
550+
"list_title": "已捕获的列表"
545551
},
546552
"captured_screenshot": {
547-
"title": "捕获的截图",
548-
"download": "下载截图",
549-
"render_failed": "图像渲染失败"
550-
},
551-
"buttons": {
552-
"stop": "停止"
553+
"title": "已捕获的截图",
554+
"download": "下载",
555+
"render_failed": "渲染截图失败"
553556
}
554557
},
555558
"navbar": {

server/src/api/record.ts

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -586,6 +586,11 @@ async function executeRun(id: string, userId: string) {
586586
const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
587587
const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput);
588588

589+
const categorizedOutput = {
590+
scrapeSchema: interpretationInfo.scrapeSchemaOutput || {},
591+
scrapeList: interpretationInfo.scrapeListOutput || {},
592+
};
593+
589594
await destroyRemoteBrowser(plainRun.browserId, userId);
590595

591596
const updatedRun = await run.update({
@@ -594,7 +599,10 @@ async function executeRun(id: string, userId: string) {
594599
finishedAt: new Date().toLocaleString(),
595600
browserId: plainRun.browserId,
596601
log: interpretationInfo.log.join('\n'),
597-
serializableOutput: interpretationInfo.serializableOutput,
602+
serializableOutput: {
603+
scrapeSchema: Object.values(categorizedOutput.scrapeSchema),
604+
scrapeList: Object.values(categorizedOutput.scrapeList),
605+
},
598606
binaryOutput: uploadedBinaryOutput,
599607
});
600608

0 commit comments

Comments
 (0)