Skip to content

Commit 7d652f4

Browse files
Merge branch 'develop' into discriptionbox
2 parents fdf1ec5 + d4cb9f2 commit 7d652f4

File tree

37 files changed

+2869
-806
lines changed

37 files changed

+2869
-806
lines changed
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
name: Bug Report
2+
description: Report a bug to help us improve
3+
title: "[Bug]: "
4+
labels: [bug]
5+
assignees: []
6+
7+
body:
8+
- type: dropdown
9+
id: environment
10+
attributes:
11+
label: Where are you using the app?
12+
options:
13+
- Cloud (Hosted by Us)
14+
- Self-Hosted (OSS) with Docker
15+
- Self-Hosted (OSS) without Docker
16+
validations:
17+
required: true
18+
19+
- type: input
20+
id: app_version
21+
attributes:
22+
label: App Version
23+
description: Enter the version number you are using (if known).
24+
placeholder: "e.g., v1.2.3"
25+
validations:
26+
required: false
27+
28+
- type: input
29+
id: browser
30+
attributes:
31+
label: Browser
32+
description: Which browser are you using?
33+
placeholder: "e.g., Chrome 124, Firefox 115, Safari 17"
34+
validations:
35+
required: true
36+
37+
- type: input
38+
id: operating_system
39+
attributes:
40+
label: Operating System
41+
description: Your operating system and version.
42+
placeholder: "e.g., Windows 11, macOS Sonoma, Ubuntu 22.04"
43+
validations:
44+
required: true
45+
46+
- type: textarea
47+
id: steps_to_reproduce
48+
attributes:
49+
label: Steps to Reproduce
50+
description: How can we reproduce the problem?
51+
placeholder: |
52+
1. Go to '...'
53+
2. Click on '...'
54+
3. Scroll down to '...'
55+
4. See error
56+
validations:
57+
required: true
58+
59+
- type: textarea
60+
id: expected_behavior
61+
attributes:
62+
label: Expected Behavior
63+
description: What did you expect to happen instead?
64+
validations:
65+
required: true
66+
67+
- type: textarea
68+
id: actual_behavior
69+
attributes:
70+
label: Actual Behavior
71+
description: What actually happened?
72+
validations:
73+
required: true
74+
75+
- type: textarea
76+
id: logs
77+
attributes:
78+
label: Relevant Logs or Screenshots
79+
description: Please paste any logs, screenshots, or console errors if available.
80+
placeholder: "Paste logs or upload screenshots."
81+
validations:
82+
required: false
83+
84+
- type: textarea
85+
id: additional_context
86+
attributes:
87+
label: Additional Context
88+
description: Anything else we should know?
89+
validations:
90+
required: false

ENVEXAMPLE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ DB_PASSWORD=postgres # PostgreSQL password
77
DB_HOST=postgres # Host for PostgreSQL in Docker
88
DB_PORT=5432 # Port for PostgreSQL (default: 5432)
99
ENCRYPTION_KEY=f4d5e6a7b8c9d0e1f23456789abcdef01234567890abcdef123456789abcdef0 # Key for encrypting sensitive data (passwords and proxies)
10+
SESSION_SECRET=maxun_session # Secret used to sign session cookies. Replace this placeholder with your own strong, random string to avoid session hijacking.
11+
1012
MINIO_ENDPOINT=minio # MinIO endpoint in Docker
1113
MINIO_PORT=9000 # Port for MinIO (default: 9000)
1214
MINIO_CONSOLE_PORT=9001 # Web UI Port for MinIO (default: 9001)

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ You can access the frontend at http://localhost:5173/ and backend at http://loca
9898
| `DB_HOST` | Yes | Host address where the Postgres database server is running. | Database connection will fail. |
9999
| `DB_PORT` | Yes | Port number used to connect to the Postgres database server. | Database connection will fail. |
100100
| `ENCRYPTION_KEY` | Yes | Key used for encrypting sensitive data (proxies, passwords). | Encryption functionality will not work. |
101+
| `SESSION_SECRET` | No | A strong, random string used to sign session cookies. | Falls back to the default secret. Define your own session secret to avoid session hijacking. |
101102
| `MINIO_ENDPOINT` | Yes | Endpoint URL for MinIO, to store Robot Run Screenshots. | Connection to MinIO storage will fail. |
102103
| `MINIO_PORT` | Yes | Port number for MinIO service. | Connection to MinIO storage will fail. |
103104
| `MINIO_CONSOLE_PORT` | No | Port number for MinIO WebUI service. Needed for Docker setup. | Cannot access MinIO Web UI. |
@@ -107,7 +108,6 @@ You can access the frontend at http://localhost:5173/ and backend at http://loca
107108
| `GOOGLE_REDIRECT_URI` | No | Redirect URI for handling Google OAuth responses. | Google login will not work. |
108109
| `AIRTABLE_CLIENT_ID` | No | Client ID for Airtable, used for Airtable integration authentication. | Airtable login will not work. |
109110
| `AIRTABLE_REDIRECT_URI` | No | Redirect URI for handling Airtable OAuth responses. | Airtable login will not work. |
110-
111111
| `MAXUN_TELEMETRY` | No | Disables telemetry to stop sending anonymous usage data. Keeping it enabled helps us understand how the product is used and assess the impact of any new changes. Please keep it enabled. | Telemetry data will not be collected. |
112112

113113

docker-compose.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
version: '3.8'
2-
31
services:
42
postgres:
53
image: postgres:13

index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<meta name="theme-color" content="#000000" />
77
<meta
88
name="description"
9-
content="Web site created using Vite"
9+
content="Maxun is an open-source no-code web data extraction platform. Train a robot in 2 minutes to extract data on auto-pilot!"
1010
/>
1111
<link rel="icon" type="image/png" href="src/assets/maxunlogo.png">
1212
<title>Maxun | Open Source No Code Web Data Extraction Platform</title>

maxun-core/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "maxun-core",
3-
"version": "0.0.15",
3+
"version": "0.0.16",
44
"description": "Core package for Maxun, responsible for data extraction",
55
"main": "build/index.js",
66
"typings": "build/index.d.ts",

maxun-core/src/interpret.ts

Lines changed: 72 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,16 @@ declare global {
3737
* Defines optional interpreter options (passed in constructor)
3838
*/
3939
interface InterpreterOptions {
40+
mode?: string;
4041
maxRepeats: number;
4142
maxConcurrency: number;
4243
serializableCallback: (output: any) => (void | Promise<void>);
4344
binaryCallback: (output: any, mimeType: string) => (void | Promise<void>);
4445
debug: boolean;
4546
debugChannel: Partial<{
46-
activeId: Function,
47-
debugMessage: Function,
47+
activeId: (id: number) => void,
48+
debugMessage: (msg: string) => void,
49+
setActionType: (type: string) => void,
4850
}>
4951
}
5052

@@ -377,12 +379,20 @@ export default class Interpreter extends EventEmitter {
377379
*/
378380
const wawActions: Record<CustomFunctions, (...args: any[]) => void> = {
379381
screenshot: async (params: PageScreenshotOptions) => {
382+
if (this.options.debugChannel?.setActionType) {
383+
this.options.debugChannel.setActionType('screenshot');
384+
}
385+
380386
const screenshotBuffer = await page.screenshot({
381387
...params, path: undefined,
382388
});
383389
await this.options.binaryCallback(screenshotBuffer, 'image/png');
384390
},
385391
enqueueLinks: async (selector: string) => {
392+
if (this.options.debugChannel?.setActionType) {
393+
this.options.debugChannel.setActionType('enqueueLinks');
394+
}
395+
386396
const links: string[] = await page.locator(selector)
387397
.evaluateAll(
388398
// @ts-ignore
@@ -409,55 +419,61 @@ export default class Interpreter extends EventEmitter {
409419
await page.close();
410420
},
411421
scrape: async (selector?: string) => {
422+
if (this.options.debugChannel?.setActionType) {
423+
this.options.debugChannel.setActionType('scrape');
424+
}
425+
412426
await this.ensureScriptsLoaded(page);
413427

414428
const scrapeResults: Record<string, string>[] = await page.evaluate((s) => window.scrape(s ?? null), selector);
415429
await this.options.serializableCallback(scrapeResults);
416430
},
417431

418432
scrapeSchema: async (schema: Record<string, { selector: string; tag: string, attribute: string; shadow: string}>) => {
433+
if (this.options.debugChannel?.setActionType) {
434+
this.options.debugChannel.setActionType('scrapeSchema');
435+
}
436+
437+
if (this.options.mode && this.options.mode === 'editor') {
438+
await this.options.serializableCallback({});
439+
return;
440+
}
441+
419442
await this.ensureScriptsLoaded(page);
420443

421444
const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
422445

423-
const newResults = Array.isArray(scrapeResult) ? scrapeResult : [scrapeResult];
424-
newResults.forEach((result) => {
425-
Object.entries(result).forEach(([key, value]) => {
426-
const keyExists = this.cumulativeResults.some(
427-
(item) => key in item && item[key] !== undefined
428-
);
429-
430-
if (!keyExists) {
431-
this.cumulativeResults.push({ [key]: value });
432-
}
433-
});
446+
if (!this.cumulativeResults || !Array.isArray(this.cumulativeResults)) {
447+
this.cumulativeResults = [];
448+
}
449+
450+
if (this.cumulativeResults.length === 0) {
451+
this.cumulativeResults.push({});
452+
}
453+
454+
const mergedResult = this.cumulativeResults[0];
455+
const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
456+
457+
Object.entries(resultToProcess).forEach(([key, value]) => {
458+
if (value !== undefined) {
459+
mergedResult[key] = value;
460+
}
434461
});
435-
436-
const mergedResult: Record<string, string>[] = [
437-
Object.fromEntries(
438-
Object.entries(
439-
this.cumulativeResults.reduce((acc, curr) => {
440-
Object.entries(curr).forEach(([key, value]) => {
441-
// If the key doesn't exist or the current value is not undefined, add/update it
442-
if (value !== undefined) {
443-
acc[key] = value;
444-
}
445-
});
446-
return acc;
447-
}, {})
448-
)
449-
)
450-
];
451-
452-
// Log cumulative results after each action
453-
console.log("CUMULATIVE results:", this.cumulativeResults);
454-
console.log("MERGED results:", mergedResult);
455-
456-
await this.options.serializableCallback(mergedResult);
457-
// await this.options.serializableCallback(scrapeResult);
462+
463+
console.log("Updated merged result:", mergedResult);
464+
await this.options.serializableCallback([mergedResult]);
458465
},
459466

460467
scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => {
468+
if (this.options.debugChannel?.setActionType) {
469+
this.options.debugChannel.setActionType('scrapeList');
470+
}
471+
472+
if (this.options.mode && this.options.mode === 'editor') {
473+
await this.options.serializableCallback({});
474+
return;
475+
}
476+
461477
await this.ensureScriptsLoaded(page);
462478
if (!config.pagination) {
463479
const scrapeResults: Record<string, any>[] = await page.evaluate((cfg) => window.scrapeList(cfg), config);
@@ -469,6 +485,10 @@ export default class Interpreter extends EventEmitter {
469485
},
470486

471487
scrapeListAuto: async (config: { listSelector: string }) => {
488+
if (this.options.debugChannel?.setActionType) {
489+
this.options.debugChannel.setActionType('scrapeListAuto');
490+
}
491+
472492
await this.ensureScriptsLoaded(page);
473493

474494
const scrapeResults: { selector: string, innerText: string }[] = await page.evaluate((listSelector) => {
@@ -479,6 +499,10 @@ export default class Interpreter extends EventEmitter {
479499
},
480500

481501
scroll: async (pages?: number) => {
502+
if (this.options.debugChannel?.setActionType) {
503+
this.options.debugChannel.setActionType('scroll');
504+
}
505+
482506
await page.evaluate(async (pagesInternal) => {
483507
for (let i = 1; i <= (pagesInternal ?? 1); i += 1) {
484508
// @ts-ignore
@@ -488,6 +512,10 @@ export default class Interpreter extends EventEmitter {
488512
},
489513

490514
script: async (code: string) => {
515+
if (this.options.debugChannel?.setActionType) {
516+
this.options.debugChannel.setActionType('script');
517+
}
518+
491519
const AsyncFunction: FunctionConstructor = Object.getPrototypeOf(
492520
async () => { },
493521
).constructor;
@@ -496,6 +524,10 @@ export default class Interpreter extends EventEmitter {
496524
},
497525

498526
flag: async () => new Promise((res) => {
527+
if (this.options.debugChannel?.setActionType) {
528+
this.options.debugChannel.setActionType('flag');
529+
}
530+
499531
this.emit('flag', page, res);
500532
}),
501533
};
@@ -526,6 +558,10 @@ export default class Interpreter extends EventEmitter {
526558
const params = !step.args || Array.isArray(step.args) ? step.args : [step.args];
527559
await wawActions[step.action as CustomFunctions](...(params ?? []));
528560
} else {
561+
if (this.options.debugChannel?.setActionType) {
562+
this.options.debugChannel.setActionType(String(step.action));
563+
}
564+
529565
// Implements the dot notation for the "method name" in the workflow
530566
const levels = String(step.action).split('.');
531567
const methodName = levels[levels.length - 1];

nginx.conf

Lines changed: 13 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,24 @@
11
server {
22
listen 80;
3+
server_name _;
34

5+
root /var/www/maxun;
6+
index index.html;
7+
8+
# Serve the frontend
49
location / {
5-
root /usr/share/nginx/html;
610
try_files $uri $uri/ /index.html;
711
}
8-
9-
location /api {
10-
proxy_pass http://localhost:8080;
11-
proxy_http_version 1.1;
12-
proxy_set_header Upgrade $http_upgrade;
13-
proxy_set_header Connection 'upgrade';
14-
proxy_set_header Host $host;
15-
proxy_cache_bypass $http_upgrade;
16-
17-
# Add timeout configurations
18-
proxy_connect_timeout 60s;
19-
proxy_send_timeout 60s;
20-
proxy_read_timeout 60s;
21-
22-
# Add error handling
23-
proxy_intercept_errors on;
24-
error_page 502 503 504 /50x.html;
25-
}
26-
27-
location ~ ^/(record|workflow|storage|auth|integration|proxy|api-docs) {
28-
proxy_pass http://localhost:8080;
12+
13+
# Proxy for backend
14+
# "~" makes this a regular-expression location; without it nginx treats the pattern as a literal URI prefix and never proxies these routes.
location ~ ^/(auth|storage|record|workflow|robot|proxy|api-docs|api)(/|$) {
15+
proxy_pass http://localhost:8080; # change as per your setup
2916
proxy_http_version 1.1;
3017
proxy_set_header Upgrade $http_upgrade;
31-
proxy_set_header Connection 'keep-alive'; # Ensure connections remain open
3218
proxy_set_header Connection 'upgrade';
3319
proxy_set_header Host $host;
34-
proxy_cache_bypass $http_upgrade;
35-
36-
# Timeout configurations
37-
proxy_connect_timeout 60s;
38-
proxy_send_timeout 60s;
39-
proxy_read_timeout 60s;
40-
41-
# Error handling for these routes
42-
proxy_intercept_errors on;
43-
error_page 502 503 504 /50x.html;
20+
proxy_set_header X-Real-IP $remote_addr;
21+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
22+
proxy_set_header X-Forwarded-Proto $scheme;
4423
}
45-
}
24+
}

0 commit comments

Comments
 (0)