From 0d763f78213671d5809102b9b501920921418ffd Mon Sep 17 00:00:00 2001 From: amhsirak Date: Fri, 20 Dec 2024 18:32:08 +0530 Subject: [PATCH 001/156] feat: iframe support for get element info --- server/src/workflow-management/selector.ts | 188 +++++++++++++++++---- 1 file changed, 155 insertions(+), 33 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 240f8921b..169794870 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -23,10 +23,8 @@ export const getElementInformation = async ( if (!getList || listSelector !== '') { const elementInfo = await page.evaluate( async ({ x, y }) => { - const el = document.elementFromPoint(x, y) as HTMLElement; - if (el) { - const { parentElement } = el; - const element = parentElement?.tagName === 'A' ? parentElement : el; + // Helper function to get element info + const getElementInfo = (element: HTMLElement) => { let info: { tagName: string; hasOnlyText?: boolean; @@ -36,9 +34,12 @@ export const getElementInformation = async ( attributes?: Record; innerHTML?: string; outerHTML?: string; + fromIframe?: boolean; + iframePath?: string[]; } = { tagName: element?.tagName ?? '', }; + if (element) { info.attributes = Array.from(element.attributes).reduce( (acc, attr) => { @@ -48,7 +49,7 @@ export const getElementInformation = async ( {} as Record ); } - // Gather specific information based on the tag + if (element?.tagName === 'A') { info.url = (element as HTMLAnchorElement).href; info.innerText = element.innerText ?? ''; @@ -61,50 +62,80 @@ export const getElementInformation = async ( ...info.attributes, selectedValue: selectElement.value, }; - } else if (element?.tagName === 'INPUT' && (element as HTMLInputElement).type === 'time' || (element as HTMLInputElement).type === 'date') { + } else if (element?.tagName === 'INPUT' && + ((element as HTMLInputElement).type === 'time' || + (element as HTMLInputElement).type === 'date')) { info.innerText = (element as HTMLInputElement).value; } else { info.hasOnlyText = element?.children?.length === 0 && element?.innerText?.length > 0; info.innerText = element?.innerText ?? ''; } + info.innerHTML = element.innerHTML; info.outerHTML = element.outerHTML; return info; + }; + + // Helper function to search in iframe + const searchInIframe = ( + iframe: HTMLIFrameElement, + relativeX: number, + relativeY: number, + iframePath: string[] + ) => { + try { + if (!iframe.contentDocument) return null; + + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; + if (!el) return null; + + const { parentElement } = el; + const element = parentElement?.tagName === 'A' ? parentElement : el; + + const info = getElementInfo(element); + info.fromIframe = true; + info.iframePath = iframePath; + + return info; + } catch (e) { + console.warn('Cannot access iframe content:', e); + return null; + } + }; + + const el = document.elementFromPoint(x, y) as HTMLElement; + if (el) { + // Check if the element is an iframe + if (el.tagName === 'IFRAME') { + const iframe = el as HTMLIFrameElement; + const rect = iframe.getBoundingClientRect(); + const relativeX = x - rect.left; + const relativeY = y - rect.top; + + const iframeResult = searchInIframe( + iframe, + relativeX, + relativeY, + [iframe.id || 'unnamed-iframe'] + ); + if (iframeResult) return iframeResult; + } + + const { parentElement } = el; + const element = parentElement?.tagName === 'A' ? parentElement : el; + return getElementInfo(element); } return null; }, - { x: coordinates.x, y: coordinates.y }, + { x: coordinates.x, y: coordinates.y } ); return elementInfo; } else { const elementInfo = await page.evaluate( async ({ x, y }) => { - const originalEl = document.elementFromPoint(x, y) as HTMLElement; - if (originalEl) { - let element = originalEl; - - while (element.parentElement) { - const parentRect = element.parentElement.getBoundingClientRect(); - const childRect = element.getBoundingClientRect(); - - const fullyContained = - parentRect.left <= childRect.left && - parentRect.right >= childRect.right && - parentRect.top <= childRect.top && - parentRect.bottom >= childRect.bottom; - - const significantOverlap = - (childRect.width * childRect.height) / - (parentRect.width * parentRect.height) > 0.5; - - if (fullyContained && significantOverlap) { - element = element.parentElement; - } else { - break; - } - } - + // Helper function to get element info (same as above) + const getElementInfo = (element: HTMLElement) => { let info: { tagName: string; hasOnlyText?: boolean; @@ -114,6 +145,8 @@ export const getElementInformation = async ( attributes?: Record; innerHTML?: string; outerHTML?: string; + fromIframe?: boolean; + iframePath?: string[]; } = { tagName: element?.tagName ?? '', }; @@ -142,10 +175,99 @@ export const getElementInformation = async ( info.innerHTML = element.innerHTML; info.outerHTML = element.outerHTML; return info; + }; + + // Helper function to search in iframe (same as above) + const searchInIframe = ( + iframe: HTMLIFrameElement, + relativeX: number, + relativeY: number, + iframePath: string[] + ) => { + try { + if (!iframe.contentDocument) return null; + + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; + if (!el) return null; + + let element = el; + while (element.parentElement) { + const parentRect = element.parentElement.getBoundingClientRect(); + const childRect = element.getBoundingClientRect(); + + const fullyContained = + parentRect.left <= childRect.left && + parentRect.right >= childRect.right && + parentRect.top <= childRect.top && + parentRect.bottom >= childRect.bottom; + + const significantOverlap = + (childRect.width * childRect.height) / + (parentRect.width * parentRect.height) > 0.5; + + if (fullyContained && significantOverlap) { + element = element.parentElement; + } else { + break; + } + } + + const info = getElementInfo(element); + info.fromIframe = true; + info.iframePath = iframePath; + + return info; + } catch (e) { + console.warn('Cannot access iframe content:', e); + return null; + } + }; + + const originalEl = document.elementFromPoint(x, y) as HTMLElement; + if (originalEl) { + // Check if the element is an iframe + if (originalEl.tagName === 'IFRAME') { + const iframe = originalEl as HTMLIFrameElement; + const rect = iframe.getBoundingClientRect(); + const relativeX = x - rect.left; + const relativeY = y - rect.top; + + const iframeResult = searchInIframe( + iframe, + relativeX, + relativeY, + [iframe.id || 'unnamed-iframe'] + ); + if (iframeResult) return iframeResult; + } + + let element = originalEl; + while (element.parentElement) { + const parentRect = element.parentElement.getBoundingClientRect(); + const childRect = element.getBoundingClientRect(); + + const fullyContained = + parentRect.left <= childRect.left && + parentRect.right >= childRect.right && + parentRect.top <= childRect.top && + parentRect.bottom >= childRect.bottom; + + const significantOverlap = + (childRect.width * childRect.height) / + (parentRect.width * parentRect.height) > 0.5; + + if (fullyContained && significantOverlap) { + element = element.parentElement; + } else { + break; + } + } + + return getElementInfo(element); } return null; }, - { x: coordinates.x, y: coordinates.y }, + { x: coordinates.x, y: coordinates.y } ); return elementInfo; } From 6904933036bc48bc09fc331479efbfe174181c78 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Fri, 20 Dec 2024 20:28:11 +0530 Subject: [PATCH 002/156] feat: iframe support for getRect --- server/src/workflow-management/selector.ts | 189 ++++++++++++++++++--- 1 file changed, 166 insertions(+), 23 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 169794870..83491042a 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -292,25 +292,90 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector if (!getList || listSelector !== '') { const rect = await page.evaluate( async ({ x, y }) => { + // Helper function to convert rectangle to plain object + const getRectangleInfo = (rectangle: DOMRect) => { + const info = { + x: rectangle.x, + y: rectangle.y, + width: rectangle.width, + height: rectangle.height, + top: rectangle.top, + right: rectangle.right, + bottom: rectangle.bottom, + left: rectangle.left, + fromIframe: false, + iframePath: [] as string[] + }; + return info; + }; + + // Helper function to search in iframe + const searchInIframe = ( + iframe: HTMLIFrameElement, + relativeX: number, + relativeY: number, + iframePath: string[] + ) => { + try { + if (!iframe.contentDocument) return null; + + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; + if (!el) return null; + + const { parentElement } = el; + const element = parentElement?.tagName === 'A' ? parentElement : el; + const rectangle = element?.getBoundingClientRect(); + + if (rectangle) { + const iframeRect = iframe.getBoundingClientRect(); + const rectInfo = getRectangleInfo(rectangle); + + // Adjust coordinates relative to the main document + rectInfo.x += iframeRect.x; + rectInfo.y += iframeRect.y; + rectInfo.top += iframeRect.top; + rectInfo.right += iframeRect.left; + rectInfo.bottom += iframeRect.top; + rectInfo.left += iframeRect.left; + rectInfo.fromIframe = true; + rectInfo.iframePath = iframePath; + + return rectInfo; + } + return null; + } catch (e) { + console.warn('Cannot access iframe content:', e); + return null; + } + }; + const el = document.elementFromPoint(x, y) as HTMLElement; if (el) { + // Check if the element is an iframe + if (el.tagName === 'IFRAME') { + const iframe = el as HTMLIFrameElement; + const rect = iframe.getBoundingClientRect(); + const relativeX = x - rect.left; + const relativeY = y - rect.top; + + const iframeResult = searchInIframe( + iframe, + relativeX, + relativeY, + [iframe.id || 'unnamed-iframe'] + ); + if (iframeResult) return iframeResult; + } + const { parentElement } = el; - // Match the logic in recorder.ts for link clicks const element = parentElement?.tagName === 'A' ? parentElement : el; const rectangle = element?.getBoundingClientRect(); + if (rectangle) { - return { - x: rectangle.x, - y: rectangle.y, - width: rectangle.width, - height: rectangle.height, - top: rectangle.top, - right: rectangle.right, - bottom: rectangle.bottom, - left: rectangle.left, - }; + return getRectangleInfo(rectangle); } } + return null; }, { x: coordinates.x, y: coordinates.y }, ); @@ -318,10 +383,98 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector } else { const rect = await page.evaluate( async ({ x, y }) => { + // Helper function to convert rectangle to plain object (same as above) + const getRectangleInfo = (rectangle: DOMRect) => ({ + x: rectangle.x, + y: rectangle.y, + width: rectangle.width, + height: rectangle.height, + top: rectangle.top, + right: rectangle.right, + bottom: rectangle.bottom, + left: rectangle.left, + fromIframe: false, + iframePath: [] as string[] + }); + + // Helper function to search in iframe (same as above) + const searchInIframe = ( + iframe: HTMLIFrameElement, + relativeX: number, + relativeY: number, + iframePath: string[] + ) => { + try { + if (!iframe.contentDocument) return null; + + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; + if (!el) return null; + + let element = el; + while (element.parentElement) { + const parentRect = element.parentElement.getBoundingClientRect(); + const childRect = element.getBoundingClientRect(); + + const fullyContained = + parentRect.left <= childRect.left && + parentRect.right >= childRect.right && + parentRect.top <= childRect.top && + parentRect.bottom >= childRect.bottom; + + const significantOverlap = + (childRect.width * childRect.height) / + (parentRect.width * parentRect.height) > 0.5; + + if (fullyContained && significantOverlap) { + element = element.parentElement; + } else { + break; + } + } + + const rectangle = element?.getBoundingClientRect(); + if (rectangle) { + const iframeRect = iframe.getBoundingClientRect(); + const rectInfo = getRectangleInfo(rectangle); + + // Adjust coordinates relative to the main document + rectInfo.x += iframeRect.x; + rectInfo.y += iframeRect.y; + rectInfo.top += iframeRect.top; + rectInfo.right += iframeRect.left; + rectInfo.bottom += iframeRect.top; + rectInfo.left += iframeRect.left; + rectInfo.fromIframe = true; + rectInfo.iframePath = iframePath; + + return rectInfo; + } + return null; + } catch (e) { + console.warn('Cannot access iframe content:', e); + return null; + } + }; + const originalEl = document.elementFromPoint(x, y) as HTMLElement; if (originalEl) { - let element = originalEl; + // Check if the element is an iframe + if (originalEl.tagName === 'IFRAME') { + const iframe = originalEl as HTMLIFrameElement; + const rect = iframe.getBoundingClientRect(); + const relativeX = x - rect.left; + const relativeY = y - rect.top; + + const iframeResult = searchInIframe( + iframe, + relativeX, + relativeY, + [iframe.id || 'unnamed-iframe'] + ); + if (iframeResult) return iframeResult; + } + let element = originalEl; while (element.parentElement) { const parentRect = element.parentElement.getBoundingClientRect(); const childRect = element.getBoundingClientRect(); @@ -344,18 +497,8 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector } const rectangle = element?.getBoundingClientRect(); - if (rectangle) { - return { - x: rectangle.x, - y: rectangle.y, - width: rectangle.width, - height: rectangle.height, - top: rectangle.top, - right: rectangle.right, - bottom: rectangle.bottom, - left: rectangle.left, - }; + return getRectangleInfo(rectangle); } } return null; From 8ba928dae6cac4d7e5924bcc799e792068e6734d Mon Sep 17 00:00:00 2001 From: amhsirak Date: Fri, 20 Dec 2024 20:28:24 +0530 Subject: [PATCH 003/156] chore: fix format --- server/src/workflow-management/selector.ts | 76 +++++++++++----------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 83491042a..dd869f3d0 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -62,9 +62,9 @@ export const getElementInformation = async ( ...info.attributes, selectedValue: selectElement.value, }; - } else if (element?.tagName === 'INPUT' && - ((element as HTMLInputElement).type === 'time' || - (element as HTMLInputElement).type === 'date')) { + } else if (element?.tagName === 'INPUT' && + ((element as HTMLInputElement).type === 'time' || + (element as HTMLInputElement).type === 'date')) { info.innerText = (element as HTMLInputElement).value; } else { info.hasOnlyText = element?.children?.length === 0 && @@ -79,20 +79,20 @@ export const getElementInformation = async ( // Helper function to search in iframe const searchInIframe = ( - iframe: HTMLIFrameElement, - relativeX: number, + iframe: HTMLIFrameElement, + relativeX: number, relativeY: number, iframePath: string[] ) => { try { if (!iframe.contentDocument) return null; - + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; if (!el) return null; const { parentElement } = el; const element = parentElement?.tagName === 'A' ? parentElement : el; - + const info = getElementInfo(element); info.fromIframe = true; info.iframePath = iframePath; @@ -112,11 +112,11 @@ export const getElementInformation = async ( const rect = iframe.getBoundingClientRect(); const relativeX = x - rect.left; const relativeY = y - rect.top; - + const iframeResult = searchInIframe( - iframe, - relativeX, - relativeY, + iframe, + relativeX, + relativeY, [iframe.id || 'unnamed-iframe'] ); if (iframeResult) return iframeResult; @@ -179,14 +179,14 @@ export const getElementInformation = async ( // Helper function to search in iframe (same as above) const searchInIframe = ( - iframe: HTMLIFrameElement, - relativeX: number, + iframe: HTMLIFrameElement, + relativeX: number, relativeY: number, iframePath: string[] ) => { try { if (!iframe.contentDocument) return null; - + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; if (!el) return null; @@ -231,11 +231,11 @@ export const getElementInformation = async ( const rect = iframe.getBoundingClientRect(); const relativeX = x - rect.left; const relativeY = y - rect.top; - + const iframeResult = searchInIframe( - iframe, - relativeX, - relativeY, + iframe, + relativeX, + relativeY, [iframe.id || 'unnamed-iframe'] ); if (iframeResult) return iframeResult; @@ -311,25 +311,25 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector // Helper function to search in iframe const searchInIframe = ( - iframe: HTMLIFrameElement, - relativeX: number, + iframe: HTMLIFrameElement, + relativeX: number, relativeY: number, iframePath: string[] ) => { try { if (!iframe.contentDocument) return null; - + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; if (!el) return null; const { parentElement } = el; const element = parentElement?.tagName === 'A' ? parentElement : el; const rectangle = element?.getBoundingClientRect(); - + if (rectangle) { const iframeRect = iframe.getBoundingClientRect(); const rectInfo = getRectangleInfo(rectangle); - + // Adjust coordinates relative to the main document rectInfo.x += iframeRect.x; rectInfo.y += iframeRect.y; @@ -339,7 +339,7 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector rectInfo.left += iframeRect.left; rectInfo.fromIframe = true; rectInfo.iframePath = iframePath; - + return rectInfo; } return null; @@ -357,11 +357,11 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector const rect = iframe.getBoundingClientRect(); const relativeX = x - rect.left; const relativeY = y - rect.top; - + const iframeResult = searchInIframe( - iframe, - relativeX, - relativeY, + iframe, + relativeX, + relativeY, [iframe.id || 'unnamed-iframe'] ); if (iframeResult) return iframeResult; @@ -370,7 +370,7 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector const { parentElement } = el; const element = parentElement?.tagName === 'A' ? parentElement : el; const rectangle = element?.getBoundingClientRect(); - + if (rectangle) { return getRectangleInfo(rectangle); } @@ -399,14 +399,14 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector // Helper function to search in iframe (same as above) const searchInIframe = ( - iframe: HTMLIFrameElement, - relativeX: number, + iframe: HTMLIFrameElement, + relativeX: number, relativeY: number, iframePath: string[] ) => { try { if (!iframe.contentDocument) return null; - + const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; if (!el) return null; @@ -436,7 +436,7 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector if (rectangle) { const iframeRect = iframe.getBoundingClientRect(); const rectInfo = getRectangleInfo(rectangle); - + // Adjust coordinates relative to the main document rectInfo.x += iframeRect.x; rectInfo.y += iframeRect.y; @@ -446,7 +446,7 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector rectInfo.left += iframeRect.left; rectInfo.fromIframe = true; rectInfo.iframePath = iframePath; - + return rectInfo; } return null; @@ -464,11 +464,11 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector const rect = iframe.getBoundingClientRect(); const relativeX = x - rect.left; const relativeY = y - rect.top; - + const iframeResult = searchInIframe( - iframe, - relativeX, - relativeY, + iframe, + relativeX, + relativeY, [iframe.id || 'unnamed-iframe'] ); if (iframeResult) return iframeResult; From 422b774ba2e93089c94dc9a884836039abc2c9f5 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 23 Dec 2024 13:34:24 +0530 Subject: [PATCH 004/156] fix: browser window english language --- public/locales/en.json | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/public/locales/en.json b/public/locales/en.json index 7752ad78e..66212a877 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -400,24 +400,24 @@ }, "browser_window": { "attribute_modal": { - "title": "属性を選択", - "notifications": { - "list_select_success": "リストが正常に選択されました。抽出するテキストデータを選択してください。", - "pagination_select_success": "ページネーション要素が正常に選択されました。" - } + "title": "Select Attribute", + "notifications": { + "list_select_success": "List has been successfully selected. Please select the text data to extract.", + "pagination_select_success": "Pagination element has been successfully selected." + } }, "attribute_options": { - "anchor": { - "text": "テキスト: {{text}}", - "url": "URL: {{url}}" - }, - "image": { - "alt_text": "代替テキスト: {{altText}}", - "image_url": "画像URL: {{imageUrl}}" - }, - "default": { - "text": "テキスト: {{text}}" - } + "anchor": { + "text": "Text: {{text}}", + "url": "URL: {{url}}" + }, + "image": { + "alt_text": "Alt Text: {{altText}}", + "image_url": "Image URL: {{imageUrl}}" + }, + "default": { + "text": "Text: {{text}}" + } } }, "runs_table": { From e017148487c41f9c3af15d3ea7b025c3615dea33 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 23 Dec 2024 14:46:41 +0530 Subject: [PATCH 005/156] fix: preserve previous labels steps before adding list step --- src/context/browserSteps.tsx | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index 7630f559a..dd2111990 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -62,26 +62,35 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ const addListStep = (listSelector: string, newFields: { [key: string]: TextStep }, listId: number, pagination?: { type: string; selector: string }, limit?: number) => { setBrowserSteps(prevSteps => { const existingListStepIndex = prevSteps.findIndex(step => step.type === 'list' && step.id === listId); + if (existingListStepIndex !== -1) { const updatedSteps = [...prevSteps]; const existingListStep = updatedSteps[existingListStepIndex] as ListStep; - - const filteredNewFields = Object.entries(newFields).reduce((acc, [key, value]) => { + + // Preserve existing labels for fields + const mergedFields = Object.entries(newFields).reduce((acc, [key, field]) => { if (!discardedFields.has(`${listId}-${key}`)) { - acc[key] = value; + // If field exists, preserve its label + if (existingListStep.fields[key]) { + acc[key] = { + ...field, + label: existingListStep.fields[key].label + }; + } else { + acc[key] = field; + } } return acc; }, {} as { [key: string]: TextStep }); - + updatedSteps[existingListStepIndex] = { ...existingListStep, - fields: { ...existingListStep.fields, ...filteredNewFields }, - pagination: pagination, - limit: limit, + fields: mergedFields, + pagination: pagination || existingListStep.pagination, + limit: limit }; return updatedSteps; } else { - // Create a new ListStep return [ ...prevSteps, { id: listId, type: 'list', listSelector, fields: newFields, pagination, limit } From e2bc45b16328e56de2ddf426427b95342e311c7d Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 23 Dec 2024 23:14:36 +0530 Subject: [PATCH 006/156] chore: -rm unique mode 1 log --- server/src/workflow-management/selector.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 240f8921b..7c3ead34e 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -864,7 +864,6 @@ interface SelectorResult { export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates, listSelector: string): Promise => { try { if (!listSelector) { - console.log(`NON UNIQUE: MODE 1`) const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); From bc88aa82802ec4741a0672401f361807dbe373bb Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 23 Dec 2024 23:15:05 +0530 Subject: [PATCH 007/156] chore: -rm non unique mode 2 log --- server/src/workflow-management/selector.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 7c3ead34e..c0fa21f15 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -931,7 +931,6 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates }, coordinates); return selectors || { generalSelector: '' }; } else { - console.log(`NON UNIQUE: MODE 2`) const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); From 489b8783a47f8a08723866f78f22d4b849411bd3 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 23 Dec 2024 23:15:53 +0530 Subject: [PATCH 008/156] chore: -rm user.emal --- server/src/routes/auth.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/routes/auth.ts b/server/src/routes/auth.ts index cc3d879bd..084edb4b0 100644 --- a/server/src/routes/auth.ts +++ b/server/src/routes/auth.ts @@ -52,7 +52,7 @@ router.post("/register", async (req, res) => { userId: user.id, registeredAt: new Date().toISOString(), }); - console.log(`User registered - ${user.email}`); + console.log(`User registered`); res.json(user); } catch (error: any) { console.log(`Could not register user - ${error}`); From 2c6f5b7156cf545ffdde70eb6ea1485fbe0204bd Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 23 Dec 2024 23:16:54 +0530 Subject: [PATCH 009/156] feat: better error message --- server/src/workflow-management/scheduler/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/workflow-management/scheduler/index.ts b/server/src/workflow-management/scheduler/index.ts index 169b0061c..ade7d9699 100644 --- a/server/src/workflow-management/scheduler/index.ts +++ b/server/src/workflow-management/scheduler/index.ts @@ -73,7 +73,7 @@ async function createWorkflowAndStoreMetadata(id: string, userId: string) { } catch (e) { const { message } = e as Error; logger.log('info', `Error while scheduling a run with id: ${id}`); - console.log(message); + console.log(`Error while scheduling a run with id: ${id}:`, message); return { success: false, error: message, From 3776726a0b0e412f0451c8e195b321d637c07810 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 23 Dec 2024 23:17:33 +0530 Subject: [PATCH 010/156] chore: -rm save log --- src/components/molecules/RobotDuplicate.tsx | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/components/molecules/RobotDuplicate.tsx b/src/components/molecules/RobotDuplicate.tsx index 530aedc6f..b832ff0b9 100644 --- a/src/components/molecules/RobotDuplicate.tsx +++ b/src/components/molecules/RobotDuplicate.tsx @@ -93,8 +93,6 @@ export const RobotDuplicationModal = ({ isOpen, handleStart, handleClose, initia return; } - console.log("handle save"); - try { const success = await duplicateRecording(robot.recording_meta.id, targetUrl); From 09aab83851d33ff07bb7fe47c592c8f7ac841435 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 23 Dec 2024 23:17:54 +0530 Subject: [PATCH 011/156] chore: -rm settings log --- src/components/organisms/RightSidePanel.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index ba178b7bd..12f75028e 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -220,7 +220,6 @@ export const RightSidePanel: React.FC = ({ onFinishCapture } stopGetText(); const settings = getTextSettingsObject(); - console.log("SETTINGS", settings); const hasTextSteps = browserSteps.some(step => step.type === 'text'); if (hasTextSteps) { socket?.emit('action', { action: 'scrapeSchema', settings }); From 970a84692083fbd36e3c63c53519e2a0ed7ef752 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 23 Dec 2024 23:18:20 +0530 Subject: [PATCH 012/156] chore: -rm i18n logs --- src/pages/Login.tsx | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pages/Login.tsx b/src/pages/Login.tsx index 3c8e08c46..86dfa1594 100644 --- a/src/pages/Login.tsx +++ b/src/pages/Login.tsx @@ -10,8 +10,6 @@ import i18n from '../i18n'; const Login = () => { const { t } = useTranslation(); - console.log(i18n) - console.log(t) const [form, setForm] = useState({ email: "", password: "", From ecfd1e1e4223ba5958f3e4ff81804fff52d79a79 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 23 Dec 2024 23:25:37 +0530 Subject: [PATCH 013/156] fix: bring back i18n logs --- src/pages/Login.tsx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pages/Login.tsx b/src/pages/Login.tsx index 86dfa1594..3c8e08c46 100644 --- a/src/pages/Login.tsx +++ b/src/pages/Login.tsx @@ -10,6 +10,8 @@ import i18n from '../i18n'; const Login = () => { const { t } = useTranslation(); + console.log(i18n) + console.log(t) const [form, setForm] = useState({ email: "", password: "", From ad8a48c0c6d9c3eb1c334851e8eb25f1489a0d5a Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 23 Dec 2024 23:26:23 +0530 Subject: [PATCH 014/156] chore: add log warning --- src/pages/Login.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pages/Login.tsx b/src/pages/Login.tsx index 3c8e08c46..b82287992 100644 --- a/src/pages/Login.tsx +++ b/src/pages/Login.tsx @@ -10,6 +10,7 @@ import i18n from '../i18n'; const Login = () => { const { t } = useTranslation(); + // just don't remove these logs - god knows why it's not working without them console.log(i18n) console.log(t) const [form, setForm] = useState({ From feb30b9f9e1ffd047e3054f7c10d90e07ec134d5 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Tue, 24 Dec 2024 02:30:36 +0530 Subject: [PATCH 015/156] feat: add nth-child selectors for td tag --- server/src/workflow-management/selector.ts | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 240f8921b..af9de4af8 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -869,6 +869,13 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); + if (selector === 'td' && element.parentElement) { + // Find position among td siblings + const siblings = Array.from(element.parentElement.children); + const position = siblings.indexOf(element) + 1; + return `${selector}:nth-child(${position})`; + } + if (element.className) { const classes = element.className.split(/\s+/).filter((cls: string) => Boolean(cls)); if (classes.length > 0) { @@ -937,6 +944,12 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); + if (selector === 'td' && element.parentElement) { + const siblings = Array.from(element.parentElement.children); + const position = siblings.indexOf(element) + 1; + return `${selector}:nth-child(${position})`; + } + if (element.className) { const classes = element.className.split(/\s+/).filter((cls: string) => Boolean(cls)); if (classes.length > 0) { @@ -991,6 +1004,12 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); + if (selector === 'td' && element.parentElement) { + const siblings = Array.from(element.parentElement.children); + const position = siblings.indexOf(element) + 1; + return `${selector}:nth-child(${position})`; + } + const className = typeof element.className === 'string' ? element.className : ''; if (className) { const classes = className.split(/\s+/).filter((cls: string) => Boolean(cls)); From 5ac88c6eda67f67229ca71e511cb7de25bfefd06 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Tue, 24 Dec 2024 02:31:32 +0530 Subject: [PATCH 016/156] feat: add scraping logic for tabular data in scrapeList --- maxun-core/src/browserSide/scraper.js | 112 ++++++++++++++------------ 1 file changed, 61 insertions(+), 51 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index a2009d789..82341fbd5 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -262,73 +262,83 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ window.scrapeList = async function ({ listSelector, fields, limit = 10 }) { + // Separate fields into table and non-table categories + const tableFields = {}; + const nonTableFields = {}; + + for (const [label, field] of Object.entries(fields)) { + if (['TD', 'TH', 'TR'].includes(field.tag)) { + tableFields[label] = field; + } else { + nonTableFields[label] = field; + } + } + + const parentElements = Array.from(document.querySelectorAll(listSelector)); const scrapedData = []; - while (scrapedData.length < limit) { - let parentElements = Array.from(document.querySelectorAll(listSelector)); - - // If we only got one element or none, try a more generic approach - if (limit > 1 && parentElements.length <= 1) { - const [containerSelector, _] = listSelector.split('>').map(s => s.trim()); - const container = document.querySelector(containerSelector); + for (const parent of parentElements) { + // First, get the number of rows we'll need by checking the first table field + const firstTableField = Object.values(tableFields)[0]; + const tableRows = firstTableField + ? Array.from(parent.querySelectorAll(firstTableField.selector)).slice(0, limit) + : [null]; + + tableRows.forEach((_, rowIndex) => { + const record = {}; - if (container) { - const allChildren = Array.from(container.children); + // Table fields + for (const [label, { selector, attribute }] of Object.entries(tableFields)) { + const elements = Array.from(parent.querySelectorAll(selector)); + const element = elements[rowIndex]; - const firstMatch = document.querySelector(listSelector); - if (firstMatch) { - // Get classes from the first matching element - const firstMatchClasses = Array.from(firstMatch.classList); - - // Find similar elements by matching most of their classes - parentElements = allChildren.filter(element => { - const elementClasses = Array.from(element.classList); - - // Element should share at least 70% of classes with the first match - const commonClasses = firstMatchClasses.filter(cls => - elementClasses.includes(cls)); - return commonClasses.length >= Math.floor(firstMatchClasses.length * 0.7); - }); + if (element) { + let value; + if (attribute === 'innerText') { + value = element.innerText.trim(); + } else if (attribute === 'innerHTML') { + value = element.innerHTML.trim(); + } else if (attribute === 'src' || attribute === 'href') { + const attrValue = element.getAttribute(attribute); + value = attrValue ? new URL(attrValue, window.location.origin).href : null; + } else { + value = element.getAttribute(attribute); + } + record[label] = value; } } - } - - // Iterate through each parent element - for (const parent of parentElements) { - if (scrapedData.length >= limit) break; - const record = {}; - - // For each field, select the corresponding element within the parent - for (const [label, { selector, attribute }] of Object.entries(fields)) { - const fieldElement = parent.querySelector(selector); - - if (fieldElement) { + + // Non table fields + for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) { + const element = parent.querySelector(selector); + + if (element) { + let value; if (attribute === 'innerText') { - record[label] = fieldElement.innerText.trim(); + value = element.innerText.trim(); } else if (attribute === 'innerHTML') { - record[label] = fieldElement.innerHTML.trim(); - } else if (attribute === 'src') { - // Handle relative 'src' URLs - const src = fieldElement.getAttribute('src'); - record[label] = src ? new URL(src, window.location.origin).href : null; - } else if (attribute === 'href') { - // Handle relative 'href' URLs - const href = fieldElement.getAttribute('href'); - record[label] = href ? new URL(href, window.location.origin).href : null; + value = element.innerHTML.trim(); + } else if (attribute === 'src' || attribute === 'href') { + const attrValue = element.getAttribute(attribute); + value = attrValue ? new URL(attrValue, window.location.origin).href : null; } else { - record[label] = fieldElement.getAttribute(attribute); + value = element.getAttribute(attribute); } + record[label] = value; } } - scrapedData.push(record); - } + + if (Object.keys(record).length > 0) { + scrapedData.push(record); + } + }); - // If we've processed all available elements and still haven't reached the limit, - // break to avoid infinite loop - if (parentElements.length === 0 || scrapedData.length >= parentElements.length) { + if (scrapedData.length >= limit) { + scrapedData.length = limit; break; } } + return scrapedData; }; From 99ce03f2f1e4caf333df6812fa812d2f37d8df36 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 24 Dec 2024 02:49:17 +0530 Subject: [PATCH 017/156] feat: robot name --- src/components/molecules/RobotEdit.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/molecules/RobotEdit.tsx b/src/components/molecules/RobotEdit.tsx index 73397da41..37e4de652 100644 --- a/src/components/molecules/RobotEdit.tsx +++ b/src/components/molecules/RobotEdit.tsx @@ -148,7 +148,7 @@ export const RobotEditModal = ({ isOpen, handleStart, handleClose, initialSettin <> handleRobotNameChange(e.target.value)} From 155a4c711b9602f7e829e4f214c2ac83c53b1dbb Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 24 Dec 2024 02:50:02 +0530 Subject: [PATCH 018/156] feat: robot name --- public/locales/en.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/en.json b/public/locales/en.json index 66212a877..9dcad5142 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -345,7 +345,7 @@ }, "robot_edit": { "title": "Edit Robot", - "change_name": "Change Robot Name", + "change_name": "Robot Name", "robot_limit": "Robot Limit", "save": "Save Changes", "cancel": "Cancel", From fed53fd0c609b9460fb5eee8f101b98928f1f9b6 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 24 Dec 2024 03:06:50 +0530 Subject: [PATCH 019/156] chore: -rm translated notification --- src/pages/Login.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pages/Login.tsx b/src/pages/Login.tsx index b82287992..161cfacd2 100644 --- a/src/pages/Login.tsx +++ b/src/pages/Login.tsx @@ -46,7 +46,7 @@ const Login = () => { password, }); dispatch({ type: "LOGIN", payload: data }); - notify("success", t('login.welcome_notification')); // Translated notification + notify("success", t('login.welcome_notification')); window.localStorage.setItem("user", JSON.stringify(data)); navigate("/"); } catch (err) { From 85381a4b609b175bda07f09cc2097cfe3b34d688 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 24 Dec 2024 03:07:01 +0530 Subject: [PATCH 020/156] chore: -rm translated error --- src/pages/Login.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pages/Login.tsx b/src/pages/Login.tsx index 161cfacd2..85fbb20b4 100644 --- a/src/pages/Login.tsx +++ b/src/pages/Login.tsx @@ -50,7 +50,7 @@ const Login = () => { window.localStorage.setItem("user", JSON.stringify(data)); navigate("/"); } catch (err) { - notify("error", t('login.error_notification')); // Translated error + notify("error", t('login.error_notification')); setLoading(false); } }; From 7650a21c03bb72a994648dc08d1fe22df9da08ca Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 24 Dec 2024 03:07:17 +0530 Subject: [PATCH 021/156] chore: -rm lang switcher --- src/pages/Login.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pages/Login.tsx b/src/pages/Login.tsx index 85fbb20b4..04fead670 100644 --- a/src/pages/Login.tsx +++ b/src/pages/Login.tsx @@ -55,7 +55,6 @@ const Login = () => { } }; - // Language switcher function return ( From 06d6891f54cd56edebeb34b5bcbe829c28153836 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 24 Dec 2024 03:07:28 +0530 Subject: [PATCH 022/156] chore: -rm lang switcher buttons --- src/pages/Login.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pages/Login.tsx b/src/pages/Login.tsx index 04fead670..8443a2be7 100644 --- a/src/pages/Login.tsx +++ b/src/pages/Login.tsx @@ -68,7 +68,7 @@ const Login = () => { padding: 4, }} > - {/* Language Switcher Buttons */} + Date: Tue, 24 Dec 2024 03:07:42 +0530 Subject: [PATCH 023/156] chore: whitespace cleanup --- src/pages/Login.tsx | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pages/Login.tsx b/src/pages/Login.tsx index 8443a2be7..c62eb12d8 100644 --- a/src/pages/Login.tsx +++ b/src/pages/Login.tsx @@ -68,8 +68,6 @@ const Login = () => { padding: 4, }} > - - Date: Tue, 24 Dec 2024 03:07:51 +0530 Subject: [PATCH 024/156] chore: whitespace cleanup --- src/pages/Login.tsx | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pages/Login.tsx b/src/pages/Login.tsx index c62eb12d8..d88e1a49d 100644 --- a/src/pages/Login.tsx +++ b/src/pages/Login.tsx @@ -54,9 +54,7 @@ const Login = () => { setLoading(false); } }; - - return ( Date: Tue, 24 Dec 2024 03:08:08 +0530 Subject: [PATCH 025/156] fix: format --- src/pages/Login.tsx | 252 ++++++++++++++++++++++---------------------- 1 file changed, 126 insertions(+), 126 deletions(-) diff --git a/src/pages/Login.tsx b/src/pages/Login.tsx index d88e1a49d..ba0f377e1 100644 --- a/src/pages/Login.tsx +++ b/src/pages/Login.tsx @@ -1,138 +1,138 @@ -import axios from "axios"; -import { useState, useContext, useEffect, FormEvent } from "react"; -import { useNavigate, Link } from "react-router-dom"; -import { AuthContext } from "../context/auth"; -import { Box, Typography, TextField, Button, CircularProgress, Grid } from "@mui/material"; -import { useGlobalInfoStore } from "../context/globalInfo"; +import axios from "axios"; +import { useState, useContext, useEffect, FormEvent } from "react"; +import { useNavigate, Link } from "react-router-dom"; +import { AuthContext } from "../context/auth"; +import { Box, Typography, TextField, Button, CircularProgress, Grid } from "@mui/material"; +import { useGlobalInfoStore } from "../context/globalInfo"; import { apiUrl } from "../apiConfig"; import { useTranslation } from 'react-i18next'; -import i18n from '../i18n'; +import i18n from '../i18n'; const Login = () => { - const { t } = useTranslation(); - // just don't remove these logs - god knows why it's not working without them - console.log(i18n) - console.log(t) - const [form, setForm] = useState({ - email: "", - password: "", - }); - const [loading, setLoading] = useState(false); - const { notify } = useGlobalInfoStore(); - const { email, password } = form; + const { t } = useTranslation(); + // just don't remove these logs - god knows why it's not working without them + console.log(i18n) + console.log(t) + const [form, setForm] = useState({ + email: "", + password: "", + }); + const [loading, setLoading] = useState(false); + const { notify } = useGlobalInfoStore(); + const { email, password } = form; - const { state, dispatch } = useContext(AuthContext); - const { user } = state; + const { state, dispatch } = useContext(AuthContext); + const { user } = state; - const navigate = useNavigate(); + const navigate = useNavigate(); - useEffect(() => { - if (user) { - navigate("/"); - } - }, [user, navigate]); + useEffect(() => { + if (user) { + navigate("/"); + } + }, [user, navigate]); - const handleChange = (e: any) => { - const { name, value } = e.target; - setForm({ ...form, [name]: value }); - }; + const handleChange = (e: any) => { + const { name, value } = e.target; + setForm({ ...form, [name]: value }); + }; - const submitForm = async (e: any) => { - e.preventDefault(); - setLoading(true); - try { - const { data } = await axios.post(`${apiUrl}/auth/login`, { - email, - password, - }); - dispatch({ type: "LOGIN", payload: data }); - notify("success", t('login.welcome_notification')); - window.localStorage.setItem("user", JSON.stringify(data)); - navigate("/"); - } catch (err) { - notify("error", t('login.error_notification')); - setLoading(false); - } - }; - - return ( - - - logo - - {t('login.title')} - - - - - - {t('login.register_prompt')}{" "} - - {t('login.register_link')} - - - - - ); + const submitForm = async (e: any) => { + e.preventDefault(); + setLoading(true); + try { + const { data } = await axios.post(`${apiUrl}/auth/login`, { + email, + password, + }); + dispatch({ type: "LOGIN", payload: data }); + notify("success", t('login.welcome_notification')); + window.localStorage.setItem("user", JSON.stringify(data)); + navigate("/"); + } catch (err) { + notify("error", t('login.error_notification')); + setLoading(false); + } + }; + + return ( + + + logo + + {t('login.title')} + + + + + + {t('login.register_prompt')}{" "} + + {t('login.register_link')} + + + + + ); }; export default Login; \ No newline at end of file From 5939471762da3dfd90dea7186d53fbab563edad0 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 24 Dec 2024 03:18:14 +0530 Subject: [PATCH 026/156] fix: move handleSave onClick to save button --- src/components/molecules/RobotEdit.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/molecules/RobotEdit.tsx b/src/components/molecules/RobotEdit.tsx index 37e4de652..6547d93b4 100644 --- a/src/components/molecules/RobotEdit.tsx +++ b/src/components/molecules/RobotEdit.tsx @@ -170,8 +170,8 @@ export const RobotEditModal = ({ isOpen, handleStart, handleClose, initialSettin /> )} - - + ) : ( + + + )} )} @@ -578,7 +620,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture ) }} /> - {!confirmedListTextFields[step.id]?.[key] && ( + {!confirmedListTextFields[step.id]?.[key] ? ( + ) : ( + + + )} ))} From 3cf0b858933f0a9aa0541a7b714677600861939f Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 28 Dec 2024 17:39:04 +0530 Subject: [PATCH 051/156] feat: add lang translation for delete button --- public/locales/de.json | 3 ++- public/locales/en.json | 3 ++- public/locales/es.json | 3 ++- public/locales/ja.json | 3 ++- public/locales/zh.json | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/public/locales/de.json b/public/locales/de.json index 411d8f22f..db0ce562f 100644 --- a/public/locales/de.json +++ b/public/locales/de.json @@ -162,7 +162,8 @@ "confirm_limit": "Limit bestätigen", "finish_capture": "Erfassung abschließen", "finish": "Fertig", - "cancel": "Abbrechen" + "cancel": "Abbrechen", + "delete": "Löschen" }, "screenshot": { "capture_fullpage": "Vollständige Seite erfassen", diff --git a/public/locales/en.json b/public/locales/en.json index c5a2ff4c7..9b4defbce 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -163,7 +163,8 @@ "confirm_limit": "Confirm Limit", "finish_capture": "Finish Capture", "finish": "Finish", - "cancel": "Cancel" + "cancel": "Cancel", + "delete": "Delete" }, "screenshot": { "capture_fullpage": "Capture Fullpage", diff --git a/public/locales/es.json b/public/locales/es.json index 6e52cc6fc..e897914ed 100644 --- a/public/locales/es.json +++ b/public/locales/es.json @@ -163,7 +163,8 @@ "confirm_limit": "Confirmar Límite", "finish_capture": "Finalizar Captura", "finish": "Finalizar", - "cancel": "Cancelar" + "cancel": "Cancelar", + "delete": "Eliminar" }, "screenshot": { "capture_fullpage": "Capturar Página Completa", diff --git a/public/locales/ja.json b/public/locales/ja.json index 9d2d9a898..9ae226dc4 100644 --- a/public/locales/ja.json +++ b/public/locales/ja.json @@ -163,7 +163,8 @@ "confirm_limit": "制限を確認", "finish_capture": "取得を完了", "finish": "完了", - "cancel": "キャンセル" + "cancel": "キャンセル", + "delete": "削除" }, "screenshot": { "capture_fullpage": "フルページを取得", diff --git a/public/locales/zh.json b/public/locales/zh.json index 69561d5ca..344a58a7b 100644 --- a/public/locales/zh.json +++ b/public/locales/zh.json @@ -163,7 +163,8 @@ "confirm_limit": "确认限制", "finish_capture": "完成捕获", "finish": "完成", - "cancel": "取消" + "cancel": "取消", + "delete": "删除" }, "screenshot": { "capture_fullpage": "捕获整页", From fd7e4ab626fe2b862de7fea14819be18c40012d8 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 28 Dec 2024 18:11:24 +0530 Subject: [PATCH 052/156] feat: check confirm capture and render delete button --- src/components/organisms/RightSidePanel.tsx | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 403f78f6c..c6b3479f1 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -56,6 +56,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture const [showCaptureText, setShowCaptureText] = useState(true); const [hoverStates, setHoverStates] = useState<{ [id: string]: boolean }>({}); const [browserStepIdList, setBrowserStepIdList] = useState([]); + const [isCaptureTextConfirmed, setIsCaptureTextConfirmed] = useState(false); const { lastAction, notify, currentWorkflowActionsState, setCurrentWorkflowActionsState, resetInterpretationLog } = useGlobalInfoStore(); const { getText, startGetText, stopGetText, getScreenshot, startGetScreenshot, stopGetScreenshot, getList, startGetList, stopGetList, startPaginationMode, stopPaginationMode, paginationType, updatePaginationType, limitType, customLimit, updateLimitType, updateCustomLimit, stopLimitMode, startLimitMode, captureStage, setCaptureStage } = useActionContext(); @@ -130,6 +131,11 @@ export const RightSidePanel: React.FC = ({ onFinishCapture const handlePairDelete = () => { } + const handleStartGetText = () => { + setIsCaptureTextConfirmed(false); + startGetText(); + } + const handleTextLabelChange = (id: number, label: string, listId?: number, fieldKey?: string) => { if (listId !== undefined && fieldKey !== undefined) { // Prevent editing if the field is confirmed @@ -256,6 +262,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture if (hasTextSteps) { socket?.emit('action', { action: 'scrapeSchema', settings }); } + setIsCaptureTextConfirmed(true); resetInterpretationLog(); onFinishCapture(); }, [stopGetText, getTextSettingsObject, socket, browserSteps, confirmedTextSteps, resetInterpretationLog]); @@ -502,7 +509,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture )} - {!getText && !getScreenshot && !getList && showCaptureText && } + {!getText && !getScreenshot && !getList && showCaptureText && } {getText && <> @@ -563,7 +570,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture - ) : ( + ) : !isCaptureTextConfirmed && ( - ) : ( + ) : !isCaptureListConfirmed && ( + )} - + )} From dead389e480cae29978242047a554e3a3f780f30 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sun, 29 Dec 2024 17:55:01 +0530 Subject: [PATCH 056/156] feat: add translation for back button of capture list action --- public/locales/de.json | 1 + public/locales/en.json | 1 + public/locales/es.json | 1 + public/locales/ja.json | 1 + public/locales/zh.json | 1 + 5 files changed, 5 insertions(+) diff --git a/public/locales/de.json b/public/locales/de.json index 411d8f22f..c43b46a28 100644 --- a/public/locales/de.json +++ b/public/locales/de.json @@ -161,6 +161,7 @@ "confirm_pagination": "Paginierung bestätigen", "confirm_limit": "Limit bestätigen", "finish_capture": "Erfassung abschließen", + "back": "Zurück", "finish": "Fertig", "cancel": "Abbrechen" }, diff --git a/public/locales/en.json b/public/locales/en.json index c5a2ff4c7..1a68faedf 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -162,6 +162,7 @@ "confirm_pagination": "Confirm Pagination", "confirm_limit": "Confirm Limit", "finish_capture": "Finish Capture", + "back": "Back", "finish": "Finish", "cancel": "Cancel" }, diff --git a/public/locales/es.json b/public/locales/es.json index 6e52cc6fc..b2fb55a4c 100644 --- a/public/locales/es.json +++ b/public/locales/es.json @@ -162,6 +162,7 @@ "confirm_pagination": "Confirmar Paginación", "confirm_limit": "Confirmar Límite", "finish_capture": "Finalizar Captura", + "back": "Atrás", "finish": "Finalizar", "cancel": "Cancelar" }, diff --git a/public/locales/ja.json b/public/locales/ja.json index 9d2d9a898..e4073814d 100644 --- a/public/locales/ja.json +++ b/public/locales/ja.json @@ -162,6 +162,7 @@ "confirm_pagination": "ページネーションを確認", "confirm_limit": "制限を確認", "finish_capture": "取得を完了", + "back": "戻る", "finish": "完了", "cancel": "キャンセル" }, diff --git a/public/locales/zh.json b/public/locales/zh.json index 69561d5ca..d171c2c98 100644 --- a/public/locales/zh.json +++ b/public/locales/zh.json @@ -162,6 +162,7 @@ "confirm_pagination": "确认分页", "confirm_limit": "确认限制", "finish_capture": "完成捕获", + "back": "返回", "finish": "完成", "cancel": "取消" }, From a09b03e4a75627d2adc8189f8fdd361b36b8a82b Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sun, 29 Dec 2024 23:36:06 +0530 Subject: [PATCH 057/156] feat: get deepest shadowDOM element selector --- server/src/workflow-management/selector.ts | 243 +++++++++++++++------ 1 file changed, 179 insertions(+), 64 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 5a7273df4..9b3af66e8 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -23,31 +23,41 @@ export const getElementInformation = async ( if (!getList || listSelector !== '') { const elementInfo = await page.evaluate( async ({ x, y }) => { - // Helper function to get element from point including shadow DOM + // Enhanced helper function to get element from point including shadow DOM const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; // Traverse through shadow roots let current = element; - while (current) { - // Check if element has shadow root - const shadowRoot = current.shadowRoot; - if (!shadowRoot) break; - - // Try to find deeper element in shadow DOM + let shadowRoot = current.shadowRoot; + + // Keep track of the deepest shadow DOM element found + let deepestElement = current; + + while (shadowRoot) { + // Try to find element at same point in shadow DOM const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; - + + // Update our tracking of the deepest element + deepestElement = shadowElement; current = shadowElement; + shadowRoot = current.shadowRoot; } - return current; + + return deepestElement; }; const el = getDeepestElementFromPoint(x, y); if (el) { const { parentElement } = el; const element = parentElement?.tagName === 'A' ? parentElement : el; + + // Get the containing shadow root if any + const containingShadowRoot = element.getRootNode() as ShadowRoot; + const isShadowRoot = containingShadowRoot instanceof ShadowRoot; + let info: { tagName: string; hasOnlyText?: boolean; @@ -58,11 +68,20 @@ export const getElementInformation = async ( innerHTML?: string; outerHTML?: string; isShadowRoot?: boolean; + shadowRootMode?: string; + shadowRootContent?: string; } = { tagName: element?.tagName ?? '', - isShadowRoot: !!element?.shadowRoot + isShadowRoot: isShadowRoot }; + + if (isShadowRoot) { + // Include shadow root specific information + info.shadowRootMode = containingShadowRoot.mode; + info.shadowRootContent = containingShadowRoot.innerHTML; + } + // Get attributes including those from shadow DOM context if (element) { info.attributes = Array.from(element.attributes).reduce( (acc, attr) => { @@ -71,84 +90,82 @@ export const getElementInformation = async ( }, {} as Record ); + + // Get text content considering shadow DOM context + info.innerText = element.textContent ?? ''; + info.innerHTML = element.innerHTML; + info.outerHTML = element.outerHTML; + info.hasOnlyText = element.children.length === 0 && + (element.textContent !== null && + element.textContent.trim().length > 0); } - // Gather specific information based on the tag - if (element?.tagName === 'A') { - info.url = (element as HTMLAnchorElement).href; - info.innerText = element.innerText ?? ''; - } else if (element?.tagName === 'IMG') { - info.imageUrl = (element as HTMLImageElement).src; - } else if (element?.tagName === 'SELECT') { - const selectElement = element as HTMLSelectElement; - info.innerText = selectElement.options[selectElement.selectedIndex]?.text ?? ''; - info.attributes = { - ...info.attributes, - selectedValue: selectElement.value, - }; - } else if (element?.tagName === 'INPUT' && ((element as HTMLInputElement).type === 'time' || (element as HTMLInputElement).type === 'date')) { - info.innerText = (element as HTMLInputElement).value; - } else { - info.hasOnlyText = element?.children?.length === 0 && - element?.innerText?.length > 0; - info.innerText = element?.innerText ?? ''; - } - info.innerHTML = element.innerHTML; - info.outerHTML = element.outerHTML; return info; } return null; }, - { x: coordinates.x, y: coordinates.y }, + { x: coordinates.x, y: coordinates.y } ); return elementInfo; } else { const elementInfo = await page.evaluate( async ({ x, y }) => { - // Helper function to get element from point including shadow DOM + // Enhanced helper function to get element from point including shadow DOM const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; - + // Traverse through shadow roots let current = element; - while (current) { - const shadowRoot = current.shadowRoot; - if (!shadowRoot) break; - + let shadowRoot = current.shadowRoot; + + // Keep track of the deepest shadow DOM element found + let deepestElement = current; + + while (shadowRoot) { + // Try to find element at same point in shadow DOM const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; - + + // Update our tracking of the deepest element + deepestElement = shadowElement; current = shadowElement; + shadowRoot = current.shadowRoot; } - return current; + + return deepestElement; }; - + const originalEl = getDeepestElementFromPoint(x, y); if (originalEl) { let element = originalEl; - + + // Handle element hierarchy traversal for list items while (element.parentElement) { const parentRect = element.parentElement.getBoundingClientRect(); const childRect = element.getBoundingClientRect(); - + const fullyContained = parentRect.left <= childRect.left && parentRect.right >= childRect.right && parentRect.top <= childRect.top && parentRect.bottom >= childRect.bottom; - + const significantOverlap = (childRect.width * childRect.height) / (parentRect.width * parentRect.height) > 0.5; - + if (fullyContained && significantOverlap) { element = element.parentElement; } else { break; } } - + + // Get the containing shadow root if any + const containingShadowRoot = element.getRootNode() as ShadowRoot; + const isShadowRoot = containingShadowRoot instanceof ShadowRoot; + let info: { tagName: string; hasOnlyText?: boolean; @@ -159,12 +176,21 @@ export const getElementInformation = async ( innerHTML?: string; outerHTML?: string; isShadowRoot?: boolean; + shadowRootMode?: string; + shadowRootContent?: string; } = { tagName: element?.tagName ?? '', - isShadowRoot: !!element?.shadowRoot + isShadowRoot: isShadowRoot }; - + + if (isShadowRoot) { + // Include shadow root specific information + info.shadowRootMode = containingShadowRoot.mode; + info.shadowRootContent = containingShadowRoot.innerHTML; + } + if (element) { + // Get attributes including those from shadow DOM context info.attributes = Array.from(element.attributes).reduce( (acc, attr) => { acc[attr.name] = attr.value; @@ -172,21 +198,25 @@ export const getElementInformation = async ( }, {} as Record ); + + // Handle specific element types + if (element.tagName === 'A') { + info.url = (element as HTMLAnchorElement).href; + info.innerText = element.textContent ?? ''; + } else if (element.tagName === 'IMG') { + info.imageUrl = (element as HTMLImageElement).src; + } else { + // Handle text content with proper null checking + info.hasOnlyText = element.children.length === 0 && + (element.textContent !== null && + element.textContent.trim().length > 0); + info.innerText = element.textContent ?? ''; + } + + info.innerHTML = element.innerHTML; + info.outerHTML = element.outerHTML; } - - if (element?.tagName === 'A') { - info.url = (element as HTMLAnchorElement).href; - info.innerText = element.innerText ?? ''; - } else if (element?.tagName === 'IMG') { - info.imageUrl = (element as HTMLImageElement).src; - } else { - info.hasOnlyText = element?.children?.length === 0 && - element?.innerText?.length > 0; - info.innerText = element?.innerText ?? ''; - } - - info.innerHTML = element.innerHTML; - info.outerHTML = element.outerHTML; + return info; } return null; @@ -793,6 +823,76 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { return output; } + const MAX_DEPTH = 10; + + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + let current = element; + let deepestElement = current; + let depth = 0; + + while (current && depth < MAX_DEPTH) { + const shadowRoot = current.shadowRoot; + if (shadowRoot) { + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement) break; + + deepestElement = shadowElement; + current = shadowElement; + } else { + break; + } + depth++; + } + + return deepestElement; + }; + + const genSelectorForShadowDOM = (element: HTMLElement) => { + const findShadowContext = (element: HTMLElement): { host: HTMLElement, root: ShadowRoot } | null => { + let current: HTMLElement | null = element; + let depth = 0; + + while (current && depth < MAX_DEPTH) { + // Check if element is inside a shadow root + if (current.parentNode instanceof ShadowRoot) { + return { + host: (current.parentNode as ShadowRoot).host as HTMLElement, + root: current.parentNode as ShadowRoot + }; + } + current = current.parentElement; + depth++; + } + return null; + }; + + const shadowContext = findShadowContext(element); + if (!shadowContext) return null; + + try { + // Generate selector for the shadow host + const hostSelector = finder(shadowContext.host); + + // Generate selector for the element within the shadow DOM + const shadowElementSelector = finder(element, { + root: shadowContext.root as unknown as Element + }); + + return { + fullSelector: `${hostSelector} >>> ${shadowElementSelector}`, + hostSelector, + shadowElementSelector, + mode: shadowContext.root.mode + }; + } catch (e) { + console.warn('Error generating shadow DOM selector:', e); + return null; + } + }; + const genSelectors = (element: HTMLElement | null) => { if (element == null) { return null; @@ -812,6 +912,9 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { } catch (e) { } + // Generate shadow DOM specific selector + const shadowSelector = genSelectorForShadowDOM(element); + const hrefSelector = genSelectorForAttributes(element, ['href']); const formSelector = genSelectorForAttributes(element, [ 'name', @@ -858,9 +961,21 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { hrefSelector, accessibilitySelector, formSelector, + // Shadow DOM selector + shadowSelector: shadowSelector ? { + // Full selector that can traverse shadow DOM + full: shadowSelector.fullSelector, + // Individual parts for more flexible usage + host: shadowSelector.hostSelector, + element: shadowSelector.shadowElementSelector, + // Shadow root mode (open/closed) + mode: shadowSelector.mode + } : null }; } + + function genAttributeSet(element: HTMLElement, attributes: string[]) { return new Set( attributes.filter((attr) => { @@ -900,7 +1015,7 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { return char.length === 1 && char.match(/[0-9]/); } - const hoveredElement = document.elementFromPoint(x, y) as HTMLElement; + const hoveredElement = getDeepestElementFromPoint(x, y); if ( hoveredElement != null && !hoveredElement.closest('#overlay-controls') != null From 542f4d31fa43359928d052b9edb3caa5f446c1c3 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sun, 29 Dec 2024 23:41:19 +0530 Subject: [PATCH 058/156] feat: change shadowDOM full selector path --- server/src/workflow-management/selector.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 9b3af66e8..690fb0b15 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -882,7 +882,7 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { }); return { - fullSelector: `${hostSelector} >>> ${shadowElementSelector}`, + fullSelector: `${hostSelector} > ${shadowElementSelector}`, hostSelector, shadowElementSelector, mode: shadowContext.root.mode From b60f4b73b8424151f17bfd3389f5698f72c429df Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 01:24:32 +0530 Subject: [PATCH 059/156] feat: add functionality to scrape shadowDOM elements --- maxun-core/src/browserSide/scraper.js | 122 ++++++++++++++++---------- 1 file changed, 78 insertions(+), 44 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index a2009d789..ef979828d 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -189,68 +189,102 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @returns {Array.>} */ window.scrapeSchema = function (lists) { + // These utility functions remain unchanged as they work perfectly function omap(object, f, kf = (x) => x) { - return Object.fromEntries( - Object.entries(object) - .map(([k, v]) => [kf(k), f(v)]), - ); + return Object.fromEntries( + Object.entries(object) + .map(([k, v]) => [kf(k), f(v)]), + ); } function ofilter(object, f) { - return Object.fromEntries( - Object.entries(object) - .filter(([k, v]) => f(k, v)), - ); + return Object.fromEntries( + Object.entries(object) + .filter(([k, v]) => f(k, v)), + ); } + function findElement(config) { + // If this is a shadow DOM query + if (config.shadow && config.selector.includes('>>')) { + const [hostSelector, shadowSelector] = config.selector.split('>>').map(s => s.trim()); + const host = document.querySelector(hostSelector); + return host?.shadowRoot?.querySelector(shadowSelector) || null; + } + // Otherwise, use regular querySelector + return document.querySelector(config.selector); + } + + function findAllElements(config) { + // If this is a shadow DOM query + if (config.shadow && config.selector.includes('>>')) { + const element = findElement(config); + return element ? [element] : []; + } + // Otherwise, use regular querySelectorAll + return Array.from(document.querySelectorAll(config.selector)); + } + + // Modified to use our new element finding functions function getSeedKey(listObj) { - const maxLength = Math.max(...Object.values(omap(listObj, (x) => document.querySelectorAll(x.selector).length))); - return Object.keys(ofilter(listObj, (_, v) => document.querySelectorAll(v.selector).length === maxLength))[0]; + const maxLength = Math.max(...Object.values( + omap(listObj, (x) => findAllElements(x).length) + )); + return Object.keys( + ofilter(listObj, (_, v) => findAllElements(v).length === maxLength) + )[0]; } + // This function remains unchanged as it works with DOM elements + // regardless of how they were found function getMBEs(elements) { - return elements.map((element) => { - let candidate = element; - const isUniqueChild = (e) => elements - .filter((elem) => e.parentNode?.contains(elem)) - .length === 1; - - while (candidate && isUniqueChild(candidate)) { - candidate = candidate.parentNode; - } + return elements.map((element) => { + let candidate = element; + const isUniqueChild = (e) => elements + .filter((elem) => e.parentNode?.contains(elem)) + .length === 1; + + while (candidate && isUniqueChild(candidate)) { + candidate = candidate.parentNode; + } - return candidate; - }); + return candidate; + }); } const seedName = getSeedKey(lists); - const seedElements = Array.from(document.querySelectorAll(lists[seedName].selector)); + const seedElements = findAllElements(lists[seedName]); const MBEs = getMBEs(seedElements); return MBEs.map((mbe) => omap( - lists, - ({ selector, attribute }, key) => { - const elem = Array.from(document.querySelectorAll(selector)).find((elem) => mbe.contains(elem)); - if (!elem) return undefined; - - switch (attribute) { - case 'href': - const relativeHref = elem.getAttribute('href'); - return relativeHref ? new URL(relativeHref, window.location.origin).href : null; - case 'src': - const relativeSrc = elem.getAttribute('src'); - return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null; - case 'innerText': - return elem.innerText; - case 'textContent': - return elem.textContent; - default: - return elem.innerText; - } - }, - (key) => key // Use the original key in the output + lists, + (config, key) => { + // Use our new findAllElements function + const elem = findAllElements(config) + .find((elem) => mbe.contains(elem)); + + if (!elem) return undefined; + + switch (config.attribute) { + case 'href': { + const relativeHref = elem.getAttribute('href'); + return relativeHref ? new URL(relativeHref, window.location.origin).href : null; + } + case 'src': { + const relativeSrc = elem.getAttribute('src'); + return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null; + } + case 'innerText': + return elem.innerText; + case 'textContent': + return elem.textContent; + default: + return elem.getAttribute(config.attribute) || elem.innerText; + } + }, + (key) => key )) || []; - } + }; /** * Scrapes multiple lists of similar items based on a template item. From 9f9dc4e1030ca3819355245765ecadcc1e2c8d6f Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 01:25:45 +0530 Subject: [PATCH 060/156] feat: add shadow optional field in SelectorObject --- src/context/browserSteps.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index dd2111990..fd311a350 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -32,6 +32,7 @@ export interface SelectorObject { selector: string; tag?: string; attribute?: string; + shadow?: boolean; [key: string]: any; } From b696fa568d65a0948edd3b999eb31c4ccf39dad5 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 01:28:22 +0530 Subject: [PATCH 061/156] feat: add shadow param for scrapeSchema config --- maxun-core/src/interpret.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index c581954df..495ba2db8 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -403,7 +403,7 @@ export default class Interpreter extends EventEmitter { await this.options.serializableCallback(scrapeResults); }, - scrapeSchema: async (schema: Record) => { + scrapeSchema: async (schema: Record) => { await this.ensureScriptsLoaded(page); const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema); From 415ce02a3d2eb82f7434230239a5c7659b557016 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 02:39:27 +0530 Subject: [PATCH 062/156] feat: add shadow bool field to text step --- src/components/organisms/BrowserWindow.tsx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index c7e9fc0f3..11fe8c558 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -13,6 +13,7 @@ import { useTranslation } from 'react-i18next'; interface ElementInfo { tagName: string; hasOnlyText?: boolean; + isShadowRoot?: boolean; innerText?: string; url?: string; imageUrl?: string; @@ -185,6 +186,7 @@ export const BrowserWindow = () => { addTextStep('', data, { selector: highlighterData.selector, tag: highlighterData.elementInfo?.tagName, + shadow: highlighterData.elementInfo?.isShadowRoot, attribute }); } else { @@ -192,7 +194,7 @@ export const BrowserWindow = () => { setAttributeOptions(options); setSelectedElement({ selector: highlighterData.selector, - info: highlighterData.elementInfo + info: highlighterData.elementInfo, }); setShowAttributeModal(true); } @@ -229,6 +231,7 @@ export const BrowserWindow = () => { selectorObj: { selector: highlighterData.selector, tag: highlighterData.elementInfo?.tagName, + shadow: highlighterData.elementInfo?.isShadowRoot, attribute } }; @@ -276,6 +279,7 @@ export const BrowserWindow = () => { addTextStep('', data, { selector: selectedElement.selector, tag: selectedElement.info?.tagName, + shadow: selectedElement.info?.isShadowRoot, attribute: attribute }); } @@ -288,6 +292,7 @@ export const BrowserWindow = () => { selectorObj: { selector: selectedElement.selector, tag: selectedElement.info?.tagName, + shadow: selectedElement.info?.isShadowRoot, attribute: attribute } }; From 1a6a481b578a7212743ceb199b934585583b5a0e Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 02:46:24 +0530 Subject: [PATCH 063/156] feat: add shadow selectors field type --- server/src/types/index.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/server/src/types/index.ts b/server/src/types/index.ts index f2e327efa..e882f69d2 100644 --- a/server/src/types/index.ts +++ b/server/src/types/index.ts @@ -129,6 +129,13 @@ export interface BaseActionInfo { hasOnlyText: boolean; } +interface ShadowSelector { + full: string; + host: string; + element: string; + mode: string; +} + /** * Holds all the possible css selectors that has been found for an element. * @category Types @@ -143,6 +150,7 @@ export interface Selectors { hrefSelector: string|null; accessibilitySelector: string|null; formSelector: string|null; + shadowSelector: ShadowSelector | null; } /** @@ -156,7 +164,7 @@ export interface BaseAction extends BaseActionInfo{ associatedActions: ActionType[]; inputType: string | undefined; value: string | undefined; - selectors: { [key: string]: string | null }; + selectors: Selectors; timestamp: number; isPassword: boolean; /** From c3031811a63d21139c306781a6e64ee09d81b1de Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 02:52:58 +0530 Subject: [PATCH 064/156] feat: prioritize returning shadow selector --- server/src/workflow-management/utils.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/server/src/workflow-management/utils.ts b/server/src/workflow-management/utils.ts index b3dadd607..4f7471277 100644 --- a/server/src/workflow-management/utils.ts +++ b/server/src/workflow-management/utils.ts @@ -12,6 +12,11 @@ export const getBestSelectorForAction = (action: Action) => { case ActionType.Hover: case ActionType.DragAndDrop: { const selectors = action.selectors; + + if (selectors?.shadowSelector?.full) { + return selectors.shadowSelector.full; + } + // less than 25 characters, and element only has text inside const textSelector = selectors?.text?.length != null && @@ -75,6 +80,11 @@ export const getBestSelectorForAction = (action: Action) => { case ActionType.Input: case ActionType.Keydown: { const selectors = action.selectors; + + if (selectors?.shadowSelector?.full) { + return selectors.shadowSelector.full; + } + return ( selectors.testIdSelector ?? selectors?.id ?? From cec2397a58256736b60467e40f1cc2e255667394 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 02:55:21 +0530 Subject: [PATCH 065/156] feat: change shadowDOM full selector path --- server/src/workflow-management/selector.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 690fb0b15..164f5220a 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -882,7 +882,7 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { }); return { - fullSelector: `${hostSelector} > ${shadowElementSelector}`, + fullSelector: `${hostSelector} >> ${shadowElementSelector}`, hostSelector, shadowElementSelector, mode: shadowContext.root.mode From 05c7921c9d574d4074b64f56319d6166e84b1dc3 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 03:05:07 +0530 Subject: [PATCH 066/156] feat: add shadowInfo in highlighter data --- .../workflow-management/classes/Generator.ts | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 609541de1..d1bccbe42 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -730,15 +730,26 @@ export class WorkflowGenerator { const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click); const elementInfo = await getElementInformation(page, coordinates, this.listSelector, this.getList); if (rect) { + const highlighterData = { + rect, + selector: displaySelector, + elementInfo, + // Include shadow DOM specific information + shadowInfo: elementInfo?.isShadowRoot ? { + mode: elementInfo.shadowRootMode, + content: elementInfo.shadowRootContent + } : null + }; + if (this.getList === true) { if (this.listSelector !== '') { const childSelectors = await getChildSelectors(page, this.listSelector || ''); - this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo, childSelectors }) + this.socket.emit('highlighter', { ...highlighterData, childSelectors }) } else { - this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo }); + this.socket.emit('highlighter', { ...highlighterData }); } } else { - this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo }); + this.socket.emit('highlighter', { ...highlighterData }); } } } From 4031ded27947f7ac72f40c9203cb1498f0ac4460 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:26:53 +0530 Subject: [PATCH 067/156] feat: confirm instead of confirm pagination --- public/locales/en.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/en.json b/public/locales/en.json index 70ded8615..cb3f2789f 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -159,7 +159,7 @@ "confirm": "Confirm", "discard": "Discard", "confirm_capture": "Confirm Capture", - "confirm_pagination": "Confirm Pagination", + "confirm_pagination": "Confirm", "confirm_limit": "Confirm Limit", "finish_capture": "Finish Capture", "back": "Back", From 8baad8d1f90b9b36594206e91dc37304c68d0a9b Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:27:23 +0530 Subject: [PATCH 068/156] feat: confirm instead of confirm limit --- public/locales/en.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/en.json b/public/locales/en.json index cb3f2789f..bd8acce38 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -160,7 +160,7 @@ "discard": "Discard", "confirm_capture": "Confirm Capture", "confirm_pagination": "Confirm", - "confirm_limit": "Confirm Limit", + "confirm_limit": "Confirm", "finish_capture": "Finish Capture", "back": "Back", "finish": "Finish", From 663a4fd69c0f13998a61f13b1564cb8565bf942b Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:29:24 +0530 Subject: [PATCH 069/156] feat(spanish): confirm instead of confirm pagination --- public/locales/es.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/es.json b/public/locales/es.json index 5cde0c701..089c10cd6 100644 --- a/public/locales/es.json +++ b/public/locales/es.json @@ -159,7 +159,7 @@ "confirm": "Confirmar", "discard": "Descartar", "confirm_capture": "Confirmar Captura", - "confirm_pagination": "Confirmar Paginación", + "confirm_pagination": "Confirmar", "confirm_limit": "Confirmar Límite", "finish_capture": "Finalizar Captura", "back": "Atrás", From aded4dfebb42e765f8b22d1b01111c2f82fc73f3 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:29:41 +0530 Subject: [PATCH 070/156] feat(spanish): confirm instead of confirm limit --- public/locales/es.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/es.json b/public/locales/es.json index 089c10cd6..942108801 100644 --- a/public/locales/es.json +++ b/public/locales/es.json @@ -160,7 +160,7 @@ "discard": "Descartar", "confirm_capture": "Confirmar Captura", "confirm_pagination": "Confirmar", - "confirm_limit": "Confirmar Límite", + "confirm_limit": "Confirmar", "finish_capture": "Finalizar Captura", "back": "Atrás", "finish": "Finalizar", From 09b974ca782e574240beab052c1a1e78e0316eac Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:30:38 +0530 Subject: [PATCH 071/156] feat(japanese): confirm instead of confirm pagination --- public/locales/ja.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/ja.json b/public/locales/ja.json index a0d18c670..e2204e14a 100644 --- a/public/locales/ja.json +++ b/public/locales/ja.json @@ -159,7 +159,7 @@ "confirm": "確認", "discard": "破棄", "confirm_capture": "取得を確認", - "confirm_pagination": "ページネーションを確認", + "confirm_pagination": "確認", "confirm_limit": "制限を確認", "finish_capture": "取得を完了", "back": "戻る", From 20b31f36d99fe327075e9c104637b79b282edf87 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:30:57 +0530 Subject: [PATCH 072/156] feat(japanese): confirm instead of confirm limit --- public/locales/ja.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/ja.json b/public/locales/ja.json index e2204e14a..0bcba967a 100644 --- a/public/locales/ja.json +++ b/public/locales/ja.json @@ -160,7 +160,7 @@ "discard": "破棄", "confirm_capture": "取得を確認", "confirm_pagination": "確認", - "confirm_limit": "制限を確認", + "confirm_limit": "確認", "finish_capture": "取得を完了", "back": "戻る", "finish": "完了", From e78a61139d7fa828cf772ee4bc6c63889f77f3f8 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:31:50 +0530 Subject: [PATCH 073/156] feat(german): confirm instead of confirm pagination --- public/locales/de.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/de.json b/public/locales/de.json index debf80f64..e06b784a5 100644 --- a/public/locales/de.json +++ b/public/locales/de.json @@ -158,7 +158,7 @@ "confirm": "Bestätigen", "discard": "Verwerfen", "confirm_capture": "Erfassung bestätigen", - "confirm_pagination": "Paginierung bestätigen", + "confirm_pagination": "Bestätigen", "confirm_limit": "Limit bestätigen", "finish_capture": "Erfassung abschließen", "back": "Zurück", From c753ce551200fa4b6431ff034694c6dcd343e516 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:32:16 +0530 Subject: [PATCH 074/156] feat(german): confirm instead of confirm limit --- public/locales/de.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/de.json b/public/locales/de.json index e06b784a5..b9b4185b1 100644 --- a/public/locales/de.json +++ b/public/locales/de.json @@ -159,7 +159,7 @@ "discard": "Verwerfen", "confirm_capture": "Erfassung bestätigen", "confirm_pagination": "Bestätigen", - "confirm_limit": "Limit bestätigen", + "confirm_limit": "Bestätigen", "finish_capture": "Erfassung abschließen", "back": "Zurück", "finish": "Fertig", From 634daeecf595cd2418913d9ae21689e55e5c2b39 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:32:56 +0530 Subject: [PATCH 075/156] feat(chinese): confirm instead of confirm pagination --- public/locales/zh.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/zh.json b/public/locales/zh.json index e55565f8b..805396dcb 100644 --- a/public/locales/zh.json +++ b/public/locales/zh.json @@ -159,7 +159,7 @@ "confirm": "确认", "discard": "放弃", "confirm_capture": "确认捕获", - "confirm_pagination": "确认分页", + "confirm_pagination": "确认", "confirm_limit": "确认限制", "finish_capture": "完成捕获", "back": "返回", From cd7f38f561a1ce2e4b596a25adeb48c5b6342f0f Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 30 Dec 2024 19:33:12 +0530 Subject: [PATCH 076/156] feat(chinese): confirm instead of confirm limit --- public/locales/zh.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/locales/zh.json b/public/locales/zh.json index 805396dcb..a19fe4391 100644 --- a/public/locales/zh.json +++ b/public/locales/zh.json @@ -160,7 +160,7 @@ "discard": "放弃", "confirm_capture": "确认捕获", "confirm_pagination": "确认", - "confirm_limit": "确认限制", + "confirm_limit": "确认", "finish_capture": "完成捕获", "back": "返回", "finish": "完成", From d2ab81e22959acc9fccf65f5845d8962170608d7 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 22:59:28 +0530 Subject: [PATCH 077/156] feat: add logic to get deeply nested shadowDOM elements --- server/src/workflow-management/selector.ts | 87 ++++++++++++---------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 164f5220a..d957b879a 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -823,7 +823,7 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { return output; } - const MAX_DEPTH = 10; + // const MAX_DEPTH = 10; const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { let element = document.elementFromPoint(x, y) as HTMLElement; @@ -832,60 +832,76 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { let current = element; let deepestElement = current; let depth = 0; + const MAX_DEPTH = 4; // Limit to 2 levels of shadow DOM while (current && depth < MAX_DEPTH) { const shadowRoot = current.shadowRoot; - if (shadowRoot) { - const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; - if (!shadowElement) break; - - deepestElement = shadowElement; - current = shadowElement; - } else { - break; - } + if (!shadowRoot) break; + + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepestElement = shadowElement; + current = shadowElement; depth++; } return deepestElement; }; + // Helper function to generate selectors for shadow DOM elements const genSelectorForShadowDOM = (element: HTMLElement) => { - const findShadowContext = (element: HTMLElement): { host: HTMLElement, root: ShadowRoot } | null => { - let current: HTMLElement | null = element; + // Get complete path up to document root + const getShadowPath = (el: HTMLElement) => { + const path = []; + let current = el; let depth = 0; + const MAX_DEPTH = 4; while (current && depth < MAX_DEPTH) { - // Check if element is inside a shadow root - if (current.parentNode instanceof ShadowRoot) { - return { - host: (current.parentNode as ShadowRoot).host as HTMLElement, - root: current.parentNode as ShadowRoot - }; + const rootNode = current.getRootNode(); + if (rootNode instanceof ShadowRoot) { + path.unshift({ + host: rootNode.host as HTMLElement, + root: rootNode, + element: current + }); + current = rootNode.host as HTMLElement; + depth++; + } else { + break; } - current = current.parentElement; - depth++; } - return null; + return path; }; - - const shadowContext = findShadowContext(element); - if (!shadowContext) return null; + + const shadowPath = getShadowPath(element); + if (shadowPath.length === 0) return null; try { - // Generate selector for the shadow host - const hostSelector = finder(shadowContext.host); + const selectorParts: string[] = []; - // Generate selector for the element within the shadow DOM - const shadowElementSelector = finder(element, { - root: shadowContext.root as unknown as Element + // Generate selector for each shadow DOM boundary + shadowPath.forEach((context, index) => { + // Get selector for the host element + const hostSelector = finder(context.host, { + root: index === 0 ? document.body : (shadowPath[index - 1].root as unknown as Element) + }); + + // For the last context, get selector for target element + if (index === shadowPath.length - 1) { + const elementSelector = finder(element, { + root: context.root as unknown as Element + }); + selectorParts.push(`${hostSelector} >> ${elementSelector}`); + } else { + selectorParts.push(hostSelector); + } }); return { - fullSelector: `${hostSelector} >> ${shadowElementSelector}`, - hostSelector, - shadowElementSelector, - mode: shadowContext.root.mode + fullSelector: selectorParts.join(' >> '), + mode: shadowPath[shadowPath.length - 1].root.mode }; } catch (e) { console.warn('Error generating shadow DOM selector:', e); @@ -963,12 +979,7 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { formSelector, // Shadow DOM selector shadowSelector: shadowSelector ? { - // Full selector that can traverse shadow DOM full: shadowSelector.fullSelector, - // Individual parts for more flexible usage - host: shadowSelector.hostSelector, - element: shadowSelector.shadowElementSelector, - // Shadow root mode (open/closed) mode: shadowSelector.mode } : null }; From 9287c296922478b77391d0c4930f4b478de4614e Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 23:02:21 +0530 Subject: [PATCH 078/156] feat: rm host and element info for shadow selector --- server/src/types/index.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/types/index.ts b/server/src/types/index.ts index e882f69d2..151e3dd41 100644 --- a/server/src/types/index.ts +++ b/server/src/types/index.ts @@ -131,8 +131,6 @@ export interface BaseActionInfo { interface ShadowSelector { full: string; - host: string; - element: string; mode: string; } From e952d8f202278a67e86350e60542576b09260238 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 23:37:16 +0530 Subject: [PATCH 079/156] feat: add nested shadow-root scraping logic for scrapeSchema --- maxun-core/src/browserSide/scraper.js | 115 ++++++++++++++++---------- 1 file changed, 70 insertions(+), 45 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index ef979828d..ad9295b8d 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -188,8 +188,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @param {Object.} lists The named lists of HTML elements. * @returns {Array.>} */ - window.scrapeSchema = function (lists) { - // These utility functions remain unchanged as they work perfectly + window.scrapeSchema = function(lists) { function omap(object, f, kf = (x) => x) { return Object.fromEntries( Object.entries(object) @@ -203,29 +202,73 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, .filter(([k, v]) => f(k, v)), ); } - - function findElement(config) { - // If this is a shadow DOM query - if (config.shadow && config.selector.includes('>>')) { - const [hostSelector, shadowSelector] = config.selector.split('>>').map(s => s.trim()); - const host = document.querySelector(hostSelector); - return host?.shadowRoot?.querySelector(shadowSelector) || null; - } - // Otherwise, use regular querySelector - return document.querySelector(config.selector); - } - + function findAllElements(config) { - // If this is a shadow DOM query - if (config.shadow && config.selector.includes('>>')) { - const element = findElement(config); - return element ? [element] : []; - } - // Otherwise, use regular querySelectorAll - return Array.from(document.querySelectorAll(config.selector)); + if (!config.shadow || !config.selector.includes('>>')) { + return Array.from(document.querySelectorAll(config.selector)); + } + + // For shadow DOM, we'll get all possible combinations + const parts = config.selector.split('>>').map(s => s.trim()); + let currentElements = [document]; + + for (let i = 0; i < parts.length; i++) { + const part = parts[i]; + const nextElements = []; + + for (const element of currentElements) { + let targets; + if (i === 0) { + // First selector is queried from document + targets = Array.from(element.querySelectorAll(part)) + .filter(el => { + // Only include elements that either: + // 1. Have an open shadow root + // 2. Don't need shadow root (last part of selector) + if (i === parts.length - 1) return true; + const shadowRoot = el.shadowRoot; + return shadowRoot && shadowRoot.mode === 'open'; + }); + } else { + // For subsequent selectors, only use elements with open shadow roots + const shadowRoot = element.shadowRoot; + if (!shadowRoot || shadowRoot.mode !== 'open') continue; + + targets = Array.from(shadowRoot.querySelectorAll(part)); + } + nextElements.push(...targets); + } + + if (nextElements.length === 0) return []; + currentElements = nextElements; + } + + return currentElements; } + + // Helper function to extract value from element based on attribute + function getElementValue(element, attribute) { + if (!element) return null; + + switch (attribute) { + case 'href': { + const relativeHref = element.getAttribute('href'); + return relativeHref ? new URL(relativeHref, window.location.origin).href : null; + } + case 'src': { + const relativeSrc = element.getAttribute('src'); + return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null; + } + case 'innerText': + return element.innerText?.trim(); + case 'textContent': + return element.textContent?.trim(); + default: + return element.getAttribute(attribute) || element.innerText?.trim(); + } + } - // Modified to use our new element finding functions + // Get the seed key based on the maximum number of elements found function getSeedKey(listObj) { const maxLength = Math.max(...Object.values( omap(listObj, (x) => findAllElements(x).length) @@ -235,8 +278,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, )[0]; } - // This function remains unchanged as it works with DOM elements - // regardless of how they were found + // Find minimal bounding elements function getMBEs(elements) { return elements.map((element) => { let candidate = element; @@ -252,35 +294,18 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, }); } + // Main scraping logic const seedName = getSeedKey(lists); const seedElements = findAllElements(lists[seedName]); const MBEs = getMBEs(seedElements); return MBEs.map((mbe) => omap( lists, - (config, key) => { - // Use our new findAllElements function + (config) => { const elem = findAllElements(config) .find((elem) => mbe.contains(elem)); - - if (!elem) return undefined; - - switch (config.attribute) { - case 'href': { - const relativeHref = elem.getAttribute('href'); - return relativeHref ? new URL(relativeHref, window.location.origin).href : null; - } - case 'src': { - const relativeSrc = elem.getAttribute('src'); - return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null; - } - case 'innerText': - return elem.innerText; - case 'textContent': - return elem.textContent; - default: - return elem.getAttribute(config.attribute) || elem.innerText; - } + + return elem ? getElementValue(elem, config.attribute) : undefined; }, (key) => key )) || []; From b757d9c4f8b0ea00d6eb6d6fe6c2e7c37407ed92 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Mon, 30 Dec 2024 23:38:38 +0530 Subject: [PATCH 080/156] feat: add func to rm shadow selectors from workflow --- maxun-core/src/interpret.ts | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 495ba2db8..3cef8c298 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -663,11 +663,28 @@ export default class Interpreter extends EventEmitter { if (isApplicable) { return actionId; } + } } + + private removeShadowSelectors(workflow: Workflow) { + for (let actionId = workflow.length - 1; actionId >= 0; actionId--) { + const step = workflow[actionId]; + + // Check if step has where and selectors + if (step.where && Array.isArray(step.where.selectors)) { + // Filter out selectors that contain ">>" + step.where.selectors = step.where.selectors.filter(selector => !selector.includes('>>')); + } + } + + return workflow; } private async runLoop(p: Page, workflow: Workflow) { - const workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow)); + let workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow)); + + // remove shadow selectors + workflowCopy = this.removeShadowSelectors(workflowCopy); // apply ad-blocker to the current page try { From 4b4074b70d352401120bd1fe0b37fbee7838bac5 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Tue, 31 Dec 2024 01:52:38 +0530 Subject: [PATCH 081/156] feat: add logic to scrape multiple nested shadow dom elements --- maxun-core/src/browserSide/scraper.js | 174 +++++++++++++++----------- 1 file changed, 100 insertions(+), 74 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index ad9295b8d..00f8cef7c 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -204,69 +204,68 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, } function findAllElements(config) { - if (!config.shadow || !config.selector.includes('>>')) { - return Array.from(document.querySelectorAll(config.selector)); - } - - // For shadow DOM, we'll get all possible combinations - const parts = config.selector.split('>>').map(s => s.trim()); - let currentElements = [document]; - - for (let i = 0; i < parts.length; i++) { - const part = parts[i]; - const nextElements = []; - - for (const element of currentElements) { - let targets; - if (i === 0) { - // First selector is queried from document - targets = Array.from(element.querySelectorAll(part)) - .filter(el => { - // Only include elements that either: - // 1. Have an open shadow root - // 2. Don't need shadow root (last part of selector) - if (i === parts.length - 1) return true; - const shadowRoot = el.shadowRoot; - return shadowRoot && shadowRoot.mode === 'open'; - }); - } else { - // For subsequent selectors, only use elements with open shadow roots - const shadowRoot = element.shadowRoot; - if (!shadowRoot || shadowRoot.mode !== 'open') continue; - - targets = Array.from(shadowRoot.querySelectorAll(part)); + if (!config.shadow || !config.selector.includes('>>')) { + return Array.from(document.querySelectorAll(config.selector)); + } + + // For shadow DOM, we'll get all possible combinations + const parts = config.selector.split('>>').map(s => s.trim()); + let currentElements = [document]; + + for (let i = 0; i < parts.length; i++) { + const part = parts[i]; + const nextElements = []; + + for (const element of currentElements) { + let targets; + if (i === 0) { + // First selector is queried from document + targets = Array.from(element.querySelectorAll(part)) + .filter(el => { + // Only include elements that either: + // 1. Have an open shadow root + // 2. Don't need shadow root (last part of selector) + if (i === parts.length - 1) return true; + const shadowRoot = el.shadowRoot; + return shadowRoot && shadowRoot.mode === 'open'; + }); + } else { + // For subsequent selectors, only use elements with open shadow roots + const shadowRoot = element.shadowRoot; + if (!shadowRoot || shadowRoot.mode !== 'open') continue; + + targets = Array.from(shadowRoot.querySelectorAll(part)); + } + nextElements.push(...targets); } - nextElements.push(...targets); - } - - if (nextElements.length === 0) return []; - currentElements = nextElements; - } - - return currentElements; + + if (nextElements.length === 0) return []; + currentElements = nextElements; + } + + return currentElements; } - // Helper function to extract value from element based on attribute - function getElementValue(element, attribute) { - if (!element) return null; - - switch (attribute) { - case 'href': { - const relativeHref = element.getAttribute('href'); - return relativeHref ? new URL(relativeHref, window.location.origin).href : null; - } - case 'src': { - const relativeSrc = element.getAttribute('src'); - return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null; - } - case 'innerText': - return element.innerText?.trim(); - case 'textContent': - return element.textContent?.trim(); - default: - return element.getAttribute(attribute) || element.innerText?.trim(); - } - } + function getElementValue(element, attribute) { + if (!element) return null; + + switch (attribute) { + case 'href': { + const relativeHref = element.getAttribute('href'); + return relativeHref ? new URL(relativeHref, window.location.origin).href : null; + } + case 'src': { + const relativeSrc = element.getAttribute('src'); + return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null; + } + case 'innerText': + return element.innerText?.trim(); + case 'textContent': + return element.textContent?.trim(); + default: + return element.getAttribute(attribute) || element.innerText?.trim(); + } + } // Get the seed key based on the maximum number of elements found function getSeedKey(listObj) { @@ -280,26 +279,26 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, // Find minimal bounding elements function getMBEs(elements) { - return elements.map((element) => { - let candidate = element; - const isUniqueChild = (e) => elements - .filter((elem) => e.parentNode?.contains(elem)) - .length === 1; - - while (candidate && isUniqueChild(candidate)) { - candidate = candidate.parentNode; - } + return elements.map((element) => { + let candidate = element; + const isUniqueChild = (e) => elements + .filter((elem) => e.parentNode?.contains(elem)) + .length === 1; + + while (candidate && isUniqueChild(candidate)) { + candidate = candidate.parentNode; + } - return candidate; - }); + return candidate; + }); } - // Main scraping logic + // First try the MBE approach const seedName = getSeedKey(lists); const seedElements = findAllElements(lists[seedName]); const MBEs = getMBEs(seedElements); - - return MBEs.map((mbe) => omap( + + const mbeResults = MBEs.map((mbe) => omap( lists, (config) => { const elem = findAllElements(config) @@ -309,6 +308,33 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, }, (key) => key )) || []; + + // If MBE approach didn't find all elements, try independent scraping + if (mbeResults.some(result => Object.values(result).some(v => v === undefined))) { + // Fall back to independent scraping + const results = []; + const foundElements = new Map(); + + // Find all elements for each selector + Object.entries(lists).forEach(([key, config]) => { + const elements = findAllElements(config); + foundElements.set(key, elements); + }); + + // Create result objects for each found element + foundElements.forEach((elements, key) => { + elements.forEach((element, index) => { + if (!results[index]) { + results[index] = {}; + } + results[index][key] = getElementValue(element, lists[key].attribute); + }); + }); + + return results.filter(result => Object.keys(result).length > 0); + } + + return mbeResults; }; /** From 4a09ea66ff6c3c25c02b7997ed97f0ac4d677cd9 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Tue, 31 Dec 2024 12:26:09 +0530 Subject: [PATCH 082/156] feat: get deepest element rect coordinates --- server/src/workflow-management/selector.ts | 53 ++++++++++++++-------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index d957b879a..910b31340 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -237,23 +237,30 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector if (!getList || listSelector !== '') { const rect = await page.evaluate( async ({ x, y }) => { - // Helper function to get element from point including shadow DOM + // Enhanced helper function to get element from point including shadow DOM const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; // Traverse through shadow roots let current = element; - while (current) { - const shadowRoot = current.shadowRoot; - if (!shadowRoot) break; - + let shadowRoot = current.shadowRoot; + + // Keep track of the deepest shadow DOM element found + let deepestElement = current; + + while (shadowRoot) { + // Try to find element at same point in shadow DOM const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; - + + // Update our tracking of the deepest element + deepestElement = shadowElement; current = shadowElement; + shadowRoot = current.shadowRoot; } - return current; + + return deepestElement; }; const el = getDeepestElementFromPoint(x, y); @@ -274,36 +281,45 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector }; } } + return null; }, - { x: coordinates.x, y: coordinates.y }, + { x: coordinates.x, y: coordinates.y } ); return rect; } else { const rect = await page.evaluate( async ({ x, y }) => { - // Helper function to get element from point including shadow DOM + // Enhanced helper function to get element from point including shadow DOM const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; // Traverse through shadow roots let current = element; - while (current) { - const shadowRoot = current.shadowRoot; - if (!shadowRoot) break; - + let shadowRoot = current.shadowRoot; + + // Keep track of the deepest shadow DOM element found + let deepestElement = current; + + while (shadowRoot) { + // Try to find element at same point in shadow DOM const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; - + + // Update our tracking of the deepest element + deepestElement = shadowElement; current = shadowElement; + shadowRoot = current.shadowRoot; } - return current; + + return deepestElement; }; const originalEl = getDeepestElementFromPoint(x, y); if (originalEl) { let element = originalEl; + // Handle element hierarchy traversal for list items while (element.parentElement) { const parentRect = element.parentElement.getBoundingClientRect(); const childRect = element.getBoundingClientRect(); @@ -326,7 +342,6 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector } const rectangle = element?.getBoundingClientRect(); - if (rectangle) { return { x: rectangle.x, @@ -342,14 +357,14 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector } return null; }, - { x: coordinates.x, y: coordinates.y }, + { x: coordinates.x, y: coordinates.y } ); return rect; } } catch (error) { const { message, stack } = error as Error; - logger.log('error', `Error while retrieving selector: ${message}`); - logger.log('error', `Stack: ${stack}`); + console.error('Error while retrieving selector:', message); + console.error('Stack:', stack); } }; From 4c0ad3ceed6a8b192d8e672b36908fd4db3871cd Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 31 Dec 2024 21:26:53 +0530 Subject: [PATCH 083/156] fix: avoid ui shift on api key reveal --- src/components/organisms/ApiKey.tsx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/components/organisms/ApiKey.tsx b/src/components/organisms/ApiKey.tsx index 37a72764b..0af279343 100644 --- a/src/components/organisms/ApiKey.tsx +++ b/src/components/organisms/ApiKey.tsx @@ -124,7 +124,11 @@ const ApiKeyManager = () => { {apiKeyName} - {showKey ? `${apiKey?.substring(0, 10)}...` : '***************'} + + + {showKey ? `${apiKey?.substring(0, 10)}...` : '**********'} + + From a3337d7fcc08a8436edadcb33b0214401a63e28d Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 31 Dec 2024 21:27:12 +0530 Subject: [PATCH 084/156] fix: format --- src/components/organisms/ApiKey.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/ApiKey.tsx b/src/components/organisms/ApiKey.tsx index 0af279343..9d54fe5c5 100644 --- a/src/components/organisms/ApiKey.tsx +++ b/src/components/organisms/ApiKey.tsx @@ -126,7 +126,7 @@ const ApiKeyManager = () => { {apiKeyName} - {showKey ? `${apiKey?.substring(0, 10)}...` : '**********'} + {showKey ? `${apiKey?.substring(0, 10)}...` : '**********'} From 42e13066bd7800043e6952ddaae06d62985c2ee4 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Wed, 1 Jan 2025 16:13:38 +0530 Subject: [PATCH 085/156] feat: add shadowDOM support for capture list selector generation --- server/src/workflow-management/selector.ts | 329 +++++++++++++++++---- 1 file changed, 279 insertions(+), 50 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 910b31340..713c05bcc 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -1076,46 +1076,133 @@ interface SelectorResult { */ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates, listSelector: string): Promise => { + interface ShadowContext { + host: HTMLElement; + root: ShadowRoot; + element: HTMLElement; + } + try { if (!listSelector) { const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { + // Helper function to get deepest element, traversing shadow DOM + function getDeepestElementFromPoint(x: number, y: number): HTMLElement | null { + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + let current = element; + let deepestElement = current; + let depth = 0; + const MAX_DEPTH = 4; // Limit shadow DOM traversal depth + + while (current && depth < MAX_DEPTH) { + const shadowRoot = current.shadowRoot; + if (!shadowRoot) break; + + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepestElement = shadowElement; + current = shadowElement; + depth++; + } + + return deepestElement; + } + + // Generate basic selector from element's tag and classes function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); - - if (element.className) { - const classes = element.className.split(/\s+/).filter((cls: string) => Boolean(cls)); + + const className = typeof element.className === 'string' ? element.className : ''; + if (className) { + const classes = className.split(/\s+/) + .filter(cls => Boolean(cls) && !cls.startsWith('!') && !cls.includes(':')); + if (classes.length > 0) { - const validClasses = classes.filter((cls: string) => !cls.startsWith('!') && !cls.includes(':')); - if (validClasses.length > 0) { - selector += '.' + validClasses.map(cls => CSS.escape(cls)).join('.'); - } + selector += '.' + classes.map(cls => CSS.escape(cls)).join('.'); } } - + return selector; } + // Get complete shadow DOM path for an element + function getShadowPath(element: HTMLElement): ShadowContext[] { + const path: ShadowContext[] = []; + let current = element; + let depth = 0; + const MAX_DEPTH = 4; + + while (current && depth < MAX_DEPTH) { + const rootNode = current.getRootNode(); + if (rootNode instanceof ShadowRoot) { + path.unshift({ + host: rootNode.host as HTMLElement, + root: rootNode, + element: current + }); + current = rootNode.host as HTMLElement; + depth++; + } else { + break; + } + } + return path; + } + + // Generate complete selector path for any element function getSelectorPath(element: HTMLElement | null): string { + if (!element) return ''; + + // Check for shadow DOM path first + const shadowPath = getShadowPath(element); + if (shadowPath.length > 0) { + const selectorParts: string[] = []; + + // Build complete shadow DOM path + shadowPath.forEach((context, index) => { + const hostSelector = getNonUniqueSelector(context.host); + + if (index === shadowPath.length - 1) { + // For deepest shadow context, include target element + const elementSelector = getNonUniqueSelector(element); + selectorParts.push(`${hostSelector} >> ${elementSelector}`); + } else { + // For intermediate shadow boundaries + selectorParts.push(hostSelector); + } + }); + + return selectorParts.join(' >> '); + } + + // Regular DOM path generation const path: string[] = []; + let currentElement = element; let depth = 0; - const maxDepth = 2; + const MAX_DEPTH = 2; - while (element && element !== document.body && depth < maxDepth) { - const selector = getNonUniqueSelector(element); + while (currentElement && currentElement !== document.body && depth < MAX_DEPTH) { + const selector = getNonUniqueSelector(currentElement); path.unshift(selector); - element = element.parentElement; + + const parentElement = currentElement.parentElement; + if (!parentElement) break; + currentElement = parentElement; depth++; } return path.join(' > '); } - const originalEl = document.elementFromPoint(x, y) as HTMLElement; + // Main logic to get element and generate selector + const originalEl = getDeepestElementFromPoint(x, y); if (!originalEl) return null; let element = originalEl; - // if (listSelector === '') { + // Handle parent traversal for better element targeting while (element.parentElement) { const parentRect = element.parentElement.getBoundingClientRect(); const childRect = element.getBoundingClientRect(); @@ -1136,60 +1223,134 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates break; } } - // } const generalSelector = getSelectorPath(element); - return { - generalSelector, - }; + return { generalSelector }; }, coordinates); + return selectors || { generalSelector: '' }; } else { + // When we have a list selector, we need special handling while maintaining shadow DOM support const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { + // Helper function to get deepest element, traversing shadow DOM + function getDeepestElementFromPoint(x: number, y: number): HTMLElement | null { + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + let current = element; + let deepestElement = current; + let depth = 0; + const MAX_DEPTH = 4; + + while (current && depth < MAX_DEPTH) { + const shadowRoot = current.shadowRoot; + if (!shadowRoot) break; + + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepestElement = shadowElement; + current = shadowElement; + depth++; + } + + return deepestElement; + } + + // Generate basic selector from element's tag and classes function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); - - if (element.className) { - const classes = element.className.split(/\s+/).filter((cls: string) => Boolean(cls)); + + const className = typeof element.className === 'string' ? element.className : ''; + if (className) { + const classes = className.split(/\s+/) + .filter(cls => Boolean(cls) && !cls.startsWith('!') && !cls.includes(':')); + if (classes.length > 0) { - const validClasses = classes.filter((cls: string) => !cls.startsWith('!') && !cls.includes(':')); - if (validClasses.length > 0) { - selector += '.' + validClasses.map(cls => CSS.escape(cls)).join('.'); - } + selector += '.' + classes.map(cls => CSS.escape(cls)).join('.'); } } - + return selector; } - function getSelectorPath(element: HTMLElement | null): string { + // Get complete shadow DOM path for an element + function getShadowPath(element: HTMLElement): ShadowContext[] { + const path: ShadowContext[] = []; + let current = element; + let depth = 0; + const MAX_DEPTH = 4; + + while (current && depth < MAX_DEPTH) { + const rootNode = current.getRootNode(); + if (rootNode instanceof ShadowRoot) { + path.unshift({ + host: rootNode.host as HTMLElement, + root: rootNode, + element: current + }); + current = rootNode.host as HTMLElement; + depth++; + } else { + break; + } + } + return path; + } + + // Generate selector path specifically for list items + function getListItemSelectorPath(element: HTMLElement | null): string { + if (!element) return ''; + + // Check for shadow DOM path first + const shadowPath = getShadowPath(element); + if (shadowPath.length > 0) { + const selectorParts: string[] = []; + + shadowPath.forEach((context, index) => { + const hostSelector = getNonUniqueSelector(context.host); + + if (index === shadowPath.length - 1) { + const elementSelector = getNonUniqueSelector(element); + selectorParts.push(`${hostSelector} >> ${elementSelector}`); + } else { + selectorParts.push(hostSelector); + } + }); + + return selectorParts.join(' >> '); + } + + // For list items, we want a shallower path to better match list patterns const path: string[] = []; + let currentElement = element; let depth = 0; - const maxDepth = 2; + const MAX_LIST_DEPTH = 2; // Keeping shallow depth for list items - while (element && element !== document.body && depth < maxDepth) { - const selector = getNonUniqueSelector(element); + while (currentElement && currentElement !== document.body && depth < MAX_LIST_DEPTH) { + const selector = getNonUniqueSelector(currentElement); path.unshift(selector); - element = element.parentElement; + + if (!currentElement.parentElement) break; + currentElement = currentElement.parentElement; depth++; } return path.join(' > '); } - const originalEl = document.elementFromPoint(x, y) as HTMLElement; - if (!originalEl) return null; + // Main logic for list item selection + const originalEl = getDeepestElementFromPoint(x, y); + if (!originalEl) return { generalSelector: '' }; let element = originalEl; - const generalSelector = getSelectorPath(element); - return { - generalSelector, - }; - }, coordinates); - return selectors || { generalSelector: '' }; - } + const generalSelector = getListItemSelectorPath(element); + return { generalSelector }; + }, coordinates); + return selectors || { generalSelector: '' }; + } } catch (error) { console.error('Error in getNonUniqueSelectors:', error); return { generalSelector: '' }; @@ -1218,42 +1379,110 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro } // Function to generate selector path from an element to its parent - function getSelectorPath(element: HTMLElement | null): string { + function getSelectorPath(element: HTMLElement): string { if (!element || !element.parentElement) return ''; const parentSelector = getNonUniqueSelector(element.parentElement); const elementSelector = getNonUniqueSelector(element); + // Check if element is in shadow DOM + const rootNode = element.getRootNode(); + if (rootNode instanceof ShadowRoot) { + const hostSelector = getNonUniqueSelector(rootNode.host as HTMLElement); + return `${hostSelector} >> ${elementSelector}`; + } + return `${parentSelector} > ${elementSelector}`; } - // Function to recursively get all descendant selectors + // Function to get all shadow DOM children of an element + function getShadowChildren(element: HTMLElement): HTMLElement[] { + const children: HTMLElement[] = []; + + // Check if element has shadow root + const shadowRoot = element.shadowRoot; + if (shadowRoot) { + // Get all elements in the shadow DOM + const shadowElements = Array.from(shadowRoot.querySelectorAll('*')) as HTMLElement[]; + children.push(...shadowElements); + } + + return children; + } + + // Function to recursively get all descendant selectors including shadow DOM function getAllDescendantSelectors(element: HTMLElement): string[] { let selectors: string[] = []; + + // Handle regular DOM children const children = Array.from(element.children) as HTMLElement[]; - for (const child of children) { const childPath = getSelectorPath(child); if (childPath) { - selectors.push(childPath); // Add direct child path - selectors = selectors.concat(getAllDescendantSelectors(child)); // Recursively process descendants + selectors.push(childPath); + // Recursively process regular DOM descendants + selectors = selectors.concat(getAllDescendantSelectors(child)); + + // Check for shadow DOM in this child + const shadowChildren = getShadowChildren(child); + for (const shadowChild of shadowChildren) { + const shadowPath = getSelectorPath(shadowChild); + if (shadowPath) { + selectors.push(shadowPath); + // Recursively process shadow DOM descendants + selectors = selectors.concat(getAllDescendantSelectors(shadowChild)); + } + } + } + } + + // Handle direct shadow DOM children of the current element + const shadowChildren = getShadowChildren(element); + for (const shadowChild of shadowChildren) { + const shadowPath = getSelectorPath(shadowChild); + if (shadowPath) { + selectors.push(shadowPath); + selectors = selectors.concat(getAllDescendantSelectors(shadowChild)); } } return selectors; } - // Find all occurrences of the parent selector in the DOM - const parentElements = Array.from(document.querySelectorAll(parentSelector)) as HTMLElement[]; - const allChildSelectors = new Set(); // Use a set to ensure uniqueness + // Split the parent selector if it contains shadow DOM parts + const selectorParts = parentSelector.split('>>').map(part => part.trim()); + let parentElements: HTMLElement[] = []; + + // Handle shadow DOM traversal if needed + if (selectorParts.length > 1) { + // Start with the host elements + parentElements = Array.from(document.querySelectorAll(selectorParts[0])) as HTMLElement[]; + + // Traverse through shadow DOM parts + for (let i = 1; i < selectorParts.length; i++) { + const newParentElements: HTMLElement[] = []; + for (const element of parentElements) { + if (element.shadowRoot) { + const shadowChildren = Array.from(element.shadowRoot.querySelectorAll(selectorParts[i])) as HTMLElement[]; + newParentElements.push(...shadowChildren); + } + } + parentElements = newParentElements; + } + } else { + // Regular DOM selector + parentElements = Array.from(document.querySelectorAll(parentSelector)) as HTMLElement[]; + } + + const allChildSelectors = new Set(); // Process each parent element and its descendants parentElements.forEach((parentElement) => { const descendantSelectors = getAllDescendantSelectors(parentElement); - descendantSelectors.forEach((selector) => allChildSelectors.add(selector)); // Add selectors to the set + descendantSelectors.forEach((selector) => allChildSelectors.add(selector)); }); - return Array.from(allChildSelectors); // Convert the set back to an array + return Array.from(allChildSelectors); }, parentSelector); return childSelectors || []; From c6105b4ee226a562f80b7054fffb3acac23e9d23 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Wed, 1 Jan 2025 16:15:13 +0530 Subject: [PATCH 086/156] feat: generate highlighter for shadoDOM and mixedDOM elements --- src/components/organisms/BrowserWindow.tsx | 30 +++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 11fe8c558..442b7e504 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -120,7 +120,13 @@ export const BrowserWindow = () => { const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null, childSelectors?: string[] }) => { if (getList === true) { if (listSelector) { + console.log("LIST SELEECTORRRRR: ", listSelector); + console.log("DATA SELEECTORRRRR: ", data.selector); + console.log("CHILDREEENN SELECORRRR: ", data.childSelectors); socket?.emit('listSelector', { selector: listSelector }); + + const hasValidChildSelectors = Array.isArray(data.childSelectors) && data.childSelectors.length > 0; + if (limitMode) { setHighlighterData(null); } else if (paginationMode) { @@ -133,7 +139,29 @@ export const BrowserWindow = () => { } else if (data.childSelectors && data.childSelectors.includes(data.selector)) { // highlight only valid child elements within the listSelector setHighlighterData(data); - } else { + } else if (data.elementInfo?.isShadowRoot && data.childSelectors) { + // New case: Handle pure Shadow DOM elements + // Check if the selector matches any shadow root child selectors + const isShadowChild = data.childSelectors.some(childSelector => + data.selector.includes('>>') && // Shadow DOM uses >> for piercing + childSelector.split('>>').some(part => + data.selector.includes(part.trim()) + ) + ); + setHighlighterData(isShadowChild ? data : null); + } else if (data.selector.includes('>>') && hasValidChildSelectors) { + // New case: Handle mixed DOM cases + // Split the selector into parts and check each against child selectors + const selectorParts = data.selector.split('>>').map(part => part.trim()); + const isValidMixedSelector = selectorParts.some(part => + // Now we know data.childSelectors is defined + data.childSelectors!.some(childSelector => + childSelector.includes(part) + ) + ); + setHighlighterData(isValidMixedSelector ? data : null); + } + else { // if !valid child in normal mode, clear the highlighter setHighlighterData(null); } From 8db6279f05c25e671098f959bfe0b79f5d06cb4f Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Wed, 1 Jan 2025 16:39:36 +0530 Subject: [PATCH 087/156] feat: add shadowDOM support for scraping list --- maxun-core/src/browserSide/scraper.js | 146 ++++++++++++++++++++------ 1 file changed, 113 insertions(+), 33 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 00f8cef7c..caa783c8c 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -349,27 +349,100 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, window.scrapeList = async function ({ listSelector, fields, limit = 10 }) { const scrapedData = []; - while (scrapedData.length < limit) { - let parentElements = Array.from(document.querySelectorAll(listSelector)); + // Helper function to query through Shadow DOM + const queryShadowDOM = (rootElement, selector) => { + // Split the selector by Shadow DOM delimiter + const parts = selector.split('>>').map(part => part.trim()); + let currentElement = rootElement; + + // Traverse through each part of the selector + for (let i = 0; i < parts.length; i++) { + if (!currentElement) return null; + + // If we're at the document level (first part) + if (!currentElement.querySelector && !currentElement.shadowRoot) { + currentElement = document.querySelector(parts[i]); + continue; + } + + // Try to find element in regular DOM first + let nextElement = currentElement.querySelector(parts[i]); + + // If not found, check shadow DOM + if (!nextElement && currentElement.shadowRoot) { + nextElement = currentElement.shadowRoot.querySelector(parts[i]); + } + + // If still not found, try to find in shadow DOM of all child elements + if (!nextElement) { + const allChildren = Array.from(currentElement.children || []); + for (const child of allChildren) { + if (child.shadowRoot) { + nextElement = child.shadowRoot.querySelector(parts[i]); + if (nextElement) break; + } + } + } + + currentElement = nextElement; + } + + return currentElement; + }; + + // Helper function to query all elements through Shadow DOM + const queryShadowDOMAll = (rootElement, selector) => { + const parts = selector.split('>>').map(part => part.trim()); + let currentElements = [rootElement]; + + for (const part of parts) { + const nextElements = []; + + for (const element of currentElements) { + // Check regular DOM + if (element.querySelectorAll) { + nextElements.push(...element.querySelectorAll(part)); + } + + // Check shadow DOM + if (element.shadowRoot) { + nextElements.push(...element.shadowRoot.querySelectorAll(part)); + } + + // Check shadow DOM of children + const children = Array.from(element.children || []); + for (const child of children) { + if (child.shadowRoot) { + nextElements.push(...child.shadowRoot.querySelectorAll(part)); + } + } + } + + currentElements = nextElements; + } - // If we only got one element or none, try a more generic approach + return currentElements; + }; + + while (scrapedData.length < limit) { + // Use our shadow DOM query function to get parent elements + let parentElements = queryShadowDOMAll(document, listSelector); + parentElements = Array.from(parentElements); + + // Handle the case when we don't find enough elements if (limit > 1 && parentElements.length <= 1) { - const [containerSelector, _] = listSelector.split('>').map(s => s.trim()); - const container = document.querySelector(containerSelector); + const [containerSelector, ...rest] = listSelector.split('>>').map(s => s.trim()); + const container = queryShadowDOM(document, containerSelector); if (container) { - const allChildren = Array.from(container.children); + const allChildren = Array.from(container.children || []); + const firstMatch = queryShadowDOM(document, listSelector); - const firstMatch = document.querySelector(listSelector); if (firstMatch) { - // Get classes from the first matching element - const firstMatchClasses = Array.from(firstMatch.classList); + const firstMatchClasses = Array.from(firstMatch.classList || []); - // Find similar elements by matching most of their classes parentElements = allChildren.filter(element => { - const elementClasses = Array.from(element.classList); - - // Element should share at least 70% of classes with the first match + const elementClasses = Array.from(element.classList || []); const commonClasses = firstMatchClasses.filter(cls => elementClasses.includes(cls)); return commonClasses.length >= Math.floor(firstMatchClasses.length * 0.7); @@ -378,42 +451,49 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, } } - // Iterate through each parent element + // Process each parent element for (const parent of parentElements) { if (scrapedData.length >= limit) break; const record = {}; - // For each field, select the corresponding element within the parent + // Process each field using shadow DOM querying for (const [label, { selector, attribute }] of Object.entries(fields)) { - const fieldElement = parent.querySelector(selector); + // Use relative selector from parent + const relativeSelector = selector.split('>>').slice(-1)[0]; + const fieldElement = queryShadowDOM(parent, relativeSelector); if (fieldElement) { - if (attribute === 'innerText') { - record[label] = fieldElement.innerText.trim(); - } else if (attribute === 'innerHTML') { - record[label] = fieldElement.innerHTML.trim(); - } else if (attribute === 'src') { - // Handle relative 'src' URLs - const src = fieldElement.getAttribute('src'); - record[label] = src ? new URL(src, window.location.origin).href : null; - } else if (attribute === 'href') { - // Handle relative 'href' URLs - const href = fieldElement.getAttribute('href'); - record[label] = href ? new URL(href, window.location.origin).href : null; - } else { - record[label] = fieldElement.getAttribute(attribute); + switch (attribute) { + case 'innerText': + record[label] = fieldElement.innerText?.trim() || ''; + break; + case 'innerHTML': + record[label] = fieldElement.innerHTML?.trim() || ''; + break; + case 'src': + const src = fieldElement.getAttribute('src'); + record[label] = src ? new URL(src, window.location.origin).href : null; + break; + case 'href': + const href = fieldElement.getAttribute('href'); + record[label] = href ? new URL(href, window.location.origin).href : null; + break; + default: + record[label] = fieldElement.getAttribute(attribute); } } } - scrapedData.push(record); + + if (Object.keys(record).length > 0) { + scrapedData.push(record); + } } - // If we've processed all available elements and still haven't reached the limit, - // break to avoid infinite loop if (parentElements.length === 0 || scrapedData.length >= parentElements.length) { break; } } + return scrapedData; }; From 24915a93410aa1f309f27a7e18dd1bb0729f1b6f Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 22:55:33 +0530 Subject: [PATCH 088/156] feat: get notify from global info store --- package.json | 1 - src/components/molecules/ScheduleSettings.tsx | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/package.json b/package.json index e89f13de1..fc5e9edb1 100644 --- a/package.json +++ b/package.json @@ -46,7 +46,6 @@ "jwt-decode": "^4.0.0", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", - "maxun-core": "^0.0.7", "minio": "^8.0.1", "moment-timezone": "^0.5.45", "node-cron": "^3.0.3", diff --git a/src/components/molecules/ScheduleSettings.tsx b/src/components/molecules/ScheduleSettings.tsx index 3af0072fc..ea78720cf 100644 --- a/src/components/molecules/ScheduleSettings.tsx +++ b/src/components/molecules/ScheduleSettings.tsx @@ -79,7 +79,7 @@ export const ScheduleSettingsModal = ({ isOpen, handleStart, handleClose, initia 'SUNDAY' ]; - const { recordingId } = useGlobalInfoStore(); + const { recordingId, notify } = useGlobalInfoStore(); const deleteRobotSchedule = () => { if (recordingId) { From 8c2b92483b1a86996d970293f2a167ea05157af6 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 22:57:23 +0530 Subject: [PATCH 089/156] feat: notify on schedule delete --- src/components/molecules/ScheduleSettings.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/components/molecules/ScheduleSettings.tsx b/src/components/molecules/ScheduleSettings.tsx index ea78720cf..917696c9d 100644 --- a/src/components/molecules/ScheduleSettings.tsx +++ b/src/components/molecules/ScheduleSettings.tsx @@ -85,6 +85,7 @@ export const ScheduleSettingsModal = ({ isOpen, handleStart, handleClose, initia if (recordingId) { deleteSchedule(recordingId); setSchedule(null); + notify('success', t('Schedule deleted successfully')); } else { console.error('No recording id provided'); } From e61798855564427223f4c0177f45fedcbf854814 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:02:17 +0530 Subject: [PATCH 090/156] fix: revert local maxun-core changes --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index fc5e9edb1..e89f13de1 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ "jwt-decode": "^4.0.0", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", + "maxun-core": "^0.0.7", "minio": "^8.0.1", "moment-timezone": "^0.5.45", "node-cron": "^3.0.3", From 9b2ea1f5353466f63a92fefc8921bc1271619339 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:29:06 +0530 Subject: [PATCH 091/156] chore: cleanup space --- src/components/molecules/RecordingsTable.tsx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/components/molecules/RecordingsTable.tsx b/src/components/molecules/RecordingsTable.tsx index 01bc524b9..ecda3c8da 100644 --- a/src/components/molecules/RecordingsTable.tsx +++ b/src/components/molecules/RecordingsTable.tsx @@ -33,10 +33,6 @@ interface Column { format?: (value: string) => string; } - - - - interface Data { id: string; name: string; From cc6cc8ff8d03b17e942335bcd3770ad6d12f49aa Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:30:00 +0530 Subject: [PATCH 092/156] fix: format --- src/components/molecules/RecordingsTable.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/components/molecules/RecordingsTable.tsx b/src/components/molecules/RecordingsTable.tsx index ecda3c8da..f8a0ba37d 100644 --- a/src/components/molecules/RecordingsTable.tsx +++ b/src/components/molecules/RecordingsTable.tsx @@ -437,7 +437,6 @@ const OptionsButton = ({ handleEdit, handleDelete, handleDuplicate }: OptionsBut {t('recordingtable.duplicate')} - ); From 6d2507982077daceebcba4eba02d44c0c28bab58 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:30:46 +0530 Subject: [PATCH 093/156] fix: format --- src/api/storage.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/api/storage.ts b/src/api/storage.ts index 4b2f4e80d..9ae3bc47f 100644 --- a/src/api/storage.ts +++ b/src/api/storage.ts @@ -5,11 +5,6 @@ import { ScheduleSettings } from "../components/molecules/ScheduleSettings"; import { CreateRunResponse, ScheduleRunResponse } from "../pages/MainPage"; import { apiUrl } from "../apiConfig"; - - - - - export const getStoredRecordings = async (): Promise => { try { const response = await axios.get(`${apiUrl}/storage/recordings`); From 3b9e30ddae52efa4e450a948c393132dee883b67 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:31:21 +0530 Subject: [PATCH 094/156] fix: format --- src/api/storage.ts | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/src/api/storage.ts b/src/api/storage.ts index 9ae3bc47f..18c793c03 100644 --- a/src/api/storage.ts +++ b/src/api/storage.ts @@ -77,11 +77,7 @@ export const getStoredRecording = async (id: string) => { } } - - export const checkRunsForRecording = async (id: string): Promise => { - - try { const response = await axios.get(`${apiUrl}/storage/recordings/${id}/runs`); @@ -94,32 +90,26 @@ export const checkRunsForRecording = async (id: string): Promise => { } }; - export const deleteRecordingFromStorage = async (id: string): Promise => { - const hasRuns = await checkRunsForRecording(id); - + if (hasRuns) { - + return false; } try { const response = await axios.delete(`${apiUrl}/storage/recordings/${id}`); if (response.status === 200) { - + return true; } else { throw new Error(`Couldn't delete stored recording ${id}`); } } catch (error: any) { console.log(error); - + return false; } - - - - }; export const deleteRunFromStorage = async (id: string): Promise => { @@ -154,7 +144,7 @@ export const createRunForStoredRecording = async (id: string, settings: RunSetti try { const response = await axios.put( `${apiUrl}/storage/runs/${id}`, - { ...settings }); + { ...settings }); if (response.status === 200) { return response.data; } else { From 52aefd1c0f674cb0cac87e25124dd8c56027712b Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:31:37 +0530 Subject: [PATCH 095/156] fix: format --- src/api/workflow.ts | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/api/workflow.ts b/src/api/workflow.ts index 03b677b18..40ac0d992 100644 --- a/src/api/workflow.ts +++ b/src/api/workflow.ts @@ -3,7 +3,7 @@ import { emptyWorkflow } from "../shared/constants"; import { default as axios, AxiosResponse } from "axios"; import { apiUrl } from "../apiConfig"; -export const getActiveWorkflow = async(id: string) : Promise => { +export const getActiveWorkflow = async (id: string): Promise => { try { const response = await axios.get(`${apiUrl}/workflow/${id}`) if (response.status === 200) { @@ -11,13 +11,13 @@ export const getActiveWorkflow = async(id: string) : Promise => { } else { throw new Error('Something went wrong when fetching a recorded workflow'); } - } catch(error: any) { + } catch (error: any) { console.log(error); return emptyWorkflow; } }; -export const getParamsOfActiveWorkflow = async(id: string) : Promise => { +export const getParamsOfActiveWorkflow = async (id: string): Promise => { try { const response = await axios.get(`${apiUrl}/workflow/params/${id}`) if (response.status === 200) { @@ -25,15 +25,15 @@ export const getParamsOfActiveWorkflow = async(id: string) : Promise => { +export const deletePair = async (index: number): Promise => { try { - const response = await axios.delete(`${apiUrl}/workflow/pair/${index}`); + const response = await axios.delete(`${apiUrl}/workflow/pair/${index}`); if (response.status === 200) { return response.data; } else { @@ -45,11 +45,11 @@ export const deletePair = async(index: number): Promise => { } }; -export const AddPair = async(index: number, pair: WhereWhatPair): Promise => { +export const AddPair = async (index: number, pair: WhereWhatPair): Promise => { try { const response = await axios.post(`${apiUrl}/workflow/pair/${index}`, { pair, - }, {headers: {'Content-Type': 'application/json'}}); + }, { headers: { 'Content-Type': 'application/json' } }); if (response.status === 200) { return response.data; } else { @@ -61,11 +61,11 @@ export const AddPair = async(index: number, pair: WhereWhatPair): Promise => { +export const UpdatePair = async (index: number, pair: WhereWhatPair): Promise => { try { const response = await axios.put(`${apiUrl}/workflow/pair/${index}`, { pair, - }, {headers: {'Content-Type': 'application/json'}}); + }, { headers: { 'Content-Type': 'application/json' } }); if (response.status === 200) { return response.data; } else { From 735b33b84727439c4c354021a06016166661902c Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:53:58 +0530 Subject: [PATCH 096/156] fix: typo --- src/components/molecules/NavBar.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/molecules/NavBar.tsx b/src/components/molecules/NavBar.tsx index 142d45ab8..8577f30e7 100644 --- a/src/components/molecules/NavBar.tsx +++ b/src/components/molecules/NavBar.tsx @@ -318,7 +318,7 @@ export const NavBar: React.FC = ({ { window.open('https://x.com/maxun_io?ref=app', '_blank'); }}> - Twiiter (X) + Twiter (X) {t('navbar.menu_items.language')} From 22a99ff8b56788635581517bc863a3115b6b79db Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 1 Jan 2025 23:54:28 +0530 Subject: [PATCH 097/156] fix: twitter typo --- src/components/molecules/NavBar.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/molecules/NavBar.tsx b/src/components/molecules/NavBar.tsx index 8577f30e7..8aeeb05d5 100644 --- a/src/components/molecules/NavBar.tsx +++ b/src/components/molecules/NavBar.tsx @@ -318,7 +318,7 @@ export const NavBar: React.FC = ({ { window.open('https://x.com/maxun_io?ref=app', '_blank'); }}> - Twiter (X) + Twitter (X) {t('navbar.menu_items.language')} From c287340f845e9429fc5534dc38af6257b4d75826 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Thu, 2 Jan 2025 14:17:19 +0530 Subject: [PATCH 098/156] feat: shadowDOM support for table and non table list scraping --- maxun-core/src/browserSide/scraper.js | 345 +++++++++++++++++++++----- 1 file changed, 280 insertions(+), 65 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index caa783c8c..ff5a19388 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -347,33 +347,29 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ window.scrapeList = async function ({ listSelector, fields, limit = 10 }) { - const scrapedData = []; - - // Helper function to query through Shadow DOM + // Shadow DOM query functions remain unchanged const queryShadowDOM = (rootElement, selector) => { - // Split the selector by Shadow DOM delimiter + if (!selector.includes('>>')) { + return rootElement.querySelector(selector); + } + const parts = selector.split('>>').map(part => part.trim()); let currentElement = rootElement; - // Traverse through each part of the selector for (let i = 0; i < parts.length; i++) { if (!currentElement) return null; - // If we're at the document level (first part) if (!currentElement.querySelector && !currentElement.shadowRoot) { currentElement = document.querySelector(parts[i]); continue; } - // Try to find element in regular DOM first let nextElement = currentElement.querySelector(parts[i]); - // If not found, check shadow DOM if (!nextElement && currentElement.shadowRoot) { nextElement = currentElement.shadowRoot.querySelector(parts[i]); } - // If still not found, try to find in shadow DOM of all child elements if (!nextElement) { const allChildren = Array.from(currentElement.children || []); for (const child of allChildren) { @@ -390,8 +386,11 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return currentElement; }; - // Helper function to query all elements through Shadow DOM const queryShadowDOMAll = (rootElement, selector) => { + if (!selector.includes('>>')) { + return rootElement.querySelectorAll(selector); + } + const parts = selector.split('>>').map(part => part.trim()); let currentElements = [rootElement]; @@ -399,17 +398,14 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, const nextElements = []; for (const element of currentElements) { - // Check regular DOM if (element.querySelectorAll) { nextElements.push(...element.querySelectorAll(part)); } - // Check shadow DOM if (element.shadowRoot) { nextElements.push(...element.shadowRoot.querySelectorAll(part)); } - // Check shadow DOM of children const children = Array.from(element.children || []); for (const child of children) { if (child.shadowRoot) { @@ -424,76 +420,295 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return currentElements; }; - while (scrapedData.length < limit) { - // Use our shadow DOM query function to get parent elements - let parentElements = queryShadowDOMAll(document, listSelector); - parentElements = Array.from(parentElements); + // Enhanced table processing helper functions with shadow DOM support + function extractValue(element, attribute) { + if (!element) return null; + + // Check for shadow root first + if (element.shadowRoot) { + const shadowContent = element.shadowRoot.textContent; + if (shadowContent && shadowContent.trim()) { + return shadowContent.trim(); + } + } + + if (attribute === 'innerText') { + return element.innerText.trim(); + } else if (attribute === 'innerHTML') { + return element.innerHTML.trim(); + } else if (attribute === 'src' || attribute === 'href') { + const attrValue = element.getAttribute(attribute); + return attrValue ? new URL(attrValue, window.location.origin).href : null; + } + return element.getAttribute(attribute); + } - // Handle the case when we don't find enough elements - if (limit > 1 && parentElements.length <= 1) { - const [containerSelector, ...rest] = listSelector.split('>>').map(s => s.trim()); - const container = queryShadowDOM(document, containerSelector); + function findTableAncestor(element) { + let currentElement = element; + const MAX_DEPTH = 5; + let depth = 0; + + while (currentElement && depth < MAX_DEPTH) { + // Check if current element is in shadow DOM + if (currentElement.getRootNode() instanceof ShadowRoot) { + currentElement = currentElement.getRootNode().host; + continue; + } - if (container) { - const allChildren = Array.from(container.children || []); - const firstMatch = queryShadowDOM(document, listSelector); + if (currentElement.tagName === 'TD') { + return { type: 'TD', element: currentElement }; + } else if (currentElement.tagName === 'TR') { + return { type: 'TR', element: currentElement }; + } + currentElement = currentElement.parentElement; + depth++; + } + return null; + } + + function getCellIndex(td) { + let index = 0; + let sibling = td; + + // Handle shadow DOM case + if (td.getRootNode() instanceof ShadowRoot) { + const shadowRoot = td.getRootNode(); + const allCells = Array.from(shadowRoot.querySelectorAll('td')); + return allCells.indexOf(td); + } + + while (sibling = sibling.previousElementSibling) { + index++; + } + return index; + } + + function hasThElement(row, tableFields) { + for (const [label, { selector }] of Object.entries(tableFields)) { + const element = queryShadowDOM(row, selector); + if (element) { + let current = element; + while (current && current !== row) { + // Check if we're in shadow DOM + if (current.getRootNode() instanceof ShadowRoot) { + current = current.getRootNode().host; + continue; + } + + if (current.tagName === 'TH') { + return true; + } + current = current.parentElement; + } + } + } + return false; + } + + function filterRowsBasedOnTag(rows, tableFields) { + for (const row of rows) { + if (hasThElement(row, tableFields)) { + return rows; + } + } + // Include shadow DOM in TH search + return rows.filter(row => { + const directTH = row.getElementsByTagName('TH').length === 0; + const shadowTH = row.shadowRoot ? + row.shadowRoot.querySelector('th') === null : true; + return directTH && shadowTH; + }); + } + + // Class similarity functions remain unchanged + function calculateClassSimilarity(classList1, classList2) { + const set1 = new Set(classList1); + const set2 = new Set(classList2); + const intersection = new Set([...set1].filter(x => set2.has(x))); + const union = new Set([...set1, ...set2]); + return intersection.size / union.size; + } + + function findSimilarElements(baseElement, similarityThreshold = 0.7) { + const baseClasses = Array.from(baseElement.classList); + if (baseClasses.length === 0) return []; + const potentialElements = document.getElementsByTagName(baseElement.tagName); + return Array.from(potentialElements).filter(element => { + if (element === baseElement) return false; + const similarity = calculateClassSimilarity( + baseClasses, + Array.from(element.classList) + ); + return similarity >= similarityThreshold; + }); + } + + // Main scraping logic with shadow DOM support + let containers = queryShadowDOMAll(document, listSelector); + containers = Array.from(containers); + + if (containers.length === 0) return []; + + if (limit > 1 && containers.length === 1) { + const baseContainer = containers[0]; + const similarContainers = findSimilarElements(baseContainer); + + if (similarContainers.length > 0) { + const newContainers = similarContainers.filter(container => + !container.matches(listSelector) + ); + containers = [...containers, ...newContainers]; + } + } + + const containerFields = containers.map(() => ({ + tableFields: {}, + nonTableFields: {} + })); + + // Classify fields + containers.forEach((container, containerIndex) => { + for (const [label, field] of Object.entries(fields)) { + const sampleElement = queryShadowDOM(container, field.selector); + + if (sampleElement) { + const ancestor = findTableAncestor(sampleElement); + if (ancestor) { + containerFields[containerIndex].tableFields[label] = { + ...field, + tableContext: ancestor.type, + cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1 + }; + } else { + containerFields[containerIndex].nonTableFields[label] = field; + } + } else { + containerFields[containerIndex].nonTableFields[label] = field; + } + } + }); + + const tableData = []; + const nonTableData = []; + + // Process table data with shadow DOM support + for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { + const container = containers[containerIndex]; + const { tableFields } = containerFields[containerIndex]; + + if (Object.keys(tableFields).length > 0) { + const firstField = Object.values(tableFields)[0]; + const firstElement = queryShadowDOM(container, firstField.selector); + let tableContext = firstElement; + + // Find table context including shadow DOM + while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) { + if (tableContext.getRootNode() instanceof ShadowRoot) { + tableContext = tableContext.getRootNode().host; + } else { + tableContext = tableContext.parentElement; + } + } + + if (tableContext) { + // Get rows from both regular DOM and shadow DOM + const rows = []; + if (tableContext.shadowRoot) { + rows.push(...tableContext.shadowRoot.getElementsByTagName('TR')); + } + rows.push(...tableContext.getElementsByTagName('TR')); - if (firstMatch) { - const firstMatchClasses = Array.from(firstMatch.classList || []); + const processedRows = filterRowsBasedOnTag(rows, tableFields); + + for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) { + const record = {}; + const currentRow = processedRows[rowIndex]; - parentElements = allChildren.filter(element => { - const elementClasses = Array.from(element.classList || []); - const commonClasses = firstMatchClasses.filter(cls => - elementClasses.includes(cls)); - return commonClasses.length >= Math.floor(firstMatchClasses.length * 0.7); - }); + for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) { + let element = null; + + if (cellIndex >= 0) { + let td = currentRow.children[cellIndex]; + + // Check shadow DOM for td + if (!td && currentRow.shadowRoot) { + const shadowCells = currentRow.shadowRoot.children; + if (shadowCells && shadowCells.length > cellIndex) { + td = shadowCells[cellIndex]; + } + } + + if (td) { + element = queryShadowDOM(td, selector); + + if (!element && selector.split(">").pop().includes('td:nth-child')) { + element = td; + } + + if (!element) { + const tagOnlySelector = selector.split('.')[0]; + element = queryShadowDOM(td, tagOnlySelector); + } + + if (!element) { + let currentElement = td; + while (currentElement && currentElement.children.length > 0) { + let foundContentChild = false; + for (const child of currentElement.children) { + if (extractValue(child, attribute)) { + currentElement = child; + foundContentChild = true; + break; + } + } + if (!foundContentChild) break; + } + element = currentElement; + } + } + } else { + element = queryShadowDOM(currentRow, selector); + } + + if (element) { + record[label] = extractValue(element, attribute); + } + } + + if (Object.keys(record).length > 0) { + tableData.push(record); + } } } } + } - // Process each parent element - for (const parent of parentElements) { - if (scrapedData.length >= limit) break; + // Non-table data scraping remains unchanged + for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { + if (nonTableData.length >= limit) break; + + const container = containers[containerIndex]; + const { nonTableFields } = containerFields[containerIndex]; + + if (Object.keys(nonTableFields).length > 0) { const record = {}; - // Process each field using shadow DOM querying - for (const [label, { selector, attribute }] of Object.entries(fields)) { - // Use relative selector from parent + for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) { const relativeSelector = selector.split('>>').slice(-1)[0]; - const fieldElement = queryShadowDOM(parent, relativeSelector); - - if (fieldElement) { - switch (attribute) { - case 'innerText': - record[label] = fieldElement.innerText?.trim() || ''; - break; - case 'innerHTML': - record[label] = fieldElement.innerHTML?.trim() || ''; - break; - case 'src': - const src = fieldElement.getAttribute('src'); - record[label] = src ? new URL(src, window.location.origin).href : null; - break; - case 'href': - const href = fieldElement.getAttribute('href'); - record[label] = href ? new URL(href, window.location.origin).href : null; - break; - default: - record[label] = fieldElement.getAttribute(attribute); - } + const element = queryShadowDOM(container, relativeSelector); + + if (element) { + record[label] = extractValue(element, attribute); } } - + if (Object.keys(record).length > 0) { - scrapedData.push(record); + nonTableData.push(record); } } - - if (parentElements.length === 0 || scrapedData.length >= parentElements.length) { - break; - } } + const scrapedData = [...tableData, ...nonTableData]; return scrapedData; }; From ec0bc75097c287a9ffce1b0fcc47600a96f781c8 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Thu, 2 Jan 2025 18:12:10 +0530 Subject: [PATCH 099/156] docs: update website to maxun.dev --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cebcedd2f..47e170b5a 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web

- Website | + Website | Discord | Twitter | Join Maxun Cloud | From b6faf5cf17736dcc99ffa0b146031f23ccc55f80 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Thu, 2 Jan 2025 19:35:03 +0530 Subject: [PATCH 100/156] feat: add iframeSelector generation logic for capture text --- server/src/workflow-management/selector.ts | 433 +++++++++++---------- 1 file changed, 223 insertions(+), 210 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index dd869f3d0..6ed6a9970 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -23,119 +23,64 @@ export const getElementInformation = async ( if (!getList || listSelector !== '') { const elementInfo = await page.evaluate( async ({ x, y }) => { - // Helper function to get element info - const getElementInfo = (element: HTMLElement) => { - let info: { - tagName: string; - hasOnlyText?: boolean; - innerText?: string; - url?: string; - imageUrl?: string; - attributes?: Record; - innerHTML?: string; - outerHTML?: string; - fromIframe?: boolean; - iframePath?: string[]; - } = { - tagName: element?.tagName ?? '', - }; - - if (element) { - info.attributes = Array.from(element.attributes).reduce( - (acc, attr) => { - acc[attr.name] = attr.value; - return acc; - }, - {} as Record - ); - } + // Helper function to find elements within iframes, handling nested cases + const getElementFromIframePoint = ( + x: number, + y: number, + context: Document = document, + iframePath: string[] = [] + ): { element: HTMLElement | null; iframePath: string[] } => { + // First try to get element at the given coordinates + let element = context.elementFromPoint(x, y) as HTMLElement; + if (!element) return { element: null, iframePath }; + + // Check if we found an iframe + if (element.tagName === 'IFRAME') { + const iframe = element as HTMLIFrameElement; + try { + // Make sure we can access the iframe's content + if (!iframe.contentDocument) { + return { element, iframePath }; + } - if (element?.tagName === 'A') { - info.url = (element as HTMLAnchorElement).href; - info.innerText = element.innerText ?? ''; - } else if (element?.tagName === 'IMG') { - info.imageUrl = (element as HTMLImageElement).src; - } else if (element?.tagName === 'SELECT') { - const selectElement = element as HTMLSelectElement; - info.innerText = selectElement.options[selectElement.selectedIndex]?.text ?? ''; - info.attributes = { - ...info.attributes, - selectedValue: selectElement.value, - }; - } else if (element?.tagName === 'INPUT' && - ((element as HTMLInputElement).type === 'time' || - (element as HTMLInputElement).type === 'date')) { - info.innerText = (element as HTMLInputElement).value; - } else { - info.hasOnlyText = element?.children?.length === 0 && - element?.innerText?.length > 0; - info.innerText = element?.innerText ?? ''; + // Transform coordinates to iframe's space + const rect = iframe.getBoundingClientRect(); + const relativeX = x - rect.left; + const relativeY = y - rect.top; + + // Add this iframe to the path + const updatedPath = [...iframePath, iframe.id || 'unnamed-iframe']; + + // Recursively search within the iframe + const iframeResult = getElementFromIframePoint( + relativeX, + relativeY, + iframe.contentDocument, + updatedPath + ); + + // If we found an element in the iframe, return it + if (iframeResult.element) { + return iframeResult; + } + } catch (e) { + console.warn('Cannot access iframe content:', e); + } } - info.innerHTML = element.innerHTML; - info.outerHTML = element.outerHTML; - return info; + // Return the element we found (either in main document or iframe) + return { element, iframePath }; }; - // Helper function to search in iframe - const searchInIframe = ( - iframe: HTMLIFrameElement, - relativeX: number, - relativeY: number, - iframePath: string[] - ) => { - try { - if (!iframe.contentDocument) return null; - - const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; - if (!el) return null; - - const { parentElement } = el; - const element = parentElement?.tagName === 'A' ? parentElement : el; - - const info = getElementInfo(element); - info.fromIframe = true; - info.iframePath = iframePath; - - return info; - } catch (e) { - console.warn('Cannot access iframe content:', e); - return null; - } - }; - - const el = document.elementFromPoint(x, y) as HTMLElement; + // Get the element and its iframe path + const { element: el, iframePath } = getElementFromIframePoint(x, y); + if (el) { - // Check if the element is an iframe - if (el.tagName === 'IFRAME') { - const iframe = el as HTMLIFrameElement; - const rect = iframe.getBoundingClientRect(); - const relativeX = x - rect.left; - const relativeY = y - rect.top; - - const iframeResult = searchInIframe( - iframe, - relativeX, - relativeY, - [iframe.id || 'unnamed-iframe'] - ); - if (iframeResult) return iframeResult; - } - + // Handle potential anchor parent const { parentElement } = el; - const element = parentElement?.tagName === 'A' ? parentElement : el; - return getElementInfo(element); - } - return null; - }, - { x: coordinates.x, y: coordinates.y } - ); - return elementInfo; - } else { - const elementInfo = await page.evaluate( - async ({ x, y }) => { - // Helper function to get element info (same as above) - const getElementInfo = (element: HTMLElement) => { + const targetElement = parentElement?.tagName === 'A' ? parentElement : el; + + // Build the element information object let info: { tagName: string; hasOnlyText?: boolean; @@ -148,122 +93,40 @@ export const getElementInformation = async ( fromIframe?: boolean; iframePath?: string[]; } = { - tagName: element?.tagName ?? '', + tagName: targetElement?.tagName ?? '', + fromIframe: iframePath.length > 0, + iframePath: iframePath.length > 0 ? iframePath : undefined }; - if (element) { - info.attributes = Array.from(element.attributes).reduce( + // Collect element attributes and properties + if (targetElement) { + // Get all attributes + info.attributes = Array.from(targetElement.attributes).reduce( (acc, attr) => { acc[attr.name] = attr.value; return acc; }, {} as Record ); - } - if (element?.tagName === 'A') { - info.url = (element as HTMLAnchorElement).href; - info.innerText = element.innerText ?? ''; - } else if (element?.tagName === 'IMG') { - info.imageUrl = (element as HTMLImageElement).src; - } else { - info.hasOnlyText = element?.children?.length === 0 && - element?.innerText?.length > 0; - info.innerText = element?.innerText ?? ''; - } - - info.innerHTML = element.innerHTML; - info.outerHTML = element.outerHTML; - return info; - }; - - // Helper function to search in iframe (same as above) - const searchInIframe = ( - iframe: HTMLIFrameElement, - relativeX: number, - relativeY: number, - iframePath: string[] - ) => { - try { - if (!iframe.contentDocument) return null; - - const el = iframe.contentDocument.elementFromPoint(relativeX, relativeY) as HTMLElement; - if (!el) return null; - - let element = el; - while (element.parentElement) { - const parentRect = element.parentElement.getBoundingClientRect(); - const childRect = element.getBoundingClientRect(); - - const fullyContained = - parentRect.left <= childRect.left && - parentRect.right >= childRect.right && - parentRect.top <= childRect.top && - parentRect.bottom >= childRect.bottom; - - const significantOverlap = - (childRect.width * childRect.height) / - (parentRect.width * parentRect.height) > 0.5; - - if (fullyContained && significantOverlap) { - element = element.parentElement; - } else { - break; - } - } - - const info = getElementInfo(element); - info.fromIframe = true; - info.iframePath = iframePath; - - return info; - } catch (e) { - console.warn('Cannot access iframe content:', e); - return null; - } - }; - - const originalEl = document.elementFromPoint(x, y) as HTMLElement; - if (originalEl) { - // Check if the element is an iframe - if (originalEl.tagName === 'IFRAME') { - const iframe = originalEl as HTMLIFrameElement; - const rect = iframe.getBoundingClientRect(); - const relativeX = x - rect.left; - const relativeY = y - rect.top; - - const iframeResult = searchInIframe( - iframe, - relativeX, - relativeY, - [iframe.id || 'unnamed-iframe'] - ); - if (iframeResult) return iframeResult; - } - - let element = originalEl; - while (element.parentElement) { - const parentRect = element.parentElement.getBoundingClientRect(); - const childRect = element.getBoundingClientRect(); - - const fullyContained = - parentRect.left <= childRect.left && - parentRect.right >= childRect.right && - parentRect.top <= childRect.top && - parentRect.bottom >= childRect.bottom; - - const significantOverlap = - (childRect.width * childRect.height) / - (parentRect.width * parentRect.height) > 0.5; - - if (fullyContained && significantOverlap) { - element = element.parentElement; + // Handle specific element types + if (targetElement.tagName === 'A') { + info.url = (targetElement as HTMLAnchorElement).href; + info.innerText = targetElement.textContent ?? ''; + } else if (targetElement.tagName === 'IMG') { + info.imageUrl = (targetElement as HTMLImageElement).src; } else { - break; + info.hasOnlyText = targetElement.children.length === 0 && + (targetElement.textContent !== null && + targetElement.textContent.trim().length > 0); + info.innerText = targetElement.textContent ?? ''; } + + info.innerHTML = targetElement.innerHTML; + info.outerHTML = targetElement.outerHTML; } - return getElementInfo(element); + return info; } return null; }, @@ -271,6 +134,7 @@ export const getElementInformation = async ( ); return elementInfo; } + // ... rest of the code remains same } catch (error) { const { message, stack } = error as Error; console.error('Error while retrieving selector:', message); @@ -984,6 +848,148 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { } return output; } + + const getIframeOffset = (iframe: HTMLIFrameElement): { x: number; y: number } => { + const rect = iframe.getBoundingClientRect(); + return { + x: rect.left, + y: rect.top + }; + }; + + const isAccessibleIframe = (iframe: HTMLIFrameElement): boolean => { + try { + return !!iframe.contentDocument; + } catch (e) { + return false; + } + }; + + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // Get the initial element at the specified coordinates + let currentElement = document.elementFromPoint(x, y) as HTMLElement; + if (!currentElement) return null; + + let deepestElement = currentElement; + let current = currentElement; + let currentX = x; + let currentY = y; + let depth = 0; + const MAX_DEPTH = 20; // Prevent infinite loops with deeply nested iframes + + // Continue traversing while we find nested iframes + while (current && depth < MAX_DEPTH) { + // Check if the current element is an iframe and if we can access it + if (current instanceof HTMLIFrameElement && isAccessibleIframe(current)) { + // Calculate the offset of the iframe + const iframeOffset = getIframeOffset(current); + + // Transform coordinates to be relative to the iframe's content window + const relativeX = currentX - iframeOffset.x; + const relativeY = currentY - iframeOffset.y; + + // Find the element at these coordinates within the iframe + const iframeElement = current.contentDocument?.elementFromPoint(relativeX, relativeY) as HTMLElement; + + // If we don't find an element or we get the same element, stop traversing + if (!iframeElement || iframeElement === current) break; + + // Update our tracking variables + deepestElement = iframeElement; + current = iframeElement; + currentX = relativeX; + currentY = relativeY; + depth++; + } else { + // If the current element is not an iframe, we're done traversing + break; + } + } + + return deepestElement; + }; + + interface IframeContext { + frame: HTMLIFrameElement; + document: Document; + element: HTMLElement; + } + + const genSelectorForIframe = (element: HTMLElement) => { + // Helper function to check if we can access an iframe's content + const isAccessibleIframe = (iframe: HTMLIFrameElement): boolean => { + try { + return !!iframe.contentDocument; + } catch (e) { + return false; + } + }; + + // Get complete path up through nested iframes to document root + const getIframePath = (el: HTMLElement) => { + const path: IframeContext[] = []; + let current = el; + let currentDoc = el.ownerDocument; + let depth = 0; + const MAX_DEPTH = 20; // Limit depth to prevent infinite loops + + while (current && depth < MAX_DEPTH) { + // If we're in an iframe, get its parent document + const frameElement = currentDoc.defaultView?.frameElement as HTMLIFrameElement; + if (frameElement && isAccessibleIframe(frameElement)) { + path.unshift({ + frame: frameElement, + document: currentDoc, + element: current + }); + current = frameElement; + currentDoc = frameElement.ownerDocument; + depth++; + } else { + break; + } + } + return path; + }; + + // Get the iframe path for our target element + const iframePath = getIframePath(element); + if (iframePath.length === 0) return null; + + try { + const selectorParts: string[] = []; + + // Generate selector for each iframe boundary + iframePath.forEach((context, index) => { + // Get selector for the iframe element in its parent document + const frameSelector = finder(context.frame, { + root: index === 0 ? document.body : (iframePath[index - 1].document.body as Element) + }); + + // For the last context, get selector for target element + if (index === iframePath.length - 1) { + const elementSelector = finder(element, { + root: context.document.body as Element + }); + // Use :>> for iframe traversal in the selector + selectorParts.push(`${frameSelector} :>> ${elementSelector}`); + } else { + selectorParts.push(frameSelector); + } + }); + + return { + // Join all parts with :>> to indicate iframe traversal + fullSelector: selectorParts.join(' :>> '), + // Include additional metadata about the frames if needed + frameCount: iframePath.length, + isAccessible: true + }; + } catch (e) { + console.warn('Error generating iframe selector:', e); + return null; + } + }; const genSelectors = (element: HTMLElement | null) => { if (element == null) { @@ -1004,6 +1010,8 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { } catch (e) { } + const iframeSelector = genSelectorForIframe(element); + const hrefSelector = genSelectorForAttributes(element, ['href']); const formSelector = genSelectorForAttributes(element, [ 'name', @@ -1050,6 +1058,11 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { hrefSelector, accessibilitySelector, formSelector, + iframeSelector: iframeSelector ? { + full: iframeSelector.fullSelector, + frame: iframeSelector.frameCount, + accesible: iframeSelector.isAccessible + } : null }; } @@ -1092,7 +1105,7 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { return char.length === 1 && char.match(/[0-9]/); } - const hoveredElement = document.elementFromPoint(x, y) as HTMLElement; + const hoveredElement = getDeepestElementFromPoint(x, y) as HTMLElement; if ( hoveredElement != null && !hoveredElement.closest('#overlay-controls') != null From 8323593bb09d0b9a869afb825aa58be3944199b9 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Thu, 2 Jan 2025 21:18:49 +0530 Subject: [PATCH 101/156] chore: format --- src/components/organisms/BrowserWindow.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 442b7e504..2a5f77585 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -9,7 +9,6 @@ import { useBrowserSteps, TextStep } from '../../context/browserSteps'; import { useGlobalInfoStore } from '../../context/globalInfo'; import { useTranslation } from 'react-i18next'; - interface ElementInfo { tagName: string; hasOnlyText?: boolean; From e91a3916a0513af15d306adc71fdd68bb9250e7d Mon Sep 17 00:00:00 2001 From: amhsirak Date: Thu, 2 Jan 2025 21:19:36 +0530 Subject: [PATCH 102/156] chore: cleanup console logs --- src/components/organisms/BrowserWindow.tsx | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 2f66e906e..ad58a3098 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -119,9 +119,6 @@ export const BrowserWindow = () => { const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null, childSelectors?: string[] }) => { if (getList === true) { if (listSelector) { - console.log("LIST SELEECTORRRRR: ", listSelector); - console.log("DATA SELEECTORRRRR: ", data.selector); - console.log("CHILDREEENN SELECORRRR: ", data.childSelectors); socket?.emit('listSelector', { selector: listSelector }); const hasValidChildSelectors = Array.isArray(data.childSelectors) && data.childSelectors.length > 0; From edfcd8f869f194f7525744d050e30ba81a8bafef Mon Sep 17 00:00:00 2001 From: amhsirak Date: Thu, 2 Jan 2025 23:15:03 +0530 Subject: [PATCH 103/156] fix: format --- src/components/organisms/BrowserWindow.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index b69a09213..421bb6807 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -326,7 +326,6 @@ export const BrowserWindow = () => { } }, [paginationMode, resetPaginationSelector]); - return (

{ From af237ba1b0d9973e94f3595fdcc8610c8eac03d5 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Fri, 3 Jan 2025 20:06:13 +0530 Subject: [PATCH 104/156] fix: update custom limit if value >= 1 --- src/components/organisms/RightSidePanel.tsx | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 8211a64a1..d4670d4f0 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -529,7 +529,22 @@ export const RightSidePanel: React.FC = ({ onFinishCapture updateCustomLimit(e.target.value)} + onChange={(e: React.ChangeEvent) => { + const value = parseInt(e.target.value); + // Only update if the value is greater than or equal to 1 or if the field is empty + if (e.target.value === '' || value >= 1) { + updateCustomLimit(e.target.value); + } + }} + inputProps={{ + min: 1, + onKeyPress: (e: React.KeyboardEvent) => { + const value = (e.target as HTMLInputElement).value + e.key; + if (parseInt(value) < 1) { + e.preventDefault(); + } + } + }} placeholder={t('right_panel.limit.enter_number')} sx={{ marginLeft: '10px', From 35a44bb39fcff9381417918dd37fc43d43ee3519 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Fri, 3 Jan 2025 20:18:03 +0530 Subject: [PATCH 105/156] fix: rm translation for alt maxun_logo --- src/components/molecules/ActionDescriptionBox.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/molecules/ActionDescriptionBox.tsx b/src/components/molecules/ActionDescriptionBox.tsx index 190c58384..45ec16415 100644 --- a/src/components/molecules/ActionDescriptionBox.tsx +++ b/src/components/molecules/ActionDescriptionBox.tsx @@ -113,7 +113,7 @@ const ActionDescriptionBox = () => { return ( - + {renderActionDescription()} From 1874e71e0f5ec86a1ba2cad4891dfdce8f1f19b7 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Fri, 3 Jan 2025 20:22:30 +0530 Subject: [PATCH 106/156] fix: add translation for same name robot creation --- src/components/molecules/SaveRecording.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/molecules/SaveRecording.tsx b/src/components/molecules/SaveRecording.tsx index 8e1eb462e..cc51f2383 100644 --- a/src/components/molecules/SaveRecording.tsx +++ b/src/components/molecules/SaveRecording.tsx @@ -101,7 +101,7 @@ export const SaveRecording = ({ fileName }: SaveRecordingProps) => { - {t('save_recording.warnings.robot_exists')} + {t('save_recording.errors.exists_warning')} ) :