diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index f6b53da2e..c04a99936 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -210,7 +210,6 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return Array.from(document.querySelectorAll(config.selector)); } - // First handle iframe traversal if present if (config.selector.includes(':>>')) { const parts = config.selector.split(':>>').map(s => s.trim()); let currentElements = [document]; @@ -223,23 +222,44 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, for (const element of currentElements) { try { - // For document or iframe document const doc = element.contentDocument || element || element.contentWindow?.document; if (!doc) continue; - // Query elements in current context + if (part.startsWith('frame[name=') || part.startsWith('iframe[name=')) { + const nameMatch = part.match(/\[name=['"]([^'"]+)['"]\]/); + if (nameMatch && nameMatch[1]) { + const frameName = nameMatch[1]; + let foundFrames = []; + + if (doc.getElementsByName && typeof doc.getElementsByName === 'function') { + foundFrames = Array.from(doc.getElementsByName(frameName)) + .filter(el => el.tagName === 'FRAME' || el.tagName === 'IFRAME'); + } + + if (foundFrames.length === 0) { + const framesBySelector = Array.from(doc.querySelectorAll(`frame[name="${frameName}"], iframe[name="${frameName}"]`)); + foundFrames = framesBySelector; + } + + if (isLast) { + nextElements.push(...foundFrames); + } else { + nextElements.push(...foundFrames); + } + continue; + } + } + const found = Array.from(doc.querySelectorAll(part)); if (isLast) { - // If it's the last part, keep all matching elements nextElements.push(...found); } else { - // If not last, only keep iframes for next iteration - const iframes = found.filter(el => el.tagName === 'IFRAME'); - nextElements.push(...iframes); + const frames = found.filter(el => el.tagName === 'IFRAME' || el.tagName === 'FRAME'); + nextElements.push(...frames); } } catch (error) { - console.warn('Cannot access iframe content:', error, { + console.warn('Cannot access iframe/frame content:', error, { part, element, index: i @@ -285,12 +305,17 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return []; } - // Modified to handle iframe context for URL resolution function getElementValue(element, attribute) { if (!element) return null; - // Get the base URL for resolving relative URLs - const baseURL = element.ownerDocument?.location?.href || window.location.origin; + let baseURL; + try { + baseURL = element.ownerDocument?.location?.href || + element.ownerDocument?.baseURI || + window.location.origin; + } catch (e) { + baseURL = window.location.origin; + } switch (attribute) { case 'href': { @@ -305,6 +330,10 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return element.innerText?.trim(); case 'textContent': return element.textContent?.trim(); + case 'innerHTML': + return element.innerHTML; + case 'outerHTML': + return element.outerHTML; default: return element.getAttribute(attribute) || element.innerText?.trim(); } @@ -394,7 +423,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ window.scrapeList = async function ({ listSelector, fields, limit = 10 }) { - // Enhanced query function to handle both iframe and shadow DOM + // Enhanced query function to handle iframe, frame and shadow DOM const queryElement = (rootElement, selector) => { if (!selector.includes('>>') && !selector.includes(':>>')) { return rootElement.querySelector(selector); @@ -406,14 +435,14 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, for (let i = 0; i < parts.length; i++) { if (!currentElement) return null; - // Handle iframe traversal - if (currentElement.tagName === 'IFRAME') { + // Handle iframe and frame traversal + if (currentElement.tagName === 'IFRAME' || currentElement.tagName === 'FRAME') { try { - const iframeDoc = currentElement.contentDocument || currentElement.contentWindow.document; - currentElement = iframeDoc.querySelector(parts[i]); + const frameDoc = currentElement.contentDocument || currentElement.contentWindow.document; + currentElement = frameDoc.querySelector(parts[i]); continue; } catch (e) { - console.warn('Cannot access iframe content:', e); + console.warn(`Cannot access ${currentElement.tagName.toLowerCase()} content:`, e); return null; } } @@ -456,13 +485,13 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, const nextElements = []; for (const element of currentElements) { - // Handle iframe traversal - if (element.tagName === 'IFRAME') { + // Handle iframe and frame traversal + if (element.tagName === 'IFRAME' || element.tagName === 'FRAME') { try { - const iframeDoc = element.contentDocument || element.contentWindow.document; - nextElements.push(...iframeDoc.querySelectorAll(part)); + const frameDoc = element.contentDocument || element.contentWindow.document; + nextElements.push(...frameDoc.querySelectorAll(part)); } catch (e) { - console.warn('Cannot access iframe content:', e); + console.warn(`Cannot access ${element.tagName.toLowerCase()} content:`, e); continue; } } else { @@ -537,8 +566,8 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return { type: 'TR', element: currentElement }; } - // Handle iframe crossing - if (currentElement.tagName === 'IFRAME') { + // Handle iframe and frame crossing + if (currentElement.tagName === 'IFRAME' || currentElement.tagName === 'FRAME') { try { currentElement = currentElement.contentDocument.body; } catch (e) { @@ -582,7 +611,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, if (current.tagName === 'TH') return true; - if (current.tagName === 'IFRAME') { + if (current.tagName === 'IFRAME' || current.tagName === 'FRAME') { try { current = current.contentDocument.body; } catch (e) { @@ -638,14 +667,18 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, allElements.push(...shadowHost.getElementsByTagName(baseElement.tagName)); } - // Get elements from iframes - const iframes = document.getElementsByTagName('iframe'); - for (const iframe of iframes) { + // Get elements from iframes and frames + const frames = [ + ...Array.from(document.getElementsByTagName('iframe')), + ...Array.from(document.getElementsByTagName('frame')) + ]; + + for (const frame of frames) { try { - const iframeDoc = iframe.contentDocument || iframe.contentWindow.document; - allElements.push(...iframeDoc.getElementsByTagName(baseElement.tagName)); + const frameDoc = frame.contentDocument || frame.contentWindow.document; + allElements.push(...frameDoc.getElementsByTagName(baseElement.tagName)); } catch (e) { - console.warn('Cannot access iframe content:', e); + console.warn(`Cannot access ${frame.tagName.toLowerCase()} content:`, e); } } @@ -707,7 +740,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, const tableData = []; const nonTableData = []; - // Process table data with both iframe and shadow DOM support + // Process table data with support for iframes, frames, and shadow DOM for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { const container = containers[containerIndex]; const { tableFields } = containerFields[containerIndex]; @@ -717,14 +750,14 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, const firstElement = queryElement(container, firstField.selector); let tableContext = firstElement; - // Find table context including both iframe and shadow DOM + // Find table context including iframe, frame and shadow DOM while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) { if (tableContext.getRootNode() instanceof ShadowRoot) { tableContext = tableContext.getRootNode().host; continue; } - if (tableContext.tagName === 'IFRAME') { + if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') { try { tableContext = tableContext.contentDocument.body; } catch (e) { @@ -747,13 +780,13 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, rows.push(...tableContext.shadowRoot.getElementsByTagName('TR')); } - // Get rows from iframes - if (tableContext.tagName === 'IFRAME') { + // Get rows from iframes and frames + if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') { try { - const iframeDoc = tableContext.contentDocument || tableContext.contentWindow.document; - rows.push(...iframeDoc.getElementsByTagName('TR')); + const frameDoc = tableContext.contentDocument || tableContext.contentWindow.document; + rows.push(...frameDoc.getElementsByTagName('TR')); } catch (e) { - console.warn('Cannot access iframe rows:', e); + console.warn(`Cannot access ${tableContext.tagName.toLowerCase()} rows:`, e); } } @@ -823,7 +856,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, } } - // Process non-table data with both contexts support + // Process non-table data with all contexts support for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { if (nonTableData.length >= limit) break; diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 880471285..2887b6347 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -286,6 +286,12 @@ export default class Interpreter extends EventEmitter { ? arrayToObject(superset[key]) : superset[key]; + if ((key === 'url' || key === 'selectors') && + Array.isArray(value) && Array.isArray(superset[key]) && + value.length === 0 && (superset[key] as any[]).length === 0) { + return true; + } + if (key === 'selectors' && Array.isArray(value) && Array.isArray(superset[key])) { return value.some(selector => (superset[key] as any[]).includes(selector) @@ -592,33 +598,52 @@ export default class Interpreter extends EventEmitter { }; // Enhanced button finder with retry mechanism - const findWorkingButton = async (selectors: string[], retryCount = 0): Promise<{ - button: ElementHandle | null, - workingSelector: string | null + const findWorkingButton = async (selectors: string[]): Promise<{ + button: ElementHandle | null, + workingSelector: string | null, + updatedSelectors: string[] }> => { - for (const selector of selectors) { - try { - const button = await page.waitForSelector(selector, { - state: 'attached', - timeout: 10000 // Reduced timeout for faster checks - }); - if (button) { - debugLog('Found working selector:', selector); - return { button, workingSelector: selector }; + let updatedSelectors = [...selectors]; + + for (let i = 0; i < selectors.length; i++) { + const selector = selectors[i]; + let retryCount = 0; + let selectorSuccess = false; + + while (retryCount < MAX_RETRIES && !selectorSuccess) { + try { + const button = await page.waitForSelector(selector, { + state: 'attached', + timeout: 10000 + }); + + if (button) { + debugLog('Found working selector:', selector); + return { + button, + workingSelector: selector, + updatedSelectors + }; + } + } catch (error) { + retryCount++; + debugLog(`Selector "${selector}" failed: attempt ${retryCount}/${MAX_RETRIES}`); + + if (retryCount < MAX_RETRIES) { + await page.waitForTimeout(RETRY_DELAY); + } else { + debugLog(`Removing failed selector "${selector}" after ${MAX_RETRIES} attempts`); + updatedSelectors = updatedSelectors.filter(s => s !== selector); + } } - } catch (error) { - debugLog(`Selector failed: ${selector}`); } } - - // Implement retry mechanism when no selectors work - if (selectors.length > 0 && retryCount < MAX_RETRIES) { - debugLog(`Retry attempt ${retryCount + 1} of ${MAX_RETRIES}`); - await page.waitForTimeout(RETRY_DELAY); - return findWorkingButton(selectors, retryCount + 1); - } - - return { button: null, workingSelector: null }; + + return { + button: null, + workingSelector: null, + updatedSelectors + }; }; const retryOperation = async (operation: () => Promise, retryCount = 0): Promise => { @@ -680,7 +705,10 @@ export default class Interpreter extends EventEmitter { await scrapeCurrentPage(); if (checkLimit()) return allResults; - const { button, workingSelector } = await findWorkingButton(availableSelectors); + const { button, workingSelector, updatedSelectors } = await findWorkingButton(availableSelectors); + + availableSelectors = updatedSelectors; + if (!button || !workingSelector) { // Final retry for navigation when no selectors work const success = await retryOperation(async () => { @@ -697,10 +725,6 @@ export default class Interpreter extends EventEmitter { break; } - availableSelectors = availableSelectors.slice( - availableSelectors.indexOf(workingSelector) - ); - let retryCount = 0; let navigationSuccess = false; @@ -768,22 +792,25 @@ export default class Interpreter extends EventEmitter { } case 'clickLoadMore': { + await scrapeCurrentPage(); + if (checkLimit()) return allResults; + + let loadMoreCounter = 0; + let previousResultCount = allResults.length; + let noNewItemsCounter = 0; + const MAX_NO_NEW_ITEMS = 2; + while (true) { - // Find working button with retry mechanism, consistent with clickNext - const { button: loadMoreButton, workingSelector } = await findWorkingButton(availableSelectors); + // Find working button with retry mechanism + const { button: loadMoreButton, workingSelector, updatedSelectors } = await findWorkingButton(availableSelectors); + + availableSelectors = updatedSelectors; if (!workingSelector || !loadMoreButton) { debugLog('No working Load More selector found after retries'); - const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(finalResults); return allResults; } - // Update available selectors to start from the working one - availableSelectors = availableSelectors.slice( - availableSelectors.indexOf(workingSelector) - ); - // Implement retry mechanism for clicking the button let retryCount = 0; let clickSuccess = false; @@ -808,6 +835,8 @@ export default class Interpreter extends EventEmitter { if (clickSuccess) { await page.waitForTimeout(1000); + loadMoreCounter++; + debugLog(`Successfully clicked Load More button (${loadMoreCounter} times)`); } } catch (error) { debugLog(`Click attempt ${retryCount + 1} failed completely.`); @@ -822,8 +851,6 @@ export default class Interpreter extends EventEmitter { if (!clickSuccess) { debugLog(`Load More clicking failed after ${MAX_RETRIES} attempts`); - const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(finalResults); return allResults; } @@ -833,20 +860,34 @@ export default class Interpreter extends EventEmitter { await page.waitForTimeout(2000); const currentHeight = await page.evaluate(() => document.body.scrollHeight); - if (currentHeight === previousHeight) { - debugLog('No more items loaded after Load More'); - const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(finalResults); - return allResults; - } + const heightChanged = currentHeight !== previousHeight; previousHeight = currentHeight; - if (config.limit && allResults.length >= config.limit) { - allResults = allResults.slice(0, config.limit); - break; + await scrapeCurrentPage(); + + const currentResultCount = allResults.length; + const newItemsAdded = currentResultCount > previousResultCount; + + if (!newItemsAdded) { + noNewItemsCounter++; + debugLog(`No new items added after click (${noNewItemsCounter}/${MAX_NO_NEW_ITEMS})`); + + if (noNewItemsCounter >= MAX_NO_NEW_ITEMS) { + debugLog(`Stopping after ${MAX_NO_NEW_ITEMS} clicks with no new items`); + return allResults; + } + } else { + noNewItemsCounter = 0; + previousResultCount = currentResultCount; + } + + if (checkLimit()) return allResults; + + if (!heightChanged) { + debugLog('No more items loaded after Load More'); + return allResults; } } - break; } default: { diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 2eb5f130f..b4c72e967 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -825,6 +825,7 @@ export class WorkflowGenerator { selectors?.testIdSelector, selectors?.id, selectors?.hrefSelector, + selectors?.relSelector, selectors?.accessibilitySelector, selectors?.attrSelector ] diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 20a6a5291..80ed55f3c 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -24,54 +24,64 @@ export const getElementInformation = async ( const elementInfo = await page.evaluate( async ({ x, y }) => { const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { - // First, get the element at the clicked coordinates in the main document let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; - - // Track the deepest element found + let deepestElement = element; - - // Function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { let current = element; let shadowRoot = current.shadowRoot; let deepest = current; - - while (shadowRoot) { + let depth = 0; + const MAX_SHADOW_DEPTH = 4; + + while (shadowRoot && depth < MAX_SHADOW_DEPTH) { const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; deepest = shadowElement; current = shadowElement; shadowRoot = current.shadowRoot; + depth++; } - + return deepest; }; - - // Handle iframe traversal + + const isInFrameset = () => { + let node = element; + while (node && node.parentElement) { + if (node.tagName === 'FRAMESET' || node.tagName === 'FRAME') { + return true; + } + node = node.parentElement; + } + return false; + }; + if (element.tagName === 'IFRAME') { let currentIframe = element as HTMLIFrameElement; - - while (currentIframe) { + let depth = 0; + const MAX_IFRAME_DEPTH = 4; + + while (currentIframe && depth < MAX_IFRAME_DEPTH) { try { - // Convert coordinates to iframe's local space const iframeRect = currentIframe.getBoundingClientRect(); const iframeX = x - iframeRect.left; const iframeY = y - iframeRect.top; - + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; if (!iframeDocument) break; - + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; if (!iframeElement) break; - - // Update deepest element and check for shadow DOM + deepestElement = traverseShadowDOM(iframeElement); - - // Continue traversing if we found another iframe + if (iframeElement.tagName === 'IFRAME') { currentIframe = iframeElement as HTMLIFrameElement; + depth++; } else { break; } @@ -80,28 +90,81 @@ export const getElementInformation = async ( break; } } + } + else if (element.tagName === 'FRAME' || isInFrameset()) { + const framesToCheck = []; + + if (element.tagName === 'FRAME') { + framesToCheck.push(element as HTMLFrameElement); + } + + if (isInFrameset()) { + document.querySelectorAll('frame').forEach(frame => { + framesToCheck.push(frame as HTMLFrameElement); + }); + } + + let frameDepth = 0; + const MAX_FRAME_DEPTH = 4; + + const processFrames = (frames: HTMLFrameElement[], currentDepth: number) => { + if (currentDepth >= MAX_FRAME_DEPTH) return; + + for (const frameElement of frames) { + try { + const frameRect = frameElement.getBoundingClientRect(); + const frameX = x - frameRect.left; + const frameY = y - frameRect.top; + + if (frameX < 0 || frameY < 0 || frameX > frameRect.width || frameY > frameRect.height) { + continue; + } + + const frameDocument = + frameElement.contentDocument || + frameElement.contentWindow?.document; + + if (!frameDocument) continue; + + const frameElementAtPoint = frameDocument.elementFromPoint(frameX, frameY) as HTMLElement; + if (!frameElementAtPoint) continue; + + deepestElement = traverseShadowDOM(frameElementAtPoint); + + if (frameElementAtPoint.tagName === 'FRAME') { + processFrames([frameElementAtPoint as HTMLFrameElement], currentDepth + 1); + } + + break; + } catch (error) { + console.warn('Cannot access frame content:', error); + continue; + } + } + }; + + processFrames(framesToCheck, frameDepth); } else { - // If not an iframe, check for shadow DOM deepestElement = traverseShadowDOM(element); } - + return deepestElement; }; - // Get the element and its iframe path const el = getDeepestElementFromPoint(x, y); if (el) { - // Handle potential anchor parent - const { parentElement } = el; - const targetElement = parentElement?.tagName === 'A' ? parentElement : el; + // Prioritize Link (DO NOT REMOVE) + // const { parentElement } = el; + // const targetElement = parentElement?.tagName === 'A' ? parentElement : el; + + const targetElement = el; - // Get containing context information const ownerDocument = targetElement.ownerDocument; const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; const isIframeContent = Boolean(frameElement); + const isFrameContent = frameElement?.tagName === 'FRAME'; - // Get the containing shadow root if any const containingShadowRoot = targetElement.getRootNode() as ShadowRoot; const isShadowRoot = containingShadowRoot instanceof ShadowRoot; @@ -115,8 +178,11 @@ export const getElementInformation = async ( innerHTML?: string; outerHTML?: string; isIframeContent?: boolean; + isFrameContent?: boolean; iframeURL?: string; + frameURL?: string; iframeIndex?: number; + frameIndex?: number; frameHierarchy?: string[]; isShadowRoot?: boolean; shadowRootMode?: string; @@ -124,43 +190,47 @@ export const getElementInformation = async ( } = { tagName: targetElement?.tagName ?? '', isIframeContent, + isFrameContent, isShadowRoot }; - if (isIframeContent) { - // Include iframe specific information - info.iframeURL = frameElement.src; + if (isIframeContent || isFrameContent) { + if (isIframeContent) { + info.iframeURL = (frameElement as HTMLIFrameElement).src; + } else { + info.frameURL = (frameElement).src; + } - // Calculate the frame's position in the hierarchy let currentFrame = frameElement; const frameHierarchy: string[] = []; let frameIndex = 0; while (currentFrame) { - // Store the frame's identifier (src, id, or index) frameHierarchy.unshift( currentFrame.id || + currentFrame.getAttribute('name') || currentFrame.src || - `iframe[${frameIndex}]` + `${currentFrame.tagName.toLowerCase()}[${frameIndex}]` ); - // Move up to parent frame if it exists const parentDoc = currentFrame.ownerDocument; currentFrame = parentDoc?.defaultView?.frameElement as HTMLIFrameElement; frameIndex++; } info.frameHierarchy = frameHierarchy; - info.iframeIndex = frameIndex - 1; // Adjust for 0-based index + if (isIframeContent) { + info.iframeIndex = frameIndex - 1; + } else { + info.frameIndex = frameIndex - 1; + } } if (isShadowRoot) { - // Include shadow root specific information info.shadowRootMode = containingShadowRoot.mode; info.shadowRootContent = containingShadowRoot.innerHTML; } - // Collect element attributes and properties if (targetElement) { info.attributes = Array.from(targetElement.attributes).reduce( (acc, attr) => { @@ -206,56 +276,65 @@ export const getElementInformation = async ( } else { const elementInfo = await page.evaluate( async ({ x, y }) => { - // Enhanced helper function to get element from point including shadow DOM const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { - // First, get the element at the clicked coordinates in the main document let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; - - // Track the deepest element found + let deepestElement = element; - - // Function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { let current = element; let shadowRoot = current.shadowRoot; let deepest = current; - - while (shadowRoot) { + let depth = 0; + const MAX_SHADOW_DEPTH = 4; + + while (shadowRoot && depth < MAX_SHADOW_DEPTH) { const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; deepest = shadowElement; current = shadowElement; shadowRoot = current.shadowRoot; + depth++; } - + return deepest; }; - - // Handle iframe traversal + + const isInFrameset = () => { + let node = element; + while (node && node.parentElement) { + if (node.tagName === 'FRAMESET' || node.tagName === 'FRAME') { + return true; + } + node = node.parentElement; + } + return false; + }; + if (element.tagName === 'IFRAME') { let currentIframe = element as HTMLIFrameElement; - - while (currentIframe) { + let depth = 0; + const MAX_IFRAME_DEPTH = 4; + + while (currentIframe && depth < MAX_IFRAME_DEPTH) { try { - // Convert coordinates to iframe's local space const iframeRect = currentIframe.getBoundingClientRect(); const iframeX = x - iframeRect.left; const iframeY = y - iframeRect.top; - + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; if (!iframeDocument) break; - + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; if (!iframeElement) break; - - // Update deepest element and check for shadow DOM + deepestElement = traverseShadowDOM(iframeElement); - - // Continue traversing if we found another iframe + if (iframeElement.tagName === 'IFRAME') { currentIframe = iframeElement as HTMLIFrameElement; + depth++; } else { break; } @@ -264,14 +343,67 @@ export const getElementInformation = async ( break; } } + } + else if (element.tagName === 'FRAME' || isInFrameset()) { + const framesToCheck = []; + + if (element.tagName === 'FRAME') { + framesToCheck.push(element as HTMLFrameElement); + } + + if (isInFrameset()) { + document.querySelectorAll('frame').forEach(frame => { + framesToCheck.push(frame as HTMLFrameElement); + }); + } + + let frameDepth = 0; + const MAX_FRAME_DEPTH = 4; + + const processFrames = (frames: HTMLFrameElement[], currentDepth: number) => { + if (currentDepth >= MAX_FRAME_DEPTH) return; + + for (const frameElement of frames) { + try { + const frameRect = frameElement.getBoundingClientRect(); + const frameX = x - frameRect.left; + const frameY = y - frameRect.top; + + if (frameX < 0 || frameY < 0 || frameX > frameRect.width || frameY > frameRect.height) { + continue; + } + + const frameDocument = + frameElement.contentDocument || + frameElement.contentWindow?.document; + + if (!frameDocument) continue; + + const frameElementAtPoint = frameDocument.elementFromPoint(frameX, frameY) as HTMLElement; + if (!frameElementAtPoint) continue; + + deepestElement = traverseShadowDOM(frameElementAtPoint); + + if (frameElementAtPoint.tagName === 'FRAME') { + processFrames([frameElementAtPoint as HTMLFrameElement], currentDepth + 1); + } + + break; + } catch (error) { + console.warn('Cannot access frame content:', error); + continue; + } + } + }; + + processFrames(framesToCheck, frameDepth); } else { - // If not an iframe, check for shadow DOM deepestElement = traverseShadowDOM(element); } - + return deepestElement; }; - + const originalEl = getDeepestElementFromPoint(x, y); if (originalEl) { let element = originalEl; @@ -301,7 +433,7 @@ export const getElementInformation = async ( const significantOverlap = (childRect.width * childRect.height) / - (parentRect.width * parentRect.height) > 0.5; + (parentRect.width * parentRect.height) > 0.1; if (fullyContained && significantOverlap) { const nextParent = element.parentElement; @@ -318,8 +450,9 @@ export const getElementInformation = async ( } const ownerDocument = element.ownerDocument; - const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + const frameElement = ownerDocument?.defaultView?.frameElement; const isIframeContent = Boolean(frameElement); + const isFrameContent = frameElement?.tagName === 'FRAME'; const containingShadowRoot = element.getRootNode() as ShadowRoot; const isShadowRoot = containingShadowRoot instanceof ShadowRoot; @@ -334,8 +467,11 @@ export const getElementInformation = async ( innerHTML?: string; outerHTML?: string; isIframeContent?: boolean; + isFrameContent?: boolean; iframeURL?: string; + frameURL?: string; iframeIndex?: number; + frameIndex?: number; frameHierarchy?: string[]; isShadowRoot?: boolean; shadowRootMode?: string; @@ -343,44 +479,48 @@ export const getElementInformation = async ( } = { tagName: element?.tagName ?? '', isIframeContent, + isFrameContent, isShadowRoot }; - if (isIframeContent) { - // Include iframe specific information - info.iframeURL = frameElement.src; + if (isIframeContent || isFrameContent) { + if (isIframeContent && !isFrameContent) { + info.iframeURL = (frameElement as HTMLIFrameElement).src; + } else if (isFrameContent) { + info.frameURL = (frameElement as HTMLFrameElement).src; + } - // Calculate the frame's position in the hierarchy let currentFrame = frameElement; const frameHierarchy: string[] = []; let frameIndex = 0; while (currentFrame) { - // Store the frame's identifier (src, id, or index) frameHierarchy.unshift( currentFrame.id || - currentFrame.src || - `iframe[${frameIndex}]` + currentFrame.getAttribute('name') || + (currentFrame as HTMLFrameElement).src || + `${currentFrame.tagName.toLowerCase()}[${frameIndex}]` ); - // Move up to parent frame if it exists const parentDoc = currentFrame.ownerDocument; - currentFrame = parentDoc?.defaultView?.frameElement as HTMLIFrameElement; + currentFrame = parentDoc?.defaultView?.frameElement; frameIndex++; } info.frameHierarchy = frameHierarchy; - info.iframeIndex = frameIndex - 1; // Adjust for 0-based index - }; + if (isIframeContent && !isFrameContent) { + info.iframeIndex = frameIndex - 1; + } else if (isFrameContent) { + info.frameIndex = frameIndex - 1; + } + } if (isShadowRoot) { - // Include shadow root specific information info.shadowRootMode = containingShadowRoot.mode; info.shadowRootContent = containingShadowRoot.innerHTML; } if (element) { - // Get attributes including those from shadow DOM context info.attributes = Array.from(element.attributes).reduce( (acc, attr) => { acc[attr.name] = attr.value; @@ -389,14 +529,21 @@ export const getElementInformation = async ( {} as Record ); - // Handle specific element types if (element.tagName === 'A') { info.url = (element as HTMLAnchorElement).href; info.innerText = element.textContent ?? ''; } else if (element.tagName === 'IMG') { info.imageUrl = (element as HTMLImageElement).src; + } else if (element?.tagName === 'SELECT') { + const selectElement = element as HTMLSelectElement; + info.innerText = selectElement.options[selectElement.selectedIndex]?.text ?? ''; + info.attributes = { + ...info.attributes, + selectedValue: selectElement.value, + }; + } else if (element?.tagName === 'INPUT' && ((element as HTMLInputElement).type === 'time' || (element as HTMLInputElement).type === 'date')) { + info.innerText = (element as HTMLInputElement).value; } else { - // Handle text content with proper null checking info.hasOnlyText = element.children.length === 0 && (element.textContent !== null && element.textContent.trim().length > 0); @@ -427,56 +574,65 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector if (!getList || listSelector !== '') { const rect = await page.evaluate( async ({ x, y }) => { - // Enhanced helper function to get element from point including iframes const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { - // First, get the element at the clicked coordinates in the main document let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; - - // Track the deepest element found + let deepestElement = element; - - // Function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { let current = element; let shadowRoot = current.shadowRoot; let deepest = current; - - while (shadowRoot) { + let depth = 0; + const MAX_SHADOW_DEPTH = 4; + + while (shadowRoot && depth < MAX_SHADOW_DEPTH) { const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; deepest = shadowElement; current = shadowElement; shadowRoot = current.shadowRoot; + depth++; } - + return deepest; }; - - // Handle iframe traversal + + const isInFrameset = () => { + let node = element; + while (node && node.parentElement) { + if (node.tagName === 'FRAMESET' || node.tagName === 'FRAME') { + return true; + } + node = node.parentElement; + } + return false; + }; + if (element.tagName === 'IFRAME') { let currentIframe = element as HTMLIFrameElement; - - while (currentIframe) { + let depth = 0; + const MAX_IFRAME_DEPTH = 4; + + while (currentIframe && depth < MAX_IFRAME_DEPTH) { try { - // Convert coordinates to iframe's local space const iframeRect = currentIframe.getBoundingClientRect(); const iframeX = x - iframeRect.left; const iframeY = y - iframeRect.top; - + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; if (!iframeDocument) break; - + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; if (!iframeElement) break; - - // Update deepest element and check for shadow DOM + deepestElement = traverseShadowDOM(iframeElement); - - // Continue traversing if we found another iframe + if (iframeElement.tagName === 'IFRAME') { currentIframe = iframeElement as HTMLIFrameElement; + depth++; } else { break; } @@ -485,18 +641,74 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector break; } } + } + else if (element.tagName === 'FRAME' || isInFrameset()) { + const framesToCheck = []; + + if (element.tagName === 'FRAME') { + framesToCheck.push(element as HTMLFrameElement); + } + + if (isInFrameset()) { + document.querySelectorAll('frame').forEach(frame => { + framesToCheck.push(frame as HTMLFrameElement); + }); + } + + let frameDepth = 0; + const MAX_FRAME_DEPTH = 4; + + const processFrames = (frames: HTMLFrameElement[], currentDepth: number) => { + if (currentDepth >= MAX_FRAME_DEPTH) return; + + for (const frameElement of frames) { + try { + const frameRect = frameElement.getBoundingClientRect(); + const frameX = x - frameRect.left; + const frameY = y - frameRect.top; + + if (frameX < 0 || frameY < 0 || frameX > frameRect.width || frameY > frameRect.height) { + continue; + } + + const frameDocument = + frameElement.contentDocument || + frameElement.contentWindow?.document; + + if (!frameDocument) continue; + + const frameElementAtPoint = frameDocument.elementFromPoint(frameX, frameY) as HTMLElement; + if (!frameElementAtPoint) continue; + + deepestElement = traverseShadowDOM(frameElementAtPoint); + + if (frameElementAtPoint.tagName === 'FRAME') { + processFrames([frameElementAtPoint as HTMLFrameElement], currentDepth + 1); + } + + break; + } catch (error) { + console.warn('Cannot access frame content:', error); + continue; + } + } + }; + + processFrames(framesToCheck, frameDepth); } else { - // If not an iframe, check for shadow DOM deepestElement = traverseShadowDOM(element); } - + return deepestElement; }; const el = getDeepestElementFromPoint(x, y); if (el) { - const { parentElement } = el; - const element = parentElement?.tagName === 'A' ? parentElement : el; + // Prioritize Link (DO NOT REMOVE) + // const { parentElement } = el; + // const element = parentElement?.tagName === 'A' ? parentElement : el; + + const element = el; const rectangle = element?.getBoundingClientRect(); if (rectangle) { const createRectObject = (rect: DOMRect) => ({ @@ -557,54 +769,64 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector const rect = await page.evaluate( async ({ x, y }) => { const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { - // First, get the element at the clicked coordinates in the main document let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; - - // Track the deepest element found + let deepestElement = element; - - // Function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { let current = element; let shadowRoot = current.shadowRoot; let deepest = current; - - while (shadowRoot) { + let depth = 0; + const MAX_SHADOW_DEPTH = 4; + + while (shadowRoot && depth < MAX_SHADOW_DEPTH) { const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; deepest = shadowElement; current = shadowElement; shadowRoot = current.shadowRoot; + depth++; } - + return deepest; }; - - // Handle iframe traversal + + const isInFrameset = () => { + let node = element; + while (node && node.parentElement) { + if (node.tagName === 'FRAMESET' || node.tagName === 'FRAME') { + return true; + } + node = node.parentElement; + } + return false; + }; + if (element.tagName === 'IFRAME') { let currentIframe = element as HTMLIFrameElement; - - while (currentIframe) { + let depth = 0; + const MAX_IFRAME_DEPTH = 4; + + while (currentIframe && depth < MAX_IFRAME_DEPTH) { try { - // Convert coordinates to iframe's local space const iframeRect = currentIframe.getBoundingClientRect(); const iframeX = x - iframeRect.left; const iframeY = y - iframeRect.top; - + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; if (!iframeDocument) break; - + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; if (!iframeElement) break; - - // Update deepest element and check for shadow DOM + deepestElement = traverseShadowDOM(iframeElement); - - // Continue traversing if we found another iframe + if (iframeElement.tagName === 'IFRAME') { currentIframe = iframeElement as HTMLIFrameElement; + depth++; } else { break; } @@ -613,11 +835,64 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector break; } } + } + else if (element.tagName === 'FRAME' || isInFrameset()) { + const framesToCheck = []; + + if (element.tagName === 'FRAME') { + framesToCheck.push(element as HTMLFrameElement); + } + + if (isInFrameset()) { + document.querySelectorAll('frame').forEach(frame => { + framesToCheck.push(frame as HTMLFrameElement); + }); + } + + let frameDepth = 0; + const MAX_FRAME_DEPTH = 4; + + const processFrames = (frames: HTMLFrameElement[], currentDepth: number) => { + if (currentDepth >= MAX_FRAME_DEPTH) return; + + for (const frameElement of frames) { + try { + const frameRect = frameElement.getBoundingClientRect(); + const frameX = x - frameRect.left; + const frameY = y - frameRect.top; + + if (frameX < 0 || frameY < 0 || frameX > frameRect.width || frameY > frameRect.height) { + continue; + } + + const frameDocument = + frameElement.contentDocument || + frameElement.contentWindow?.document; + + if (!frameDocument) continue; + + const frameElementAtPoint = frameDocument.elementFromPoint(frameX, frameY) as HTMLElement; + if (!frameElementAtPoint) continue; + + deepestElement = traverseShadowDOM(frameElementAtPoint); + + if (frameElementAtPoint.tagName === 'FRAME') { + processFrames([frameElementAtPoint as HTMLFrameElement], currentDepth + 1); + } + + break; + } catch (error) { + console.warn('Cannot access frame content:', error); + continue; + } + } + }; + + processFrames(framesToCheck, frameDepth); } else { - // If not an iframe, check for shadow DOM deepestElement = traverseShadowDOM(element); } - + return deepestElement; }; @@ -650,7 +925,7 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector const significantOverlap = (childRect.width * childRect.height) / - (parentRect.width * parentRect.height) > 0.5; + (parentRect.width * parentRect.height) > 0.1; if (fullyContained && significantOverlap) { const nextParent = element.parentElement; @@ -691,12 +966,12 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector } }); - // Same coordinate adjustment for iframe elements as above + // For elements inside iframes or frames, adjust coordinates relative to the top window let adjustedRect = createRectObject(rectangle); let currentWindow = element.ownerDocument.defaultView; while (currentWindow !== window.top) { - const frameElement = currentWindow?.frameElement as HTMLIFrameElement; + const frameElement = currentWindow?.frameElement; if (!frameElement) break; const frameRect = frameElement.getBoundingClientRect(); @@ -1205,65 +1480,63 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { } const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { - // Helper function to traverse shadow DOM - const traverseShadowDOM = (element: HTMLElement, depth: number = 0): HTMLElement => { - const MAX_SHADOW_DEPTH = 4; + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + let deepestElement = element; + + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { let current = element; + let shadowRoot = current.shadowRoot; let deepest = current; - - while (current && depth < MAX_SHADOW_DEPTH) { - const shadowRoot = current.shadowRoot; - if (!shadowRoot) break; - + let depth = 0; + const MAX_SHADOW_DEPTH = 4; + + while (shadowRoot && depth < MAX_SHADOW_DEPTH) { const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; - + deepest = shadowElement; current = shadowElement; + shadowRoot = current.shadowRoot; depth++; } - + return deepest; }; - - // Start with the element at the specified coordinates - let element = document.elementFromPoint(x, y) as HTMLElement; - if (!element) return null; - - // Initialize tracking variables - let deepestElement = element; - let depth = 0; - const MAX_IFRAME_DEPTH = 4; - - // First check if the initial element has a shadow root - deepestElement = traverseShadowDOM(element); - - // If it's an iframe, traverse through iframe hierarchy - if (deepestElement.tagName === 'IFRAME') { - let currentIframe = deepestElement as HTMLIFrameElement; - + + const isInFrameset = () => { + let node = element; + while (node && node.parentElement) { + if (node.tagName === 'FRAMESET' || node.tagName === 'FRAME') { + return true; + } + node = node.parentElement; + } + return false; + }; + + if (element.tagName === 'IFRAME') { + let currentIframe = element as HTMLIFrameElement; + let depth = 0; + const MAX_IFRAME_DEPTH = 4; + while (currentIframe && depth < MAX_IFRAME_DEPTH) { try { - // Convert coordinates to iframe's local space const iframeRect = currentIframe.getBoundingClientRect(); const iframeX = x - iframeRect.left; const iframeY = y - iframeRect.top; - - // Access iframe's document - const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document; - if (!iframeDoc) break; - - // Get element at transformed coordinates in iframe - const iframeElement = iframeDoc.elementFromPoint(iframeX, iframeY) as HTMLElement; + + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDocument) break; + + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; if (!iframeElement) break; - - // Check for shadow DOM within iframe - const shadowResult = traverseShadowDOM(iframeElement); - deepestElement = shadowResult; - - // If we found another iframe, continue traversing - if (shadowResult.tagName === 'IFRAME') { - currentIframe = shadowResult as HTMLIFrameElement; + + deepestElement = traverseShadowDOM(iframeElement); + + if (iframeElement.tagName === 'IFRAME') { + currentIframe = iframeElement as HTMLIFrameElement; depth++; } else { break; @@ -1273,74 +1546,129 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { break; } } - } - - return deepestElement; - }; + } + else if (element.tagName === 'FRAME' || isInFrameset()) { + const framesToCheck = []; + + if (element.tagName === 'FRAME') { + framesToCheck.push(element as HTMLFrameElement); + } + + if (isInFrameset()) { + document.querySelectorAll('frame').forEach(frame => { + framesToCheck.push(frame as HTMLFrameElement); + }); + } - const genSelectorForIframe = (element: HTMLElement) => { - // Helper function to get the complete iframe path up to document root - const getIframePath = (el: HTMLElement) => { - const path = []; - let current = el; - let depth = 0; - const MAX_DEPTH = 4; + let frameDepth = 0; + const MAX_FRAME_DEPTH = 4; + + const processFrames = (frames: HTMLFrameElement[], currentDepth: number) => { + if (currentDepth >= MAX_FRAME_DEPTH) return; - while (current && depth < MAX_DEPTH) { - // Get the owner document of the current element - const ownerDocument = current.ownerDocument; - - // Check if this document belongs to an iframe - const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + for (const frameElement of frames) { + try { + const frameRect = frameElement.getBoundingClientRect(); + const frameX = x - frameRect.left; + const frameY = y - frameRect.top; + + if (frameX < 0 || frameY < 0 || frameX > frameRect.width || frameY > frameRect.height) { + continue; + } + + const frameDocument = + frameElement.contentDocument || + frameElement.contentWindow?.document; + + if (!frameDocument) continue; + + const frameElementAtPoint = frameDocument.elementFromPoint(frameX, frameY) as HTMLElement; + if (!frameElementAtPoint) continue; + + deepestElement = traverseShadowDOM(frameElementAtPoint); - if (frameElement) { - path.unshift({ - frame: frameElement, - document: ownerDocument, - element: current - }); - // Move up to the parent document's element (the iframe) - current = frameElement; - depth++; - } else { - break; + if (frameElementAtPoint.tagName === 'FRAME') { + processFrames([frameElementAtPoint as HTMLFrameElement], currentDepth + 1); } - } - return path; - }; - - const iframePath = getIframePath(element); - if (iframePath.length === 0) return null; - + + break; + } catch (error) { + console.warn('Cannot access frame content:', error); + continue; + } + } + }; + + processFrames(framesToCheck, frameDepth); + } else { + deepestElement = traverseShadowDOM(element); + } + + return deepestElement; + }; + + + const genSelectorForFrame = (element: HTMLElement) => { + const getFramePath = (el: HTMLElement) => { + const path = []; + let current = el; + let depth = 0; + const MAX_DEPTH = 4; + + while (current && depth < MAX_DEPTH) { + const ownerDocument = current.ownerDocument; + + const frameElement = + ownerDocument?.defaultView?.frameElement as HTMLIFrameElement | HTMLFrameElement; + + if (frameElement) { + path.unshift({ + frame: frameElement, + document: ownerDocument, + element: current, + isFrame: frameElement.tagName === 'FRAME' + }); + + current = frameElement; + depth++; + } else { + break; + } + } + return path; + }; + + const framePath = getFramePath(element); + if (framePath.length === 0) return null; + try { - const selectorParts: string[] = []; + const selectorParts: string[] = []; + + framePath.forEach((context, index) => { + const frameSelector = context.isFrame ? + `frame[name="${context.frame.getAttribute('name')}"]` : + finder(context.frame, { + root: index === 0 ? document.body : + (framePath[index - 1].document.body as Element) + }); - // Generate selector for each iframe boundary - iframePath.forEach((context, index) => { - // Get selector for the iframe element - const frameSelector = finder(context.frame, { - root: index === 0 ? document.body : - (iframePath[index - 1].document.body as Element) - }); - - // For the last context, get selector for target element - if (index === iframePath.length - 1) { - const elementSelector = finder(element, { - root: context.document.body as Element - }); - selectorParts.push(`${frameSelector} :>> ${elementSelector}`); - } else { - selectorParts.push(frameSelector); - } - }); - - return { - fullSelector: selectorParts.join(' :>> '), - isFrameContent: true - }; + if (index === framePath.length - 1) { + const elementSelector = finder(element, { + root: context.document.body as Element + }); + selectorParts.push(`${frameSelector} :>> ${elementSelector}`); + } else { + selectorParts.push(frameSelector); + } + }); + + return { + fullSelector: selectorParts.join(' :>> '), + isFrameContent: true + }; } catch (e) { - console.warn('Error generating iframe selector:', e); - return null; + console.warn('Error generating frame selector:', e); + return null; } }; @@ -1424,9 +1752,25 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { } - const iframeSelector = genSelectorForIframe(element); + let iframeSelector = null; + try { + // Check if element is within frame/iframe + const isInFrame = element.ownerDocument !== document; + const isInFrameset = () => { + let doc = element.ownerDocument; + return doc.querySelectorAll('frameset').length > 0; + }; + + if (isInFrame || isInFrameset()) { + iframeSelector = genSelectorForFrame(element); + } + } catch (e) { + console.warn('Error detecting frames:', e); + } + const shadowSelector = genSelectorForShadowDOM(element); + const relSelector = genSelectorForAttributes(element, ['rel']); const hrefSelector = genSelectorForAttributes(element, ['href']); const formSelector = genSelectorForAttributes(element, [ 'name', @@ -1473,6 +1817,7 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { hrefSelector, accessibilitySelector, formSelector, + relSelector, iframeSelector: iframeSelector ? { full: iframeSelector.fullSelector, isIframe: iframeSelector.isFrameContent, @@ -1509,6 +1854,11 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { function genSelectorForAttributes(element: HTMLElement, attributes: string[]) { let selector = null; try { + if (attributes.includes('rel') && element.hasAttribute('rel')) { + const relValue = element.getAttribute('rel'); + return `[rel="${relValue}"]`; + } + selector = isAttributesDefined(element, attributes) ? finder(element, { idName: () => false, // Don't use the id to generate a selector @@ -1531,9 +1881,12 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { hoveredElement != null && !hoveredElement.closest('#overlay-controls') != null ) { - const { parentElement } = hoveredElement; + // Prioritize Link (DO NOT REMOVE) + // const { parentElement } = hoveredElement; // Match the logic in recorder.ts for link clicks - const element = parentElement?.tagName === 'A' ? parentElement : hoveredElement; + // const element = parentElement?.tagName === 'A' ? parentElement : hoveredElement; + + const element = hoveredElement; const generatedSelectors = genSelectors(element); return generatedSelectors; } @@ -1562,9 +1915,9 @@ interface SelectorResult { export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates, listSelector: string): Promise => { interface DOMContext { - type: 'iframe' | 'shadow'; + type: 'iframe' | 'frame' | 'shadow'; element: HTMLElement; - container: HTMLIFrameElement | ShadowRoot; + container: HTMLIFrameElement | HTMLFrameElement | ShadowRoot; host?: HTMLElement; document?: Document; } @@ -1573,53 +1926,63 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates if (!listSelector) { const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { - // Function to traverse shadow DOM + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + let deepestElement = element; + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { let current = element; - let deepest = current; let shadowRoot = current.shadowRoot; - - while (shadowRoot) { + let deepest = current; + let depth = 0; + const MAX_SHADOW_DEPTH = 4; + + while (shadowRoot && depth < MAX_SHADOW_DEPTH) { const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; deepest = shadowElement; current = shadowElement; shadowRoot = current.shadowRoot; + depth++; } - + return deepest; }; - - // Start with the element at coordinates - let element = document.elementFromPoint(x, y) as HTMLElement; - if (!element) return null; - - let deepestElement = element; - let depth = 0; - const MAX_DEPTH = 4; - - // Handle iframe traversal + + const isInFrameset = () => { + let node = element; + while (node && node.parentElement) { + if (node.tagName === 'FRAMESET' || node.tagName === 'FRAME') { + return true; + } + node = node.parentElement; + } + return false; + }; + if (element.tagName === 'IFRAME') { let currentIframe = element as HTMLIFrameElement; - - while (currentIframe && depth < MAX_DEPTH) { + let depth = 0; + const MAX_IFRAME_DEPTH = 4; + + while (currentIframe && depth < MAX_IFRAME_DEPTH) { try { const iframeRect = currentIframe.getBoundingClientRect(); const iframeX = x - iframeRect.left; const iframeY = y - iframeRect.top; - - const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document; - if (!iframeDoc) break; - - const iframeElement = iframeDoc.elementFromPoint(iframeX, iframeY) as HTMLElement; + + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDocument) break; + + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; if (!iframeElement) break; - - // Check for shadow DOM within iframe + deepestElement = traverseShadowDOM(iframeElement); - - if (deepestElement.tagName === 'IFRAME') { - currentIframe = deepestElement as HTMLIFrameElement; + + if (iframeElement.tagName === 'IFRAME') { + currentIframe = iframeElement as HTMLIFrameElement; depth++; } else { break; @@ -1629,25 +1992,134 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates break; } } + } + else if (element.tagName === 'FRAME' || isInFrameset()) { + const framesToCheck = []; + + if (element.tagName === 'FRAME') { + framesToCheck.push(element as HTMLFrameElement); + } + + if (isInFrameset()) { + document.querySelectorAll('frame').forEach(frame => { + framesToCheck.push(frame as HTMLFrameElement); + }); + } + + let frameDepth = 0; + const MAX_FRAME_DEPTH = 4; + + const processFrames = (frames: HTMLFrameElement[], currentDepth: number) => { + if (currentDepth >= MAX_FRAME_DEPTH) return; + + for (const frameElement of frames) { + try { + const frameRect = frameElement.getBoundingClientRect(); + const frameX = x - frameRect.left; + const frameY = y - frameRect.top; + + if (frameX < 0 || frameY < 0 || frameX > frameRect.width || frameY > frameRect.height) { + continue; + } + + const frameDocument = + frameElement.contentDocument || + frameElement.contentWindow?.document; + + if (!frameDocument) continue; + + const frameElementAtPoint = frameDocument.elementFromPoint(frameX, frameY) as HTMLElement; + if (!frameElementAtPoint) continue; + + deepestElement = traverseShadowDOM(frameElementAtPoint); + + if (frameElementAtPoint.tagName === 'FRAME') { + processFrames([frameElementAtPoint as HTMLFrameElement], currentDepth + 1); + } + + break; + } catch (error) { + console.warn('Cannot access frame content:', error); + continue; + } + } + }; + + processFrames(framesToCheck, frameDepth); } else { - // If not an iframe, check for shadow DOM deepestElement = traverseShadowDOM(element); } - + return deepestElement; }; - // Basic selector generation function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); - + + if (selector === 'frame' || selector === 'iframe') { + let baseSelector = selector; + + if (element.className) { + const classes = element.className.split(/\s+/).filter(Boolean); + if (classes.length > 0) { + const validClasses = classes.filter(cls => !cls.startsWith('!') && !cls.includes(':')); + if (validClasses.length > 0) { + baseSelector += '.' + validClasses.map(cls => CSS.escape(cls)).join('.'); + } + } + } + + if (element.id) { + return `${selector}#${CSS.escape(element.id)}`; + } + + if (element.getAttribute('name')) { + return `${selector}[name="${CSS.escape(element.getAttribute('name')!)}"]`; + } + + if (element.parentElement && element.parentElement.tagName === 'FRAMESET') { + const frameIndex = Array.from(element.parentElement.children) + .filter(child => child.tagName.toLowerCase() === selector) + .indexOf(element) + 1; + + if (frameIndex > 0) { + return `${selector}:nth-of-type(${frameIndex})`; + } + } + + if (element.parentElement) { + const siblings = Array.from(element.parentElement.children); + const identicalSiblings = siblings.filter(sibling => { + if (sibling === element) return false; + + let siblingSelector = sibling.tagName.toLowerCase(); + const siblingClassName = typeof sibling.className === 'string' ? sibling.className : ''; + if (siblingClassName) { + const siblingClasses = siblingClassName.split(/\s+/).filter(Boolean); + const validSiblingClasses = siblingClasses.filter(cls => !cls.startsWith('!') && !cls.includes(':')); + if (validSiblingClasses.length > 0) { + siblingSelector += '.' + validSiblingClasses.map(cls => CSS.escape(cls)).join('.'); + } + } + + return siblingSelector === baseSelector; + }); + + if (identicalSiblings.length > 0) { + const position = siblings.indexOf(element) + 1; + return `${baseSelector}:nth-child(${position})`; + } + } + + return baseSelector; + } + if (selector === 'td' && element.parentElement) { - // Find position among td siblings const siblings = Array.from(element.parentElement.children); const position = siblings.indexOf(element) + 1; return `${selector}:nth-child(${position})`; } - + if (element.className) { const classes = element.className.split(/\s+/).filter((cls: string) => Boolean(cls)); if (classes.length > 0) { @@ -1657,9 +2129,8 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates } } } - + if (element.parentElement) { - // Look for identical siblings const siblings = Array.from(element.parentElement.children); const identicalSiblings = siblings.filter(sibling => { if (sibling === element) return false; @@ -1682,11 +2153,10 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates selector += `:nth-child(${position})`; } } - + return selector; } - function getContextPath(element: HTMLElement): DOMContext[] { const path: DOMContext[] = []; let current = element; @@ -1708,18 +2178,19 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates continue; } - // Check for iframe + // Check for iframe or frame const ownerDocument = current.ownerDocument; - const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + const frameElement = ownerDocument?.defaultView?.frameElement; if (frameElement) { + const isFrame = frameElement.tagName === 'FRAME'; path.unshift({ - type: 'iframe', + type: isFrame ? 'frame' : 'iframe', element: current, - container: frameElement, + container: frameElement as (HTMLIFrameElement | HTMLFrameElement), document: ownerDocument }); - current = frameElement; + current = frameElement as HTMLElement; depth++; continue; } @@ -1786,7 +2257,6 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates } } - // if (listSelector === '') { if (element.tagName !== 'TABLE') { while (element.parentElement) { if (element.tagName.toLowerCase() === 'body' || @@ -1805,7 +2275,7 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates const significantOverlap = (childRect.width * childRect.height) / - (parentRect.width * parentRect.height) > 0.5; + (parentRect.width * parentRect.height) > 0.1; if (fullyContained && significantOverlap) { const nextParent = element.parentElement; @@ -1827,68 +2297,66 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates return selectors || { generalSelector: '' }; } else { - // When we have a list selector, we need special handling while maintaining shadow DOM support + // When we have a list selector, we need special handling while maintaining shadow DOM and frame support const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { - // Helper function to traverse shadow DOM - const traverseShadowDOM = (element: HTMLElement, depth: number = 0): HTMLElement => { - const MAX_SHADOW_DEPTH = 4; + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + let deepestElement = element; + + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { let current = element; + let shadowRoot = current.shadowRoot; let deepest = current; - - while (current && depth < MAX_SHADOW_DEPTH) { - const shadowRoot = current.shadowRoot; - if (!shadowRoot) break; - + let depth = 0; + const MAX_SHADOW_DEPTH = 4; + + while (shadowRoot && depth < MAX_SHADOW_DEPTH) { const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; if (!shadowElement || shadowElement === current) break; deepest = shadowElement; current = shadowElement; + shadowRoot = current.shadowRoot; depth++; } - + return deepest; }; - // Start with the element at the specified coordinates - let element = document.elementFromPoint(x, y) as HTMLElement; - if (!element) return null; - - // Initialize tracking variables - let deepestElement = element; - let depth = 0; - const MAX_IFRAME_DEPTH = 4; + const isInFrameset = () => { + let node = element; + while (node && node.parentElement) { + if (node.tagName === 'FRAMESET' || node.tagName === 'FRAME') { + return true; + } + node = node.parentElement; + } + return false; + }; - // First check if the initial element has a shadow root - deepestElement = traverseShadowDOM(element); + if (element.tagName === 'IFRAME') { + let currentIframe = element as HTMLIFrameElement; + let depth = 0; + const MAX_IFRAME_DEPTH = 4; - // If it's an iframe, traverse through iframe hierarchy - if (deepestElement.tagName === 'IFRAME') { - let currentIframe = deepestElement as HTMLIFrameElement; - while (currentIframe && depth < MAX_IFRAME_DEPTH) { try { - // Convert coordinates to iframe's local space const iframeRect = currentIframe.getBoundingClientRect(); const iframeX = x - iframeRect.left; const iframeY = y - iframeRect.top; - // Access iframe's document - const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document; - if (!iframeDoc) break; + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDocument) break; - // Get element at transformed coordinates in iframe - const iframeElement = iframeDoc.elementFromPoint(iframeX, iframeY) as HTMLElement; + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; if (!iframeElement) break; - // Check for shadow DOM within iframe - const shadowResult = traverseShadowDOM(iframeElement); - deepestElement = shadowResult; + deepestElement = traverseShadowDOM(iframeElement); - // If we found another iframe, continue traversing - if (shadowResult.tagName === 'IFRAME') { - currentIframe = shadowResult as HTMLIFrameElement; + if (iframeElement.tagName === 'IFRAME') { + currentIframe = iframeElement as HTMLIFrameElement; depth++; } else { break; @@ -1898,33 +2366,145 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates break; } } + } + else if (element.tagName === 'FRAME' || isInFrameset()) { + const framesToCheck = []; + + if (element.tagName === 'FRAME') { + framesToCheck.push(element as HTMLFrameElement); + } + + if (isInFrameset()) { + document.querySelectorAll('frame').forEach(frame => { + framesToCheck.push(frame as HTMLFrameElement); + }); + } + + let frameDepth = 0; + const MAX_FRAME_DEPTH = 4; + + const processFrames = (frames: HTMLFrameElement[], currentDepth: number) => { + if (currentDepth >= MAX_FRAME_DEPTH) return; + + for (const frameElement of frames) { + try { + const frameRect = frameElement.getBoundingClientRect(); + const frameX = x - frameRect.left; + const frameY = y - frameRect.top; + + if (frameX < 0 || frameY < 0 || frameX > frameRect.width || frameY > frameRect.height) { + continue; + } + + const frameDocument = + frameElement.contentDocument || + frameElement.contentWindow?.document; + + if (!frameDocument) continue; + + const frameElementAtPoint = frameDocument.elementFromPoint(frameX, frameY) as HTMLElement; + if (!frameElementAtPoint) continue; + + deepestElement = traverseShadowDOM(frameElementAtPoint); + + if (frameElementAtPoint.tagName === 'FRAME') { + processFrames([frameElementAtPoint as HTMLFrameElement], currentDepth + 1); + } + + break; + } catch (error) { + console.warn('Cannot access frame content:', error); + continue; + } + } + }; + + processFrames(framesToCheck, frameDepth); + } else { + deepestElement = traverseShadowDOM(element); } return deepestElement; }; - // Generate basic selector from element's tag and classes function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); - + + if (selector === 'frame' || selector === 'iframe') { + let baseSelector = selector; + + if (element.className) { + const classes = element.className.split(/\s+/).filter(Boolean); + if (classes.length > 0) { + const validClasses = classes.filter(cls => !cls.startsWith('!') && !cls.includes(':')); + if (validClasses.length > 0) { + baseSelector += '.' + validClasses.map(cls => CSS.escape(cls)).join('.'); + } + } + } + + if (element.id) { + return `${selector}#${CSS.escape(element.id)}`; + } + + if (element.getAttribute('name')) { + return `${selector}[name="${CSS.escape(element.getAttribute('name')!)}"]`; + } + + if (element.parentElement && element.parentElement.tagName === 'FRAMESET') { + const frameIndex = Array.from(element.parentElement.children) + .filter(child => child.tagName.toLowerCase() === selector) + .indexOf(element) + 1; + + if (frameIndex > 0) { + return `${selector}:nth-of-type(${frameIndex})`; + } + } + + if (element.parentElement) { + const siblings = Array.from(element.parentElement.children); + const identicalSiblings = siblings.filter(sibling => { + if (sibling === element) return false; + + let siblingSelector = sibling.tagName.toLowerCase(); + const siblingClassName = typeof sibling.className === 'string' ? sibling.className : ''; + if (siblingClassName) { + const siblingClasses = siblingClassName.split(/\s+/).filter(Boolean); + const validSiblingClasses = siblingClasses.filter(cls => !cls.startsWith('!') && !cls.includes(':')); + if (validSiblingClasses.length > 0) { + siblingSelector += '.' + validSiblingClasses.map(cls => CSS.escape(cls)).join('.'); + } + } + + return siblingSelector === baseSelector; + }); + + if (identicalSiblings.length > 0) { + const position = siblings.indexOf(element) + 1; + return `${baseSelector}:nth-child(${position})`; + } + } + + return baseSelector; + } + if (selector === 'td' && element.parentElement) { const siblings = Array.from(element.parentElement.children); const position = siblings.indexOf(element) + 1; return `${selector}:nth-child(${position})`; } - + if (element.className) { - const classes = element.className.split(/\s+/).filter(Boolean); + const classes = element.className.split(/\s+/).filter((cls: string) => Boolean(cls)); if (classes.length > 0) { - const validClasses = classes.filter(cls => !cls.startsWith('!') && !cls.includes(':')); + const validClasses = classes.filter((cls: string) => !cls.startsWith('!') && !cls.includes(':')); if (validClasses.length > 0) { selector += '.' + validClasses.map(cls => CSS.escape(cls)).join('.'); } } } - + if (element.parentElement) { - // Look for identical siblings const siblings = Array.from(element.parentElement.children); const identicalSiblings = siblings.filter(sibling => { if (sibling === element) return false; @@ -1947,11 +2527,11 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates selector += `:nth-child(${position})`; } } - + return selector; } - // Get complete context path (both iframe and shadow DOM) + // Get complete context path (iframe, frame, and shadow DOM) function getContextPath(element: HTMLElement): DOMContext[] { const path: DOMContext[] = []; let current = element; @@ -1973,18 +2553,19 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates continue; } - // Check for iframe + // Check for iframe or frame const ownerDocument = current.ownerDocument; - const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + const frameElement = ownerDocument?.defaultView?.frameElement; if (frameElement) { + const isFrame = frameElement.tagName === 'FRAME'; path.unshift({ - type: 'iframe', + type: isFrame ? 'frame' : 'iframe', element: current, - container: frameElement, + container: frameElement as (HTMLIFrameElement | HTMLFrameElement), document: ownerDocument }); - current = frameElement; + current = frameElement as HTMLElement; depth++; continue; } @@ -2045,9 +2626,9 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates const generalSelector = getSelectorPath(element); return { generalSelector }; - }, coordinates); + }, coordinates); - return selectors || { generalSelector: '' }; + return selectors || { generalSelector: '' }; } } catch (error) { console.error('Error in getNonUniqueSelectors:', error); @@ -2120,11 +2701,13 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro return `${hostSelector} >> ${elementSelector}`; } - // Check for iframe context + // Check for iframe/frame context const ownerDocument = element.ownerDocument; - const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + const frameElement = ownerDocument?.defaultView?.frameElement; if (frameElement) { - const frameSelector = getNonUniqueSelector(frameElement); + const frameSelector = getNonUniqueSelector(frameElement as HTMLElement); + const isFrame = frameElement.tagName === 'FRAME'; + // Use the appropriate delimiter based on whether it's a frame or iframe return `${frameSelector} :>> ${elementSelector}`; } @@ -2133,8 +2716,7 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro return `${parentSelector} > ${elementSelector}`; } - - // Function to get all children from special contexts + // Function to get all children from special contexts including frames function getSpecialContextChildren(element: HTMLElement): HTMLElement[] { const children: HTMLElement[] = []; @@ -2160,10 +2742,43 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro } } + // Get frame children + const frames = Array.from(element.querySelectorAll('frame')) as HTMLFrameElement[]; + for (const frame of frames) { + try { + const frameDoc = frame.contentDocument || frame.contentWindow?.document; + if (frameDoc) { + const frameElements = Array.from(frameDoc.querySelectorAll('*')) as HTMLElement[]; + children.push(...frameElements); + } + } catch (error) { + console.warn('Cannot access frame content:', error); + continue; + } + } + + // Check for framesets + const framesets = Array.from(element.querySelectorAll('frameset')) as HTMLElement[]; + for (const frameset of framesets) { + const framesToCheck = Array.from(frameset.querySelectorAll('frame')) as HTMLFrameElement[]; + for (const frame of framesToCheck) { + try { + const frameDoc = frame.contentDocument || frame.contentWindow?.document; + if (frameDoc) { + const frameElements = Array.from(frameDoc.querySelectorAll('*')) as HTMLElement[]; + children.push(...frameElements); + } + } catch (error) { + console.warn('Cannot access frameset frame content:', error); + continue; + } + } + } + return children; } - // Function to recursively get all descendant selectors including shadow DOM and iframes + // Function to recursively get all descendant selectors function getAllDescendantSelectors(element: HTMLElement): string[] { let selectors: string[] = []; @@ -2177,7 +2792,7 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro // Process regular descendants selectors = selectors.concat(getAllDescendantSelectors(child)); - // Process special context children (shadow DOM and iframes) + // Process special context children (shadow DOM, iframes, and frames) const specialChildren = getSpecialContextChildren(child); for (const specialChild of specialChildren) { const specialPath = getSelectorPath(specialChild); @@ -2202,7 +2817,7 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro return selectors; } - // Handle both shadow DOM and iframe parent selectors + // Handle both shadow DOM, iframe, and frame parent selectors let parentElements: HTMLElement[] = []; // Check for special context traversal in parent selector @@ -2242,6 +2857,42 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro continue; } } + + // Check for frame + if (element.tagName === 'FRAME') { + try { + const frameDoc = (element as HTMLFrameElement).contentDocument || + (element as HTMLFrameElement).contentWindow?.document; + if (frameDoc) { + const frameChildren = Array.from( + frameDoc.querySelectorAll(selectorParts[i]) + ) as HTMLElement[]; + newParentElements.push(...frameChildren); + } + } catch (error) { + console.warn('Cannot access frame content during traversal:', error); + continue; + } + } + + // Check for frameset + if (element.tagName === 'FRAMESET') { + const frames = Array.from(element.querySelectorAll('frame')) as HTMLFrameElement[]; + for (const frame of frames) { + try { + const frameDoc = frame.contentDocument || frame.contentWindow?.document; + if (frameDoc) { + const frameChildren = Array.from( + frameDoc.querySelectorAll(selectorParts[i]) + ) as HTMLElement[]; + newParentElements.push(...frameChildren); + } + } catch (error) { + console.warn('Cannot access frameset frame during traversal:', error); + continue; + } + } + } } parentElements = newParentElements;