@@ -207,69 +207,82 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
/**
 * Resolves `config.selector` to the list of matching elements, following the
 * custom traversal delimiters embedded in the selector string:
 *
 *   - `A :>> B` : match `A` in the current context, keep only `<iframe>`
 *                 elements, then match `B` inside each iframe's document.
 *   - `A >> B`  : match `A` in the current context, then match `B` inside each
 *                 match's `shadowRoot` (or inside the match itself when it
 *                 exposes no shadow root).
 *
 * Both delimiters may be mixed in a single selector. A selector without any
 * delimiter is a plain `document.querySelectorAll` lookup.
 *
 * @param {{selector: string}} config - Object whose `selector` is the
 *   (possibly delimited) selector string.
 * @returns {Element[]} Elements matched by the last selector part; an empty
 *   array when any step of the traversal matches nothing.
 */
function findAllElements(config) {
  const { selector } = config;

  // ':>>' itself contains '>>', so a single check covers both delimiters.
  if (!selector.includes('>>')) {
    return Array.from(document.querySelectorAll(selector));
  }

  // The capturing group keeps the delimiters in the result, giving
  // [part, delimiter, part, ..., part]. ':>>' must be the first alternative,
  // otherwise its trailing '>>' would be consumed as a shadow delimiter.
  const tokens = selector.split(/(:>>|>>)/);
  let roots = [document];

  for (let i = 0; i < tokens.length; i += 2) {
    const part = tokens[i].trim();
    // Delimiter FOLLOWING this part; undefined for the last part.
    const delimiter = tokens[i + 1];
    const depth = i / 2;
    const next = [];

    for (const root of roots) {
      let found;
      try {
        found = Array.from(root.querySelectorAll(part));
      } catch (error) {
        // An empty or invalid part makes querySelectorAll throw; skip this
        // root instead of aborting the whole lookup.
        console.warn('Cannot query selector part:', error, {
          part,
          root,
          index: depth,
        });
        continue;
      }

      for (const el of found) {
        if (delimiter === undefined) {
          // Last part: every match is a result, including shadow hosts.
          next.push(el);
        } else if (delimiter === ':>>') {
          // Only iframes can be descended into with ':>>'.
          if (el.tagName !== 'IFRAME') {
            continue;
          }
          try {
            // Reading the document of an inaccessible frame may yield null or
            // throw; either way the frame is skipped rather than queried as
            // if the <iframe> element itself were the document.
            const frameDocument = el.contentDocument || el.contentWindow?.document;
            if (frameDocument) {
              next.push(frameDocument);
            }
          } catch (error) {
            console.warn('Cannot access iframe content:', error, {
              part,
              element: el,
              index: depth,
            });
          }
        } else {
          // '>>': descend into the shadow root when one is exposed, otherwise
          // keep searching beneath the element itself.
          next.push(el.shadowRoot || el);
        }
      }
    }

    if (next.length === 0) {
      console.warn('No elements found for part:', part, 'at depth:', depth);
      return [];
    }
    roots = next;
  }

  return roots;
}
274287
275288 // Modified to handle iframe context for URL resolution
0 commit comments