 * @author Sean Burke <@seantomburke>
 */

-import { parseStringPromise } from 'xml2js';
+import { XMLParser } from 'fast-xml-parser';
import got from 'got';
import zlib from 'zlib';
import pLimit from 'p-limit';
@@ -95,6 +95,7 @@ export default class Sitemapper {
      errors: results.errors || [],
    };
  }
+
  /**
   * Get the timeout
   *
@@ -174,7 +175,7 @@ export default class Sitemapper {
  }

  /**
-   * Requests the URL and uses parseStringPromise to parse through and find the data
+   * Requests the URL and uses fast-xml-parser to parse through and find the data
   *
   * @private
   * @param {string} [url] - the Sitemaps url (e.g https://wp.seantburke.com/sitemap.xml)
@@ -218,8 +219,10 @@ export default class Sitemapper {
        responseBody = response.body;
      }

-      // otherwise parse the XML that was returned.
-      const data = await parseStringPromise(responseBody);
+      // Parse XML using fast-xml-parser
+      const parser = new XMLParser();
+
+      const data = parser.parse(responseBody.toString());

      // return the results
      return { error: null, data };
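For context on this hunk: `XMLParser.parse()` in fast-xml-parser is synchronous, so the `await` that `parseStringPromise` needed is dropped, and the `.toString()` call normalizes a Buffer body (as returned by `decompressResponseBody`) to a string before parsing. A minimal sketch of the new call, assuming default parser options and a made-up sitemap index URL:

```js
import { XMLParser } from 'fast-xml-parser';

// Default options: attributes (e.g. xmlns) are ignored and text values come back
// as plain strings rather than xml2js-style single-element arrays.
const parser = new XMLParser();

const data = parser.parse(
  '<sitemapindex><sitemap><loc>https://example.com/sitemap-0.xml</loc></sitemap></sitemapindex>'
);
// data.sitemapindex.sitemap.loc === 'https://example.com/sitemap-0.xml'
```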
@@ -312,26 +315,32 @@ export default class Sitemapper {
      if (this.debug) {
        console.debug(`Urlset found during "crawl('${url}')"`);
      }
-      // filter out any urls that are older than the lastmod
-      const sites = data.urlset.url
+
+      // Convert single object to array if needed
+      const urlArray = Array.isArray(data.urlset.url)
+        ? data.urlset.url
+        : [data.urlset.url];
+
+      // Begin filtering the urls
+      const sites = urlArray
        .filter((site) => {
          if (this.lastmod === 0) return true;
          if (site.lastmod === undefined) return false;
-          const modified = new Date(site.lastmod[0]).getTime();
+          const modified = new Date(site.lastmod).getTime();

          return modified >= this.lastmod;
        })
        .filter((site) => {
-          return !this.isExcluded(site.loc[0]);
+          return !this.isExcluded(site.loc);
        })
        .map((site) => {
          if (!this.fields) {
-            return site.loc && site.loc[0];
+            return site.loc;
          } else {
            let fields = {};
            for (const [field, active] of Object.entries(this.fields)) {
              if (active && site[field]) {
-                fields[field] = site[field][0];
+                fields[field] = site[field];
              }
            }
            return fields;
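The normalization above exists because of a shape difference between the two parsers: xml2js wraps every child node (including text) in an array, which is why `site.loc[0]` was needed, while fast-xml-parser returns a single object when an element appears once and an array only when it repeats. A small sketch of that behavior, assuming default options and placeholder URLs:

```js
import { XMLParser } from 'fast-xml-parser';

const parser = new XMLParser();

// One <url> entry: urlset.url is a plain object and loc is a string.
const one = parser.parse(
  '<urlset><url><loc>https://example.com/a</loc></url></urlset>'
);
console.log(Array.isArray(one.urlset.url)); // false
console.log(one.urlset.url.loc); // 'https://example.com/a'

// Repeated <url> entries: urlset.url becomes an array.
const many = parser.parse(
  '<urlset><url><loc>https://example.com/a</loc></url><url><loc>https://example.com/b</loc></url></urlset>'
);
console.log(Array.isArray(many.urlset.url)); // true

// Hence the wrapper before filtering, and `site.loc` instead of `site.loc[0]`.
const urlArray = Array.isArray(one.urlset.url) ? one.urlset.url : [one.urlset.url];
```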
@@ -349,7 +358,7 @@ export default class Sitemapper {
      }
      // Map each child url into a promise to create an array of promises
      const sitemap = data.sitemapindex.sitemap
-        .map((map) => map.loc && map.loc[0])
+        .map((map) => map.loc)
        .filter((url) => {
          return !this.isExcluded(url);
        });
@@ -441,8 +450,8 @@ export default class Sitemapper {
   * @param {Buffer} body - body of the gzipped file
   * @returns {boolean}
   */
-  decompressResponseBody(body) {
-    return new Promise((resolve, reject) => {
+  async decompressResponseBody(body) {
+    return await new Promise((resolve, reject) => {
      const buffer = Buffer.from(body);
      zlib.gunzip(buffer, (err, result) => {
        if (err) {
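The `async`/`return await` change above keeps the callback-based `zlib.gunzip` wrapped in a manually constructed Promise. A possible simplification using Node's built-in `util.promisify` is sketched below; this is only an alternative, not what the commit does:

```js
import zlib from 'zlib';
import { promisify } from 'util';

const gunzip = promisify(zlib.gunzip);

// Hypothetical alternative body: resolves with the decompressed Buffer,
// rejects if the input is not valid gzip data.
async function decompressResponseBody(body) {
  return gunzip(Buffer.from(body));
}
```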
@@ -488,7 +497,7 @@ export default class Sitemapper {
 *
 * @typedef {Object} ParseData
 *
- * @property {Error} error that either comes from `parseStringPromise` or `got` or custom error
+ * @property {Error} error that either comes from fast-xml-parser or `got` or custom error
 * @property {Object} data
 * @property {string} data.url - URL of sitemap
 * @property {Array} data.urlset - Array of returned URLs