@@ -237,168 +237,180 @@ function studyBackrefs (edResults, trResults = [], htmlFragments = {}, shortname
237
237
trResults = trResults || [ ] ;
238
238
const report = [ ] ;
239
239
240
- const recordAnomaly = recordCategorizedAnomaly ( report , 'links' , possibleAnomalies ) ;
241
-
242
240
edResults . forEach ( spec => {
243
241
if ( shortnameFilter && spec . shortname !== shortnameFilter ) return ;
244
- Object . keys ( spec . links ?. rawlinks || { } )
245
- . filter ( matchSpecUrl )
246
- . forEach ( link => {
247
- let shortname ;
248
- if ( spec . links . rawlinks [ link ] . specShortname ) {
249
- shortname = spec . links . rawlinks [ link ] . specShortname ;
250
- } else {
251
- let nakedLink = link ;
252
-
253
- // Ignoring links to PDF specs
254
- if ( nakedLink . endsWith ( '.pdf' ) ) {
255
- return ;
256
- }
257
-
258
- if ( nakedLink . endsWith ( '.html' ) ) {
259
- nakedLink = nakedLink . replace ( / \/ ( O v e r v i e w | o v e r v i e w | i n d e x ) \. h t m l $ / , '/' ) ;
260
- }
261
- if ( nakedLink [ nakedLink . length - 1 ] !== '/' && ! nakedLink . endsWith ( ".html" ) ) {
262
- nakedLink += '/' ;
263
- }
264
-
265
- // Detect links to dated specs
266
- const match = nakedLink . match ( / w w w \. w 3 \. o r g \/ T R \/ [ 0 - 9 ] { 4 } \/ ( [ A - Z ] + ) - ( .+ ) - [ 0 - 9 ] { 8 } \/ / ) ;
267
- if ( match ) {
268
- // ED should not link to dated versions of the spec, unless it
269
- // voluntarily links to previous versions of itself
270
- if ( ( match [ 2 ] !== spec . shortname || outdatedShortnames [ match [ 2 ] ] === spec . shortname ) && ! [ 'REC' , 'NOTE' ] . includes ( match [ 1 ] ) ) {
271
- recordAnomaly ( spec , 'datedUrls' , link ) ;
272
- }
242
+ studyLinks ( spec , spec . links ?. rawlinks , report , edResults , trResults , htmlFragments ) ;
243
+ // given the current limitation of classification of links for bikeshed
244
+ // https://github.com/w3c/reffy/issues/1584
245
+ // we also check autolinks for bikeshed specs
246
+ if ( spec . generator === "bikeshed" ) {
247
+ studyLinks ( spec , spec . links ?. autolinks , report , edResults , trResults , htmlFragments ) ;
248
+ }
249
+ } ) ;
250
+ return report ;
251
+ }
273
252
274
- // TODO: consider pursuing the analysis with the non-dated version,
275
- // but note this may trigger some obscure broken fragment messages
276
- // when a fragment exists in the dated version but no longer exists
277
- // in the ED.
278
- return ;
279
- }
253
/**
 * Study one set of links extracted from a spec and record the anomalies
 * found (category 'links') in the given report.
 *
 * For each link that passes `matchSpecUrl`, the function tries to associate
 * the link with a spec shortname, flags dated/outdated/non-canonical/unknown
 * targets, and then checks each anchor used with that link against the ids,
 * headings and definitions of the targeted spec in the crawl results.
 *
 * @param {Object} spec - Crawl entry of the spec whose links are studied.
 *   Its `shortname` and (when present) `series.shortname` are read.
 * @param {Object} [links] - Links keyed by URL. Each value may carry a
 *   `specShortname` and an `anchors` array. Nothing happens when falsy.
 * @param {Array} report - Anomaly report, handed to
 *   `recordCategorizedAnomaly` to create the recording function.
 * @param {Array} edResults - Crawl entries searched to resolve the targeted
 *   spec (presumably the Editor's Draft crawl, judging by the name).
 * @param {Array} trResults - Crawl entries searched for a TR counterpart of
 *   the targeted spec, used to detect "evolving" links.
 * @param {Object} [htmlFragments] - Map from anchor to the name of the page
 *   of the multipage HTML spec that holds it (used to build
 *   `https://html.spec.whatwg.org/multipage/<page>.html`).
 */
function studyLinks(spec, links, report, edResults, trResults, htmlFragments) {
  if (!links) return;

  const recordAnomaly = recordCategorizedAnomaly(report, 'links', possibleAnomalies);

  Object.keys(links)
    .filter(matchSpecUrl)
    .forEach(link => {
      // Captured up front: `link` may get rewritten further down, and the
      // entry must remain the one recorded for the original URL.
      const linkInfo = links[link];

      let shortname;
      if (linkInfo.specShortname) {
        shortname = linkInfo.specShortname;
      } else {
        let nakedLink = link;

        // Ignoring links to PDF specs
        if (nakedLink.endsWith('.pdf')) {
          return;
        }

        if (nakedLink.endsWith('.html')) {
          nakedLink = nakedLink.replace(/\/(Overview|overview|index)\.html$/, '/');
        }
        if (nakedLink[nakedLink.length - 1] !== '/' && !nakedLink.endsWith(".html")) {
          nakedLink += '/';
        }

        // Detect links to dated specs
        const match = nakedLink.match(/www\.w3\.org\/TR\/[0-9]{4}\/([A-Z]+)-(.+)-[0-9]{8}\//);
        if (match) {
          // ED should not link to dated versions of the spec, unless it
          // voluntarily links to previous versions of itself
          if ((match[2] !== spec.shortname || outdatedShortnames[match[2]] === spec.shortname) && !['REC', 'NOTE'].includes(match[1])) {
            recordAnomaly(spec, 'datedUrls', link);
          }

          // TODO: consider pursuing the analysis with the non-dated version,
          // but note this may trigger some obscure broken fragment messages
          // when a fragment exists in the dated version but no longer exists
          // in the ED.
          return;
        }

        // Check whether the naked link matches any known URL in the crawl
        shortname = (edResults.find(r =>
          r.url === nakedLink ||
          (r.release && r.release.url === nakedLink) ||
          (r.nightly && r.nightly.url === nakedLink) ||
          (r.series && nakedLink === `https://www.w3.org/TR/${r.series.shortname}/`) && r.series.currentSpecification === r.shortname) || {}).shortname;

        // If it does not match any known URL, try to compute a shortname out of
        // it directly.
        if (!shortname) {
          try {
            shortname = computeShortname(nakedLink);
          } catch (e) {
            recordAnomaly(spec, 'unknownSpecs', link);
            return;
          }
        }
      }

      if ((link.match(/w3\.org/) || link.match(/w3c\.github\.io/)) && shortNamesOfTransferedSpecs[shortname]) {
        // The specification should no longer be referenced.
        // In theory, we could still try to match the anchor against the
        // right spec. In practice, these outdated specs are sufficiently
        // outdated that it does not make a lot of sense to do so.
        recordAnomaly(spec, 'outdatedSpecs', link);
        return;
      }
      // Links to WHATWG commit snapshots
      if (link.match(/spec\.whatwg\.org/) && link.match(/commit-snapshots/)) {
        recordAnomaly(spec, 'outdatedSpecs', link);
        return;
      }

      if (link.match(/heycam\.github\.io/)) {
        recordAnomaly(spec, 'nonCanonicalRefs', link);
        shortname = 'webidl';
      }
      if (outdatedShortnames[shortname]) {
        shortname = outdatedShortnames[shortname];
        recordAnomaly(spec, 'nonCanonicalRefs', link);
      }

      // At this point, we managed to associate the link with a shortname,
      // let's check whether the shortname matches a spec in the crawl,
      // matching the exact spec shortname if possible, or the series
      // shortname otherwise (in which case we'll use the current spec).
      // Entries without a `series` are skipped by the second lookup instead
      // of throwing (the URL lookup above already anticipates such entries).
      const sourceSpec =
        edResults.find(s => s.shortname === shortname) ||
        edResults.find(s => s.series?.shortname === shortname && s.series?.currentSpecification === s.shortname);
      if (!sourceSpec) {
        if (!shortnameOfNonNormativeDocs.includes(shortname)) {
          recordAnomaly(spec, 'unknownSpecs', link);
        }
        return;
      }
      if (sourceSpec.error) {
        // no point in reporting an error on failed crawls
        return;
      }

      // Self-references might be broken because of ED vs TR, ignore that
      if (shortname === spec.shortname || shortname === spec.series?.shortname) {
        return;
      }

      // Look for a corresponding entry in the TR crawl, which we'll use to
      // distinguish between broken links and "evolving" links (meaning links
      // that exist in the TR version but no longer exist in the ED)
      const trSourceSpec =
        trResults.find(s => s.shortname === shortname) ||
        trResults.find(s => s.series?.shortname === shortname && s.series?.currentSpecification === s.shortname) ||
        {};
      const headings = sourceSpec.headings || [];
      const dfns = sourceSpec.dfns || [];
      const ids = sourceSpec.ids || [];

      // Check anchors
      const anchors = linkInfo.anchors || [];
      for (const anchor of anchors) {
        const baseLink = (sourceSpec.nightly?.url === link || sourceSpec.nightly?.pages?.includes(link)) ? link : sourceSpec.nightly?.url;
        const matchFullNightlyLink = matchAnchor(baseLink, anchor);
        // Same tolerance as for `baseLink` above: the URL may be undefined
        // when the crawl entry has neither a release nor a nightly version.
        const matchFullReleaseLink = matchAnchor((sourceSpec.release || sourceSpec.nightly)?.url, anchor);
        const isKnownId = ids.find(matchFullNightlyLink);
        const heading = headings.find(h => matchFullNightlyLink(h.href));
        const dfn = dfns.find(d => matchFullNightlyLink(d.href));
        if (!isKnownId) {
          if ((trSourceSpec.ids || []).find(matchFullReleaseLink) && link.match(/w3\.org\/TR\//)) {
            recordAnomaly(spec, 'evolvingLinks', link + '#' + anchor);
          } else {
            if (link.startsWith('https://html.spec.whatwg.org/C') || link.startsWith('http://html.spec.whatwg.org/C')) {
              recordAnomaly(spec, 'nonCanonicalRefs', link);
              // The rewritten link is kept for the remaining anchors, so the
              // non-canonical reference gets reported only once per link.
              link = link.replace('http:', 'https:').replace('https://html.spec.whatwg.org/C', 'https://html.spec.whatwg.org/multipage');
            }
            // Links to single-page version of HTML spec
            if (link === 'https://html.spec.whatwg.org/' &&
                // is there an equivalent id in the multipage spec?
                ids.find(i => i.startsWith('https://html.spec.whatwg.org/multipage/') &&
                  (i.endsWith('#' + anchor) || i.endsWith('#' + decodeURIComponent(anchor)) || i.endsWith('#' + encodeURIComponent(anchor))))) {
              // Should we keep track of those? ignoring for now
            } else if (link.startsWith('https://html.spec.whatwg.org/multipage') && htmlFragments &&
                htmlFragments[anchor] &&
                ids.find(matchAnchor(`https://html.spec.whatwg.org/multipage/${htmlFragments[anchor]}.html`, anchor))) {
              // Deal with anchors that are JS-redirected from
              // the multipage version of HTML
              recordAnomaly(spec, 'frailLinks', link + '#' + anchor);
            } else if (anchor.startsWith(':~:text=')) {
              // links using text fragments are inherently fragile
              recordAnomaly(spec, 'frailLinks', link + '#' + anchor);
            } else {
              recordAnomaly(spec, 'brokenLinks', link + '#' + anchor);
            }
          }
        } else if (!heading && !dfn) {
          recordAnomaly(spec, 'notDfn', link + '#' + anchor);
        } else if (dfn && dfn.access !== 'public') {
          recordAnomaly(spec, 'notExported', link + '#' + anchor);
        }
      }
    });
}
403
415
404
416
/**************************************************
0 commit comments