-
Notifications
You must be signed in to change notification settings - Fork 37
/
utils.ts
88 lines (79 loc) · 2.32 KB
/
utils.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import type { TextResponse } from '../download.ts'
/**
 * Type guard for attribute values: accepts only non-empty strings.
 *
 * @param attr Attribute value to check; may be `null`.
 * @returns `true` when `attr` is a string containing at least one character.
 */
export function isString(attr: null | string): attr is string {
  if (typeof attr !== 'string') return false
  return attr.length !== 0
}
/**
 * Checks whether a response body contains the start of an `<html` tag,
 * ignoring letter case.
 *
 * @param text Response whose `text` property holds the body to inspect.
 * @returns `true` when `<html` occurs anywhere in the lower-cased body.
 */
export function isHTML(text: TextResponse): boolean {
  let lowered = text.text.toLocaleLowerCase()
  return lowered.indexOf('<html') !== -1
}
/**
 * Resolves a link's `href` to an absolute URL, honoring the `xml:base`
 * attributes declared on the link's ancestors.
 *
 * Every ancestor base is collected up to the root, then the chain is resolved
 * outermost-first starting from `baseUrl`. The walk deliberately does not stop
 * at a root-relative base such as `/posts/`: that value still takes its origin
 * from an outer `xml:base`, which may differ from the origin of `baseUrl`.
 * An absolute base simply replaces whatever was resolved before it, so no
 * early exit is needed for correctness.
 *
 * @param link Anchor or link element whose `href` is resolved.
 * @param baseUrl Absolute URL used as the outermost base.
 * @returns The absolute URL as a string.
 * @throws TypeError from `new URL` when a segment can't be parsed against
 *   its base.
 */
function buildFullURL(
  link: HTMLAnchorElement | HTMLLinkElement,
  baseUrl: string
): string {
  // Innermost first: the href itself, then each ancestor's xml:base.
  let urlSegments: string[] = [link.getAttribute('href') ?? '']
  let parent: Element | null = link.parentElement
  while (parent) {
    let path = parent.getAttribute('xml:base')
    // A missing or empty xml:base leaves the inherited base unchanged.
    if (path) urlSegments.push(path)
    parent = parent.parentElement
  }
  // Fold from the outermost base inwards so each segment is resolved
  // against the base established by its ancestors.
  return urlSegments.reduceRight(
    (base, url) => new URL(url, base).href,
    baseUrl
  )
}
/**
 * Collects absolute URLs of all `<link>` elements whose `type` attribute
 * equals the requested value and whose `href` is a non-empty string.
 *
 * @param text Response to parse; its `url` is the base for relative links.
 * @param type Exact value the `type` attribute must have.
 * @returns Resolved URLs in document order, or an empty array when the
 *   response can't be parsed into a document.
 */
export function findLinksByType(text: TextResponse, type: string): string[] {
  let document = text.parseXml()
  if (!document) return []

  let urls: string[] = []
  for (let link of Array.from(document.querySelectorAll('link'))) {
    if (link.getAttribute('type') !== type) continue
    if (!isString(link.getAttribute('href'))) continue
    urls.push(buildFullURL(link, text.url))
  }
  return urls
}
/**
 * Collects absolute URLs of `<a>` elements that look relevant either by
 * their visible text or by their `href`.
 *
 * An anchor without a non-empty `href` is always skipped. Otherwise it is
 * kept when `textPattern` is given and matches its non-empty text content,
 * or, failing that, when `hrefPattern` matches the raw `href` value.
 *
 * @param text Response to parse; its `url` is the base for relative links.
 * @param hrefPattern Pattern tested against the raw `href` attribute.
 * @param textPattern Optional pattern tested against the anchor's text.
 * @returns Resolved URLs in document order, or an empty array when the
 *   response can't be parsed into a document.
 */
export function findAnchorHrefs(
  text: TextResponse,
  hrefPattern: RegExp,
  textPattern?: RegExp
): string[] {
  let document = text.parseXml()
  if (!document) return []

  let anchors = Array.from(document.querySelectorAll('a'))
  // Select first, resolve afterwards, so every pattern test runs before
  // any URL is built.
  let matching = anchors.filter(anchor => {
    let href = anchor.getAttribute('href')
    if (!href) return false
    let label = anchor.textContent
    let labelMatches = Boolean(
      textPattern && label && textPattern.test(label)
    )
    return labelMatches || hrefPattern.test(href)
  })
  return matching.map(anchor => buildFullURL(anchor, text.url))
}
/**
 * Converts a date string into a Unix timestamp expressed in seconds.
 *
 * @param date Date string understood by the `Date` constructor.
 * @returns Seconds since the Unix epoch (fractional when the date has
 *   milliseconds), or `undefined` when `date` is empty, missing, or
 *   can't be parsed.
 */
export function toTime(date: null | string | undefined): number | undefined {
  if (!date) return undefined
  let seconds = new Date(date).valueOf() / 1000
  return Number.isNaN(seconds) ? undefined : seconds
}
/**
 * Reads one attribute from each element and returns the non-empty values.
 *
 * @param attr Name of the attribute holding the image address.
 * @param elements Elements to inspect; `null` or `undefined` yields an
 *   empty result.
 * @returns Attribute values in iteration order, skipping elements where
 *   the attribute is missing or empty.
 */
export function findImageByAttr(
  attr: 'src' | 'url',
  elements?: Iterable<Element> | null
): string[] {
  let urls: string[] = []
  if (!elements) return urls
  for (let element of Array.from(elements)) {
    let value = element.getAttribute(attr)
    if (value) urls.push(value)
  }
  return urls
}
/**
 * Removes `null`, `undefined`, and duplicate values from a collection.
 *
 * @param collection Values to deduplicate.
 * @returns Distinct non-nullish values, ordered by first occurrence.
 */
export function unique<T extends number | string = string>(
  collection: Iterable<null | T | undefined>
): T[] {
  let seen = new Set<T>()
  for (let item of collection) {
    // Only nullish values are dropped; 0 and '' are kept.
    if (item !== null && item !== undefined) seen.add(item)
  }
  return Array.from(seen)
}