Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increase performance of checkLink.ts #89

Merged
merged 1 commit into from
Mar 17, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 45 additions & 47 deletions checkLink.ts
Original file line number Diff line number Diff line change
@@ -1,61 +1,59 @@
const axios = require("axios");

// HTTP status codes that checkLink treats as false positives: a response
// carrying one of these does not mark the link as broken.
const ignoredCodes: Set<number> = new Set<number>([999, 429, 403, 401]);

// Hosts (compared against URL.host) whose links are reported as not broken.
const ignoredURLs: Set<string> = new Set<string>([
  "example.com",
  "www.example.com",
  "example.org",
  "www.example.org",
  "goo.gl",
  "fonts.googleapis.com",
  "fonts.gstatic.com",
]);

// Request configuration handed to every axios call. The headers carry a
// Firefox User-Agent string along with the other header values listed here.
const params: object = {
  headers: {
    Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.5",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "cross-site",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
    "X-Amzn-Trace-Id": "Root=1-659f58c5-4de24ef7384486270161f185",
  },
};

/**
 * Extracts the numeric HTTP status from an error thrown by an axios request.
 *
 * @param err - The value caught from a failed request.
 * @returns The response status, or `undefined` when the error carries no
 *   response (for example a network-level failure) or no numeric status.
 */
function getErrorStatus(err: unknown): number | undefined {
  if (typeof err !== "object" || err === null) {
    return undefined;
  }
  const status = (err as { response?: { status?: unknown } }).response?.status;
  return typeof status === "number" ? status : undefined;
}

/**
 * Checks whether a link is broken.
 *
 * Links whose host is listed in `ignoredURLs` are reported as working without
 * any request being made. Otherwise a HEAD request is sent; if the server
 * answers 405, the check is retried with GET. A failure whose status is listed
 * in `ignoredCodes` is treated as a false positive.
 *
 * @param link - Absolute URL to check.
 * @returns `true` if the link is broken, `false` otherwise.
 * @throws TypeError if `link` cannot be parsed by the `URL` constructor.
 */
export async function checkLink(link: string): Promise<boolean> {
  const url = new URL(link);
  if (ignoredURLs.has(url.host)) {
    return false;
  }

  let status: number | undefined;
  try {
    await axios.head(link, params);
    return false;
  } catch (headErr: unknown) {
    status = getErrorStatus(headErr);
  }

  // HEAD is not allowed for this resource: retry with GET. The status is a
  // number, so it must be compared against 405 rather than the string "405".
  if (status === 405) {
    try {
      await axios.get(link, params);
      return false;
    } catch (getErr: unknown) {
      // Judge the link by the GET outcome, not by the stale 405 from HEAD.
      status = getErrorStatus(getErr);
    }
  }

  // No status means no response was received at all, so the link is broken.
  // A status in ignoredCodes is a false positive.
  return status === undefined || !ignoredCodes.has(status);
}