Skip to content

Commit 1ce2288

Browse files
authored
Improve sitemap generate performance (#10795)
1 parent b2ef6ab commit 1ce2288

File tree

5 files changed

+254
-73
lines changed

5 files changed

+254
-73
lines changed

.changeset/famous-seals-camp.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@astrojs/sitemap": patch
3+
---
4+
5+
Improves performance when generating the sitemap data
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,77 @@
11
import type { EnumChangefreq } from 'sitemap';
22
import type { SitemapItem, SitemapOptions } from './index.js';
3-
import { parseUrl } from './utils/parse-url.js';
3+
import { parseI18nUrl } from './utils/parse-i18n-url.js';
44

55
/**
 * Build the list of sitemap entries for the given page URLs.
 *
 * The input array is not mutated; a sorted copy is used so that the output
 * order is stable between runs.
 *
 * @param pages Absolute URLs of the pages to include.
 * @param finalSiteUrl Site URL, forwarded to the i18n link lookup.
 * @param opts Optional sitemap settings (`changefreq`, `priority`, `lastmod`, `i18n`).
 * @returns One `SitemapItem` per page, in sorted order.
 */
export function generateSitemap(pages: string[], finalSiteUrl: string, opts?: SitemapOptions) {
  // TODO: find way to respect <link rel="canonical"> URLs here
  // Sort alphabetically (numeric-aware) so the sitemap is the same each time.
  const sortedUrls = pages.slice();
  sortedUrls.sort((left, right) => left.localeCompare(right, 'en', { numeric: true }));

  const lastmod = opts?.lastmod?.toISOString();
  const priority = opts?.priority;
  const changefreq = opts?.changefreq;

  // Alternate-language links are only produced when i18n is fully configured.
  const defaultLocale = opts?.i18n?.defaultLocale;
  const locales = opts?.i18n?.locales;
  const getI18nLinks: GetI18nLinks | undefined =
    defaultLocale && locales
      ? createGetI18nLinks(sortedUrls, defaultLocale, locales, finalSiteUrl)
      : undefined;

  const items: SitemapItem[] = [];
  for (let index = 0; index < sortedUrls.length; index++) {
    items.push({
      url: sortedUrls[index],
      links: getI18nLinks ? getI18nLinks(index) : undefined,
      lastmod,
      priority,
      changefreq: changefreq as EnumChangefreq,
    });
  }
  return items;
}
31+
32+
/** Looks up the alternate-language links for the URL at `urlIndex`, if any. */
type GetI18nLinks = (urlIndex: number) => SitemapItem['links'] | undefined;

/**
 * Create a lookup that returns, for the URL at a given index, the links to
 * every URL sharing the same locale-independent path (e.g. /en/foo and /es/foo).
 *
 * All URLs are parsed and grouped by path once, up front, so each lookup is a
 * single map access regardless of how many URLs have no translations.
 *
 * @param urls Page URLs; indexes passed to the returned function refer to this array.
 * @param defaultLocale Locale key used for URLs without a locale prefix.
 * @param locales Map from locale key (URL path prefix) to the `lang` value emitted in links.
 * @param finalSiteUrl Site URL that each page URL is parsed relative to.
 * @returns A function yielding the shared links array, or `undefined` when the
 *   URL could not be parsed or no other URL shares its path.
 */
function createGetI18nLinks(
  urls: string[],
  defaultLocale: string,
  locales: Record<string, string>,
  finalSiteUrl: string
): GetI18nLinks {
  // `parsedI18nUrls` has the same length as `urls`, matching correspondingly
  const parsedI18nUrls = urls.map((url) => parseI18nUrl(url, defaultLocale, locales, finalSiteUrl));

  // Group links by path in one pass. URLs with the same path share one array,
  // and entries keep the order of `urls`.
  const linksByPath = new Map<string, NonNullable<SitemapItem['links']>>();
  parsedI18nUrls.forEach((parsed, i) => {
    if (!parsed) {
      return;
    }
    let links = linksByPath.get(parsed.path);
    if (!links) {
      links = [];
      linksByPath.set(parsed.path, links);
    }
    links.push({
      url: urls[i],
      lang: locales[parsed.locale],
    });
  });

  return (urlIndex) => {
    const i18nUrl = parsedI18nUrls[urlIndex];
    if (!i18nUrl) {
      return undefined;
    }

    // A group of 0 or 1 (which is the URL itself) means there are no other
    // URLs that match this path, so no links are created.
    const links = linksByPath.get(i18nUrl.path);
    if (!links || links.length <= 1) {
      return undefined;
    }
    return links;
  };
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/** A page URL split into its locale key and locale-independent path. */
interface ParsedI18nUrl {
  /** Key of the matching entry in `locales`, or the default locale. */
  locale: string;
  /** Path with the locale prefix removed; always starts with `/`. */
  path: string;
}

// NOTE: The parameters have been schema-validated with Zod
/**
 * Split `url` into a locale and a locale-independent path.
 *
 * @param url Absolute page URL.
 * @param defaultLocale Locale reported when the path has no known locale prefix.
 * @param locales Configured locales; only its own keys are treated as locale prefixes.
 * @param base Site URL that `url` is expected to start with.
 * @returns The parsed locale and path, or `undefined` when `url` does not start with `base`.
 */
export function parseI18nUrl(
  url: string,
  defaultLocale: string,
  locales: Record<string, string>,
  base: string
): ParsedI18nUrl | undefined {
  if (!url.startsWith(base)) {
    return undefined;
  }

  let s = url.slice(base.length);

  // Handle root URL
  if (!s || s === '/') {
    return { locale: defaultLocale, path: '/' };
  }

  // `base` may or may not end with a slash; normalize to a leading slash.
  if (s[0] !== '/') {
    s = '/' + s;
  }

  // Get locale from path, e.g.
  // "/en-US/" -> "en-US"
  // "/en-US/foo" -> "en-US"
  const locale = s.split('/')[1];
  // Own-property check: the `in` operator would also match inherited names
  // such as "constructor" or "toString" and misreport them as locales.
  if (Object.prototype.hasOwnProperty.call(locales, locale)) {
    // "/en-US/foo" -> "/foo"
    let path = s.slice(1 + locale.length);
    if (!path) {
      path = '/';
    }
    return { locale, path };
  }

  return { locale: defaultLocale, path: s };
}

packages/integrations/sitemap/src/utils/parse-url.ts

-39
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
import assert from 'node:assert/strict';
2+
import { describe, it } from 'node:test';
3+
import { generateSitemap } from '../../dist/generate-sitemap.js';
4+
5+
const site = 'http://example.com';

// Unit tests for generateSitemap: ordering, option pass-through and i18n links.
describe('generateSitemap', () => {
  describe('basic', () => {
    // URLs of the three plain pages, in the order the sitemap should list them.
    const sortedPages = ['a', 'b', 'c'].map((name) => `${site}/${name}`);

    it('works', () => {
      const items = generateSitemap([...sortedPages], site);

      assert.equal(items.length, 3);
      sortedPages.forEach((expectedUrl, index) => {
        assert.equal(items[index].url, expectedUrl);
      });
    });

    it('sorts the items', () => {
      // Pages are supplied out of order on purpose.
      const items = generateSitemap([`${site}/c`, `${site}/a`, `${site}/b`], site);

      assert.equal(items.length, 3);
      sortedPages.forEach((expectedUrl, index) => {
        assert.equal(items[index].url, expectedUrl);
      });
    });

    it('sitemap props are passed to items', () => {
      const now = new Date();
      const items = generateSitemap([...sortedPages], site, {
        changefreq: 'monthly',
        lastmod: now,
        priority: 0.5,
      });

      assert.equal(items.length, 3);

      // Every item carries the same options; lastmod is serialized to ISO.
      sortedPages.forEach((expectedUrl, index) => {
        const item = items[index];
        assert.equal(item.url, expectedUrl);
        assert.equal(item.changefreq, 'monthly');
        assert.equal(item.lastmod, now.toISOString());
        assert.equal(item.priority, 0.5);
      });
    });
  });

  describe('i18n', () => {
    it('works', () => {
      const pages = [
        `${site}/a`,
        `${site}/b`,
        `${site}/c`,
        `${site}/es/a`,
        `${site}/es/b`,
        `${site}/es/c`,
        `${site}/fr/a`,
        `${site}/fr/b`,
        // `${site}/fr/c` is intentionally missing for testing
      ];
      const items = generateSitemap(pages, site, {
        i18n: {
          defaultLocale: 'en',
          locales: {
            en: 'en-US',
            es: 'es-ES',
            fr: 'fr-CA',
          },
        },
      });

      assert.equal(items.length, 8);

      const aLinks = [
        { url: `${site}/a`, lang: 'en-US' },
        { url: `${site}/es/a`, lang: 'es-ES' },
        { url: `${site}/fr/a`, lang: 'fr-CA' },
      ];
      const bLinks = [
        { url: `${site}/b`, lang: 'en-US' },
        { url: `${site}/es/b`, lang: 'es-ES' },
        { url: `${site}/fr/b`, lang: 'fr-CA' },
      ];
      // No French entry: the /fr/c page does not exist.
      const cLinks = [
        { url: `${site}/c`, lang: 'en-US' },
        { url: `${site}/es/c`, lang: 'es-ES' },
      ];

      // Expected [url, links] for each item, in sorted output order.
      const expected = [
        [`${site}/a`, aLinks],
        [`${site}/b`, bLinks],
        [`${site}/c`, cLinks],
        [`${site}/es/a`, aLinks],
        [`${site}/es/b`, bLinks],
        [`${site}/es/c`, cLinks],
        [`${site}/fr/a`, aLinks],
        [`${site}/fr/b`, bLinks],
      ];

      expected.forEach(([expectedUrl, expectedLinks], index) => {
        assert.equal(items[index].url, expectedUrl);
        assert.deepEqual(items[index].links, expectedLinks);
      });
    });
  });
});

0 commit comments

Comments
 (0)