From 90569a1a83bf6edc569887ef0578cdd5805fc247 Mon Sep 17 00:00:00 2001 From: Matt Kane Date: Mon, 15 Dec 2025 07:39:16 +0000 Subject: [PATCH 1/2] feat: add `retainBody` option to the `glob()` loader --- .changeset/olive-crabs-build.md | 26 +++++++++++++++++++ packages/astro/src/content/loaders/glob.ts | 10 +++++-- packages/astro/test/content-layer.test.js | 20 ++++++++++++++ .../content-layer/src/content.config.ts | 19 ++++++++++++++ .../src/pages/collections.json.js | 3 +++ 5 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 .changeset/olive-crabs-build.md diff --git a/.changeset/olive-crabs-build.md b/.changeset/olive-crabs-build.md new file mode 100644 index 000000000000..eb2f8766f86a --- /dev/null +++ b/.changeset/olive-crabs-build.md @@ -0,0 +1,26 @@ +--- +'astro': minor +--- + +Adds a new `retainBody` option to the `glob()` loader to allow reducing the size of the data store. + +Currently, the `glob()` loader stores the raw body of each content file in the entry, in addition to the rendered HTML. + +The `retainBody` option defaults to `true`, but you can set it to `false` to prevent the raw body of content files from being stored in the data store. This significantly reduces the deployed size of the data store and helps avoid hitting size limits for sites with very large collections. + +The rendered body will still be available in the `entry.rendered.html` property for markdown files, and the `entry.filePath` property will still point to the original file. + +```js +import { defineCollection } from 'astro:content'; +import { glob } from 'astro/loaders'; + +const blog = defineCollection({ + loader: glob({ + pattern: '**/*.md', + base: './src/content/blog', + retainBody: false + }), +}); +``` + +When `retainBody` is `false`, `entry.body` will be `undefined` instead of containing the raw file contents. diff --git a/packages/astro/src/content/loaders/glob.ts b/packages/astro/src/content/loaders/glob.ts index 4040a4e614a6..6aaecc08b288 100644 --- a/packages/astro/src/content/loaders/glob.ts +++ b/packages/astro/src/content/loaders/glob.ts @@ -30,6 +30,12 @@ interface GlobOptions { * @returns The ID of the entry. Must be unique per collection. **/ generateId?: (options: GenerateIdOptions) => string; + /** + * Retains the unparsed body of the file in the data store, in addition to the rendered HTML. + * If `false`, `entry.body` will be undefined if the content type has a parser. + * Defaults to `true`. + */ + retainBody?: boolean; } function generateIdDefault({ entry, base, data }: GenerateIdOptions): string { @@ -201,7 +207,7 @@ export function glob(globOptions: GlobOptions): Loader { store.set({ id, data: parsedData, - body, + body: globOptions.retainBody === false && rendered ? undefined : body, filePath: relativePath, digest, rendered, @@ -214,7 +220,7 @@ export function glob(globOptions: GlobOptions): Loader { store.set({ id, data: parsedData, - body, + body: globOptions.retainBody === false ? undefined : body, filePath: relativePath, digest, deferredRender: true, diff --git a/packages/astro/test/content-layer.test.js b/packages/astro/test/content-layer.test.js index 0dc171ceeddf..d3ceafca44b5 100644 --- a/packages/astro/test/content-layer.test.js +++ b/packages/astro/test/content-layer.test.js @@ -106,6 +106,26 @@ describe('Content Layer', () => { ); }); + it('retains body by default in glob() loader', async () => { + assert.ok(json.hasOwnProperty('spacecraftWithBody')); + assert.ok(Array.isArray(json.spacecraftWithBody)); + // All entries should have non-empty body + const columbia = json.spacecraftWithBody.find((s) => s.id === 'columbia'); + assert.ok(columbia, 'columbia entry should exist'); + assert.ok(columbia.body, 'body should be present'); + assert.ok(columbia.body.length > 0, 'body should not be empty'); + assert.ok(columbia.body.includes('Space Shuttle Columbia'), 'body should contain markdown content'); + }); + + it('clears body when retainBody is false in glob() loader', async () => { + assert.ok(json.hasOwnProperty('spacecraftNoBody')); + assert.ok(Array.isArray(json.spacecraftNoBody)); + // All entries should have undefined body + const columbia = json.spacecraftNoBody.find((s) => s.id === 'columbia'); + assert.ok(columbia, 'columbia entry should exist'); + assert.equal(columbia.body, undefined, 'body should be undefined when retainBody is false'); + }); + it('Returns nested json `file()` loader collection', async () => { assert.ok(json.hasOwnProperty('nestedJsonLoader')); assert.ok(Array.isArray(json.nestedJsonLoader)); diff --git a/packages/astro/test/fixtures/content-layer/src/content.config.ts b/packages/astro/test/fixtures/content-layer/src/content.config.ts index d3a173ff654d..c44852c551bc 100644 --- a/packages/astro/test/fixtures/content-layer/src/content.config.ts +++ b/packages/astro/test/fixtures/content-layer/src/content.config.ts @@ -91,6 +91,24 @@ const spacecraft = defineCollection({ }), }); +// Same as spacecraft, but with retainBody: false +const spacecraftNoBody = defineCollection({ + loader: glob({ pattern: '*.md', base: absoluteRoot, retainBody: false }), + schema: ({ image }) => + z.object({ + title: z.string(), + description: z.string(), + publishedDate: z.coerce.date(), + tags: z.array(z.string()), + heroImage: image().optional(), + cat: reference('cats').default('siamese'), + something: z + .string() + .optional() + .transform((str) => ({ type: 'test', content: str })), + }), +}); + const cats = defineCollection({ loader: async function () { @@ -275,6 +293,7 @@ export const collections = { numbersToml, numbersYaml, spacecraft, + spacecraftNoBody, increment, images, artists, diff --git a/packages/astro/test/fixtures/content-layer/src/pages/collections.json.js b/packages/astro/test/fixtures/content-layer/src/pages/collections.json.js index 0eed94939599..a9f84a0836af 100644 --- a/packages/astro/test/fixtures/content-layer/src/pages/collections.json.js +++ b/packages/astro/test/fixtures/content-layer/src/pages/collections.json.js @@ -16,6 +16,7 @@ export async function GET() { const referencedEntry = await getEntry(entryWithReference.data.cat); const spacecraft = await getCollection('spacecraft'); + const spacecraftNoBody = await getCollection('spacecraftNoBody'); const entryWithImagePath = await getEntry('spacecraft', 'lunar-module'); @@ -66,6 +67,8 @@ export async function GET() { csvLoader, atlantis, spacecraft: spacecraft.map(({id}) => id).sort((a, b) => a.localeCompare(b)), + spacecraftWithBody: spacecraft.map(({id, body}) => ({id, body})), + spacecraftNoBody: spacecraftNoBody.map(({id, body}) => ({id, body})), }) ); } From fe1513f6b68df917e345dd0cf0c012b141bd104d Mon Sep 17 00:00:00 2001 From: Matt Kane Date: Mon, 15 Dec 2025 14:17:39 +0000 Subject: [PATCH 2/2] Remove redundant check on rendered --- packages/astro/src/content/loaders/glob.ts | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/packages/astro/src/content/loaders/glob.ts b/packages/astro/src/content/loaders/glob.ts index 6aaecc08b288..8c980420a46d 100644 --- a/packages/astro/src/content/loaders/glob.ts +++ b/packages/astro/src/content/loaders/glob.ts @@ -207,7 +207,7 @@ export function glob(globOptions: GlobOptions): Loader { store.set({ id, data: parsedData, - body: globOptions.retainBody === false && rendered ? undefined : body, + body: globOptions.retainBody === false ? undefined : body, filePath: relativePath, digest, rendered, @@ -227,7 +227,14 @@ export function glob(globOptions: GlobOptions): Loader { legacyId, }); } else { - store.set({ id, data: parsedData, body, filePath: relativePath, digest, legacyId }); + store.set({ + id, + data: parsedData, + body: globOptions.retainBody === false ? undefined : body, + filePath: relativePath, + digest, + legacyId, + }); } fileToIdMap.set(filePath, id);