Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions packages/astro/src/content/consts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ export const CONTENT_FLAGS = [
] as const;

export const CONTENT_TYPES_FILE = 'content.d.ts';

// File name of the single-file data store; resolved against the `.astro` directory in dev
// and against the cache directory otherwise.
export const DATA_STORE_FILE = 'data-store.json';
// Manifest stored inside the data store directory. It maps each collection name to the
// ordered chunk file names that hold its serialized entries.
export const DATA_STORE_MANIFEST_FILE = '__manifest.json';
// Directory that holds the chunked data store. It is used as a base for relative URL
// resolution (`new URL('./file', dir)`), which is why it ends with a slash.
export const DATA_STORE_DIR = 'data-store/';

export const ASSET_IMPORTS_FILE = 'content-assets.mjs';
export const MODULES_IMPORTS_FILE = 'content-modules.mjs';
export const COLLECTIONS_MANIFEST_FILE = 'collections/collections.json';
Expand Down
29 changes: 29 additions & 0 deletions packages/astro/src/content/content-layer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import {
ASSET_IMPORTS_FILE,
COLLECTIONS_MANIFEST_FILE,
CONTENT_LAYER_TYPE,
DATA_STORE_DIR,
DATA_STORE_FILE,
MODULES_IMPORTS_FILE,
} from './consts.js';
Expand Down Expand Up @@ -463,6 +464,7 @@ async function simpleLoader<TData extends { id: string }>(
),
});
}

/**
* Get the path to the data store file.
* During development, this is in the `.astro` directory so that the Vite watcher can see it.
Expand All @@ -471,3 +473,30 @@ async function simpleLoader<TData extends { id: string }>(
export function getDataStoreFile(settings: AstroSettings, isDev: boolean) {
  // Dev builds resolve against `.astro`; everything else resolves against the cache directory
  const baseDir = isDev ? settings.dotAstroDir : settings.config.cacheDir;
  return new URL(DATA_STORE_FILE, baseDir);
}

/**
 * Resolve the URL of the data store directory.
 *
 * In development the directory is resolved against the `.astro` directory so that the
 * Vite watcher can see it. In production it is resolved against the cache directory so
 * that it's preserved between builds.
 */
export function getDataStoreDir(settings: AstroSettings, isDev: boolean) {
  const parentDir = isDev ? settings.dotAstroDir : settings.config.cacheDir;
  return new URL(DATA_STORE_DIR, parentDir);
}

/**
 * Creates a holder for at most one `ContentLayer` instance.
 *
 * - `init` disposes the current instance (if any), creates a new one from `options` and returns it.
 * - `get` returns the current instance, or `null` when none exists.
 * - `dispose` disposes the current instance (if any) and clears the holder.
 */
function contentLayerSingleton() {
  let current: ContentLayer | null = null;

  function init(options: ContentLayerOptions) {
    // Tear down the previous layer before replacing it
    current?.dispose();
    current = new ContentLayer(options);
    return current;
  }

  function get() {
    return current;
  }

  function dispose() {
    current?.dispose();
    current = null;
  }

  return { init, get, dispose };
}

// Module-level holder created once when this module is evaluated; exposes `init`, `get` and `dispose`.
export const globalContentLayer = contentLayerSingleton();
39 changes: 38 additions & 1 deletion packages/astro/src/content/data-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,39 @@ export class ImmutableDataStore {
return this._collections;
}

/**
 * Rebuilds the collections map from an expanded manifest.
 *
 * An expanded manifest has the same shape as the on-disk manifest, except that every
 * file name has been replaced by that file's contents: either a raw string or an ESM
 * module object whose `default` export carries the string.
 *
 * @param manifest Collection name -> list of chunks -> list of string parts of that chunk.
 * @returns A map of collection name to a map of entry id to entry.
 */
static async manifestToMap(manifest: Record<string, any[][]>) {
  const collections = new Map();

  Object.entries(manifest).forEach(([collectionName, chunks]) => {
    const merged = new Map<string, any>();

    chunks.forEach((parts) => {
      // Glue the parts back together; module objects contribute their default export
      const serialized = parts.reduce(
        (text: string, part) => text + (typeof part === 'string' ? part : part.default),
        '',
      );

      // Each serialized chunk holds one slice of the collection (up to 1000 entries)
      const restored: Map<string, any> = devalue.parse(serialized);

      // Fold the slice into the full collection
      for (const [id, entry] of restored) {
        merged.set(id, entry);
      }
    });

    collections.set(collectionName, merged);
  });

  return collections;
}

/**
* Attempts to load a DataStore from the virtual module.
* This only works in Vite.
Expand All @@ -94,7 +127,11 @@ export class ImmutableDataStore {
if (data.default instanceof Map) {
return ImmutableDataStore.fromMap(data.default);
}
const map = devalue.unflatten(data.default);
if (Array.isArray(data.default)) {
const map = devalue.unflatten(data.default);
return ImmutableDataStore.fromMap(map);
}
const map = await this.manifestToMap(data.default);
return ImmutableDataStore.fromMap(map);
} catch {}
return new ImmutableDataStore();
Expand Down
136 changes: 126 additions & 10 deletions packages/astro/src/content/mutable-data-store.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,27 @@
import { existsSync, promises as fs, type PathLike } from 'node:fs';
import * as devalue from 'devalue';
import { Traverse } from 'neotraverse/modern';
import type { XXHashAPI } from 'xxhash-wasm';
import xxhash from 'xxhash-wasm';
import { imageSrcToImportId, importIdToSymbolName } from '../assets/utils/resolveImports.js';
import { AstroError, AstroErrorData } from '../core/errors/index.js';
import { IMAGE_IMPORT_PREFIX } from './consts.js';
import { emptyDir } from '../core/fs/index.js';
import { DATA_STORE_MANIFEST_FILE, IMAGE_IMPORT_PREFIX } from './consts.js';
import { type DataEntry, ImmutableDataStore } from './data-store.js';
import { contentModuleToId } from './utils.js';
import { chunkMap, chunkString, contentModuleToId, sanitizeFileName } from './utils.js';

// NOTE(review): presumably the delay, in milliseconds, of the debounced save timer — the
// `setTimeout` delay argument is not visible in this excerpt; confirm.
const SAVE_DEBOUNCE_MS = 500;

// NOTE(review): a depth limit; its use is not visible in this excerpt.
const MAX_DEPTH = 10;
// Byte limit handed to `chunkString` when a serialized collection chunk is split into files.
const CHUNK_SIZE_LIMIT = 20 * 1024 * 1024; // 20MB in bytes

/**
* Extends the DataStore with the ability to change entries and write them to disk.
* This is kept as a separate class to avoid needing node builtins at runtime, when read-only access is all that is needed.
*/
export class MutableDataStore extends ImmutableDataStore {
#file?: PathLike;
#manifestFile?: URL;
#dir?: URL;

#assetsFile?: PathLike;
#modulesFile?: PathLike;
Expand All @@ -38,6 +43,9 @@ export class MutableDataStore extends ImmutableDataStore {
#writeInProgress = false;
#writeQueued = false;

#hasher?: XXHashAPI;
#chunking = false;

set(collectionName: string, key: string, value: unknown) {
const collection = this._collections.get(collectionName) ?? new Map();
collection.set(String(key), value);
Expand Down Expand Up @@ -254,7 +262,7 @@ export default new Map([\n${lines.join(',\n')}]);
clearTimeout(this.#saveTimeout);
}
this.#saveTimeout = undefined;
if (this.#file) {
if (this.#file || (this.#manifestFile && this.#dir)) {
await this.writeToDisk();
}
this.#maybeResolveSavePromise();
Expand All @@ -274,7 +282,7 @@ export default new Map([\n${lines.join(',\n')}]);

this.#saveTimeout = setTimeout(async () => {
this.#saveTimeout = undefined;
if (this.#file) {
if (this.#file || (this.#manifestFile && this.#dir)) {
await this.writeToDisk();
}
this.#maybeResolveSavePromise();
Expand Down Expand Up @@ -436,10 +444,7 @@ export default new Map([\n${lines.join(',\n')}]);
return devalue.stringify(sorted);
}

async writeToDisk() {
if (!this.#dirty) {
return;
}
async writeToFile() {
if (!this.#file) {
throw new AstroError(AstroErrorData.UnknownFilesystemError);
}
Expand Down Expand Up @@ -468,6 +473,68 @@ export default new Map([\n${lines.join(',\n')}]);
}
}

/**
 * Writes the store into the data store directory as a set of chunk files plus a manifest.
 *
 * Every collection is split into groups of up to 1000 entries. Each group is serialized
 * with devalue, and the serialized string is split again by `chunkString` so that no
 * single file grows too large. Each piece is written to a file named
 * `<sanitized collection name>.<hash of the piece>.json`. The manifest records, per
 * collection, the ordered file names of every group, and is written after all pieces.
 *
 * @throws AstroError (`UnknownFilesystemError`) when the manifest file or directory is
 * not set, or wrapping any error raised while writing.
 */
async writeToDir() {
if (!this.#manifestFile || !this.#dir) {
throw new AstroError(AstroErrorData.UnknownFilesystemError);
}
// The hasher is created on first use and reused by later writes
if (!this.#hasher) {
this.#hasher = await xxhash();
}

try {
// Mark as clean before writing to disk so that it catches any changes that happen during the write
this.#dirty = false;

// Keep track of written files to remove old ones
const writtenFiles = new Set<string>();

// Collection name -> list of groups -> ordered file names making up that group
const manifest: Record<string, string[][]> = {};

// Split by collection
for (const [collectionName, entries] of this._collections) {
manifest[collectionName] = [];

// Split into chunks of 1000 entries each (avoid huge strings)
const chunkedCollection = chunkMap(entries, 1000);
for (const chunkedEntries of chunkedCollection) {
const stringified = devalue.stringify(chunkedEntries);

// Further split string into chunks of <20MB each (avoid platform-specific single file size limits)
const chunkedStrings = chunkString(stringified, CHUNK_SIZE_LIMIT);
const parts = [];
for (const chunk of chunkedStrings) {
// The name is derived from the piece's content, so identical content maps to the same file name
const fileName = `${sanitizeFileName(collectionName)}.${this.#hasher.h64ToString(chunk)}.json`;
await this.#writeFileAtomic(new URL(`./${fileName}`, this.#dir), chunk);
parts.push(fileName);
writtenFiles.add(fileName);
}
manifest[collectionName].push(parts);
}
}

// Finally, write the manifest
await this.#writeFileAtomic(this.#manifestFile, JSON.stringify(manifest));
writtenFiles.add(DATA_STORE_MANIFEST_FILE);

// Remove any files that are no longer referenced in the manifest
// NOTE(review): the call is not awaited — confirm `emptyDir` is synchronous
emptyDir(this.#dir, writtenFiles);
} catch (err) {
throw new AstroError(AstroErrorData.UnknownFilesystemError, { cause: err });
}
}

/**
 * Persists the store if it has unsaved changes.
 * Delegates to `writeToDir` when the store is in chunking mode and to `writeToFile` otherwise.
 */
async writeToDisk() {
  if (this.#dirty) {
    return this.#chunking ? this.writeToDir() : this.writeToFile();
  }
  // Nothing has changed since the last write
  return;
}

/**
* Attempts to load a MutableDataStore from the virtual module.
* This only works in Vite.
Expand All @@ -476,7 +543,14 @@ export default new Map([\n${lines.join(',\n')}]);
try {
// @ts-expect-error - this is a virtual module
const data = await import('astro:data-layer-content');
const map = devalue.unflatten(data.default);
if (data.default instanceof Map) {
return MutableDataStore.fromMap(data.default);
}
if (Array.isArray(data.default)) {
const map = devalue.unflatten(data.default);
return MutableDataStore.fromMap(map);
}
const map = await this.manifestToMap(data.default);
return MutableDataStore.fromMap(map);
} catch {}
return new MutableDataStore();
Expand Down Expand Up @@ -508,6 +582,48 @@ export default new Map([\n${lines.join(',\n')}]);
store.#file = filePath;
return store;
}

/**
 * Creates a store backed by a data store directory.
 *
 * When the directory and its manifest exist, every file listed in the manifest is read
 * and the collections are restored from them. When either is missing, the directory is
 * created. Any failure along the way is ignored and an empty store is returned instead.
 * In every case the returned store is bound to `dirPath` and writes in chunking mode.
 */
static async fromDir(dirPath: URL) {
  const manifestPath = new URL(`./${DATA_STORE_MANIFEST_FILE}`, dirPath);

  // Points a store at this directory and switches it to chunked writes
  const bind = (target: MutableDataStore) => {
    target.#manifestFile = manifestPath;
    target.#dir = dirPath;
    target.#chunking = true;
    return target;
  };

  try {
    const hasManifest = existsSync(dirPath) && existsSync(manifestPath);
    if (!hasManifest) {
      await fs.mkdir(dirPath, { recursive: true });
    } else {
      const raw = await fs.readFile(manifestPath, 'utf-8');
      const manifest: Record<string, string[][]> = JSON.parse(raw);

      if (manifest) {
        // Same shape as the manifest, with file names swapped for file contents
        const expanded: Record<string, string[][]> = {};

        for (const collection in manifest) {
          const groups: string[][] = [];
          expanded[collection] = groups;
          for (const fileNames of manifest[collection]) {
            // Parts of one group are read concurrently; groups are read one after another
            const contents = await Promise.all(
              fileNames.map(async (fileName) =>
                fs.readFile(new URL('./' + fileName, dirPath), 'utf-8'),
              ),
            );
            groups.push(contents);
          }
        }

        const collections = await this.manifestToMap(expanded);
        return bind(await MutableDataStore.fromMap(collections));
      }
    }
  } catch {}
  // Best effort: fall back to an empty store bound to the same directory
  return bind(new MutableDataStore());
}
}

// This is the scoped store for a single collection. It's a subset of the MutableDataStore API, and is the only public type.
Expand Down
57 changes: 57 additions & 0 deletions packages/astro/src/content/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -896,3 +896,60 @@ export function safeStringify(value: unknown) {
const seen = new WeakSet();
return JSON.stringify(value, safeStringifyReplacer(seen));
}

const safeFileNameReplacers = [
  // Common illegal characters, plus `#` and `%`: both are legal on disk but change meaning
  // once the name is placed in a relative URL (`#` starts a fragment, `%XX` is percent-decoded),
  // which would make the file on disk differ from the name recorded for it.
  /[/?<>\\:*|"#%]/g,
  // eslint-disable-next-line no-control-regex
  /[\x00-\x1f\x80-\x9f]/g, // unicode control codes
  /^\.+$/, // unix reserved
  /^(con|prn|aux|nul|com\d|lpt\d)(\..*)?$/i, // windows reserved
];

/**
 * Cross-platform string sanitizer for file names.
 * Adapted from https://gist.github.com/barbietunnie/7bc6d48a424446c44ff4
 *
 * Illegal characters, control codes and reserved names are replaced, then the result is
 * truncated to at most 200 UTF-8 bytes (leaving room for a hash and an extension). The
 * cut never lands inside a multi-byte character, so the result never gains a U+FFFD
 * replacement character from truncation.
 *
 * @param fileName The name to sanitize.
 * @param replacement The string substituted for every rejected character or reserved name.
 * @returns The sanitized name, at most 200 bytes long when encoded as UTF-8.
 */
export function sanitizeFileName(fileName: string, replacement = '_') {
  let sanitized = fileName;

  for (const re of safeFileNameReplacers) {
    sanitized = sanitized.replace(re, replacement);
  }

  const maxBytes = 200;
  const encoded = new TextEncoder().encode(sanitized);

  let end = Math.min(encoded.length, maxBytes);
  if (end < encoded.length) {
    // UTF-8 continuation bytes look like 0b10xxxxxx. If the first dropped byte is one,
    // the cut is inside a character: back up to that character's lead byte.
    while (end > 0 && ((encoded[end] ?? 0) & 0xc0) === 0x80) {
      end--;
    }
  }

  // Always round-trip through UTF-8 so that lone surrogates are normalized consistently
  return new TextDecoder().decode(encoded.subarray(0, end));
}

/**
 * Splits a string into consecutive chunks whose UTF-8 encoding is at most `maxBytes` bytes each.
 *
 * Joining the returned chunks always reproduces `str`. Chunk boundaries never fall between
 * the two halves of a surrogate pair, so every chunk can be written as UTF-8 and read back
 * unchanged. A single character that is larger than `maxBytes` on its own is emitted as its
 * own chunk, so the function always makes progress and terminates for any `maxBytes`.
 *
 * @param str The string to split.
 * @param maxBytes Maximum UTF-8 size of each chunk, in bytes.
 * @returns The chunks in order; an empty array for an empty string.
 */
export function chunkString(str: string, maxBytes: number): string[] {
  if (str.length === 0) {
    return [];
  }
  // A UTF-16 code unit never needs more than 3 UTF-8 bytes, so this is a safe upper bound
  // that lets the common case skip the scan entirely.
  if (str.length * 3 <= maxBytes) {
    return [str];
  }

  const chunks: string[] = [];
  let chunkStart = 0;
  let chunkBytes = 0;
  let i = 0;

  while (i < str.length) {
    const code = str.charCodeAt(i);
    let units = 1;
    let size = 3; // BMP characters >= U+0800, and lone surrogates (encoded as U+FFFD)

    if (code < 0x80) {
      size = 1;
    } else if (code < 0x800) {
      size = 2;
    } else if (code >= 0xd800 && code <= 0xdbff) {
      // High surrogate: treat a well-formed pair as one 4-byte character
      const next = str.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        units = 2;
        size = 4;
      }
    }

    // Close the current chunk when this character would not fit, unless the chunk is
    // still empty (an oversized character must go somewhere).
    if (chunkBytes + size > maxBytes && i > chunkStart) {
      chunks.push(str.slice(chunkStart, i));
      chunkStart = i;
      chunkBytes = 0;
    }

    chunkBytes += size;
    i += units;
  }

  chunks.push(str.slice(chunkStart));
  return chunks;
}

/**
 * Splits a Map into a list of smaller Maps, preserving iteration order.
 * Every chunk holds `chunkSize` entries except possibly the last, which holds the remainder.
 * An empty Map yields an empty list.
 */
export function chunkMap<T>(map: Map<string, T>, chunkSize: number): Map<string, T>[] {
  const result: Map<string, T>[] = [];
  let pending: [string, T][] = [];

  for (const pair of map) {
    pending.push(pair);
    if (pending.length >= chunkSize) {
      // The bucket is full: seal it and start a new one
      result.push(new Map(pending));
      pending = [];
    }
  }

  // Flush whatever is left over
  if (pending.length > 0) {
    result.push(new Map(pending));
  }

  return result;
}
Loading
Loading