fix(gatsby): Chunk nodes when serializing redux to prevent OOM (#21555)
* fix(gatsby): Chunk nodes when serializing redux to prevent OOM

We are using `v8.serialize` to write and read the redux state, which is faster than going through `JSON.stringify`/`JSON.parse`. Unfortunately, as reported in #17233, this can lead to a fatal error when the contents of the redux state are too big to be serialized into a single Buffer (which has a hard maximum of 2GB). We also hit this problem on large sites, for example a site with a million small Markdown pages.
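For context, a minimal sketch of the kind of v8-based persistence this refers to (file name and function names are illustrative, not Gatsby's actual implementation):

```js
const v8 = require(`v8`)
const fs = require(`fs-extra`)

// Serializing the entire state into one Buffer is what can blow past the
// ~2GB Buffer limit on very large sites.
function writeReduxState(file, state) {
  fs.writeFileSync(file, v8.serialize(state))
}

function readReduxState(file) {
  return v8.deserialize(fs.readFileSync(file))
}
```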

The solution is to shard the `nodes` property, which holds all the page data. This change adds a simple heuristic to determine the maximum chunk size (mind you, currently that is effectively `Infinity`): serialize about 11 individual nodes, measure their sizes, and based on the biggest node determine how many nodes would fit in 1.5GB.
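A sketch of that heuristic, assuming an illustrative function name and sampling scheme rather than the exact implementation:

```js
const v8 = require(`v8`)

// Sample roughly 11 nodes spread across the Map, take the largest serialized
// size, and compute how many such nodes would fit in a 1.5GB chunk.
function guessSafeChunkSize(nodes) {
  const nodeList = Array.from(nodes.values())
  if (nodeList.length === 0) {
    return 1
  }

  const sampleStep = Math.max(1, Math.floor(nodeList.length / 11))
  let maxNodeSize = 0
  for (let i = 0; i < nodeList.length; i += sampleStep) {
    maxNodeSize = Math.max(maxNodeSize, v8.serialize(nodeList[i]).length)
  }

  // Budget 1.5GB per chunk to stay safely under the 2GB Buffer cap.
  const chunkBudget = 1.5 * 1024 * 1024 * 1024
  return Math.max(1, Math.floor(chunkBudget / maxNodeSize))
}
```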

The serialization process is updated to no longer put `nodes` in the main redux file; instead they are sharded over a number of dedicated files. When reading the state from cache, these files are all read and their contents are merged into a single Map again. If there are no node chunk files, this step does nothing, so the change is backwards compatible.
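A sketch of the sharded write and the merge-on-read (the chunk file naming here is an assumption):

```js
const v8 = require(`v8`)
const fs = require(`fs-extra`)
const glob = require(`glob`)
const path = require(`path`)

// Write the nodes Map as numbered chunk files of `chunkSize` entries each.
function writeNodeChunks(dir, nodes, chunkSize) {
  const entries = Array.from(nodes.entries())
  for (let i = 0; i * chunkSize < entries.length; i++) {
    const chunk = entries.slice(i * chunkSize, (i + 1) * chunkSize)
    fs.writeFileSync(path.join(dir, `redux.node.state_${i}`), v8.serialize(chunk))
  }
}

// Read every chunk file and merge the entries back into one Map. With an old
// cache (no chunk files) the glob matches nothing and this is a no-op.
function readNodeChunks(dir) {
  const nodes = new Map()
  glob.sync(path.join(dir, `redux.node.state_*`)).forEach(file => {
    v8.deserialize(fs.readFileSync(file)).forEach(([id, node]) => {
      nodes.set(id, node)
    })
  })
  return nodes
}
```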

Because the write is no longer atomic, the process now writes the redux cache to its own `redux` folder. When writing a new cache it first prepares the new cache in a tmp folder, then moves the existing `redux` folder to a temporary location, moves the new folder to `redux`, and finally tries to drop the old folder. This is about as transactional as it gets and should leave the cache in either a stale, empty, or updated state, but never in a partial state.
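A sketch of that swap sequence, with paths and the callback chosen for illustration only:

```js
const fs = require(`fs-extra`)
const path = require(`path`)

// Prepare the new cache in a tmp folder, then swap folders so the on-disk
// cache is only ever stale, empty, or fully updated, never half-written.
function transactionalWrite(writeCacheInto) {
  const reduxDir = path.join(process.cwd(), `.cache`, `redux`)
  const tmpDir = fs.mkdtempSync(path.join(process.cwd(), `.cache`, `tmp-redux-`))

  writeCacheInto(tmpDir) // 1. write the complete new cache first

  const oldDir = reduxDir + `.old`
  if (fs.existsSync(reduxDir)) {
    fs.renameSync(reduxDir, oldDir) // 2. move the current cache aside
  }
  fs.renameSync(tmpDir, reduxDir) // 3. promote the new cache
  fs.removeSync(oldDir) // 4. best-effort cleanup; failure leaves a stale but valid copy
}
```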
pvdz authored Feb 25, 2020
1 parent 0fee286 commit c944aae
Showing 4 changed files with 371 additions and 31 deletions.
63 changes: 62 additions & 1 deletion packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap
@@ -1,6 +1,6 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

-exports[`redux db should write cache to disk 1`] = `
+exports[`redux db should write loki cache to disk 1`] = `
Object {
"componentDataDependencies": Object {
"connections": Map {},
@@ -28,3 +28,64 @@ Object {
"webpackCompilationHash": "",
}
`;

exports[`redux db should write redux cache to disk 1`] = `
Object {
"componentDataDependencies": Object {
"connections": Map {},
"nodes": Map {},
},
"components": Map {
"/Users/username/dev/site/src/templates/my-sweet-new-page.js" => Object {
"componentPath": "/Users/username/dev/site/src/templates/my-sweet-new-page.js",
"isInBootstrap": true,
"pages": Set {
"/my-sweet-new-page/",
},
"query": "",
},
},
"jobsV2": Object {
"complete": Map {},
"incomplete": Map {},
},
"nodes": Map {
"pageA" => Object {
"id": "pageA",
"internal": Object {
"type": "Ding",
},
},
"pageB" => Object {
"id": "pageB",
"internal": Object {
"type": "Dong",
},
},
},
"nodesByType": Map {
"Ding" => Map {
"pageA" => Object {
"id": "pageA",
"internal": Object {
"type": "Ding",
},
},
},
"Dong" => Map {
"pageB" => Object {
"id": "pageB",
"internal": Object {
"type": "Dong",
},
},
},
},
"pageDataStats": Map {},
"staticQueryComponents": Map {},
"status": Object {
"plugins": Object {},
},
"webpackCompilationHash": "",
}
`;
148 changes: 131 additions & 17 deletions packages/gatsby/src/redux/__tests__/index.js
@@ -1,4 +1,5 @@
const _ = require(`lodash`)
const path = require(`path`)

const writeToCache = jest.spyOn(require(`../persist`), `writeToCache`)
const { saveState, store, readState } = require(`../index`)
@@ -8,15 +9,84 @@ const {
} = require(`../actions`)

const mockWrittenContent = new Map()
const mockCompatiblePath = path
jest.mock(`fs-extra`, () => {
return {
writeFileSync: jest.fn((file, content) =>
mockWrittenContent.set(file, content)
),
readFileSync: jest.fn(file => mockWrittenContent.get(file)),
renameSync: jest.fn((from, to) => {
// This will only work for folders if they are always the full prefix
// of the file... (that goes for both input dirs). That's the case here.
if (mockWrittenContent.has(to)) {
throw new Error(`File/folder exists`)
}

// Move all files in this folder as well ... :/
mockWrittenContent.forEach((value, key) => {
if (key.startsWith(from)) {
// rename('foo/bar', 'a/b/c') => foo/bar/ding.js -> a/b/c/ding.js
// (.replace with string arg will only replace the first occurrence)
mockWrittenContent.set(
key.replace(from, to),
mockWrittenContent.get(key)
)
mockWrittenContent.delete(key)
}
})
}),
existsSync: jest.fn(target => mockWrittenContent.has(target)),
mkdtempSync: jest.fn(suffix => {
let dir = mockCompatiblePath.join(`some`, `tmp` + suffix + Math.random())
mockWrittenContent.set(dir, Buffer(`empty dir`))
return dir
}),
removeSync: jest.fn(file => mockWrittenContent.delete(file)),
}
})
jest.mock(`glob`, () => {
return {
sync: jest.fn(pattern => {
// Tricky.
// Expecting a path prefix, ending with star. Else this won't work :/
if (pattern.slice(-1) !== `*`) {
throw new Error(`Expected pattern ending with star`)
}
let globPrefix = pattern.slice(0, -1)
if (globPrefix.includes(`*`)) {
throw new Error(`Expected pattern to be a prefix`)
}
const files = []
mockWrittenContent.forEach((value, key) => {
if (key.startsWith(globPrefix)) {
files.push(key)
}
})
return files
}),
}
})

function getFakeNodes() {
// Set nodes to something or the cache will fail because it asserts this
// Actual nodes content should match TS type; these are verified
let map /*: Map<string, IReduxNode>*/ = new Map()
map.set(`pageA`, {
id: `pageA`,
internal: {
type: `Ding`,
},
})
map.set(`pageB`, {
id: `pageB`,
internal: {
type: `Dong`,
},
})
return map
}

describe(`redux db`, () => {
const initialComponentsState = _.cloneDeep(store.getState().components)

@@ -41,29 +111,73 @@ describe(`redux db`, () => {
mockWrittenContent.clear()
})

-  it(`expect components state to be empty initially`, () => {
-    expect(initialComponentsState).toEqual(new Map())
-  })
-
-  it(`should write cache to disk`, async () => {
-    await saveState()
-
-    expect(writeToCache).toBeCalled()
-
-    // reset state in memory
-    store.dispatch({
-      type: `DELETE_CACHE`,
-    })
-    // make sure store in memory is empty
-    expect(store.getState().components).toEqual(initialComponentsState)
-
-    // read data that was previously cached
-    const data = readState()
-
-    // make sure data was read and is not the same as our clean redux state
-    expect(data.components).not.toEqual(initialComponentsState)
-
-    // yuck - loki and redux will have different shape of redux state (nodes and nodesByType)
-    expect(_.omit(data, [`nodes`, `nodesByType`])).toMatchSnapshot()
-  })
+  // yuck - loki and redux will have different shape of redux state (nodes and nodesByType)
+  // Note: branched skips will keep snapshots with and without loki env var
+  if (process.env.GATSBY_DB_NODES === `loki`) {
+    it.skip(`should write redux cache to disk`, async () => {})
+    it(`should write loki cache to disk`, async () => {
+      expect(initialComponentsState).toEqual(new Map())
+
+      store.getState().nodes = getFakeNodes()
+
+      await saveState()
+
+      expect(writeToCache).toBeCalled()
+
+      // reset state in memory
+      store.dispatch({
+        type: `DELETE_CACHE`,
+      })
+      // make sure store in memory is empty
+      expect(store.getState().components).toEqual(initialComponentsState)
+
+      // read data that was previously cached
+      const data = readState()
+
+      // make sure data was read and is not the same as our clean redux state
+      expect(data.components).not.toEqual(initialComponentsState)
+
+      expect(_.omit(data, [`nodes`, `nodesByType`])).toMatchSnapshot()
+    })
+  } else {
+    it.skip(`should write loki cache to disk`, async () => {})
+    it(`should write redux cache to disk`, async () => {
+      expect(initialComponentsState).toEqual(new Map())
+
+      store.getState().nodes = getFakeNodes()
+
+      await saveState()
+
+      expect(writeToCache).toBeCalled()
+
+      // reset state in memory
+      store.dispatch({
+        type: `DELETE_CACHE`,
+      })
+      // make sure store in memory is empty
+      expect(store.getState().components).toEqual(initialComponentsState)
+
+      // read data that was previously cached
+      const data = readState()
+
+      // make sure data was read and is not the same as our clean redux state
+      expect(data.components).not.toEqual(initialComponentsState)
+
+      expect(data).toMatchSnapshot()
+    })
+  }
+
+  it(`should drop legacy file if exists`, async () => {
+    expect(initialComponentsState).toEqual(new Map())
+
+    const legacyLocation = path.join(process.cwd(), `.cache/redux.state`)
+    mockWrittenContent.set(
+      legacyLocation,
+      Buffer.from(`legacy location for cache`)
+    )
+
+    await saveState()
+
+    expect(mockWrittenContent.has(legacyLocation)).toBe(false)
+  })
})
