Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(gatsby-source-filesystem): fix broken stream with gzipped files #28913

Merged
merged 5 commits into from
Jan 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion packages/gatsby-source-filesystem/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
"@babel/cli": "^7.12.1",
"@babel/core": "^7.12.3",
"babel-preset-gatsby-package": "^0.11.0-next.0",
"cross-env": "^7.0.3"
"cross-env": "^7.0.3",
"msw": "^0.25.0"
},
"homepage": "https://github.com/gatsbyjs/gatsby/tree/master/packages/gatsby-source-filesystem#readme",
"keywords": [
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,268 @@
import * as path from "path"
import * as zlib from "zlib"
import * as os from "os"
import { rest } from "msw"
import { setupServer } from "msw/node"
import { Writable } from "stream"
import got from "got"
import createRemoteFileNode from "../create-remote-file-node"

const fs = jest.requireActual(`fs-extra`)

const gotStream = jest.spyOn(got, `stream`)
const urlCount = new Map()

async function getFileSize(file) {
const stat = await fs.stat(file)

return stat.size
}

/**
* A utility to help create file responses
* - Url with attempts will use maxBytes for x amount of time until it delivers the full response
* - MaxBytes indicates how much bytes we'll be sending
*
* @param {string} file File path on disk
* @param {Object} req Is the request object from msw
* @param {{ compress: boolean}} options Options for the getFilecontent (use gzip or not)
*/
async function getFileContent(file, req, options = {}) {
const cacheKey = req.url.origin + req.url.pathname
const maxRetry = req.url.searchParams.get(`attempts`)
const maxBytes = req.url.searchParams.get(`maxBytes`)
const currentRetryCount = urlCount.get(cacheKey) || 0
urlCount.set(cacheKey, currentRetryCount + 1)

let fileContentBuffer = await fs.readFile(file)
if (options.compress) {
fileContentBuffer = zlib.deflateSync(fileContentBuffer)
}

const content = await new Promise(resolve => {
const fileStream = fs.createReadStream(file, {
end:
currentRetryCount < Number(maxRetry)
? Number(maxBytes)
: Number.MAX_SAFE_INTEGER,
})

const writableStream = new Writable()
const result = []
writableStream._write = (chunk, encoding, next) => {
result.push(chunk)

next()
}

writableStream.on(`finish`, () => {
resolve(Buffer.concat(result))
})

// eslint-disable-next-line no-unused-vars
let stream = fileStream
if (options.compress) {
stream = stream.pipe(zlib.createDeflate())
}

stream.pipe(writableStream)
})

return {
content,
contentLength:
req.url.searchParams.get(`contentLength`) === `false`
? undefined
: fileContentBuffer.length,
}
}

const server = setupServer(
rest.get(`http://external.com/logo.svg`, async (req, res, ctx) => {
const { content, contentLength } = await getFileContent(
path.join(__dirname, `./fixtures/gatsby-logo.svg`),
req
)

return res(
ctx.set(`Content-Type`, `image/svg+xml`),
ctx.set(`Content-Length`, contentLength),
ctx.status(200),
ctx.body(content)
)
}),
rest.get(`http://external.com/logo-gzip.svg`, async (req, res, ctx) => {
const { content, contentLength } = await getFileContent(
path.join(__dirname, `./fixtures/gatsby-logo.svg`),
req,
{
compress: true,
}
)

return res(
ctx.set(`Content-Type`, `image/svg+xml`),
ctx.set(`content-encoding`, `gzip`),
ctx.set(`Content-Length`, contentLength),
ctx.status(200),
ctx.body(content)
)
}),
rest.get(`http://external.com/dog.jpg`, async (req, res, ctx) => {
const { content, contentLength } = await getFileContent(
path.join(__dirname, `./fixtures/dog-thumbnail.jpg`),
req
)

return res(
ctx.set(`Content-Type`, `image/svg+xml`),
ctx.set(`Content-Length`, contentLength),
ctx.status(200),
ctx.body(content)
)
})
)

function createMockCache() {
const tmpDir = fs.mkdtempSync(
path.join(os.tmpdir(), `gatsby-source-filesystem-`)
)

return {
get: jest.fn(),
set: jest.fn(),
directory: tmpDir,
}
}

const reporter = jest.fn(() => {
return {}
})

describe(`create-remote-file-node`, () => {
let cache

beforeAll(() => {
cache = createMockCache()
// Establish requests interception layer before all tests.
server.listen()
})
afterAll(() => {
if (cache) {
fs.removeSync(cache.directory)
}

// Clean up after all tests are done, preventing this
// interception layer from affecting irrelevant tests.
server.close()
})

beforeEach(() => {
gotStream.mockClear()
urlCount.clear()
})

it(`downloads and create a file`, async () => {
const fileNode = await createRemoteFileNode({
url: `http://external.com/logo.svg`,
store: {},
getCache: () => cache,
createNode: jest.fn(),
createNodeId: jest.fn(),
reporter,
})

expect(fileNode.base).toBe(`logo.svg`)
expect(fileNode.size).toBe(
await getFileSize(path.join(__dirname, `./fixtures/gatsby-logo.svg`))
)
expect(gotStream).toBeCalledTimes(1)
})

it(`downloads and create a gzip file`, async () => {
const fileNode = await createRemoteFileNode({
url: `http://external.com/logo-gzip.svg`,
store: {},
getCache: () => cache,
createNode: jest.fn(),
createNodeId: jest.fn(),
reporter,
})

expect(fileNode.base).toBe(`logo-gzip.svg`)
expect(fileNode.size).toBe(
await getFileSize(path.join(__dirname, `./fixtures/gatsby-logo.svg`))
)
expect(gotStream).toBeCalledTimes(1)
})

it(`downloads and create a file`, async () => {
const fileNode = await createRemoteFileNode({
url: `http://external.com/dog.jpg`,
store: {},
getCache: () => cache,
createNode: jest.fn(),
createNodeId: jest.fn(),
reporter,
})

expect(fileNode.base).toBe(`dog.jpg`)
expect(fileNode.size).toBe(
await getFileSize(path.join(__dirname, `./fixtures/dog-thumbnail.jpg`))
)
expect(gotStream).toBeCalledTimes(1)
})

it(`doesn't retry when no content-length is given`, async () => {
const fileNode = await createRemoteFileNode({
url: `http://external.com/logo-gzip.svg?attempts=1&maxBytes=300&contentLength=false`,
store: {},
getCache: () => cache,
createNode: jest.fn(),
createNodeId: jest.fn(),
reporter,
})

expect(fileNode.base).toBe(`logo-gzip.svg`)
expect(fileNode.size).not.toBe(
await getFileSize(path.join(__dirname, `./fixtures/gatsby-logo.svg`))
)
expect(gotStream).toBeCalledTimes(1)
})

describe(`retries the download`, () => {
it(`Retries when gzip compression file is incomplete`, async () => {
const fileNode = await createRemoteFileNode({
url: `http://external.com/logo-gzip.svg?attempts=1&maxBytes=300`,
store: {},
getCache: () => cache,
createNode: jest.fn(),
createNodeId: jest.fn(),
reporter,
})

expect(fileNode.base).toBe(`logo-gzip.svg`)
expect(fileNode.size).toBe(
await getFileSize(path.join(__dirname, `./fixtures/gatsby-logo.svg`))
)
expect(gotStream).toBeCalledTimes(2)
})

it(`Retries when binary file is incomplete`, async () => {
const fileNode = await createRemoteFileNode({
url: `http://external.com/dog.jpg?attempts=1&maxBytes=300`,
store: {},
getCache: () => cache,
createNode: jest.fn(),
createNodeId: jest.fn(),
reporter,
})

expect(fileNode.base).toBe(`dog.jpg`)
expect(fileNode.size).toBe(
await getFileSize(path.join(__dirname, `./fixtures/dog-thumbnail.jpg`))
)
expect(gotStream).toBeCalledTimes(2)
})
})
})
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,7 @@ describe(`create-remote-file-node`, () => {
describe(`valid url`, () => {
let uuid = 0

const setup = (
args = {},
type = `response`,
response = { statusCode: 200 }
) => {
const setup = (args = {}, response = { statusCode: 200 }) => {
const url = `https://images.whatever.com/real-image-trust-me-${uuid}.png`

const gotMock = {
Expand All @@ -121,14 +117,21 @@ describe(`create-remote-file-node`, () => {
got.stream.mockReturnValueOnce({
pipe: jest.fn(() => gotMock),
on: jest.fn((mockType, mockCallback) => {
if (mockType === type) {
if (mockType === `response`) {
// got throws on 404/500 so we mimic this behaviour
if (response.statusCode === 404) {
throw new Error(`Response code 404 (Not Found)`)
}

mockCallback(response)
}
if (mockType === `downloadProgress`) {
mockCallback({
progress: 1,
transferred: 1,
total: 1,
})
}

return gotMock
}),
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
14 changes: 11 additions & 3 deletions packages/gatsby-source-filesystem/src/create-remote-file-node.js
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,14 @@ const requestRemoteNode = (url, headers, tmpFilename, httpOpts, attempt = 1) =>
},
...httpOpts,
})

let haveAllBytesBeenWritten = false
responseStream.on(`downloadProgress`, progress => {
if (progress.transferred === progress.total || progress.total === null) {
haveAllBytesBeenWritten = true
}
})

const fsWriteStream = fs.createWriteStream(tmpFilename)
responseStream.pipe(fsWriteStream)

Expand All @@ -180,12 +188,12 @@ const requestRemoteNode = (url, headers, tmpFilename, httpOpts, attempt = 1) =>

responseStream.on(`response`, response => {
resetTimeout()
const contentLength =
response.headers && Number(response.headers[`content-length`])

fsWriteStream.on(`finish`, () => {
fsWriteStream.close()

// We have an incomplete download
if (contentLength && contentLength !== fsWriteStream.bytesWritten) {
if (!haveAllBytesBeenWritten) {
fs.removeSync(tmpFilename)

if (attempt < INCOMPLETE_RETRY_LIMIT) {
Expand Down
Loading