Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: perf regression on hot string munging path #286

Merged
merged 2 commits into from
Aug 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions lib/normalize-unicode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// warning: extremely hot code path.
// This has been meticulously optimized for use
// within npm install on large package trees.
// Do not edit without careful benchmarking.
const normalizeCache = Object.create(null)
const {hasOwnProperty} = Object.prototype
module.exports = s => {
if (!hasOwnProperty.call(normalizeCache, s))
normalizeCache[s] = s.normalize('NFKD')
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In long-running server-side application scenarios, this can lead to memory leaks here.

return normalizeCache[s]
}
9 changes: 4 additions & 5 deletions lib/path-reservations.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// while still allowing maximal safe parallelization.

const assert = require('assert')
const normPath = require('./normalize-windows-path.js')
const normalize = require('./normalize-unicode.js')
const stripSlashes = require('./strip-trailing-slashes.js')
const { join } = require('path')

Expand All @@ -28,7 +28,7 @@ module.exports = () => {
const getDirs = path => {
const dirs = path.split('/').slice(0, -1).reduce((set, path) => {
if (set.length)
path = normPath(join(set[set.length - 1], path))
path = join(set[set.length - 1], path)
set.push(path || '/')
return set
}, [])
Expand Down Expand Up @@ -116,9 +116,8 @@ module.exports = () => {
// So, we just pretend that every path matches every other path here,
// effectively removing all parallelization on windows.
paths = isWindows ? ['win32 parallelization disabled'] : paths.map(p => {
return stripSlashes(normPath(join(p)))
.normalize('NFKD')
.toLowerCase()
// don't need normPath, because we skip this entirely for windows
return normalize(stripSlashes(join(p))).toLowerCase()
})

const dirs = new Set(
Expand Down
31 changes: 10 additions & 21 deletions lib/strip-trailing-slashes.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,13 @@
// this is the only approach that was significantly faster than using
// str.replace(/\/+$/, '') for strings ending with a lot of / chars and
// containing multiple / chars.
const batchStrings = [
'/'.repeat(1024),
'/'.repeat(512),
'/'.repeat(256),
'/'.repeat(128),
'/'.repeat(64),
'/'.repeat(32),
'/'.repeat(16),
'/'.repeat(8),
'/'.repeat(4),
'/'.repeat(2),
'/',
]

// warning: extremely hot code path.
// This has been meticulously optimized for use
// within npm install on large package trees.
// Do not edit without careful benchmarking.
module.exports = str => {
for (const s of batchStrings) {
while (str.length >= s.length && str.slice(-1 * s.length) === s)
str = str.slice(0, -1 * s.length)
let i = str.length - 1
let slashesStart = -1
while (i > -1 && str.charAt(i) === '/') {
slashesStart = i
i--
}
return str
return slashesStart === -1 ? str : str.slice(0, slashesStart)
}
4 changes: 2 additions & 2 deletions lib/unpack.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ const pathReservations = require('./path-reservations.js')
const stripAbsolutePath = require('./strip-absolute-path.js')
const normPath = require('./normalize-windows-path.js')
const stripSlash = require('./strip-trailing-slashes.js')
const normalize = require('./normalize-unicode.js')

const ONENTRY = Symbol('onEntry')
const CHECKFS = Symbol('checkFs')
Expand Down Expand Up @@ -101,8 +102,7 @@ const uint32 = (a, b, c) =>
// Note that on windows, we always drop the entire cache whenever a
// symbolic link is encountered, because 8.3 filenames are impossible
// to reason about, and collisions are hazards rather than just failures.
const cacheKeyNormalize = path => stripSlash(normPath(path))
.normalize('NFKD')
const cacheKeyNormalize = path => normalize(stripSlash(normPath(path)))
.toLowerCase()

const pruneCache = (cache, abs) => {
Expand Down
8 changes: 7 additions & 1 deletion test/create.js
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,13 @@ t.test('gzipped tarball that makes some drain/resume stuff', t => {
const cwd = path.dirname(__dirname)
const out = path.resolve(dir, 'package.tgz')

c({ z: true, C: cwd }, ['node_modules'])
// don't include node_modules/.cache, since that gets written to
// by nyc during tests, and can result in spurious errors.
const entries = fs.readdirSync(`${cwd}/node_modules`)
.filter(e => !/^\./.test(e))
.map(e => `node_modules/${e}`)

c({ z: true, C: cwd }, entries)
.pipe(fs.createWriteStream(out))
.on('finish', _ => {
const child = spawn('tar', ['tf', out], {
Expand Down
12 changes: 12 additions & 0 deletions test/normalize-unicode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
const t = require('tap')
const normalize = require('../lib/normalize-unicode.js')

// café
const cafe1 = Buffer.from([0x63, 0x61, 0x66, 0xc3, 0xa9]).toString()

// cafe with a `
const cafe2 = Buffer.from([0x63, 0x61, 0x66, 0x65, 0xcc, 0x81]).toString()

t.equal(normalize(cafe1), normalize(cafe2), 'matching unicodes')
t.equal(normalize(cafe1), normalize(cafe2), 'cached')
t.equal(normalize('foo'), 'foo', 'non-unicdoe string')
1 change: 1 addition & 0 deletions test/strip-trailing-slashes.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ const stripSlash = require('../lib/strip-trailing-slashes.js')
const short = '///a///b///c///'
const long = short.repeat(10) + '/'.repeat(1000000)

t.equal(stripSlash('no slash'), 'no slash')
t.equal(stripSlash(short), '///a///b///c')
t.equal(stripSlash(long), short.repeat(9) + '///a///b///c')