From 758475db179ae439f0904b81afd52f0e2d72b56e Mon Sep 17 00:00:00 2001 From: Konstantin Raev Date: Thu, 9 Feb 2017 06:15:54 -0800 Subject: [PATCH] --link-duplicates flag. Creates hardlinks to duplicate files in node_modules (#2620) * Feature: duplicated dependencies are hardlinked from the first copy * cleanup and tests * made linking non default * added fallback if links are not supported * lint fix * nits * nits --- .../commands/install/integration-deduping.js | 36 +++ .../deps/a-1/package.json | 4 + .../deps/a-2/package.json | 4 + .../deps/b-1/package.json | 7 + .../deps/c-1/package.json | 7 + .../package.json | 7 + src/cli/commands/install.js | 6 +- src/cli/index.js | 1 + src/package-linker.js | 79 +++-- src/reporters/lang/en.js | 2 + src/util/fs.js | 295 ++++++++++++++++-- 11 files changed, 400 insertions(+), 48 deletions(-) create mode 100644 __tests__/fixtures/install/hardlink-repeated-dependencies/deps/a-1/package.json create mode 100644 __tests__/fixtures/install/hardlink-repeated-dependencies/deps/a-2/package.json create mode 100644 __tests__/fixtures/install/hardlink-repeated-dependencies/deps/b-1/package.json create mode 100644 __tests__/fixtures/install/hardlink-repeated-dependencies/deps/c-1/package.json create mode 100644 __tests__/fixtures/install/hardlink-repeated-dependencies/package.json diff --git a/__tests__/commands/install/integration-deduping.js b/__tests__/commands/install/integration-deduping.js index 641c58ff31..f6ee63df23 100644 --- a/__tests__/commands/install/integration-deduping.js +++ b/__tests__/commands/install/integration-deduping.js @@ -1,8 +1,10 @@ /* @flow */ import {getPackageVersion, runInstall} from '../_helpers.js'; +import * as fs from '../../../src/util/fs.js'; const assert = require('assert'); +const path = require('path'); jasmine.DEFAULT_TIMEOUT_INTERVAL = 120000; @@ -201,3 +203,37 @@ test.concurrent('install should dedupe dependencies avoiding conflicts 9', (): P assert.equal(await getPackageVersion(config, 'run-async'), '0.1.0'); }); }); + +test.concurrent('install should hardlink repeated dependencies', (): Promise => { + // A@1 + // B@1 -> A@2 + // C@1 -> A@2 (this is hardlink to B@1->A@2) + return runInstall({linkDuplicates: true}, 'hardlink-repeated-dependencies', async (config) => { + const b_a = await fs.stat(path.join( + config.cwd, + 'node_modules/b/node_modules/a/package.json', + )); + const c_a = await fs.stat(path.join( + config.cwd, + 'node_modules/c/node_modules/a/package.json', + )); + assert.equal(b_a.ino, c_a.ino); + }); +}); + +test.concurrent('install should not hardlink repeated dependencies if linkDuplicates=false', (): Promise => { + // A@1 + // B@1 -> A@2 + // C@1 -> A@2 + return runInstall({linkDuplicates: false}, 'hardlink-repeated-dependencies', async (config) => { + const b_a = await fs.stat(path.join( + config.cwd, + 'node_modules/b/node_modules/a/package.json', + )); + const c_a = await fs.stat(path.join( + config.cwd, + 'node_modules/c/node_modules/a/package.json', + )); + assert(b_a.ino != c_a.ino); + }); +}); diff --git a/__tests__/fixtures/install/hardlink-repeated-dependencies/deps/a-1/package.json b/__tests__/fixtures/install/hardlink-repeated-dependencies/deps/a-1/package.json new file mode 100644 index 0000000000..9113c2528e --- /dev/null +++ b/__tests__/fixtures/install/hardlink-repeated-dependencies/deps/a-1/package.json @@ -0,0 +1,4 @@ +{ + "name": "a", + "version": "1.0.0" +} diff --git a/__tests__/fixtures/install/hardlink-repeated-dependencies/deps/a-2/package.json 
b/__tests__/fixtures/install/hardlink-repeated-dependencies/deps/a-2/package.json new file mode 100644 index 0000000000..9ce57f3180 --- /dev/null +++ b/__tests__/fixtures/install/hardlink-repeated-dependencies/deps/a-2/package.json @@ -0,0 +1,4 @@ +{ + "name": "a", + "version": "2.0.0" +} diff --git a/__tests__/fixtures/install/hardlink-repeated-dependencies/deps/b-1/package.json b/__tests__/fixtures/install/hardlink-repeated-dependencies/deps/b-1/package.json new file mode 100644 index 0000000000..034077b298 --- /dev/null +++ b/__tests__/fixtures/install/hardlink-repeated-dependencies/deps/b-1/package.json @@ -0,0 +1,7 @@ +{ + "name": "b", + "version": "1.0.0", + "dependencies": { + "a": "file:../a-2" + } +} diff --git a/__tests__/fixtures/install/hardlink-repeated-dependencies/deps/c-1/package.json b/__tests__/fixtures/install/hardlink-repeated-dependencies/deps/c-1/package.json new file mode 100644 index 0000000000..f630919d51 --- /dev/null +++ b/__tests__/fixtures/install/hardlink-repeated-dependencies/deps/c-1/package.json @@ -0,0 +1,7 @@ +{ + "name": "c", + "version": "1.0.0", + "dependencies": { + "a": "file:../a-2" + } +} diff --git a/__tests__/fixtures/install/hardlink-repeated-dependencies/package.json b/__tests__/fixtures/install/hardlink-repeated-dependencies/package.json new file mode 100644 index 0000000000..2a4eb34e69 --- /dev/null +++ b/__tests__/fixtures/install/hardlink-repeated-dependencies/package.json @@ -0,0 +1,7 @@ +{ + "dependencies": { + "a": "file:deps/a-1", + "b": "file:deps/b-1", + "c": "file:deps/c-1" + } +} diff --git a/src/cli/commands/install.js b/src/cli/commands/install.js index cf6f2a6ddc..58b9569081 100644 --- a/src/cli/commands/install.js +++ b/src/cli/commands/install.js @@ -50,11 +50,12 @@ export type IntegrityMatch = { type Flags = { // install + har: boolean, ignorePlatform: boolean, ignoreEngines: boolean, ignoreScripts: boolean, ignoreOptional: boolean, - har: boolean, + linkDuplicates: boolean, force: boolean, flat: boolean, lockfile: boolean, @@ -128,6 +129,7 @@ function normalizeFlags(config: Config, rawFlags: Object): Flags { pureLockfile: !!rawFlags.pureLockfile, skipIntegrity: !!rawFlags.skipIntegrity, frozenLockfile: !!rawFlags.frozenLockfile, + linkDuplicates: !!rawFlags.linkDuplicates, // add peer: !!rawFlags.peer, @@ -396,7 +398,7 @@ export class Install { const loc = await this.getIntegrityHashLocation(); await fs.unlink(loc); this.reporter.step(curr, total, this.reporter.lang('linkingDependencies'), emoji.get('link')); - await this.linker.init(patterns); + await this.linker.init(patterns, this.flags.linkDuplicates); }); steps.push(async (curr: number, total: number) => { diff --git a/src/cli/index.js b/src/cli/index.js index 8643d470a9..da28db6325 100644 --- a/src/cli/index.js +++ b/src/cli/index.js @@ -64,6 +64,7 @@ commander.option('--prod, --production [prod]', ''); commander.option('--no-lockfile', "don't read or generate a lockfile"); commander.option('--pure-lockfile', "don't generate a lockfile"); commander.option('--frozen-lockfile', "don't generate a lockfile and fail if an update is needed"); +commander.option('--link-duplicates', 'create hardlinks to the repeated modules in node_modules'); commander.option('--global-folder ', ''); commander.option( '--modules-folder ', diff --git a/src/package-linker.js b/src/package-linker.js index 9c4905bbe3..1710a3c3e7 100644 --- a/src/package-linker.js +++ b/src/package-linker.js @@ -112,7 +112,7 @@ export default class PackageLinker { return Promise.resolve(hoister.init()); } - async 
copyModules(patterns: Array): Promise { + async copyModules(patterns: Array, linkDuplicates: boolean): Promise { let flatTree = await this.getFlatHoistedTree(patterns); // sorted tree makes file creation and copying not to interfere with each other @@ -121,10 +121,13 @@ export default class PackageLinker { }); // list of artifacts in modules to remove from extraneous removal - const phantomFiles = []; + const artifactFiles = []; - // - const queue: Map = new Map(); + const copyQueue: Map = new Map(); + const hardlinkQueue: Map = new Map(); + const hardlinksEnabled = linkDuplicates && await fs.hardlinksWork(this.config.cwd); + + const copiedSrcs: Map = new Map(); for (const [dest, {pkg, loc: src}] of flatTree) { const ref = pkg._reference; invariant(ref, 'expected package reference'); @@ -134,18 +137,34 @@ export default class PackageLinker { // extraneous const metadata = await this.config.readPackageMetadata(src); for (const file of metadata.artifacts) { - phantomFiles.push(path.join(dest, file)); + artifactFiles.push(path.join(dest, file)); } - queue.set(dest, { - src, - dest, - onFresh() { - if (ref) { - ref.setFresh(true); - } - }, - }); + const copiedDest = copiedSrcs.get(src); + if (!copiedDest) { + if (hardlinksEnabled) { + copiedSrcs.set(src, dest); + } + copyQueue.set(dest, { + src, + dest, + onFresh() { + if (ref) { + ref.setFresh(true); + } + }, + }); + } else { + hardlinkQueue.set(dest, { + src: copiedDest, + dest, + onFresh() { + if (ref) { + ref.setFresh(true); + } + }, + }); + } } // keep track of all scoped paths to remove empty scopes after copy @@ -179,15 +198,15 @@ export default class PackageLinker { const stat = await fs.lstat(loc); if (stat.isSymbolicLink()) { possibleExtraneous.delete(loc); - queue.delete(loc); + copyQueue.delete(loc); } } // let tick; - await fs.copyBulk(Array.from(queue.values()), this.reporter, { + await fs.copyBulk(Array.from(copyQueue.values()), this.reporter, { possibleExtraneous, - phantomFiles, + artifactFiles, ignoreBasenames: [ constants.METADATA_FILENAME, @@ -204,6 +223,28 @@ export default class PackageLinker { } }, }); + await fs.hardlinkBulk(Array.from(hardlinkQueue.values()), this.reporter, { + possibleExtraneous, + artifactFiles, + + onStart: (num: number) => { + tick = this.reporter.progress(num); + }, + + onProgress(src: string) { + if (tick) { + tick(src); + } + }, + }); + + // remove all extraneous files that weren't in the tree + for (const loc of possibleExtraneous) { + this.reporter.verbose( + this.reporter.lang('verboseFileRemoveExtraneous', loc), + ); + await fs.unlink(loc); + } // remove any empty scoped directories for (const scopedPath of scopedPaths) { @@ -287,9 +328,9 @@ export default class PackageLinker { return range === '*' || semver.satisfies(version, range, this.config.looseSemver); } - async init(patterns: Array): Promise { + async init(patterns: Array, linkDuplicates: boolean): Promise { this.resolvePeerModules(); - await this.copyModules(patterns); + await this.copyModules(patterns, linkDuplicates); await this.saveAll(patterns); } diff --git a/src/reporters/lang/en.js b/src/reporters/lang/en.js index e272b46f9a..9f07510902 100644 --- a/src/reporters/lang/en.js +++ b/src/reporters/lang/en.js @@ -33,9 +33,11 @@ const messages = { manifestDirectoryNotFound: 'Unable to read $0 directory of module $1', verboseFileCopy: 'Copying $0 to $1.', + verboseFileLink: 'Creating hardlink at $0 to $1.', verboseFileSymlink: 'Creating symlink at $0 to $1.', verboseFileSkip: 'Skipping copying of file $0 as the file at $1 is the 
same size ($2) and mtime ($3).', verboseFileSkipSymlink: 'Skipping copying of $0 as the file at $1 is the same symlink ($2).', + verboseFileSkipHardlink: 'Skipping copying of $0 as the file at $1 is the same hardlink ($2).', verboseFileRemoveExtraneous: 'Removing extraneous file $0.', verboseFilePhantomExtraneous: "File $0 would be marked as extraneous but has been removed as it's listed as a phantom file.", verboseFileFolder: 'Creating directory $0.', diff --git a/src/util/fs.js b/src/util/fs.js index 957b999fe0..f6d2dbcf37 100644 --- a/src/util/fs.js +++ b/src/util/fs.js @@ -25,6 +25,9 @@ export const mkdirp: (path: string) => Promise = promisify(require('mkdirp export const exists: (path: string) => Promise = promisify(fs.exists, true); export const lstat: (path: string) => Promise = promisify(fs.lstat); export const chmod: (path: string, mode: number | string) => Promise = promisify(fs.chmod); +export const link: (path: string) => Promise = promisify(fs.link); + +const CONCURRENT_QUEUE_ITEMS = 4; const fsSymlink: ( target: string, @@ -54,33 +57,36 @@ type CopyFileAction = { mode: number }; +type LinkFileAction = { + type: 'link', + src: string, + dest: string, + removeDest: boolean, +}; + type CopySymlinkAction = { type: 'symlink', dest: string, linkname: string, }; -type CopyActions = Array; - -type PossibleExtraneous = void | false | Iterable; +type CopyActions = Array; type CopyOptions = { onProgress: (dest: string) => void, onStart: (num: number) => void, - possibleExtraneous: PossibleExtraneous, + possibleExtraneous: Set, ignoreBasenames: Array, - phantomFiles: Array, + artifactFiles: Array, }; async function buildActionsForCopy( queue: CopyQueue, events: CopyOptions, - possibleExtraneousSeed: PossibleExtraneous, + possibleExtraneous: Set, reporter: Reporter, ): Promise { - const possibleExtraneous: Set = new Set(possibleExtraneousSeed || []); - const phantomFiles: Set = new Set(events.phantomFiles || []); - const noExtraneous = possibleExtraneousSeed === false; + const artifactFiles: Set = new Set(events.artifactFiles || []); const files: Set = new Set(); // initialise events @@ -98,28 +104,24 @@ async function buildActionsForCopy( // start building actions const actions: CopyActions = []; - // custom concurrency logic as we're always executing stacks of 4 queue items + // custom concurrency logic as we're always executing stacks of CONCURRENT_QUEUE_ITEMS queue items // at a time due to the requirement to push items onto the queue while (queue.length) { - const items = queue.splice(0, 4); + const items = queue.splice(0, CONCURRENT_QUEUE_ITEMS); await Promise.all(items.map(build)); } // simulate the existence of some files to prevent considering them extraenous - for (const file of phantomFiles) { + for (const file of artifactFiles) { if (possibleExtraneous.has(file)) { reporter.verbose(reporter.lang('verboseFilePhantomExtraneous', file)); possibleExtraneous.delete(file); } } - // remove all extraneous files that weren't in the tree - if (!noExtraneous) { - for (const loc of possibleExtraneous) { - if (!files.has(loc)) { - reporter.verbose(reporter.lang('verboseFileRemoveExtraneous', loc)); - await unlink(loc); - } + for (const loc of possibleExtraneous) { + if (files.has(loc)) { + possibleExtraneous.delete(loc); } } @@ -177,7 +179,7 @@ async function buildActionsForCopy( } } - if (bothFolders && !noExtraneous) { + if (bothFolders) { // mark files that aren't in this folder as possibly extraneous const destFiles = await readdir(dest); invariant(srcFiles, 'src files not 
initialised'); @@ -251,6 +253,179 @@ async function buildActionsForCopy( } } +async function buildActionsForHardlink( + queue: CopyQueue, + events: CopyOptions, + possibleExtraneous: Set, + reporter: Reporter, +): Promise { + const artifactFiles: Set = new Set(events.artifactFiles || []); + const files: Set = new Set(); + + // initialise events + for (const item of queue) { + const onDone = item.onDone; + item.onDone = () => { + events.onProgress(item.dest); + if (onDone) { + onDone(); + } + }; + } + events.onStart(queue.length); + + // start building actions + const actions: CopyActions = []; + + // custom concurrency logic as we're always executing stacks of CONCURRENT_QUEUE_ITEMS queue items + // at a time due to the requirement to push items onto the queue + while (queue.length) { + const items = queue.splice(0, CONCURRENT_QUEUE_ITEMS); + await Promise.all(items.map(build)); + } + + // simulate the existence of some files to prevent considering them extraenous + for (const file of artifactFiles) { + if (possibleExtraneous.has(file)) { + reporter.verbose(reporter.lang('verboseFilePhantomExtraneous', file)); + possibleExtraneous.delete(file); + } + } + + for (const loc of possibleExtraneous) { + if (files.has(loc)) { + possibleExtraneous.delete(loc); + } + } + + return actions; + + // + async function build(data): Promise { + const {src, dest} = data; + const onFresh = data.onFresh || noop; + const onDone = data.onDone || noop; + files.add(dest); + + if (events.ignoreBasenames.indexOf(path.basename(src)) >= 0) { + // ignored file + return; + } + + const srcStat = await lstat(src); + let srcFiles; + + if (srcStat.isDirectory()) { + srcFiles = await readdir(src); + } + + const destExists = await exists(dest); + if (destExists) { + const destStat = await lstat(dest); + + const bothSymlinks = srcStat.isSymbolicLink() && destStat.isSymbolicLink(); + const bothFolders = srcStat.isDirectory() && destStat.isDirectory(); + const bothFiles = srcStat.isFile() && destStat.isFile(); + + if (srcStat.mode !== destStat.mode) { + try { + await access(dest, srcStat.mode); + } catch (err) { + // EINVAL access errors sometimes happen which shouldn't because node shouldn't be giving + // us modes that aren't valid. investigate this, it's generally safe to proceed. 
+ reporter.verbose(err); + } + } + + // correct hardlink + if (bothFiles && (srcStat.ino !== null) && (srcStat.ino === destStat.ino)) { + onDone(); + reporter.verbose(reporter.lang('verboseFileSkip', src, dest, srcStat.ino)); + return; + } + + if (bothSymlinks) { + const srcReallink = await readlink(src); + if (srcReallink === await readlink(dest)) { + // if both symlinks are the same then we can continue on + onDone(); + reporter.verbose(reporter.lang('verboseFileSkipSymlink', src, dest, srcReallink)); + return; + } + } + + if (bothFolders) { + // mark files that aren't in this folder as possibly extraneous + const destFiles = await readdir(dest); + invariant(srcFiles, 'src files not initialised'); + + for (const file of destFiles) { + if (srcFiles.indexOf(file) < 0) { + const loc = path.join(dest, file); + possibleExtraneous.add(loc); + + if ((await lstat(loc)).isDirectory()) { + for (const file of await readdir(loc)) { + possibleExtraneous.add(path.join(loc, file)); + } + } + } + } + } + } + + if (srcStat.isSymbolicLink()) { + onFresh(); + const linkname = await readlink(src); + actions.push({ + type: 'symlink', + dest, + linkname, + }); + onDone(); + } else if (srcStat.isDirectory()) { + reporter.verbose(reporter.lang('verboseFileFolder', dest)); + await mkdirp(dest); + + const destParts = dest.split(path.sep); + while (destParts.length) { + files.add(destParts.join(path.sep)); + destParts.pop(); + } + + // push all files to queue + invariant(srcFiles, 'src files not initialised'); + let remaining = srcFiles.length; + if (!remaining) { + onDone(); + } + for (const file of srcFiles) { + queue.push({ + onFresh, + src: path.join(src, file), + dest: path.join(dest, file), + onDone: () => { + if (--remaining === 0) { + onDone(); + } + }, + }); + } + } else if (srcStat.isFile()) { + onFresh(); + actions.push({ + type: 'link', + src, + dest, + removeDest: destExists, + }); + onDone(); + } else { + throw new Error(`unsure how to copy this: ${src}`); + } + } +} + export function copy(src: string, dest: string, reporter: Reporter): Promise { return copyBulk([{src, dest}], reporter); } @@ -261,20 +436,25 @@ export async function copyBulk( _events?: { onProgress?: ?(dest: string) => void, onStart?: ?(num: number) => void, - possibleExtraneous?: PossibleExtraneous, + possibleExtraneous: Set, ignoreBasenames?: Array, - phantomFiles?: Array, + artifactFiles?: Array, }, ): Promise { const events: CopyOptions = { onStart: (_events && _events.onStart) || noop, onProgress: (_events && _events.onProgress) || noop, - possibleExtraneous: _events ? _events.possibleExtraneous : [], + possibleExtraneous: _events ? 
_events.possibleExtraneous : new Set(), ignoreBasenames: (_events && _events.ignoreBasenames) || [], - phantomFiles: (_events && _events.phantomFiles) || [], + artifactFiles: (_events && _events.artifactFiles) || [], }; - const actions: CopyActions = await buildActionsForCopy(queue, events, events.possibleExtraneous, reporter); + const actions: CopyActions = await buildActionsForCopy( + queue, + events, + events.possibleExtraneous, + reporter, + ); events.onStart(actions.length); const fileActions: Array = (actions.filter((action) => action.type === 'file'): any); @@ -319,9 +499,54 @@ export async function copyBulk( cleanup(); throw arg; }); - }, 4); + }, CONCURRENT_QUEUE_ITEMS); + + // we need to copy symlinks last as they could reference files we were copying + const symlinkActions: Array = (actions.filter((action) => action.type === 'symlink'): any); + await promise.queue(symlinkActions, (data): Promise => { + const linkname = path.resolve(path.dirname(data.dest), data.linkname); + reporter.verbose(reporter.lang('verboseFileSymlink', data.dest, linkname)); + return symlink(linkname, data.dest); + }); +} - // we need to copy symlinks last as the could reference files we were copying +export async function hardlinkBulk( + queue: CopyQueue, + reporter: Reporter, + _events?: { + onProgress?: ?(dest: string) => void, + onStart?: ?(num: number) => void, + possibleExtraneous: Set, + artifactFiles?: Array, + }, +): Promise { + const events: CopyOptions = { + onStart: (_events && _events.onStart) || noop, + onProgress: (_events && _events.onProgress) || noop, + possibleExtraneous: _events ? _events.possibleExtraneous : new Set(), + artifactFiles: (_events && _events.artifactFiles) || [], + ignoreBasenames: [], + }; + + const actions: CopyActions = await buildActionsForHardlink( + queue, + events, + events.possibleExtraneous, + reporter, + ); + events.onStart(actions.length); + + const fileActions: Array = (actions.filter((action) => action.type === 'link'): any); + + await promise.queue(fileActions, async (data): Promise => { + reporter.verbose(reporter.lang('verboseFileLink', data.src, data.dest)); + if (data.removeDest) { + await unlink(data.dest); + } + await link(data.src, data.dest); + }, CONCURRENT_QUEUE_ITEMS); + + // we need to copy symlinks last as they could reference files we were copying const symlinkActions: Array = (actions.filter((action) => action.type === 'symlink'): any); await promise.queue(symlinkActions, (data): Promise => { const linkname = path.resolve(path.dirname(data.dest), data.linkname); @@ -511,6 +736,22 @@ export async function writeFilePreservingEol(path: string, data: string) : Promi await promisify(fs.writeFile)(path, data); } +export async function hardlinksWork(dir: string): Promise { + const filename = 'test-file' + Math.random(); + const file = path.join(dir, filename); + const fileLink = path.join(dir, filename + '-link'); + try { + await writeFile(file, 'test'); + await link(file, fileLink); + } catch (err) { + return false; + } finally { + await unlink(file); + await unlink(fileLink); + } + return true; +} + // not a strict polyfill for Node's fs.mkdtemp export async function makeTempDir(prefix?: string): Promise { const dir = path.join(
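
For readers skimming the diff, here is a small standalone Node.js sketch (not Yarn's internal API; the file names and the demo() helper are made up for illustration) of the two ideas the patch combines: probe whether the target directory supports hardlinks, roughly the way the new hardlinksWork() helper does, then link a duplicate file back to its first copy and confirm the deduplication by comparing inode numbers, which is the same check the new integration test performs.

const fs = require('fs');
const path = require('path');
const {promisify} = require('util');

const writeFile = promisify(fs.writeFile);
const link = promisify(fs.link);
const unlink = promisify(fs.unlink);
const stat = promisify(fs.stat);

// Probe hardlink support by creating and linking a throwaway file, then cleaning up.
async function hardlinksSupported(dir) {
  const file = path.join(dir, 'hardlink-probe-' + Math.random());
  const fileLink = file + '-link';
  try {
    await writeFile(file, 'probe');
    await link(file, fileLink);
    return true;
  } catch (err) {
    return false;
  } finally {
    // ignore cleanup failures for files that were never created
    await unlink(file).catch(() => {});
    await unlink(fileLink).catch(() => {});
  }
}

async function demo(dir) {
  if (!(await hardlinksSupported(dir))) {
    // mirrors the patch's fallback: without hardlink support, plain copies are used
    console.log('hardlinks not supported here; falling back to copying');
    return;
  }

  // "first copy" of a duplicated file and the location that would otherwise
  // receive a second physical copy (both paths are hypothetical)
  const original = path.join(dir, 'a-package.json');
  const duplicate = path.join(dir, 'a-package-dup.json');
  await writeFile(original, '{"name": "a", "version": "2.0.0"}');
  await unlink(duplicate).catch(() => {}); // keep the demo re-runnable
  await link(original, duplicate);

  // hardlinked files share an inode, so only one copy exists on disk
  const [first, second] = await Promise.all([stat(original), stat(duplicate)]);
  console.log('same inode:', first.ino === second.ino);
}

demo(process.cwd()).catch((err) => {
  console.error(err);
  process.exit(1);
});

Run in a scratch directory on a filesystem with hardlink support, this prints "same inode: true"; on filesystems where link() fails, it takes the copy fallback path, which is the same silent degradation the flag's implementation relies on.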