From c5ac34541018464497d9ee4b245a72608fec6aea Mon Sep 17 00:00:00 2001 From: Dan Ordille Date: Wed, 25 Jul 2018 00:59:45 -0400 Subject: [PATCH 1/3] Add support for rabin fingerprinting chunk algorithm --- package.json | 4 ++- src/builder/builder.js | 3 +- src/chunker/index.js | 12 ++++++++ src/chunker/rabin.js | 28 ++++++++++++++++++ src/importer/index.js | 5 +--- test/chunker-rabin.js | 65 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 111 insertions(+), 6 deletions(-) create mode 100644 src/chunker/index.js create mode 100644 src/chunker/rabin.js create mode 100644 test/chunker-rabin.js diff --git a/package.json b/package.json index a2070756..1115d342 100644 --- a/package.json +++ b/package.json @@ -72,7 +72,9 @@ "pull-through": "^1.0.18", "pull-traverse": "^1.0.3", "pull-write": "^1.1.4", - "sparse-array": "^1.3.1" + "rabin": "^1.6.0", + "sparse-array": "^1.3.1", + "stream-to-pull-stream": "^1.7.2" }, "contributors": [ "Alan Shaw ", diff --git a/src/builder/builder.js b/src/builder/builder.js index b0c117ed..a94763dd 100644 --- a/src/builder/builder.js +++ b/src/builder/builder.js @@ -16,7 +16,8 @@ const DAGNode = dagPB.DAGNode const defaultOptions = { chunkerOptions: { - maxChunkSize: 262144 + maxChunkSize: 262144, + avgChunkSize: 262144 }, rawLeaves: false, hashAlg: 'sha2-256', diff --git a/src/chunker/index.js b/src/chunker/index.js new file mode 100644 index 00000000..6248f1d4 --- /dev/null +++ b/src/chunker/index.js @@ -0,0 +1,12 @@ +const rabin = require('rabin') + +const chunkers = { + fixed: require('../chunker/fixed-size') +} + +// Don't include the rabin chunker when rabin require is null for browser +if (rabin) { + chunkers.rabin = require('../chunker/rabin') +} + +module.exports = chunkers diff --git a/src/chunker/rabin.js b/src/chunker/rabin.js new file mode 100644 index 00000000..89bac16e --- /dev/null +++ b/src/chunker/rabin.js @@ -0,0 +1,28 @@ +'use strict' + +const createRabin = require('rabin') +const toPull = require('stream-to-pull-stream') + +module.exports = (options) => { + let min, max, avg; + if (options.minChunkSize && options.maxChunkSize && options.avgChunkSize) { + avg = options.avgChunkSize + min = options.minChunkSize + max = options.maxChunkSize + } else { + avg = options.avgChunkSize + min = avg / 3 + max = avg + (avg / 2) + } + + const sizepow = Math.floor(Math.log2(avg)) + const rabin = createRabin({ + min: min, + max: max, + bits: sizepow, + window: options.window || 16, + polynomial: options.polynomial || "0x3DF305DFB2A805" + }) + + return toPull.duplex(rabin) +} diff --git a/src/importer/index.js b/src/importer/index.js index beb8b994..5be9670f 100644 --- a/src/importer/index.js +++ b/src/importer/index.js @@ -8,10 +8,7 @@ const assert = require('assert') const setImmediate = require('async/setImmediate') const DAGBuilder = require('../builder') const createTreeBuilder = require('./tree-builder') - -const chunkers = { - fixed: require('../chunker/fixed-size') -} +const chunkers = require('../chunker') const defaultOptions = { chunker: 'fixed', diff --git a/test/chunker-rabin.js b/test/chunker-rabin.js new file mode 100644 index 00000000..0376a0ba --- /dev/null +++ b/test/chunker-rabin.js @@ -0,0 +1,65 @@ +/* eslint-env mocha */ +'use strict' + +const chunker = require('./../src/chunker/rabin') +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const pull = require('pull-stream') +const loadFixture = require('aegir/fixtures') + +const rawFile = loadFixture('test/fixtures/1MiB.txt') + +describe('chunker: rabin', function () { + this.timeout(30000) + + it('chunks non flat buffers', (done) => { + const b1 = Buffer.alloc(2 * 256) + const b2 = Buffer.alloc(1 * 256) + const b3 = Buffer.alloc(5 * 256) + + b1.fill('a') + b2.fill('b') + b3.fill('c') + + pull( + pull.values([b1, b2, b3]), + chunker({minChunkSize: 48, avgChunkSize: 96, maxChunkSize: 192}), + pull.collect((err, chunks) => { + expect(err).to.not.exist() + let totalSize = 0 + chunks.forEach((chunk) => { + totalSize += chunk.length + expect(chunk).to.have.length.gte(48) + expect(chunk).to.have.length.lte(192) + }) + done() + }) + ) + }) + + it('256 KiB avg chunks of non scalar filesize', (done) => { + const KiB256 = 262144 + let file = Buffer.concat([rawFile, Buffer.from('hello')]) + const opts = { + minChunkSize: KiB256/3, + avgChunkSize: KiB256, + maxChunkSize: KiB256 + (KiB256 / 2) + } + pull( + pull.values([file]), + chunker(opts), + pull.collect((err, chunks) => { + expect(err).to.not.exist() + + chunks.forEach((chunk) => { + expect(chunk).to.have.length.gte(opts.minChunkSize) + expect(chunk).to.have.length.lte(opts.maxChunkSize) + }) + + done() + }) + ) + }) + +}) \ No newline at end of file From 918b1ac98e35bf3c2c18d34330d7474ac04d1040 Mon Sep 17 00:00:00 2001 From: Dan Ordille Date: Wed, 25 Jul 2018 12:41:17 -0400 Subject: [PATCH 2/3] Fix linting issues --- src/chunker/index.js | 2 ++ src/chunker/rabin.js | 4 ++-- test/chunker-rabin.js | 7 ++----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/chunker/index.js b/src/chunker/index.js index 6248f1d4..c5fb6c09 100644 --- a/src/chunker/index.js +++ b/src/chunker/index.js @@ -1,3 +1,5 @@ +'use strict' + const rabin = require('rabin') const chunkers = { diff --git a/src/chunker/rabin.js b/src/chunker/rabin.js index 89bac16e..8fe506e4 100644 --- a/src/chunker/rabin.js +++ b/src/chunker/rabin.js @@ -4,7 +4,7 @@ const createRabin = require('rabin') const toPull = require('stream-to-pull-stream') module.exports = (options) => { - let min, max, avg; + let min, max, avg if (options.minChunkSize && options.maxChunkSize && options.avgChunkSize) { avg = options.avgChunkSize min = options.minChunkSize @@ -21,7 +21,7 @@ module.exports = (options) => { max: max, bits: sizepow, window: options.window || 16, - polynomial: options.polynomial || "0x3DF305DFB2A805" + polynomial: options.polynomial || '0x3DF305DFB2A805' }) return toPull.duplex(rabin) diff --git a/test/chunker-rabin.js b/test/chunker-rabin.js index 0376a0ba..a0d7d7cb 100644 --- a/test/chunker-rabin.js +++ b/test/chunker-rabin.js @@ -27,9 +27,7 @@ describe('chunker: rabin', function () { chunker({minChunkSize: 48, avgChunkSize: 96, maxChunkSize: 192}), pull.collect((err, chunks) => { expect(err).to.not.exist() - let totalSize = 0 chunks.forEach((chunk) => { - totalSize += chunk.length expect(chunk).to.have.length.gte(48) expect(chunk).to.have.length.lte(192) }) @@ -42,7 +40,7 @@ describe('chunker: rabin', function () { const KiB256 = 262144 let file = Buffer.concat([rawFile, Buffer.from('hello')]) const opts = { - minChunkSize: KiB256/3, + minChunkSize: KiB256 / 3, avgChunkSize: KiB256, maxChunkSize: KiB256 + (KiB256 / 2) } @@ -61,5 +59,4 @@ describe('chunker: rabin', function () { }) ) }) - -}) \ No newline at end of file +}) From a16863831e21a9bc04d739273558f290032adf93 Mon Sep 17 00:00:00 2001 From: Dan Ordille Date: Wed, 25 Jul 2018 13:01:31 -0400 Subject: [PATCH 3/3] Exclude rabin chunker from browser build --- package.json | 3 ++- src/chunker/index.js | 8 ++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/package.json b/package.json index 1115d342..7f9fc175 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,8 @@ "leadMaintainer": "Alex Potsides ", "main": "src/index.js", "browser": { - "fs": false + "fs": false, + "rabin": false }, "scripts": { "test": "aegir test", diff --git a/src/chunker/index.js b/src/chunker/index.js index c5fb6c09..5a3111c8 100644 --- a/src/chunker/index.js +++ b/src/chunker/index.js @@ -3,12 +3,8 @@ const rabin = require('rabin') const chunkers = { - fixed: require('../chunker/fixed-size') -} - -// Don't include the rabin chunker when rabin require is null for browser -if (rabin) { - chunkers.rabin = require('../chunker/rabin') + fixed: require('../chunker/fixed-size'), + rabin: require('../chunker/rabin') } module.exports = chunkers