From 735c70fd7c1bdd3de7347bcbd93a4e65284dc718 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Wed, 24 Jul 2019 12:42:31 +0200 Subject: [PATCH 1/3] fix: limit concurrent HTTP requests All HTTP requests made by this module are sent to the same delegate host. Browsers throttle the number of concurrent requests per hostname (right now it is 6 per host), which suffocates the use of the delegate and blocks it from being used for preload or delegated peer routing. This change introduces task queues that limit the number of concurrent requests, making it safe to run in a browser context. Optimizations: - preload calls via `refs` are known to take time, so we limit them to max two at a time - swarm.connect was the main offender (causing multiple requests to the delegate); we now check if a connection is already present and cache the result for 1 minute, removing most of the redundant HTTP requests - hostname of the default delegate is changed Context: https://github.com/libp2p/js-libp2p-delegated-content-routing/issues/12 Closes #12 License: MIT Signed-off-by: Marcin Rataj --- .gitignore | 3 +- package.json | 5 ++- src/index.js | 79 +++++++++++++++++++++++++++++++++++----------- test/index.spec.js | 4 +-- 4 files changed, 69 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index a10b662..230dfd8 100644 --- a/.gitignore +++ b/.gitignore @@ -61,4 +61,5 @@ typings/ .env yarn.lock -package-lock.json \ No newline at end of file +package-lock.json +dist/ diff --git a/package.json b/package.json index 7a75a37..80b39ac 100644 --- a/package.json +++ b/package.json @@ -27,8 +27,11 @@ "peer-id": "~0.13.1" }, "dependencies": { + "debug": "^4.1.1", "ipfs-http-client": "^33.1.0", - "multiaddr": "^6.1.0" + "multiaddr": "^6.1.0", + "p-memoize": "^3.1.0", + "p-queue": "^6.1.0" }, "contributors": [ "Alan Shaw ", diff --git a/src/index.js b/src/index.js index 59d899e..c660197 100644 --- a/src/index.js +++ b/src/index.js @@ -5,14 +5,21 @@ const swarm = require('ipfs-http-client/src/swarm') const 
refs = require('ipfs-http-client/src/files-regular/refs') const defaultConfig = require('ipfs-http-client/src/utils/default-config') const multiaddr = require('multiaddr') +const { default: PQueue } = require('p-queue') +const pMemoize = require('p-memoize') +const debug = require('debug') + +const log = debug('libp2p-delegated-content-routing') +log.error = debug('libp2p-delegated-content-routing:error') const DEFAULT_MAX_TIMEOUT = 30e3 // 30 second default const DEFAULT_IPFS_API = { protocol: 'https', port: 443, - host: 'ipfs.io' + host: 'node0.delegate.ipfs.io' } +// assuming below nodes need have autorelay enabled const DEFAULT_BOOSTRAP_NODES = [ '/ipfs/QmSoLer265NRgSp2LA3dPaeykiS1J6DifTC88f5uVQKNAd', '/ipfs/QmSoLMeWqB7YGVLJN3pNLQpmmEk35v6wYtsMGLzSr5QBU3', @@ -24,6 +31,9 @@ const DEFAULT_BOOSTRAP_NODES = [ '/ipfs/Qmbut9Ywz9YEDrz8ySBSgWyJk41Uvm2QJPhwDJzJyGFsD6' ] +const CONCURRENT_HTTP_REQUESTS = 4 +const SWARM_CONNECT_MAX_AGE = 60e3 + /** * An implementation of content routing, using a delegated peer. 
*/ @@ -43,10 +53,26 @@ class DelegatedContentRouting { this.api = Object.assign({}, defaultConfig(), DEFAULT_IPFS_API, api) this.dht = dht(this.api) this.swarm = swarm(this.api) + // optimization: avoid calling swarm.connect too often + this.swarm.connect = pMemoize(this.swarm.connect, { maxAge: SWARM_CONNECT_MAX_AGE }) this.refs = refs(this.api) - this.peerId = peerId - this.bootstrappers = bootstrappers || DEFAULT_BOOSTRAP_NODES.map((addr) => multiaddr(addr)) + + bootstrappers = bootstrappers || DEFAULT_BOOSTRAP_NODES.map((addr) => multiaddr(addr)) + this.circuits = bootstrappers.map((addr) => { + return addr.encapsulate(`/p2p-circuit/ipfs/${this.peerId.toB58String()}`) + }) + + // limit concurrency to avoid request flood in web browser + // https://github.com/libp2p/js-libp2p-delegated-content-routing/issues/12 + const concurrency = { concurrency: CONCURRENT_HTTP_REQUESTS } + this._httpQueue = new PQueue(concurrency) + // sometimes refs requests take long time, they need separate queue + // to not suffocate regular bussiness + this._httpQueueRefs = new PQueue(Object.assign({}, concurrency, { + concurrency: 2 + })) + log(`enabled DelegatedContentRouting via ${this.api.protocol}://${this.api.host}:${this.api.port}`) } /** @@ -60,15 +86,18 @@ class DelegatedContentRouting { * @returns {AsyncIterable} */ async * findProviders (key, options = {}) { + const keyString = key.toBaseEncodedString() + log('findProviders starts: ' + keyString) options.maxTimeout = options.maxTimeout || DEFAULT_MAX_TIMEOUT - const results = await this.dht.findProvs(key, { + const results = await this._httpQueue.add(() => this.dht.findProvs(key, { timeout: `${options.maxTimeout}ms` // The api requires specification of the time unit (s/ms) - }) + })) for (let i = 0; i < results.length; i++) { yield results[i] } + log('findProviders finished: ' + keyString) } /** @@ -76,23 +105,34 @@ class DelegatedContentRouting { * * Currently this uses the following hack * - call swarm.connect on the 
delegated node to us, to ensure we are connected - * - call refs --recursive on the delegated node, so it fetches the content + * - call refs on the delegated node, so it fetches the content * * @param {CID} key * @param {function(Error)} callback * @returns {Promise} */ async provide (key) { - const addrs = this.bootstrappers.map((addr) => { - return addr.encapsulate(`/p2p-circuit/ipfs/${this.peerId.toB58String()}`) - }) - - const results = await Promise.all( - addrs.map((addr) => { - return this.swarm.connect(addr.toString()).catch(() => {}) - }) - ) + const keyString = key.toBaseEncodedString() + log('provide starts: ' + keyString) + let results + try { + // optimization: try the first addr + // (swarm.connect will return success if ANY connection to this.peerId already exists) + const addr = this.circuits.find(a => Boolean(a)) + const res = await this._httpQueue.add(() => this.swarm.connect(addr.toString())) + if (res && res.error) throw new Error() // trigger fallback + results = [res] + } catch (err) { + // fallback to trying all potential circuits + results = await Promise.all( + this.circuits.map((addr) => + this._httpQueue.add(() => + this.swarm.connect(addr.toString()).catch(() => {}) + ) + ) + ) + } // only some need to succeed const success = results.filter((res) => res && res.error == null) @@ -100,9 +140,12 @@ class DelegatedContentRouting { throw new Error('unable to swarm.connect using p2p-circuit') } - this.refs(key.toBaseEncodedString(), { - recursive: true - }) + // async preload of data to delegate node + // note: we call `provide` for every block, so it does not need to be recursive + await this._httpQueueRefs.add(() => + this.refs(keyString, { recursive: false }) + ) + log('provide finished: ' + keyString) } } diff --git a/test/index.spec.js b/test/index.spec.js index 81a0ab6..d4e478d 100644 --- a/test/index.spec.js +++ b/test/index.spec.js @@ -71,14 +71,14 @@ describe('DelegatedContentRouting', function () { expect(() => new 
DelegatedContentRouting()).to.throw() }) - it('should default to https://ipfs.io as the delegate', () => { + it('should default to https://node0.delegate.ipfs.io as the delegate', () => { const router = new DelegatedContentRouting(selfId) expect(router.api).to.include({ 'api-path': '/api/v0/', protocol: 'https', port: 443, - host: 'ipfs.io' + host: 'node0.delegate.ipfs.io' }) }) From 7ea2eae3318ac3b7e602200c958507c447b7d14c Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Wed, 24 Jul 2019 13:26:08 +0200 Subject: [PATCH 2/3] refactor: remove the need for swarm connect This removes all code that caused swarm connect calls from delegate to self. It was not needed: delegate is one of bootstrap nodes, so we are always connected to it. https://github.com/libp2p/js-libp2p-delegated-content-routing/issues/12#issuecomment-514591525 License: MIT Signed-off-by: Marcin Rataj --- src/index.js | 51 ++------------------------------------------------- 1 file changed, 2 insertions(+), 49 deletions(-) diff --git a/src/index.js b/src/index.js index c660197..973716d 100644 --- a/src/index.js +++ b/src/index.js @@ -4,7 +4,6 @@ const dht = require('ipfs-http-client/src/dht') const swarm = require('ipfs-http-client/src/swarm') const refs = require('ipfs-http-client/src/files-regular/refs') const defaultConfig = require('ipfs-http-client/src/utils/default-config') -const multiaddr = require('multiaddr') const { default: PQueue } = require('p-queue') const pMemoize = require('p-memoize') const debug = require('debug') @@ -19,18 +18,6 @@ const DEFAULT_IPFS_API = { host: 'node0.delegate.ipfs.io' } -// assuming below nodes need have autorelay enabled -const DEFAULT_BOOSTRAP_NODES = [ - '/ipfs/QmSoLer265NRgSp2LA3dPaeykiS1J6DifTC88f5uVQKNAd', - '/ipfs/QmSoLMeWqB7YGVLJN3pNLQpmmEk35v6wYtsMGLzSr5QBU3', - '/ipfs/QmSoLPppuBtQSGwKDZT2M73ULpjvfd3aZ6ha4oFGL1KrGM', - '/ipfs/QmSoLSafTMBsPKadTEgaXctDQVcqN88CNLHXMkTNwMKPnu', - '/ipfs/QmSoLueR4xBeUbY9WZ9xGUUxunbKWcrNFTDAadQJmocnWm', - 
'/ipfs/QmSoLV4Bbm51jM9C4gDYZQ9Cy3U6aXMJDAbzgu2fzaDs64', - '/ipfs/QmZMxNdpMkewiVZLMRxaNxUeZpDUb34pWjZ1kZvsd16Zic', - '/ipfs/Qmbut9Ywz9YEDrz8ySBSgWyJk41Uvm2QJPhwDJzJyGFsD6' -] - const CONCURRENT_HTTP_REQUESTS = 4 const SWARM_CONNECT_MAX_AGE = 60e3 @@ -43,9 +30,8 @@ class DelegatedContentRouting { * * @param {PeerID} peerId - the id of the node that is using this routing. * @param {object} [api] - (Optional) the api endpoint of the delegated node to use. - * @param {Array} [bootstrappers] - (Optional) list of bootstrapper nodes we are connected to. */ - constructor (peerId, api, bootstrappers) { + constructor (peerId, api) { if (peerId == null) { throw new Error('missing self peerId') } @@ -58,11 +44,6 @@ class DelegatedContentRouting { this.refs = refs(this.api) this.peerId = peerId - bootstrappers = bootstrappers || DEFAULT_BOOSTRAP_NODES.map((addr) => multiaddr(addr)) - this.circuits = bootstrappers.map((addr) => { - return addr.encapsulate(`/p2p-circuit/ipfs/${this.peerId.toB58String()}`) - }) - // limit concurrency to avoid request flood in web browser // https://github.com/libp2p/js-libp2p-delegated-content-routing/issues/12 const concurrency = { concurrency: CONCURRENT_HTTP_REQUESTS } @@ -104,7 +85,7 @@ class DelegatedContentRouting { * Announce to the network that the delegated node can provide the given key. 
* * Currently this uses the following hack - * - call swarm.connect on the delegated node to us, to ensure we are connected + * - delegate is one of bootstrap nodes, so we are always connected to it * - call refs on the delegated node, so it fetches the content * * @param {CID} key @@ -114,34 +95,6 @@ class DelegatedContentRouting { async provide (key) { const keyString = key.toBaseEncodedString() log('provide starts: ' + keyString) - - let results - try { - // optimization: try the first addr - // (swarm.connect will return success if ANY connection to this.peerId already exists) - const addr = this.circuits.find(a => Boolean(a)) - const res = await this._httpQueue.add(() => this.swarm.connect(addr.toString())) - if (res && res.error) throw new Error() // trigger fallback - results = [res] - } catch (err) { - // fallback to trying all potential circuits - results = await Promise.all( - this.circuits.map((addr) => - this._httpQueue.add(() => - this.swarm.connect(addr.toString()).catch(() => {}) - ) - ) - ) - } - // only some need to succeed - const success = results.filter((res) => res && res.error == null) - - if (success.length === 0) { - throw new Error('unable to swarm.connect using p2p-circuit') - } - - // async preload of data to delegate node - // note: we call `provide` for every block, so it does not need to be recursive await this._httpQueueRefs.add(() => this.refs(keyString, { recursive: false }) ) From e1218f39202e1c7f0c5026be5a2e646d99fa1447 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Wed, 24 Jul 2019 13:36:09 +0200 Subject: [PATCH 3/3] chore: remove unused dependencies We no longer need those (removed calls to swarm.connect) License: MIT Signed-off-by: Marcin Rataj --- package.json | 1 - src/index.js | 6 ------ 2 files changed, 7 deletions(-) diff --git a/package.json b/package.json index 80b39ac..41f7a9b 100644 --- a/package.json +++ b/package.json @@ -30,7 +30,6 @@ "debug": "^4.1.1", "ipfs-http-client": "^33.1.0", "multiaddr": "^6.1.0", - 
"p-memoize": "^3.1.0", "p-queue": "^6.1.0" }, "contributors": [ diff --git a/src/index.js b/src/index.js index 973716d..d6c7397 100644 --- a/src/index.js +++ b/src/index.js @@ -1,11 +1,9 @@ 'use strict' const dht = require('ipfs-http-client/src/dht') -const swarm = require('ipfs-http-client/src/swarm') const refs = require('ipfs-http-client/src/files-regular/refs') const defaultConfig = require('ipfs-http-client/src/utils/default-config') const { default: PQueue } = require('p-queue') -const pMemoize = require('p-memoize') const debug = require('debug') const log = debug('libp2p-delegated-content-routing') @@ -19,7 +17,6 @@ const DEFAULT_IPFS_API = { } const CONCURRENT_HTTP_REQUESTS = 4 -const SWARM_CONNECT_MAX_AGE = 60e3 /** * An implementation of content routing, using a delegated peer. @@ -38,9 +35,6 @@ class DelegatedContentRouting { this.api = Object.assign({}, defaultConfig(), DEFAULT_IPFS_API, api) this.dht = dht(this.api) - this.swarm = swarm(this.api) - // optimization: avoid calling swarm.connect too often - this.swarm.connect = pMemoize(this.swarm.connect, { maxAge: SWARM_CONNECT_MAX_AGE }) this.refs = refs(this.api) this.peerId = peerId