Skip to content

Commit 0fb3ec9

Browse files
committed
cluster: fix race condition setting suicide prop
There is no guarantee that the `suicide` property of a worker in the master process is going to be set when the `disconnect` and `exit` events are emitted. To fix it, wait for the ACK of the suicide message from the master before disconnecting the worker. Also, there's no need to send the suicide message from the worker if the disconnection has been initiated in the master. Add `test-cluster-disconnect-suicide-race` that forks a lot of workers to consistently reproduce the issue this patch tries to solve. Modify `test-regress-nodejsGH-3238` so it checks both the `kill` and `disconnect` cases. Also take into account that the `disconnect` event may be received after the `exit` event.
1 parent 6cfd0b5 commit 0fb3ec9

File tree

3 files changed

+75
-28
lines changed

3 files changed

+75
-28
lines changed

lib/cluster.js

+31-17
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,7 @@ function masterInit() {
434434
else if (message.act === 'listening')
435435
listening(worker, message);
436436
else if (message.act === 'suicide')
437-
worker.suicide = true;
437+
suicide(worker, message);
438438
else if (message.act === 'close')
439439
close(worker, message);
440440
}
@@ -445,6 +445,11 @@ function masterInit() {
445445
cluster.emit('online', worker);
446446
}
447447

448+
function suicide(worker, message) {
449+
worker.suicide = true;
450+
send(worker, { ack: message.seq });
451+
}
452+
448453
function queryServer(worker, message) {
449454
// Stop processing if worker already disconnecting
450455
if (worker.suicide)
@@ -541,7 +546,7 @@ function workerInit() {
541546
if (message.act === 'newconn')
542547
onconnection(message, handle);
543548
else if (message.act === 'disconnect')
544-
worker.disconnect();
549+
_disconnect.call(worker, true);
545550
}
546551
};
547552

@@ -662,14 +667,36 @@ function workerInit() {
662667
}
663668

664669
Worker.prototype.disconnect = function() {
670+
_disconnect.call(this);
671+
};
672+
673+
Worker.prototype.destroy = function() {
674+
this.suicide = true;
675+
if (!this.isConnected()) process.exit(0);
676+
var exit = process.exit.bind(null, 0);
677+
send({ act: 'suicide' }, () => process.disconnect());
678+
process.once('disconnect', exit);
679+
};
680+
681+
function send(message, cb) {
682+
sendHelper(process, message, null, cb);
683+
}
684+
685+
function _disconnect(masterInitiated) {
665686
this.suicide = true;
666687
let waitingCount = 1;
667688

668689
function checkWaitingCount() {
669690
waitingCount--;
670691
if (waitingCount === 0) {
671-
send({ act: 'suicide' });
672-
process.disconnect();
692+
// If disconnect is worker initiated, wait for ack to be sure suicide
693+
// is properly set in the master, otherwise, if it's master initiated
694+
// there's no need to send the suicide message
695+
if (masterInitiated) {
696+
process.disconnect();
697+
} else {
698+
send({ act: 'suicide' }, () => process.disconnect());
699+
}
673700
}
674701
}
675702

@@ -681,19 +708,6 @@ function workerInit() {
681708
}
682709

683710
checkWaitingCount();
684-
};
685-
686-
Worker.prototype.destroy = function() {
687-
this.suicide = true;
688-
if (!this.isConnected()) process.exit(0);
689-
var exit = process.exit.bind(null, 0);
690-
send({ act: 'suicide' }, exit);
691-
process.once('disconnect', exit);
692-
process.disconnect();
693-
};
694-
695-
function send(message, cb) {
696-
sendHelper(process, message, null, cb);
697711
}
698712
}
699713

test/parallel/test-regress-GH-3238.js

+12-11
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,19 @@ const assert = require('assert');
44
const cluster = require('cluster');
55

66
if (cluster.isMaster) {
7-
const worker = cluster.fork();
8-
let disconnected = false;
7+
function forkWorker(action) {
8+
const worker = cluster.fork({ action });
9+
worker.on('disconnect', common.mustCall(() => {
10+
assert.strictEqual(worker.suicide, true);
11+
}));
912

10-
worker.on('disconnect', common.mustCall(function() {
11-
assert.strictEqual(worker.suicide, true);
12-
disconnected = true;
13-
}));
13+
worker.on('exit', common.mustCall(() => {
14+
assert.strictEqual(worker.suicide, true);
15+
}));
16+
}
1417

15-
worker.on('exit', common.mustCall(function() {
16-
assert.strictEqual(worker.suicide, true);
17-
assert.strictEqual(disconnected, true);
18-
}));
18+
forkWorker('disconnect');
19+
forkWorker('kill');
1920
} else {
20-
cluster.worker.disconnect();
21+
cluster.worker[process.env.action]();
2122
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
'use strict';
2+
const common = require('../common');
3+
const assert = require('assert');
4+
const cluster = require('cluster');
5+
const os = require('os');
6+
7+
if (cluster.isMaster) {
8+
function forkWorker(action) {
9+
const worker = cluster.fork({ action });
10+
worker.on('disconnect', common.mustCall(() => {
11+
assert.strictEqual(worker.suicide, true);
12+
}));
13+
14+
worker.on('exit', common.mustCall(() => {
15+
assert.strictEqual(worker.suicide, true);
16+
}));
17+
}
18+
19+
const cpus = os.cpus().length;
20+
const tries = cpus > 8 ? 64 : cpus * 8;
21+
22+
cluster.on('exit', common.mustCall((worker, code) => {
23+
assert.strictEqual(code, 0, 'worker exited with error');
24+
}, tries * 2));
25+
26+
for (let i = 0; i < tries; ++i) {
27+
forkWorker('disconnect');
28+
forkWorker('kill');
29+
}
30+
} else {
31+
cluster.worker[process.env.action]();
32+
}

0 commit comments

Comments
 (0)