From b0e9835b03c7ad2746f94c80edc65c190b7602d3 Mon Sep 17 00:00:00 2001 From: Menno Pruijssers Date: Tue, 20 Dec 2016 14:37:34 +0100 Subject: [PATCH 1/2] Support rolling restart in tick-cluster --- package.json | 3 ++- tools/tick-cluster.js | 61 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index d400861..3cb2f0a 100644 --- a/package.json +++ b/package.json @@ -7,14 +7,15 @@ "version": "0.0.1", "repository": "git://github.com/uber/ringpop-common.git", "dependencies": { + "async": "^2.1.4", "cli-color": "^0.3.2", "commander": "^2.6.0", "farmhash": "^1.1.0", "grafana-dash-gen": "uber/grafana-dash-gen#41a17abfa174fdda048f9a1084bc302dec28e6ff", "lodash": "^4.6.1", "strformat": "^0.0.7", - "uber-licence": "^2.0.1", "tchannel": "^3.6.24", + "uber-licence": "^2.0.1", "zero-config": "^5.0.0" }, "pre-commit": [ diff --git a/tools/tick-cluster.js b/tools/tick-cluster.js index 17fee8c..4c01536 100755 --- a/tools/tick-cluster.js +++ b/tools/tick-cluster.js @@ -22,6 +22,7 @@ 'use strict'; var _ = require('lodash'); +var async = require('async'); var childProc = require('child_process'); var color = require('cli-color'); var farmhash = require('farmhash').hash32; @@ -318,6 +319,11 @@ function onData(char) { state = 'readnum'; process.stdout.write('terminate count: '); break; + case 'r': + func = restartProc; + state = 'readnum'; + process.stdout.write('batch size: '); + break; case 'K': reviveProcs(); break; @@ -519,6 +525,60 @@ function terminateProc(count) { }); } +function restartProc(batchSize) { + if (batchSize === 0) { + return; + } + var coolDownDelay = 1000; + + var processGroups = _.chain(procs) + .filter(function(proc) { + return !proc.killed && !proc.suspended; + }) + .chunk(batchSize) + .value(); + + async.eachOfSeries(processGroups, restartProcessGroup, function() { + logMsg('cluster', color.green('rolling restart completed')); + }); + + function restartProcessGroup(group, index, cb) { + logMsg('cluster', color.cyan('rolling restart batch: ' + (index + 1) + ' / ' + processGroups.length)); + async.each(group, restartSingleProcess, cb); + } + + function restartSingleProcess(proc, done) { + var index = procs.indexOf(proc); + + stopProcess(terminated); + + function stopProcess(cb) { + logMsg(proc.port, color.green('pid ' + proc.pid) + color.red(' terminating ' + index)); + var hardKillTimer = setTimeout(function() { + logMsg(proc.port, color.green('pid ' + proc.pid) + color.red(' didn\'t terminate in 5 seconds. Hard killing...')); + proc.proc.kill('SIGKILL'); + }, 5000); + proc.proc.once('exit', function() { + clearTimeout(hardKillTimer); + logMsg(proc.port, color.green('pid ' + proc.pid) + color.green(' terminated.')); + proc.killed = Date.now(); + cb(); + }); + proc.proc.kill('SIGTERM'); + } + + function terminated() { + setTimeout(startProcess, coolDownDelay); + } + + function startProcess() { + logMsg(proc.port, color.red('restarting ' + index + ' after ') + color.green((Date.now() - proc.killed) + 'ms')); + procs[index] = new ClusterProc(proc.port); + setTimeout(done, coolDownDelay); + } + } +} + function startCluster() { procs = []; // note module scope for (var i = 0; i < procsToStart ; i++) { @@ -570,6 +630,7 @@ function displayMenu(logFn) { logFn('\tK\t\tRevive suspended or killed processes'); logFn('\tl \tSuspend processes'); logFn('\tm \tTerminate processes'); + logFn('\tr \tRestart processes'); logFn('\tp\t\tPrint out protocol stats'); logFn('\tq\t\tQuit'); logFn('\ts\t\tPrint out stats'); From f71cbb48e3af2c2cab1c58263e84c1469af6cd0b Mon Sep 17 00:00:00 2001 From: Menno Pruijssers Date: Fri, 27 Jan 2017 13:05:07 +0100 Subject: [PATCH 2/2] order help --- tools/tick-cluster.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/tick-cluster.js b/tools/tick-cluster.js index 4c01536..de562e1 100755 --- a/tools/tick-cluster.js +++ b/tools/tick-cluster.js @@ -630,9 +630,9 @@ function displayMenu(logFn) { logFn('\tK\t\tRevive suspended or killed processes'); logFn('\tl \tSuspend processes'); logFn('\tm \tTerminate processes'); - logFn('\tr \tRestart processes'); logFn('\tp\t\tPrint out protocol stats'); logFn('\tq\t\tQuit'); + logFn('\tr \tRestart processes'); logFn('\ts\t\tPrint out stats'); logFn('\tt\t\tTick protocol period'); logFn('\t\t\tPrint out horizontal rule');