From 4076877f011a6b3e4dbf02b1aa1df5b77d7fb994 Mon Sep 17 00:00:00 2001 From: Stefan Budeanu Date: Mon, 2 May 2016 16:17:13 -0400 Subject: [PATCH 1/4] test: avoid test-cluster-master-kill flakiness Removed reliance on worker exit before arbitrary timeout. Instead of killing the parent process, destroy the IPC pipe for the same effect, and wait for the worker's exit before also exiting the parent. Insuring these steps are well-ordered removes the need for timeouts and reduces intermittent failures. In case of an actual hang in the child's exit code, the test harness global timeout will kick in, and the test will still fail. --- test/parallel/test-cluster-master-kill.js | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/parallel/test-cluster-master-kill.js b/test/parallel/test-cluster-master-kill.js index 32f22b0f968b7b..b07f9a083962b1 100644 --- a/test/parallel/test-cluster-master-kill.js +++ b/test/parallel/test-cluster-master-kill.js @@ -21,10 +21,17 @@ if (cluster.isWorker) { // terminate the cluster process worker.once('listening', function() { setTimeout(function() { - process.exit(0); +/* Cluster.disconnect() will exercise a different 'graceful' shutdown path. + From the perspective of the worker, closing the channel is equivalent + to the parent calling process.exit(0). */ + worker.process._channel.close(); }, 1000); }); + worker.once('exit', function() { + process.exit(0); + }); + } else { // This is the testcase @@ -55,18 +62,11 @@ if (cluster.isWorker) { var alive = true; master.on('exit', function(code) { - // make sure that the master died by purpose + // make sure that the master died on purpose assert.equal(code, 0); // check worker process status - var timeout = 200; - if (common.isAix) { - // AIX needs more time due to default exit performance - timeout = 1000; - } - setTimeout(function() { - alive = isAlive(pid); - }, timeout); + alive = isAlive(pid); }); process.once('exit', function() { From fadbde5d3ef6cb3bddb9909d84a1ca550381a0e2 Mon Sep 17 00:00:00 2001 From: Stefan Budeanu Date: Mon, 2 May 2016 22:16:08 -0400 Subject: [PATCH 2/4] WIP: Poll indefinitely --- test/parallel/test-cluster-master-kill.js | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/test/parallel/test-cluster-master-kill.js b/test/parallel/test-cluster-master-kill.js index b07f9a083962b1..9d98f413d60309 100644 --- a/test/parallel/test-cluster-master-kill.js +++ b/test/parallel/test-cluster-master-kill.js @@ -21,17 +21,10 @@ if (cluster.isWorker) { // terminate the cluster process worker.once('listening', function() { setTimeout(function() { -/* Cluster.disconnect() will exercise a different 'graceful' shutdown path. - From the perspective of the worker, closing the channel is equivalent - to the parent calling process.exit(0). */ - worker.process._channel.close(); + process.exit(0); }, 1000); }); - worker.once('exit', function() { - process.exit(0); - }); - } else { // This is the testcase @@ -66,15 +59,17 @@ if (cluster.isWorker) { assert.equal(code, 0); // check worker process status - alive = isAlive(pid); + var pollWorker = function() { + alive = isAlive(pid); + if (alive) { + setTimeout(pollWorker, 500); + } + }; + // Loop indefinitely until worker exit. + pollWorker(); }); process.once('exit', function() { - // cleanup: kill the worker if alive - if (alive) { - process.kill(pid); - } - assert.equal(typeof pid, 'number', 'did not get worker pid info'); assert.equal(alive, false, 'worker was alive after master died'); }); From 89b0ff4fa3e051e3f0cda3ea42ee744eb037e047 Mon Sep 17 00:00:00 2001 From: Stefan Budeanu Date: Tue, 3 May 2016 10:37:07 -0400 Subject: [PATCH 3/4] WIP: Also change test-cluster-master-error --- test/parallel/test-cluster-master-error.js | 45 ++++++++++------------ 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/test/parallel/test-cluster-master-error.js b/test/parallel/test-cluster-master-error.js index ae0f655bb8e5d0..4786b40c2061c4 100644 --- a/test/parallel/test-cluster-master-error.js +++ b/test/parallel/test-cluster-master-error.js @@ -30,7 +30,7 @@ if (cluster.isWorker) { } }); - // Throw accidently error when all workers are listening + // Throw accidental error when all workers are listening var listeningNum = 0; cluster.on('listening', function listeningEvent() { @@ -39,10 +39,10 @@ if (cluster.isWorker) { // Stop listening cluster.removeListener('listening', listeningEvent); - // throw accidently error + // Throw accidental error process.nextTick(function() { console.error('about to throw'); - throw new Error('accidently error'); + throw new Error('accidental error'); }); } @@ -68,8 +68,8 @@ if (cluster.isWorker) { } }; - var existMaster = false; - var existWorker = false; + var masterExited = false; + var workersExited = false; // List all workers var workers = []; @@ -89,36 +89,33 @@ if (cluster.isWorker) { // When cluster is dead master.on('exit', function(code) { - // Check that the cluster died accidently - existMaster = !!code; + // Check that the cluster died accidentally (non-zero exit code) + masterExited = !!code; - // Give the workers time to shut down - var timeout = 200; - if (common.isAix) { - // AIX needs more time due to default exit performance - timeout = 1000; - } - setTimeout(checkWorkers, timeout); - - function checkWorkers() { - // When master is dead all workers should be dead to + var pollWorkers = function() { + // When master is dead all workers should be dead too var alive = false; workers.forEach(function(pid) { if (isAlive(pid)) { alive = true; } }); - - // If a worker was alive this did not act as expected - existWorker = !alive; - } + if (alive) { + setTimeout(pollWorkers, 500); + } else { + workersExited = true; + } + }; + + // Loop indefinitely until worker exit + pollWorkers(); }); process.once('exit', function() { - var m = 'The master did not die after an error was throwed'; - assert.ok(existMaster, m); + var m = 'The master did not die after an error was thrown'; + assert.ok(masterExited, m); m = 'The workers did not die after an error in the master'; - assert.ok(existWorker, m); + assert.ok(workersExited, m); }); } From 1246090165678b05461216d5f0218c9bf43277bb Mon Sep 17 00:00:00 2001 From: Stefan Budeanu Date: Tue, 3 May 2016 16:06:45 -0400 Subject: [PATCH 4/4] WIP: Reduce poll interval to 50ms --- test/parallel/test-cluster-master-error.js | 2 +- test/parallel/test-cluster-master-kill.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/parallel/test-cluster-master-error.js b/test/parallel/test-cluster-master-error.js index 4786b40c2061c4..1f0cb8c9098c62 100644 --- a/test/parallel/test-cluster-master-error.js +++ b/test/parallel/test-cluster-master-error.js @@ -101,7 +101,7 @@ if (cluster.isWorker) { } }); if (alive) { - setTimeout(pollWorkers, 500); + setTimeout(pollWorkers, 50); } else { workersExited = true; } diff --git a/test/parallel/test-cluster-master-kill.js b/test/parallel/test-cluster-master-kill.js index 9d98f413d60309..549655f1b64cbc 100644 --- a/test/parallel/test-cluster-master-kill.js +++ b/test/parallel/test-cluster-master-kill.js @@ -62,7 +62,7 @@ if (cluster.isWorker) { var pollWorker = function() { alive = isAlive(pid); if (alive) { - setTimeout(pollWorker, 500); + setTimeout(pollWorker, 50); } }; // Loop indefinitely until worker exit.