From 9af6703655bc53a78df2ec2b6ac34f93e9ea4eff Mon Sep 17 00:00:00 2001 From: Vita Batrla Date: Wed, 22 Feb 2023 18:40:08 +0100 Subject: [PATCH] test: fix test-net-connect-reset-until-connected Fixes a race condition in test that causes the test to randomly timeout on Solaris 11.4, SmartOS and potentially also FreeBSD. The client resets the connection using conn.resetAndDestroy(). This call is asynchronous and if it's effect occurs before server's listening socket accepts the connection, the test hangs. The fix is to put a synchronization barrier that resets the connection only after it is established on both server and client side. Below is a little bit more about the root cause. I show positive (test works) and negative (when test hangs) scenarios. The output contains only relevant system / library calls that were collected using truss(1). Without the fix the test randomly hangs. With the fix the test completes thousands of runs without single issue. Race condition scenario: ``` connect(23, 0x7FFFBFFF7F10, 32, SOV_XPG4_2)Err#150 EINPROGRESS ^ client socket connects to server close(23)= 0 ^ client closes the socket too early accept(21, 0x00000000, 0x00000000, SOV_DEFAULT)Err#130 ECONNABORTED accept(21, 0x00000000, 0x00000000, SOV_DEFAULT)Err#11 EAGAIN ^ accept on listening socket fails ... test hangs and times out... ``` Working (good) scenario: ``` connect(23, 0x7FFFBFFF7F00, 32, SOV_XPG4_2)Err#150 EINPROGRESS ^ client socket connects to server accept(21, 0x00000000, 0x00000000, SOV_DEFAULT)= 24 ^ server accepts client connection on listening socket close(23)= 0 ^ client socket closes read(24, 0x046B6010, 65536)Err#131 ECONNRESET ^ test gets so much wanted error while reading on accepted FD close(24)= 0 ^ accepted FD closes ... test completes, passes ... ``` Fixes: https://github.com/nodejs/node/issues/43446 --- test/parallel/parallel.status | 4 ---- .../test-net-connect-reset-until-connected.js | 11 ++++++++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/test/parallel/parallel.status b/test/parallel/parallel.status index 2f3d912a9b7ab1..1cbcefb9712fae 100644 --- a/test/parallel/parallel.status +++ b/test/parallel/parallel.status @@ -31,8 +31,6 @@ test-crypto-dh-stateless: SKIP test-crypto-keygen: SKIP [$system==solaris] # Also applies to SmartOS -# https://github.com/nodejs/node/issues/43446 -test-net-connect-reset-until-connected: PASS, FLAKY # https://github.com/nodejs/node/issues/43457 test-domain-no-error-handler-abort-on-uncaught-0: PASS, FLAKY test-domain-no-error-handler-abort-on-uncaught-1: PASS,FLAKY @@ -52,8 +50,6 @@ test-domain-with-abort-on-uncaught-exception: PASS, FLAKY test-fs-stat-bigint: PASS,FLAKY # https://github.com/nodejs/node/issues/31280 test-worker-message-port-message-before-close: PASS,FLAKY -# https://github.com/nodejs/node/issues/43446 -test-net-connect-reset-until-connected: PASS, FLAKY [$system==ibmi] # https://github.com/nodejs/node/pull/30819 diff --git a/test/parallel/test-net-connect-reset-until-connected.js b/test/parallel/test-net-connect-reset-until-connected.js index e40ec05f6ce1e9..62b0c9c0c017fb 100644 --- a/test/parallel/test-net-connect-reset-until-connected.js +++ b/test/parallel/test-net-connect-reset-until-connected.js @@ -3,12 +3,21 @@ const common = require('../common'); const net = require('net'); +function barrier(count, cb) { + return function () { + if (--count == 0) + cb(); + } +} + const server = net.createServer(); server.listen(0, common.mustCall(function() { const port = server.address().port; const conn = net.createConnection(port); + const connok = barrier(2, () => conn.resetAndDestroy()); conn.on('close', common.mustCall()); server.on('connection', (socket) => { + connok(); socket.on('error', common.expectsError({ code: 'ECONNRESET', message: 'read ECONNRESET', @@ -16,5 +25,5 @@ server.listen(0, common.mustCall(function() { })); server.close(); }); - conn.resetAndDestroy(); + conn.on('connect', connok); }));