From a71cb0a7aefae283cad5cca5cc28942ebcbb1530 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 11 Apr 2022 10:25:41 +0100 Subject: [PATCH 1/5] Forbid port range re-use in tests Today we select the port range to use in tests by taking the Gradle worker ID mod 223. We now use significantly more than 223 Gradle workers in each test run which means port ranges are re-used, resulting in spurious failures. This commit removes this lenience, insisting that every worker now gets a distinct port range. It reduces the size of the port range available to each worker to avoid running out of ports. Future changes that add more workers may need to reduce the port range still further, or find a different solution to keeping the tests separate, but at least will not see spurious failures due to cross-test interactions. --- .../org/elasticsearch/test/ESTestCase.java | 84 +++++++++++++------ .../test/test/ESTestCaseTests.java | 4 +- 2 files changed, 61 insertions(+), 27 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index 147bdc8318313..63d246d0c3e98 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -193,7 +193,6 @@ public abstract class ESTestCase extends LuceneTestCase { private static final AtomicInteger portGenerator = new AtomicInteger(); private static final Collection loggedLeaks = new ArrayList<>(); - public static final int MIN_PRIVATE_PORT = 13301; private HeaderWarningAppender headerWarningAppender; @@ -1647,38 +1646,73 @@ public static boolean inFipsJvm() { return Boolean.parseBoolean(System.getProperty(FIPS_SYSPROP)); } + /* + * [NOTE: Port ranges for tests] + * + * Some tests involve interactions over the localhost interface of the machine running the tests. The tests run concurrently in multiple + * JVMs, but all have access to the same network, so there's a risk that different tests will interact with each other in unexpected + * ways and trigger spurious failures. Gradle numbers its workers sequentially starting at 1 and each worker can determine its own + * identity from the {@link #TEST_WORKER_SYS_PROPERTY} system property. We use this to assign disjoint port ranges to each test worker, + * avoiding any unexpected interactions. + */ + + /** + * Defines the size of the port range assigned to each worker, which must be large enough to supply enough ports to run the tests, but + * not so large that we run out of ports. See also [NOTE: Port ranges for tests]. + */ + private static final int PORTS_PER_WORKER = 30; + + /** + * Defines the minimum port that test workers should use. See also [NOTE: Port ranges for tests]. + */ + protected static final int MIN_PRIVATE_PORT = 13301; + /** - * Returns a unique port range for this JVM starting from the computed base port + * Defines the maximum port that test workers should use. See also [NOTE: Port ranges for tests]. + */ + private static final int MAX_PRIVATE_PORT = 36600; + + /** + * Returns a port range for this JVM according to its Gradle worker ID. See also [NOTE: Port ranges for tests]. */ public static String getPortRange() { - return getBasePort() + "-" + (getBasePort() + 99); // upper bound is inclusive + final var firstPort = getBasePort(); + final var lastPort = firstPort + PORTS_PER_WORKER - 1; // upper bound is inclusive + assert MIN_PRIVATE_PORT <= firstPort && lastPort <= MAX_PRIVATE_PORT + : firstPort + "-" + lastPort + " vs " + MIN_PRIVATE_PORT + "-" + MAX_PRIVATE_PORT; + return firstPort + "-" + lastPort; } + /** + * Returns the start of the port range for this JVM according to its Gradle worker ID. See also [NOTE: Port ranges for tests]. + */ protected static int getBasePort() { - // some tests use MockTransportService to do network based testing. Yet, we run tests in multiple JVMs that means - // concurrent tests could claim port that another JVM just released and if that test tries to simulate a disconnect it might - // be smart enough to re-connect depending on what is tested. To reduce the risk, since this is very hard to debug we use - // a different default port range per JVM unless the incoming settings override it - // use a non-default base port otherwise some cluster in this JVM might reuse a port - - // We rely on Gradle implementation details here, the worker IDs are long values incremented by one for the - // lifespan of the daemon this means that they can get larger than the allowed port range. - // Ephemeral ports on Linux start at 32768 so we modulo to make sure that we don't exceed that. - // This is safe as long as we have fewer than 224 Gradle workers running in parallel - // See also: https://github.com/elastic/elasticsearch/issues/44134 - final String workerIdStr = System.getProperty(ESTestCase.TEST_WORKER_SYS_PROPERTY); - final int startAt; + final var workerIdStr = System.getProperty(ESTestCase.TEST_WORKER_SYS_PROPERTY); if (workerIdStr == null) { - startAt = 0; // IDE - } else { - // we adjust the gradle worker id with mod so as to not go over the ephemoral port ranges, but gradle continually - // increases this value, so the mod can eventually become zero, thus we shift on both sides by 1 - final long workerId = Long.valueOf(workerIdStr); - assert workerId >= 1 : "Non positive gradle worker id: " + workerIdStr; - startAt = Math.floorMod(workerId - 1, 223) + 1; + // running in IDE + return MIN_PRIVATE_PORT; + } + + final var workerId = Integer.parseInt(workerIdStr); + assert workerId >= 1 : "Non positive gradle worker id: " + workerIdStr; + final var firstPort = MIN_PRIVATE_PORT + (workerId * PORTS_PER_WORKER); + final var lastPort = firstPort + PORTS_PER_WORKER - 1; // upper bound is inclusive; + if (lastPort > MAX_PRIVATE_PORT) { + throw new AssertionError( + String.format( + Locale.ROOT, + """ + worker ID %d would get port range %d-%d which is outside the range of available ports %d-%d; either reduce \ + ESTestCase#PORTS_PER_WORKER or run fewer Gradle workers""", + workerId, + firstPort, + lastPort, + MIN_PRIVATE_PORT, + MAX_PRIVATE_PORT + ) + ); } - assert startAt >= 0 : "Unexpected test worker Id, resulting port range would be negative"; - return MIN_PRIVATE_PORT + (startAt * 100); + return firstPort; } protected static InetAddress randomIp(boolean v4) { diff --git a/test/framework/src/test/java/org/elasticsearch/test/test/ESTestCaseTests.java b/test/framework/src/test/java/org/elasticsearch/test/test/ESTestCaseTests.java index d1c55f2fbf5a9..bd563b2f27695 100644 --- a/test/framework/src/test/java/org/elasticsearch/test/test/ESTestCaseTests.java +++ b/test/framework/src/test/java/org/elasticsearch/test/test/ESTestCaseTests.java @@ -183,12 +183,12 @@ public void testWorkerSystemProperty() { public void testBasePortGradle() { assumeTrue("requires running tests with Gradle", System.getProperty("tests.gradle") != null); // Gradle worker IDs are 1 based - assertNotEquals(10300, ESTestCase.getBasePort()); + assertNotEquals(ESTestCase.MIN_PRIVATE_PORT, ESTestCase.getBasePort()); } public void testBasePortIDE() { assumeTrue("requires running tests without Gradle", System.getProperty("tests.gradle") == null); - assertEquals(10300, ESTestCase.getBasePort()); + assertEquals(ESTestCase.MIN_PRIVATE_PORT, ESTestCase.getBasePort()); } public void testRandomDateFormatterPattern() { From 9e5058ee0a6d7a0f9769771fde981cdffa352852 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 12 Apr 2022 15:09:02 +0100 Subject: [PATCH 2/5] Re-introduce wraparound again, but computed in terms of port ranges --- .../org/elasticsearch/test/ESTestCase.java | 30 +++++++------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index 18bc031e7ffed..12df7b40fa2bf 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -1648,8 +1648,9 @@ public static boolean inFipsJvm() { * Some tests involve interactions over the localhost interface of the machine running the tests. The tests run concurrently in multiple * JVMs, but all have access to the same network, so there's a risk that different tests will interact with each other in unexpected * ways and trigger spurious failures. Gradle numbers its workers sequentially starting at 1 and each worker can determine its own - * identity from the {@link #TEST_WORKER_SYS_PROPERTY} system property. We use this to assign disjoint port ranges to each test worker, - * avoiding any unexpected interactions. + * identity from the {@link #TEST_WORKER_SYS_PROPERTY} system property. We use this to try and assign disjoint port ranges to each test + * worker, avoiding any unexpected interactions, although if we spawn enough test workers then we will wrap around to the beginning + * again. */ /** @@ -1668,6 +1669,11 @@ public static boolean inFipsJvm() { */ private static final int MAX_PRIVATE_PORT = 36600; + /** + * Wrap around at this worker ID + */ + private static final int MAX_EFFECTIVE_WORKER_ID = (MAX_PRIVATE_PORT - MIN_PRIVATE_PORT + 1) / PORTS_PER_WORKER; + /** * Returns a port range for this JVM according to its Gradle worker ID. See also [NOTE: Port ranges for tests]. */ @@ -1691,24 +1697,8 @@ protected static int getBasePort() { final var workerId = Integer.parseInt(workerIdStr); assert workerId >= 1 : "Non positive gradle worker id: " + workerIdStr; - final var firstPort = MIN_PRIVATE_PORT + (workerId * PORTS_PER_WORKER); - final var lastPort = firstPort + PORTS_PER_WORKER - 1; // upper bound is inclusive; - if (lastPort > MAX_PRIVATE_PORT) { - throw new AssertionError( - String.format( - Locale.ROOT, - """ - worker ID %d would get port range %d-%d which is outside the range of available ports %d-%d; either reduce \ - ESTestCase#PORTS_PER_WORKER or run fewer Gradle workers""", - workerId, - firstPort, - lastPort, - MIN_PRIVATE_PORT, - MAX_PRIVATE_PORT - ) - ); - } - return firstPort; + final var effectiveWorkerId = workerId % MAX_EFFECTIVE_WORKER_ID; + return MIN_PRIVATE_PORT + (effectiveWorkerId * PORTS_PER_WORKER); } protected static InetAddress randomIp(boolean v4) { From a97e30111850c0b6e170c181e877ca6247580367 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 12 Apr 2022 15:09:34 +0100 Subject: [PATCH 3/5] Inline --- .../src/main/java/org/elasticsearch/test/ESTestCase.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index 12df7b40fa2bf..3913fd3959fdf 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -1697,8 +1697,7 @@ protected static int getBasePort() { final var workerId = Integer.parseInt(workerIdStr); assert workerId >= 1 : "Non positive gradle worker id: " + workerIdStr; - final var effectiveWorkerId = workerId % MAX_EFFECTIVE_WORKER_ID; - return MIN_PRIVATE_PORT + (effectiveWorkerId * PORTS_PER_WORKER); + return MIN_PRIVATE_PORT + (workerId % MAX_EFFECTIVE_WORKER_ID) * PORTS_PER_WORKER; } protected static InetAddress randomIp(boolean v4) { From 18f2dd2e1b2f034746ab7b4329843dca09fb6c1c Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 12 Apr 2022 15:18:23 +0100 Subject: [PATCH 4/5] Safety check --- .../src/main/java/org/elasticsearch/test/ESTestCase.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index 3913fd3959fdf..388a0eaf5d305 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -1674,6 +1674,11 @@ public static boolean inFipsJvm() { */ private static final int MAX_EFFECTIVE_WORKER_ID = (MAX_PRIVATE_PORT - MIN_PRIVATE_PORT + 1) / PORTS_PER_WORKER; + static { + //noinspection ConstantConditions this is here to catch mistakes when changing other constants in this class + assert 0 < MAX_EFFECTIVE_WORKER_ID; + } + /** * Returns a port range for this JVM according to its Gradle worker ID. See also [NOTE: Port ranges for tests]. */ From 59fccaa60298d796bc79bb527458f3facc5203fc Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 12 Apr 2022 15:36:59 +0100 Subject: [PATCH 5/5] Finesse --- .../org/elasticsearch/test/ESTestCase.java | 22 +++++++++++-------- .../test/test/ESTestCaseTests.java | 4 ++-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index 388a0eaf5d305..6e745872bd065 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -1670,30 +1670,28 @@ public static boolean inFipsJvm() { private static final int MAX_PRIVATE_PORT = 36600; /** - * Wrap around at this worker ID + * Wrap around after reaching this worker ID. */ - private static final int MAX_EFFECTIVE_WORKER_ID = (MAX_PRIVATE_PORT - MIN_PRIVATE_PORT + 1) / PORTS_PER_WORKER; + private static final int MAX_EFFECTIVE_WORKER_ID = (MAX_PRIVATE_PORT - MIN_PRIVATE_PORT - PORTS_PER_WORKER + 1) / PORTS_PER_WORKER - 1; static { - //noinspection ConstantConditions this is here to catch mistakes when changing other constants in this class - assert 0 < MAX_EFFECTIVE_WORKER_ID; + assert getWorkerBasePort(MAX_EFFECTIVE_WORKER_ID) + PORTS_PER_WORKER - 1 <= MAX_PRIVATE_PORT; } /** * Returns a port range for this JVM according to its Gradle worker ID. See also [NOTE: Port ranges for tests]. */ public static String getPortRange() { - final var firstPort = getBasePort(); + final var firstPort = getWorkerBasePort(); final var lastPort = firstPort + PORTS_PER_WORKER - 1; // upper bound is inclusive - assert MIN_PRIVATE_PORT <= firstPort && lastPort <= MAX_PRIVATE_PORT - : firstPort + "-" + lastPort + " vs " + MIN_PRIVATE_PORT + "-" + MAX_PRIVATE_PORT; + assert MIN_PRIVATE_PORT <= firstPort && lastPort <= MAX_PRIVATE_PORT; return firstPort + "-" + lastPort; } /** * Returns the start of the port range for this JVM according to its Gradle worker ID. See also [NOTE: Port ranges for tests]. */ - protected static int getBasePort() { + protected static int getWorkerBasePort() { final var workerIdStr = System.getProperty(ESTestCase.TEST_WORKER_SYS_PROPERTY); if (workerIdStr == null) { // running in IDE @@ -1702,7 +1700,13 @@ protected static int getBasePort() { final var workerId = Integer.parseInt(workerIdStr); assert workerId >= 1 : "Non positive gradle worker id: " + workerIdStr; - return MIN_PRIVATE_PORT + (workerId % MAX_EFFECTIVE_WORKER_ID) * PORTS_PER_WORKER; + return getWorkerBasePort(workerId % (MAX_EFFECTIVE_WORKER_ID + 1)); + } + + private static int getWorkerBasePort(int effectiveWorkerId) { + assert 0 <= effectiveWorkerId && effectiveWorkerId <= MAX_EFFECTIVE_WORKER_ID; + // the range [MIN_PRIVATE_PORT, MIN_PRIVATE_PORT+PORTS_PER_WORKER) is only for running outside of Gradle + return MIN_PRIVATE_PORT + PORTS_PER_WORKER + effectiveWorkerId * PORTS_PER_WORKER; } protected static InetAddress randomIp(boolean v4) { diff --git a/test/framework/src/test/java/org/elasticsearch/test/test/ESTestCaseTests.java b/test/framework/src/test/java/org/elasticsearch/test/test/ESTestCaseTests.java index bd563b2f27695..b671aa502a1e7 100644 --- a/test/framework/src/test/java/org/elasticsearch/test/test/ESTestCaseTests.java +++ b/test/framework/src/test/java/org/elasticsearch/test/test/ESTestCaseTests.java @@ -183,12 +183,12 @@ public void testWorkerSystemProperty() { public void testBasePortGradle() { assumeTrue("requires running tests with Gradle", System.getProperty("tests.gradle") != null); // Gradle worker IDs are 1 based - assertNotEquals(ESTestCase.MIN_PRIVATE_PORT, ESTestCase.getBasePort()); + assertNotEquals(ESTestCase.MIN_PRIVATE_PORT, ESTestCase.getWorkerBasePort()); } public void testBasePortIDE() { assumeTrue("requires running tests without Gradle", System.getProperty("tests.gradle") == null); - assertEquals(ESTestCase.MIN_PRIVATE_PORT, ESTestCase.getBasePort()); + assertEquals(ESTestCase.MIN_PRIVATE_PORT, ESTestCase.getWorkerBasePort()); } public void testRandomDateFormatterPattern() {