From 1369989a40e6a6746be9f9689fb9de053afa9e17 Mon Sep 17 00:00:00 2001 From: Mahesh Raju Somalaraju Date: Fri, 4 Apr 2025 11:24:56 +0530 Subject: [PATCH] TEZ-4598: JDK-17: Migrate Tez to jdk17 (compile & runtime) --- .github/workflows/build.yml | 2 +- BUILDING.txt | 2 +- Jenkinsfile | 6 +- build-tools/docker/Dockerfile | 35 ++++------ pom.xml | 15 ++-- .../org/apache/tez/client/TezClientUtils.java | 1 + .../apache/tez/dag/api/TezConfiguration.java | 2 + .../apache/tez/client/TestTezClientUtils.java | 11 +-- tez-dag/pom.xml | 1 + tez-ext-service-tests/pom.xml | 1 + .../TezTestServiceTaskSchedulerService.java | 6 +- .../impl/SimpleFetchedInputAllocator.java | 4 +- .../impl/TestSimpleFetchedInputAllocator.java | 68 ++++++++++++++----- tez-tests/pom.xml | 1 + 14 files changed, 97 insertions(+), 58 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f81c1ad8b1..1148df52c9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -27,7 +27,7 @@ jobs: build: strategy: matrix: - java-version: [8, 11, 17] + java-version: [17] os: [ubuntu-latest, macos-latest] runs-on: ${{ matrix.os }} steps: diff --git a/BUILDING.txt b/BUILDING.txt index 16632dd849..733dcb09cf 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -8,7 +8,7 @@ Requirements: * JDK 1.8+ * Maven 3.6.3 or later -* spotbugs 4.2.2 or later (if running spotbugs) +* spotbugs 4.9.3 or later (if running spotbugs) * ProtocolBuffer 3.21.1 * Internet connection for first build (to fetch all dependencies) * Hadoop version should be 2.7.0 or higher. 
diff --git a/Jenkinsfile b/Jenkinsfile index d83e58918f..aa6263fa26 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -147,10 +147,8 @@ pipeline { # help keep the ASF boxes clean YETUS_ARGS+=("--sentinel") - # test with Java 8 and 11 - YETUS_ARGS+=("--java-home=/usr/lib/jvm/java-8-openjdk-amd64") - YETUS_ARGS+=("--multijdkdirs=/usr/lib/jvm/java-11-openjdk-amd64") - YETUS_ARGS+=("--multijdktests=compile") + # test with Java 17 + YETUS_ARGS+=("--java-home=/usr/lib/jvm/java-17-openjdk-amd64") YETUS_ARGS+=("--debug") # write Yetus report as GitHub comment (YETUS-1102) diff --git a/build-tools/docker/Dockerfile b/build-tools/docker/Dockerfile index 20c6a26a26..d4cc574ed8 100644 --- a/build-tools/docker/Dockerfile +++ b/build-tools/docker/Dockerfile @@ -165,35 +165,24 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] #### #### -# OpenJDK 8 -#### +# OpenJDK 17 # hadolint ignore=DL3008 -RUN apt-get -q update && apt-get -q install --no-install-recommends -y openjdk-8-jdk-headless \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* +RUN apt-get -q update \ + && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y openjdk-17-jdk \ + && apt-get clean && rm -rf /var/lib/apt/lists/* -#### -# OpenJDK 11 (but keeps default to JDK8) -# NOTE: This default only works when Apache Yetus is launched -# _in_ the container and not outside of it! -#### -# hadolint ignore=DL3008 -RUN apt-get -q update && apt-get -q install --no-install-recommends -y default-jre-headless openjdk-11-jdk-headless \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* \ - && update-java-alternatives -s java-1.8.0-openjdk-amd64 || : -# since update alternatives might fail on executables that we don't really need (e.g. 
appletviewer) -# and return with exit code <0 (actually: 2), we can simply do a sanity check if the version is -# as expected for "java" executable after the update and go on -RUN java -version 2>&1 | grep "1.8.0" && rm -f /usr/lib/jvm/default-java \ - && ln -s java-8-openjdk-amd64 /usr/lib/jvm/default-java -ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64 +# Set JAVA_HOME and PATH environment variables +ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64 +ENV PATH="${JAVA_HOME}/bin:${PATH}" + +# Set the default Java version using update-alternatives +RUN update-alternatives --install /usr/bin/java java /usr/lib/jvm/java-17-openjdk-amd64/bin/java 1 ####### -# Install SpotBugs 4.2.2 +# Install SpotBugs 4.9.3 ####### RUN mkdir -p /opt/spotbugs \ - && curl -L -s -S https://github.com/spotbugs/spotbugs/releases/download/4.2.2/spotbugs-4.2.2.tgz \ + && curl -L -s -S https://github.com/spotbugs/spotbugs/releases/download/4.9.3/spotbugs-4.9.3.tgz \ -o /opt/spotbugs.tgz \ && tar xzf /opt/spotbugs.tgz --strip-components 1 -C /opt/spotbugs \ && chmod +x /opt/spotbugs/bin/* diff --git a/pom.xml b/pom.xml index 4deb501e38..6937fb5a9a 100644 --- a/pom.xml +++ b/pom.xml @@ -41,8 +41,8 @@ - 1.8 - 1.8 + 17 + 17 true 3.0.0-M4 false @@ -70,8 +70,8 @@ 2.6 ${user.home}/clover.license 3.2.0 - 4.2.2 - 4.2.0 + 4.9.3 + 4.9.3.0 1.8.0 32.0.1-jre 3.4.1 @@ -102,6 +102,11 @@ 1.1.10.4 ${project.build.directory}/tmp 1.7.9 + + --add-opens=java.base/java.lang=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens java.base/java.io=ALL-UNNAMED + true @@ -922,7 +927,9 @@ 1 false 900 + true -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError + ${test.jvm.args} ${java.home} 4 diff --git a/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java b/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java index eff4d4bef3..1969554a58 100644 --- a/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java +++ b/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java @@ 
-1063,6 +1063,7 @@ static String constructAMLaunchOpts(TezConfiguration tezConf, Resource capabilit amOpts = amOpts + tezConf.get(TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS, TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS_DEFAULT); + amOpts = amOpts + TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_JDK17_CMD_OPTS_DEFAULT; amOpts = maybeAddDefaultMemoryJavaOpts(amOpts, capability, tezConf.getDouble(TezConfiguration.TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION, TezConfiguration.TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION_DEFAULT)); diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java index 8862f4b7d6..95ae107a8a 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java @@ -399,6 +399,8 @@ public TezConfiguration(boolean loadDefaults) { public static final String TEZ_AM_LAUNCH_CLUSTER_DEFAULT_CMD_OPTS_DEFAULT = "-server -Djava.net.preferIPv4Stack=true -Dhadoop.metrics.log.level=WARN"; + public static final String TEZ_AM_LAUNCH_CLUSTER_JDK17_CMD_OPTS_DEFAULT = + " --add-opens java.base/java.lang=ALL-UNNAMED"; /** * String value. Command line options provided during the launch of the Tez * AppMaster process. 
Its recommended to not set any Xmx or Xms in these launch opts so that diff --git a/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java b/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java index a52cb3e499..84f0a4bc2e 100644 --- a/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java +++ b/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java @@ -534,7 +534,8 @@ public void testAMCommandOpts() { TezClientUtils.constructAMLaunchOpts(tezConf, Resource.newInstance(1024, 1)); assertEquals(tmpOpts + " " + TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_DEFAULT_CMD_OPTS_DEFAULT + " " - + amCommandOpts, + + amCommandOpts + + TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_JDK17_CMD_OPTS_DEFAULT, amOptsConstructed); // Test2: Setup cluster-default command opts explicitly @@ -543,7 +544,8 @@ public void testAMCommandOpts() { tezConf.set(TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_DEFAULT_CMD_OPTS, clusterDefaultCommandOpts); amOptsConstructed = TezClientUtils.constructAMLaunchOpts(tezConf, Resource.newInstance(1024, 1)); - assertEquals(tmpOpts + " " + clusterDefaultCommandOpts + " " + amCommandOpts, amOptsConstructed); + assertEquals(tmpOpts + " " + clusterDefaultCommandOpts + " " + amCommandOpts + + TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_JDK17_CMD_OPTS_DEFAULT, amOptsConstructed); // Test3: Don't setup Xmx explicitly @@ -555,7 +557,7 @@ public void testAMCommandOpts() { // It's OK for the Xmx value to show up before cluster default options, since Xmx will not be replaced if it already exists. assertEquals( " -Xmx" + ((int) (1024 * factor)) + "m" + " " + tmpOpts + " " + clusterDefaultCommandOpts + " " + - amCommandOpts, + amCommandOpts + TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_JDK17_CMD_OPTS_DEFAULT, amOptsConstructed); // Test4: Ensure admin options with Xmx does not cause them to be overridden. This should almost never be done though. 
@@ -564,7 +566,8 @@ public void testAMCommandOpts() { tezConf.set(TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_DEFAULT_CMD_OPTS, clusterDefaultCommandOpts); amOptsConstructed = TezClientUtils.constructAMLaunchOpts(tezConf, Resource.newInstance(1024, 1)); - assertEquals(tmpOpts + " " + clusterDefaultCommandOpts + " " + amCommandOpts, amOptsConstructed); + assertEquals(tmpOpts + " " + clusterDefaultCommandOpts + " " + amCommandOpts + + TezConfiguration.TEZ_AM_LAUNCH_CLUSTER_JDK17_CMD_OPTS_DEFAULT, amOptsConstructed); } @Test(timeout = 5000) diff --git a/tez-dag/pom.xml b/tez-dag/pom.xml index 93ab9c51b6..e230347659 100644 --- a/tez-dag/pom.xml +++ b/tez-dag/pom.xml @@ -200,6 +200,7 @@ org.apache.maven.plugins maven-surefire-plugin + ${test.jvm.args} ${test.log.dir} diff --git a/tez-ext-service-tests/pom.xml b/tez-ext-service-tests/pom.xml index 532588a9ce..e13a3ae56c 100644 --- a/tez-ext-service-tests/pom.xml +++ b/tez-ext-service-tests/pom.xml @@ -161,6 +161,7 @@ org.apache.maven.plugins maven-surefire-plugin + ${test.jvm.args} ${test.log.dir} diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java index eafedef11c..31165c19ae 100644 --- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java +++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java @@ -60,7 +60,7 @@ public class TezTestServiceTaskSchedulerService extends TaskScheduler { // AppIdIdentifier to avoid conflicts with other containers in the system. 
// Per instance - private final int memoryPerInstance; + private final long memoryPerInstance; private final int coresPerInstance; private final int executorsPerInstance; @@ -84,7 +84,7 @@ public TezTestServiceTaskSchedulerService(TaskSchedulerContext taskSchedulerCont throw new TezUncheckedException(e); } this.memoryPerInstance = conf - .getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB, -1); + .getLong(TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB, -1); Preconditions.checkArgument(memoryPerInstance > 0, TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB + " must be configured"); @@ -145,7 +145,7 @@ public void dagComplete() { @Override public Resource getTotalResources() { return Resource - .newInstance(Ints.checkedCast(serviceHosts.size() * memoryPerInstance), + .newInstance(serviceHosts.size() * memoryPerInstance, serviceHosts.size() * coresPerInstance); } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/SimpleFetchedInputAllocator.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/SimpleFetchedInputAllocator.java index 6072c039c1..d9af3c4037 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/SimpleFetchedInputAllocator.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/SimpleFetchedInputAllocator.java @@ -20,6 +20,7 @@ import java.io.IOException; +import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -56,7 +57,8 @@ public class SimpleFetchedInputAllocator implements FetchedInputAllocator, // Configuration parameters private final long memoryLimit; - private final long maxSingleShuffleLimit; + @VisibleForTesting + final long maxSingleShuffleLimit; private final long maxAvailableTaskMemory; 
private final long initialMemoryAvailable; diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestSimpleFetchedInputAllocator.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestSimpleFetchedInputAllocator.java index 01faa5df7a..77bb0e4e06 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestSimpleFetchedInputAllocator.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestSimpleFetchedInputAllocator.java @@ -19,6 +19,7 @@ package org.apache.tez.runtime.library.common.shuffle.impl; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.io.File; import java.io.IOException; @@ -36,55 +37,88 @@ public class TestSimpleFetchedInputAllocator { private static final Logger LOG = LoggerFactory.getLogger(TestSimpleFetchedInputAllocator.class); - + @Test(timeout = 5000) public void testInMemAllocation() throws IOException { File localDirs = new File(System.getProperty("test.build.data", "/tmp"), this.getClass().getName()); Configuration conf = new Configuration(); - - long jvmMax = Runtime.getRuntime().maxMemory(); + + long jvmMax = 954728448L; LOG.info("jvmMax: " + jvmMax); - + float bufferPercent = 0.1f; conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, bufferPercent); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 1.0f); conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDirs.getAbsolutePath()); - + long inMemThreshold = (long) (bufferPercent * jvmMax); LOG.info("InMemThreshold: " + inMemThreshold); SimpleFetchedInputAllocator inputManager = new SimpleFetchedInputAllocator( - "srcName", UUID.randomUUID().toString(), 123, conf, - Runtime.getRuntime().maxMemory(), inMemThreshold); + "srcName", UUID.randomUUID().toString(), 123, conf, + jvmMax, inMemThreshold); long requestSize 
= (long) (0.4f * inMemThreshold); - long compressedSize = 1l; + long compressedSize = 1L; LOG.info("RequestSize: " + requestSize); - + FetchedInput fi1 = inputManager.allocate(requestSize, compressedSize, new InputAttemptIdentifier(1, 1)); assertEquals(FetchedInput.Type.MEMORY, fi1.getType()); - - FetchedInput fi2 = inputManager.allocate(requestSize, compressedSize, new InputAttemptIdentifier(2, 1)); assertEquals(FetchedInput.Type.MEMORY, fi2.getType()); - - // Over limit by this point. Next reserve should give back a DISK allocation FetchedInput fi3 = inputManager.allocate(requestSize, compressedSize, new InputAttemptIdentifier(3, 1)); assertEquals(FetchedInput.Type.DISK, fi3.getType()); - - // Freed one memory allocation. Next should be mem again. fi1.abort(); fi1.free(); FetchedInput fi4 = inputManager.allocate(requestSize, compressedSize, new InputAttemptIdentifier(4, 1)); assertEquals(FetchedInput.Type.MEMORY, fi4.getType()); - - // Freed one disk allocation. Next sould be disk again (no mem freed) + + // Freed one disk allocation. Next should be disk again (no mem freed) fi3.abort(); fi3.free(); FetchedInput fi5 = inputManager.allocate(requestSize, compressedSize, new InputAttemptIdentifier(4, 1)); assertEquals(FetchedInput.Type.DISK, fi5.getType()); } + /** + * This method tests the allocation behavior of SimpleFetchedInputAllocator when + * a high `maxMemory` is reported by the Runtime. The allocation results in a + * DISK input because the `requestSize` exceeds the `maxSingleShuffleLimit`. 
+ */ + @Test(timeout = 5000) + public void testInMemAllocationWithJvmMaxMemory() throws IOException { + File localDirs = new File(System.getProperty("test.build.data", "/tmp"), this.getClass().getName()); + Configuration conf = new Configuration(); + + long jvmMax = Runtime.getRuntime().maxMemory(); + LOG.info("jvmMax: " + jvmMax); + + float bufferPercent = 0.1f; + conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, bufferPercent); + conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 1.0f); + conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDirs.getAbsolutePath()); + + long inMemThreshold = (long) (bufferPercent * jvmMax); + LOG.info("InMemThreshold: " + inMemThreshold); + + SimpleFetchedInputAllocator inputManager = new SimpleFetchedInputAllocator( + "srcName", UUID.randomUUID().toString(), 123, conf, + jvmMax, inMemThreshold); + + long requestSize = (long) (0.4f * inMemThreshold) + 100L; + long compressedSize = 1L; + LOG.info("RequestSize: " + requestSize); + + // check if requestSize is greater than maxSingleShuffleLimit + assertTrue(requestSize > inputManager.maxSingleShuffleLimit); + + // requestSize is greater than the maxSingleShuffleLimit, so allocation is from DISK + FetchedInput fi1 = inputManager.allocate(requestSize, compressedSize, new InputAttemptIdentifier(1, 1)); + assertEquals(FetchedInput.Type.DISK, fi1.getType()); + } } diff --git a/tez-tests/pom.xml b/tez-tests/pom.xml index 2bf8ca38ee..b66738edb6 100644 --- a/tez-tests/pom.xml +++ b/tez-tests/pom.xml @@ -160,6 +160,7 @@ org.apache.maven.plugins maven-surefire-plugin + ${test.jvm.args} ${test.log.dir}