From f175128a42c5b43929ec6a2547c6e648539cafbc Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Tue, 19 Aug 2025 18:17:51 +0530 Subject: [PATCH 01/13] HDDS-13594. Use a different endpoint for fetching the OM checkpoint tarball. --- .../java/org/apache/hadoop/ozone/OzoneConsts.java | 2 ++ .../apache/hadoop/hdds/utils/DBCheckpointServlet.java | 11 +++++++++++ .../apache/hadoop/ozone/om/helpers/OMNodeDetails.java | 4 ++-- .../ozone/recon/TestReconWithOzoneManagerHA.java | 4 ++-- .../ozone/om/OMDBCheckpointServletInodeBasedXfer.java | 2 +- .../hadoop/ozone/om/OzoneManagerHttpServer.java | 3 +++ .../spi/impl/OzoneManagerServiceProviderImpl.java | 8 ++++---- 7 files changed, 25 insertions(+), 9 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java index 1aab4fc28dd5..7d1758b0711d 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java @@ -75,6 +75,8 @@ public final class OzoneConsts { "/serviceList"; public static final String OZONE_DB_CHECKPOINT_HTTP_ENDPOINT = "/dbCheckpoint"; + public static final String OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2 = + "/dbCheckpointv2"; // Ozone File System scheme public static final String OZONE_URI_SCHEME = "o3fs"; diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java index 26c93cd17921..7beefae0c48c 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.hdds.utils.HddsServerUtil.writeDBCheckpointToStream; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_TO_EXCLUDE_SST; +import static org.apache.hadoop.ozone.OzoneConsts.ROCKSDB_SST_SUFFIX; import com.google.common.annotations.VisibleForTesting; import java.io.File; @@ -278,6 +279,16 @@ public void processMetadataSnapshotRequest(HttpServletRequest request, HttpServl } protected static Set extractSstFilesToExclude(String[] sstParam) { + Set receivedSstFiles = new HashSet<>(); + if (sstParam != null) { + receivedSstFiles.addAll( + Arrays.stream(sstParam).filter(s -> s.endsWith(ROCKSDB_SST_SUFFIX)).distinct().collect(Collectors.toList())); + logSstFileList(receivedSstFiles, "Received list of {} SST files to be excluded{}: {}", 5); + } + return receivedSstFiles; + } + + protected static Set extractFilesToExclude(String[] sstParam) { Set receivedSstFiles = new HashSet<>(); if (sstParam != null) { receivedSstFiles.addAll( diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java index 3e0a47bdd110..673735cd0420 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java @@ -17,7 +17,7 @@ package org.apache.hadoop.ozone.om.helpers; -import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; @@ -168,7 +168,7 @@ public URL getOMDBCheckpointEndpointUrl(boolean isHttp, boolean flush) URIBuilder urlBuilder = new URIBuilder(). setScheme(isHttp ? "http" : "https"). setHost(isHttp ? getHttpAddress() : getHttpsAddress()). - setPath(OZONE_DB_CHECKPOINT_HTTP_ENDPOINT). + setPath(OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2). addParameter(OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA, "true"). addParameter(OZONE_DB_CHECKPOINT_REQUEST_FLUSH, flush ? "true" : "false"); diff --git a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java index 4426876e596f..f9d1211575cc 100644 --- a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java +++ b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java @@ -18,7 +18,7 @@ package org.apache.hadoop.ozone.recon; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -116,7 +116,7 @@ public void testReconGetsSnapshotFromLeader() throws Exception { String expectedUrl = "http://" + (hostname.equals("0.0.0.0") ? "localhost" : hostname) + ":" + ozoneManager.get().getHttpServer().getHttpAddress().getPort() + - OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; + OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; String snapshotUrl = impl.getOzoneManagerSnapshotUrl(); assertEquals(expectedUrl, snapshotUrl); // Write some data diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java index 42a28e4a781b..1acd9593c822 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java @@ -131,7 +131,7 @@ public void processMetadataSnapshotRequest(HttpServletRequest request, HttpServl String[] sstParam = isFormData ? parseFormDataParameters(request) : request.getParameterValues( OZONE_DB_CHECKPOINT_REQUEST_TO_EXCLUDE_SST); - Set receivedSstFiles = extractSstFilesToExclude(sstParam); + Set receivedSstFiles = extractFilesToExclude(sstParam); Path tmpdir = null; try (BootstrapStateHandler.Lock lock = getBootstrapStateLock().lock()) { tmpdir = Files.createTempDirectory(getBootstrapTempData().toPath(), diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java index 8836324410b9..9c2688de812d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.om; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OM_SERVICE_LIST_HTTP_ENDPOINT; import java.io.IOException; @@ -36,6 +37,8 @@ public OzoneManagerHttpServer(MutableConfigurationSource conf, addServlet("serviceList", OZONE_OM_SERVICE_LIST_HTTP_ENDPOINT, ServiceListJSONServlet.class); addServlet("dbCheckpoint", OZONE_DB_CHECKPOINT_HTTP_ENDPOINT, + OMDBCheckpointServlet.class); + addServlet("dbCheckpointv2", OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2, OMDBCheckpointServletInodeBasedXfer.class); getWebAppContext().setAttribute(OzoneConsts.OM_CONTEXT_ATTRIBUTE, om); } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java index e407fe6cd4fe..24197fdcaf74 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java @@ -18,7 +18,7 @@ package org.apache.hadoop.ozone.recon.spi.impl; import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_DB_DIRS_PERMISSIONS_DEFAULT; -import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_AUTH_TYPE; import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_OM_SNAPSHOT_DB; @@ -195,11 +195,11 @@ public OzoneManagerServiceProviderImpl( HttpConfig.Policy policy = HttpConfig.getHttpPolicy(configuration); omDBSnapshotUrl = "http://" + ozoneManagerHttpAddress + - OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; + OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; if (policy.isHttpsEnabled()) { omDBSnapshotUrl = "https://" + ozoneManagerHttpsAddress + - OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; + OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; } boolean flushParam = configuration.getBoolean( @@ -391,7 +391,7 @@ public String getOzoneManagerSnapshotUrl() throws IOException { omLeaderUrl = (policy.isHttpsEnabled() ? "https://" + info.getServiceAddress(Type.HTTPS) : "http://" + info.getServiceAddress(Type.HTTP)) + - OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; + OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; } } } From 26b3b2b63ea0d0945b9f5aa5ecaf6ffd0ddb9074 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Fri, 22 Aug 2025 14:39:11 +0530 Subject: [PATCH 02/13] add test --- .../dist/src/main/compose/xcompat/lib.sh | 121 ++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh index 5e53515b6f50..094f9e420b8b 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh @@ -107,7 +107,128 @@ test_cross_compatibility() { done done + # NEW: Add checkpoint compatibility tests + echo "" + echo "==========================================" + echo "Running checkpoint compatibility tests" + echo "==========================================" + for client_version in "$@"; do + client _test_checkpoint_compatibility + done + KEEP_RUNNING=false stop_docker_env } +_get_om_hostname() { + # Get OM hostname from the cluster configuration + echo "om" # Default OM service name in docker-compose +} + +_download_checkpoint_v1() { + _kinit + local om_host=$(_get_om_hostname) + local expected_result="$1" + + echo "Testing /dbCheckpoint endpoint: client ${client_version} → cluster ${cluster_version}" + + # Download using original checkpoint endpoint + local download_cmd="curl -f -s -o /tmp/checkpoint_v1_${client_version}.tar.gz http://${om_host}:9874/dbCheckpoint" + + if execute_command_in_container ${container} bash -c "${download_cmd}"; then + local actual_result="pass" + echo "✓ Successfully downloaded checkpoint via v1 endpoint" + else + local actual_result="fail" + echo "✗ Failed to download checkpoint via v1 endpoint" + fi + + if [[ "${expected_result}" == "${actual_result}" ]]; then + echo "✓ EXPECTED: ${expected_result}, GOT: ${actual_result}" + return 0 + else + echo "✗ EXPECTED: ${expected_result}, GOT: ${actual_result}" + return 1 + fi +} + +_download_checkpoint_v2() { + _kinit + local om_host=$(_get_om_hostname) + local expected_result="$1" + + echo "Testing /dbCheckpointv2 endpoint: client ${client_version} → cluster ${cluster_version}" + + # Download using new checkpointv2 endpoint + local download_cmd="curl -f -s -o /tmp/checkpoint_v2_${client_version}.tar.gz http://${om_host}:9874/dbCheckpointv2" + + if execute_command_in_container ${container} bash -c "${download_cmd}"; then + local actual_result="pass" + echo "✓ Successfully downloaded checkpoint via v2 endpoint" + else + local actual_result="fail" + echo "✗ Failed to download checkpoint via v2 endpoint" + fi + + if [[ "${expected_result}" == "${actual_result}" ]]; then + echo "✓ EXPECTED: ${expected_result}, GOT: ${actual_result}" + return 0 + else + echo "✗ EXPECTED: ${expected_result}, GOT: ${actual_result}" + return 1 + fi +} + +_test_checkpoint_compatibility() { + local test_result=0 + + # Determine client and cluster types + local is_old_client=false + local is_old_cluster=false + + if [[ "${client_version}" != "${current_version}" ]]; then + is_old_client=true + fi + + if [[ "${cluster_version}" != "${current_version}" ]]; then + is_old_cluster=true + fi + + echo "" + echo "=== CHECKPOINT COMPATIBILITY TEST ===" + echo "Client: ${client_version} ($([ "$is_old_client" = true ] && echo "OLD" || echo "NEW"))" + echo "Cluster: ${cluster_version} ($([ "$is_old_cluster" = true ] && echo "OLD" || echo "NEW"))" + echo "=====================================" + + # Test v1 endpoint (/dbCheckpoint) + echo "→ Testing v1 endpoint compatibility..." + # Both old and new clusters should serve v1 endpoint (backward compatibility) + client _download_checkpoint_v1 "pass" || test_result=1 + + # Test v2 endpoint (/dbCheckpointv2) + echo "→ Testing v2 endpoint compatibility..." + if [ "$is_old_cluster" = true ]; then + # Old cluster doesn't have v2 endpoint + if [ "$is_old_client" = false ]; then + # New client hitting v2 on old cluster should fail + client _download_checkpoint_v2 "fail" || test_result=1 + fi + # Old client won't try v2 endpoint + else + # New cluster has v2 endpoint + if [ "$is_old_client" = false ]; then + # New client should successfully use v2 endpoint + client _download_checkpoint_v2 "pass" || test_result=1 + fi + # Old client doesn't know about v2 endpoint + fi + + if [ $test_result -eq 0 ]; then + echo "✓ All checkpoint compatibility tests PASSED" + else + echo "✗ Some checkpoint compatibility tests FAILED" + fi + + return $test_result +} + create_results_dir From 721e288c0eca3721a117db68fde135b852381da2 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Fri, 22 Aug 2025 22:33:00 +0530 Subject: [PATCH 03/13] add test --- .../dist/src/main/compose/xcompat/lib.sh | 32 ++++++++++++++----- .../dist/src/main/compose/xcompat/test-new.sh | 7 ++++ .../dist/src/main/compose/xcompat/test-old.sh | 8 +++++ 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh index 094f9e420b8b..085eee3f20d5 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh @@ -107,14 +107,30 @@ test_cross_compatibility() { done done - # NEW: Add checkpoint compatibility tests - echo "" - echo "==========================================" - echo "Running checkpoint compatibility tests" - echo "==========================================" - for client_version in "$@"; do - client _test_checkpoint_compatibility - done + + + KEEP_RUNNING=false stop_docker_env +} + +test_checkpoint_compatibility_only() { + local checkpoint_client_versions="$@" + + echo "Starting ${cluster_version} cluster with COMPOSE_FILE=${COMPOSE_FILE}" + + OZONE_KEEP_RESULTS=true start_docker_env 5 + + execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} + + _init + + # Run checkpoint compatibility tests only + echo "" + echo "==========================================" + echo "Running checkpoint compatibility tests" + echo "==========================================" + for client_version in ${checkpoint_client_versions}; do + client _test_checkpoint_compatibility + done KEEP_RUNNING=false stop_docker_env } diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh b/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh index ef8c3ef36cdf..1e227bdde0d1 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh @@ -27,3 +27,10 @@ source "${COMPOSE_DIR}/lib.sh" # current cluster with various clients COMPOSE_FILE=new-cluster.yaml:clients.yaml cluster_version=${current_version} test_cross_compatibility ${old_versions} ${current_version} + +# Run checkpoint compatibility tests specifically for 2.0 client +echo "" +echo "==========================================" +echo "Running checkpoint compatibility tests with 2.0 client" +echo "==========================================" +COMPOSE_FILE=new-cluster.yaml:clients.yaml cluster_version=${current_version} test_checkpoint_compatibility_only "2.0.0" diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh b/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh index f6c69ef6f17f..6f3d7264d256 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh @@ -30,3 +30,11 @@ for cluster_version in ${old_versions}; do export OZONE_VERSION=${cluster_version} COMPOSE_FILE=old-cluster.yaml:clients.yaml test_cross_compatibility ${cluster_version} ${current_version} done + +# Run checkpoint compatibility tests specifically for 2.0 +echo "" +echo "==========================================" +echo "Running checkpoint compatibility tests for 2.0" +echo "==========================================" +export OZONE_VERSION="2.0.0" +COMPOSE_FILE=old-cluster.yaml:clients.yaml cluster_version="2.0.0" test_checkpoint_compatibility_only "${current_version}" From 9ecf676c8994a2ce6aaefbef61b99093c7781942 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Sat, 23 Aug 2025 19:35:59 +0530 Subject: [PATCH 04/13] add debugging --- .../dist/src/main/compose/xcompat/lib.sh | 50 +++++++++++++------ 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh index 085eee3f20d5..12a849bd4d3f 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh @@ -107,23 +107,11 @@ test_cross_compatibility() { done done + # NEW: Add checkpoint compatibility tests + # Give OM a moment to be fully ready for HTTP requests + echo "Waiting for OM to be ready for HTTP requests..." + sleep 10 - - KEEP_RUNNING=false stop_docker_env -} - -test_checkpoint_compatibility_only() { - local checkpoint_client_versions="$@" - - echo "Starting ${cluster_version} cluster with COMPOSE_FILE=${COMPOSE_FILE}" - - OZONE_KEEP_RESULTS=true start_docker_env 5 - - execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} - - _init - - # Run checkpoint compatibility tests only echo "" echo "==========================================" echo "Running checkpoint compatibility tests" @@ -147,12 +135,31 @@ _download_checkpoint_v1() { echo "Testing /dbCheckpoint endpoint: client ${client_version} → cluster ${cluster_version}" + # Add debugging information + echo "DEBUG: Using container: ${container}" + echo "DEBUG: Using OM host: ${om_host}" + + # Check if OM is reachable + echo "DEBUG: Testing OM connectivity..." + execute_command_in_container ${container} curl -v -s --connect-timeout 5 "http://${om_host}:9874/" || echo "DEBUG: Basic OM connectivity failed" + + # List running OM processes for debugging + echo "DEBUG: Checking OM processes..." + execute_command_in_container om ps aux | grep -i ozone || echo "DEBUG: No OM processes found" + + # Check if the specific endpoint exists + echo "DEBUG: Testing if dbCheckpoint endpoint exists..." + execute_command_in_container ${container} curl -v -s --connect-timeout 5 "http://${om_host}:9874/dbCheckpoint" || echo "DEBUG: dbCheckpoint endpoint test failed" + # Download using original checkpoint endpoint local download_cmd="curl -f -s -o /tmp/checkpoint_v1_${client_version}.tar.gz http://${om_host}:9874/dbCheckpoint" + echo "DEBUG: Executing: ${download_cmd}" if execute_command_in_container ${container} bash -c "${download_cmd}"; then local actual_result="pass" echo "✓ Successfully downloaded checkpoint via v1 endpoint" + # Show file info for verification + execute_command_in_container ${container} ls -la /tmp/checkpoint_v1_${client_version}.tar.gz || true else local actual_result="fail" echo "✗ Failed to download checkpoint via v1 endpoint" @@ -174,12 +181,23 @@ _download_checkpoint_v2() { echo "Testing /dbCheckpointv2 endpoint: client ${client_version} → cluster ${cluster_version}" + # Add debugging information (similar to v1 but for v2 endpoint) + echo "DEBUG: Using container: ${container}" + echo "DEBUG: Using OM host: ${om_host}" + + # Check if the specific v2 endpoint exists + echo "DEBUG: Testing if dbCheckpointv2 endpoint exists..." + execute_command_in_container ${container} curl -v -s --connect-timeout 5 "http://${om_host}:9874/dbCheckpointv2" || echo "DEBUG: dbCheckpointv2 endpoint test failed" + # Download using new checkpointv2 endpoint local download_cmd="curl -f -s -o /tmp/checkpoint_v2_${client_version}.tar.gz http://${om_host}:9874/dbCheckpointv2" + echo "DEBUG: Executing: ${download_cmd}" if execute_command_in_container ${container} bash -c "${download_cmd}"; then local actual_result="pass" echo "✓ Successfully downloaded checkpoint via v2 endpoint" + # Show file info for verification + execute_command_in_container ${container} ls -la /tmp/checkpoint_v2_${client_version}.tar.gz || true else local actual_result="fail" echo "✗ Failed to download checkpoint via v2 endpoint" From 7b0e8abac6f595c93c1650c48473c11a700d1e59 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Mon, 8 Sep 2025 11:33:58 +0530 Subject: [PATCH 05/13] address comments --- .../java/org/apache/hadoop/ozone/OzoneConsts.java | 2 +- .../hadoop/hdds/utils/DBCheckpointServlet.java | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java index 7d1758b0711d..f33e36f0feeb 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java @@ -76,7 +76,7 @@ public final class OzoneConsts { public static final String OZONE_DB_CHECKPOINT_HTTP_ENDPOINT = "/dbCheckpoint"; public static final String OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2 = - "/dbCheckpointv2"; + "/v2/dbCheckpoint"; // Ozone File System scheme public static final String OZONE_URI_SCHEME = "o3fs"; diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java index 7beefae0c48c..792f680497bb 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java @@ -278,14 +278,14 @@ public void processMetadataSnapshotRequest(HttpServletRequest request, HttpServl } } - protected static Set extractSstFilesToExclude(String[] sstParam) { - Set receivedSstFiles = new HashSet<>(); - if (sstParam != null) { - receivedSstFiles.addAll( - Arrays.stream(sstParam).filter(s -> s.endsWith(ROCKSDB_SST_SUFFIX)).distinct().collect(Collectors.toList())); - logSstFileList(receivedSstFiles, "Received list of {} SST files to be excluded{}: {}", 5); + protected static Set extractSstFilesToExclude(String[] filesInExclusionParam) { + Set sstFilesToExclude = new HashSet<>(); + if (filesInExclusionParam != null) { + sstFilesToExclude.addAll( + Arrays.stream(filesInExclusionParam).filter(s -> s.endsWith(ROCKSDB_SST_SUFFIX)).distinct().collect(Collectors.toList())); + logSstFileList(sstFilesToExclude, "Received list of {} SST files to be excluded{}: {}", 5); } - return receivedSstFiles; + return sstFilesToExclude; } protected static Set extractFilesToExclude(String[] sstParam) { From c10e941c9993989ea7a9cbb8189deaf6a5487621 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Mon, 8 Sep 2025 11:50:11 +0530 Subject: [PATCH 06/13] remove failing test --- .../dist/src/main/compose/xcompat/lib.sh | 155 ------------------ .../dist/src/main/compose/xcompat/test-new.sh | 7 - .../dist/src/main/compose/xcompat/test-old.sh | 8 - 3 files changed, 170 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh index 12a849bd4d3f..5e53515b6f50 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh @@ -107,162 +107,7 @@ test_cross_compatibility() { done done - # NEW: Add checkpoint compatibility tests - # Give OM a moment to be fully ready for HTTP requests - echo "Waiting for OM to be ready for HTTP requests..." - sleep 10 - - echo "" - echo "==========================================" - echo "Running checkpoint compatibility tests" - echo "==========================================" - for client_version in ${checkpoint_client_versions}; do - client _test_checkpoint_compatibility - done - KEEP_RUNNING=false stop_docker_env } -_get_om_hostname() { - # Get OM hostname from the cluster configuration - echo "om" # Default OM service name in docker-compose -} - -_download_checkpoint_v1() { - _kinit - local om_host=$(_get_om_hostname) - local expected_result="$1" - - echo "Testing /dbCheckpoint endpoint: client ${client_version} → cluster ${cluster_version}" - - # Add debugging information - echo "DEBUG: Using container: ${container}" - echo "DEBUG: Using OM host: ${om_host}" - - # Check if OM is reachable - echo "DEBUG: Testing OM connectivity..." - execute_command_in_container ${container} curl -v -s --connect-timeout 5 "http://${om_host}:9874/" || echo "DEBUG: Basic OM connectivity failed" - - # List running OM processes for debugging - echo "DEBUG: Checking OM processes..." - execute_command_in_container om ps aux | grep -i ozone || echo "DEBUG: No OM processes found" - - # Check if the specific endpoint exists - echo "DEBUG: Testing if dbCheckpoint endpoint exists..." - execute_command_in_container ${container} curl -v -s --connect-timeout 5 "http://${om_host}:9874/dbCheckpoint" || echo "DEBUG: dbCheckpoint endpoint test failed" - - # Download using original checkpoint endpoint - local download_cmd="curl -f -s -o /tmp/checkpoint_v1_${client_version}.tar.gz http://${om_host}:9874/dbCheckpoint" - echo "DEBUG: Executing: ${download_cmd}" - - if execute_command_in_container ${container} bash -c "${download_cmd}"; then - local actual_result="pass" - echo "✓ Successfully downloaded checkpoint via v1 endpoint" - # Show file info for verification - execute_command_in_container ${container} ls -la /tmp/checkpoint_v1_${client_version}.tar.gz || true - else - local actual_result="fail" - echo "✗ Failed to download checkpoint via v1 endpoint" - fi - - if [[ "${expected_result}" == "${actual_result}" ]]; then - echo "✓ EXPECTED: ${expected_result}, GOT: ${actual_result}" - return 0 - else - echo "✗ EXPECTED: ${expected_result}, GOT: ${actual_result}" - return 1 - fi -} - -_download_checkpoint_v2() { - _kinit - local om_host=$(_get_om_hostname) - local expected_result="$1" - - echo "Testing /dbCheckpointv2 endpoint: client ${client_version} → cluster ${cluster_version}" - - # Add debugging information (similar to v1 but for v2 endpoint) - echo "DEBUG: Using container: ${container}" - echo "DEBUG: Using OM host: ${om_host}" - - # Check if the specific v2 endpoint exists - echo "DEBUG: Testing if dbCheckpointv2 endpoint exists..." - execute_command_in_container ${container} curl -v -s --connect-timeout 5 "http://${om_host}:9874/dbCheckpointv2" || echo "DEBUG: dbCheckpointv2 endpoint test failed" - - # Download using new checkpointv2 endpoint - local download_cmd="curl -f -s -o /tmp/checkpoint_v2_${client_version}.tar.gz http://${om_host}:9874/dbCheckpointv2" - echo "DEBUG: Executing: ${download_cmd}" - - if execute_command_in_container ${container} bash -c "${download_cmd}"; then - local actual_result="pass" - echo "✓ Successfully downloaded checkpoint via v2 endpoint" - # Show file info for verification - execute_command_in_container ${container} ls -la /tmp/checkpoint_v2_${client_version}.tar.gz || true - else - local actual_result="fail" - echo "✗ Failed to download checkpoint via v2 endpoint" - fi - - if [[ "${expected_result}" == "${actual_result}" ]]; then - echo "✓ EXPECTED: ${expected_result}, GOT: ${actual_result}" - return 0 - else - echo "✗ EXPECTED: ${expected_result}, GOT: ${actual_result}" - return 1 - fi -} - -_test_checkpoint_compatibility() { - local test_result=0 - - # Determine client and cluster types - local is_old_client=false - local is_old_cluster=false - - if [[ "${client_version}" != "${current_version}" ]]; then - is_old_client=true - fi - - if [[ "${cluster_version}" != "${current_version}" ]]; then - is_old_cluster=true - fi - - echo "" - echo "=== CHECKPOINT COMPATIBILITY TEST ===" - echo "Client: ${client_version} ($([ "$is_old_client" = true ] && echo "OLD" || echo "NEW"))" - echo "Cluster: ${cluster_version} ($([ "$is_old_cluster" = true ] && echo "OLD" || echo "NEW"))" - echo "=====================================" - - # Test v1 endpoint (/dbCheckpoint) - echo "→ Testing v1 endpoint compatibility..." - # Both old and new clusters should serve v1 endpoint (backward compatibility) - client _download_checkpoint_v1 "pass" || test_result=1 - - # Test v2 endpoint (/dbCheckpointv2) - echo "→ Testing v2 endpoint compatibility..." - if [ "$is_old_cluster" = true ]; then - # Old cluster doesn't have v2 endpoint - if [ "$is_old_client" = false ]; then - # New client hitting v2 on old cluster should fail - client _download_checkpoint_v2 "fail" || test_result=1 - fi - # Old client won't try v2 endpoint - else - # New cluster has v2 endpoint - if [ "$is_old_client" = false ]; then - # New client should successfully use v2 endpoint - client _download_checkpoint_v2 "pass" || test_result=1 - fi - # Old client doesn't know about v2 endpoint - fi - - if [ $test_result -eq 0 ]; then - echo "✓ All checkpoint compatibility tests PASSED" - else - echo "✗ Some checkpoint compatibility tests FAILED" - fi - - return $test_result -} - create_results_dir diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh b/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh index 1e227bdde0d1..ef8c3ef36cdf 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh @@ -27,10 +27,3 @@ source "${COMPOSE_DIR}/lib.sh" # current cluster with various clients COMPOSE_FILE=new-cluster.yaml:clients.yaml cluster_version=${current_version} test_cross_compatibility ${old_versions} ${current_version} - -# Run checkpoint compatibility tests specifically for 2.0 client -echo "" -echo "==========================================" -echo "Running checkpoint compatibility tests with 2.0 client" -echo "==========================================" -COMPOSE_FILE=new-cluster.yaml:clients.yaml cluster_version=${current_version} test_checkpoint_compatibility_only "2.0.0" diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh b/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh index 6f3d7264d256..f6c69ef6f17f 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh @@ -30,11 +30,3 @@ for cluster_version in ${old_versions}; do export OZONE_VERSION=${cluster_version} COMPOSE_FILE=old-cluster.yaml:clients.yaml test_cross_compatibility ${cluster_version} ${current_version} done - -# Run checkpoint compatibility tests specifically for 2.0 -echo "" -echo "==========================================" -echo "Running checkpoint compatibility tests for 2.0" -echo "==========================================" -export OZONE_VERSION="2.0.0" -COMPOSE_FILE=old-cluster.yaml:clients.yaml cluster_version="2.0.0" test_checkpoint_compatibility_only "${current_version}" From b0f0412caa5c16f42ad28d7a006ac827a74cbf59 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Mon, 8 Sep 2025 12:09:34 +0530 Subject: [PATCH 07/13] add robot test instead --- .../dist/src/main/compose/xcompat/lib.sh | 9 ++ .../dist/src/main/compose/xcompat/test-new.sh | 23 +++++ .../dist/src/main/compose/xcompat/test-old.sh | 23 +++++ .../smoketest/compatibility/checkpoint.robot | 84 +++++++++++++++++++ 4 files changed, 139 insertions(+) create mode 100644 hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh index 5e53515b6f50..f6284288fa3f 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh @@ -73,6 +73,15 @@ _read() { compatibility/read.robot } +_test_checkpoint_compatibility() { + _kinit + execute_robot_test ${container} -N "xcompat-cluster-${cluster_version}-client-${client_version}-checkpoint" \ + -v CLIENT_VERSION:${client_version} \ + -v CLUSTER_VERSION:${cluster_version} \ + -v TEST_DATA_DIR:/testdata \ + compatibility/checkpoint.robot +} + test_cross_compatibility() { echo "Starting ${cluster_version} cluster with COMPOSE_FILE=${COMPOSE_FILE}" diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh b/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh index ef8c3ef36cdf..48eda13d7efb 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh @@ -27,3 +27,26 @@ source "${COMPOSE_DIR}/lib.sh" # current cluster with various clients COMPOSE_FILE=new-cluster.yaml:clients.yaml cluster_version=${current_version} test_cross_compatibility ${old_versions} ${current_version} + +# Run checkpoint compatibility tests specifically for 2.0 client +echo "" +echo "==========================================" +echo "Running checkpoint compatibility tests with 2.0 client" +echo "==========================================" + +COMPOSE_FILE=new-cluster.yaml:clients.yaml + +echo "Starting current cluster for checkpoint testing..." +OZONE_KEEP_RESULTS=true start_docker_env 5 + +execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} + +# Basic initialization similar to _init +container=scm +execute_command_in_container ${container} kinit -k -t /etc/security/keytabs/testuser.keytab testuser/scm@EXAMPLE.COM +execute_command_in_container ${container} ozone freon ockg -n1 -t1 -p warmup + +# Test 2.0 client against current cluster +client_version="2.0.0" cluster_version=${current_version} client _test_checkpoint_compatibility + +KEEP_RUNNING=false stop_docker_env diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh b/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh index f6c69ef6f17f..2b8f1d5fe8d8 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh @@ -30,3 +30,26 @@ for cluster_version in ${old_versions}; do export OZONE_VERSION=${cluster_version} COMPOSE_FILE=old-cluster.yaml:clients.yaml test_cross_compatibility ${cluster_version} ${current_version} done + +# Run checkpoint compatibility tests specifically for 2.0 +echo "" +echo "==========================================" +echo "Running checkpoint compatibility tests for 2.0" +echo "==========================================" +export OZONE_VERSION="2.0.0" +COMPOSE_FILE=old-cluster.yaml:clients.yaml + +echo "Starting 2.0.0 cluster for checkpoint testing..." +OZONE_KEEP_RESULTS=true start_docker_env 5 + +execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} + +# Basic initialization similar to _init +container=scm +execute_command_in_container ${container} kinit -k -t /etc/security/keytabs/testuser.keytab testuser/scm@EXAMPLE.COM +execute_command_in_container ${container} ozone freon ockg -n1 -t1 -p warmup + +# Test current client against 2.0 cluster +client_version=${current_version} cluster_version="2.0.0" client _test_checkpoint_compatibility + +KEEP_RUNNING=false stop_docker_env diff --git a/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot b/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot new file mode 100644 index 000000000000..effbcfc4e9ba --- /dev/null +++ b/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot @@ -0,0 +1,84 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +*** Settings *** +Documentation Checkpoint Compatibility +Resource ../ozone-lib/shell.robot +Resource setup.robot +Test Timeout 5 minutes + +*** Variables *** +${CHECKPOINT_V2_VERSION} 2.1.0 +${OM_HOST} om +${OM_PORT} 9874 + +*** Keywords *** +Download Checkpoint V1 + [Documentation] Download checkpoint using v1 endpoint (/dbCheckpoint) + [Arguments] ${expected_result} + + ${download_file} = Set Variable /tmp/checkpoint_v1_${CLIENT_VERSION}.tar.gz + ${result} = Execute and checkrc curl -f -s --connect-timeout 10 --max-time 30 -o ${download_file} http://${OM_HOST}:${OM_PORT}/dbCheckpoint ${expected_result} + + IF ${expected_result} == 0 + # If we expect success, verify the file was created and has content + ${file_check} = Execute ls -la ${download_file} 2>/dev/null || echo "File not found" + Should Not Contain ${file_check} File not found + Should Contain ${file_check} checkpoint_v1_${CLIENT_VERSION}.tar.gz + Log Successfully downloaded checkpoint via v1 endpoint: ${file_check} + ELSE + Log v1 endpoint failed as expected for this version combination + END + +Download Checkpoint V2 + [Documentation] Download checkpoint using v2 endpoint (/dbCheckpointv2) + [Arguments] ${expected_result} + + ${download_file} = Set Variable /tmp/checkpoint_v2_${CLIENT_VERSION}.tar.gz + ${result} = Execute and checkrc curl -f -s --connect-timeout 10 --max-time 30 -o ${download_file} http://${OM_HOST}:${OM_PORT}/v2/dbCheckpoint ${expected_result} + + IF ${expected_result} == 0 + # If we expect success, verify the file was created and has content + ${file_check} = Execute ls -la ${download_file} 2>/dev/null || echo "File not found" + Should Not Contain ${file_check} File not found + Should Contain ${file_check} checkpoint_v2_${CLIENT_VERSION}.tar.gz + Log Successfully downloaded checkpoint via v2 endpoint: ${file_check} + ELSE + Log v2 endpoint failed as expected for this version combination + END + +*** Test Cases *** +Checkpoint V1 Endpoint Compatibility + [Documentation] Test v1 checkpoint endpoint (/dbCheckpoint) - should work for all versions (backward compatibility) + + Log Testing v1 checkpoint endpoint: CLIENT=${CLIENT_VERSION}, CLUSTER=${CLUSTER_VERSION} + + # Both old and new clusters should serve v1 endpoint for backward compatibility + Download Checkpoint V1 0 + +Checkpoint V2 Endpoint Compatibility + [Documentation] Test v2 checkpoint endpoint (/dbCheckpointv2) - should only work with new cluster + + Log Testing v2 checkpoint endpoint: CLIENT=${CLIENT_VERSION}, CLUSTER=${CLUSTER_VERSION} + + IF '${CLUSTER_VERSION}' < '${CHECKPOINT_V2_VERSION}' + # Old cluster doesn't have v2 endpoint - should fail + Download Checkpoint V2 1 + Log v2 endpoint correctly failed on old cluster ${CLUSTER_VERSION} + ELSE + # New cluster has v2 endpoint - should succeed + Download Checkpoint V2 0 + Log v2 endpoint correctly succeeded on new cluster ${CLUSTER_VERSION} + END From 15127ab0798075057a9aece80def7c31f8923e71 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Mon, 8 Sep 2025 12:27:43 +0530 Subject: [PATCH 08/13] fix checkstyle --- .../java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java index 792f680497bb..dae2df9e5c38 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java @@ -282,7 +282,8 @@ protected static Set extractSstFilesToExclude(String[] filesInExclusionP Set sstFilesToExclude = new HashSet<>(); if (filesInExclusionParam != null) { sstFilesToExclude.addAll( - Arrays.stream(filesInExclusionParam).filter(s -> s.endsWith(ROCKSDB_SST_SUFFIX)).distinct().collect(Collectors.toList())); + Arrays.stream(filesInExclusionParam).filter(s -> s.endsWith(ROCKSDB_SST_SUFFIX)) + .distinct().collect(Collectors.toList())); logSstFileList(sstFilesToExclude, "Received list of {} SST files to be excluded{}: {}", 5); } return sstFilesToExclude; From 662c932392cc994c953b3493e41ee56abc6bdc22 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Mon, 8 Sep 2025 13:58:43 +0530 Subject: [PATCH 09/13] test 2 --- .../dist/src/main/compose/xcompat/lib.sh | 24 +++++++++++++++++++ .../dist/src/main/compose/xcompat/test-new.sh | 23 ------------------ .../dist/src/main/compose/xcompat/test-old.sh | 23 ------------------ 3 files changed, 24 insertions(+), 46 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh index f6284288fa3f..366bb1a4b837 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh @@ -116,6 +116,30 @@ test_cross_compatibility() { done done + # Add checkpoint compatibility tests + echo "" + echo "==========================================" + echo "Running checkpoint compatibility tests" + echo "==========================================" + + # Test 2.0.0 client (if available) + for client_version in "$@"; do + if [[ "${client_version}" == "2.0.0" ]]; then + echo "Testing 2.0.0 client against ${cluster_version} cluster" + client _test_checkpoint_compatibility + break # Only test 2.0 once + fi + done + + # Test current client (if different from 2.0.0 and available) + for client_version in "$@"; do + if [[ "${client_version}" == "${current_version}" ]]; then + echo "Testing ${current_version} client against ${cluster_version} cluster" + client _test_checkpoint_compatibility + break # Only test current version once + fi + done + KEEP_RUNNING=false stop_docker_env } diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh b/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh index 48eda13d7efb..ef8c3ef36cdf 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh @@ -27,26 +27,3 @@ source "${COMPOSE_DIR}/lib.sh" # current cluster with various clients COMPOSE_FILE=new-cluster.yaml:clients.yaml cluster_version=${current_version} test_cross_compatibility ${old_versions} ${current_version} - -# Run checkpoint compatibility tests specifically for 2.0 client -echo "" -echo "==========================================" -echo "Running checkpoint compatibility tests with 2.0 client" -echo "==========================================" - -COMPOSE_FILE=new-cluster.yaml:clients.yaml - -echo "Starting current cluster for checkpoint testing..." -OZONE_KEEP_RESULTS=true start_docker_env 5 - -execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} - -# Basic initialization similar to _init -container=scm -execute_command_in_container ${container} kinit -k -t /etc/security/keytabs/testuser.keytab testuser/scm@EXAMPLE.COM -execute_command_in_container ${container} ozone freon ockg -n1 -t1 -p warmup - -# Test 2.0 client against current cluster -client_version="2.0.0" cluster_version=${current_version} client _test_checkpoint_compatibility - -KEEP_RUNNING=false stop_docker_env diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh b/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh index 2b8f1d5fe8d8..f6c69ef6f17f 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh @@ -30,26 +30,3 @@ for cluster_version in ${old_versions}; do export OZONE_VERSION=${cluster_version} COMPOSE_FILE=old-cluster.yaml:clients.yaml test_cross_compatibility ${cluster_version} ${current_version} done - -# Run checkpoint compatibility tests specifically for 2.0 -echo "" -echo "==========================================" -echo "Running checkpoint compatibility tests for 2.0" -echo "==========================================" -export OZONE_VERSION="2.0.0" -COMPOSE_FILE=old-cluster.yaml:clients.yaml - -echo "Starting 2.0.0 cluster for checkpoint testing..." -OZONE_KEEP_RESULTS=true start_docker_env 5 - -execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} - -# Basic initialization similar to _init -container=scm -execute_command_in_container ${container} kinit -k -t /etc/security/keytabs/testuser.keytab testuser/scm@EXAMPLE.COM -execute_command_in_container ${container} ozone freon ockg -n1 -t1 -p warmup - -# Test current client against 2.0 cluster -client_version=${current_version} cluster_version="2.0.0" client _test_checkpoint_compatibility - -KEEP_RUNNING=false stop_docker_env From 43e169eb8bd5b18cee83c306677b8df7ff9f8bcd Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Tue, 9 Sep 2025 23:44:57 +0530 Subject: [PATCH 10/13] test 3 --- .../smoketest/compatibility/checkpoint.robot | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot b/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot index effbcfc4e9ba..ea411f2a994a 100644 --- a/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot +++ b/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot @@ -29,8 +29,20 @@ Download Checkpoint V1 [Documentation] Download checkpoint using v1 endpoint (/dbCheckpoint) [Arguments] ${expected_result} + Log Testing v1 checkpoint endpoint with authentication + + # Try different keytabs based on client version/container ${download_file} = Set Variable /tmp/checkpoint_v1_${CLIENT_VERSION}.tar.gz - ${result} = Execute and checkrc curl -f -s --connect-timeout 10 --max-time 30 -o ${download_file} http://${OM_HOST}:${OM_PORT}/dbCheckpoint ${expected_result} + + # Debug: Check keytab availability first + ${keytab_check} = Execute ls -la /etc/security/keytabs/ 2>&1 | head -5 || echo "No keytabs directory" + Log Keytab directory: ${keytab_check} + + # Combine kinit and curl in a single command to preserve Kerberos session + ${combined_cmd} = Set Variable kinit -k -t /etc/security/keytabs/testuser.keytab testuser/scm@EXAMPLE.COM && curl -f --negotiate -u : --connect-timeout 10 --max-time 30 -o ${download_file} http://${OM_HOST}:${OM_PORT}/dbCheckpoint + + Log Executing: ${combined_cmd} + ${result} = Execute and checkrc ${combined_cmd} ${expected_result} IF ${expected_result} == 0 # If we expect success, verify the file was created and has content @@ -46,8 +58,18 @@ Download Checkpoint V2 [Documentation] Download checkpoint using v2 endpoint (/dbCheckpointv2) [Arguments] ${expected_result} + Log Testing v2 checkpoint endpoint with authentication + + # Debug: Check keytab availability first (reuse from V1 if already checked) + ${keytab_check} = Execute ls -la /etc/security/keytabs/ 2>&1 | head -5 || echo "No keytabs directory" + Log Keytab directory: ${keytab_check} + + # Combine kinit and curl in a single command to preserve Kerberos session ${download_file} = Set Variable /tmp/checkpoint_v2_${CLIENT_VERSION}.tar.gz - ${result} = Execute and checkrc curl -f -s --connect-timeout 10 --max-time 30 -o ${download_file} http://${OM_HOST}:${OM_PORT}/v2/dbCheckpoint ${expected_result} + ${combined_cmd} = Set Variable kinit -k -t /etc/security/keytabs/testuser.keytab testuser/scm@EXAMPLE.COM && curl -f --negotiate -u : --connect-timeout 10 --max-time 30 -o ${download_file} http://${OM_HOST}:${OM_PORT}/v2/dbCheckpoint + + Log Executing: ${combined_cmd} + ${result} = Execute and checkrc ${combined_cmd} ${expected_result} IF ${expected_result} == 0 # If we expect success, verify the file was created and has content @@ -69,7 +91,7 @@ Checkpoint V1 Endpoint Compatibility Download Checkpoint V1 0 Checkpoint V2 Endpoint Compatibility - [Documentation] Test v2 checkpoint endpoint (/dbCheckpointv2) - should only work with new cluster + [Documentation] Test v2 endpoint endpoint (/dbCheckpointv2) - should only work with new cluster Log Testing v2 checkpoint endpoint: CLIENT=${CLIENT_VERSION}, CLUSTER=${CLUSTER_VERSION} From ce314f6c22f1c9519ad89b0af7d786f44a7e28eb Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Wed, 10 Sep 2025 02:01:59 +0530 Subject: [PATCH 11/13] test 4 --- .../dist/src/main/compose/xcompat/lib.sh | 51 ++++++++++--------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh index 366bb1a4b837..80bcbd24e2a2 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh @@ -116,29 +116,34 @@ test_cross_compatibility() { done done - # Add checkpoint compatibility tests - echo "" - echo "==========================================" - echo "Running checkpoint compatibility tests" - echo "==========================================" - - # Test 2.0.0 client (if available) - for client_version in "$@"; do - if [[ "${client_version}" == "2.0.0" ]]; then - echo "Testing 2.0.0 client against ${cluster_version} cluster" - client _test_checkpoint_compatibility - break # Only test 2.0 once - fi - done - - # Test current client (if different from 2.0.0 and available) - for client_version in "$@"; do - if [[ "${client_version}" == "${current_version}" ]]; then - echo "Testing ${current_version} client against ${cluster_version} cluster" - client _test_checkpoint_compatibility - break # Only test current version once - fi - done + # Add checkpoint compatibility tests (only for clusters that support checkpoint endpoints) + # Skip checkpoint tests for very old clusters that don't have the endpoints + if [[ "${cluster_version}" < "2.0.0" ]]; then + echo "Skipping checkpoint compatibility tests for cluster ${cluster_version} (checkpoint endpoints not available)" + else + echo "" + echo "==========================================" + echo "Running checkpoint compatibility tests" + echo "==========================================" + + # Test 2.0.0 client (if available) + for client_version in "$@"; do + if [[ "${client_version}" == "2.0.0" ]]; then + echo "Testing 2.0.0 client against ${cluster_version} cluster" + client _test_checkpoint_compatibility + break # Only test 2.0 once + fi + done + + # Test current client (if different from 2.0.0 and available) + for client_version in "$@"; do + if [[ "${client_version}" == "${current_version}" ]]; then + echo "Testing ${current_version} client against ${cluster_version} cluster" + client _test_checkpoint_compatibility + break # Only test current version once + fi + done + fi KEEP_RUNNING=false stop_docker_env } From 1219cd6ef44abbc6b682f6d11249f65eb15643ea Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Wed, 10 Sep 2025 09:36:27 +0530 Subject: [PATCH 12/13] test 5 --- .../src/main/smoketest/compatibility/checkpoint.robot | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot b/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot index ea411f2a994a..3ba0482321b8 100644 --- a/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot +++ b/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot @@ -96,9 +96,13 @@ Checkpoint V2 Endpoint Compatibility Log Testing v2 checkpoint endpoint: CLIENT=${CLIENT_VERSION}, CLUSTER=${CLUSTER_VERSION} IF '${CLUSTER_VERSION}' < '${CHECKPOINT_V2_VERSION}' - # Old cluster doesn't have v2 endpoint - should fail - Download Checkpoint V2 1 - Log v2 endpoint correctly failed on old cluster ${CLUSTER_VERSION} + # Old cluster doesn't have v2 endpoint - should fail with any non-zero exit code + ${result} = Run Keyword And Return Status Download Checkpoint V2 0 + IF not ${result} + Log v2 endpoint correctly failed on old cluster ${CLUSTER_VERSION} (expected failure) + ELSE + Fail v2 endpoint unexpectedly succeeded on old cluster ${CLUSTER_VERSION} + END ELSE # New cluster has v2 endpoint - should succeed Download Checkpoint V2 0 From b9f15d1d0b760376610453fb0f7fab7acaf55a96 Mon Sep 17 00:00:00 2001 From: Sadanand Shenoy Date: Mon, 15 Sep 2025 11:49:17 +0530 Subject: [PATCH 13/13] Update hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot Co-authored-by: Siyao Meng <50227127+smengcl@users.noreply.github.com> --- .../dist/src/main/smoketest/compatibility/checkpoint.robot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot b/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot index 3ba0482321b8..e1776ef1a4be 100644 --- a/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot +++ b/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot @@ -91,7 +91,7 @@ Checkpoint V1 Endpoint Compatibility Download Checkpoint V1 0 Checkpoint V2 Endpoint Compatibility - [Documentation] Test v2 endpoint endpoint (/dbCheckpointv2) - should only work with new cluster + [Documentation] Test v2 checkpoint endpoint (/v2/dbCheckpoint) - should only work with new cluster Log Testing v2 checkpoint endpoint: CLIENT=${CLIENT_VERSION}, CLUSTER=${CLUSTER_VERSION}