diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java index 1aab4fc28dd5..f33e36f0feeb 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java @@ -75,6 +75,8 @@ public final class OzoneConsts { "/serviceList"; public static final String OZONE_DB_CHECKPOINT_HTTP_ENDPOINT = "/dbCheckpoint"; + public static final String OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2 = + "/v2/dbCheckpoint"; // Ozone File System scheme public static final String OZONE_URI_SCHEME = "o3fs"; diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java index 26c93cd17921..dae2df9e5c38 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.hdds.utils.HddsServerUtil.writeDBCheckpointToStream; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_TO_EXCLUDE_SST; +import static org.apache.hadoop.ozone.OzoneConsts.ROCKSDB_SST_SUFFIX; import com.google.common.annotations.VisibleForTesting; import java.io.File; @@ -277,7 +278,18 @@ public void processMetadataSnapshotRequest(HttpServletRequest request, HttpServl } } - protected static Set extractSstFilesToExclude(String[] sstParam) { + protected static Set extractSstFilesToExclude(String[] filesInExclusionParam) { + Set sstFilesToExclude = new HashSet<>(); + if (filesInExclusionParam != null) { + sstFilesToExclude.addAll( + Arrays.stream(filesInExclusionParam).filter(s -> s.endsWith(ROCKSDB_SST_SUFFIX)) + 
.distinct().collect(Collectors.toList())); + logSstFileList(sstFilesToExclude, "Received list of {} SST files to be excluded{}: {}", 5); + } + return sstFilesToExclude; + } + + protected static Set extractFilesToExclude(String[] sstParam) { Set receivedSstFiles = new HashSet<>(); if (sstParam != null) { receivedSstFiles.addAll( diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java index 3e0a47bdd110..673735cd0420 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java @@ -17,7 +17,7 @@ package org.apache.hadoop.ozone.om.helpers; -import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; @@ -168,7 +168,7 @@ public URL getOMDBCheckpointEndpointUrl(boolean isHttp, boolean flush) URIBuilder urlBuilder = new URIBuilder(). setScheme(isHttp ? "http" : "https"). setHost(isHttp ? getHttpAddress() : getHttpsAddress()). - setPath(OZONE_DB_CHECKPOINT_HTTP_ENDPOINT). + setPath(OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2). addParameter(OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA, "true"). addParameter(OZONE_DB_CHECKPOINT_REQUEST_FLUSH, flush ? 
"true" : "false"); diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh index 5e53515b6f50..80bcbd24e2a2 100755 --- a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh +++ b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh @@ -73,6 +73,15 @@ _read() { compatibility/read.robot } +_test_checkpoint_compatibility() { + _kinit + execute_robot_test ${container} -N "xcompat-cluster-${cluster_version}-client-${client_version}-checkpoint" \ + -v CLIENT_VERSION:${client_version} \ + -v CLUSTER_VERSION:${cluster_version} \ + -v TEST_DATA_DIR:/testdata \ + compatibility/checkpoint.robot +} + test_cross_compatibility() { echo "Starting ${cluster_version} cluster with COMPOSE_FILE=${COMPOSE_FILE}" @@ -107,6 +116,35 @@ test_cross_compatibility() { done done + # Add checkpoint compatibility tests (only for clusters that support checkpoint endpoints) + # Skip checkpoint tests for very old clusters that don't have the endpoints + if [[ "${cluster_version}" < "2.0.0" ]]; then + echo "Skipping checkpoint compatibility tests for cluster ${cluster_version} (checkpoint endpoints not available)" + else + echo "" + echo "==========================================" + echo "Running checkpoint compatibility tests" + echo "==========================================" + + # Test 2.0.0 client (if available) + for client_version in "$@"; do + if [[ "${client_version}" == "2.0.0" ]]; then + echo "Testing 2.0.0 client against ${cluster_version} cluster" + client _test_checkpoint_compatibility + break # Only test 2.0 once + fi + done + + # Test current client (if different from 2.0.0 and available) + for client_version in "$@"; do + if [[ "${client_version}" == "${current_version}" ]]; then + echo "Testing ${current_version} client against ${cluster_version} cluster" + client _test_checkpoint_compatibility + break # Only test current version once + fi + done + fi + KEEP_RUNNING=false stop_docker_env } diff --git 
a/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot b/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot new file mode 100644 index 000000000000..e1776ef1a4be --- /dev/null +++ b/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot @@ -0,0 +1,110 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +*** Settings *** +Documentation Checkpoint Compatibility +Resource ../ozone-lib/shell.robot +Resource setup.robot +Test Timeout 5 minutes + +*** Variables *** +${CHECKPOINT_V2_VERSION} 2.1.0 +${OM_HOST} om +${OM_PORT} 9874 + +*** Keywords *** +Download Checkpoint V1 + [Documentation] Download checkpoint using v1 endpoint (/dbCheckpoint) + [Arguments] ${expected_result} + + Log Testing v1 checkpoint endpoint with authentication + + # Try different keytabs based on client version/container + ${download_file} = Set Variable /tmp/checkpoint_v1_${CLIENT_VERSION}.tar.gz + + # Debug: Check keytab availability first + ${keytab_check} = Execute ls -la /etc/security/keytabs/ 2>&1 | head -5 || echo "No keytabs directory" + Log Keytab directory: ${keytab_check} + + # Combine kinit and curl in a single command to preserve Kerberos session + ${combined_cmd} = Set Variable kinit -k -t /etc/security/keytabs/testuser.keytab testuser/scm@EXAMPLE.COM && curl -f --negotiate -u : --connect-timeout 10 --max-time 30 -o ${download_file} http://${OM_HOST}:${OM_PORT}/dbCheckpoint + + Log Executing: ${combined_cmd} + ${result} = Execute and checkrc ${combined_cmd} ${expected_result} + + IF ${expected_result} == 0 + # If we expect success, verify the file was created and has content + ${file_check} = Execute ls -la ${download_file} 2>/dev/null || echo "File not found" + Should Not Contain ${file_check} File not found + Should Contain ${file_check} checkpoint_v1_${CLIENT_VERSION}.tar.gz + Log Successfully downloaded checkpoint via v1 endpoint: ${file_check} + ELSE + Log v1 endpoint failed as expected for this version combination + END + +Download Checkpoint V2 + [Documentation] Download checkpoint using v2 endpoint (/v2/dbCheckpoint) + [Arguments] ${expected_result} + + Log Testing v2 checkpoint endpoint with authentication + + # Debug: Check keytab availability first (reuse from V1 if already checked) + ${keytab_check} = Execute ls -la /etc/security/keytabs/ 2>&1 | head -5 || echo "No
keytabs directory" + Log Keytab directory: ${keytab_check} + + # Combine kinit and curl in a single command to preserve Kerberos session + ${download_file} = Set Variable /tmp/checkpoint_v2_${CLIENT_VERSION}.tar.gz + ${combined_cmd} = Set Variable kinit -k -t /etc/security/keytabs/testuser.keytab testuser/scm@EXAMPLE.COM && curl -f --negotiate -u : --connect-timeout 10 --max-time 30 -o ${download_file} http://${OM_HOST}:${OM_PORT}/v2/dbCheckpoint + + Log Executing: ${combined_cmd} + ${result} = Execute and checkrc ${combined_cmd} ${expected_result} + + IF ${expected_result} == 0 + # If we expect success, verify the file was created and has content + ${file_check} = Execute ls -la ${download_file} 2>/dev/null || echo "File not found" + Should Not Contain ${file_check} File not found + Should Contain ${file_check} checkpoint_v2_${CLIENT_VERSION}.tar.gz + Log Successfully downloaded checkpoint via v2 endpoint: ${file_check} + ELSE + Log v2 endpoint failed as expected for this version combination + END + +*** Test Cases *** +Checkpoint V1 Endpoint Compatibility + [Documentation] Test v1 checkpoint endpoint (/dbCheckpoint) - should work for all versions (backward compatibility) + + Log Testing v1 checkpoint endpoint: CLIENT=${CLIENT_VERSION}, CLUSTER=${CLUSTER_VERSION} + + # Both old and new clusters should serve v1 endpoint for backward compatibility + Download Checkpoint V1 0 + +Checkpoint V2 Endpoint Compatibility + [Documentation] Test v2 checkpoint endpoint (/v2/dbCheckpoint) - should only work with new cluster + + Log Testing v2 checkpoint endpoint: CLIENT=${CLIENT_VERSION}, CLUSTER=${CLUSTER_VERSION} + + # NOTE(review): '<' below is a lexicographic string compare; fine for single-digit components (e.g. 2.0.0 < 2.1.0) but wrong once a component reaches 10 - TODO switch to a semantic version comparison + IF '${CLUSTER_VERSION}' < '${CHECKPOINT_V2_VERSION}' + # Old cluster doesn't have v2 endpoint - should fail with any non-zero exit code + ${result} = Run Keyword And Return Status Download Checkpoint V2 0 + IF not ${result} + Log v2 endpoint correctly failed on old cluster ${CLUSTER_VERSION} (expected failure) + ELSE + Fail v2 endpoint unexpectedly succeeded on
old cluster ${CLUSTER_VERSION} + END + ELSE + # New cluster has v2 endpoint - should succeed + Download Checkpoint V2 0 + Log v2 endpoint correctly succeeded on new cluster ${CLUSTER_VERSION} + END diff --git a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java index 4426876e596f..f9d1211575cc 100644 --- a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java +++ b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java @@ -18,7 +18,7 @@ package org.apache.hadoop.ozone.recon; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -116,7 +116,7 @@ public void testReconGetsSnapshotFromLeader() throws Exception { String expectedUrl = "http://" + (hostname.equals("0.0.0.0") ? 
"localhost" : hostname) + ":" + ozoneManager.get().getHttpServer().getHttpAddress().getPort() + - OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; + OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; String snapshotUrl = impl.getOzoneManagerSnapshotUrl(); assertEquals(expectedUrl, snapshotUrl); // Write some data diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java index 42a28e4a781b..1acd9593c822 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java @@ -131,7 +131,7 @@ public void processMetadataSnapshotRequest(HttpServletRequest request, HttpServl String[] sstParam = isFormData ? parseFormDataParameters(request) : request.getParameterValues( OZONE_DB_CHECKPOINT_REQUEST_TO_EXCLUDE_SST); - Set receivedSstFiles = extractSstFilesToExclude(sstParam); + Set receivedSstFiles = extractFilesToExclude(sstParam); Path tmpdir = null; try (BootstrapStateHandler.Lock lock = getBootstrapStateLock().lock()) { tmpdir = Files.createTempDirectory(getBootstrapTempData().toPath(), diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java index 8836324410b9..9c2688de812d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.om; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; import 
static org.apache.hadoop.ozone.OzoneConsts.OZONE_OM_SERVICE_LIST_HTTP_ENDPOINT; import java.io.IOException; @@ -36,6 +37,8 @@ public OzoneManagerHttpServer(MutableConfigurationSource conf, addServlet("serviceList", OZONE_OM_SERVICE_LIST_HTTP_ENDPOINT, ServiceListJSONServlet.class); addServlet("dbCheckpoint", OZONE_DB_CHECKPOINT_HTTP_ENDPOINT, + OMDBCheckpointServlet.class); + addServlet("dbCheckpointv2", OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2, OMDBCheckpointServletInodeBasedXfer.class); getWebAppContext().setAttribute(OzoneConsts.OM_CONTEXT_ATTRIBUTE, om); } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java index e407fe6cd4fe..24197fdcaf74 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java @@ -18,7 +18,7 @@ package org.apache.hadoop.ozone.recon.spi.impl; import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_DB_DIRS_PERMISSIONS_DEFAULT; -import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_AUTH_TYPE; import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_OM_SNAPSHOT_DB; @@ -195,11 +195,11 @@ public OzoneManagerServiceProviderImpl( HttpConfig.Policy policy = HttpConfig.getHttpPolicy(configuration); omDBSnapshotUrl = "http://" + ozoneManagerHttpAddress + - OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; + OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; if (policy.isHttpsEnabled()) { omDBSnapshotUrl = "https://" + 
ozoneManagerHttpsAddress + - OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; + OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; } boolean flushParam = configuration.getBoolean( @@ -391,7 +391,7 @@ public String getOzoneManagerSnapshotUrl() throws IOException { omLeaderUrl = (policy.isHttpsEnabled() ? "https://" + info.getServiceAddress(Type.HTTPS) : "http://" + info.getServiceAddress(Type.HTTP)) + - OZONE_DB_CHECKPOINT_HTTP_ENDPOINT; + OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; } } }