Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ public final class OzoneConsts {
"/serviceList";
public static final String OZONE_DB_CHECKPOINT_HTTP_ENDPOINT =
"/dbCheckpoint";
public static final String OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2 =
"/dbCheckpointv2";

// Ozone File System scheme
public static final String OZONE_URI_SCHEME = "o3fs";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static org.apache.hadoop.hdds.utils.HddsServerUtil.writeDBCheckpointToStream;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_TO_EXCLUDE_SST;
import static org.apache.hadoop.ozone.OzoneConsts.ROCKSDB_SST_SUFFIX;

import com.google.common.annotations.VisibleForTesting;
import java.io.File;
Expand Down Expand Up @@ -278,6 +279,16 @@ public void processMetadataSnapshotRequest(HttpServletRequest request, HttpServl
}

/**
 * Filters the raw request-parameter values down to the set of SST file
 * names that the client asked to exclude from the checkpoint transfer.
 *
 * Only values ending in the RocksDB SST suffix are kept; anything else
 * (unexpected or malformed parameter values) is dropped.
 *
 * @param sstParam raw values of the to-exclude-SST request parameter;
 *                 may be {@code null} when the parameter is absent
 * @return set of SST file names to exclude; empty when none were supplied
 */
protected static Set<String> extractSstFilesToExclude(String[] sstParam) {
  Set<String> receivedSstFiles = new HashSet<>();
  if (sstParam != null) {
    // Stream straight into the target set: HashSet de-duplicates on its
    // own, so the original distinct() + toList() + addAll round trip was
    // redundant work and an extra intermediate list.
    Arrays.stream(sstParam)
        .filter(s -> s.endsWith(ROCKSDB_SST_SUFFIX))
        .forEach(receivedSstFiles::add);
    logSstFileList(receivedSstFiles, "Received list of {} SST files to be excluded{}: {}", 5);
  }
  return receivedSstFiles;
}

protected static Set<String> extractFilesToExclude(String[] sstParam) {
Set<String> receivedSstFiles = new HashSet<>();
if (sstParam != null) {
receivedSstFiles.addAll(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.hadoop.ozone.om.helpers;

import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY;
Expand Down Expand Up @@ -168,7 +168,7 @@ public URL getOMDBCheckpointEndpointUrl(boolean isHttp, boolean flush)
URIBuilder urlBuilder = new URIBuilder().
setScheme(isHttp ? "http" : "https").
setHost(isHttp ? getHttpAddress() : getHttpsAddress()).
setPath(OZONE_DB_CHECKPOINT_HTTP_ENDPOINT).
setPath(OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2).
addParameter(OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA, "true").
addParameter(OZONE_DB_CHECKPOINT_REQUEST_FLUSH,
flush ? "true" : "false");
Expand Down
155 changes: 155 additions & 0 deletions hadoop-ozone/dist/src/main/compose/xcompat/lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,162 @@ test_cross_compatibility() {
done
done

# NEW: Add checkpoint compatibility tests
# Give OM a moment to be fully ready for HTTP requests
echo "Waiting for OM to be ready for HTTP requests..."
sleep 10

echo ""
echo "=========================================="
echo "Running checkpoint compatibility tests"
echo "=========================================="
for client_version in ${checkpoint_client_versions}; do
client _test_checkpoint_compatibility
done

KEEP_RUNNING=false stop_docker_env
}

_get_om_hostname() {
  # The OM is reachable via its docker-compose service name; this is the
  # single place to change if the compose service is ever renamed.
  local om_service="om"
  echo "${om_service}"
}

_download_checkpoint_v1() {
  # Downloads an OM DB checkpoint via the legacy /dbCheckpoint endpoint and
  # compares the actual outcome ("pass"/"fail") against the expectation.
  #
  # $1: expected result, "pass" or "fail"
  # Returns 0 when actual matches expected, 1 otherwise.
  _kinit
  # Declare and assign separately: `local x=$(cmd)` masks the command's
  # exit status (ShellCheck SC2155).
  local om_host
  om_host="$(_get_om_hostname)"
  local expected_result="$1"
  # Declare once up front instead of `local` inside each branch.
  local actual_result

  echo "Testing /dbCheckpoint endpoint: client ${client_version} → cluster ${cluster_version}"

  # Add debugging information
  echo "DEBUG: Using container: ${container}"
  echo "DEBUG: Using OM host: ${om_host}"

  # Check if OM is reachable
  echo "DEBUG: Testing OM connectivity..."
  execute_command_in_container "${container}" curl -v -s --connect-timeout 5 "http://${om_host}:9874/" || echo "DEBUG: Basic OM connectivity failed"

  # List running OM processes for debugging.
  # Run the grep inside the container: a bare pipe after
  # execute_command_in_container would filter on the *local* host instead.
  echo "DEBUG: Checking OM processes..."
  execute_command_in_container om bash -c "ps aux | grep -i ozone" || echo "DEBUG: No OM processes found"

  # Check if the specific endpoint exists
  echo "DEBUG: Testing if dbCheckpoint endpoint exists..."
  execute_command_in_container "${container}" curl -v -s --connect-timeout 5 "http://${om_host}:9874/dbCheckpoint" || echo "DEBUG: dbCheckpoint endpoint test failed"

  # Download using original checkpoint endpoint
  local download_cmd="curl -f -s -o /tmp/checkpoint_v1_${client_version}.tar.gz http://${om_host}:9874/dbCheckpoint"
  echo "DEBUG: Executing: ${download_cmd}"

  if execute_command_in_container "${container}" bash -c "${download_cmd}"; then
    actual_result="pass"
    echo "✓ Successfully downloaded checkpoint via v1 endpoint"
    # Show file info for verification
    execute_command_in_container "${container}" ls -la "/tmp/checkpoint_v1_${client_version}.tar.gz" || true
  else
    actual_result="fail"
    echo "✗ Failed to download checkpoint via v1 endpoint"
  fi

  if [[ "${expected_result}" == "${actual_result}" ]]; then
    echo "✓ EXPECTED: ${expected_result}, GOT: ${actual_result}"
    return 0
  else
    echo "✗ EXPECTED: ${expected_result}, GOT: ${actual_result}"
    return 1
  fi
}

_download_checkpoint_v2() {
  # Downloads an OM DB checkpoint via the new /dbCheckpointv2 endpoint and
  # compares the actual outcome ("pass"/"fail") against the expectation.
  #
  # $1: expected result, "pass" or "fail"
  # Returns 0 when actual matches expected, 1 otherwise.
  _kinit
  # Declare and assign separately: `local x=$(cmd)` masks the command's
  # exit status (ShellCheck SC2155).
  local om_host
  om_host="$(_get_om_hostname)"
  local expected_result="$1"
  # Declare once up front instead of `local` inside each branch.
  local actual_result

  echo "Testing /dbCheckpointv2 endpoint: client ${client_version} → cluster ${cluster_version}"

  # Add debugging information (similar to v1 but for v2 endpoint)
  echo "DEBUG: Using container: ${container}"
  echo "DEBUG: Using OM host: ${om_host}"

  # Check if the specific v2 endpoint exists
  echo "DEBUG: Testing if dbCheckpointv2 endpoint exists..."
  execute_command_in_container "${container}" curl -v -s --connect-timeout 5 "http://${om_host}:9874/dbCheckpointv2" || echo "DEBUG: dbCheckpointv2 endpoint test failed"

  # Download using new checkpointv2 endpoint
  local download_cmd="curl -f -s -o /tmp/checkpoint_v2_${client_version}.tar.gz http://${om_host}:9874/dbCheckpointv2"
  echo "DEBUG: Executing: ${download_cmd}"

  if execute_command_in_container "${container}" bash -c "${download_cmd}"; then
    actual_result="pass"
    echo "✓ Successfully downloaded checkpoint via v2 endpoint"
    # Show file info for verification
    execute_command_in_container "${container}" ls -la "/tmp/checkpoint_v2_${client_version}.tar.gz" || true
  else
    actual_result="fail"
    echo "✗ Failed to download checkpoint via v2 endpoint"
  fi

  if [[ "${expected_result}" == "${actual_result}" ]]; then
    echo "✓ EXPECTED: ${expected_result}, GOT: ${actual_result}"
    return 0
  else
    echo "✗ EXPECTED: ${expected_result}, GOT: ${actual_result}"
    return 1
  fi
}

_test_checkpoint_compatibility() {
  # Exercises both checkpoint endpoints for the current client/cluster pair
  # and verifies each request succeeds or fails as the version mix dictates.
  # Returns 0 when every expectation held, 1 otherwise.
  local test_result=0

  # Classify client and cluster relative to the current release.
  local is_old_client=false
  local is_old_cluster=false
  [[ "${client_version}" == "${current_version}" ]] || is_old_client=true
  [[ "${cluster_version}" == "${current_version}" ]] || is_old_cluster=true

  echo ""
  echo "=== CHECKPOINT COMPATIBILITY TEST ==="
  echo "Client: ${client_version} ($([ "$is_old_client" = true ] && echo "OLD" || echo "NEW"))"
  echo "Cluster: ${cluster_version} ($([ "$is_old_cluster" = true ] && echo "OLD" || echo "NEW"))"
  echo "====================================="

  # v1 endpoint (/dbCheckpoint): served by every cluster version for
  # backward compatibility, so it must always pass.
  echo "→ Testing v1 endpoint compatibility..."
  client _download_checkpoint_v1 "pass" || test_result=1

  # v2 endpoint (/dbCheckpointv2): only a new client ever requests it;
  # whether that request succeeds depends on the cluster version.
  echo "→ Testing v2 endpoint compatibility..."
  if [ "$is_old_client" = false ]; then
    if [ "$is_old_cluster" = true ]; then
      # Old cluster does not serve v2 — the request must fail.
      client _download_checkpoint_v2 "fail" || test_result=1
    else
      # New cluster serves v2 — the request must succeed.
      client _download_checkpoint_v2 "pass" || test_result=1
    fi
  fi
  # Old clients never attempt the v2 endpoint, so nothing to check there.

  if [ $test_result -eq 0 ]; then
    echo "✓ All checkpoint compatibility tests PASSED"
  else
    echo "✗ Some checkpoint compatibility tests FAILED"
  fi

  return $test_result
}

create_results_dir
7 changes: 7 additions & 0 deletions hadoop-ozone/dist/src/main/compose/xcompat/test-new.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,10 @@ source "${COMPOSE_DIR}/lib.sh"

# current cluster with various clients
COMPOSE_FILE=new-cluster.yaml:clients.yaml cluster_version=${current_version} test_cross_compatibility ${old_versions} ${current_version}

# Run checkpoint compatibility tests specifically for 2.0 client
# NOTE(review): `test_checkpoint_compatibility_only` is not among the
# functions added to lib.sh in this change (only the internal helper
# `_test_checkpoint_compatibility` is visible there) — confirm it is
# defined in lib.sh, otherwise this invocation fails at runtime.
echo ""
echo "=========================================="
echo "Running checkpoint compatibility tests with 2.0 client"
echo "=========================================="
COMPOSE_FILE=new-cluster.yaml:clients.yaml cluster_version=${current_version} test_checkpoint_compatibility_only "2.0.0"
8 changes: 8 additions & 0 deletions hadoop-ozone/dist/src/main/compose/xcompat/test-old.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,11 @@ for cluster_version in ${old_versions}; do
export OZONE_VERSION=${cluster_version}
COMPOSE_FILE=old-cluster.yaml:clients.yaml test_cross_compatibility ${cluster_version} ${current_version}
done

# Run checkpoint compatibility tests specifically for 2.0
# NOTE(review): `test_checkpoint_compatibility_only` is not among the
# functions added to lib.sh in this change (only the internal helper
# `_test_checkpoint_compatibility` is visible there) — confirm it is
# defined in lib.sh, otherwise this invocation fails at runtime.
echo ""
echo "=========================================="
echo "Running checkpoint compatibility tests for 2.0"
echo "=========================================="
export OZONE_VERSION="2.0.0"
COMPOSE_FILE=old-cluster.yaml:clients.yaml cluster_version="2.0.0" test_checkpoint_compatibility_only "${current_version}"
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
package org.apache.hadoop.ozone.recon;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
Expand Down Expand Up @@ -116,7 +116,7 @@ public void testReconGetsSnapshotFromLeader() throws Exception {
String expectedUrl = "http://" +
(hostname.equals("0.0.0.0") ? "localhost" : hostname) + ":" +
ozoneManager.get().getHttpServer().getHttpAddress().getPort() +
OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
String snapshotUrl = impl.getOzoneManagerSnapshotUrl();
assertEquals(expectedUrl, snapshotUrl);
// Write some data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ public void processMetadataSnapshotRequest(HttpServletRequest request, HttpServl
String[] sstParam = isFormData ?
parseFormDataParameters(request) : request.getParameterValues(
OZONE_DB_CHECKPOINT_REQUEST_TO_EXCLUDE_SST);
Set<String> receivedSstFiles = extractSstFilesToExclude(sstParam);
Set<String> receivedSstFiles = extractFilesToExclude(sstParam);
Path tmpdir = null;
try (BootstrapStateHandler.Lock lock = getBootstrapStateLock().lock()) {
tmpdir = Files.createTempDirectory(getBootstrapTempData().toPath(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.hadoop.ozone.om;

import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OM_SERVICE_LIST_HTTP_ENDPOINT;

import java.io.IOException;
Expand All @@ -36,6 +37,8 @@ public OzoneManagerHttpServer(MutableConfigurationSource conf,
addServlet("serviceList", OZONE_OM_SERVICE_LIST_HTTP_ENDPOINT,
ServiceListJSONServlet.class);
addServlet("dbCheckpoint", OZONE_DB_CHECKPOINT_HTTP_ENDPOINT,
OMDBCheckpointServlet.class);
addServlet("dbCheckpointv2", OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2,
OMDBCheckpointServletInodeBasedXfer.class);
getWebAppContext().setAttribute(OzoneConsts.OM_CONTEXT_ATTRIBUTE, om);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
package org.apache.hadoop.ozone.recon.spi.impl;

import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_DB_DIRS_PERMISSIONS_DEFAULT;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_AUTH_TYPE;
import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_OM_SNAPSHOT_DB;
Expand Down Expand Up @@ -195,11 +195,11 @@ public OzoneManagerServiceProviderImpl(
HttpConfig.Policy policy = HttpConfig.getHttpPolicy(configuration);

omDBSnapshotUrl = "http://" + ozoneManagerHttpAddress +
OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;

if (policy.isHttpsEnabled()) {
omDBSnapshotUrl = "https://" + ozoneManagerHttpsAddress +
OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
}

boolean flushParam = configuration.getBoolean(
Expand Down Expand Up @@ -391,7 +391,7 @@ public String getOzoneManagerSnapshotUrl() throws IOException {
omLeaderUrl = (policy.isHttpsEnabled() ?
"https://" + info.getServiceAddress(Type.HTTPS) :
"http://" + info.getServiceAddress(Type.HTTP)) +
OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
}
}
}
Expand Down