Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,48 +17,34 @@
*/
package org.apache.hadoop.ozone.recon.api;

import java.io.IOException;
import java.time.Instant;
import java.util.Collection;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.stream.Collectors;

import javax.ws.rs.DefaultValue;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;

import javax.inject.Inject;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException;
import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager;
import org.apache.hadoop.ozone.om.helpers.BucketLayout;
import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup;
import org.apache.hadoop.ozone.recon.api.types.ContainerDiscrepancyInfo;
import org.apache.hadoop.ozone.recon.api.types.ContainerKeyPrefix;
import org.apache.hadoop.ozone.recon.api.types.ContainerMetadata;
import org.apache.hadoop.ozone.recon.api.types.ContainersResponse;
import org.apache.hadoop.ozone.recon.api.types.KeyMetadata;
import org.apache.hadoop.ozone.recon.api.types.KeyMetadata.ContainerBlockMetadata;
import org.apache.hadoop.ozone.recon.api.types.KeysResponse;
import org.apache.hadoop.ozone.recon.api.types.MissingContainerMetadata;
import org.apache.hadoop.ozone.recon.api.types.MissingContainersResponse;
import org.apache.hadoop.ozone.recon.api.types.UnhealthyContainerMetadata;
import org.apache.hadoop.ozone.recon.api.types.UnhealthyContainersSummary;
import org.apache.hadoop.ozone.recon.api.types.UnhealthyContainersResponse;
import org.apache.hadoop.ozone.recon.api.types.KeyMetadata.ContainerBlockMetadata;
import org.apache.hadoop.ozone.recon.persistence.ContainerHistory;
import org.apache.hadoop.ozone.recon.api.types.UnhealthyContainersSummary;
import org.apache.hadoop.ozone.recon.persistence.ContainerHealthSchemaManager;
import org.apache.hadoop.ozone.recon.persistence.ContainerHistory;
import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager;
import org.apache.hadoop.ozone.recon.scm.ReconContainerManager;
import org.apache.hadoop.ozone.recon.spi.ReconContainerMetadataManager;
Expand All @@ -68,6 +54,26 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.inject.Inject;
import javax.ws.rs.DefaultValue;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import java.io.IOException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.stream.Collectors;

import static org.apache.hadoop.ozone.recon.ReconConstants.DEFAULT_BATCH_NUMBER;
import static org.apache.hadoop.ozone.recon.ReconConstants.DEFAULT_FETCH_COUNT;
import static org.apache.hadoop.ozone.recon.ReconConstants.PREV_CONTAINER_ID_DEFAULT_VALUE;
Expand All @@ -91,6 +97,7 @@ public class ContainerEndpoint {
private ReconOMMetadataManager omMetadataManager;

private final ReconContainerManager containerManager;
private final PipelineManager pipelineManager;
private final ContainerHealthSchemaManager containerHealthSchemaManager;
private final ReconNamespaceSummaryManager reconNamespaceSummaryManager;
private final OzoneStorageContainerManager reconSCM;
Expand All @@ -104,6 +111,7 @@ public ContainerEndpoint(OzoneStorageContainerManager reconSCM,
ReconNamespaceSummaryManager reconNamespaceSummaryManager) {
this.containerManager =
(ReconContainerManager) reconSCM.getContainerManager();
this.pipelineManager = reconSCM.getPipelineManager();
this.containerHealthSchemaManager = containerHealthSchemaManager;
this.reconNamespaceSummaryManager = reconNamespaceSummaryManager;
this.reconSCM = reconSCM;
Expand Down Expand Up @@ -417,4 +425,86 @@ private List<ContainerBlockMetadata> getBlocks(
/**
* Returns the bucket layout used by this endpoint.
*
* @return always {@link BucketLayout#DEFAULT}
*/
private BucketLayout getBucketLayout() {
return BucketLayout.DEFAULT;
}

@GET
@Path("/mismatch")
public Response getContainerMisMatchInsights() {
List<ContainerDiscrepancyInfo> containerDiscrepancyInfoList =
new ArrayList<>();
try {
Map<Long, ContainerMetadata> omContainers =
reconContainerMetadataManager.getContainers(-1, -1);
List<Long> scmAllContainers = containerManager.getContainers().stream()
.filter(containerInfo -> !(containerInfo.getState() ==
HddsProtos.LifeCycleState.DELETED))
.map(containerInfo -> containerInfo.getContainerID()).collect(
Collectors.toList());
List<Long> scmNonDeletedContainers =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

scmNonDeletedContainers and scmAllContainers are same, duplicate

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

scmNonDeletedContainers and scmAllContainers are same, duplicate

Its fixed. Pls re-review.

containerManager.getContainers().stream()
.filter(containerInfo -> !(containerInfo.getState() ==
HddsProtos.LifeCycleState.DELETED))
.map(containerInfo -> containerInfo.getContainerID()).collect(
Collectors.toList());

// Filter list of container Ids which are present in OM but not in SCM.
List<Map.Entry<Long, ContainerMetadata>> notSCMContainers =
omContainers.entrySet().stream().filter(containerMetadataEntry ->
!(scmAllContainers.contains(containerMetadataEntry.getKey())))

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This still looks incorrect. If a container has got Deleted in SCM and still present in OM, it will never get flagged as scmAllContainers has Deleted SCM containers as well. so no discrepancy will ever get reported.
you need to have scmNonDeletedContainers here as well instead of scmAllContainers. What we are trying to figure out is container is still referred in OM but in SCM that container has got deleted.
@devmadhuu @sumitagrawl

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This still looks incorrect. If a container has got Deleted in SCM and still present in OM, it will never get flagged as scmAllContainers has Deleted SCM containers as well. so no discrepancy will ever get reported. you need to have scmNonDeletedContainers here as well instead of scmAllContainers. What we are trying to figure out is container is still referred in OM but in SCM that container has got deleted. @devmadhuu @sumitagrawl

IMO, we want to identify mismatch of container referred in OM but present in SCM, now why the container is in deleted state in SCM, that is different issue. But any container which cannot be queried for metadata by OM to SCM is a data loss situation. @sumitagrawl - I think your 1st point is what we discussed. Let us know what discussion happened with @errose28

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@devmadhuu We should not count deleted SCM container for comparing to OM also, deleted container is just a bookmark, to avoid re-created same container at DN due to issue with sync at DN or Old DN.

This needs to be removed in comparison here tool.

.collect(
Collectors.toList());

notSCMContainers.forEach(nonSCMContainer -> {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

notSCMContainers List will be empty in normal scenario and only if some thing went wrong will have it populated.
Here we need to check notSCMContainers size is not zero then only do further steps.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

foreach will not execute if list is empty, so no check required.

ContainerDiscrepancyInfo containerDiscrepancyInfo =
new ContainerDiscrepancyInfo();
containerDiscrepancyInfo.setContainerID(nonSCMContainer.getKey());
containerDiscrepancyInfo.setNumberOfKeys(
nonSCMContainer.getValue().getNumberOfKeys());
containerDiscrepancyInfo.setPipelines(nonSCMContainer.getValue()
.getPipelines());
containerDiscrepancyInfo.setExistsAt("OM");
containerDiscrepancyInfoList.add(containerDiscrepancyInfo);
});

// Filter list of container Ids which are present in SCM but not in OM.
List<Long> nonOMContainers = scmNonDeletedContainers.stream()
.filter(containerId -> !omContainers.containsKey(containerId))
.collect(Collectors.toList());

List<Pipeline> pipelines = new ArrayList<>();
nonOMContainers.forEach(nonOMContainerId -> {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here again need a check if nonOMContainers empty

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

foreach will not execute if list is empty, so no check required.

ContainerDiscrepancyInfo containerDiscrepancyInfo =
new ContainerDiscrepancyInfo();
containerDiscrepancyInfo.setContainerID(nonOMContainerId);
containerDiscrepancyInfo.setNumberOfKeys(0);
PipelineID pipelineID = null;
try {
pipelineID = containerManager.getContainer(
ContainerID.valueOf(nonOMContainerId))
.getPipelineID();

if (null != pipelineID) {
pipelines.add(pipelineManager.getPipeline(pipelineID));
}
} catch (ContainerNotFoundException e) {
LOG.warn("Container {} not found in SCM: {}", nonOMContainerId, e);
} catch (PipelineNotFoundException e) {
LOG.debug("Pipeline not found for container: {} and pipelineId: {}",
nonOMContainerId, pipelineID, e);
}
containerDiscrepancyInfo.setPipelines(pipelines);
containerDiscrepancyInfo.setExistsAt("SCM");
containerDiscrepancyInfoList.add(containerDiscrepancyInfo);
});

} catch (IOException ex) {
throw new WebApplicationException(ex,
Response.Status.INTERNAL_SERVER_ERROR);
} catch (IllegalArgumentException e) {
throw new WebApplicationException(e, Response.Status.BAD_REQUEST);
} catch (Exception ex) {
throw new WebApplicationException(ex,
Response.Status.INTERNAL_SERVER_ERROR);
}
return Response.ok(containerDiscrepancyInfoList).build();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.recon.api.types;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;

import java.util.List;

/**
* Metadata object that represents a Container Discrepancy Info.
*/
public class ContainerDiscrepancyInfo {

@JsonProperty("containerId")
private long containerID;

@JsonProperty("omContainerState")
@JsonInclude(JsonInclude.Include.NON_EMPTY)
private long omContainerState;

@JsonProperty("scmContainerState")
@JsonInclude(JsonInclude.Include.NON_EMPTY)
private long scmContainerState;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like scmContainerState and omContainerState are not used anywhere.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like scmContainerState and omContainerState are not used anywhere.

Thanks for pointing this out, Sammi. I have removed them; earlier I was planning to use them, but I don't have a mechanism to determine the states of mismatched containers. Please review again.


@JsonProperty("numberOfKeys")
private long numberOfKeys;

@JsonProperty("pipelines")
private List<Pipeline> pipelines;

@JsonProperty("existsAt")
private String existsAt;

/**
* Creates an empty instance; every field keeps its Java default until the
* corresponding setter is called.
*/
public ContainerDiscrepancyInfo() {

}

/**
* @return the ID of the container this discrepancy entry describes
*/
public long getContainerID() {
return containerID;
}

/**
* @param containerID the ID of the container this discrepancy entry describes
*/
public void setContainerID(long containerID) {
this.containerID = containerID;
}

/**
* @return the number of keys recorded for the container
*/
public long getNumberOfKeys() {
return numberOfKeys;
}

/**
* @param numberOfKeys the number of keys recorded for the container
*/
public void setNumberOfKeys(long numberOfKeys) {
this.numberOfKeys = numberOfKeys;
}

/**
* NOTE(review): no caller of the matching setter is visible alongside this
* class, so this appears to stay at its default value; confirm whether the
* field is still needed.
*
* @return the stored OM-side container state value
*/
public long getOmContainerState() {
return omContainerState;
}

/**
* @param omContainerState the OM-side container state value to store
*/
public void setOmContainerState(long omContainerState) {
this.omContainerState = omContainerState;
}

/**
* NOTE(review): no caller of the matching setter is visible alongside this
* class, so this appears to stay at its default value; confirm whether the
* field is still needed.
*
* @return the stored SCM-side container state value
*/
public long getScmContainerState() {
return scmContainerState;
}

/**
* @param scmContainerState the SCM-side container state value to store
*/
public void setScmContainerState(long scmContainerState) {
this.scmContainerState = scmContainerState;
}

/**
* @return the pipeline list exactly as stored (no copy is made); null if
* {@link #setPipelines(List)} was never called
*/
public List<Pipeline> getPipelines() {
return pipelines;
}

/**
* Stores the given list by reference; later changes the caller makes to the
* same list are visible through {@link #getPipelines()}.
*
* @param pipelines the pipelines associated with the container
*/
public void setPipelines(
List<Pipeline> pipelines) {
this.pipelines = pipelines;
}

/**
* @return the name of the side on which the container exists; null if never
* set
*/
public String getExistsAt() {
return existsAt;
}

/**
* @param existsAt the name of the side on which the container exists
*/
public void setExistsAt(String existsAt) {
this.existsAt = existsAt;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,13 @@
*/
package org.apache.hadoop.ozone.recon.api.types;

import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;

import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import java.util.List;

/**
* Metadata object that represents a Container.
Expand All @@ -33,6 +37,9 @@ public class ContainerMetadata {
@XmlElement(name = "NumberOfKeys")
private long numberOfKeys;

@JsonProperty("pipelines")
private List<Pipeline> pipelines;

/**
* Creates metadata for the given container; the other fields keep their Java
* defaults until set.
*
* @param containerID the ID of the container this metadata describes
*/
public ContainerMetadata(long containerID) {
this.containerID = containerID;
}
Expand All @@ -53,4 +60,12 @@ public void setNumberOfKeys(long numberOfKeys) {
this.numberOfKeys = numberOfKeys;
}

/**
* @return the pipeline list exactly as stored (no copy is made); null if
* {@link #setPipelines(List)} was never called
*/
public List<Pipeline> getPipelines() {
return pipelines;
}

/**
* Stores the given list by reference, without copying it.
*
* @param pipelines the pipelines associated with the container
*/
public void setPipelines(
List<Pipeline> pipelines) {
this.pipelines = pipelines;
}
}
Loading