From 4a337904e627fd4e9c38a70cb17d38b13ff69fcd Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 5 Feb 2026 17:30:33 +0530 Subject: [PATCH 1/5] HDDS-14103. Create an option in SCM to ack/ignore missing containers --- .../hdds/scm/container/ContainerInfo.java | 32 ++++- .../hadoop/hdds/scm/client/ScmClient.java | 15 +++ .../StorageContainerLocationProtocol.java | 16 +++ ...ocationProtocolClientSideTranslatorPB.java | 20 +++ .../src/main/proto/ScmAdminProtocol.proto | 20 +++ .../src/main/proto/hdds.proto | 1 + .../hdds/scm/container/ContainerManager.java | 11 ++ .../scm/container/ContainerManagerImpl.java | 15 +++ .../scm/container/ContainerStateManager.java | 10 ++ .../container/ContainerStateManagerImpl.java | 17 +++ .../replication/ReplicationManager.java | 2 + .../AcknowledgedMissingContainerHandler.java | 57 +++++++++ .../health/ClosingContainerHandler.java | 5 +- ...ocationProtocolServerSideTranslatorPB.java | 29 +++++ .../scm/server/SCMClientProtocolServer.java | 51 ++++++++ .../apache/hadoop/ozone/audit/SCMAction.java | 4 +- .../health/TestClosingContainerHandler.java | 2 +- .../scm/cli/ContainerOperationClient.java | 10 ++ .../cli/container/AckMissingSubcommand.java | 97 ++++++++++++++ .../scm/cli/container/ContainerCommands.java | 4 +- .../cli/container/UnackMissingSubcommand.java | 74 +++++++++++ .../TestAckMissingContainerSubcommand.java | 119 ++++++++++++++++++ 22 files changed, 604 insertions(+), 7 deletions(-) create mode 100644 hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java create mode 100644 hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java create mode 100644 hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java create mode 100644 hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java index 2beef2abf885..b53ae71b7ddb 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java @@ -87,6 +87,7 @@ public final class ContainerInfo implements Comparable { private long sequenceId; // Health state of the container (determined by ReplicationManager) private ContainerHealthState healthState; + private boolean ackMissing; private ContainerInfo(Builder b) { containerID = ContainerID.valueOf(b.containerID); @@ -102,6 +103,7 @@ private ContainerInfo(Builder b) { replicationConfig = b.replicationConfig; clock = b.clock; healthState = b.healthState != null ? b.healthState : ContainerHealthState.HEALTHY; + ackMissing = b.ackMissing; } public static Codec getCodec() { @@ -121,7 +123,8 @@ public static ContainerInfo fromProtobuf(HddsProtos.ContainerInfoProto info) { .setContainerID(info.getContainerID()) .setDeleteTransactionId(info.getDeleteTransactionId()) .setReplicationConfig(config) - .setSequenceId(info.getSequenceId()); + .setSequenceId(info.getSequenceId()) + .setAckMissing(info.getAckMissing()); if (info.hasPipelineID()) { builder.setPipelineID(PipelineID.getFromProtobuf(info.getPipelineID())); @@ -263,6 +266,24 @@ public void setHealthState(ContainerHealthState newHealthState) { this.healthState = newHealthState; } + /** + * Check if container is acked as missing. + * + * @return boolean + */ + public boolean getAckMissing() { + return ackMissing; + } + + /** + * Set the boolean for ackMissing. 
+ * + * @param isAckMissing checks if container is acked as missing or not + */ + public void setAckMissing(boolean isAckMissing) { + this.ackMissing = isAckMissing; + } + @JsonIgnore public HddsProtos.ContainerInfoProto getProtobuf() { HddsProtos.ContainerInfoProto.Builder builder = @@ -275,7 +296,8 @@ public HddsProtos.ContainerInfoProto getProtobuf() { .setDeleteTransactionId(getDeleteTransactionId()) .setOwner(getOwner()) .setSequenceId(getSequenceId()) - .setReplicationType(getReplicationType()); + .setReplicationType(getReplicationType()) + .setAckMissing(getAckMissing()); if (replicationConfig instanceof ECReplicationConfig) { builder.setEcReplicationConfig(((ECReplicationConfig) replicationConfig) @@ -393,6 +415,7 @@ public static class Builder { private PipelineID pipelineID; private ReplicationConfig replicationConfig; private ContainerHealthState healthState; + private boolean ackMissing; public Builder setPipelineID(PipelineID pipelineId) { this.pipelineID = pipelineId; @@ -450,6 +473,11 @@ public Builder setHealthState(ContainerHealthState healthState) { return this; } + public Builder setAckMissing(boolean ackMissing) { + this.ackMissing = ackMissing; + return this; + } + /** * Also resets {@code stateEnterTime}, so make sure to set clock first. */ diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java index 892dd4de1ff8..716bfadebf2f 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java @@ -463,4 +463,19 @@ DecommissionScmResponseProto decommissionScm( */ void reconcileContainer(long containerID) throws IOException; + /** + * Acknowledge the missing container. + * + * @param containerId The ID of the container to acknowledge as missing. 
+ * @throws IOException + */ + void acknowledgeMissingContainer(long containerId) throws IOException; + + /** + * Unacknowledge the missing container. + * + * @param containerId The ID of the container to unacknowledge as missing. + * @throws IOException + */ + void unacknowledgeMissingContainer(long containerId) throws IOException; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java index 92ddfa7eb8dc..d2ede073a4ee 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java @@ -511,4 +511,20 @@ DecommissionScmResponseProto decommissionScm( * @throws IOException On error */ void reconcileContainer(long containerID) throws IOException; + + /** + * Acknowledge the missing container. + * + * @param containerId The ID of the container to acknowledge as missing. + * @throws IOException + */ + void acknowledgeMissingContainer(long containerId) throws IOException; + + /** + * Unacknowledge the missing container. + * + * @param containerId The ID of the container to unacknowledge as missing. 
+ * @throws IOException + */ + void unacknowledgeMissingContainer(long containerId) throws IOException; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index 94b2230e68ba..a5410bfaff22 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.TransferLeadershipRequestProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.UpgradeFinalizationStatus; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.AcknowledgeMissingContainerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ClosePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoRequestProto; @@ -125,6 +126,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopContainerBalancerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.UnacknowledgeMissingContainerRequestProto; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import 
org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -1240,4 +1242,22 @@ public void reconcileContainer(long containerID) throws IOException { // TODO check error handling. submitRequest(Type.ReconcileContainer, builder -> builder.setReconcileContainerRequest(request)); } + + @Override + public void acknowledgeMissingContainer(long containerID) throws IOException { + AcknowledgeMissingContainerRequestProto request = AcknowledgeMissingContainerRequestProto.newBuilder() + .setContainerID(containerID) + .build(); + submitRequest(Type.AcknowledgeMissingContainer, + builder -> builder.setAcknowledgeMissingContainerRequest(request)); + } + + @Override + public void unacknowledgeMissingContainer(long containerID) throws IOException { + UnacknowledgeMissingContainerRequestProto request = UnacknowledgeMissingContainerRequestProto.newBuilder() + .setContainerID(containerID) + .build(); + submitRequest(Type.UnacknowledgeMissingContainer, + builder -> builder.setUnacknowledgeMissingContainerRequest(request)); + } } diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto index f80a50a3be97..8bcfa627df74 100644 --- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto +++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto @@ -87,6 +87,8 @@ message ScmContainerLocationRequest { optional ContainerBalancerStatusInfoRequestProto containerBalancerStatusInfoRequest = 48; optional ReconcileContainerRequestProto reconcileContainerRequest = 49; optional GetDeletedBlocksTxnSummaryRequestProto getDeletedBlocksTxnSummaryRequest = 50; + optional AcknowledgeMissingContainerRequestProto acknowledgeMissingContainerRequest = 51; + optional UnacknowledgeMissingContainerRequestProto unacknowledgeMissingContainerRequest = 52; } message ScmContainerLocationResponse { @@ -145,6 +147,8 @@ message ScmContainerLocationResponse { optional 
ContainerBalancerStatusInfoResponseProto containerBalancerStatusInfoResponse = 48; optional ReconcileContainerResponseProto reconcileContainerResponse = 49; optional GetDeletedBlocksTxnSummaryResponseProto getDeletedBlocksTxnSummaryResponse = 50; + optional AcknowledgeMissingContainerResponseProto acknowledgeMissingContainerResponse = 51; + optional UnacknowledgeMissingContainerResponseProto unacknowledgeMissingContainerResponse = 52; enum Status { OK = 1; @@ -202,6 +206,8 @@ enum Type { GetContainerBalancerStatusInfo = 44; ReconcileContainer = 45; GetDeletedBlocksTransactionSummary = 46; + AcknowledgeMissingContainer = 47; + UnacknowledgeMissingContainer = 48; } /** @@ -695,6 +701,20 @@ message ReconcileContainerRequestProto { message ReconcileContainerResponseProto { } +message AcknowledgeMissingContainerRequestProto { + required int64 containerID = 1; +} + +message AcknowledgeMissingContainerResponseProto { +} + +message UnacknowledgeMissingContainerRequestProto { + required int64 containerID = 1; +} + +message UnacknowledgeMissingContainerResponseProto { +} + /** * Protocol used from an HDFS node to StorageContainerManager. See the request * and response messages for details of the RPC calls. 
diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto b/hadoop-hdds/interface-client/src/main/proto/hdds.proto index eb819b80a3e8..b95569e3bac2 100644 --- a/hadoop-hdds/interface-client/src/main/proto/hdds.proto +++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto @@ -271,6 +271,7 @@ message ContainerInfoProto { optional ReplicationFactor replicationFactor = 10; required ReplicationType replicationType = 11; optional ECReplicationConfig ecReplicationConfig = 12; + optional bool ackMissing = 13; } message ContainerWithPipeline { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java index 370c219ac601..295f2e152814 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java @@ -24,6 +24,7 @@ import java.util.Map; import java.util.Set; import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ContainerInfoProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; @@ -222,4 +223,14 @@ void deleteContainer(ContainerID containerID) * @return containerStateManger */ ContainerStateManager getContainerStateManager(); + + /** + * Update container info in the container manager. + * This is used for updating container metadata like ackMissing flag. 
+ * + * @param containerInfo Updated container info proto + * @throws IOException + */ + void updateContainerInfo(ContainerID containerID, ContainerInfoProto containerInfo) + throws IOException; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java index dc701a0be661..64ac028609a1 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java @@ -294,6 +294,21 @@ public void updateContainerState(final ContainerID cid, } } + @Override + public void updateContainerInfo(final ContainerID cid, ContainerInfoProto containerInfo) + throws IOException { + lock.lock(); + try { + if (containerExist(cid)) { + containerStateManager.updateContainerInfo(containerInfo); + } else { + throw new ContainerNotFoundException(cid); + } + } finally { + lock.unlock(); + } + } + @Override public void transitionDeletingOrDeletedToClosedState(ContainerID containerID) throws IOException { HddsProtos.ContainerID proto = containerID.getProtobuf(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java index f5a2334b7cd2..4ce28fa40fb2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java @@ -210,4 +210,14 @@ void removeContainer(HddsProtos.ContainerID containerInfo) */ void reinitialize(Table containerStore) throws IOException; + + /** + * Update container info. 
+ * + * @param containerInfo Updated container info proto + * @throws IOException + */ + @Replicate + void updateContainerInfo(HddsProtos.ContainerInfoProto containerInfo) + throws IOException; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index d971b19c406c..3c5de01ddd38 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -551,6 +551,23 @@ public void reinitialize( } } + @Override + public void updateContainerInfo(HddsProtos.ContainerInfoProto updatedInfoProto) + throws IOException { + ContainerInfo updatedInfo = ContainerInfo.fromProtobuf(updatedInfoProto); + ContainerID containerID = updatedInfo.containerID(); + + try (AutoCloseableLock ignored = writeLock(containerID)) { + final ContainerInfo currentInfo = containers.getContainerInfo(containerID); + if (currentInfo == null) { + throw new ContainerNotFoundException(containerID); + } + currentInfo.setAckMissing(updatedInfo.getAckMissing()); + transactionBuffer.addToBuffer(containerStore, containerID, currentInfo); + LOG.debug("Updated container info for container: {}, ackMissing={}", containerID, currentInfo.getAckMissing()); + } + } + private AutoCloseableLock readLock() { return AutoCloseableLock.acquire(lock.readLock()); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index 83d3825b66c0..35c84c299bc4 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -61,6 +61,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport; +import org.apache.hadoop.hdds.scm.container.replication.health.AcknowledgedMissingContainerHandler; import org.apache.hadoop.hdds.scm.container.replication.health.ClosedWithUnhealthyReplicasHandler; import org.apache.hadoop.hdds.scm.container.replication.health.ClosingContainerHandler; import org.apache.hadoop.hdds.scm.container.replication.health.DeletingContainerHandler; @@ -269,6 +270,7 @@ public ReplicationManager(final ReplicationManagerConfiguration rmConf, .addNext(new MismatchedReplicasHandler(this)) .addNext(new EmptyContainerHandler(this)) .addNext(new DeletingContainerHandler(this)) + .addNext(new AcknowledgedMissingContainerHandler()) .addNext(new QuasiClosedStuckReplicationCheck()) .addNext(ecReplicationCheckHandler) .addNext(ratisReplicationCheckHandler) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java new file mode 100644 index 000000000000..3fe36e4b1ad4 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.replication.health; + +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class used in Replication Manager to skip containers that have been + * acknowledged as missing. These containers will still be marked as + * MISSING in the health state but will not trigger replication. 
+ */ +public class AcknowledgedMissingContainerHandler extends AbstractCheck { + + private static final Logger LOG = LoggerFactory.getLogger(AcknowledgedMissingContainerHandler.class); + + @Override + public boolean handle(ContainerCheckRequest request) { + ContainerInfo containerInfo = request.getContainerInfo(); + ContainerID containerID = containerInfo.containerID(); + LOG.debug("Checking container {}, ackMissing={} in AcknowledgedMissingContainerHandler", + containerID, containerInfo.getAckMissing()); + + if (!containerInfo.getAckMissing()) { + LOG.debug("Container {} is not acknowledged ", containerID); + return false; + } + LOG.debug("Container {} has been acknowledged as missing.", containerID); + + if (request.getContainerReplicas().isEmpty()) { + LOG.debug("Acknowledged missing container {} confirmed to have no replicas.", containerID); + } else { + LOG.warn("Container {} was acknowledged as missing but now has {} replicas. " + + "The container may have been recovered. Consider un-acknowledging it.", + containerID, request.getContainerReplicas().size()); + } + return true; + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java index a09f5079ffe5..fc3cf27895f4 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java @@ -68,8 +68,9 @@ public boolean handle(ContainerCheckRequest request) { boolean forceClose = containerInfo.getReplicationConfig() .getReplicationType() != ReplicationType.RATIS; - // TODO - review this logic - may need an empty check here - if (request.getContainerReplicas().isEmpty()) { + // Don't report MISSING if container is 
acknowledged or empty (will be handled by other handlers) + if (request.getContainerReplicas().isEmpty() && !containerInfo.getAckMissing() && + containerInfo.getNumberOfKeys() > 0) { request.getReport().incrementAndSample(ContainerHealthState.MISSING, containerInfo); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index 62f06079bf0b..d069bb49c432 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -47,6 +47,8 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.TransferLeadershipResponseProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.UpgradeFinalizationStatus; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.AcknowledgeMissingContainerRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.AcknowledgeMissingContainerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ClosePipelineRequestProto; @@ -134,6 +136,8 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopContainerBalancerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerRequestProto; 
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.UnacknowledgeMissingContainerRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.UnacknowledgeMissingContainerResponseProto; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -748,6 +752,20 @@ public ScmContainerLocationResponse processRequest( .setStatus(Status.OK) .setReconcileContainerResponse(reconcileContainer(request.getReconcileContainerRequest())) .build(); + case AcknowledgeMissingContainer: + return ScmContainerLocationResponse.newBuilder() + .setCmdType(request.getCmdType()) + .setStatus(Status.OK) + .setAcknowledgeMissingContainerResponse( + acknowledgeMissingContainer(request.getAcknowledgeMissingContainerRequest())) + .build(); + case UnacknowledgeMissingContainer: + return ScmContainerLocationResponse.newBuilder() + .setCmdType(request.getCmdType()) + .setStatus(Status.OK) + .setUnacknowledgeMissingContainerResponse( + unacknowledgeMissingContainer(request.getUnacknowledgeMissingContainerRequest())) + .build(); default: throw new IllegalArgumentException( "Unknown command type: " + request.getCmdType()); @@ -1387,4 +1405,15 @@ public ReconcileContainerResponseProto reconcileContainer(ReconcileContainerRequ return ReconcileContainerResponseProto.getDefaultInstance(); } + public AcknowledgeMissingContainerResponseProto acknowledgeMissingContainer( + AcknowledgeMissingContainerRequestProto request) throws IOException { + impl.acknowledgeMissingContainer(request.getContainerID()); + return AcknowledgeMissingContainerResponseProto.getDefaultInstance(); + } + + public UnacknowledgeMissingContainerResponseProto unacknowledgeMissingContainer( + UnacknowledgeMissingContainerRequestProto request) throws 
IOException { + impl.unacknowledgeMissingContainer(request.getContainerID()); + return UnacknowledgeMissingContainerResponseProto.getDefaultInstance(); + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 5726d3449a7d..28e9a6a1ef65 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -1680,4 +1680,55 @@ public void reconcileContainer(long longContainerID) throws IOException { throw ex; } } + + @Override + public void acknowledgeMissingContainer(long longContainerID) throws IOException { + ContainerID containerID = ContainerID.valueOf(longContainerID); + final Map auditMap = new HashMap<>(); + auditMap.put("containerID", containerID.toString()); + + try { + ContainerInfo containerInfo = scm.getContainerManager().getContainer(containerID); + Set replicas = scm.getContainerManager().getContainerReplicas(containerID); + if (replicas != null && !replicas.isEmpty()) { + throw new IOException("Container " + longContainerID + + " has " + replicas.size() + " replicas and cannot be acknowledged as missing"); + } + + if (containerInfo.getNumberOfKeys() == 0) { + throw new IOException("Container " + longContainerID + " is empty (0 keys) and cannot be acknowledged."); + } + + HddsProtos.ContainerInfoProto updatedProto = containerInfo.getProtobuf().toBuilder() + .setAckMissing(true) + .build(); + scm.getContainerManager().updateContainerInfo(containerID, updatedProto); + + AUDIT.logWriteSuccess(buildAuditMessageForSuccess(SCMAction.ACKNOWLEDGE_MISSING_CONTAINER, auditMap)); + } catch (IOException ex) { + AUDIT.logWriteFailure(buildAuditMessageForFailure(SCMAction.ACKNOWLEDGE_MISSING_CONTAINER, auditMap, ex)); + throw ex; + } + } + + @Override 
+ public void unacknowledgeMissingContainer(long longContainerID) throws IOException { + ContainerID containerID = ContainerID.valueOf(longContainerID); + final Map auditMap = new HashMap<>(); + auditMap.put("containerID", containerID.toString()); + + try { + ContainerInfo containerInfo = scm.getContainerManager().getContainer(containerID); + + HddsProtos.ContainerInfoProto updatedProto = containerInfo.getProtobuf().toBuilder() + .setAckMissing(false) + .build(); + scm.getContainerManager().updateContainerInfo(containerID, updatedProto); + + AUDIT.logWriteSuccess(buildAuditMessageForSuccess(SCMAction.UNACKNOWLEDGE_MISSING_CONTAINER, auditMap)); + } catch (IOException ex) { + AUDIT.logWriteFailure(buildAuditMessageForFailure(SCMAction.UNACKNOWLEDGE_MISSING_CONTAINER, auditMap, ex)); + throw ex; + } + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java index 52cd943c4dbb..e3f0be24a4f6 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java @@ -68,7 +68,9 @@ public enum SCMAction implements AuditAction { QUERY_NODE, GET_PIPELINE, RECONCILE_CONTAINER, - GET_DELETED_BLOCK_SUMMARY; + GET_DELETED_BLOCK_SUMMARY, + ACKNOWLEDGE_MISSING_CONTAINER, + UNACKNOWLEDGE_MISSING_CONTAINER; @Override public String getAction() { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosingContainerHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosingContainerHandler.java index dca89171f0e3..d75b2d982514 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosingContainerHandler.java +++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosingContainerHandler.java @@ -213,7 +213,7 @@ public void testClosingContainerStateIsNotUpdatedWhenThereAreReplicas() { @Test public void testClosingContainerStateIsUpdatedWhenThereAreNotReplicas() { ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( - RATIS_REPLICATION_CONFIG, 1, CLOSING); + RATIS_REPLICATION_CONFIG, 1, CLOSING, 1, 10); Set containerReplicas = new HashSet<>(); ReplicationManagerReport report = new ReplicationManagerReport(rmConf.getContainerSampleLimit()); ContainerCheckRequest request = new ContainerCheckRequest.Builder() diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java index 61c0f4150c34..b3e8f24c3982 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java @@ -598,4 +598,14 @@ public String getMetrics(String query) throws IOException { public void reconcileContainer(long id) throws IOException { storageContainerLocationClient.reconcileContainer(id); } + + @Override + public void acknowledgeMissingContainer(long containerId) throws IOException { + storageContainerLocationClient.acknowledgeMissingContainer(containerId); + } + + @Override + public void unacknowledgeMissingContainer(long containerId) throws IOException { + storageContainerLocationClient.unacknowledgeMissingContainer(containerId); + } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java new file mode 100644 index 000000000000..e9c8a6ce4910 --- /dev/null +++ 
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.cli.container; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.scm.cli.ScmSubcommand; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerListResult; +import picocli.CommandLine; + +/** + * Acknowledge missing container(s) to suppress them from Replication Manager Report. 
+ */ +@CommandLine.Command( + name = "ack", + description = "Acknowledge missing container(s) to suppress them from reports", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class AckMissingSubcommand extends ScmSubcommand { + + @CommandLine.Parameters(description = "Container IDs to acknowledge (comma-separated)", + arity = "0..1") + private String containers; + + @CommandLine.Option(names = {"--list"}, + description = "List all acknowledged missing containers") + private boolean list; + + @Override + public void execute(ScmClient scmClient) throws IOException { + if (list) { + // List acknowledged containers + ContainerListResult result = scmClient.listContainer(1, Integer.MAX_VALUE); + for (ContainerInfo info : result.getContainerInfoList()) { + if (info.getAckMissing()) { + out().println(info.getContainerID()); + } + } + } else if (containers != null && !containers.isEmpty()) { + // Acknowledge containers + Set ids = parseContainerIds(containers); + for (Long id : ids) { + try { + int replicaCount = scmClient.getContainerReplicas(id).size(); + if (replicaCount > 0) { + err().println("Cannot acknowledge container " + id + ": has " + replicaCount + " replica(s). " + + "Only containers with 0 replicas can be acknowledged as missing."); + continue; + } + + ContainerInfo containerInfo = scmClient.getContainer(id); + if (containerInfo.getNumberOfKeys() == 0) { + err().println("Cannot acknowledge container " + id + ": container is empty (0 keys). 
" + + "Empty containers are auto-deleted and don't need acknowledgement."); + continue; + } + + scmClient.acknowledgeMissingContainer(id); + out().println("Acknowledged container: " + id); + } catch (IOException e) { + err().println("Failed to acknowledge container " + id + ": " + e.getMessage()); + } + } + } else { + throw new IllegalArgumentException( + "Either provide container IDs or use --list option"); + } + } + + private Set parseContainerIds(String input) { + return Arrays.stream(input.split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .map(Long::parseLong) + .collect(Collectors.toSet()); + } +} diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java index b340c4077f44..aafe88415951 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java @@ -37,7 +37,9 @@ CloseSubcommand.class, ReportSubcommand.class, UpgradeSubcommand.class, - ReconcileSubcommand.class + ReconcileSubcommand.class, + AckMissingSubcommand.class, + UnackMissingSubcommand.class }) @MetaInfServices(AdminSubcommand.class) public class ContainerCommands implements AdminSubcommand { diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java new file mode 100644 index 000000000000..c87509122472 --- /dev/null +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.cli.container; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.scm.cli.ScmSubcommand; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import picocli.CommandLine; + +/** + * Unacknowledge missing container(s) to report them again. 
+ */ +@CommandLine.Command( + name = "unack", + description = "Unacknowledge missing container(s) to report them again", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class UnackMissingSubcommand extends ScmSubcommand { + + @CommandLine.Parameters(description = "Container IDs to unacknowledge (comma-separated)") + private String containers; + + @Override + public void execute(ScmClient scmClient) throws IOException { + if (containers == null || containers.isEmpty()) { + throw new IllegalArgumentException( + "Container IDs must be provided"); + } + + Set ids = parseContainerIds(containers); + for (Long id : ids) { + try { + ContainerInfo containerInfo = scmClient.getContainer(id); + if (!containerInfo.getAckMissing()) { + err().println("Cannot unacknowledge container " + id + ": " + + "Only acknowledged missing containers can be unacknowledged."); + continue; + } + scmClient.unacknowledgeMissingContainer(id); + out().println("Unacknowledged container: " + id); + } catch (IOException e) { + err().println("Failed to unacknowledge container " + id + ": " + e.getMessage()); + } + } + } + + private Set parseContainerIds(String input) { + return Arrays.stream(input.split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .map(Long::parseLong) + .collect(Collectors.toSet()); + } +} diff --git a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java new file mode 100644 index 000000000000..b625cda61f9c --- /dev/null +++ b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.cli.container; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.OPEN; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.List; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.ContainerHealthState; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerListResult; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import picocli.CommandLine; + +/** + * Tests for AckMissingContainerSubcommand. 
+ */ +public class TestAckMissingContainerSubcommand { + + private ScmClient scmClient; + private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); + private final ByteArrayOutputStream errContent = new ByteArrayOutputStream(); + private static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name(); + + @BeforeEach + public void setup() throws IOException { + scmClient = mock(ScmClient.class); + + System.setOut(new PrintStream(outContent, false, DEFAULT_ENCODING)); + System.setErr(new PrintStream(errContent, false, DEFAULT_ENCODING)); + } + + @Test + public void testAckMissingContainer() throws Exception { + ContainerInfo container = mockContainer(1, false); + when(scmClient.getContainer(1L)).thenReturn(container); + + AckMissingSubcommand cmd = new AckMissingSubcommand(); + new CommandLine(cmd).parseArgs("1"); + cmd.execute(scmClient); + verify(scmClient, times(1)).acknowledgeMissingContainer(1L); + + String output = outContent.toString(DEFAULT_ENCODING); + assertThat(output).contains("Acknowledged container: 1"); + } + + @Test + public void testListAcknowledgedContainers() throws Exception { + ContainerInfo container1 = mockContainer(1, true); + ContainerInfo container2 = mockContainer(2, false); + + List allContainers = Arrays.asList(container1, container2); + ContainerListResult result = new ContainerListResult(allContainers, 2); + when(scmClient.listContainer(anyLong(), anyInt())).thenReturn(result); + + AckMissingSubcommand cmd = new AckMissingSubcommand(); + new CommandLine(cmd).parseArgs("--list"); + cmd.execute(scmClient); + + String output = outContent.toString(DEFAULT_ENCODING); + assertThat(output).contains("1"); + assertThat(output).doesNotContain("2"); + } + + @Test + public void testUnacknowledgeMissingContainer() throws Exception { + ContainerInfo container = mockContainer(1, true); + when(scmClient.getContainer(1L)).thenReturn(container); + + UnackMissingSubcommand cmd = new UnackMissingSubcommand(); + new 
CommandLine(cmd).parseArgs("1"); + cmd.execute(scmClient); + verify(scmClient, times(1)).unacknowledgeMissingContainer(1L); + + String output = outContent.toString(DEFAULT_ENCODING); + assertThat(output).contains("Unacknowledged container: 1"); + } + + private ContainerInfo mockContainer(long containerID, boolean ackMissing) { + return new ContainerInfo.Builder() + .setContainerID(containerID) + .setState(OPEN) + .setHealthState(ContainerHealthState.MISSING) + .setReplicationConfig(RatisReplicationConfig.getInstance(ONE)) + .setNumberOfKeys(1) + .setAckMissing(ackMissing) + .build(); + } +} From 2efa8aafde566c7a2c483ea538131cf787c9a812 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Wed, 11 Feb 2026 10:50:51 +0530 Subject: [PATCH 2/5] Add admin check and rename getter method --- .../hadoop/hdds/scm/container/ContainerInfo.java | 10 +++++----- .../hdds/scm/container/ContainerStateManagerImpl.java | 4 ++-- .../health/AcknowledgedMissingContainerHandler.java | 4 ++-- .../replication/health/ClosingContainerHandler.java | 2 +- .../hdds/scm/server/SCMClientProtocolServer.java | 2 ++ .../hdds/scm/cli/container/AckMissingSubcommand.java | 2 +- .../hdds/scm/cli/container/UnackMissingSubcommand.java | 2 +- 7 files changed, 14 insertions(+), 12 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java index b53ae71b7ddb..183991a04097 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java @@ -271,17 +271,17 @@ public void setHealthState(ContainerHealthState newHealthState) { * * @return boolean */ - public boolean getAckMissing() { + public boolean isAckMissing() { return ackMissing; } /** * Set the boolean for ackMissing. 
* - * @param isAckMissing checks if container is acked as missing or not + * @param acked checks if container is acked as missing or not */ - public void setAckMissing(boolean isAckMissing) { - this.ackMissing = isAckMissing; + public void setAckMissing(boolean acked) { + this.ackMissing = acked; } @JsonIgnore @@ -297,7 +297,7 @@ public HddsProtos.ContainerInfoProto getProtobuf() { .setOwner(getOwner()) .setSequenceId(getSequenceId()) .setReplicationType(getReplicationType()) - .setAckMissing(getAckMissing()); + .setAckMissing(isAckMissing()); if (replicationConfig instanceof ECReplicationConfig) { builder.setEcReplicationConfig(((ECReplicationConfig) replicationConfig) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index 3c5de01ddd38..b794e8a6c919 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -562,9 +562,9 @@ public void updateContainerInfo(HddsProtos.ContainerInfoProto updatedInfoProto) if (currentInfo == null) { throw new ContainerNotFoundException(containerID); } - currentInfo.setAckMissing(updatedInfo.getAckMissing()); + currentInfo.setAckMissing(updatedInfo.isAckMissing()); transactionBuffer.addToBuffer(containerStore, containerID, currentInfo); - LOG.debug("Updated container info for container: {}, ackMissing={}", containerID, currentInfo.getAckMissing()); + LOG.debug("Updated container info for container: {}, ackMissing={}", containerID, currentInfo.isAckMissing()); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java index 3fe36e4b1ad4..20f217cf98bb 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java @@ -37,9 +37,9 @@ public boolean handle(ContainerCheckRequest request) { ContainerInfo containerInfo = request.getContainerInfo(); ContainerID containerID = containerInfo.containerID(); LOG.debug("Checking container {}, ackMissing={} in AcknowledgedMissingContainerHandler", - containerID, containerInfo.getAckMissing()); + containerID, containerInfo.isAckMissing()); - if (!containerInfo.getAckMissing()) { + if (!containerInfo.isAckMissing()) { LOG.debug("Container {} is not acknowledged ", containerID); return false; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java index fc3cf27895f4..1d5bc206ef79 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java @@ -69,7 +69,7 @@ public boolean handle(ContainerCheckRequest request) { .getReplicationType() != ReplicationType.RATIS; // Don't report MISSING if container is acknowledged or empty (will be handled by other handlers) - if (request.getContainerReplicas().isEmpty() && !containerInfo.getAckMissing() && + if (request.getContainerReplicas().isEmpty() && !containerInfo.isAckMissing() && containerInfo.getNumberOfKeys() > 0) { 
request.getReport().incrementAndSample(ContainerHealthState.MISSING, containerInfo); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 28e9a6a1ef65..9b9f3582c30f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -1688,6 +1688,7 @@ public void acknowledgeMissingContainer(long longContainerID) throws IOException auditMap.put("containerID", containerID.toString()); try { + getScm().checkAdminAccess(getRemoteUser(), false); ContainerInfo containerInfo = scm.getContainerManager().getContainer(containerID); Set replicas = scm.getContainerManager().getContainerReplicas(containerID); if (replicas != null && !replicas.isEmpty()) { @@ -1718,6 +1719,7 @@ public void unacknowledgeMissingContainer(long longContainerID) throws IOExcepti auditMap.put("containerID", containerID.toString()); try { + getScm().checkAdminAccess(getRemoteUser(), false); ContainerInfo containerInfo = scm.getContainerManager().getContainer(containerID); HddsProtos.ContainerInfoProto updatedProto = containerInfo.getProtobuf().toBuilder() diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java index e9c8a6ce4910..88bac71ee95b 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java @@ -52,7 +52,7 @@ public void execute(ScmClient scmClient) throws IOException { // List acknowledged containers ContainerListResult result = 
scmClient.listContainer(1, Integer.MAX_VALUE); for (ContainerInfo info : result.getContainerInfoList()) { - if (info.getAckMissing()) { + if (info.isAckMissing()) { out().println(info.getContainerID()); } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java index c87509122472..5bb940912095 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java @@ -51,7 +51,7 @@ public void execute(ScmClient scmClient) throws IOException { for (Long id : ids) { try { ContainerInfo containerInfo = scmClient.getContainer(id); - if (!containerInfo.getAckMissing()) { + if (!containerInfo.isAckMissing()) { err().println("Cannot unacknowledge container " + id + ": " + "Only acknowledged missing containers can be unacknowledged."); continue; From 0088a45f9f2003b6145020645f02e4f42e2f0368 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 19 Feb 2026 11:03:49 +0530 Subject: [PATCH 3/5] Single API to set/unset of healthState ACK_MISSING --- .../scm/container/ContainerHealthState.java | 7 +++ .../hdds/scm/container/ContainerInfo.java | 32 +--------- .../hadoop/hdds/scm/client/ScmClient.java | 15 ++--- .../StorageContainerLocationProtocol.java | 15 ++--- ...ocationProtocolClientSideTranslatorPB.java | 21 ++----- .../src/main/proto/ScmAdminProtocol.proto | 23 +++---- .../src/main/proto/hdds.proto | 1 - .../container/ContainerStateManagerImpl.java | 15 ++++- .../replication/ReplicationManager.java | 11 +++- .../AcknowledgedMissingContainerHandler.java | 57 ----------------- .../health/ClosingContainerHandler.java | 5 +- ...ocationProtocolServerSideTranslatorPB.java | 33 +++------- .../scm/server/SCMClientProtocolServer.java | 63 +++++++------------ 
.../scm/cli/ContainerOperationClient.java | 9 +-- .../cli/container/AckMissingSubcommand.java | 5 +- .../cli/container/UnackMissingSubcommand.java | 5 +- .../TestAckMissingContainerSubcommand.java | 9 +-- 17 files changed, 100 insertions(+), 226 deletions(-) delete mode 100644 hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerHealthState.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerHealthState.java index 56dd1c5620b6..ac9a82d190d1 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerHealthState.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerHealthState.java @@ -107,6 +107,13 @@ public enum ContainerHealthState { "Containers in OPEN state without any healthy Pipeline", "OpenContainersWithoutPipeline"), + /** + * Acknowledge missing containers which are not problematic. 
+ */ + ACK_MISSING((short) 10, + "Acknowledge missing containers which are not problematic", + "AcknowledgeMissingContainers"), + // ========== Actual Combinations Found in Code (100+) ========== /** diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java index 183991a04097..2beef2abf885 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java @@ -87,7 +87,6 @@ public final class ContainerInfo implements Comparable { private long sequenceId; // Health state of the container (determined by ReplicationManager) private ContainerHealthState healthState; - private boolean ackMissing; private ContainerInfo(Builder b) { containerID = ContainerID.valueOf(b.containerID); @@ -103,7 +102,6 @@ private ContainerInfo(Builder b) { replicationConfig = b.replicationConfig; clock = b.clock; healthState = b.healthState != null ? b.healthState : ContainerHealthState.HEALTHY; - ackMissing = b.ackMissing; } public static Codec getCodec() { @@ -123,8 +121,7 @@ public static ContainerInfo fromProtobuf(HddsProtos.ContainerInfoProto info) { .setContainerID(info.getContainerID()) .setDeleteTransactionId(info.getDeleteTransactionId()) .setReplicationConfig(config) - .setSequenceId(info.getSequenceId()) - .setAckMissing(info.getAckMissing()); + .setSequenceId(info.getSequenceId()); if (info.hasPipelineID()) { builder.setPipelineID(PipelineID.getFromProtobuf(info.getPipelineID())); @@ -266,24 +263,6 @@ public void setHealthState(ContainerHealthState newHealthState) { this.healthState = newHealthState; } - /** - * Check if container is acked as missing. - * - * @return boolean - */ - public boolean isAckMissing() { - return ackMissing; - } - - /** - * Set the boolean for ackMissing. 
- * - * @param acked checks if container is acked as missing or not - */ - public void setAckMissing(boolean acked) { - this.ackMissing = acked; - } - @JsonIgnore public HddsProtos.ContainerInfoProto getProtobuf() { HddsProtos.ContainerInfoProto.Builder builder = @@ -296,8 +275,7 @@ public HddsProtos.ContainerInfoProto getProtobuf() { .setDeleteTransactionId(getDeleteTransactionId()) .setOwner(getOwner()) .setSequenceId(getSequenceId()) - .setReplicationType(getReplicationType()) - .setAckMissing(isAckMissing()); + .setReplicationType(getReplicationType()); if (replicationConfig instanceof ECReplicationConfig) { builder.setEcReplicationConfig(((ECReplicationConfig) replicationConfig) @@ -415,7 +393,6 @@ public static class Builder { private PipelineID pipelineID; private ReplicationConfig replicationConfig; private ContainerHealthState healthState; - private boolean ackMissing; public Builder setPipelineID(PipelineID pipelineId) { this.pipelineID = pipelineId; @@ -473,11 +450,6 @@ public Builder setHealthState(ContainerHealthState healthState) { return this; } - public Builder setAckMissing(boolean ackMissing) { - this.ackMissing = ackMissing; - return this; - } - /** * Also resets {@code stateEnterTime}, so make sure to set clock first. */ diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java index 716bfadebf2f..ea2b5b2848c9 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java @@ -464,18 +464,11 @@ DecommissionScmResponseProto decommissionScm( void reconcileContainer(long containerID) throws IOException; /** - * Acknowledge the missing container. + * Set or unset the ACK_MISSING state for a container. * - * @param containerId The ID of the container to acknowledge as missing. 
+ * @param containerId The ID of the container. + * @param acknowledge true to set ACK_MISSING, false to unset to MISSING. * @throws IOException */ - void acknowledgeMissingContainer(long containerId) throws IOException; - - /** - * Unacknowledge the missing container. - * - * @param containerId The ID of the container to unacknowledge as missing. - * @throws IOException - */ - void unacknowledgeMissingContainer(long containerId) throws IOException; + void setAckMissingContainer(long containerId, boolean acknowledge) throws IOException; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java index d2ede073a4ee..83a9e70c572a 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java @@ -513,18 +513,11 @@ DecommissionScmResponseProto decommissionScm( void reconcileContainer(long containerID) throws IOException; /** - * Acknowledge the missing container. + * Set or unset the ACK_MISSING state for a container. * - * @param containerId The ID of the container to acknowledge as missing. + * @param containerId The ID of the container. + * @param acknowledge true to set ACK_MISSING, false to unset to MISSING. * @throws IOException */ - void acknowledgeMissingContainer(long containerId) throws IOException; - - /** - * Unacknowledge the missing container. - * - * @param containerId The ID of the container to unacknowledge as missing. 
- * @throws IOException - */ - void unacknowledgeMissingContainer(long containerId) throws IOException; + void setAckMissingContainer(long containerId, boolean acknowledge) throws IOException; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index a5410bfaff22..98795e746fe5 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -50,7 +50,6 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.TransferLeadershipRequestProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.UpgradeFinalizationStatus; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.AcknowledgeMissingContainerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ClosePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoRequestProto; @@ -116,6 +115,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationRequest; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationRequest.Builder; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationResponse; +import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SetAckMissingContainerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SingleNodeQueryRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SingleNodeQueryResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerRequestProto; @@ -126,7 +126,6 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopContainerBalancerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.UnacknowledgeMissingContainerRequestProto; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -1244,20 +1243,12 @@ public void reconcileContainer(long containerID) throws IOException { } @Override - public void acknowledgeMissingContainer(long containerID) throws IOException { - AcknowledgeMissingContainerRequestProto request = AcknowledgeMissingContainerRequestProto.newBuilder() - .setContainerID(containerID) - .build(); - submitRequest(Type.AcknowledgeMissingContainer, - builder -> builder.setAcknowledgeMissingContainerRequest(request)); - } - - @Override - public void unacknowledgeMissingContainer(long containerID) throws IOException { - UnacknowledgeMissingContainerRequestProto request = UnacknowledgeMissingContainerRequestProto.newBuilder() + public void setAckMissingContainer(long containerID, boolean acknowledge) + throws IOException { + SetAckMissingContainerRequestProto request = SetAckMissingContainerRequestProto.newBuilder() .setContainerID(containerID) + 
.setAcknowledge(acknowledge) .build(); - submitRequest(Type.UnacknowledgeMissingContainer, - builder -> builder.setUnacknowledgeMissingContainerRequest(request)); + submitRequest(Type.SetAckMissingContainer, builder -> builder.setSetAckMissingContainerRequest(request)); } } diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto index 8bcfa627df74..6cb9be66df22 100644 --- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto +++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto @@ -87,8 +87,7 @@ message ScmContainerLocationRequest { optional ContainerBalancerStatusInfoRequestProto containerBalancerStatusInfoRequest = 48; optional ReconcileContainerRequestProto reconcileContainerRequest = 49; optional GetDeletedBlocksTxnSummaryRequestProto getDeletedBlocksTxnSummaryRequest = 50; - optional AcknowledgeMissingContainerRequestProto acknowledgeMissingContainerRequest = 51; - optional UnacknowledgeMissingContainerRequestProto unacknowledgeMissingContainerRequest = 52; + optional SetAckMissingContainerRequestProto setAckMissingContainerRequest = 51; } message ScmContainerLocationResponse { @@ -147,8 +146,7 @@ message ScmContainerLocationResponse { optional ContainerBalancerStatusInfoResponseProto containerBalancerStatusInfoResponse = 48; optional ReconcileContainerResponseProto reconcileContainerResponse = 49; optional GetDeletedBlocksTxnSummaryResponseProto getDeletedBlocksTxnSummaryResponse = 50; - optional AcknowledgeMissingContainerResponseProto acknowledgeMissingContainerResponse = 51; - optional UnacknowledgeMissingContainerResponseProto unacknowledgeMissingContainerResponse = 52; + optional SetAckMissingContainerResponseProto setAckMissingContainerResponse = 51; enum Status { OK = 1; @@ -206,8 +204,7 @@ enum Type { GetContainerBalancerStatusInfo = 44; ReconcileContainer = 45; GetDeletedBlocksTransactionSummary = 46; - 
AcknowledgeMissingContainer = 47; - UnacknowledgeMissingContainer = 48; + SetAckMissingContainer = 47; } /** @@ -701,18 +698,12 @@ message ReconcileContainerRequestProto { message ReconcileContainerResponseProto { } -message AcknowledgeMissingContainerRequestProto { - required int64 containerID = 1; -} - -message AcknowledgeMissingContainerResponseProto { -} - -message UnacknowledgeMissingContainerRequestProto { - required int64 containerID = 1; +message SetAckMissingContainerRequestProto { + optional int64 containerID = 1; + optional bool acknowledge = 2; } -message UnacknowledgeMissingContainerResponseProto { +message SetAckMissingContainerResponseProto { } /** diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto b/hadoop-hdds/interface-client/src/main/proto/hdds.proto index b95569e3bac2..eb819b80a3e8 100644 --- a/hadoop-hdds/interface-client/src/main/proto/hdds.proto +++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto @@ -271,7 +271,6 @@ message ContainerInfoProto { optional ReplicationFactor replicationFactor = 10; required ReplicationType replicationType = 11; optional ECReplicationConfig ecReplicationConfig = 12; - optional bool ackMissing = 13; } message ContainerWithPipeline { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index b794e8a6c919..e7c216b8f422 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -562,9 +562,20 @@ public void updateContainerInfo(HddsProtos.ContainerInfoProto updatedInfoProto) if (currentInfo == null) { throw new ContainerNotFoundException(containerID); } - currentInfo.setAckMissing(updatedInfo.isAckMissing()); + + // Only persist ACK_MISSING health state 
changes + // Other health states are dynamic and computed by ReplicationManager + ContainerHealthState newHealthState = updatedInfo.getHealthState(); + if (newHealthState == ContainerHealthState.ACK_MISSING) { + currentInfo.setHealthState(ContainerHealthState.ACK_MISSING); + LOG.debug("Persisting ACK_MISSING state for container: {}", containerID); + } else if (currentInfo.getHealthState() == ContainerHealthState.ACK_MISSING) { + currentInfo.setHealthState(null); + LOG.debug("Clearing ACK_MISSING state for container: {}, new state: {}", + containerID, newHealthState); + } transactionBuffer.addToBuffer(containerStore, containerID, currentInfo); - LOG.debug("Updated container info for container: {}, ackMissing={}", containerID, currentInfo.isAckMissing()); + LOG.debug("Updated container info for container: {}, healthState={}", containerID, currentInfo.getHealthState()); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index 35c84c299bc4..d1d317108c89 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -55,13 +55,13 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ReplicationCommandPriority; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.container.ContainerHealthState; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; 
import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport; -import org.apache.hadoop.hdds.scm.container.replication.health.AcknowledgedMissingContainerHandler; import org.apache.hadoop.hdds.scm.container.replication.health.ClosedWithUnhealthyReplicasHandler; import org.apache.hadoop.hdds.scm.container.replication.health.ClosingContainerHandler; import org.apache.hadoop.hdds.scm.container.replication.health.DeletingContainerHandler; @@ -270,7 +270,6 @@ public ReplicationManager(final ReplicationManagerConfiguration rmConf, .addNext(new MismatchedReplicasHandler(this)) .addNext(new EmptyContainerHandler(this)) .addNext(new DeletingContainerHandler(this)) - .addNext(new AcknowledgedMissingContainerHandler()) .addNext(new QuasiClosedStuckReplicationCheck()) .addNext(ecReplicationCheckHandler) .addNext(ratisReplicationCheckHandler) @@ -856,6 +855,14 @@ protected boolean processContainer(ContainerInfo containerInfo, ReplicationQueue repQueue, ReplicationManagerReport report, boolean readOnly) throws ContainerNotFoundException { synchronized (containerInfo) { + // Skip containers that are acknowledged as missing + // These containers are persisted with ACK_MISSING state and should not be + // processed by ReplicationManager until unacknowledged + if (containerInfo.getHealthState() == ContainerHealthState.ACK_MISSING) { + LOG.debug("Skipping ACK_MISSING container: {}", containerInfo.getContainerID()); + return false; + } + // Reset health state to HEALTHY before processing this container report.resetContainerHealthState(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java deleted file mode 100644 index 20f217cf98bb..000000000000 --- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hdds.scm.container.replication.health; - -import org.apache.hadoop.hdds.scm.container.ContainerID; -import org.apache.hadoop.hdds.scm.container.ContainerInfo; -import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Class used in Replication Manager to skip containers that have been - * acknowledged as missing. These containers will still be marked as - * MISSING in the health state but will not trigger replication. 
- */ -public class AcknowledgedMissingContainerHandler extends AbstractCheck { - - private static final Logger LOG = LoggerFactory.getLogger(AcknowledgedMissingContainerHandler.class); - - @Override - public boolean handle(ContainerCheckRequest request) { - ContainerInfo containerInfo = request.getContainerInfo(); - ContainerID containerID = containerInfo.containerID(); - LOG.debug("Checking container {}, ackMissing={} in AcknowledgedMissingContainerHandler", - containerID, containerInfo.isAckMissing()); - - if (!containerInfo.isAckMissing()) { - LOG.debug("Container {} is not acknowledged ", containerID); - return false; - } - LOG.debug("Container {} has been acknowledged as missing.", containerID); - - if (request.getContainerReplicas().isEmpty()) { - LOG.debug("Acknowledged missing container {} confirmed to have no replicas.", containerID); - } else { - LOG.warn("Container {} was acknowledged as missing but now has {} replicas. " + - "The container may have been recovered. Consider un-acknowledging it.", - containerID, request.getContainerReplicas().size()); - } - return true; - } -} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java index 1d5bc206ef79..2f1e5bdf897d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java @@ -68,9 +68,8 @@ public boolean handle(ContainerCheckRequest request) { boolean forceClose = containerInfo.getReplicationConfig() .getReplicationType() != ReplicationType.RATIS; - // Don't report MISSING if container is acknowledged or empty (will be handled by other handlers) - if (request.getContainerReplicas().isEmpty() && 
!containerInfo.isAckMissing() && - containerInfo.getNumberOfKeys() > 0) { + // Report MISSING only for containers with no replicas and keys > 0 + if (request.getContainerReplicas().isEmpty() && containerInfo.getNumberOfKeys() > 0) { request.getReport().incrementAndSample(ContainerHealthState.MISSING, containerInfo); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index d069bb49c432..e9eca075a183 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -47,8 +47,6 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.TransferLeadershipResponseProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.UpgradeFinalizationStatus; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.AcknowledgeMissingContainerRequestProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.AcknowledgeMissingContainerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ClosePipelineRequestProto; @@ -124,6 +122,8 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationRequest; import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationResponse; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationResponse.Status; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SetAckMissingContainerRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SetAckMissingContainerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SingleNodeQueryRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SingleNodeQueryResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerRequestProto; @@ -136,8 +136,6 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopContainerBalancerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerResponseProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.UnacknowledgeMissingContainerRequestProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.UnacknowledgeMissingContainerResponseProto; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -752,19 +750,12 @@ public ScmContainerLocationResponse processRequest( .setStatus(Status.OK) .setReconcileContainerResponse(reconcileContainer(request.getReconcileContainerRequest())) .build(); - case AcknowledgeMissingContainer: + case SetAckMissingContainer: return ScmContainerLocationResponse.newBuilder() .setCmdType(request.getCmdType()) .setStatus(Status.OK) - 
.setAcknowledgeMissingContainerResponse( - acknowledgeMissingContainer(request.getAcknowledgeMissingContainerRequest())) - .build(); - case UnacknowledgeMissingContainer: - return ScmContainerLocationResponse.newBuilder() - .setCmdType(request.getCmdType()) - .setStatus(Status.OK) - .setUnacknowledgeMissingContainerResponse( - unacknowledgeMissingContainer(request.getUnacknowledgeMissingContainerRequest())) + .setSetAckMissingContainerResponse( + setAckMissingContainer(request.getSetAckMissingContainerRequest())) .build(); default: throw new IllegalArgumentException( @@ -1405,15 +1396,9 @@ public ReconcileContainerResponseProto reconcileContainer(ReconcileContainerRequ return ReconcileContainerResponseProto.getDefaultInstance(); } - public AcknowledgeMissingContainerResponseProto acknowledgeMissingContainer( - AcknowledgeMissingContainerRequestProto request) throws IOException { - impl.acknowledgeMissingContainer(request.getContainerID()); - return AcknowledgeMissingContainerResponseProto.getDefaultInstance(); - } - - public UnacknowledgeMissingContainerResponseProto unacknowledgeMissingContainer( - UnacknowledgeMissingContainerRequestProto request) throws IOException { - impl.unacknowledgeMissingContainer(request.getContainerID()); - return UnacknowledgeMissingContainerResponseProto.getDefaultInstance(); + public SetAckMissingContainerResponseProto setAckMissingContainer( + SetAckMissingContainerRequestProto request) throws IOException { + impl.setAckMissingContainer(request.getContainerID(), request.getAcknowledge()); + return SetAckMissingContainerResponseProto.getDefaultInstance(); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 9b9f3582c30f..bc25344d9786 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -71,6 +71,7 @@ import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.FetchMetrics; import org.apache.hadoop.hdds.scm.ScmInfo; +import org.apache.hadoop.hdds.scm.container.ContainerHealthState; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerListResult; @@ -1682,54 +1683,38 @@ public void reconcileContainer(long longContainerID) throws IOException { } @Override - public void acknowledgeMissingContainer(long longContainerID) throws IOException { + public void setAckMissingContainer(long longContainerID, boolean acknowledge) throws IOException { ContainerID containerID = ContainerID.valueOf(longContainerID); final Map auditMap = new HashMap<>(); auditMap.put("containerID", containerID.toString()); + auditMap.put("acknowledge", String.valueOf(acknowledge)); try { getScm().checkAdminAccess(getRemoteUser(), false); ContainerInfo containerInfo = scm.getContainerManager().getContainer(containerID); - Set replicas = scm.getContainerManager().getContainerReplicas(containerID); - if (replicas != null && !replicas.isEmpty()) { - throw new IOException("Container " + longContainerID + - " has " + replicas.size() + " replicas and cannot be acknowledged as missing"); - } - - if (containerInfo.getNumberOfKeys() == 0) { - throw new IOException("Container " + longContainerID + " is empty (0 keys) and cannot be acknowledged."); + + if (acknowledge) { + // Validation for setting ACK_MISSING + Set replicas = scm.getContainerManager().getContainerReplicas(containerID); + if (replicas != null && !replicas.isEmpty()) { + throw new IOException("Container " + longContainerID + " has " + replicas.size() + + " replicas and cannot be acknowledged as missing"); + } + if (containerInfo.getNumberOfKeys() == 0) { + throw new 
IOException("Container " + longContainerID + " is empty (0 keys) and cannot be acknowledged."); + } + // Set to ACK_MISSING + containerInfo.setHealthState(ContainerHealthState.ACK_MISSING); + AUDIT.logWriteSuccess(buildAuditMessageForSuccess(SCMAction.ACKNOWLEDGE_MISSING_CONTAINER, auditMap)); + } else { + containerInfo.setHealthState(null); + AUDIT.logWriteSuccess(buildAuditMessageForSuccess(SCMAction.UNACKNOWLEDGE_MISSING_CONTAINER, auditMap)); } - - HddsProtos.ContainerInfoProto updatedProto = containerInfo.getProtobuf().toBuilder() - .setAckMissing(true) - .build(); - scm.getContainerManager().updateContainerInfo(containerID, updatedProto); - - AUDIT.logWriteSuccess(buildAuditMessageForSuccess(SCMAction.ACKNOWLEDGE_MISSING_CONTAINER, auditMap)); - } catch (IOException ex) { - AUDIT.logWriteFailure(buildAuditMessageForFailure(SCMAction.ACKNOWLEDGE_MISSING_CONTAINER, auditMap, ex)); - throw ex; - } - } - - @Override - public void unacknowledgeMissingContainer(long longContainerID) throws IOException { - ContainerID containerID = ContainerID.valueOf(longContainerID); - final Map auditMap = new HashMap<>(); - auditMap.put("containerID", containerID.toString()); - - try { - getScm().checkAdminAccess(getRemoteUser(), false); - ContainerInfo containerInfo = scm.getContainerManager().getContainer(containerID); - - HddsProtos.ContainerInfoProto updatedProto = containerInfo.getProtobuf().toBuilder() - .setAckMissing(false) - .build(); - scm.getContainerManager().updateContainerInfo(containerID, updatedProto); - - AUDIT.logWriteSuccess(buildAuditMessageForSuccess(SCMAction.UNACKNOWLEDGE_MISSING_CONTAINER, auditMap)); + scm.getContainerManager().updateContainerInfo(containerID, containerInfo.getProtobuf()); } catch (IOException ex) { - AUDIT.logWriteFailure(buildAuditMessageForFailure(SCMAction.UNACKNOWLEDGE_MISSING_CONTAINER, auditMap, ex)); + SCMAction action = acknowledge ? 
+ SCMAction.ACKNOWLEDGE_MISSING_CONTAINER : SCMAction.UNACKNOWLEDGE_MISSING_CONTAINER; + AUDIT.logWriteFailure(buildAuditMessageForFailure(action, auditMap, ex)); throw ex; } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java index b3e8f24c3982..0774fb62df31 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java @@ -600,12 +600,7 @@ public void reconcileContainer(long id) throws IOException { } @Override - public void acknowledgeMissingContainer(long containerId) throws IOException { - storageContainerLocationClient.acknowledgeMissingContainer(containerId); - } - - @Override - public void unacknowledgeMissingContainer(long containerId) throws IOException { - storageContainerLocationClient.unacknowledgeMissingContainer(containerId); + public void setAckMissingContainer(long containerId, boolean acknowledge) throws IOException { + storageContainerLocationClient.setAckMissingContainer(containerId, acknowledge); } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java index 88bac71ee95b..49cb86b9c9e3 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.scm.cli.ScmSubcommand; import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.ContainerHealthState; import 
org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerListResult; import picocli.CommandLine; @@ -52,7 +53,7 @@ public void execute(ScmClient scmClient) throws IOException { // List acknowledged containers ContainerListResult result = scmClient.listContainer(1, Integer.MAX_VALUE); for (ContainerInfo info : result.getContainerInfoList()) { - if (info.isAckMissing()) { + if (info.getHealthState() == ContainerHealthState.ACK_MISSING) { out().println(info.getContainerID()); } } @@ -75,7 +76,7 @@ public void execute(ScmClient scmClient) throws IOException { continue; } - scmClient.acknowledgeMissingContainer(id); + scmClient.setAckMissingContainer(id, true); out().println("Acknowledged container: " + id); } catch (IOException e) { err().println("Failed to acknowledge container " + id + ": " + e.getMessage()); diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java index 5bb940912095..bafaedcdb184 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.scm.cli.ScmSubcommand; import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.ContainerHealthState; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import picocli.CommandLine; @@ -51,12 +52,12 @@ public void execute(ScmClient scmClient) throws IOException { for (Long id : ids) { try { ContainerInfo containerInfo = scmClient.getContainer(id); - if (!containerInfo.isAckMissing()) { + if (containerInfo.getHealthState() != ContainerHealthState.ACK_MISSING) { err().println("Cannot 
unacknowledge container " + id + ": " + "Only acknowledged missing containers can be unacknowledged."); continue; } - scmClient.unacknowledgeMissingContainer(id); + scmClient.setAckMissingContainer(id, false); out().println("Unacknowledged container: " + id); } catch (IOException e) { err().println("Failed to unacknowledge container " + id + ": " + e.getMessage()); diff --git a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java index b625cda61f9c..dc75850a58a4 100644 --- a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java +++ b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java @@ -68,7 +68,7 @@ public void testAckMissingContainer() throws Exception { AckMissingSubcommand cmd = new AckMissingSubcommand(); new CommandLine(cmd).parseArgs("1"); cmd.execute(scmClient); - verify(scmClient, times(1)).acknowledgeMissingContainer(1L); + verify(scmClient, times(1)).setAckMissingContainer(1L, true); String output = outContent.toString(DEFAULT_ENCODING); assertThat(output).contains("Acknowledged container: 1"); @@ -100,20 +100,21 @@ public void testUnacknowledgeMissingContainer() throws Exception { UnackMissingSubcommand cmd = new UnackMissingSubcommand(); new CommandLine(cmd).parseArgs("1"); cmd.execute(scmClient); - verify(scmClient, times(1)).unacknowledgeMissingContainer(1L); + verify(scmClient, times(1)).setAckMissingContainer(1L, false); String output = outContent.toString(DEFAULT_ENCODING); assertThat(output).contains("Unacknowledged container: 1"); } private ContainerInfo mockContainer(long containerID, boolean ackMissing) { + ContainerHealthState healthState = ackMissing ? 
+ ContainerHealthState.ACK_MISSING : ContainerHealthState.MISSING; return new ContainerInfo.Builder() .setContainerID(containerID) .setState(OPEN) - .setHealthState(ContainerHealthState.MISSING) + .setHealthState(healthState) .setReplicationConfig(RatisReplicationConfig.getInstance(ONE)) .setNumberOfKeys(1) - .setAckMissing(ackMissing) .build(); } } From 563a345331de600d49d3d8f3425b2630d5fc8550 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 19 Feb 2026 12:27:32 +0530 Subject: [PATCH 4/5] Persist the healthState --- .../apache/hadoop/hdds/scm/container/ContainerInfo.java | 9 +++++++++ hadoop-hdds/interface-client/src/main/proto/hdds.proto | 1 + 2 files changed, 10 insertions(+) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java index 2beef2abf885..40d82bfa6a48 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java @@ -123,6 +123,10 @@ public static ContainerInfo fromProtobuf(HddsProtos.ContainerInfoProto info) { .setReplicationConfig(config) .setSequenceId(info.getSequenceId()); + if (info.hasAckMissing() && info.getAckMissing()) { + builder.setHealthState(ContainerHealthState.ACK_MISSING); + } + if (info.hasPipelineID()) { builder.setPipelineID(PipelineID.getFromProtobuf(info.getPipelineID())); } @@ -291,6 +295,11 @@ public HddsProtos.ContainerInfoProto getProtobuf() { builder.setPipelineID(getPipelineID().getProtobuf()); } + // Only persist ACK_MISSING health state, others are dynamic + if (healthState == ContainerHealthState.ACK_MISSING) { + builder.setAckMissing(true); + } + return builder.build(); } diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto b/hadoop-hdds/interface-client/src/main/proto/hdds.proto index eb819b80a3e8..b95569e3bac2 100644 --- 
a/hadoop-hdds/interface-client/src/main/proto/hdds.proto +++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto @@ -271,6 +271,7 @@ message ContainerInfoProto { optional ReplicationFactor replicationFactor = 10; required ReplicationType replicationType = 11; optional ECReplicationConfig ecReplicationConfig = 12; + optional bool ackMissing = 13; } message ContainerWithPipeline { From 9c210c8fd75bdeaadd47370f8aa850990714723b Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 19 Feb 2026 12:46:54 +0530 Subject: [PATCH 5/5] Fix TestContainerHealthState --- .../scm/container/TestContainerHealthState.java | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerHealthState.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerHealthState.java index 6ffc678ea175..763a2e5a9a21 100644 --- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerHealthState.java +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerHealthState.java @@ -17,6 +17,7 @@ package org.apache.hadoop.hdds.scm.container; +import static org.apache.hadoop.hdds.scm.container.ContainerHealthState.ACK_MISSING; import static org.apache.hadoop.hdds.scm.container.ContainerHealthState.EMPTY; import static org.apache.hadoop.hdds.scm.container.ContainerHealthState.HEALTHY; import static org.apache.hadoop.hdds.scm.container.ContainerHealthState.MISSING; @@ -37,6 +38,9 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; import org.junit.jupiter.api.Test; /** @@ -59,6 +63,7 @@ public void testIndividualStateValues() { assertEquals(7, OPEN_UNHEALTHY.getValue()); assertEquals(8, QUASI_CLOSED_STUCK.getValue()); assertEquals(9, OPEN_WITHOUT_PIPELINE.getValue());
+ assertEquals(10, ACK_MISSING.getValue()); } @Test @@ -69,7 +74,7 @@ public void testCombinationStateValues() { assertEquals(102, MISSING_UNDER_REPLICATED.getValue()); assertEquals(103, QUASI_CLOSED_STUCK_UNDER_REPLICATED.getValue()); assertEquals(104, QUASI_CLOSED_STUCK_OVER_REPLICATED.getValue()); - assertEquals(105, ContainerHealthState.QUASI_CLOSED_STUCK_MISSING.getValue()); + assertEquals(105, QUASI_CLOSED_STUCK_MISSING.getValue()); } @Test @@ -101,6 +106,7 @@ public void testFromValueIndividualStates() { assertEquals(OPEN_UNHEALTHY, ContainerHealthState.fromValue((short) 7)); assertEquals(QUASI_CLOSED_STUCK, ContainerHealthState.fromValue((short) 8)); assertEquals(OPEN_WITHOUT_PIPELINE, ContainerHealthState.fromValue((short) 9)); + assertEquals(ACK_MISSING, ContainerHealthState.fromValue((short) 10)); } @Test @@ -126,7 +132,7 @@ public void testFromValueUnknownReturnsHealthy() { @Test public void testAllEnumValuesAreUnique() { // Verify all enum constants have unique values - java.util.Set values = new java.util.HashSet<>(); + Set values = new HashSet<>(); for (ContainerHealthState state : ContainerHealthState.values()) { assertFalse(values.contains(state.getValue()), "Duplicate value found: " + state.getValue()); @@ -137,16 +143,16 @@ public void testAllEnumValuesAreUnique() { @Test public void testIndividualStateCount() { // Should have 10 individual states (0-9) - long individualCount = java.util.Arrays.stream(ContainerHealthState.values()) + long individualCount = Arrays.stream(ContainerHealthState.values()) .filter(s -> s.getValue() >= 0 && s.getValue() <= 99) .count(); - assertEquals(10, individualCount, "Expected 10 individual states"); + assertEquals(11, individualCount, "Expected 10 individual states"); } @Test public void testCombinationStateCount() { // Should have 6 combination states (100-105) - long combinationCount = java.util.Arrays.stream(ContainerHealthState.values()) + long combinationCount = Arrays.stream(ContainerHealthState.values()) 
.filter(s -> s.getValue() >= 100) .count(); assertEquals(6, combinationCount, "Expected 6 combination states");