From caed65678cd88ad9ee988f7c1da719f9ddbdda4d Mon Sep 17 00:00:00 2001
From: Rishabh Patel
Date: Wed, 9 Apr 2025 11:06:55 -0700
Subject: [PATCH 1/7] HDDS-12723. Handle Volume DB failure in volume scanner

---
 .../container/common/volume/HddsVolume.java | 14 ++++++--
 .../keyvalue/helpers/BlockUtils.java        |  1 +
 .../common/utils/TestHddsVolumeUtil.java    | 19 +++++++++++
 pom.xml                                     | 33 ++++++++++++++++++-
 4 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
index 55bd55021093..019b7c187dcd 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
@@ -22,6 +22,7 @@ import static org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil.initPerDiskDBStore;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
 import jakarta.annotation.Nullable;
 import java.io.File;
 import java.io.IOException;
@@ -275,8 +276,8 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)
           "the volume might not have been loaded properly.", getStorageDir());
       return VolumeCheckResult.FAILED;
     }
-    if (result != VolumeCheckResult.HEALTHY ||
-        !getDatanodeConfig().getContainerSchemaV3Enabled() || !isDbLoaded()) {
+
+    if (!getDatanodeConfig().getContainerSchemaV3Enabled()) {
       return result;
     }
 
@@ -288,6 +289,15 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)
       return VolumeCheckResult.FAILED;
     }
 
+    DatanodeStoreCache cache = DatanodeStoreCache.getInstance();
+    Preconditions.checkNotNull(cache);
+    try {
+      cache.getDB(String.valueOf(dbFile), getConf());
+    } catch (IOException e) {
+      LOG.error("Could not open Volume DB located at {}", dbFile, e);
+      return VolumeCheckResult.FAILED;
+    }
+
     return VolumeCheckResult.HEALTHY;
   }
 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java
index 730689539f94..9f9d138cfd49 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java
@@ -136,6 +136,7 @@ public static DBHandle getDB(KeyValueContainerData containerData,
             containerData.getSchemaVersion(), conf);
       }
     } catch (IOException ex) {
+// TODO: does the volume scanner get triggered twice? here and upon receiving the exception?
       onFailure(containerData.getVolume());
       String message = String.format("Error opening DB. Container:%s " +
           "ContainerPath:%s", containerData.getContainerID(), containerData
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestHddsVolumeUtil.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestHddsVolumeUtil.java
index 9ec248e38319..74cbcf22af9f 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestHddsVolumeUtil.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestHddsVolumeUtil.java
@@ -42,6 +42,8 @@ import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
 import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
 import org.apache.hadoop.ozone.container.common.volume.StorageVolume;
+import org.jboss.byteman.contrib.bmunit.BMRule;
+import org.jboss.byteman.contrib.bmunit.WithByteman;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -52,6 +54,7 @@
 /**
  * Test for {@link HddsVolumeUtil}.
  */
+@WithByteman
 public class TestHddsVolumeUtil {
   @TempDir
   private Path tempDir;
@@ -127,6 +130,22 @@ public void testLoadHDDVolumeWithInitDBException()
 
   }
 
+  @Test
+  @BMRule(name = "Throw exception when DatanodeStoreCache#getDB is called",
+      targetClass = "org.apache.hadoop.ozone.container.common.utils.DatanodeStoreCache",
+      targetMethod = "getDB",
+      action = "THROW new IOException(\"Mock Byteman Exception\")")
+  public void testCheckVolumeWithGetDbException() throws Exception {
+    for (HddsVolume hddsVolume : StorageVolumeUtil.getHddsVolumesList(hddsVolumeSet.getVolumesList())) {
+      hddsVolume.format(clusterId);
+      hddsVolume.createWorkingDir(clusterId, null);
+    }
+
+    for (HddsVolume hddsVolume : StorageVolumeUtil.getHddsVolumesList(hddsVolumeSet.getVolumesList())) {
+      assertEquals(VolumeCheckResult.FAILED, hddsVolume.check(false));
+    }
+  }
+
   @Test
   public void testLoadAllHddsVolumeDbStoreWithoutDbVolumes()
       throws IOException {
diff --git a/pom.xml b/pom.xml
index 8af4dddef75e..71ca91e1116e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -40,6 +40,7 @@
     0.8.0.RELEASE
     1.80
    3.6.0
+    <byteman.version>4.0.24</byteman.version>
    2.0
    9.3
    true
@@ -165,7 +166,7 @@
    3.6.0
    3.21.0
    3.3.1
-    -Xmx8192m -XX:+HeapDumpOnOutOfMemoryError
+    -Xmx8192m -XX:+HeapDumpOnOutOfMemoryError -Djdk.attach.allowAttachSelf=true
    3.5.2
    ${maven-surefire-plugin.version}
@@ -1300,6 +1301,36 @@
       <artifactId>assertj-core</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.jboss.byteman</groupId>
+      <artifactId>byteman</artifactId>
+      <version>${byteman.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.jboss.byteman</groupId>
+      <artifactId>byteman-bmunit</artifactId>
+      <version>${byteman.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.jboss.byteman</groupId>
+      <artifactId>byteman-bmunit5</artifactId>
+      <version>${byteman.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.jboss.byteman</groupId>
+      <artifactId>byteman-install</artifactId>
+      <version>${byteman.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.jboss.byteman</groupId>
+      <artifactId>byteman-submit</artifactId>
+      <version>${byteman.version}</version>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.junit.jupiter</groupId>
       <artifactId>junit-jupiter-api</artifactId>

From 7e8b8fdf1f2ebd8782b8ad7163e08a3fcc15803a Mon Sep 17 00:00:00 2001
From: Rishabh Patel
Date: Thu, 10 Apr 2025 20:45:16 -0700
Subject: [PATCH 2/7] HDDS-12723. Use getLiveFiles to validate DB
 accessibility. Remove Byteman.
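
The previous patch only verified that DatanodeStoreCache would hand back a
handle; a handle served from the cache can still point at a database whose
files are gone. Calling getLiveFiles() forces RocksDB to read the MANIFEST
and enumerate the SST files, so a wiped or corrupted DB directory surfaces
as an exception. A minimal, self-contained sketch of that probe against the
raw RocksJava API (class and method names here are illustrative; the patch
reaches the same getLiveFiles() call through Ozone's RawDB/RDBStore
wrappers):

    import java.io.File;
    import org.rocksdb.Options;
    import org.rocksdb.RocksDB;
    import org.rocksdb.RocksDBException;

    /** Probes a RocksDB directory by listing its live SST files. */
    public final class DbLivenessProbe {
      private DbLivenessProbe() {
      }

      /** Returns true if the DB at dbPath can be opened and its manifest read. */
      public static boolean isReadable(File dbPath) {
        try (Options options = new Options();
            RocksDB db = RocksDB.openReadOnly(options, dbPath.toString())) {
          db.getLiveFiles(); // reads the MANIFEST; throws if files are missing
          return true;
        } catch (RocksDBException e) {
          return false;
        }
      }
    }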
---
 .../container/common/volume/HddsVolume.java    |  8 ++-
 .../keyvalue/helpers/BlockUtils.java           |  1 -
 .../common/utils/TestHddsVolumeUtil.java       | 19 ------
 ...estDatanodeHddsVolumeFailureDetection.java  | 65 +++++++++++++++++++
 pom.xml                                        | 33 +---------
 5 files changed, 72 insertions(+), 54 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
index 019b7c187dcd..0afe8bfa90d2 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hdds.annotation.InterfaceStability;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature;
+import org.apache.hadoop.hdds.utils.db.RDBStore;
 import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult;
 import org.apache.hadoop.ozone.container.common.impl.StorageLocationReport;
 import org.apache.hadoop.ozone.container.common.utils.DatanodeStoreCache;
@@ -277,7 +278,8 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)
       return VolumeCheckResult.FAILED;
     }
 
-    if (!getDatanodeConfig().getContainerSchemaV3Enabled()) {
+    if (result != VolumeCheckResult.HEALTHY ||
+        !getDatanodeConfig().getContainerSchemaV3Enabled() || !isDbLoaded()) {
       return result;
     }
 
@@ -292,7 +294,9 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)
     DatanodeStoreCache cache = DatanodeStoreCache.getInstance();
     Preconditions.checkNotNull(cache);
     try {
-      cache.getDB(String.valueOf(dbFile), getConf());
+      RawDB db = cache.getDB(String.valueOf(dbFile), getConf());
+      RDBStore store = (RDBStore)db.getStore().getStore();
+      store.getDb().getManagedRocksDb().get().getLiveFiles();
     } catch (IOException e) {
       LOG.error("Could not open Volume DB located at {}", dbFile, e);
       return VolumeCheckResult.FAILED;
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java
index 9f9d138cfd49..730689539f94 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/BlockUtils.java
@@ -136,7 +136,6 @@ public static DBHandle getDB(KeyValueContainerData containerData,
             containerData.getSchemaVersion(), conf);
       }
     } catch (IOException ex) {
-// TODO: does the volume scanner get triggered twice? here and upon receiving the exception?
       onFailure(containerData.getVolume());
       String message = String.format("Error opening DB. Container:%s " +
           "ContainerPath:%s", containerData.getContainerID(), containerData
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestHddsVolumeUtil.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestHddsVolumeUtil.java
index 74cbcf22af9f..9ec248e38319 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestHddsVolumeUtil.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestHddsVolumeUtil.java
@@ -42,8 +42,6 @@ import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
 import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
 import org.apache.hadoop.ozone.container.common.volume.StorageVolume;
-import org.jboss.byteman.contrib.bmunit.BMRule;
-import org.jboss.byteman.contrib.bmunit.WithByteman;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -54,7 +52,6 @@
 /**
  * Test for {@link HddsVolumeUtil}.
  */
-@WithByteman
 public class TestHddsVolumeUtil {
   @TempDir
   private Path tempDir;
@@ -130,22 +127,6 @@ public void testLoadHDDVolumeWithInitDBException()
 
   }
 
-  @Test
-  @BMRule(name = "Throw exception when DatanodeStoreCache#getDB is called",
-      targetClass = "org.apache.hadoop.ozone.container.common.utils.DatanodeStoreCache",
-      targetMethod = "getDB",
-      action = "THROW new IOException(\"Mock Byteman Exception\")")
-  public void testCheckVolumeWithGetDbException() throws Exception {
-    for (HddsVolume hddsVolume : StorageVolumeUtil.getHddsVolumesList(hddsVolumeSet.getVolumesList())) {
-      hddsVolume.format(clusterId);
-      hddsVolume.createWorkingDir(clusterId, null);
-    }
-
-    for (HddsVolume hddsVolume : StorageVolumeUtil.getHddsVolumesList(hddsVolumeSet.getVolumesList())) {
-      assertEquals(VolumeCheckResult.FAILED, hddsVolume.check(false));
-    }
-  }
-
   @Test
   public void testLoadAllHddsVolumeDbStoreWithoutDbVolumes()
       throws IOException {
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
index fdea9054ce7a..e2fb5736eaa0 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
@@ -24,9 +24,11 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_REPLICATION;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertInstanceOf;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.File;
 import java.io.IOException;
@@ -61,9 +63,11 @@ import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
 import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
 import org.apache.hadoop.ozone.container.common.volume.StorageVolume;
+import org.apache.hadoop.ozone.container.common.volume.StorageVolumeChecker;
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
 import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
 import org.apache.hadoop.ozone.dn.DatanodeTestUtils;
+import org.apache.hadoop.util.Timer;
 import org.junit.jupiter.api.Timeout;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.ValueSource;
@@ -241,6 +245,67 @@ void corruptDbFile(boolean schemaV3) throws Exception {
     }
   }
 
+  @ParameterizedTest
+  @ValueSource(booleans = {true})
+  void corruptDbFileWithoutDbHandleCacheInvalidation(boolean schemaV3) throws Exception {
+    try (MiniOzoneCluster cluster = newCluster(schemaV3)) {
+      try (OzoneClient client = cluster.newClient()) {
+        OzoneBucket bucket = TestDataUtil.createVolumeAndBucket(client);
+
+        // write a file, will create container1
+        String keyName = UUID.randomUUID().toString();
+        long containerId = createKey(bucket, keyName);
+
+        // close container1
+        HddsDatanodeService dn = cluster.getHddsDatanodes().get(0);
+        OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
+        Container c1 = oc.getContainerSet().getContainer(containerId);
+        c1.close();
+
+        // create container2, and container1 is kicked out of cache
+        OzoneConfiguration conf = cluster.getConf();
+        try (ScmClient scmClient = new ContainerOperationClient(conf)) {
+          ContainerWithPipeline c2 = scmClient.createContainer(
+              ReplicationType.STAND_ALONE, ReplicationFactor.ONE,
+              OzoneConsts.OZONE);
+          assertEquals(c2.getContainerInfo().getState(), LifeCycleState.OPEN);
+        }
+
+        // corrupt db by rename dir->file
+        File dbDir;
+        if (schemaV3) {
+          dbDir = new File(((KeyValueContainerData) (c1.getContainerData()))
+              .getDbFile().getAbsolutePath());
+        } else {
+          File metadataDir = new File(c1.getContainerFile().getParent());
+          dbDir = new File(metadataDir, "1" + OzoneConsts.DN_CONTAINER_DB);
+        }
+
+        MutableVolumeSet volSet = oc.getVolumeSet();
+        StorageVolume vol0 = volSet.getVolumesList().get(0);
+
+        try {
+          DatanodeTestUtils.injectDataDirFailure(dbDir);
+
+          // simulate bad volume by removing write permission on root dir
+          // refer to HddsVolume.check()
+          DatanodeTestUtils.simulateBadVolume(vol0);
+
+          assertFalse(vol0.isFailed());
+          StorageVolumeChecker storageVolumeChecker = new StorageVolumeChecker(conf, new Timer(), "");
+          storageVolumeChecker.registerVolumeSet(volSet);
+          storageVolumeChecker.checkAllVolumeSets();
+          assertTrue(vol0.isFailed());
+          DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);
+        } finally {
+          // restore all
+          DatanodeTestUtils.restoreBadVolume(vol0);
+          DatanodeTestUtils.restoreDataDirFromFailure(dbDir);
+        }
+      }
+    }
+  }
+
   private static void readKeyToTriggerCheckVolumeAsync(OzoneBucket bucket,
       String key) throws IOException {
     try (InputStream is = bucket.readKey(key)) {
diff --git a/pom.xml b/pom.xml
index 71ca91e1116e..8af4dddef75e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -40,7 +40,6 @@
    0.8.0.RELEASE
    1.80
    3.6.0
-    <byteman.version>4.0.24</byteman.version>
    2.0
    9.3
    true
@@ -166,7 +165,7 @@
    3.6.0
    3.21.0
    3.3.1
-    -Xmx8192m -XX:+HeapDumpOnOutOfMemoryError -Djdk.attach.allowAttachSelf=true
+    -Xmx8192m -XX:+HeapDumpOnOutOfMemoryError
    3.5.2
    ${maven-surefire-plugin.version}
@@ -1301,36 +1300,6 @@
       <artifactId>assertj-core</artifactId>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <groupId>org.jboss.byteman</groupId>
-      <artifactId>byteman</artifactId>
-      <version>${byteman.version}</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.jboss.byteman</groupId>
-      <artifactId>byteman-bmunit</artifactId>
-      <version>${byteman.version}</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.jboss.byteman</groupId>
-      <artifactId>byteman-bmunit5</artifactId>
-      <version>${byteman.version}</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.jboss.byteman</groupId>
-      <artifactId>byteman-install</artifactId>
-      <version>${byteman.version}</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.jboss.byteman</groupId>
-      <artifactId>byteman-submit</artifactId>
-      <version>${byteman.version}</version>
-      <scope>test</scope>
-    </dependency>
     <dependency>
       <groupId>org.junit.jupiter</groupId>
       <artifactId>junit-jupiter-api</artifactId>

From 59a501af52dfaca4194a041860901605b6e10370 Mon Sep 17 00:00:00 2001
From: Rishabh Patel
Date: Tue, 15 Apr 2025 09:53:28 -0700
Subject: [PATCH 3/7] HDDS-12723. RocksDB read-only volume test

---
 .../container/common/volume/HddsVolume.java    | 62 ++++++++++++++++---
 ...estDatanodeHddsVolumeFailureDetection.java  | 26 ++++----
 2 files changed, 65 insertions(+), 23 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
index 0afe8bfa90d2..28062c8a635a 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
@@ -22,26 +22,31 @@ import static org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil.initPerDiskDBStore;
 
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
 import jakarta.annotation.Nullable;
 import java.io.File;
 import java.io.IOException;
 import java.util.List;
+import java.util.Queue;
+import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.hdds.annotation.InterfaceAudience;
 import org.apache.hadoop.hdds.annotation.InterfaceStability;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature;
-import org.apache.hadoop.hdds.utils.db.RDBStore;
 import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult;
+import org.apache.hadoop.ozone.OzoneConsts;
 import org.apache.hadoop.ozone.container.common.impl.StorageLocationReport;
 import org.apache.hadoop.ozone.container.common.utils.DatanodeStoreCache;
 import org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil;
 import org.apache.hadoop.ozone.container.common.utils.RawDB;
 import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
+import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils;
+import org.apache.hadoop.ozone.container.metadata.DatanodeStore;
+import org.apache.hadoop.ozone.container.metadata.DatanodeStoreSchemaThreeImpl;
 import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController;
 import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures;
 import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures.SchemaV3;
@@ -99,6 +104,11 @@ public class HddsVolume extends StorageVolume {
   private AtomicBoolean dbLoaded = new AtomicBoolean(false);
   private final AtomicBoolean dbLoadFailure = new AtomicBoolean(false);
 
+  private final int volumeTestCount;
+  private final int volumeTestFailureTolerance;
+  private AtomicInteger volumeTestFailureCount;
+  private Queue<Boolean> volumeTestResultQueue;
+
   /**
    * Builder for HddsVolume.
    */
@@ -131,6 +141,11 @@ private HddsVolume(Builder b) throws IOException {
       this.volumeInfoMetrics =
           new VolumeInfoMetrics(b.getVolumeRootStr(), this);
 
+      this.volumeTestCount = getDatanodeConfig().getVolumeIOTestCount();
+      this.volumeTestFailureTolerance = getDatanodeConfig().getVolumeIOFailureTolerance();
+      this.volumeTestFailureCount = new AtomicInteger(0);
+      this.volumeTestResultQueue = new ConcurrentLinkedQueue<>();
+
       initialize();
     } else {
       // Builder is called with failedVolume set, so create a failed volume
@@ -138,6 +153,8 @@ private HddsVolume(Builder b) throws IOException {
       this.setState(VolumeState.FAILED);
       volumeIOStats = null;
       volumeInfoMetrics = new VolumeInfoMetrics(b.getVolumeRootStr(), this);
+      this.volumeTestCount = 0;
+      this.volumeTestFailureTolerance = 0;
     }
 
     LOG.info("HddsVolume: {}", getReport());
@@ -291,17 +308,42 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)
       return VolumeCheckResult.FAILED;
     }
 
-    DatanodeStoreCache cache = DatanodeStoreCache.getInstance();
-    Preconditions.checkNotNull(cache);
-    try {
-      RawDB db = cache.getDB(String.valueOf(dbFile), getConf());
-      RDBStore store = (RDBStore)db.getStore().getStore();
-      store.getDb().getManagedRocksDb().get().getLiveFiles();
-    } catch (IOException e) {
-      LOG.error("Could not open Volume DB located at {}", dbFile, e);
+    return checkDbHealth(dbFile);
+  }
+
+  @VisibleForTesting
+  public VolumeCheckResult checkDbHealth(File dbFile) {
+    if (volumeTestCount == 0) {
+      return VolumeCheckResult.HEALTHY;
+    }
+
+    Boolean isVolumeTestResultHealthy = Boolean.TRUE;
+    try (DatanodeStore readOnlyStore
+        = BlockUtils.getUncachedDatanodeStore(dbFile.toString(), OzoneConsts.SCHEMA_V3, getConf(), true)) {
+//        = new DatanodeStoreSchemaThreeImpl(getConf(), dbFile.toString(), true)) {
+
+      volumeTestResultQueue.add(isVolumeTestResultHealthy);
+    } catch (AssertionError | Exception e) {
+      LOG.warn("Could not open Volume DB located at {}", dbFile, e);
+      volumeTestResultQueue.add(!isVolumeTestResultHealthy);
+      volumeTestFailureCount.incrementAndGet();
+    }
+
+    if (volumeTestResultQueue.size() > volumeTestCount) {
+      if (!volumeTestResultQueue.isEmpty() &&
+          volumeTestResultQueue.poll() != isVolumeTestResultHealthy) {
+        volumeTestFailureCount.decrementAndGet();
+      }
+    }
+
+    if (volumeTestFailureCount.get() > volumeTestFailureTolerance) {
+      LOG.error("Failed volume test for volume {}: the last {} runs encountered {} out of {} tolerated failures.",
+          this, volumeTestResultQueue.size(), volumeTestFailureCount.get(), volumeTestFailureTolerance);
       return VolumeCheckResult.FAILED;
     }
 
+    LOG.info("IO test results for volume {}: the last {} runs encountered {} out of {} tolerated failures",
+        this, volumeTestResultQueue.size(), volumeTestFailureTolerance, volumeTestFailureTolerance);
     return VolumeCheckResult.HEALTHY;
   }
 
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
index e2fb5736eaa0..90d3490f022c 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
@@ -24,11 +24,9 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_REPLICATION;
 import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertInstanceOf;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
-import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.File;
 import java.io.IOException;
@@ -48,6 +46,7 @@ import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient;
 import org.apache.hadoop.hdds.scm.client.ScmClient;
 import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
+import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult;
 import org.apache.hadoop.ozone.HddsDatanodeService;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
 import org.apache.hadoop.ozone.OzoneConsts;
@@ -63,21 +62,21 @@ import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
 import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
 import org.apache.hadoop.ozone.container.common.volume.StorageVolume;
-import org.apache.hadoop.ozone.container.common.volume.StorageVolumeChecker;
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
 import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
 import org.apache.hadoop.ozone.dn.DatanodeTestUtils;
-import org.apache.hadoop.util.Timer;
 import org.junit.jupiter.api.Timeout;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.ValueSource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * This class tests datanode can detect failed volumes.
  */
 @Timeout(300)
 class TestDatanodeHddsVolumeFailureDetection {
-
+  private static final Logger LOG = LoggerFactory.getLogger(TestDatanodeHddsVolumeFailureDetection.class);
   private static final int KEY_SIZE = 128;
 
   @ParameterizedTest
@@ -282,27 +281,28 @@ void corruptDbFileWithoutDbHandleCacheInvalidation(boolean schemaV3) throws Exce
         }
 
         MutableVolumeSet volSet = oc.getVolumeSet();
-        StorageVolume vol0 = volSet.getVolumesList().get(0);
+        HddsVolume vol0 = (HddsVolume) volSet.getVolumesList().get(0);
 
         try {
           DatanodeTestUtils.injectDataDirFailure(dbDir);
-
           // simulate bad volume by removing write permission on root dir
           // refer to HddsVolume.check()
           DatanodeTestUtils.simulateBadVolume(vol0);
 
-          assertFalse(vol0.isFailed());
-          StorageVolumeChecker storageVolumeChecker = new StorageVolumeChecker(conf, new Timer(), "");
-          storageVolumeChecker.registerVolumeSet(volSet);
-          storageVolumeChecker.checkAllVolumeSets();
-          assertTrue(vol0.isFailed());
-          DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);
+          assertEquals(VolumeCheckResult.HEALTHY, vol0.checkDbHealth(dbDir));
+          assertEquals(VolumeCheckResult.FAILED, vol0.checkDbHealth(dbDir));
+        } catch (Exception e) {
+          LOG.error("Exception occurred while running test", e);
         } finally {
           // restore all
           DatanodeTestUtils.restoreBadVolume(vol0);
           DatanodeTestUtils.restoreDataDirFromFailure(dbDir);
         }
+      } catch (Exception e) {
+        LOG.error("Exception occurred while running test", e);
       }
+    } catch (Exception e) {
+      LOG.error("Exception occurred while running test", e);
     }
   }

From ae1d274a200e6f76fb7a22e198a2a4587d396ab3 Mon Sep 17 00:00:00 2001
From: Rishabh Patel
Date: Tue, 15 Apr 2025 11:04:11 -0700
Subject: [PATCH 4/7] HDDS-12723. Use a sliding window of read-only rocksdb
 instances as the volume health check
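
Instead of failing the volume on the first bad open, keep a bounded queue
of the most recent probe results and fail only when the failures inside
that window exceed a configured tolerance. A distilled sketch of the window
bookkeeping (class and method names are illustrative, not the exact code in
this patch):

    import java.util.LinkedList;
    import java.util.Queue;

    /**
     * Declares a volume unhealthy once more than tolerance of the last
     * windowSize probe results were failures.
     */
    final class SlidingWindowHealth {
      private final int windowSize;
      private final int tolerance;
      private final Queue<Boolean> results = new LinkedList<>();
      private int failures;

      SlidingWindowHealth(int windowSize, int tolerance) {
        this.windowSize = windowSize;
        this.tolerance = tolerance;
      }

      /** Records one probe result; returns whether the volume is still healthy. */
      synchronized boolean record(boolean healthy) {
        results.add(healthy);
        if (!healthy) {
          failures++;
        }
        // Evict the oldest result once the window is full.
        if (results.size() > windowSize && !results.poll()) {
          failures--; // the evicted result was a failure
        }
        return failures <= tolerance;
      }
    }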
---
 .../container/common/volume/HddsVolume.java      | 16 ++++++----------
 .../TestDatanodeHddsVolumeFailureDetection.java  |  9 +++------
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
index 28062c8a635a..2d366a8de11e 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
@@ -38,19 +38,17 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature;
 import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult;
-import org.apache.hadoop.ozone.OzoneConsts;
 import org.apache.hadoop.ozone.container.common.impl.StorageLocationReport;
 import org.apache.hadoop.ozone.container.common.utils.DatanodeStoreCache;
 import org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil;
 import org.apache.hadoop.ozone.container.common.utils.RawDB;
 import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
-import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils;
-import org.apache.hadoop.ozone.container.metadata.DatanodeStore;
-import org.apache.hadoop.ozone.container.metadata.DatanodeStoreSchemaThreeImpl;
 import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController;
 import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures;
 import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures.SchemaV3;
 import org.apache.hadoop.util.Time;
+import org.rocksdb.Options;
+import org.rocksdb.RocksDB;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -318,12 +316,10 @@ public VolumeCheckResult checkDbHealth(File dbFile) {
       return VolumeCheckResult.HEALTHY;
     }
 
     Boolean isVolumeTestResultHealthy = Boolean.TRUE;
-    try (DatanodeStore readOnlyStore
-        = BlockUtils.getUncachedDatanodeStore(dbFile.toString(), OzoneConsts.SCHEMA_V3, getConf(), true)) {
-//        = new DatanodeStoreSchemaThreeImpl(getConf(), dbFile.toString(), true)) {
-
-      volumeTestResultQueue.add(isVolumeTestResultHealthy);
-    } catch (AssertionError | Exception e) {
+    try (Options options = new Options().setCreateIfMissing(true);
+        RocksDB readDB = RocksDB.openReadOnly(options, dbFile.toString())) {
+      volumeTestResultQueue.add(isVolumeTestResultHealthy);
+    } catch (Exception e) {
       LOG.warn("Could not open Volume DB located at {}", dbFile, e);
       volumeTestResultQueue.add(!isVolumeTestResultHealthy);
       volumeTestFailureCount.incrementAndGet();
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
index 90d3490f022c..ef028582bd5d 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
@@ -289,20 +289,17 @@ void corruptDbFileWithoutDbHandleCacheInvalidation(boolean schemaV3) throws Exce
           // refer to HddsVolume.check()
           DatanodeTestUtils.simulateBadVolume(vol0);
 
+          // one volume health check got automatically executed when the cluster started
+          // the second health check should log the rocksdb failure but return a healthy-volume status
           assertEquals(VolumeCheckResult.HEALTHY, vol0.checkDbHealth(dbDir));
+          // the third health check should log the rocksdb failure and return a failed-volume status
           assertEquals(VolumeCheckResult.FAILED, vol0.checkDbHealth(dbDir));
-        } catch (Exception e) {
-          LOG.error("Exception occurred while running test", e);
         } finally {
           // restore all
           DatanodeTestUtils.restoreBadVolume(vol0);
           DatanodeTestUtils.restoreDataDirFromFailure(dbDir);
         }
-      } catch (Exception e) {
-        LOG.error("Exception occurred while running test", e);
       }
-    } catch (Exception e) {
-      LOG.error("Exception occurred while running test", e);
     }
   }

From c69bc7cdaf9ce50a96cabe17b9d19025af9c5eaa Mon Sep 17 00:00:00 2001
From: Rishabh Patel
Date: Tue, 15 Apr 2025 12:05:44 -0700
Subject: [PATCH 5/7] HDDS-12723. Use a sliding window of read-only rocksdb
 instances as the volume health check

---
 .../ozone/container/common/volume/HddsVolume.java      | 10 +++++-----
 .../hadoop/hdds/utils/db/managed/ManagedRocksDB.java   |  7 +++++++
 .../volume/TestDatanodeHddsVolumeFailureDetection.java |  3 ---
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
index 2d366a8de11e..7558448ad0f6 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
@@ -37,6 +37,8 @@ import org.apache.hadoop.hdds.annotation.InterfaceStability;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature;
+import org.apache.hadoop.hdds.utils.db.managed.ManagedOptions;
+import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB;
 import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult;
 import org.apache.hadoop.ozone.container.common.impl.StorageLocationReport;
 import org.apache.hadoop.ozone.container.common.utils.DatanodeStoreCache;
@@ -47,8 +49,6 @@ import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
 import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures;
 import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures.SchemaV3;
 import org.apache.hadoop.util.Time;
-import org.rocksdb.Options;
-import org.rocksdb.RocksDB;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -315,9 +315,9 @@ public VolumeCheckResult checkDbHealth(File dbFile) {
       return VolumeCheckResult.HEALTHY;
     }
 
-    Boolean isVolumeTestResultHealthy = Boolean.TRUE;
-    try (Options options = new Options().setCreateIfMissing(true);
-        RocksDB readDB = RocksDB.openReadOnly(options, dbFile.toString())) {
+    boolean isVolumeTestResultHealthy = true;
+    try (ManagedOptions managedOptions = new ManagedOptions();
+        ManagedRocksDB readOnlyDb = ManagedRocksDB.openReadOnly(managedOptions, dbFile.toString())) {
       volumeTestResultQueue.add(isVolumeTestResultHealthy);
     } catch (Exception e) {
       LOG.warn("Could not open Volume DB located at {}", dbFile, e);
diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java
index cf16ac5fc99c..bd8f86a6efbb 100644
--- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java
+++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java
@@ -46,6 +46,13 @@ public class ManagedRocksDB extends ManagedObject {
     super(original);
   }
 
+  public static ManagedRocksDB openReadOnly(
+      final ManagedOptions options,
+      final String path)
+      throws RocksDBException {
+    return new ManagedRocksDB(RocksDB.openReadOnly(options, path));
+  }
+
   public static ManagedRocksDB openReadOnly(
       final ManagedDBOptions options, final String path,
       final List<ColumnFamilyDescriptor> columnFamilyDescriptors,
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
index ef028582bd5d..94b0c1e0289a 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
@@ -68,15 +68,12 @@ import org.apache.hadoop.ozone.dn.DatanodeTestUtils;
 import org.junit.jupiter.api.Timeout;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.ValueSource;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * This class tests datanode can detect failed volumes.
  */
 @Timeout(300)
 class TestDatanodeHddsVolumeFailureDetection {
-  private static final Logger LOG = LoggerFactory.getLogger(TestDatanodeHddsVolumeFailureDetection.class);
   private static final int KEY_SIZE = 128;
 
   @ParameterizedTest

From f280bb1912b94300891147f4430e164d2608e8b9 Mon Sep 17 00:00:00 2001
From: Rishabh Patel
Date: Thu, 17 Apr 2025 15:35:23 -0700
Subject: [PATCH 6/7] HDDS-12723. Use a sliding window of read-only rocksdb
 instances as the volume health check
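
The volume checker runs probes on a pool that gets interrupted during
shutdown, and the RocksDB JNI layer can surface that interrupt as an
ordinary exception. Without special handling, a cancelled check would be
scored as a disk failure. Rethrowing InterruptedException lets the caller
tell "cancelled" apart from "broken". The pattern, roughly (openReadOnly is
a hypothetical stand-in for the managed read-only open, not an Ozone API):

    /** Returns true if the DB opened cleanly; false counts as a probe failure. */
    boolean probeOnce(java.io.File dbFile) throws InterruptedException {
      try {
        openReadOnly(dbFile); // hypothetical read-only open of the volume DB
        return true;
      } catch (Exception e) {
        if (Thread.currentThread().isInterrupted()) {
          // Not a volume failure; the check itself was cancelled.
          throw new InterruptedException("DB check for " + dbFile + " interrupted");
        }
        return false;
      }
    }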
---
 .../container/common/volume/HddsVolume.java    | 23 +++++++++++--------
 .../common/volume/StorageVolume.java           |  2 +-
 ...estDatanodeHddsVolumeFailureDetection.java  |  9 ++++++++
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
index 7558448ad0f6..915a3c9b4e77 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
@@ -310,36 +310,39 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)
   }
 
   @VisibleForTesting
-  public VolumeCheckResult checkDbHealth(File dbFile) {
+  public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException {
     if (volumeTestCount == 0) {
       return VolumeCheckResult.HEALTHY;
     }
 
-    boolean isVolumeTestResultHealthy = true;
+    final boolean isVolumeTestResultHealthy = true;
     try (ManagedOptions managedOptions = new ManagedOptions();
         ManagedRocksDB readOnlyDb = ManagedRocksDB.openReadOnly(managedOptions, dbFile.toString())) {
       volumeTestResultQueue.add(isVolumeTestResultHealthy);
     } catch (Exception e) {
+      if (Thread.currentThread().isInterrupted()) {
+        throw new InterruptedException("Check of database for volume " + this + " interrupted.");
+      }
       LOG.warn("Could not open Volume DB located at {}", dbFile, e);
       volumeTestResultQueue.add(!isVolumeTestResultHealthy);
       volumeTestFailureCount.incrementAndGet();
     }
 
-    if (volumeTestResultQueue.size() > volumeTestCount) {
-      if (!volumeTestResultQueue.isEmpty() &&
-          volumeTestResultQueue.poll() != isVolumeTestResultHealthy) {
+    if (volumeTestResultQueue.size() > volumeTestCount
+        && volumeTestResultQueue.poll() != isVolumeTestResultHealthy) {
         volumeTestFailureCount.decrementAndGet();
-      }
     }
 
     if (volumeTestFailureCount.get() > volumeTestFailureTolerance) {
-      LOG.error("Failed volume test for volume {}: the last {} runs encountered {} out of {} tolerated failures.",
-          this, volumeTestResultQueue.size(), volumeTestFailureCount.get(), volumeTestFailureTolerance);
+      LOG.error("Failed to open the database at \"{}\" for HDDS volume {}: " +
+          "the last {} runs encountered {} out of {} tolerated failures.",
+          dbFile, this, volumeTestResultQueue.size(), volumeTestFailureCount.get(), volumeTestFailureTolerance);
       return VolumeCheckResult.FAILED;
     }
 
-    LOG.info("IO test results for volume {}: the last {} runs encountered {} out of {} tolerated failures",
-        this, volumeTestResultQueue.size(), volumeTestFailureTolerance, volumeTestFailureTolerance);
+    LOG.debug("Successfully opened the database at \"{}\" for HDDS volume {}: " +
+        "the last {} runs encountered {} out of {} tolerated failures",
+        dbFile, this, volumeTestResultQueue.size(), volumeTestFailureCount.get(), volumeTestFailureTolerance);
     return VolumeCheckResult.HEALTHY;
   }
 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java
index fe0fa896921b..2c02c02f59e2 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java
@@ -693,7 +693,7 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)
     // Once the volume is failed, it will not be checked anymore.
     // The failure counts can be left as is.
     if (currentIOFailureCount.get() > ioFailureTolerance) {
-      LOG.info("Failed IO test for volume {}: the last {} runs " +
+      LOG.error("Failed IO test for volume {}: the last {} runs " +
           "encountered {} out of {} tolerated failures.", this,
           ioTestSlidingWindow.size(), currentIOFailureCount,
           ioFailureTolerance);
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
index 94b0c1e0289a..af6e5b490974 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
@@ -241,6 +241,15 @@ void corruptDbFile(boolean schemaV3) throws Exception {
     }
   }
 
+  /**
+   * {@link HddsVolume#check(Boolean)} will capture the failures injected by this test and not allow the
+   * test to reach the helper method {@link HddsVolume#checkDbHealth}.
+   * As a workaround, we test the helper method directly. Since the helper assumes the per-volume
+   * (schema V3) DB layout, schemas older than V3 cannot be covered here.
+   *
+   * @param schemaV3 whether the cluster under test uses container schema V3
+   * @throws Exception
+   */
   @ParameterizedTest
   @ValueSource(booleans = {true})
   void corruptDbFileWithoutDbHandleCacheInvalidation(boolean schemaV3) throws Exception {

From d6ed9dfbc5399d26b7303553ad410cd41f24996d Mon Sep 17 00:00:00 2001
From: Rishabh Patel
Date: Mon, 21 Apr 2025 13:12:11 -0700
Subject: [PATCH 7/7] HDDS-12723. Use a sliding window of read-only rocksdb
 instances as the volume health check
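
Two cleanups. The result window moves from ConcurrentLinkedQueue to a plain
LinkedList, since the queue is only touched from the synchronized check
path. And the eviction test switches to Boolean.TRUE.equals(poll()),
because comparing the polled Boolean against a primitive auto-unboxes and
would throw NullPointerException if poll() ever returned null. A small
demonstration of that unboxing hazard (illustrative code, not part of the
patch):

    import java.util.LinkedList;
    import java.util.Queue;

    public class PollDemo {
      public static void main(String[] args) {
        Queue<Boolean> window = new LinkedList<>();
        boolean healthy = true;

        // poll() on an empty queue returns null; unboxing null throws.
        try {
          if (window.poll() != healthy) {
            System.out.println("unreachable");
          }
        } catch (NullPointerException expected) {
          System.out.println("null from poll() cannot be unboxed");
        }

        // Null-safe form, matching the comparison this patch lands on:
        if (Boolean.TRUE.equals(window.poll()) != healthy) {
          System.out.println("oldest result missing or unhealthy");
        }
      }
    }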
---
 .../hadoop/ozone/container/common/volume/HddsVolume.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
index 915a3c9b4e77..cc9be3892bed 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
@@ -25,9 +25,9 @@ import static org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil.initPerDiskDBStore;
 import jakarta.annotation.Nullable;
 import java.io.File;
 import java.io.IOException;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Queue;
-import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
@@ -142,7 +142,7 @@ private HddsVolume(Builder b) throws IOException {
       this.volumeTestCount = getDatanodeConfig().getVolumeIOTestCount();
       this.volumeTestFailureTolerance = getDatanodeConfig().getVolumeIOFailureTolerance();
       this.volumeTestFailureCount = new AtomicInteger(0);
-      this.volumeTestResultQueue = new ConcurrentLinkedQueue<>();
+      this.volumeTestResultQueue = new LinkedList<>();
 
       initialize();
     } else {
@@ -329,8 +329,8 @@ public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException
     }
 
     if (volumeTestResultQueue.size() > volumeTestCount
-        && volumeTestResultQueue.poll() != isVolumeTestResultHealthy) {
-        volumeTestFailureCount.decrementAndGet();
+        && (Boolean.TRUE.equals(volumeTestResultQueue.poll()) != isVolumeTestResultHealthy)) {
+      volumeTestFailureCount.decrementAndGet();
     }
 
     if (volumeTestFailureCount.get() > volumeTestFailureTolerance) {