From 3ee4c80e58ab52df1a4106770dbe7ccef151c150 Mon Sep 17 00:00:00 2001 From: DaveTeng0 Date: Mon, 29 Apr 2024 17:13:12 -0700 Subject: [PATCH 01/28] HDDS-8101. Add FSO repair tool to ozone CLI in read-only and repair modes --- .../hadoop/fs/ozone/TestFSORepairTool.java | 577 +++++++++++++++ hadoop-ozone/tools/pom.xml | 195 +---- .../hadoop/ozone/debug/FSORepairCLI.java | 78 ++ .../hadoop/ozone/debug/FSORepairTool.java | 687 ++++++++++++++++++ 4 files changed, 1344 insertions(+), 193 deletions(-) create mode 100644 hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairCLI.java create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java new file mode 100644 index 000000000000..e23ce993a22c --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -0,0 +1,577 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.ozone; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; +import org.apache.hadoop.ozone.client.BucketArgs; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.client.io.OzoneOutputStream; +import org.apache.hadoop.ozone.debug.FSORepairTool; +import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.junit.jupiter.api.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; + +/** + * FSORepairTool test cases. + */ +public class TestFSORepairTool { + public static final Logger LOG = + LoggerFactory.getLogger(TestFSORepairTool.class); + + private static MiniOzoneHAClusterImpl cluster; + private static FileSystem fs; + private static OzoneClient client; + + + @BeforeAll + public static void init() throws Exception { + // Set configs. + OzoneConfiguration conf = new OzoneConfiguration(); + // deletion services will be triggered manually. + conf.setTimeDuration(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, + 1_000_000, TimeUnit.SECONDS); + conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 1_000_000, + TimeUnit.SECONDS); + conf.setInt(OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK, 10); + conf.setInt(OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK, 10); + conf.setBoolean(OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY, true); + // Since delete services use RocksDB iterators, make sure the double + // buffer is flushed between runs. + conf.setInt(OMConfigKeys.OZONE_OM_UNFLUSHED_TRANSACTION_MAX_COUNT, 1); + + // Build cluster. + cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf) + .setNumOfOzoneManagers(1) + .setOMServiceId("omservice") + .setNumDatanodes(3) + .build(); + cluster.waitForClusterToBeReady(); + + // Init ofs. 
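+    // The resulting root path looks like "ofs://<om-node-id>/" (for example
+    // "ofs://omNode-1/"; the exact id is taken from the cluster under test),
+    // so absolute paths such as /vol1/bucket1 resolve through this OM.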
+ final String rootPath = String.format("%s://%s/", + OZONE_OFS_URI_SCHEME, cluster.getOzoneManager().getOMNodeId()); + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); + fs = FileSystem.get(conf); + client = OzoneClientFactory.getRpcClient("omservice", conf); + } + + @AfterEach + public void cleanNamespace() throws Exception { + if (fs.exists(new Path("/vol1"))) { + fs.delete(new Path("/vol1"), true); + } + if (fs.exists(new Path("/vol2"))) { + fs.delete(new Path("/vol2"), true); + } + runDeletes(); + assertFileAndDirTablesEmpty(); + } + + @AfterAll + public static void teardown() { + if (cluster != null) { + cluster.shutdown(); + } + IOUtils.closeQuietly(fs); + } + + @Test + public void testConnectedTreeOneBucket() throws Exception { + FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1"); + + // Test the connected tree in debug mode. + FSORepairTool repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), true); + FSORepairTool.Report debugReport = repair.run(); + + Assertions.assertEquals(expectedReport, debugReport); + assertConnectedTreeReadable("vol1", "bucket1"); + assertDeleteTablesEmpty(); + + // Running again in repair mode should give same results since the tree + // is connected. + repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), false); + FSORepairTool.Report repairReport = repair.run(); + + Assertions.assertEquals(expectedReport, repairReport); + assertConnectedTreeReadable("vol1", "bucket1"); + assertDeleteTablesEmpty(); + } + + @Test + public void testReportedDataSize() throws Exception { + FSORepairTool.Report report1 = buildDisconnectedTree("vol1", "bucket1", 10); + FSORepairTool.Report report2 = buildConnectedTree("vol1", "bucket2", 10); + FSORepairTool.Report expectedReport = new FSORepairTool.Report(report1, report2); + + FSORepairTool repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), false); + FSORepairTool.Report debugReport = repair.run(); + Assertions.assertEquals(expectedReport, debugReport); + } + + @Test + public void testMultipleBucketsAndVolumes() throws Exception { + FSORepairTool.Report report1 = buildConnectedTree("vol1", "bucket1"); + FSORepairTool.Report report2 = buildDisconnectedTree("vol2", "bucket2"); + FSORepairTool.Report expectedAggregateReport = new FSORepairTool.Report( + report1, report2); + + FSORepairTool repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), false); + FSORepairTool.Report generatedReport = repair.run(); + + Assertions.assertEquals(generatedReport, expectedAggregateReport); + assertConnectedTreeReadable("vol1", "bucket1"); + assertDisconnectedTreePartiallyReadable("vol2", "bucket2"); + assertDisconnectedObjectsMarkedForDelete(1); + } + + /** + * Tests having multiple entries in the deleted file and directory tables + * for the same objects. + */ + @Test + public void testDeleteOverwrite() throws Exception { + // Create files and dirs under dir1. To make sure they are added to the + // delete table, the keys must have data. + buildConnectedTree("vol1", "bucket1", 10); + // Move soon to be disconnected objects to the deleted table. + fs.delete(new Path("/vol1/bucket1/dir1/dir2/file3"), true); + fs.delete(new Path("/vol1/bucket1/dir1/dir2"), true); + fs.delete(new Path("/vol1/bucket1/dir1/file1"), true); + fs.delete(new Path("/vol1/bucket1/dir1/file2"), true); + + // Recreate deleted objects, then disconnect dir1. + // This means after the repair runs, these objects will be + // the deleted tables multiple times. 
Some will have the same dir1 parent ID + // in their key name too. + ContractTestUtils.touch(fs, new Path("/vol1/bucket1/dir1/dir2/file3")); + ContractTestUtils.touch(fs, new Path("/vol1/bucket1/dir1/file1")); + ContractTestUtils.touch(fs, new Path("/vol1/bucket1/dir1/file2")); + disconnectDirectory("dir1"); + + FSORepairTool repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), false); + FSORepairTool.Report generatedReport = repair.run(); + + Assertions.assertEquals(1, generatedReport.getUnreachableDirs()); + Assertions.assertEquals(3, generatedReport.getUnreachableFiles()); + + assertDisconnectedObjectsMarkedForDelete(2); + } + + @Test + public void testEmptyFileTrees() throws Exception { + // Run when there are no file trees. + FSORepairTool repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), false); + FSORepairTool.Report generatedReport = repair.run(); + Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); + assertDeleteTablesEmpty(); + + // Create an empty volume and bucket. + fs.mkdirs(new Path("/vol1")); + fs.mkdirs(new Path("/vol2/bucket1")); + + // Run on an empty volume and bucket. + repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), false); + generatedReport = repair.run(); + Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); + assertDeleteTablesEmpty(); + } + + @Test + public void testNonFSOBucketsSkipped() throws Exception { + ObjectStore store = client.getObjectStore(); + try { + // Create legacy and OBS buckets. + store.createVolume("vol1"); + store.getVolume("vol1").createBucket("obs-bucket", + BucketArgs.newBuilder().setBucketLayout(BucketLayout.OBJECT_STORE) + .build()); + store.getVolume("vol1").createBucket("legacy-bucket", + BucketArgs.newBuilder().setBucketLayout(BucketLayout.LEGACY) + .build()); + + // Put a key in the legacy and OBS buckets. + OzoneOutputStream obsStream = store.getVolume("vol1") + .getBucket("obs-bucket") + .createKey("prefix/test-key", 3); + obsStream.write(new byte[]{1, 1, 1}); + obsStream.close(); + + OzoneOutputStream legacyStream = store.getVolume("vol1") + .getBucket("legacy-bucket") + .createKey("prefix/test-key", 3); + legacyStream.write(new byte[]{1, 1, 1}); + legacyStream.close(); + + // Add an FSO bucket with data. + FSORepairTool.Report connectReport = buildConnectedTree("vol1", "fso" + + "-bucket"); + + // Even in repair mode there should be no action. legacy and obs buckets + // will be skipped and FSO tree is connected. + FSORepairTool repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), false); + FSORepairTool.Report generatedReport = repair.run(); + + Assertions.assertEquals(connectReport, generatedReport); + assertConnectedTreeReadable("vol1", "fso-bucket"); + assertDeleteTablesEmpty(); + } finally { + // Need to manually delete obs bucket. It cannot be deleted with ofs as + // part of the normal test cleanup. + store.getVolume("vol1").getBucket("obs-bucket") + .deleteKey("prefix/test-key"); + store.getVolume("vol1").deleteBucket("obs-bucket"); + } + } + + + private FSORepairTool.Report buildConnectedTree(String volume, String bucket) + throws Exception { + return buildConnectedTree(volume, bucket, 0); + } + + /** + * Creates a tree with 3 reachable directories and 4 reachable files. 
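+   * The layout under the bucket is dir1/file1, dir1/file2, dir1/dir2/file3,
+   * dir3, and file4 at the bucket root.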
+ */ + private FSORepairTool.Report buildConnectedTree(String volume, String bucket, + int fileSize) + throws Exception { + Path bucketPath = new Path("/" + volume + "/" + bucket); + Path dir1 = new Path(bucketPath, "dir1"); + Path file1 = new Path(dir1, "file1"); + Path file2 = new Path(dir1, "file2"); + + Path dir2 = new Path(bucketPath, "dir1/dir2"); + Path file3 = new Path(dir2, "file3"); + + Path dir3 = new Path(bucketPath, "dir3"); + Path file4 = new Path(bucketPath, "file4"); + + fs.mkdirs(dir1); + fs.mkdirs(dir2); + fs.mkdirs(dir3); + + // Content to put in every file. + String data = new String(new char[fileSize]); + + FSDataOutputStream stream = fs.create(file1); + stream.write(data.getBytes(StandardCharsets.UTF_8)); + stream.close(); + stream = fs.create(file2); + stream.write(data.getBytes(StandardCharsets.UTF_8)); + stream.close(); + stream = fs.create(file3); + stream.write(data.getBytes(StandardCharsets.UTF_8)); + stream.close(); + stream = fs.create(file4); + stream.write(data.getBytes(StandardCharsets.UTF_8)); + stream.close(); + + assertConnectedTreeReadable(volume, bucket); + + return new FSORepairTool.Report.Builder() + .setReachableDirs(3) + .setReachableFiles(4) + .setReachableBytes(fileSize * 4L) + .build(); + } + + private void assertConnectedTreeReadable(String volume, String bucket) + throws IOException { + Path bucketPath = new Path("/" + volume + "/" + bucket); + Path dir1 = new Path(bucketPath, "dir1"); + Path file1 = new Path(dir1, "file1"); + Path file2 = new Path(dir1, "file2"); + + Path dir2 = new Path(bucketPath, "dir1/dir2"); + Path file3 = new Path(dir2, "file3"); + + Path dir3 = new Path(bucketPath, "dir3"); + Path file4 = new Path(bucketPath, "file4"); + + Assertions.assertTrue(fs.exists(dir1)); + Assertions.assertTrue(fs.exists(dir2)); + Assertions.assertTrue(fs.exists(dir3)); + Assertions.assertTrue(fs.exists(file1)); + Assertions.assertTrue(fs.exists(file2)); + Assertions.assertTrue(fs.exists(file3)); + Assertions.assertTrue(fs.exists(file4)); + } + + private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket) + throws Exception { + return buildDisconnectedTree(volume, bucket, 0); + } + + /** + * Creates a tree with 2 reachable directories, 1 reachable file, 1 + * unreachable directory, and 3 unreachable files. + */ + private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, + int fileSize) throws Exception { + buildConnectedTree(volume, bucket, fileSize); + + // Manually remove dir1. This should disconnect 3 of the files and 1 of + // the directories. + disconnectDirectory("dir1"); + + assertDisconnectedTreePartiallyReadable(volume, bucket); + + return new FSORepairTool.Report.Builder() + .setReachableDirs(1) + .setReachableFiles(1) + .setReachableBytes(fileSize) + // dir1 does not count towards the unreachable directories the tool + // will see. It was deleted completely so the tool will never see it. 
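+        // What remains unreachable: dir2 (its parent dir1 is gone), plus
+        // file1 and file2 under dir1 and file3 under dir2.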
+        .setUnreachableDirs(1)
+        .setUnreachableFiles(3)
+        .setUnreachableBytes(fileSize * 3L)
+        .build();
+  }
+
+  private void disconnectDirectory(String dirName) throws Exception {
+    OzoneManager leader = cluster.getOMLeader();
+    Table<String, OmDirectoryInfo> dirTable =
+        leader.getMetadataManager().getDirectoryTable();
+    try (TableIterator<String, ? extends Table.KeyValue<String, OmDirectoryInfo>> iterator =
+             dirTable.iterator()) {
+      while (iterator.hasNext()) {
+        Table.KeyValue<String, OmDirectoryInfo> entry = iterator.next();
+        String key = entry.getKey();
+        if (key.contains(dirName)) {
+          dirTable.delete(key);
+          break;
+        }
+      }
+    }
+  }
+
+  private void assertDisconnectedTreePartiallyReadable(
+      String volume, String bucket) throws Exception {
+    Path bucketPath = new Path("/" + volume + "/" + bucket);
+    Path dir1 = new Path(bucketPath, "dir1");
+    Path file1 = new Path(dir1, "file1");
+    Path file2 = new Path(dir1, "file2");
+
+    Path dir2 = new Path(bucketPath, "dir1/dir2");
+    Path file3 = new Path(dir2, "file3");
+
+    Path dir3 = new Path(bucketPath, "dir3");
+    Path file4 = new Path(bucketPath, "file4");
+
+    Assertions.assertFalse(fs.exists(dir1));
+    Assertions.assertFalse(fs.exists(dir2));
+    Assertions.assertTrue(fs.exists(dir3));
+    Assertions.assertFalse(fs.exists(file1));
+    Assertions.assertFalse(fs.exists(file2));
+    Assertions.assertFalse(fs.exists(file3));
+    Assertions.assertTrue(fs.exists(file4));
+  }
+
+  /**
+   * Checks that the disconnected tree's unreachable objects are correctly
+   * moved to the delete table. If the tree was written and deleted multiple
+   * times, it makes sure the delete entries with the same name are preserved.
+   */
+  private void assertDisconnectedObjectsMarkedForDelete(int numWrites)
+      throws Exception {
+
+    Map<String, Integer> pendingDeleteDirCounts = new HashMap<>();
+
+    // Check deleted directory table.
+    OzoneManager leader = cluster.getOMLeader();
+    Table<String, OmKeyInfo> deletedDirTable =
+        leader.getMetadataManager().getDeletedDirTable();
+    try (TableIterator<String, ? extends Table.KeyValue<String, OmKeyInfo>> iterator =
+             deletedDirTable.iterator()) {
+      while (iterator.hasNext()) {
+        Table.KeyValue<String, OmKeyInfo> entry = iterator.next();
+        String key = entry.getKey();
+        OmKeyInfo value = entry.getValue();
+
+        String dirName = key.split("/")[4];
+        LOG.info("In deletedDirTable, extracting directory name {} from DB " +
+            "key {}", dirName, key);
+
+        // Check that the correct dir info was added.
+        // FSO delete path will fill in the whole path to the key in the
+        // proto when it is deleted. Once the tree is disconnected that can't
+        // be done, so just make sure the dirName is contained in the key
+        // name somewhere.
+        Assertions.assertTrue(value.getKeyName().contains(dirName));
+
+        int count = pendingDeleteDirCounts.getOrDefault(dirName, 0);
+        pendingDeleteDirCounts.put(dirName, count + 1);
+      }
+    }
+
+    // 1 directory is disconnected in the tree. dir1 was totally deleted so
+    // the repair tool will not see it.
+    Assertions.assertEquals(1, pendingDeleteDirCounts.size());
+    Assertions.assertEquals(numWrites, pendingDeleteDirCounts.get("dir2"));
+
+    // Check that disconnected files were put in the deleted tables.
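+    // Deleted-table keys end in the file name, e.g. an illustrative key is
+    // "/-123/-456/-789/file1", so the last path segment is compared against
+    // the expected file names below.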
+ Map pendingDeleteFileCounts = new HashMap<>(); + + Table deletedFileTable = + leader.getMetadataManager().getDeletedTable(); + try (TableIterator> iterator = + deletedFileTable.iterator()) { + while (iterator.hasNext()) { + Table.KeyValue entry = iterator.next(); + String key = entry.getKey(); + RepeatedOmKeyInfo value = entry.getValue(); + + String[] keyParts = key.split("/"); + String fileName = keyParts[keyParts.length - 1]; + + LOG.info("In deletedTable, extracting file name {} from DB " + + "key {}", fileName, key); + + for (OmKeyInfo fileInfo: value.getOmKeyInfoList()) { + // Check that the correct file info was added. + Assertions.assertTrue(fileInfo.getKeyName().contains(fileName)); + + int count = pendingDeleteFileCounts.getOrDefault(fileName, 0); + pendingDeleteFileCounts.put(fileName, count + 1); + } + } + } + + // 3 files are disconnected in the tree. + // TODO: dir2 ended up in here with count = 1. file3 also had count=1 + // Likely that the dir2/file3 entry got split in two. + Assertions.assertEquals(3, pendingDeleteFileCounts.size()); + Assertions.assertEquals(numWrites, pendingDeleteFileCounts.get("file1")); + Assertions.assertEquals(numWrites, pendingDeleteFileCounts.get("file2")); + Assertions.assertEquals(numWrites, pendingDeleteFileCounts.get("file3")); + } + +// @Test +// public void testOnSavedDB() throws Exception { +// /* +// Path dir1 = new Path("/vol1/bucket1/dir1"); +// Path file1 = new Path(dir1, "file1"); +// Path file2 = new Path(dir1, "file2"); +// +// Path dir2 = new Path("/vol1/bucket1/dir1/dir2"); +// Path file3 = new Path(dir2, "file3"); +// +// Path dir3 = new Path("/vol1/bucket1/dir3"); +// Path file4 = new Path("/vol1/bucket1/file4"); +// +// ContractTestUtils.touch(fs, file1); +// ContractTestUtils.touch(fs, file2); +// ContractTestUtils.touch(fs, file3); +// ContractTestUtils.touch(fs, file4); +// fs.mkdirs(dir3); +// */ +// FsoRepair repair = new FsoRepair("/Users/erose/Temp/omNode-1/om.db", +// FsoRepair.Mode.DEBUG); +// repair.run(); +// +// /* +// Original: +// Expected: +// 3 reachable dirs, 4 reachable files. +// +// After remove dir1: +// 3 unreachable files, 1 unreachable dir. +// 1 reachable file, 1 reachable dir. +// +// */ +// } + + private void assertDeleteTablesEmpty() throws IOException { + OzoneManager leader = cluster.getOMLeader(); + Assertions.assertTrue(leader.getMetadataManager().getDeletedDirTable().isEmpty()); + Assertions.assertTrue(leader.getMetadataManager().getDeletedTable().isEmpty()); + } + + private void runDeletes() throws Exception { + OzoneManager leader = cluster.getOMLeader(); + + int i = 0; + while (!leader.getMetadataManager().getDeletedDirTable().isEmpty()) { + LOG.info("Running iteration {} of DirectoryDeletingService.", i++); + leader.getKeyManager().getDirDeletingService().runPeriodicalTaskNow(); + // Wait for work from this run to flush through the double buffer. + Thread.sleep(500); + } + + i = 0; + while (!leader.getMetadataManager().getDeletedTable().isEmpty()) { + LOG.info("Running iteration {} of KeyDeletingService.", i++); + leader.getKeyManager().getDeletingService().runPeriodicalTaskNow(); + // Wait for work from this run to flush through the double buffer. 
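+      // OZONE_KEY_DELETING_LIMIT_PER_TASK is capped at 10 in init(), so
+      // several passes may be needed before the deleted table drains.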
+ Thread.sleep(500); + } + } + + private void assertFileAndDirTablesEmpty() throws Exception { + OzoneManager leader = cluster.getOMLeader(); + Assertions.assertTrue(leader.getMetadataManager().getDirectoryTable().isEmpty()); + Assertions.assertTrue(leader.getMetadataManager().getFileTable().isEmpty()); + } + + private DBStore getOmDB() { + return cluster.getOMLeader().getMetadataManager().getStore(); + } + + private String getOmDBLocation() { + return cluster.getOMLeader().getMetadataManager().getStore().getDbLocation().toString(); + } +} diff --git a/hadoop-ozone/tools/pom.xml b/hadoop-ozone/tools/pom.xml index 924408122594..839d01f0fa84 100644 --- a/hadoop-ozone/tools/pom.xml +++ b/hadoop-ozone/tools/pom.xml @@ -20,55 +20,15 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone - 2.0.0-SNAPSHOT + 1.5.0-SNAPSHOT ozone-tools - 2.0.0-SNAPSHOT + 1.5.0-SNAPSHOT Apache Ozone Tools Apache Ozone Tools jar - - false - - - - org.apache.ozone - hdds-client - - - org.apache.ozone - hdds-common - - - org.apache.ozone - hdds-config - - - org.apache.ozone - hdds-container-service - - - org.apache.ozone - hdds-interface-admin - - - org.apache.ozone - hdds-interface-client - - - org.apache.ozone - hdds-interface-server - - - org.apache.ozone - hdds-managed-rocksdb - - - org.apache.ozone - hdds-server-scm - org.apache.ozone ozone-manager @@ -85,22 +45,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone-client - - org.apache.ozone - ozone-filesystem-common - org.apache.ozone ozone-filesystem - - org.apache.ozone - ozone-interface-client - - - org.apache.ozone - ozone-interface-storage - org.apache.ozone hdds-server-framework @@ -120,59 +68,6 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> hdds-hadoop-dependency-server - - commons-codec - commons-codec - - - commons-io - commons-io - - - org.apache.commons - commons-lang3 - - - org.apache.httpcomponents - httpclient - - - org.apache.httpcomponents - httpcore - - - org.apache.ratis - ratis-client - - - org.apache.ratis - ratis-common - - - org.apache.ratis - ratis-proto - - - org.apache.ratis - ratis-server-api - - - org.apache.ratis - ratis-thirdparty-misc - - - org.apache.ratis - ratis-tools - - - org.apache.ratis - ratis-shell - - - - info.picocli - picocli - jakarta.xml.bind jakarta.xml.bind-api @@ -181,10 +76,6 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.glassfish.jaxb jaxb-runtime - - jakarta.annotation - jakarta.annotation-api - jakarta.activation jakarta.activation-api @@ -193,14 +84,6 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> io.dropwizard.metrics metrics-core - - io.opentracing - opentracing-api - - - io.opentracing - opentracing-util - com.amazonaws aws-java-sdk-core @@ -209,42 +92,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> com.amazonaws aws-java-sdk-s3 - - com.fasterxml.jackson.core - jackson-annotations - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - com.fasterxml.jackson.datatype - jackson-datatype-jsr310 - - - com.google.guava - guava - - - org.jooq - jooq - org.kohsuke.metainf-services metainf-services - - org.rocksdb - rocksdbjni - - - org.slf4j - slf4j-api - @@ -270,48 +121,6 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> 2048 - - org.apache.maven.plugins - maven-compiler-plugin - - - - org.apache.ozone - hdds-config - ${hdds.version} - - - org.kohsuke.metainf-services - metainf-services - ${metainf-services.version} - - - - org.apache.hadoop.hdds.conf.ConfigFileGenerator - 
org.kohsuke.metainf_services.AnnotationProcessorImpl - - - - - org.apache.maven.plugins - maven-enforcer-plugin - - - ban-annotations - - - - Only selected annotation processors are enabled, see configuration of maven-compiler-plugin. - - org.apache.hadoop.ozone.om.request.validation.RequestFeatureValidator - org.apache.hadoop.hdds.scm.metadata.Replicate - - - - - - - diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairCLI.java new file mode 100644 index 000000000000..6d936ef7e23a --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairCLI.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.debug; + +import org.apache.hadoop.hdds.cli.SubcommandWithParent; +import org.kohsuke.MetaInfServices; +import picocli.CommandLine; + +import java.util.concurrent.Callable; + +/** + * Parser for scm.db file. + */ +@CommandLine.Command( + name = "fso-repair", + description = "Identify a disconnected FSO tree, and optionally mark " + + "unreachable entries for deletion. OM should be " + + "stopped while this tool is run. Information will be logged at " + + "INFO and DEBUG levels." +) +@MetaInfServices(SubcommandWithParent.class) +public class FSORepairCLI implements Callable, SubcommandWithParent { + + @CommandLine.Option(names = {"--db"}, + required = true, + description = "Path to OM RocksDB") + private String dbPath; + + @CommandLine.Option(names = {"--read-mode-only", "-r"}, + required = true, + description = + "Mode to run the tool in. Read-mode will just log information about unreachable files or directories;" + + "otherwise the tool will move those files and directories to the deleted tables.", + defaultValue = "true") + private boolean readModeOnly; + + @CommandLine.ParentCommand + private OzoneDebug parent; + + @Override + public Void call() throws Exception { + + try { + // TODO case insensitive enum options. + FSORepairTool repairTool = new FSORepairTool(dbPath, readModeOnly); + repairTool.run(); + } catch (Exception ex) { + throw new IllegalArgumentException("FSO repair failed: " + ex.getMessage()); + } + + System.out.printf("FSO %s finished. See client logs for results.%n", + readModeOnly ? 
"read-mode" : "repair-mode"); + + return null; + } + + @Override + public Class getParentType() { + return OzoneDebug.class; + } +} + diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java new file mode 100644 index 000000000000..b67699a46578 --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java @@ -0,0 +1,687 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.debug; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.db.BatchOperation; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.ozone.OmUtils; +import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; +import org.apache.hadoop.ozone.om.helpers.*; +import org.apache.hadoop.ozone.om.request.file.OMFileRequest; +import org.apache.ratis.util.Preconditions; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.Holder; +import org.rocksdb.Options; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; + + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.*; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; + +/** + * Tool to identify and repair disconnected FSO trees in all buckets. + * The tool can be run in debug mode, where it will just log information + * about unreachable files or directories, or in repair mode to additionally + * move those files and directories to the deleted tables. If deletes are + * still in progress (the deleted directory table is not empty), the tool may + * report that the tree is disconnected, even though pending deletes would + * fix the issue. + * + * Before using the tool, make sure all OMs are stopped, + * and that all Ratis logs have been flushed to the OM DB. This can be + * done using `ozone admin prepare` before running the tool, and `ozone admin + * cancelprepare` when done. + * + * The tool will run a DFS from each bucket, and save all reachable + * directories as keys in a new temporary RocksDB instance called "reachable.db" + * In the same directory as om.db. + * will then scan the entire file and directory tables for each bucket to see + * if each object's parent is in the reachable table of reachable.db. 
The + * reachable table will be dropped and recreated for each bucket. + * The tool is idempotent. reachable.db will not be deleted automatically + * when the tool finishes, in case users want to manually inspect it. It can + * be safely deleted once the tool finishes. + */ +public class FSORepairTool { + public static final Logger LOG = + LoggerFactory.getLogger(FSORepairTool.class); + + private final String omDBPath; + + private final DBStore store; + private final Table volumeTable; + private final Table bucketTable; + private final Table directoryTable; + private final Table fileTable; + private final Table deletedDirectoryTable; + private final Table deletedTable; + // The temporary DB is used to track which items have been seen. + // Since usage of this DB is simple, use it directly from + // RocksDB. + private String reachableDBPath; + private static final byte[] REACHABLE_TABLE = + "reachable".getBytes(StandardCharsets.UTF_8); + private ColumnFamilyHandle reachableCF; + private RocksDB reachableDB; + + private boolean readModeOnly; + + private long reachableBytes; + private long reachableFiles; + private long reachableDirs; + private long unreachableBytes; + private long unreachableFiles; + private long unreachableDirs; + + + public FSORepairTool(String dbPath, boolean readModeOnly) throws IOException { + this(getStoreFromPath(dbPath), dbPath, readModeOnly); + } + + /** + * Allows passing RocksDB instance from a MiniOzoneCluster directly to this + * class for testing. + */ + @VisibleForTesting + public FSORepairTool(DBStore dbStore, String dbPath, boolean readModeOnly) throws IOException { + this.readModeOnly = readModeOnly; + // Counters to track as we walk the tree. + reachableBytes = 0; + reachableFiles = 0; + reachableDirs = 0; + unreachableBytes = 0; + unreachableFiles = 0; + unreachableDirs = 0; + + this.store = dbStore; + this.omDBPath = dbPath; + volumeTable = store.getTable(OmMetadataManagerImpl.VOLUME_TABLE, + String.class, + OmVolumeArgs.class); + bucketTable = store.getTable(OmMetadataManagerImpl.BUCKET_TABLE, + String.class, + OmBucketInfo.class); + directoryTable = store.getTable(OmMetadataManagerImpl.DIRECTORY_TABLE, + String.class, + OmDirectoryInfo.class); + fileTable = store.getTable(OmMetadataManagerImpl.FILE_TABLE, + String.class, + OmKeyInfo.class); + deletedDirectoryTable = store.getTable( + OmMetadataManagerImpl.DELETED_DIR_TABLE, + String.class, + OmKeyInfo.class); + deletedTable = store.getTable( + OmMetadataManagerImpl.DELETED_TABLE, + String.class, + RepeatedOmKeyInfo.class); + } + + private static DBStore getStoreFromPath(String dbPath) throws IOException { + File omDBFile = new File(dbPath); + if (!omDBFile.exists() || !omDBFile.isDirectory()) { + throw new IOException(String.format("Specified OM DB instance %s does " + + "not exist or is not a RocksDB directory.", dbPath)); + } + // Load RocksDB and tables needed. + return OmMetadataManagerImpl.loadDB(new OzoneConfiguration(), + new File(dbPath).getParentFile()); + } + + public Report run() throws IOException { + // Iterate all volumes. + try (TableIterator> + volumeIterator = volumeTable.iterator()) { + openReachableDB(); + + while (volumeIterator.hasNext()) { + Table.KeyValue volumeEntry = + volumeIterator.next(); + String volumeKey = volumeEntry.getKey(); + + // Iterate all buckets in the volume. 
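+        // Bucket table keys have the form "/volumeName/bucketName", so
+        // seeking to the volume key lands on this volume's first bucket.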
+ try (TableIterator> + bucketIterator = bucketTable.iterator()) { + bucketIterator.seek(volumeKey); + while (bucketIterator.hasNext()) { + Table.KeyValue bucketEntry = + bucketIterator.next(); + String bucketKey = bucketEntry.getKey(); + OmBucketInfo bucketInfo = bucketEntry.getValue(); + + if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) { + LOG.debug("Skipping non-FSO bucket {}", bucketKey); + continue; + } + + // Stop this loop once we have seen all buckets in the current + // volume. + if (!bucketKey.startsWith(volumeKey)) { + break; + } + + // Start with a fresh list of reachable files for this bucket. + // Also clears partial state if the tool failed on a previous run. + dropReachableTableIfExists(); + createReachableTable(); + // Process one bucket's FSO tree at a time. + markReachableObjectsInBucket(volumeEntry.getValue(), bucketInfo); + handleUnreachableObjects(volumeEntry.getValue(), bucketInfo); + dropReachableTableIfExists(); + } + } + } + } finally { + closeReachableDB(); + } + + return buildReportAndLog(); + } + + private Report buildReportAndLog() { + Report report = new Report.Builder() + .setReachableDirs(reachableDirs) + .setReachableFiles(reachableFiles) + .setReachableBytes(reachableBytes) + .setUnreachableDirs(unreachableDirs) + .setUnreachableFiles(unreachableFiles) + .setUnreachableBytes(unreachableBytes) + .build(); + + LOG.info("\n{}", report); + return report; + } + + private void markReachableObjectsInBucket(OmVolumeArgs volume, + OmBucketInfo bucket) throws IOException { + LOG.info("Processing bucket {}", bucket.getBucketName()); + // Only put directories in the stack. + // Directory keys should have the form /volumeID/bucketID/parentID/name. + Stack dirKeyStack = new Stack<>(); + + // Since the tool uses parent directories to check for reachability, add + // a reachable entry for the bucket as well. + addReachableEntry(volume, bucket, bucket); + // Initialize the stack with all immediate child directories of the + // bucket, and mark them all as reachable. + Collection childDirs = + getChildDirectoriesAndMarkAsReachable(volume, bucket, bucket); + dirKeyStack.addAll(childDirs); + + while (!dirKeyStack.isEmpty()) { + // Get one directory and process its immediate children. + String currentDirKey = dirKeyStack.pop(); + OmDirectoryInfo currentDir = directoryTable.get(currentDirKey); + if (currentDir == null) { + LOG.error("Directory key {} to be processed was not found in the " + + "directory table", currentDirKey); + continue; + } + + // TODO revisit this for a more memory efficient implementation, + // possibly making better use of RocksDB iterators. + childDirs = getChildDirectoriesAndMarkAsReachable(volume, bucket, + currentDir); + dirKeyStack.addAll(childDirs); + } + } + + private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException { + // Check for unreachable directories in the bucket. + String bucketPrefix = OM_KEY_PREFIX + + volume.getObjectID() + + OM_KEY_PREFIX + + bucket.getObjectID(); + + try (TableIterator> dirIterator = + directoryTable.iterator()) { + dirIterator.seek(bucketPrefix); + while (dirIterator.hasNext()) { + Table.KeyValue dirEntry = dirIterator.next(); + String dirKey = dirEntry.getKey(); + + // Only search directories in this bucket. 
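+        // Directory table keys have the form /volumeID/bucketID/parentID/name,
+        // so the first key that no longer starts with bucketPrefix belongs to
+        // another bucket and the scan can stop.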
+        if (!dirKey.startsWith(bucketPrefix)) {
+          break;
+        }
+
+        if (!isReachable(dirKey)) {
+          LOG.debug("Found unreachable directory: {}", dirKey);
+          unreachableDirs++;
+
+          if (!readModeOnly) {
+            LOG.debug("Marking unreachable directory {} for deletion.", dirKey);
+            OmDirectoryInfo dirInfo = dirEntry.getValue();
+            markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(),
+                dirKey, dirInfo);
+          }
+        }
+      }
+    }
+
+    // Check for unreachable files.
+    try (TableIterator<String, ? extends Table.KeyValue<String, OmKeyInfo>>
+             fileIterator = fileTable.iterator()) {
+      fileIterator.seek(bucketPrefix);
+      while (fileIterator.hasNext()) {
+        Table.KeyValue<String, OmKeyInfo> fileEntry = fileIterator.next();
+        String fileKey = fileEntry.getKey();
+        // Only search files in this bucket.
+        if (!fileKey.startsWith(bucketPrefix)) {
+          break;
+        }
+
+        OmKeyInfo fileInfo = fileEntry.getValue();
+        if (!isReachable(fileKey)) {
+          LOG.debug("Found unreachable file: {}", fileKey);
+          unreachableBytes += fileInfo.getDataSize();
+          unreachableFiles++;
+
+          if (!readModeOnly) {
+            LOG.debug("Marking unreachable file {} for deletion.",
+                fileKey);
+            markFileForDeletion(fileKey, fileInfo);
+          }
+        } else {
+          // NOTE: We are deserializing the proto of every reachable file
+          // just to log its size. If we don't need this information we could
+          // save time by skipping this step.
+          reachableBytes += fileInfo.getDataSize();
+          reachableFiles++;
+        }
+      }
+    }
+  }
+
+  private void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IOException {
+    try (BatchOperation batch = store.initBatchOperation()) {
+      fileTable.deleteWithBatch(batch, fileKey);
+
+      RepeatedOmKeyInfo originalRepeatedKeyInfo = deletedTable.get(fileKey);
+      RepeatedOmKeyInfo updatedRepeatedOmKeyInfo = OmUtils.prepareKeyForDelete(
+          fileInfo, fileInfo.getUpdateID(), true);
+      // NOTE: The FSO code seems to write the open key entry with the whole
+      // path, using the object's names instead of their ID. This would only
+      // be possible when the file is deleted explicitly, and not part of a
+      // directory delete. It is also not possible here if the file's parent
+      // is gone. The name of the key does not matter so just use IDs.
+      deletedTable.putWithBatch(batch, fileKey, updatedRepeatedOmKeyInfo);
+
+      LOG.debug("Added entry {} to the deleted table: {}",
+          fileKey, updatedRepeatedOmKeyInfo);
+
+      store.commitBatchOperation(batch);
+    }
+  }
+
+  private void markDirectoryForDeletion(String volumeName, String bucketName,
+      String dirKeyName, OmDirectoryInfo dirInfo) throws IOException {
+    try (BatchOperation batch = store.initBatchOperation()) {
+      directoryTable.deleteWithBatch(batch, dirKeyName);
+      // HDDS-7592: Make directory entries in deleted dir table unique.
+      String deleteDirKeyName =
+          dirKeyName + OM_KEY_PREFIX + dirInfo.getObjectID();
+
+      // Convert the directory to OmKeyInfo for deletion.
+      OmKeyInfo dirAsKeyInfo = OMFileRequest.getOmKeyInfo(
+          volumeName, bucketName, dirInfo, dirInfo.getName());
+      deletedDirectoryTable.putWithBatch(batch, deleteDirKeyName, dirAsKeyInfo);
+
+      store.commitBatchOperation(batch);
+    }
+  }
+
+  private Collection<String> getChildDirectoriesAndMarkAsReachable(OmVolumeArgs volume,
+      OmBucketInfo bucket,
+      WithObjectID currentDir) throws IOException {
+
+    Collection<String> childDirs = new ArrayList<>();
+
+    try (TableIterator<String, ? extends Table.KeyValue<String, OmDirectoryInfo>>
+             dirIterator = directoryTable.iterator()) {
+      String dirPrefix = buildReachableKey(volume, bucket, currentDir);
+      // Start searching the directory table at the current directory's
+      // prefix to get its immediate children.
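+      // Immediate children have keys beginning with this directory's
+      // /volumeID/bucketID/objectID prefix, so they sort contiguously in
+      // the table starting at this seek position.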
+ dirIterator.seek(dirPrefix); + while (dirIterator.hasNext()) { + Table.KeyValue childDirEntry = + dirIterator.next(); + String childDirKey = childDirEntry.getKey(); + // Stop processing once we have seen all immediate children of this + // directory. + if (!childDirKey.startsWith(dirPrefix)) { + break; + } + // This directory was reached by search. + addReachableEntry(volume, bucket, childDirEntry.getValue()); + childDirs.add(childDirKey); + reachableDirs++; + } + } + + return childDirs; + } + + /** + * Add the specified object to the reachable table, indicating it is part + * of the connected FSO tree. + */ + private void addReachableEntry(OmVolumeArgs volume, + OmBucketInfo bucket, WithObjectID object) throws IOException { + byte[] reachableKey = buildReachableKey(volume, bucket, object) + .getBytes(StandardCharsets.UTF_8); + try { + // No value is needed for this table. + reachableDB.put(reachableCF, reachableKey, new byte[]{}); + } catch (RocksDBException ex) { + throw new IOException(ex.getMessage(), ex); + } + } + + /** + * Build an entry in the reachable table for the current object, which + * could be a bucket, file or directory. + */ + private static String buildReachableKey(OmVolumeArgs volume, + OmBucketInfo bucket, WithObjectID object) { + return OM_KEY_PREFIX + + volume.getObjectID() + + OM_KEY_PREFIX + + bucket.getObjectID() + + OM_KEY_PREFIX + + object.getObjectID(); + } + + /** + * + * @param fileOrDirKey The key of a file or directory in RocksDB. + * @return true if the entry's parent is in the reachable table. + */ + private boolean isReachable(String fileOrDirKey) throws IOException { + byte[] reachableParentKey = + buildReachableParentKey(fileOrDirKey).getBytes(StandardCharsets.UTF_8); + try { + if (reachableDB.keyMayExist( + reachableCF, reachableParentKey, new Holder<>())) { + return reachableDB.get(reachableCF, reachableParentKey) != null; + } else { + return false; + } + } catch (RocksDBException ex) { + throw new IOException(ex.getMessage(), ex); + } + } + + /** + * Build an entry in the reachable table for the current object's parent + * object. The object could be a file or directory. + */ + private static String buildReachableParentKey(String fileOrDirKey) { + String[] keyParts = fileOrDirKey.split(OM_KEY_PREFIX); + // Should be /volID/bucketID/parentID/name + // The first part will be blank since key begins with a slash. + Preconditions.assertTrue(keyParts.length >= 4); + String volumeID = keyParts[1]; + String bucketID = keyParts[2]; + String parentID = keyParts[3]; + + return OM_KEY_PREFIX + + volumeID + + OM_KEY_PREFIX + + bucketID + + OM_KEY_PREFIX + + parentID; + } + + private void openReachableDB() throws IOException { + File reachableDBFile = new File(new File(omDBPath).getParentFile(), + "reachable.db"); + LOG.info("Creating database of reachable directories at {}", + reachableDBFile); + try { + // Delete the DB from the last run if it exists. 
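+      // A leftover reachable.db from an earlier, possibly interrupted run
+      // could hold stale entries, so it is removed rather than reopened.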
+ if (reachableDBFile.exists()) { + FileUtils.deleteDirectory(reachableDBFile); + } + reachableDBPath = reachableDBFile.toString(); + reachableDB = RocksDB.open(reachableDBPath); + } catch (RocksDBException ex) { + if (reachableDB != null) { + reachableDB.close(); + } + throw new IOException(ex.getMessage(), ex); + } + } + + private void closeReachableDB() { + if (reachableDB != null) { + reachableDB.close(); + } + } + + private void dropReachableTableIfExists() throws IOException { + try { + List availableCFs = RocksDB.listColumnFamilies(new Options(), + reachableDBPath); + boolean cfFound = false; + for (byte[] cfNameBytes: availableCFs) { + if (new String(cfNameBytes, UTF_8).equals(new String(REACHABLE_TABLE, UTF_8))) { + cfFound = true; + break; + } + } + + if (cfFound) { + reachableDB.dropColumnFamily(reachableCF); + } + } catch (RocksDBException ex) { + throw new IOException(ex.getMessage(), ex); + } finally { + if (reachableCF != null) { + reachableCF.close(); + } + } + } + + private void createReachableTable() throws IOException { + try { + reachableCF = reachableDB.createColumnFamily( + new ColumnFamilyDescriptor(REACHABLE_TABLE)); + } catch (RocksDBException ex) { + if (reachableCF != null) { + reachableCF.close(); + } + throw new IOException(ex.getMessage(), ex); + } + } + + /** + * Define a Report to be created. + */ + public static class Report { + private long reachableBytes; + private long reachableFiles; + private long reachableDirs; + private long unreachableBytes; + private long unreachableFiles; + private long unreachableDirs; + + /** + * Builds one report that is the aggregate of multiple others. + */ + public Report(Report... reports) { + reachableBytes = 0; + reachableFiles = 0; + reachableDirs = 0; + unreachableBytes = 0; + unreachableFiles = 0; + unreachableDirs = 0; + + for (Report report: reports) { + reachableBytes += report.reachableBytes; + reachableFiles += report.reachableFiles; + reachableDirs += report.reachableDirs; + unreachableBytes += report.unreachableBytes; + unreachableFiles += report.unreachableFiles; + unreachableDirs += report.unreachableDirs; + } + } + + private Report(Builder builder) { + reachableBytes = builder.reachableBytes; + reachableFiles = builder.reachableFiles; + reachableDirs = builder.reachableDirs; + unreachableBytes = builder.unreachableBytes; + unreachableFiles = builder.unreachableFiles; + unreachableDirs = builder.unreachableDirs; + } + + public long getReachableBytes() { + return reachableBytes; + } + + public long getReachableFiles() { + return reachableFiles; + } + + public long getReachableDirs() { + return reachableDirs; + } + + public long getUnreachableBytes() { + return unreachableBytes; + } + + public long getUnreachableFiles() { + return unreachableFiles; + } + + public long getUnreachableDirs() { + return unreachableDirs; + } + + @Override + public String toString() { + return "Reachable:" + + "\n\tDirectories: " + reachableDirs + + "\n\tFiles: " + reachableFiles + + "\n\tBytes: " + reachableBytes + + "\nUnreachable:" + + "\n\tDirectories: " + unreachableDirs + + "\n\tFiles: " + unreachableFiles + + "\n\tBytes: " + unreachableBytes; + } + + @Override + public boolean equals(Object other) { + if (other == this) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + Report report = (Report) other; + + // Useful for testing. 
+ LOG.debug("Comparing reports\nExpect:\n{}\nActual:\n{}", this, report); + + return reachableBytes == report.reachableBytes && + reachableFiles == report.reachableFiles && + reachableDirs == report.reachableDirs && + unreachableBytes == report.unreachableBytes && + unreachableFiles == report.unreachableFiles && + unreachableDirs == report.unreachableDirs; + } + + @Override + public int hashCode() { + return Objects.hash(reachableBytes, + reachableFiles, + reachableDirs, + unreachableBytes, + unreachableFiles, + unreachableDirs); + } + + /** + * Builder class for a Report. + */ + public static final class Builder { + private long reachableBytes; + private long reachableFiles; + private long reachableDirs; + private long unreachableBytes; + private long unreachableFiles; + private long unreachableDirs; + + public Builder() { + } + + public Builder setReachableBytes(long reachableBytes) { + this.reachableBytes = reachableBytes; + return this; + } + + public Builder setReachableFiles(long reachableFiles) { + this.reachableFiles = reachableFiles; + return this; + } + + public Builder setReachableDirs(long reachableDirs) { + this.reachableDirs = reachableDirs; + return this; + } + + public Builder setUnreachableBytes(long unreachableBytes) { + this.unreachableBytes = unreachableBytes; + return this; + } + + public Builder setUnreachableFiles(long unreachableFiles) { + this.unreachableFiles = unreachableFiles; + return this; + } + + public Builder setUnreachableDirs(long unreachableDirs) { + this.unreachableDirs = unreachableDirs; + return this; + } + + public Report build() { + return new Report(this); + } + } + } +} From 0f272cfadef428e99f7168e869d8cf50b432ed66 Mon Sep 17 00:00:00 2001 From: DaveTeng0 Date: Mon, 29 Apr 2024 21:04:58 -0700 Subject: [PATCH 02/28] fix import --- .../apache/hadoop/fs/ozone/TestFSORepairTool.java | 7 ++++++- .../apache/hadoop/ozone/debug/FSORepairTool.java | 15 ++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java index e23ce993a22c..8d57a6139513 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -41,7 +41,12 @@ import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; -import org.junit.jupiter.api.*; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java index b67699a46578..69c01b5d1027 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java @@ -26,8 +26,14 @@ import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.OmUtils; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import 
org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; +import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.WithObjectID; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; -import org.apache.hadoop.ozone.om.helpers.*; import org.apache.hadoop.ozone.om.request.file.OMFileRequest; import org.apache.ratis.util.Preconditions; import org.rocksdb.ColumnFamilyDescriptor; @@ -37,14 +43,17 @@ import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Objects; +import java.util.Stack; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; From 03287067516d485dc116e918d3da0b47df1f5d40 Mon Sep 17 00:00:00 2001 From: DaveTeng0 Date: Thu, 2 May 2024 17:17:53 -0700 Subject: [PATCH 03/28] extract common codes between FSODebugCLI and FSORepairCLI to separated base class FSOBaseCLI and FSOBaseTool --- .../apache/hadoop/hdds/cli/GenericCli.java | 4 +- .../hdds/utils/db/managed/ManagedRocksDB.java | 42 +++++++++ .../hadoop/fs/ozone/TestFSORepairTool.java | 11 ++- .../FSOBaseCLI.java} | 41 ++++---- .../FSOBaseTool.java} | 93 ++++++++++--------- .../hadoop/ozone/common/package-info.java | 27 ++++++ .../hadoop/ozone/debug/FSODebugCLI.java | 67 +++++++++++++ .../hadoop/ozone/repair/om/FSORepairCLI.java | 67 +++++++++++++ .../hadoop/ozone/repair/om/FSORepairTool.java | 39 ++++++++ 9 files changed, 321 insertions(+), 70 deletions(-) rename hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/{debug/FSORepairCLI.java => common/FSOBaseCLI.java} (64%) rename hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/{debug/FSORepairTool.java => common/FSOBaseTool.java} (89%) create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java index 4c5f3fdc872f..d4857e92f547 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java @@ -39,10 +39,10 @@ public class GenericCli implements Callable, GenericParentCommand { public static final int EXECUTION_ERROR_EXIT_CODE = -1; - @Option(names = {"--verbose"}, +` @Option(names = {"--verbose"}, description = "More verbose output. 
Show the stack trace of the errors.") private boolean verbose; - +` @Option(names = {"-D", "--set"}) private Map configurationOverrides = new HashMap<>(); diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java index 5a5a577351b1..8357a3173525 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java @@ -21,7 +21,9 @@ import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.DBOptions; +import org.rocksdb.Holder; import org.rocksdb.LiveFileMetaData; +import org.rocksdb.Options; import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; import org.slf4j.Logger; @@ -87,6 +89,11 @@ public static ManagedRocksDB open( ); } + public static ManagedRocksDB open(final String path) throws RocksDBException { + return new ManagedRocksDB(RocksDB.open(path)); + } + + /** * Delete liveMetaDataFile from rocks db using RocksDB#deleteFile Api. * This function makes the RocksDB#deleteFile Api synchronized by waiting @@ -102,4 +109,39 @@ public void deleteFile(LiveFileMetaData fileToBeDeleted) File file = new File(fileToBeDeleted.path(), fileToBeDeleted.fileName()); ManagedRocksObjectUtils.waitForFileDelete(file, Duration.ofSeconds(60)); } + + public void put(ColumnFamilyHandle columnFamilyHandle, + byte[] key, byte[] value) throws RocksDBException { + this.get().put(columnFamilyHandle, key, value); + } + + public byte[] get(ColumnFamilyHandle columnFamilyHandle, + byte[] key) throws RocksDBException { + return this.get().get(columnFamilyHandle, key); + } + + public ColumnFamilyHandle createColumnFamily( + ColumnFamilyDescriptor columnFamilyDescriptor) + throws RocksDBException { + return this.get().createColumnFamily(columnFamilyDescriptor); + } + + public void dropColumnFamily(ColumnFamilyHandle columnFamilyHandle) + throws RocksDBException { + this.get().dropColumnFamily(columnFamilyHandle); + } + + public boolean keyMayExist(ColumnFamilyHandle columnFamilyHandle, byte[] key, Holder valueHolder) { + return this.get().keyMayExist(columnFamilyHandle, key, valueHolder); + } + + public void close() { + this.get().close(); + } + + public static List listColumnFamilies(Options options, + String path) throws RocksDBException { + return RocksDB.listColumnFamilies(options, path); + } + } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java index 8d57a6139513..0f3d7c7c2891 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -34,13 +34,14 @@ import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; -import org.apache.hadoop.ozone.debug.FSORepairTool; +import org.apache.hadoop.ozone.common.FSOBaseTool; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import 
org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.apache.hadoop.ozone.repair.om.FSORepairTool; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; @@ -128,9 +129,9 @@ public void testConnectedTreeOneBucket() throws Exception { FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1"); // Test the connected tree in debug mode. - FSORepairTool repair = new FSORepairTool(getOmDB(), + FSOBaseTool fsoTool = new FSOBaseTool(getOmDB(), getOmDBLocation(), true); - FSORepairTool.Report debugReport = repair.run(); + FSOBaseTool.Report debugReport = fsoTool.run(); Assertions.assertEquals(expectedReport, debugReport); assertConnectedTreeReadable("vol1", "bucket1"); @@ -138,9 +139,9 @@ public void testConnectedTreeOneBucket() throws Exception { // Running again in repair mode should give same results since the tree // is connected. - repair = new FSORepairTool(getOmDB(), + fsoTool = new FSORepairTool(getOmDB(), getOmDBLocation(), false); - FSORepairTool.Report repairReport = repair.run(); + FSORepairTool.Report repairReport = fsoTool.run(); Assertions.assertEquals(expectedReport, repairReport); assertConnectedTreeReadable("vol1", "bucket1"); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseCLI.java similarity index 64% rename from hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairCLI.java rename to hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseCLI.java index 6d936ef7e23a..809fc74b7892 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairCLI.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseCLI.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.hadoop.ozone.debug; +package org.apache.hadoop.ozone.common; import org.apache.hadoop.hdds.cli.SubcommandWithParent; import org.kohsuke.MetaInfServices; @@ -28,51 +28,56 @@ * Parser for scm.db file. */ @CommandLine.Command( - name = "fso-repair", + name = "fso-tree", description = "Identify a disconnected FSO tree, and optionally mark " + "unreachable entries for deletion. OM should be " + "stopped while this tool is run. Information will be logged at " + "INFO and DEBUG levels." ) @MetaInfServices(SubcommandWithParent.class) -public class FSORepairCLI implements Callable, SubcommandWithParent { +public class FSOBaseCLI implements Callable, SubcommandWithParent { @CommandLine.Option(names = {"--db"}, required = true, description = "Path to OM RocksDB") private String dbPath; - @CommandLine.Option(names = {"--read-mode-only", "-r"}, - required = true, - description = - "Mode to run the tool in. Read-mode will just log information about unreachable files or directories;" + - "otherwise the tool will move those files and directories to the deleted tables.", - defaultValue = "true") - private boolean readModeOnly; + @CommandLine.Option(names = {"--verbose"}, + description = "More verbose output. ") + private boolean verbose; - @CommandLine.ParentCommand - private OzoneDebug parent; @Override public Void call() throws Exception { try { // TODO case insensitive enum options. 
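// A hedged sketch of how this subcommand would be invoked once registered
// under its parent command; the "ozone debug" prefix is an assumption based
// on the OzoneDebug/OzoneRepair parents used by the CLI classes in this
// patch, and the DB path is illustrative only:
//
//   ozone debug fso-tree --db /path/to/om.db --verbose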
- FSORepairTool repairTool = new FSORepairTool(dbPath, readModeOnly); - repairTool.run(); + FSOBaseTool + baseTool = new FSOBaseTool(dbPath, true); + baseTool.run(); } catch (Exception ex) { - throw new IllegalArgumentException("FSO repair failed: " + ex.getMessage()); + throw new IllegalArgumentException("FSO inspection failed: " + ex.getMessage()); } - System.out.printf("FSO %s finished. See client logs for results.%n", - readModeOnly ? "read-mode" : "repair-mode"); + if (verbose) { + System.out.println("FSO inspection finished. See client logs for results."); + } return null; } @Override public Class getParentType() { - return OzoneDebug.class; + throw new UnsupportedOperationException("Should not be called from " + + "FSOBaseCLI directly."); + } + + public String getDbPath() { + return dbPath; + } + + public boolean getVerbose() { + return verbose; } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseTool.java similarity index 89% rename from hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java rename to hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseTool.java index 69c01b5d1027..5f729ad0aa39 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseTool.java @@ -15,8 +15,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +package org.apache.hadoop.ozone.common; -package org.apache.hadoop.ozone.debug; import com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.FileUtils; @@ -25,24 +25,24 @@ import org.apache.hadoop.hdds.utils.db.DBStore; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.hdds.utils.db.managed.ManagedOptions; +import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB; import org.apache.hadoop.ozone.OmUtils; +import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; -import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.WithObjectID; -import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.request.file.OMFileRequest; +import org.apache.hadoop.ozone.repair.om.FSORepairTool; import org.apache.ratis.util.Preconditions; import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.Holder; -import org.rocksdb.Options; -import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,11 +59,9 @@ import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; /** - * Tool to identify and repair disconnected FSO trees in all buckets. - * The tool can be run in debug mode, where it will just log information - * about unreachable files or directories, or in repair mode to additionally - * move those files and directories to the deleted tables. 
If deletes are - * still in progress (the deleted directory table is not empty), the tool may + * Base Tool to identify disconnected FSO trees in all buckets. + * The tool will log information about unreachable files or directories. + * If deletes are still in progress (the deleted directory table is not empty), the tool may * report that the tree is disconnected, even though pending deletes would * fix the issue. * @@ -82,7 +80,7 @@ * when the tool finishes, in case users want to manually inspect it. It can * be safely deleted once the tool finishes. */ -public class FSORepairTool { +public class FSOBaseTool { public static final Logger LOG = LoggerFactory.getLogger(FSORepairTool.class); @@ -102,9 +100,7 @@ public class FSORepairTool { private static final byte[] REACHABLE_TABLE = "reachable".getBytes(StandardCharsets.UTF_8); private ColumnFamilyHandle reachableCF; - private RocksDB reachableDB; - - private boolean readModeOnly; + private ManagedRocksDB reachableDB; private long reachableBytes; private long reachableFiles; @@ -112,10 +108,10 @@ public class FSORepairTool { private long unreachableBytes; private long unreachableFiles; private long unreachableDirs; + private boolean dryRun; - - public FSORepairTool(String dbPath, boolean readModeOnly) throws IOException { - this(getStoreFromPath(dbPath), dbPath, readModeOnly); + public FSOBaseTool(String dbPath, boolean dryRun) throws IOException { + this(getStoreFromPath(dbPath), dbPath, dryRun); } /** @@ -123,8 +119,8 @@ public FSORepairTool(String dbPath, boolean readModeOnly) throws IOException { * class for testing. */ @VisibleForTesting - public FSORepairTool(DBStore dbStore, String dbPath, boolean readModeOnly) throws IOException { - this.readModeOnly = readModeOnly; + public FSOBaseTool(DBStore dbStore, String dbPath, boolean isDryRun) throws IOException { + dryRun = isDryRun; // Counters to track as we walk the tree. reachableBytes = 0; reachableFiles = 0; @@ -157,7 +153,7 @@ public FSORepairTool(DBStore dbStore, String dbPath, boolean readModeOnly) throw RepeatedOmKeyInfo.class); } - private static DBStore getStoreFromPath(String dbPath) throws IOException { + protected static DBStore getStoreFromPath(String dbPath) throws IOException { File omDBFile = new File(dbPath); if (!omDBFile.exists() || !omDBFile.isDirectory()) { throw new IOException(String.format("Specified OM DB instance %s does " + @@ -168,10 +164,10 @@ private static DBStore getStoreFromPath(String dbPath) throws IOException { new File(dbPath).getParentFile()); } - public Report run() throws IOException { + public FSORepairTool.Report run() throws IOException { // Iterate all volumes. 
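// A minimal sketch of the mark phase described in the class comment above,
// assuming an in-memory set in place of the reachable.db column family;
// childDirsOf() is a hypothetical stand-in for the prefix seek over the
// directory table:
//
//   Set<String> reachable = new HashSet<>();
//   Deque<String> stack = new ArrayDeque<>();
//   reachable.add(bucketKey);                // the bucket itself is reachable
//   stack.addAll(childDirsOf(bucketKey));    // its immediate child directories
//   while (!stack.isEmpty()) {
//     String dir = stack.pop();
//     reachable.add(dir);
//     stack.addAll(childDirsOf(dir));        // discovered via prefix seek
//   }
//   // Sweep phase: any file or directory whose parent key is not in
//   // `reachable` is reported (and optionally marked for deletion).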
try (TableIterator> - volumeIterator = volumeTable.iterator()) { + volumeIterator = volumeTable.iterator()) { openReachableDB(); while (volumeIterator.hasNext()) { @@ -218,8 +214,8 @@ public Report run() throws IOException { return buildReportAndLog(); } - private Report buildReportAndLog() { - Report report = new Report.Builder() + private FSORepairTool.Report buildReportAndLog() { + FSORepairTool.Report report = new FSORepairTool.Report.Builder() .setReachableDirs(reachableDirs) .setReachableFiles(reachableFiles) .setReachableBytes(reachableBytes) @@ -290,7 +286,7 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) LOG.debug("Found unreachable directory: {}", dirKey); unreachableDirs++; - if (!readModeOnly) { + if (dryRun) { LOG.debug("Marking unreachable directory {} for deletion.", dirKey); OmDirectoryInfo dirInfo = dirEntry.getValue(); markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(), @@ -318,7 +314,7 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) unreachableBytes += fileInfo.getDataSize(); unreachableFiles++; - if (!readModeOnly) { + if (dryRun) { LOG.debug("Marking unreachable file {} for deletion.", fileKey); markFileForDeletion(fileKey, fileInfo); @@ -334,7 +330,7 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) } } - private void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IOException { + protected void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IOException { try (BatchOperation batch = store.initBatchOperation()) { fileTable.deleteWithBatch(batch, fileKey); @@ -355,7 +351,7 @@ private void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IOEx } } - private void markDirectoryForDeletion(String volumeName, String bucketName, + protected void markDirectoryForDeletion(String volumeName, String bucketName, String dirKeyName, OmDirectoryInfo dirInfo) throws IOException { try (BatchOperation batch = store.initBatchOperation()) { directoryTable.deleteWithBatch(batch, dirKeyName); @@ -379,7 +375,7 @@ private Collection getChildDirectoriesAndMarkAsReachable(OmVolumeArgs vo Collection childDirs = new ArrayList<>(); try (TableIterator> - dirIterator = directoryTable.iterator()) { + dirIterator = directoryTable.iterator()) { String dirPrefix = buildReachableKey(volume, bucket, currentDir); // Start searching the directory table at the current directory's // prefix to get its immediate children. @@ -438,7 +434,7 @@ private static String buildReachableKey(OmVolumeArgs volume, * @param fileOrDirKey The key of a file or directory in RocksDB. * @return true if the entry's parent is in the reachable table. 
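// Illustrative walk-through of this check: file and directory keys use the
// /volumeID/bucketID/parentID/name layout, so the reachable-table lookup key
// is just the first three components. For a hypothetical key
//
//   "/volId/bucketId/parentId/file1"
//
// the parent lookup key becomes "/volId/bucketId/parentId", and the entry is
// reachable iff that key was written to reachable.db during the mark phase.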
*/ - private boolean isReachable(String fileOrDirKey) throws IOException { + protected boolean isReachable(String fileOrDirKey) throws IOException { byte[] reachableParentKey = buildReachableParentKey(fileOrDirKey).getBytes(StandardCharsets.UTF_8); try { @@ -485,7 +481,7 @@ private void openReachableDB() throws IOException { FileUtils.deleteDirectory(reachableDBFile); } reachableDBPath = reachableDBFile.toString(); - reachableDB = RocksDB.open(reachableDBPath); + reachableDB = ManagedRocksDB.open(reachableDBPath); } catch (RocksDBException ex) { if (reachableDB != null) { reachableDB.close(); @@ -502,7 +498,8 @@ private void closeReachableDB() { private void dropReachableTableIfExists() throws IOException { try { - List availableCFs = RocksDB.listColumnFamilies(new Options(), + List + availableCFs = ManagedRocksDB.listColumnFamilies(new ManagedOptions(), reachableDBPath); boolean cfFound = false; for (byte[] cfNameBytes: availableCFs) { @@ -550,7 +547,7 @@ public static class Report { /** * Builds one report that is the aggregate of multiple others. */ - public Report(Report... reports) { + public Report(FSORepairTool.Report... reports) { reachableBytes = 0; reachableFiles = 0; reachableDirs = 0; @@ -558,7 +555,7 @@ public Report(Report... reports) { unreachableFiles = 0; unreachableDirs = 0; - for (Report report: reports) { + for (FSORepairTool.Report report: reports) { reachableBytes += report.reachableBytes; reachableFiles += report.reachableFiles; reachableDirs += report.reachableDirs; @@ -568,7 +565,7 @@ public Report(Report... reports) { } } - private Report(Builder builder) { + private Report(FSORepairTool.Report.Builder builder) { reachableBytes = builder.reachableBytes; reachableFiles = builder.reachableFiles; reachableDirs = builder.reachableDirs; @@ -621,7 +618,7 @@ public boolean equals(Object other) { if (other == null || getClass() != other.getClass()) { return false; } - Report report = (Report) other; + FSORepairTool.Report report = (FSORepairTool.Report) other; // Useful for testing. 
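// Note on the RocksDB lookup pattern used in isReachable() above:
// keyMayExist() only consults the memtable and bloom filters, so it can
// return a false positive but never a false negative. A negative answer
// therefore skips the read entirely, while a positive one must be confirmed
// with get():
//
//   if (db.keyMayExist(cf, key, new Holder<>())) {
//     return db.get(cf, key) != null;   // confirm the bloom-filter hit
//   }
//   return false;                       // key is definitely absent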
LOG.debug("Comparing reports\nExpect:\n{}\nActual:\n{}", this, report); @@ -658,38 +655,44 @@ public static final class Builder { public Builder() { } - public Builder setReachableBytes(long reachableBytes) { + @SuppressWarnings("checkstyle:hiddenfield") + public FSOBaseTool.Report.Builder setReachableBytes(long reachableBytes) { this.reachableBytes = reachableBytes; return this; } - public Builder setReachableFiles(long reachableFiles) { + @SuppressWarnings("checkstyle:hiddenfield") + public FSOBaseTool.Report.Builder setReachableFiles(long reachableFiles) { this.reachableFiles = reachableFiles; return this; } - public Builder setReachableDirs(long reachableDirs) { + @SuppressWarnings("checkstyle:hiddenfield") + public FSOBaseTool.Report.Builder setReachableDirs(long reachableDirs) { this.reachableDirs = reachableDirs; return this; } - public Builder setUnreachableBytes(long unreachableBytes) { + @SuppressWarnings("checkstyle:hiddenfield") + public FSOBaseTool.Report.Builder setUnreachableBytes(long unreachableBytes) { this.unreachableBytes = unreachableBytes; return this; } - public Builder setUnreachableFiles(long unreachableFiles) { + @SuppressWarnings("checkstyle:hiddenfield") + public FSOBaseTool.Report.Builder setUnreachableFiles(long unreachableFiles) { this.unreachableFiles = unreachableFiles; return this; } - public Builder setUnreachableDirs(long unreachableDirs) { + @SuppressWarnings("checkstyle:hiddenfield") + public FSOBaseTool.Report.Builder setUnreachableDirs(long unreachableDirs) { this.unreachableDirs = unreachableDirs; return this; } - public Report build() { - return new Report(this); + public FSOBaseTool.Report build() { + return new FSOBaseTool.Report(this); } } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java new file mode 100644 index 000000000000..537abfad32fa --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *

+ * SCM related cli tools. + */ + +/** + * Ozone Admin tools. + */ +/** + * Ozone debug/repair tools uility class. + */ +package org.apache.hadoop.ozone.common; diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java new file mode 100644 index 000000000000..26ec8614aa21 --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.debug; + +import org.apache.hadoop.hdds.cli.SubcommandWithParent; +import org.apache.hadoop.ozone.common.FSOBaseCLI; +import org.apache.hadoop.ozone.common.FSOBaseTool; +import org.kohsuke.MetaInfServices; +import picocli.CommandLine; + +/** + * Parser for scm.db file. + */ +@CommandLine.Command( + name = "fso-tree", + description = "Identify a disconnected FSO tree, and optionally mark " + + "unreachable entries for deletion. OM should be " + + "stopped while this tool is run. Information will be logged at " + + "INFO and DEBUG levels." +) +@MetaInfServices(SubcommandWithParent.class) +public class FSODebugCLI extends FSOBaseCLI { + + @CommandLine.ParentCommand + private OzoneDebug parent; + + @Override + public Void call() throws Exception { + + try { + // TODO case insensitive enum options. + FSOBaseTool + baseTool = new FSOBaseTool(getDbPath(), true); + baseTool.run(); + } catch (Exception ex) { + throw new IllegalArgumentException("FSO inspection failed: " + ex.getMessage()); + } + + if (getVerbose()) { + System.out.println("FSO inspection finished. See client logs for results."); + } + + return null; + } + + @Override + public Class getParentType() { + return OzoneDebug.class; + } +} + diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java new file mode 100644 index 000000000000..c95a60394ad8 --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.repair.om; + +import org.apache.hadoop.hdds.cli.SubcommandWithParent; +import org.apache.hadoop.ozone.common.FSOBaseCLI; +import org.apache.hadoop.ozone.repair.OzoneRepair; +import org.kohsuke.MetaInfServices; +import picocli.CommandLine; + +/** + * Parser for scm.db file. + */ +@CommandLine.Command( + name = "fso-tree-repair", + description = "Identify and repair a disconnected FSO tree, and mark " + + "unreachable entries for deletion. OM should be " + + "stopped while this tool is run. Information will be logged at " + + "INFO and DEBUG levels." +) +@MetaInfServices(SubcommandWithParent.class) +public class FSORepairCLI extends FSOBaseCLI { + + @CommandLine.ParentCommand + private OzoneRepair parent; + + @Override + public Void call() throws Exception { + + try { + // TODO case insensitive enum options. + FSORepairTool + repairTool = new FSORepairTool(getDbPath(), false); + repairTool.run(); + } catch (Exception ex) { + throw new IllegalArgumentException("FSO repair failed: " + ex.getMessage()); + } + + if (getVerbose()) { + System.out.println("FSO repair finished. See client logs for results."); + } + + return null; + } + + @Override + public Class getParentType() { + return OzoneRepair.class; + } +} + diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java new file mode 100644 index 000000000000..ea5bf8625123 --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.repair.om; + +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.ozone.common.FSOBaseTool; + +import java.io.IOException; + +/** + * Tool to identify and repair disconnected FSO trees in all buckets. 
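// Hypothetical usage of the constructors below, mirroring the CLI wiring in
// this patch (the DB path is illustrative): FSODebugCLI constructs the base
// tool with true to only inspect and log, while FSORepairCLI constructs this
// subclass with false to also move unreachable entries to the deleted tables:
//
//   FSORepairTool.Report inspected =
//       new FSORepairTool("/data/metadata/om.db", true).run();
//   FSORepairTool.Report repaired =
//       new FSORepairTool("/data/metadata/om.db", false).run();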
+ */ +public class FSORepairTool extends FSOBaseTool { + + public FSORepairTool(String dbPath, boolean dryRun) throws IOException { + this(getStoreFromPath(dbPath), dbPath, dryRun); + } + + public FSORepairTool(DBStore dbStore, String dbPath, boolean dryRun) throws IOException { + super(dbStore, dbPath, dryRun); + } + +}

From e918575154a7fbbc62c592234abbef73cc8446b2 Mon Sep 17 00:00:00 2001
From: DaveTeng0
Date: Thu, 2 May 2024 17:26:14 -0700
Subject: [PATCH 04/28] removed unused test

--- .../hadoop/fs/ozone/TestFSORepairTool.java | 35 ------------------- 1 file changed, 35 deletions(-)

diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java index 0f3d7c7c2891..c9c8c226b49e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -506,41 +506,6 @@ private void assertDisconnectedObjectsMarkedForDelete(int numWrites) Assertions.assertEquals(numWrites, pendingDeleteFileCounts.get("file3")); } -// @Test -// public void testOnSavedDB() throws Exception { -// /* -// Path dir1 = new Path("/vol1/bucket1/dir1"); -// Path file1 = new Path(dir1, "file1"); -// Path file2 = new Path(dir1, "file2"); -// -// Path dir2 = new Path("/vol1/bucket1/dir1/dir2"); -// Path file3 = new Path(dir2, "file3"); -// -// Path dir3 = new Path("/vol1/bucket1/dir3"); -// Path file4 = new Path("/vol1/bucket1/file4"); -// -// ContractTestUtils.touch(fs, file1); -// ContractTestUtils.touch(fs, file2); -// ContractTestUtils.touch(fs, file3); -// ContractTestUtils.touch(fs, file4); -// fs.mkdirs(dir3); -// */ -// FsoRepair repair = new FsoRepair("/Users/erose/Temp/omNode-1/om.db", -// FsoRepair.Mode.DEBUG); -// repair.run(); -// -// /* -// Original: -// Expected: -// 3 reachable dirs, 4 reachable files. -// -// After remove dir1: -// 3 unreachable files, 1 unreachable dir. -// 1 reachable file, 1 reachable dir. -// -// */ -// } - private void assertDeleteTablesEmpty() throws IOException { OzoneManager leader = cluster.getOMLeader(); Assertions.assertTrue(leader.getMetadataManager().getDeletedDirTable().isEmpty());

From bbc094163f04433242938453989b3d9011de20ac Mon Sep 17 00:00:00 2001
From: DaveTeng0
Date: Thu, 2 May 2024 17:39:20 -0700
Subject: [PATCH 05/28] remove accidentally added character

--- .../src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java index d4857e92f547..4c5f3fdc872f 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java @@ -39,10 +39,10 @@ public class GenericCli implements Callable, GenericParentCommand { public static final int EXECUTION_ERROR_EXIT_CODE = -1; -` @Option(names = {"--verbose"}, + @Option(names = {"--verbose"}, description = "More verbose output. 
Show the stack trace of the errors.") private boolean verbose; -` + @Option(names = {"-D", "--set"}) private Map configurationOverrides = new HashMap<>(); From cb2d7c253616e79d220b1f660cfa5e2578f013f5 Mon Sep 17 00:00:00 2001 From: DaveTeng0 Date: Mon, 3 Jun 2024 11:10:19 -0700 Subject: [PATCH 06/28] remove FSOBaseCLI, switch to use RocksDatabase in FSORepairTool, which wraps ManagedRocksDB --- .../hadoop/hdds/utils/db/RocksDatabase.java | 45 +- .../hdds/utils/db/managed/ManagedRocksDB.java | 41 - .../hadoop/fs/ozone/TestFSORepairTool.java | 60 +- .../hadoop/ozone/common/FSOBaseCLI.java | 83 --- .../hadoop/ozone/common/FSOBaseTool.java | 699 ------------------ .../hadoop/ozone/common/package-info.java | 27 - .../hadoop/ozone/debug/FSODebugCLI.java | 67 -- .../hadoop/ozone/repair/om/FSORepairCLI.java | 25 +- .../hadoop/ozone/repair/om/FSORepairTool.java | 661 ++++++++++++++++- 9 files changed, 745 insertions(+), 963 deletions(-) delete mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseCLI.java delete mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseTool.java delete mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java delete mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java index 945138b8b8b3..afe0153e1688 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java @@ -139,7 +139,7 @@ public static List listColumnFamiliesEmptyOptions(final String path) } } - static RocksDatabase open(File dbFile, ManagedDBOptions dbOptions, + public static RocksDatabase open(File dbFile, ManagedDBOptions dbOptions, ManagedWriteOptions writeOptions, Set families, boolean readOnly) throws IOException { List descriptors = null; @@ -460,8 +460,13 @@ public void ingestExternalFile(ColumnFamily family, List files, public void put(ColumnFamily family, byte[] key, byte[] value) throws IOException { + put(family.getHandle(), key, value); + } + + public void put(ColumnFamilyHandle handle, byte[] key, byte[] value) + throws IOException { try (UncheckedAutoCloseable ignored = acquire()) { - db.get().put(family.getHandle(), writeOptions, key, value); + db.get().put(handle, writeOptions, key, value); } catch (RocksDBException e) { closeOnError(e); throw toIOException(this, "put " + bytes2String(key), e); @@ -621,9 +626,14 @@ RocksCheckpoint createCheckpoint() { */ Supplier keyMayExist(ColumnFamily family, byte[] key) throws IOException { + return keyMayExist(family.getHandle(), key); + } + + public Supplier keyMayExist(ColumnFamilyHandle handle, byte[] key) + throws IOException { try (UncheckedAutoCloseable ignored = acquire()) { final Holder out = new Holder<>(); - return db.get().keyMayExist(family.getHandle(), key, out) ? + return db.get().keyMayExist(handle, key, out) ? 
out::getValue : null; } } @@ -652,16 +662,39 @@ public Collection getExtraColumnFamilies() { return Collections.unmodifiableCollection(columnFamilies.values()); } - byte[] get(ColumnFamily family, byte[] key) throws IOException { + public void dropColumnFamily(ColumnFamilyHandle handle) throws IOException { try (UncheckedAutoCloseable ignored = acquire()) { - return db.get().get(family.getHandle(), key); + db.get().dropColumnFamily(handle); } catch (RocksDBException e) { closeOnError(e); - final String message = "get " + bytes2String(key) + " from " + family; + throw toIOException(this, "dropColumnFamily", e); + } + } + + public ColumnFamilyHandle createColumnFamily(ColumnFamilyDescriptor descriptor) throws IOException { + try (UncheckedAutoCloseable ignored = acquire()) { + return db.get().createColumnFamily(descriptor); + } catch (RocksDBException e) { + closeOnError(e); + throw toIOException(this, "createColumnFamily", e); + } + } + + public byte[] get(ColumnFamily family, byte[] key) throws IOException { + return get(family.getHandle(), key, family.getName()); + } + + public byte[] get(ColumnFamilyHandle handle, byte[] key, String familyName) throws IOException { + try (UncheckedAutoCloseable ignored = acquire()) { + return db.get().get(handle, key); + } catch (RocksDBException e) { + closeOnError(e); + final String message = "get " + bytes2String(key) + " from " + familyName; throw toIOException(this, message, e); } } + /** * Get the value mapped to the given key. * diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java index 8357a3173525..6248dfba321c 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java @@ -21,9 +21,7 @@ import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.DBOptions; -import org.rocksdb.Holder; import org.rocksdb.LiveFileMetaData; -import org.rocksdb.Options; import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; import org.slf4j.Logger; @@ -89,11 +87,6 @@ public static ManagedRocksDB open( ); } - public static ManagedRocksDB open(final String path) throws RocksDBException { - return new ManagedRocksDB(RocksDB.open(path)); - } - - /** * Delete liveMetaDataFile from rocks db using RocksDB#deleteFile Api. 
* This function makes the RocksDB#deleteFile Api synchronized by waiting @@ -110,38 +103,4 @@ public void deleteFile(LiveFileMetaData fileToBeDeleted) ManagedRocksObjectUtils.waitForFileDelete(file, Duration.ofSeconds(60)); } - public void put(ColumnFamilyHandle columnFamilyHandle, - byte[] key, byte[] value) throws RocksDBException { - this.get().put(columnFamilyHandle, key, value); - } - - public byte[] get(ColumnFamilyHandle columnFamilyHandle, - byte[] key) throws RocksDBException { - return this.get().get(columnFamilyHandle, key); - } - - public ColumnFamilyHandle createColumnFamily( - ColumnFamilyDescriptor columnFamilyDescriptor) - throws RocksDBException { - return this.get().createColumnFamily(columnFamilyDescriptor); - } - - public void dropColumnFamily(ColumnFamilyHandle columnFamilyHandle) - throws RocksDBException { - this.get().dropColumnFamily(columnFamilyHandle); - } - - public boolean keyMayExist(ColumnFamilyHandle columnFamilyHandle, byte[] key, Holder valueHolder) { - return this.get().keyMayExist(columnFamilyHandle, key, valueHolder); - } - - public void close() { - this.get().close(); - } - - public static List listColumnFamilies(Options options, - String path) throws RocksDBException { - return RocksDB.listColumnFamilies(options, path); - } - } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java index c9c8c226b49e..430a931d0547 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -34,7 +34,6 @@ import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; -import org.apache.hadoop.ozone.common.FSOBaseTool; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; @@ -126,12 +125,12 @@ public static void teardown() { @Test public void testConnectedTreeOneBucket() throws Exception { - FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1"); + org.apache.hadoop.ozone.repair.om.FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1"); // Test the connected tree in debug mode. - FSOBaseTool fsoTool = new FSOBaseTool(getOmDB(), + FSORepairTool fsoTool = new FSORepairTool(getOmDB(), getOmDBLocation(), true); - FSOBaseTool.Report debugReport = fsoTool.run(); + FSORepairTool.Report debugReport = fsoTool.run(); Assertions.assertEquals(expectedReport, debugReport); assertConnectedTreeReadable("vol1", "bucket1"); @@ -139,9 +138,9 @@ public void testConnectedTreeOneBucket() throws Exception { // Running again in repair mode should give same results since the tree // is connected. 
- fsoTool = new FSORepairTool(getOmDB(), + fsoTool = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), getOmDBLocation(), false); - FSORepairTool.Report repairReport = fsoTool.run(); + org.apache.hadoop.ozone.repair.om.FSORepairTool.Report repairReport = fsoTool.run(); Assertions.assertEquals(expectedReport, repairReport); assertConnectedTreeReadable("vol1", "bucket1"); @@ -154,7 +153,8 @@ public void testReportedDataSize() throws Exception { FSORepairTool.Report report2 = buildConnectedTree("vol1", "bucket2", 10); FSORepairTool.Report expectedReport = new FSORepairTool.Report(report1, report2); - FSORepairTool repair = new FSORepairTool(getOmDB(), + FSORepairTool + repair = new FSORepairTool(getOmDB(), getOmDBLocation(), false); FSORepairTool.Report debugReport = repair.run(); Assertions.assertEquals(expectedReport, debugReport); @@ -164,12 +164,13 @@ public void testReportedDataSize() throws Exception { public void testMultipleBucketsAndVolumes() throws Exception { FSORepairTool.Report report1 = buildConnectedTree("vol1", "bucket1"); FSORepairTool.Report report2 = buildDisconnectedTree("vol2", "bucket2"); - FSORepairTool.Report expectedAggregateReport = new FSORepairTool.Report( + FSORepairTool.Report expectedAggregateReport = new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report( report1, report2); - FSORepairTool repair = new FSORepairTool(getOmDB(), + org.apache.hadoop.ozone.repair.om.FSORepairTool + repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), getOmDBLocation(), false); - FSORepairTool.Report generatedReport = repair.run(); + org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); Assertions.assertEquals(generatedReport, expectedAggregateReport); assertConnectedTreeReadable("vol1", "bucket1"); @@ -201,9 +202,10 @@ public void testDeleteOverwrite() throws Exception { ContractTestUtils.touch(fs, new Path("/vol1/bucket1/dir1/file2")); disconnectDirectory("dir1"); - FSORepairTool repair = new FSORepairTool(getOmDB(), + org.apache.hadoop.ozone.repair.om.FSORepairTool + repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), getOmDBLocation(), false); - FSORepairTool.Report generatedReport = repair.run(); + org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); Assertions.assertEquals(1, generatedReport.getUnreachableDirs()); Assertions.assertEquals(3, generatedReport.getUnreachableFiles()); @@ -214,10 +216,11 @@ public void testDeleteOverwrite() throws Exception { @Test public void testEmptyFileTrees() throws Exception { // Run when there are no file trees. - FSORepairTool repair = new FSORepairTool(getOmDB(), + org.apache.hadoop.ozone.repair.om.FSORepairTool + repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), getOmDBLocation(), false); - FSORepairTool.Report generatedReport = repair.run(); - Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); + org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); + Assertions.assertEquals(generatedReport, new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report()); assertDeleteTablesEmpty(); // Create an empty volume and bucket. @@ -225,10 +228,10 @@ public void testEmptyFileTrees() throws Exception { fs.mkdirs(new Path("/vol2/bucket1")); // Run on an empty volume and bucket. 
- repair = new FSORepairTool(getOmDB(), + repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), getOmDBLocation(), false); generatedReport = repair.run(); - Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); + Assertions.assertEquals(generatedReport, new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report()); assertDeleteTablesEmpty(); } @@ -259,14 +262,15 @@ public void testNonFSOBucketsSkipped() throws Exception { legacyStream.close(); // Add an FSO bucket with data. - FSORepairTool.Report connectReport = buildConnectedTree("vol1", "fso" + + org.apache.hadoop.ozone.repair.om.FSORepairTool.Report connectReport = buildConnectedTree("vol1", "fso" + "-bucket"); // Even in repair mode there should be no action. legacy and obs buckets // will be skipped and FSO tree is connected. - FSORepairTool repair = new FSORepairTool(getOmDB(), + org.apache.hadoop.ozone.repair.om.FSORepairTool + repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), getOmDBLocation(), false); - FSORepairTool.Report generatedReport = repair.run(); + org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); Assertions.assertEquals(connectReport, generatedReport); assertConnectedTreeReadable("vol1", "fso-bucket"); @@ -281,7 +285,7 @@ public void testNonFSOBucketsSkipped() throws Exception { } - private FSORepairTool.Report buildConnectedTree(String volume, String bucket) + private org.apache.hadoop.ozone.repair.om.FSORepairTool.Report buildConnectedTree(String volume, String bucket) throws Exception { return buildConnectedTree(volume, bucket, 0); } @@ -289,8 +293,8 @@ private FSORepairTool.Report buildConnectedTree(String volume, String bucket) /** * Creates a tree with 3 reachable directories and 4 reachable files. */ - private FSORepairTool.Report buildConnectedTree(String volume, String bucket, - int fileSize) + private org.apache.hadoop.ozone.repair.om.FSORepairTool.Report buildConnectedTree(String volume, String bucket, + int fileSize) throws Exception { Path bucketPath = new Path("/" + volume + "/" + bucket); Path dir1 = new Path(bucketPath, "dir1"); @@ -325,7 +329,7 @@ private FSORepairTool.Report buildConnectedTree(String volume, String bucket, assertConnectedTreeReadable(volume, bucket); - return new FSORepairTool.Report.Builder() + return new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report.Builder() .setReachableDirs(3) .setReachableFiles(4) .setReachableBytes(fileSize * 4L) @@ -354,7 +358,7 @@ private void assertConnectedTreeReadable(String volume, String bucket) Assertions.assertTrue(fs.exists(file4)); } - private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket) + private org.apache.hadoop.ozone.repair.om.FSORepairTool.Report buildDisconnectedTree(String volume, String bucket) throws Exception { return buildDisconnectedTree(volume, bucket, 0); } @@ -363,8 +367,8 @@ private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket) * Creates a tree with 2 reachable directories, 1 reachable file, 1 * unreachable directory, and 3 unreachable files. */ - private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, - int fileSize) throws Exception { + private org.apache.hadoop.ozone.repair.om.FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, + int fileSize) throws Exception { buildConnectedTree(volume, bucket, fileSize); // Manually remove dir1. 
This should disconnect 3 of the files and 1 of @@ -373,7 +377,7 @@ private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, assertDisconnectedTreePartiallyReadable(volume, bucket); - return new FSORepairTool.Report.Builder() + return new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report.Builder() .setReachableDirs(1) .setReachableFiles(1) .setReachableBytes(fileSize) diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseCLI.java deleted file mode 100644 index 809fc74b7892..000000000000 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseCLI.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.common; - -import org.apache.hadoop.hdds.cli.SubcommandWithParent; -import org.kohsuke.MetaInfServices; -import picocli.CommandLine; - -import java.util.concurrent.Callable; - -/** - * Parser for scm.db file. - */ -@CommandLine.Command( - name = "fso-tree", - description = "Identify a disconnected FSO tree, and optionally mark " + - "unreachable entries for deletion. OM should be " + - "stopped while this tool is run. Information will be logged at " + - "INFO and DEBUG levels." -) -@MetaInfServices(SubcommandWithParent.class) -public class FSOBaseCLI implements Callable, SubcommandWithParent { - - @CommandLine.Option(names = {"--db"}, - required = true, - description = "Path to OM RocksDB") - private String dbPath; - - @CommandLine.Option(names = {"--verbose"}, - description = "More verbose output. ") - private boolean verbose; - - - @Override - public Void call() throws Exception { - - try { - // TODO case insensitive enum options. - FSOBaseTool - baseTool = new FSOBaseTool(dbPath, true); - baseTool.run(); - } catch (Exception ex) { - throw new IllegalArgumentException("FSO inspection failed: " + ex.getMessage()); - } - - if (verbose) { - System.out.println("FSO inspection finished. 
See client logs for results."); - } - - return null; - } - - @Override - public Class getParentType() { - throw new UnsupportedOperationException("Should not be called from " + - "FSOBaseCLI directly."); - } - - public String getDbPath() { - return dbPath; - } - - public boolean getVerbose() { - return verbose; - } -} - diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseTool.java deleted file mode 100644 index 5f729ad0aa39..000000000000 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseTool.java +++ /dev/null @@ -1,699 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.ozone.common; - - -import com.google.common.annotations.VisibleForTesting; -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.utils.db.BatchOperation; -import org.apache.hadoop.hdds.utils.db.DBStore; -import org.apache.hadoop.hdds.utils.db.Table; -import org.apache.hadoop.hdds.utils.db.TableIterator; -import org.apache.hadoop.hdds.utils.db.managed.ManagedOptions; -import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB; -import org.apache.hadoop.ozone.OmUtils; -import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; -import org.apache.hadoop.ozone.om.helpers.BucketLayout; -import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; -import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; -import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; -import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; -import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; -import org.apache.hadoop.ozone.om.helpers.WithObjectID; -import org.apache.hadoop.ozone.om.request.file.OMFileRequest; -import org.apache.hadoop.ozone.repair.om.FSORepairTool; -import org.apache.ratis.util.Preconditions; -import org.rocksdb.ColumnFamilyDescriptor; -import org.rocksdb.ColumnFamilyHandle; -import org.rocksdb.Holder; -import org.rocksdb.RocksDBException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Objects; -import java.util.Stack; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; - -/** - * Base Tool to identify disconnected FSO trees in all buckets. - * The tool will log information about unreachable files or directories. 
- * If deletes are still in progress (the deleted directory table is not empty), the tool may - * report that the tree is disconnected, even though pending deletes would - * fix the issue. - * - * Before using the tool, make sure all OMs are stopped, - * and that all Ratis logs have been flushed to the OM DB. This can be - * done using `ozone admin prepare` before running the tool, and `ozone admin - * cancelprepare` when done. - * - * The tool will run a DFS from each bucket, and save all reachable - * directories as keys in a new temporary RocksDB instance called "reachable.db" - * In the same directory as om.db. - * will then scan the entire file and directory tables for each bucket to see - * if each object's parent is in the reachable table of reachable.db. The - * reachable table will be dropped and recreated for each bucket. - * The tool is idempotent. reachable.db will not be deleted automatically - * when the tool finishes, in case users want to manually inspect it. It can - * be safely deleted once the tool finishes. - */ -public class FSOBaseTool { - public static final Logger LOG = - LoggerFactory.getLogger(FSORepairTool.class); - - private final String omDBPath; - - private final DBStore store; - private final Table volumeTable; - private final Table bucketTable; - private final Table directoryTable; - private final Table fileTable; - private final Table deletedDirectoryTable; - private final Table deletedTable; - // The temporary DB is used to track which items have been seen. - // Since usage of this DB is simple, use it directly from - // RocksDB. - private String reachableDBPath; - private static final byte[] REACHABLE_TABLE = - "reachable".getBytes(StandardCharsets.UTF_8); - private ColumnFamilyHandle reachableCF; - private ManagedRocksDB reachableDB; - - private long reachableBytes; - private long reachableFiles; - private long reachableDirs; - private long unreachableBytes; - private long unreachableFiles; - private long unreachableDirs; - private boolean dryRun; - - public FSOBaseTool(String dbPath, boolean dryRun) throws IOException { - this(getStoreFromPath(dbPath), dbPath, dryRun); - } - - /** - * Allows passing RocksDB instance from a MiniOzoneCluster directly to this - * class for testing. - */ - @VisibleForTesting - public FSOBaseTool(DBStore dbStore, String dbPath, boolean isDryRun) throws IOException { - dryRun = isDryRun; - // Counters to track as we walk the tree. 
- reachableBytes = 0; - reachableFiles = 0; - reachableDirs = 0; - unreachableBytes = 0; - unreachableFiles = 0; - unreachableDirs = 0; - - this.store = dbStore; - this.omDBPath = dbPath; - volumeTable = store.getTable(OmMetadataManagerImpl.VOLUME_TABLE, - String.class, - OmVolumeArgs.class); - bucketTable = store.getTable(OmMetadataManagerImpl.BUCKET_TABLE, - String.class, - OmBucketInfo.class); - directoryTable = store.getTable(OmMetadataManagerImpl.DIRECTORY_TABLE, - String.class, - OmDirectoryInfo.class); - fileTable = store.getTable(OmMetadataManagerImpl.FILE_TABLE, - String.class, - OmKeyInfo.class); - deletedDirectoryTable = store.getTable( - OmMetadataManagerImpl.DELETED_DIR_TABLE, - String.class, - OmKeyInfo.class); - deletedTable = store.getTable( - OmMetadataManagerImpl.DELETED_TABLE, - String.class, - RepeatedOmKeyInfo.class); - } - - protected static DBStore getStoreFromPath(String dbPath) throws IOException { - File omDBFile = new File(dbPath); - if (!omDBFile.exists() || !omDBFile.isDirectory()) { - throw new IOException(String.format("Specified OM DB instance %s does " + - "not exist or is not a RocksDB directory.", dbPath)); - } - // Load RocksDB and tables needed. - return OmMetadataManagerImpl.loadDB(new OzoneConfiguration(), - new File(dbPath).getParentFile()); - } - - public FSORepairTool.Report run() throws IOException { - // Iterate all volumes. - try (TableIterator> - volumeIterator = volumeTable.iterator()) { - openReachableDB(); - - while (volumeIterator.hasNext()) { - Table.KeyValue volumeEntry = - volumeIterator.next(); - String volumeKey = volumeEntry.getKey(); - - // Iterate all buckets in the volume. - try (TableIterator> - bucketIterator = bucketTable.iterator()) { - bucketIterator.seek(volumeKey); - while (bucketIterator.hasNext()) { - Table.KeyValue bucketEntry = - bucketIterator.next(); - String bucketKey = bucketEntry.getKey(); - OmBucketInfo bucketInfo = bucketEntry.getValue(); - - if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) { - LOG.debug("Skipping non-FSO bucket {}", bucketKey); - continue; - } - - // Stop this loop once we have seen all buckets in the current - // volume. - if (!bucketKey.startsWith(volumeKey)) { - break; - } - - // Start with a fresh list of reachable files for this bucket. - // Also clears partial state if the tool failed on a previous run. - dropReachableTableIfExists(); - createReachableTable(); - // Process one bucket's FSO tree at a time. - markReachableObjectsInBucket(volumeEntry.getValue(), bucketInfo); - handleUnreachableObjects(volumeEntry.getValue(), bucketInfo); - dropReachableTableIfExists(); - } - } - } - } finally { - closeReachableDB(); - } - - return buildReportAndLog(); - } - - private FSORepairTool.Report buildReportAndLog() { - FSORepairTool.Report report = new FSORepairTool.Report.Builder() - .setReachableDirs(reachableDirs) - .setReachableFiles(reachableFiles) - .setReachableBytes(reachableBytes) - .setUnreachableDirs(unreachableDirs) - .setUnreachableFiles(unreachableFiles) - .setUnreachableBytes(unreachableBytes) - .build(); - - LOG.info("\n{}", report); - return report; - } - - private void markReachableObjectsInBucket(OmVolumeArgs volume, - OmBucketInfo bucket) throws IOException { - LOG.info("Processing bucket {}", bucket.getBucketName()); - // Only put directories in the stack. - // Directory keys should have the form /volumeID/bucketID/parentID/name. 
- Stack dirKeyStack = new Stack<>(); - - // Since the tool uses parent directories to check for reachability, add - // a reachable entry for the bucket as well. - addReachableEntry(volume, bucket, bucket); - // Initialize the stack with all immediate child directories of the - // bucket, and mark them all as reachable. - Collection childDirs = - getChildDirectoriesAndMarkAsReachable(volume, bucket, bucket); - dirKeyStack.addAll(childDirs); - - while (!dirKeyStack.isEmpty()) { - // Get one directory and process its immediate children. - String currentDirKey = dirKeyStack.pop(); - OmDirectoryInfo currentDir = directoryTable.get(currentDirKey); - if (currentDir == null) { - LOG.error("Directory key {} to be processed was not found in the " + - "directory table", currentDirKey); - continue; - } - - // TODO revisit this for a more memory efficient implementation, - // possibly making better use of RocksDB iterators. - childDirs = getChildDirectoriesAndMarkAsReachable(volume, bucket, - currentDir); - dirKeyStack.addAll(childDirs); - } - } - - private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException { - // Check for unreachable directories in the bucket. - String bucketPrefix = OM_KEY_PREFIX + - volume.getObjectID() + - OM_KEY_PREFIX + - bucket.getObjectID(); - - try (TableIterator> dirIterator = - directoryTable.iterator()) { - dirIterator.seek(bucketPrefix); - while (dirIterator.hasNext()) { - Table.KeyValue dirEntry = dirIterator.next(); - String dirKey = dirEntry.getKey(); - - // Only search directories in this bucket. - if (!dirKey.startsWith(bucketPrefix)) { - break; - } - - if (!isReachable(dirKey)) { - LOG.debug("Found unreachable directory: {}", dirKey); - unreachableDirs++; - - if (dryRun) { - LOG.debug("Marking unreachable directory {} for deletion.", dirKey); - OmDirectoryInfo dirInfo = dirEntry.getValue(); - markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(), - dirKey, dirInfo); - } - } - } - } - - // Check for unreachable files - try (TableIterator> - fileIterator = fileTable.iterator()) { - fileIterator.seek(bucketPrefix); - while (fileIterator.hasNext()) { - Table.KeyValue fileEntry = fileIterator.next(); - String fileKey = fileEntry.getKey(); - // Only search files in this bucket. - if (!fileKey.startsWith(bucketPrefix)) { - break; - } - - OmKeyInfo fileInfo = fileEntry.getValue(); - if (!isReachable(fileKey)) { - LOG.debug("Found unreachable file: {}", fileKey); - unreachableBytes += fileInfo.getDataSize(); - unreachableFiles++; - - if (dryRun) { - LOG.debug("Marking unreachable file {} for deletion.", - fileKey); - markFileForDeletion(fileKey, fileInfo); - } - } else { - // NOTE: We are deserializing the proto of every reachable file - // just to log it's size. If we don't need this information we could - // save time by skipping this step. - reachableBytes += fileInfo.getDataSize(); - reachableFiles++; - } - } - } - } - - protected void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IOException { - try (BatchOperation batch = store.initBatchOperation()) { - fileTable.deleteWithBatch(batch, fileKey); - - RepeatedOmKeyInfo originalRepeatedKeyInfo = deletedTable.get(fileKey); - RepeatedOmKeyInfo updatedRepeatedOmKeyInfo = OmUtils.prepareKeyForDelete( - fileInfo, fileInfo.getUpdateID(), true); - // NOTE: The FSO code seems to write the open key entry with the whole - // path, using the object's names instead of their ID. 
This would onyl - // be possible when the file is deleted explicitly, and not part of a - // directory delete. It is also not possible here if the file's parent - // is gone. The name of the key does not matter so just use IDs. - deletedTable.putWithBatch(batch, fileKey, updatedRepeatedOmKeyInfo); - - LOG.debug("Added entry {} to open key table: {}", - fileKey, updatedRepeatedOmKeyInfo); - - store.commitBatchOperation(batch); - } - } - - protected void markDirectoryForDeletion(String volumeName, String bucketName, - String dirKeyName, OmDirectoryInfo dirInfo) throws IOException { - try (BatchOperation batch = store.initBatchOperation()) { - directoryTable.deleteWithBatch(batch, dirKeyName); - // HDDS-7592: Make directory entries in deleted dir table unique. - String deleteDirKeyName = - dirKeyName + OM_KEY_PREFIX + dirInfo.getObjectID(); - - // Convert the directory to OmKeyInfo for deletion. - OmKeyInfo dirAsKeyInfo = OMFileRequest.getOmKeyInfo( - volumeName, bucketName, dirInfo, dirInfo.getName()); - deletedDirectoryTable.putWithBatch(batch, deleteDirKeyName, dirAsKeyInfo); - - store.commitBatchOperation(batch); - } - } - - private Collection getChildDirectoriesAndMarkAsReachable(OmVolumeArgs volume, - OmBucketInfo bucket, - WithObjectID currentDir) throws IOException { - - Collection childDirs = new ArrayList<>(); - - try (TableIterator> - dirIterator = directoryTable.iterator()) { - String dirPrefix = buildReachableKey(volume, bucket, currentDir); - // Start searching the directory table at the current directory's - // prefix to get its immediate children. - dirIterator.seek(dirPrefix); - while (dirIterator.hasNext()) { - Table.KeyValue childDirEntry = - dirIterator.next(); - String childDirKey = childDirEntry.getKey(); - // Stop processing once we have seen all immediate children of this - // directory. - if (!childDirKey.startsWith(dirPrefix)) { - break; - } - // This directory was reached by search. - addReachableEntry(volume, bucket, childDirEntry.getValue()); - childDirs.add(childDirKey); - reachableDirs++; - } - } - - return childDirs; - } - - /** - * Add the specified object to the reachable table, indicating it is part - * of the connected FSO tree. - */ - private void addReachableEntry(OmVolumeArgs volume, - OmBucketInfo bucket, WithObjectID object) throws IOException { - byte[] reachableKey = buildReachableKey(volume, bucket, object) - .getBytes(StandardCharsets.UTF_8); - try { - // No value is needed for this table. - reachableDB.put(reachableCF, reachableKey, new byte[]{}); - } catch (RocksDBException ex) { - throw new IOException(ex.getMessage(), ex); - } - } - - /** - * Build an entry in the reachable table for the current object, which - * could be a bucket, file or directory. - */ - private static String buildReachableKey(OmVolumeArgs volume, - OmBucketInfo bucket, WithObjectID object) { - return OM_KEY_PREFIX + - volume.getObjectID() + - OM_KEY_PREFIX + - bucket.getObjectID() + - OM_KEY_PREFIX + - object.getObjectID(); - } - - /** - * - * @param fileOrDirKey The key of a file or directory in RocksDB. - * @return true if the entry's parent is in the reachable table. 
- */ - protected boolean isReachable(String fileOrDirKey) throws IOException { - byte[] reachableParentKey = - buildReachableParentKey(fileOrDirKey).getBytes(StandardCharsets.UTF_8); - try { - if (reachableDB.keyMayExist( - reachableCF, reachableParentKey, new Holder<>())) { - return reachableDB.get(reachableCF, reachableParentKey) != null; - } else { - return false; - } - } catch (RocksDBException ex) { - throw new IOException(ex.getMessage(), ex); - } - } - - /** - * Build an entry in the reachable table for the current object's parent - * object. The object could be a file or directory. - */ - private static String buildReachableParentKey(String fileOrDirKey) { - String[] keyParts = fileOrDirKey.split(OM_KEY_PREFIX); - // Should be /volID/bucketID/parentID/name - // The first part will be blank since key begins with a slash. - Preconditions.assertTrue(keyParts.length >= 4); - String volumeID = keyParts[1]; - String bucketID = keyParts[2]; - String parentID = keyParts[3]; - - return OM_KEY_PREFIX + - volumeID + - OM_KEY_PREFIX + - bucketID + - OM_KEY_PREFIX + - parentID; - } - - private void openReachableDB() throws IOException { - File reachableDBFile = new File(new File(omDBPath).getParentFile(), - "reachable.db"); - LOG.info("Creating database of reachable directories at {}", - reachableDBFile); - try { - // Delete the DB from the last run if it exists. - if (reachableDBFile.exists()) { - FileUtils.deleteDirectory(reachableDBFile); - } - reachableDBPath = reachableDBFile.toString(); - reachableDB = ManagedRocksDB.open(reachableDBPath); - } catch (RocksDBException ex) { - if (reachableDB != null) { - reachableDB.close(); - } - throw new IOException(ex.getMessage(), ex); - } - } - - private void closeReachableDB() { - if (reachableDB != null) { - reachableDB.close(); - } - } - - private void dropReachableTableIfExists() throws IOException { - try { - List - availableCFs = ManagedRocksDB.listColumnFamilies(new ManagedOptions(), - reachableDBPath); - boolean cfFound = false; - for (byte[] cfNameBytes: availableCFs) { - if (new String(cfNameBytes, UTF_8).equals(new String(REACHABLE_TABLE, UTF_8))) { - cfFound = true; - break; - } - } - - if (cfFound) { - reachableDB.dropColumnFamily(reachableCF); - } - } catch (RocksDBException ex) { - throw new IOException(ex.getMessage(), ex); - } finally { - if (reachableCF != null) { - reachableCF.close(); - } - } - } - - private void createReachableTable() throws IOException { - try { - reachableCF = reachableDB.createColumnFamily( - new ColumnFamilyDescriptor(REACHABLE_TABLE)); - } catch (RocksDBException ex) { - if (reachableCF != null) { - reachableCF.close(); - } - throw new IOException(ex.getMessage(), ex); - } - } - - /** - * Define a Report to be created. - */ - public static class Report { - private long reachableBytes; - private long reachableFiles; - private long reachableDirs; - private long unreachableBytes; - private long unreachableFiles; - private long unreachableDirs; - - /** - * Builds one report that is the aggregate of multiple others. - */ - public Report(FSORepairTool.Report... 
reports) { - reachableBytes = 0; - reachableFiles = 0; - reachableDirs = 0; - unreachableBytes = 0; - unreachableFiles = 0; - unreachableDirs = 0; - - for (FSORepairTool.Report report: reports) { - reachableBytes += report.reachableBytes; - reachableFiles += report.reachableFiles; - reachableDirs += report.reachableDirs; - unreachableBytes += report.unreachableBytes; - unreachableFiles += report.unreachableFiles; - unreachableDirs += report.unreachableDirs; - } - } - - private Report(FSORepairTool.Report.Builder builder) { - reachableBytes = builder.reachableBytes; - reachableFiles = builder.reachableFiles; - reachableDirs = builder.reachableDirs; - unreachableBytes = builder.unreachableBytes; - unreachableFiles = builder.unreachableFiles; - unreachableDirs = builder.unreachableDirs; - } - - public long getReachableBytes() { - return reachableBytes; - } - - public long getReachableFiles() { - return reachableFiles; - } - - public long getReachableDirs() { - return reachableDirs; - } - - public long getUnreachableBytes() { - return unreachableBytes; - } - - public long getUnreachableFiles() { - return unreachableFiles; - } - - public long getUnreachableDirs() { - return unreachableDirs; - } - - @Override - public String toString() { - return "Reachable:" + - "\n\tDirectories: " + reachableDirs + - "\n\tFiles: " + reachableFiles + - "\n\tBytes: " + reachableBytes + - "\nUnreachable:" + - "\n\tDirectories: " + unreachableDirs + - "\n\tFiles: " + unreachableFiles + - "\n\tBytes: " + unreachableBytes; - } - - @Override - public boolean equals(Object other) { - if (other == this) { - return true; - } - if (other == null || getClass() != other.getClass()) { - return false; - } - FSORepairTool.Report report = (FSORepairTool.Report) other; - - // Useful for testing. - LOG.debug("Comparing reports\nExpect:\n{}\nActual:\n{}", this, report); - - return reachableBytes == report.reachableBytes && - reachableFiles == report.reachableFiles && - reachableDirs == report.reachableDirs && - unreachableBytes == report.unreachableBytes && - unreachableFiles == report.unreachableFiles && - unreachableDirs == report.unreachableDirs; - } - - @Override - public int hashCode() { - return Objects.hash(reachableBytes, - reachableFiles, - reachableDirs, - unreachableBytes, - unreachableFiles, - unreachableDirs); - } - - /** - * Builder class for a Report. 
- */ - public static final class Builder { - private long reachableBytes; - private long reachableFiles; - private long reachableDirs; - private long unreachableBytes; - private long unreachableFiles; - private long unreachableDirs; - - public Builder() { - } - - @SuppressWarnings("checkstyle:hiddenfield") - public FSOBaseTool.Report.Builder setReachableBytes(long reachableBytes) { - this.reachableBytes = reachableBytes; - return this; - } - - @SuppressWarnings("checkstyle:hiddenfield") - public FSOBaseTool.Report.Builder setReachableFiles(long reachableFiles) { - this.reachableFiles = reachableFiles; - return this; - } - - @SuppressWarnings("checkstyle:hiddenfield") - public FSOBaseTool.Report.Builder setReachableDirs(long reachableDirs) { - this.reachableDirs = reachableDirs; - return this; - } - - @SuppressWarnings("checkstyle:hiddenfield") - public FSOBaseTool.Report.Builder setUnreachableBytes(long unreachableBytes) { - this.unreachableBytes = unreachableBytes; - return this; - } - - @SuppressWarnings("checkstyle:hiddenfield") - public FSOBaseTool.Report.Builder setUnreachableFiles(long unreachableFiles) { - this.unreachableFiles = unreachableFiles; - return this; - } - - @SuppressWarnings("checkstyle:hiddenfield") - public FSOBaseTool.Report.Builder setUnreachableDirs(long unreachableDirs) { - this.unreachableDirs = unreachableDirs; - return this; - } - - public FSOBaseTool.Report build() { - return new FSOBaseTool.Report(this); - } - } - } -} diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java deleted file mode 100644 index 537abfad32fa..000000000000 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java +++ /dev/null @@ -1,27 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * SCM related cli tools. - */ - -/** - * Ozone Admin tools. - */ -/** - * Ozone debug/repair tools uility class. - */ -package org.apache.hadoop.ozone.common; diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java deleted file mode 100644 index 26ec8614aa21..000000000000 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.debug; - -import org.apache.hadoop.hdds.cli.SubcommandWithParent; -import org.apache.hadoop.ozone.common.FSOBaseCLI; -import org.apache.hadoop.ozone.common.FSOBaseTool; -import org.kohsuke.MetaInfServices; -import picocli.CommandLine; - -/** - * Parser for scm.db file. - */ -@CommandLine.Command( - name = "fso-tree", - description = "Identify a disconnected FSO tree, and optionally mark " + - "unreachable entries for deletion. OM should be " + - "stopped while this tool is run. Information will be logged at " + - "INFO and DEBUG levels." -) -@MetaInfServices(SubcommandWithParent.class) -public class FSODebugCLI extends FSOBaseCLI { - - @CommandLine.ParentCommand - private OzoneDebug parent; - - @Override - public Void call() throws Exception { - - try { - // TODO case insensitive enum options. - FSOBaseTool - baseTool = new FSOBaseTool(getDbPath(), true); - baseTool.run(); - } catch (Exception ex) { - throw new IllegalArgumentException("FSO inspection failed: " + ex.getMessage()); - } - - if (getVerbose()) { - System.out.println("FSO inspection finished. See client logs for results."); - } - - return null; - } - - @Override - public Class getParentType() { - return OzoneDebug.class; - } -} - diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java index c95a60394ad8..35e3bd5936a2 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java @@ -19,11 +19,12 @@ package org.apache.hadoop.ozone.repair.om; import org.apache.hadoop.hdds.cli.SubcommandWithParent; -import org.apache.hadoop.ozone.common.FSOBaseCLI; import org.apache.hadoop.ozone.repair.OzoneRepair; import org.kohsuke.MetaInfServices; import picocli.CommandLine; +import java.util.concurrent.Callable; + /** * Parser for scm.db file. */ @@ -35,24 +36,36 @@ "INFO and DEBUG levels." 
) @MetaInfServices(SubcommandWithParent.class) -public class FSORepairCLI extends FSOBaseCLI { +public class FSORepairCLI implements Callable, SubcommandWithParent { @CommandLine.ParentCommand private OzoneRepair parent; + @CommandLine.Option(names = {"--db"}, + required = true, + description = "Path to OM RocksDB") + private String dbPath; + + @CommandLine.Option(names = {"--dry-run"}, + description = "Path to OM RocksDB") + private boolean dryRun; + + @CommandLine.Option(names = {"--verbose"}, + description = "More verbose output. ") + private boolean verbose; + + @Override public Void call() throws Exception { - try { - // TODO case insensitive enum options. FSORepairTool - repairTool = new FSORepairTool(getDbPath(), false); + repairTool = new FSORepairTool(dbPath, dryRun); repairTool.run(); } catch (Exception ex) { throw new IllegalArgumentException("FSO repair failed: " + ex.getMessage()); } - if (getVerbose()) { + if (verbose) { System.out.println("FSO repair finished. See client logs for results."); } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index ea5bf8625123..6ee551a6580d 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -15,25 +15,674 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.hadoop.ozone.repair.om; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.db.RocksDatabase; +import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.DBStore; -import org.apache.hadoop.ozone.common.FSOBaseTool; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.hdds.utils.db.BatchOperation; +import org.apache.hadoop.hdds.utils.db.TableConfig; +import org.apache.hadoop.hdds.utils.db.DBProfile; +import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteOptions; +import org.apache.hadoop.ozone.OmUtils; +import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; +import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.WithObjectID; +import org.apache.hadoop.ozone.om.request.file.OMFileRequest; +import org.apache.ratis.util.Preconditions; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.Stack; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DB_PROFILE; +import static org.apache.hadoop.hdds.utils.db.DBStoreBuilder.HDDS_DEFAULT_DB_PROFILE; +import static 
org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
 
 /**
- * Tool to identify and repair disconnected FSO trees in all buckets.
+ * Tool to identify disconnected FSO trees in all buckets.
+ * The tool will log information about unreachable files or directories.
+ * If deletes are still in progress (the deleted directory table is not empty), the tool may
+ * report that the tree is disconnected, even though pending deletes would
+ * fix the issue.
+ *
+ * Before using the tool, make sure all OMs are stopped,
+ * and that all Ratis logs have been flushed to the OM DB. This can be
+ * done using `ozone admin prepare` before running the tool, and `ozone admin
+ * cancelprepare` when done.
+ *
+ * The tool will run a DFS from each bucket, and save all reachable
+ * directories as keys in a new temporary RocksDB instance called "reachable.db"
+ * in the same directory as om.db. It will then scan the entire file and
+ * directory tables for each bucket to see if each object's parent is in the
+ * reachable table of reachable.db. The reachable table will be dropped and
+ * recreated for each bucket.
+ * The tool is idempotent. reachable.db will not be deleted automatically
+ * when the tool finishes, in case users want to manually inspect it. It can
+ * be safely deleted once the tool finishes.
  */
-public class FSORepairTool extends FSOBaseTool {
+public class FSORepairTool {
+  public static final Logger LOG =
+      LoggerFactory.getLogger(org.apache.hadoop.ozone.repair.om.FSORepairTool.class);
+
+  private final String omDBPath;
+
+  private final DBStore store;
+  private final Table<String, OmVolumeArgs> volumeTable;
+  private final Table<String, OmBucketInfo> bucketTable;
+  private final Table<String, OmDirectoryInfo> directoryTable;
+  private final Table<String, OmKeyInfo> fileTable;
+  private final Table<String, OmKeyInfo> deletedDirectoryTable;
+  private final Table<String, RepeatedOmKeyInfo> deletedTable;
+  // The temporary DB is used to track which items have been seen.
+  // Since usage of this DB is simple, use it directly from
+  // RocksDB.
+  private String reachableDBPath;
+  private static final String REACHABLE_TABLE = "reachable";
+  private static final byte[] REACHABLE_TABLE_BYTES =
+      REACHABLE_TABLE.getBytes(StandardCharsets.UTF_8);
+  private ColumnFamilyHandle reachableCFHandle;
+  private RocksDatabase reachableDB;
+
+  private long reachableBytes;
+  private long reachableFiles;
+  private long reachableDirs;
+  private long unreachableBytes;
+  private long unreachableFiles;
+  private long unreachableDirs;
+  private boolean dryRun;
 
   public FSORepairTool(String dbPath, boolean dryRun) throws IOException {
     this(getStoreFromPath(dbPath), dbPath, dryRun);
   }
 
-  public FSORepairTool(DBStore dbStore, String dbPath, boolean dryRun) throws IOException {
-    super(dbStore, dbPath, dryRun);
+  /**
+   * Allows passing RocksDB instance from a MiniOzoneCluster directly to this
+   * class for testing.
+   */
+  @VisibleForTesting
+  public FSORepairTool(DBStore dbStore, String dbPath, boolean isDryRun) throws IOException {
+    dryRun = isDryRun;
+    // Counters to track as we walk the tree.
+    reachableBytes = 0;
+    reachableFiles = 0;
+    reachableDirs = 0;
+    unreachableBytes = 0;
+    unreachableFiles = 0;
+    unreachableDirs = 0;
+
+    this.store = dbStore;
+    this.omDBPath = dbPath;
+    volumeTable = store.getTable(OmMetadataManagerImpl.VOLUME_TABLE,
+        String.class,
+        OmVolumeArgs.class);
+    bucketTable = store.getTable(OmMetadataManagerImpl.BUCKET_TABLE,
+        String.class,
+        OmBucketInfo.class);
+    directoryTable = store.getTable(OmMetadataManagerImpl.DIRECTORY_TABLE,
+        String.class,
+        OmDirectoryInfo.class);
+    fileTable = store.getTable(OmMetadataManagerImpl.FILE_TABLE,
+        String.class,
+        OmKeyInfo.class);
+    deletedDirectoryTable = store.getTable(
+        OmMetadataManagerImpl.DELETED_DIR_TABLE,
+        String.class,
+        OmKeyInfo.class);
+    deletedTable = store.getTable(
+        OmMetadataManagerImpl.DELETED_TABLE,
+        String.class,
+        RepeatedOmKeyInfo.class);
+  }
+
+  protected static DBStore getStoreFromPath(String dbPath) throws IOException {
+    File omDBFile = new File(dbPath);
+    if (!omDBFile.exists() || !omDBFile.isDirectory()) {
+      throw new IOException(String.format("Specified OM DB instance %s does " +
+          "not exist or is not a RocksDB directory.", dbPath));
+    }
+    // Load RocksDB and tables needed.
+    return OmMetadataManagerImpl.loadDB(new OzoneConfiguration(),
+        new File(dbPath).getParentFile());
+  }
+
+  public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOException {
+    // Iterate all volumes.
+    try (TableIterator<String, ? extends Table.KeyValue<String, OmVolumeArgs>>
+             volumeIterator = volumeTable.iterator()) {
+      openReachableDB();
+
+      while (volumeIterator.hasNext()) {
+        Table.KeyValue<String, OmVolumeArgs> volumeEntry =
+            volumeIterator.next();
+        String volumeKey = volumeEntry.getKey();
+
+        // Iterate all buckets in the volume.
+        try (TableIterator<String, ? extends Table.KeyValue<String, OmBucketInfo>>
+                 bucketIterator = bucketTable.iterator()) {
+          bucketIterator.seek(volumeKey);
+          while (bucketIterator.hasNext()) {
+            Table.KeyValue<String, OmBucketInfo> bucketEntry =
+                bucketIterator.next();
+            String bucketKey = bucketEntry.getKey();
+            OmBucketInfo bucketInfo = bucketEntry.getValue();
+
+            if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) {
+              LOG.debug("Skipping non-FSO bucket {}", bucketKey);
+              continue;
+            }
+
+            // Stop this loop once we have seen all buckets in the current
+            // volume.
+            if (!bucketKey.startsWith(volumeKey)) {
+              break;
+            }
+
+            // Start with a fresh list of reachable files for this bucket.
+            // Also clears partial state if the tool failed on a previous run.
+            dropReachableTableIfExists();
+            createReachableTable();
+            // Process one bucket's FSO tree at a time.
+            markReachableObjectsInBucket(volumeEntry.getValue(), bucketInfo);
+            handleUnreachableObjects(volumeEntry.getValue(), bucketInfo);
+            dropReachableTableIfExists();
+          }
+        }
+      }
+    } finally {
+      closeReachableDB();
+    }
+
+    return buildReportAndLog();
+  }
+
+  private Report buildReportAndLog() {
+    Report report = new Report.Builder()
+        .setReachableDirs(reachableDirs)
+        .setReachableFiles(reachableFiles)
+        .setReachableBytes(reachableBytes)
+        .setUnreachableDirs(unreachableDirs)
+        .setUnreachableFiles(unreachableFiles)
+        .setUnreachableBytes(unreachableBytes)
+        .build();
+
+    LOG.info("\n{}", report);
+    return report;
+  }
+
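As a quick orientation to the reporting flow above, here is a hedged sketch of building and aggregating reports with the Report.Builder and the varargs Report constructor defined later in this file. The counts are invented and the driver class is hypothetical:

```java
import org.apache.hadoop.ozone.repair.om.FSORepairTool;

public final class ReportDemo {
  public static void main(String[] args) {
    // Invented per-bucket counts, just to exercise the Builder.
    FSORepairTool.Report bucket1 = new FSORepairTool.Report.Builder()
        .setReachableDirs(2).setReachableFiles(3).setReachableBytes(30)
        .build();
    FSORepairTool.Report bucket2 = new FSORepairTool.Report.Builder()
        .setUnreachableDirs(1).setUnreachableFiles(4).setUnreachableBytes(40)
        .build();
    // The varargs constructor sums per-bucket reports, as the tests do.
    FSORepairTool.Report total = new FSORepairTool.Report(bucket1, bucket2);
    System.out.println(total);
  }
}
```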
+  private void markReachableObjectsInBucket(OmVolumeArgs volume,
+      OmBucketInfo bucket) throws IOException {
+    LOG.info("Processing bucket {}", bucket.getBucketName());
+    // Only put directories in the stack.
+    // Directory keys should have the form /volumeID/bucketID/parentID/name.
+    Stack<String> dirKeyStack = new Stack<>();
+
+    // Since the tool uses parent directories to check for reachability, add
+    // a reachable entry for the bucket as well.
+    addReachableEntry(volume, bucket, bucket);
+    // Initialize the stack with all immediate child directories of the
+    // bucket, and mark them all as reachable.
+    Collection<String> childDirs =
+        getChildDirectoriesAndMarkAsReachable(volume, bucket, bucket);
+    dirKeyStack.addAll(childDirs);
+
+    while (!dirKeyStack.isEmpty()) {
+      // Get one directory and process its immediate children.
+      String currentDirKey = dirKeyStack.pop();
+      OmDirectoryInfo currentDir = directoryTable.get(currentDirKey);
+      if (currentDir == null) {
+        LOG.error("Directory key {} to be processed was not found in the " +
+            "directory table", currentDirKey);
+        continue;
+      }
+
+      // TODO revisit this for a more memory efficient implementation,
+      // possibly making better use of RocksDB iterators.
+      childDirs = getChildDirectoriesAndMarkAsReachable(volume, bucket,
+          currentDir);
+      dirKeyStack.addAll(childDirs);
+    }
+  }
+
+  private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException {
+    // Check for unreachable directories in the bucket.
+    String bucketPrefix = OM_KEY_PREFIX +
+        volume.getObjectID() +
+        OM_KEY_PREFIX +
+        bucket.getObjectID();
+
+    try (TableIterator<String, ? extends Table.KeyValue<String, OmDirectoryInfo>> dirIterator =
+             directoryTable.iterator()) {
+      dirIterator.seek(bucketPrefix);
+      while (dirIterator.hasNext()) {
+        Table.KeyValue<String, OmDirectoryInfo> dirEntry = dirIterator.next();
+        String dirKey = dirEntry.getKey();
+
+        // Only search directories in this bucket.
+        if (!dirKey.startsWith(bucketPrefix)) {
+          break;
+        }
+
+        if (!isReachable(dirKey)) {
+          LOG.debug("Found unreachable directory: {}", dirKey);
+          unreachableDirs++;
+
+          if (dryRun) {
+            LOG.debug("Marking unreachable directory {} for deletion.", dirKey);
+            OmDirectoryInfo dirInfo = dirEntry.getValue();
+            markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(),
+                dirKey, dirInfo);
+          }
+        }
+      }
+    }
+
+    // Check for unreachable files.
+    try (TableIterator<String, ? extends Table.KeyValue<String, OmKeyInfo>>
+             fileIterator = fileTable.iterator()) {
+      fileIterator.seek(bucketPrefix);
+      while (fileIterator.hasNext()) {
+        Table.KeyValue<String, OmKeyInfo> fileEntry = fileIterator.next();
+        String fileKey = fileEntry.getKey();
+        // Only search files in this bucket.
+        if (!fileKey.startsWith(bucketPrefix)) {
+          break;
+        }
+
+        OmKeyInfo fileInfo = fileEntry.getValue();
+        if (!isReachable(fileKey)) {
+          LOG.debug("Found unreachable file: {}", fileKey);
+          unreachableBytes += fileInfo.getDataSize();
+          unreachableFiles++;
+
+          if (dryRun) {
+            LOG.debug("Marking unreachable file {} for deletion.",
+                fileKey);
+            markFileForDeletion(fileKey, fileInfo);
+          }
+        } else {
+          // NOTE: We are deserializing the proto of every reachable file
+          // just to log its size. If we don't need this information we could
+          // save time by skipping this step.
+          reachableBytes += fileInfo.getDataSize();
+          reachableFiles++;
+        }
+      }
+    }
+  }
+
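The seek-then-break pattern in handleUnreachableObjects() above is the same prefix-bounded scan used throughout the tool. A minimal standalone sketch of the idea, with a plain TreeMap standing in for the RocksDB-backed table and fabricated keys:

```java
import java.util.Map;
import java.util.TreeMap;

public final class PrefixScanDemo {
  public static void main(String[] args) {
    // Sorted key space, like a RocksDB table with the default comparator.
    TreeMap<String, String> fileTable = new TreeMap<>();
    fileTable.put("/vol1/bucket1/dir1/file1", "a");
    fileTable.put("/vol1/bucket1/dir1/file2", "b");
    fileTable.put("/vol1/bucket2/dir9/file3", "c");

    String bucketPrefix = "/vol1/bucket1";
    // tailMap(prefix) plays the role of iterator.seek(prefix).
    for (Map.Entry<String, String> entry
        : fileTable.tailMap(bucketPrefix).entrySet()) {
      if (!entry.getKey().startsWith(bucketPrefix)) {
        break; // past the last key in this bucket, stop early
      }
      System.out.println("key in bucket: " + entry.getKey());
    }
  }
}
```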
+  protected void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IOException {
+    try (BatchOperation batch = store.initBatchOperation()) {
+      fileTable.deleteWithBatch(batch, fileKey);
+
+      RepeatedOmKeyInfo originalRepeatedKeyInfo = deletedTable.get(fileKey);
+      RepeatedOmKeyInfo updatedRepeatedOmKeyInfo = OmUtils.prepareKeyForDelete(
+          fileInfo, fileInfo.getUpdateID(), true);
+      // NOTE: The FSO code seems to write the open key entry with the whole
+      // path, using the object's names instead of their ID. This would only
+      // be possible when the file is deleted explicitly, and not part of a
+      // directory delete. It is also not possible here if the file's parent
+      // is gone. The name of the key does not matter so just use IDs.
+      deletedTable.putWithBatch(batch, fileKey, updatedRepeatedOmKeyInfo);
+
+      LOG.debug("Added entry {} to open key table: {}",
+          fileKey, updatedRepeatedOmKeyInfo);
+
+      store.commitBatchOperation(batch);
+    }
+  }
+
+  protected void markDirectoryForDeletion(String volumeName, String bucketName,
+      String dirKeyName, OmDirectoryInfo dirInfo) throws IOException {
+    try (BatchOperation batch = store.initBatchOperation()) {
+      directoryTable.deleteWithBatch(batch, dirKeyName);
+      // HDDS-7592: Make directory entries in deleted dir table unique.
+      String deleteDirKeyName =
+          dirKeyName + OM_KEY_PREFIX + dirInfo.getObjectID();
+
+      // Convert the directory to OmKeyInfo for deletion.
+      OmKeyInfo dirAsKeyInfo = OMFileRequest.getOmKeyInfo(
+          volumeName, bucketName, dirInfo, dirInfo.getName());
+      deletedDirectoryTable.putWithBatch(batch, deleteDirKeyName, dirAsKeyInfo);
+
+      store.commitBatchOperation(batch);
+    }
+  }
+
+  private Collection<String> getChildDirectoriesAndMarkAsReachable(OmVolumeArgs volume,
+      OmBucketInfo bucket,
+      WithObjectID currentDir) throws IOException {
+
+    Collection<String> childDirs = new ArrayList<>();
+
+    try (TableIterator<String, ? extends Table.KeyValue<String, OmDirectoryInfo>>
+             dirIterator = directoryTable.iterator()) {
+      String dirPrefix = buildReachableKey(volume, bucket, currentDir);
+      // Start searching the directory table at the current directory's
+      // prefix to get its immediate children.
+      dirIterator.seek(dirPrefix);
+      while (dirIterator.hasNext()) {
+        Table.KeyValue<String, OmDirectoryInfo> childDirEntry =
+            dirIterator.next();
+        String childDirKey = childDirEntry.getKey();
+        // Stop processing once we have seen all immediate children of this
+        // directory.
+        if (!childDirKey.startsWith(dirPrefix)) {
+          break;
+        }
+        // This directory was reached by search.
+        addReachableEntry(volume, bucket, childDirEntry.getValue());
+        childDirs.add(childDirKey);
+        reachableDirs++;
+      }
+    }
+
+    return childDirs;
+  }
+
+  /**
+   * Add the specified object to the reachable table, indicating it is part
+   * of the connected FSO tree.
+   */
+  private void addReachableEntry(OmVolumeArgs volume,
+      OmBucketInfo bucket, WithObjectID object) throws IOException {
+    byte[] reachableKey = buildReachableKey(volume, bucket, object)
+        .getBytes(StandardCharsets.UTF_8);
+    // No value is needed for this table.
+    reachableDB.put(reachableCFHandle, reachableKey, new byte[]{});
+  }
+
+  /**
+   * Build an entry in the reachable table for the current object, which
+   * could be a bucket, file or directory.
+   */
+  private static String buildReachableKey(OmVolumeArgs volume,
+      OmBucketInfo bucket, WithObjectID object) {
+    return OM_KEY_PREFIX +
+        volume.getObjectID() +
+        OM_KEY_PREFIX +
+        bucket.getObjectID() +
+        OM_KEY_PREFIX +
+        object.getObjectID();
+  }
+
+  /**
+   * @param fileOrDirKey The key of a file or directory in RocksDB.
+   * @return true if the entry's parent is in the reachable table.
+   */
+  protected boolean isReachable(String fileOrDirKey) throws IOException {
+    byte[] reachableParentKey =
+        buildReachableParentKey(fileOrDirKey).getBytes(StandardCharsets.UTF_8);
+
+    return reachableDB.get(reachableCFHandle, reachableParentKey, REACHABLE_TABLE) != null;
+  }
+
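isReachable() above checks membership by parent prefix, and the next method builds that prefix. As a toy illustration of the derivation, assuming only the /volumeID/bucketID/parentID/name key shape (the numeric IDs here are fabricated):

```java
public final class ReachableParentKeyDemo {
  private static final String OM_KEY_PREFIX = "/";

  public static void main(String[] args) {
    // A file table key of the form /volumeID/bucketID/parentID/name.
    String fileKey = "/-4611686018427388160/-9223372036854775552/-9223372036854775040/file1";
    String[] keyParts = fileKey.split(OM_KEY_PREFIX);
    // keyParts[0] is empty because the key begins with a slash.
    String parentKey = OM_KEY_PREFIX + keyParts[1]
        + OM_KEY_PREFIX + keyParts[2]
        + OM_KEY_PREFIX + keyParts[3];
    // The tool looks this prefix up in the reachable column family.
    System.out.println(parentKey);
  }
}
```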
+  /**
+   * Build an entry in the reachable table for the current object's parent
+   * object. The object could be a file or directory.
+   */
+  private static String buildReachableParentKey(String fileOrDirKey) {
+    String[] keyParts = fileOrDirKey.split(OM_KEY_PREFIX);
+    // Should be /volID/bucketID/parentID/name
+    // The first part will be blank since key begins with a slash.
+    Preconditions.assertTrue(keyParts.length >= 4);
+    String volumeID = keyParts[1];
+    String bucketID = keyParts[2];
+    String parentID = keyParts[3];
+
+    return OM_KEY_PREFIX +
+        volumeID +
+        OM_KEY_PREFIX +
+        bucketID +
+        OM_KEY_PREFIX +
+        parentID;
+  }
+
+  private void openReachableDB() throws IOException {
+    File reachableDBFile = new File(new File(omDBPath).getParentFile(),
+        "reachable.db");
+    LOG.info("Creating database of reachable directories at {}",
+        reachableDBFile);
+    // Delete the DB from the last run if it exists.
+    if (reachableDBFile.exists()) {
+      FileUtils.deleteDirectory(reachableDBFile);
+    }
+    reachableDBPath = reachableDBFile.toString();
+    reachableDB = buildReachableRocksDB(reachableDBFile);
+  }
+
+  private RocksDatabase buildReachableRocksDB(File reachableDBFile) throws IOException {
+    DBProfile profile = new OzoneConfiguration().getEnum(HDDS_DB_PROFILE, HDDS_DEFAULT_DB_PROFILE);
+    Set<TableConfig> tableConfigs = new HashSet<>();
+    tableConfigs.add(new TableConfig("default", profile.getColumnFamilyOptions()));
+
+    return RocksDatabase.open(reachableDBFile,
+        profile.getDBOptions(),
+        new ManagedWriteOptions(),
+        tableConfigs, false);
+  }
+
+  private void closeReachableDB() {
+    if (reachableDB != null) {
+      reachableDB.close();
+    }
+  }
+
+  private void dropReachableTableIfExists() throws IOException {
+    try {
+      List<byte[]>
+          availableCFs = reachableDB.listColumnFamiliesEmptyOptions(reachableDBPath);
+      boolean cfFound = false;
+      for (byte[] cfNameBytes: availableCFs) {
+        if (new String(cfNameBytes, UTF_8).equals(new String(REACHABLE_TABLE_BYTES, UTF_8))) {
+          cfFound = true;
+          break;
+        }
+      }
+
+      if (cfFound) {
+        reachableDB.dropColumnFamily(reachableCFHandle);
+      }
+    } catch (RocksDBException ex) {
+      throw new IOException(ex.getMessage(), ex);
+    } finally {
+      if (reachableCFHandle != null) {
+        reachableCFHandle.close();
+      }
+    }
+  }
+
+  private void createReachableTable() throws IOException {
+    reachableCFHandle = reachableDB.createColumnFamily(
+        new ColumnFamilyDescriptor(REACHABLE_TABLE_BYTES));
   }
 
   /**
+   * Define a Report to be created.
+   */
+  public static class Report {
+    private long reachableBytes;
+    private long reachableFiles;
+    private long reachableDirs;
+    private long unreachableBytes;
+    private long unreachableFiles;
+    private long unreachableDirs;
+
+    /**
+     * Builds one report that is the aggregate of multiple others.
+     */
+    public Report(org.apache.hadoop.ozone.repair.om.FSORepairTool.Report...
reports) { + reachableBytes = 0; + reachableFiles = 0; + reachableDirs = 0; + unreachableBytes = 0; + unreachableFiles = 0; + unreachableDirs = 0; + + for (org.apache.hadoop.ozone.repair.om.FSORepairTool.Report report: reports) { + reachableBytes += report.reachableBytes; + reachableFiles += report.reachableFiles; + reachableDirs += report.reachableDirs; + unreachableBytes += report.unreachableBytes; + unreachableFiles += report.unreachableFiles; + unreachableDirs += report.unreachableDirs; + } + } + + private Report(org.apache.hadoop.ozone.repair.om.FSORepairTool.Report.Builder builder) { + reachableBytes = builder.reachableBytes; + reachableFiles = builder.reachableFiles; + reachableDirs = builder.reachableDirs; + unreachableBytes = builder.unreachableBytes; + unreachableFiles = builder.unreachableFiles; + unreachableDirs = builder.unreachableDirs; + } + + public long getReachableBytes() { + return reachableBytes; + } + + public long getReachableFiles() { + return reachableFiles; + } + + public long getReachableDirs() { + return reachableDirs; + } + + public long getUnreachableBytes() { + return unreachableBytes; + } + + public long getUnreachableFiles() { + return unreachableFiles; + } + + public long getUnreachableDirs() { + return unreachableDirs; + } + + @Override + public String toString() { + return "Reachable:" + + "\n\tDirectories: " + reachableDirs + + "\n\tFiles: " + reachableFiles + + "\n\tBytes: " + reachableBytes + + "\nUnreachable:" + + "\n\tDirectories: " + unreachableDirs + + "\n\tFiles: " + unreachableFiles + + "\n\tBytes: " + unreachableBytes; + } + + @Override + public boolean equals(Object other) { + if (other == this) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + FSORepairTool.Report report = (FSORepairTool.Report) other; + + // Useful for testing. + LOG.debug("Comparing reports\nExpect:\n{}\nActual:\n{}", this, report); + + return reachableBytes == report.reachableBytes && + reachableFiles == report.reachableFiles && + reachableDirs == report.reachableDirs && + unreachableBytes == report.unreachableBytes && + unreachableFiles == report.unreachableFiles && + unreachableDirs == report.unreachableDirs; + } + + @Override + public int hashCode() { + return Objects.hash(reachableBytes, + reachableFiles, + reachableDirs, + unreachableBytes, + unreachableFiles, + unreachableDirs); + } + + /** + * Builder class for a Report. 
+ */ + public static final class Builder { + private long reachableBytes; + private long reachableFiles; + private long reachableDirs; + private long unreachableBytes; + private long unreachableFiles; + private long unreachableDirs; + + public Builder() { + } + + @SuppressWarnings("checkstyle:hiddenfield") + public Builder setReachableBytes(long reachableBytes) { + this.reachableBytes = reachableBytes; + return this; + } + + @SuppressWarnings("checkstyle:hiddenfield") + public Builder setReachableFiles(long reachableFiles) { + this.reachableFiles = reachableFiles; + return this; + } + + @SuppressWarnings("checkstyle:hiddenfield") + public Builder setReachableDirs(long reachableDirs) { + this.reachableDirs = reachableDirs; + return this; + } + + @SuppressWarnings("checkstyle:hiddenfield") + public Builder setUnreachableBytes(long unreachableBytes) { + this.unreachableBytes = unreachableBytes; + return this; + } + + @SuppressWarnings("checkstyle:hiddenfield") + public Builder setUnreachableFiles(long unreachableFiles) { + this.unreachableFiles = unreachableFiles; + return this; + } + + @SuppressWarnings("checkstyle:hiddenfield") + public Builder setUnreachableDirs(long unreachableDirs) { + this.unreachableDirs = unreachableDirs; + return this; + } + + public Report build() { + return new Report(this); + } + } + } } From 574ec846a75df00b522afac0343b40cd3df77fb5 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Mon, 28 Oct 2024 13:23:26 +0530 Subject: [PATCH 07/28] HDDS-8101. Add FSO repair tool to ozone CLI in read-only and repair modes --- .../hadoop/hdds/utils/db/RocksDatabase.java | 1 - .../hdds/utils/db/managed/ManagedRocksDB.java | 1 - .../hadoop/fs/ozone/TestFSORepairTool.java | 39 ++++- .../hadoop/ozone/repair/om/FSORepairCLI.java | 13 +- .../hadoop/ozone/repair/om/FSORepairTool.java | 154 ++++++++++++------ 5 files changed, 147 insertions(+), 61 deletions(-) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java index afe0153e1688..e1d9b29c5bea 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java @@ -694,7 +694,6 @@ public byte[] get(ColumnFamilyHandle handle, byte[] key, String familyName) thro } } - /** * Get the value mapped to the given key. 
* diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java index 6248dfba321c..5a5a577351b1 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java @@ -102,5 +102,4 @@ public void deleteFile(LiveFileMetaData fileToBeDeleted) File file = new File(fileToBeDeleted.path(), fileToBeDeleted.fileName()); ManagedRocksObjectUtils.waitForFileDelete(file, Duration.ofSeconds(60)); } - } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java index 430a931d0547..5b1101e6752d 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; +import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.client.BucketArgs; import org.apache.hadoop.ozone.client.ObjectStore; import org.apache.hadoop.ozone.client.OzoneClient; @@ -58,6 +59,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; /** * FSORepairTool test cases. @@ -97,7 +99,7 @@ public static void init() throws Exception { // Init ofs. final String rootPath = String.format("%s://%s/", - OZONE_OFS_URI_SCHEME, cluster.getOzoneManager().getOMNodeId()); + OZONE_OFS_URI_SCHEME, conf.get(OZONE_OM_ADDRESS_KEY)); conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); fs = FileSystem.get(conf); client = OzoneClientFactory.getRpcClient("omservice", conf); @@ -123,13 +125,32 @@ public static void teardown() { IOUtils.closeQuietly(fs); } + @Test + public void testFSORepairToolWithVolumeAndBucketFilter() throws Exception { + // Build a tree with unreachable points in multiple volumes and buckets + FSORepairTool.Report reportVol1Buck1 = buildDisconnectedTree("vol1", "bucket1", 10); + FSORepairTool.Report reportVol2Buck2 = buildDisconnectedTree("vol2", "bucket2", 10); + + // Case 1: Run repair tool for a specific volume and bucket. + FSORepairTool repairToolFiltered = new FSORepairTool( + getOmDB(), getOmDBLocation(), false, "vol1", "bucket1"); + FSORepairTool.Report filteredReport = repairToolFiltered.run(); + + // Ensure that only the unreachable points from vol1/bucket1 are in the report. + Assertions.assertEquals(reportVol1Buck1, filteredReport, "Filtered report should match the unreachable points in vol1/bucket1."); + Assertions.assertNotEquals(reportVol2Buck2, filteredReport, "Filtered report should not include vol2/bucket2."); + + // Ensure unreachable objects in vol1/bucket1 are properly marked for delete. 
+ assertDisconnectedObjectsMarkedForDelete(1); + } + @Test public void testConnectedTreeOneBucket() throws Exception { org.apache.hadoop.ozone.repair.om.FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1"); // Test the connected tree in debug mode. FSORepairTool fsoTool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true); + getOmDBLocation(), true, null, null); FSORepairTool.Report debugReport = fsoTool.run(); Assertions.assertEquals(expectedReport, debugReport); @@ -139,7 +160,7 @@ public void testConnectedTreeOneBucket() throws Exception { // Running again in repair mode should give same results since the tree // is connected. fsoTool = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), - getOmDBLocation(), false); + getOmDBLocation(), false, null, null); org.apache.hadoop.ozone.repair.om.FSORepairTool.Report repairReport = fsoTool.run(); Assertions.assertEquals(expectedReport, repairReport); @@ -155,7 +176,7 @@ public void testReportedDataSize() throws Exception { FSORepairTool repair = new FSORepairTool(getOmDB(), - getOmDBLocation(), false); + getOmDBLocation(), false, null, null); FSORepairTool.Report debugReport = repair.run(); Assertions.assertEquals(expectedReport, debugReport); } @@ -169,7 +190,7 @@ public void testMultipleBucketsAndVolumes() throws Exception { org.apache.hadoop.ozone.repair.om.FSORepairTool repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), - getOmDBLocation(), false); + getOmDBLocation(), false, null, null); org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); Assertions.assertEquals(generatedReport, expectedAggregateReport); @@ -204,7 +225,7 @@ public void testDeleteOverwrite() throws Exception { org.apache.hadoop.ozone.repair.om.FSORepairTool repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), - getOmDBLocation(), false); + getOmDBLocation(), false, null, null); org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); Assertions.assertEquals(1, generatedReport.getUnreachableDirs()); @@ -218,7 +239,7 @@ public void testEmptyFileTrees() throws Exception { // Run when there are no file trees. org.apache.hadoop.ozone.repair.om.FSORepairTool repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), - getOmDBLocation(), false); + getOmDBLocation(), false, null, null); org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); Assertions.assertEquals(generatedReport, new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report()); assertDeleteTablesEmpty(); @@ -229,7 +250,7 @@ public void testEmptyFileTrees() throws Exception { // Run on an empty volume and bucket. repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), - getOmDBLocation(), false); + getOmDBLocation(), false, null, null); generatedReport = repair.run(); Assertions.assertEquals(generatedReport, new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report()); assertDeleteTablesEmpty(); @@ -269,7 +290,7 @@ public void testNonFSOBucketsSkipped() throws Exception { // will be skipped and FSO tree is connected. 
org.apache.hadoop.ozone.repair.om.FSORepairTool repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), - getOmDBLocation(), false); + getOmDBLocation(), false, null, null); org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); Assertions.assertEquals(connectReport, generatedReport); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java index 35e3bd5936a2..d2d7d7d62f3f 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java @@ -47,9 +47,18 @@ public class FSORepairCLI implements Callable, SubcommandWithParent { private String dbPath; @CommandLine.Option(names = {"--dry-run"}, - description = "Path to OM RocksDB") + description = "Mode to run the tool in. Read-mode will just log information about unreachable files or " + + "directories; otherwise the tool will move those files and directories to the deleted tables." ) private boolean dryRun; + @CommandLine.Option(names = {"--volume"}, + description = "Filter by volume name") + private String volume; + + @CommandLine.Option(names = {"--bucket"}, + description = "Filter by bucket name") + private String bucket; + @CommandLine.Option(names = {"--verbose"}, description = "More verbose output. ") private boolean verbose; @@ -59,7 +68,7 @@ public class FSORepairCLI implements Callable, SubcommandWithParent { public Void call() throws Exception { try { FSORepairTool - repairTool = new FSORepairTool(dbPath, dryRun); + repairTool = new FSORepairTool(dbPath, dryRun, volume, bucket); repairTool.run(); } catch (Exception ex) { throw new IllegalArgumentException("FSO repair failed: " + ex.getMessage()); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index 6ee551a6580d..2b1c89cddf06 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.ozone.repair.om; - import com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -89,7 +88,6 @@ public class FSORepairTool { LoggerFactory.getLogger(org.apache.hadoop.ozone.repair.om.FSORepairTool.class); private final String omDBPath; - private final DBStore store; private final Table volumeTable; private final Table bucketTable; @@ -97,6 +95,8 @@ public class FSORepairTool { private final Table fileTable; private final Table deletedDirectoryTable; private final Table deletedTable; + private final String volumeFilter; + private final String bucketFilter; // The temporary DB is used to track which items have been seen. // Since usage of this DB is simple, use it directly from // RocksDB. 
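The comment above notes that reachable.db is simple enough to use RocksDB directly. A hedged, standalone sketch of that scratch-DB lifecycle using the raw RocksDB Java API; the path, column family name, and key are placeholders, and the production code goes through Ozone's RocksDatabase wrapper instead:

```java
import org.rocksdb.ColumnFamilyDescriptor;
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;

public final class ScratchDbDemo {
  public static void main(String[] args) throws Exception {
    RocksDB.loadLibrary();
    try (Options opts = new Options().setCreateIfMissing(true);
         RocksDB db = RocksDB.open(opts, "/tmp/reachable.db")) {
      // One column family per bucket pass: create, fill, drop.
      ColumnFamilyHandle cf = db.createColumnFamily(
          new ColumnFamilyDescriptor("reachable".getBytes()));
      // Empty value: presence of the key is all that matters.
      db.put(cf, "/volId/bucketId/parentId".getBytes(), new byte[0]);
      boolean reachable = db.get(cf, "/volId/bucketId/parentId".getBytes()) != null;
      System.out.println("parent reachable: " + reachable);
      db.dropColumnFamily(cf);
      cf.close();
    }
  }
}
```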
@@ -115,8 +115,8 @@
   private long unreachableDirs;
   private boolean dryRun;
 
-  public FSORepairTool(String dbPath, boolean dryRun) throws IOException {
-    this(getStoreFromPath(dbPath), dbPath, dryRun);
+  public FSORepairTool(String dbPath, boolean dryRun, String volume, String bucket) throws IOException {
+    this(getStoreFromPath(dbPath), dbPath, dryRun, volume, bucket);
   }
 
   /**
@@ -124,7 +124,7 @@ public FSORepairTool(String dbPath, boolean dryRun) throws IOException {
    * class for testing.
    */
   @VisibleForTesting
-  public FSORepairTool(DBStore dbStore, String dbPath, boolean isDryRun) throws IOException {
+  public FSORepairTool(DBStore dbStore, String dbPath, boolean isDryRun, String volume, String bucket) throws IOException {
     dryRun = isDryRun;
     // Counters to track as we walk the tree.
     reachableBytes = 0;
@@ -136,6 +136,8 @@
     this.store = dbStore;
     this.omDBPath = dbPath;
+    this.volumeFilter = volume;
+    this.bucketFilter = bucket;
     volumeTable = store.getTable(OmMetadataManagerImpl.VOLUME_TABLE,
         String.class,
         OmVolumeArgs.class);
@@ -170,7 +172,23 @@ protected static DBStore getStoreFromPath(String dbPath) throws IOException {
   }
 
   public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOException {
-    // Iterate all volumes.
+    System.out.println("Disclaimer: This tool currently does not support snapshots.");
+
+    if (bucketFilter != null && volumeFilter == null) {
+      System.out.println("--bucket flag cannot be used without specifying --volume.");
+      return null;
+    }
+    if (volumeFilter != null) {
+      System.out.println("Looking up volume: " + volumeFilter);
+      OmVolumeArgs volumeArgs = volumeTable.get(volumeFilter);
+      if (volumeArgs == null) {
+        // Volume does not exist.
+        System.out.println("Volume '" + volumeFilter + "' does not exist.");
+        return null;
+      }
+    }
+
+    // Iterate all volumes, or a specific volume if specified.
     try (TableIterator<String, ? extends Table.KeyValue<String, OmVolumeArgs>>
              volumeIterator = volumeTable.iterator()) {
       openReachableDB();
@@ -180,35 +198,62 @@
             volumeIterator.next();
         String volumeKey = volumeEntry.getKey();
 
-        // Iterate all buckets in the volume.
-        try (TableIterator<String, ? extends Table.KeyValue<String, OmBucketInfo>>
-                 bucketIterator = bucketTable.iterator()) {
-          bucketIterator.seek(volumeKey);
-          while (bucketIterator.hasNext()) {
-            Table.KeyValue<String, OmBucketInfo> bucketEntry =
-                bucketIterator.next();
-            String bucketKey = bucketEntry.getKey();
-            OmBucketInfo bucketInfo = bucketEntry.getValue();
-
-            if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) {
-              LOG.debug("Skipping non-FSO bucket {}", bucketKey);
-              continue;
-            }
+        if (volumeFilter != null && !volumeFilter.equals(volumeKey)) {
+          continue;
+        }
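With the volume filter wired in above (the bucket filter is handled just below), a scoped dry run looks roughly like the new filtered test. A sketch only; the DB path is a placeholder and the OM must be stopped first:

```java
import org.apache.hadoop.ozone.repair.om.FSORepairTool;

public final class FilteredRepairDriver {
  public static void main(String[] args) throws Exception {
    // Dry run limited to vol1/bucket1; pass null, null to scan everything.
    FSORepairTool tool = new FSORepairTool(
        "/data/metadata/om.db", // --db (placeholder path)
        true,                   // --dry-run: report without repairing
        "vol1",                 // --volume
        "bucket1");             // --bucket
    FSORepairTool.Report report = tool.run();
    System.out.println("Unreachable files: " + report.getUnreachableFiles());
  }
}
```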
-        // Stop this loop once we have seen all buckets in the current
-        // volume.
-        if (!bucketKey.startsWith(volumeKey)) {
-          break;
-        }
+        if (bucketFilter != null) {
+          OmBucketInfo bucketInfo = bucketTable.get(volumeKey + "/" + bucketFilter);
+          if (bucketInfo == null) {
+            // Bucket does not exist in the volume.
+            System.out.println("Bucket '" + bucketFilter + "' does not exist in volume '" + volumeKey + "'.");
+            return null;
+          }
+
+          if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) {
+            //LOG.debug("Skipping non-FSO bucket {}", bucketKey);
+            System.out.println("Skipping non-FSO bucket " + bucketFilter);
+            continue;
+          }
+
+          dropReachableTableIfExists();
+          createReachableTable();
+          markReachableObjectsInBucket(volumeEntry.getValue(), bucketInfo);
+          handleUnreachableObjects(volumeEntry.getValue(), bucketInfo);
+          dropReachableTableIfExists();
+        } else {
 
-          // Start with a fresh list of reachable files for this bucket.
-          // Also clears partial state if the tool failed on a previous run.
-          dropReachableTableIfExists();
-          createReachableTable();
-          // Process one bucket's FSO tree at a time.
-          markReachableObjectsInBucket(volumeEntry.getValue(), bucketInfo);
-          handleUnreachableObjects(volumeEntry.getValue(), bucketInfo);
-          dropReachableTableIfExists();
+          // Iterate all buckets in the volume.
+          try (TableIterator<String, ? extends Table.KeyValue<String, OmBucketInfo>>
+                   bucketIterator = bucketTable.iterator()) {
+            bucketIterator.seek(volumeKey);
+            while (bucketIterator.hasNext()) {
+              Table.KeyValue<String, OmBucketInfo> bucketEntry =
+                  bucketIterator.next();
+              String bucketKey = bucketEntry.getKey();
+              OmBucketInfo bucketInfo = bucketEntry.getValue();
+
+              if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) {
+                //LOG.debug("Skipping non-FSO bucket {}", bucketKey);
+                System.out.println("Skipping non-FSO bucket " + bucketKey);
+                continue;
+              }
+
+              // Stop this loop once we have seen all buckets in the current
+              // volume.
+              if (!bucketKey.startsWith(volumeKey)) {
+                break;
+              }
+
+              // Start with a fresh list of reachable files for this bucket.
+              // Also clears partial state if the tool failed on a previous run.
+              dropReachableTableIfExists();
+              createReachableTable();
+              // Process one bucket's FSO tree at a time.
+              markReachableObjectsInBucket(volumeEntry.getValue(), bucketInfo);
+              handleUnreachableObjects(volumeEntry.getValue(), bucketInfo);
+              dropReachableTableIfExists();
+            }
           }
         }
       }
@@ -229,13 +274,15 @@ private Report buildReportAndLog() {
         .setUnreachableBytes(unreachableBytes)
         .build();
 
-    LOG.info("\n{}", report);
+    //LOG.info("\n{}", report);
+    System.out.println("\n" + report);
     return report;
   }
 
   private void markReachableObjectsInBucket(OmVolumeArgs volume,
       OmBucketInfo bucket) throws IOException {
-    LOG.info("Processing bucket {}", bucket.getBucketName());
+    //LOG.info("Processing bucket {}", bucket.getBucketName());
+    System.out.println("Processing bucket: " + volume.getVolume() + "/" + bucket.getBucketName());
     // Only put directories in the stack.
     // Directory keys should have the form /volumeID/bucketID/parentID/name.
     Stack<String> dirKeyStack = new Stack<>();
@@ -254,8 +301,10 @@ private void markReachableObjectsInBucket(OmVolumeArgs volume,
       String currentDirKey = dirKeyStack.pop();
       OmDirectoryInfo currentDir = directoryTable.get(currentDirKey);
       if (currentDir == null) {
-        LOG.error("Directory key {} to be processed was not found in the " +
-            "directory table", currentDirKey);
+        //LOG.error("Directory key {} to be processed was not found in the " +
+        //    "directory table", currentDirKey);
+        System.out.println("Directory key " + currentDirKey + " to be processed was not found in the " +
+            "directory table.");
         continue;
       }
 
@@ -288,11 +337,13 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket)
         }
 
         if (!isReachable(dirKey)) {
-          LOG.debug("Found unreachable directory: {}", dirKey);
+          //LOG.debug("Found unreachable directory: {}", dirKey);
+          System.out.println("Found unreachable directory: " + dirKey);
           unreachableDirs++;
 
           if (dryRun) {
-            LOG.debug("Marking unreachable directory {} for deletion.", dirKey);
+            //LOG.debug("Marking unreachable directory {} for deletion.", dirKey);
+            System.out.println("Marking unreachable directory " + dirKey + " for deletion.");
             OmDirectoryInfo dirInfo = dirEntry.getValue();
             markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(),
                 dirKey, dirInfo);
@@ -315,13 +366,14 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket)
 
         OmKeyInfo fileInfo = fileEntry.getValue();
         if (!isReachable(fileKey)) {
-          LOG.debug("Found unreachable file: {}", fileKey);
+          //LOG.debug("Found unreachable file: {}", fileKey);
+          System.out.println("Found unreachable file: " + fileKey);
           unreachableBytes += fileInfo.getDataSize();
           unreachableFiles++;
 
           if (dryRun) {
-            LOG.debug("Marking unreachable file {} for deletion.",
-                fileKey);
+            //LOG.debug("Marking unreachable file {} for deletion.", fileKey);
+            System.out.println("Marking unreachable file " + fileKey + " for deletion.");
             markFileForDeletion(fileKey, fileInfo);
           }
         } else {
@@ -349,9 +401,8 @@ protected void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IOException {
       // is gone. The name of the key does not matter so just use IDs.
       deletedTable.putWithBatch(batch, fileKey, updatedRepeatedOmKeyInfo);
 
-      LOG.debug("Added entry {} to open key table: {}",
-          fileKey, updatedRepeatedOmKeyInfo);
-
+      //LOG.debug("Added entry {} to open key table: {}", fileKey, updatedRepeatedOmKeyInfo);
+      System.out.println("Added entry " + fileKey + " to open key table: " + updatedRepeatedOmKeyInfo);
       store.commitBatchOperation(batch);
     }
   }
@@ -466,8 +517,8 @@ private static String buildReachableParentKey(String fileOrDirKey) {
   private void openReachableDB() throws IOException {
     File reachableDBFile = new File(new File(omDBPath).getParentFile(),
         "reachable.db");
-    LOG.info("Creating database of reachable directories at {}",
-        reachableDBFile);
+    //LOG.info("Creating database of reachable directories at {}", reachableDBFile);
+    System.out.println("Creating database of reachable directories at " + reachableDBFile);
     // Delete the DB from the last run if it exists.
if (reachableDBFile.exists()) { FileUtils.deleteDirectory(reachableDBFile); @@ -522,6 +573,12 @@ private void createReachableTable() throws IOException { new ColumnFamilyDescriptor(REACHABLE_TABLE_BYTES)); } + private void estimateReplicatedSize() { + int replicationFactor = 3; + long totalReplicatedSize = (reachableBytes + unreachableBytes) * replicationFactor; + System.out.println("Estimated replicated size: " + totalReplicatedSize + " bytes"); + } + /** * Define a Report to be created. */ @@ -610,7 +667,8 @@ public boolean equals(Object other) { FSORepairTool.Report report = (FSORepairTool.Report) other; // Useful for testing. - LOG.debug("Comparing reports\nExpect:\n{}\nActual:\n{}", this, report); + //LOG.debug("Comparing reports\nExpect:\n{}\nActual:\n{}", this, report); + System.out.println("Comparing reports\nExpect:\n" + this + "\nActual:\n" + report); return reachableBytes == report.reachableBytes && reachableFiles == report.reachableFiles && From fe2d951ff1ce6f0c4a4f457b6e4c74e4e62f0c7f Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Mon, 28 Oct 2024 14:02:06 +0530 Subject: [PATCH 08/28] Fixed checkstyle issues --- .../org/apache/hadoop/fs/ozone/TestFSORepairTool.java | 11 ++++------- .../apache/hadoop/ozone/repair/om/FSORepairCLI.java | 2 +- .../apache/hadoop/ozone/repair/om/FSORepairTool.java | 5 +++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java index 5b1101e6752d..ed25aef57640 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; -import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.client.BucketArgs; import org.apache.hadoop.ozone.client.ObjectStore; import org.apache.hadoop.ozone.client.OzoneClient; @@ -127,20 +126,18 @@ public static void teardown() { @Test public void testFSORepairToolWithVolumeAndBucketFilter() throws Exception { - // Build a tree with unreachable points in multiple volumes and buckets FSORepairTool.Report reportVol1Buck1 = buildDisconnectedTree("vol1", "bucket1", 10); FSORepairTool.Report reportVol2Buck2 = buildDisconnectedTree("vol2", "bucket2", 10); - // Case 1: Run repair tool for a specific volume and bucket. FSORepairTool repairToolFiltered = new FSORepairTool( getOmDB(), getOmDBLocation(), false, "vol1", "bucket1"); FSORepairTool.Report filteredReport = repairToolFiltered.run(); - // Ensure that only the unreachable points from vol1/bucket1 are in the report. - Assertions.assertEquals(reportVol1Buck1, filteredReport, "Filtered report should match the unreachable points in vol1/bucket1."); - Assertions.assertNotEquals(reportVol2Buck2, filteredReport, "Filtered report should not include vol2/bucket2."); + Assertions.assertEquals(reportVol1Buck1, filteredReport, + "Filtered report should match the unreachable points in vol1/bucket1."); + Assertions.assertNotEquals(reportVol2Buck2, filteredReport, + "Filtered report should not include vol2/bucket2."); - // Ensure unreachable objects in vol1/bucket1 are properly marked for delete. 
assertDisconnectedObjectsMarkedForDelete(1); } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java index d2d7d7d62f3f..cdf1f749fee2 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java @@ -48,7 +48,7 @@ public class FSORepairCLI implements Callable, SubcommandWithParent { @CommandLine.Option(names = {"--dry-run"}, description = "Mode to run the tool in. Read-mode will just log information about unreachable files or " + - "directories; otherwise the tool will move those files and directories to the deleted tables." ) + "directories; otherwise the tool will move those files and directories to the deleted tables.") private boolean dryRun; @CommandLine.Option(names = {"--volume"}, diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index 2b1c89cddf06..48d8a4cae9a5 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -124,7 +124,8 @@ public FSORepairTool(String dbPath, boolean dryRun, String volume, String bucket * class for testing. */ @VisibleForTesting - public FSORepairTool(DBStore dbStore, String dbPath, boolean isDryRun, String volume, String bucket) throws IOException { + public FSORepairTool(DBStore dbStore, String dbPath, boolean isDryRun, String volume, String bucket) + throws IOException { dryRun = isDryRun; // Counters to track as we walk the tree. 
reachableBytes = 0; @@ -198,7 +199,7 @@ public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOExc volumeIterator.next(); String volumeKey = volumeEntry.getKey(); - if (volumeFilter!=null && !volumeFilter.equals(volumeKey)) { + if (volumeFilter != null && !volumeFilter.equals(volumeKey)) { continue; } From 19a90ff5391a6c434681a8e52a83313b8d8bec52 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Tue, 29 Oct 2024 16:34:29 +0530 Subject: [PATCH 09/28] Corrected test class --- .../hadoop/fs/ozone/TestFSORepairTool.java | 52 ++++++++++--------- .../hadoop/ozone/repair/om/FSORepairCLI.java | 3 +- .../hadoop/ozone/repair/om/FSORepairTool.java | 5 +- 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java index ed25aef57640..1e4aa05a7f5c 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -41,6 +41,7 @@ import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.repair.om.FSORepairTool; +import org.apache.hadoop.ozone.shell.OzoneShell; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; @@ -58,7 +59,6 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; /** * FSORepairTool test cases. @@ -70,6 +70,7 @@ public class TestFSORepairTool { private static MiniOzoneHAClusterImpl cluster; private static FileSystem fs; private static OzoneClient client; + private static OzoneShell shell; @BeforeAll @@ -98,22 +99,40 @@ public static void init() throws Exception { // Init ofs. 
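In the hunk below the test stops resolving a single OM address and instead addresses the HA service by its id, so with the one-OM service built above the root comes out as "ofs://omservice/". The same formatting in isolation (literal values substituted for the constants, purely illustrative):

String rootPath = String.format("%s://%s/", "ofs", "omservice");
System.out.println(rootPath); // -> ofs://omservice/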
final String rootPath = String.format("%s://%s/", - OZONE_OFS_URI_SCHEME, conf.get(OZONE_OM_ADDRESS_KEY)); + OZONE_OFS_URI_SCHEME, cluster.getOzoneManager().getOMServiceId()); + System.out.println("Rootpath: " + rootPath); conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); fs = FileSystem.get(conf); client = OzoneClientFactory.getRpcClient("omservice", conf); } +// @AfterEach +// public void cleanNamespace() throws Exception { +// if (fs.exists(new Path("/vol1"))) { +// fs.delete(new Path("/vol1"), true); +// } +// if (fs.exists(new Path("/vol2"))) { +// fs.delete(new Path("/vol2"), true); +// } +// runDeletes(); +// assertFileAndDirTablesEmpty(); +// } + @AfterEach public void cleanNamespace() throws Exception { - if (fs.exists(new Path("/vol1"))) { - fs.delete(new Path("/vol1"), true); - } - if (fs.exists(new Path("/vol2"))) { - fs.delete(new Path("/vol2"), true); - } + shell = new OzoneShell(); + String[] args1 = new String[]{"volume", "delete", "-r", "/vol1", "-y"}; + String[] args2 = new String[]{"volume", "delete", "-r", "/vol2", "-y"}; + + shell.execute(args1); + System.out.println("Deleted vol11"); + shell.execute(args2); + System.out.println("Deleted vol12"); + runDeletes(); + System.out.println("Deleted"); assertFileAndDirTablesEmpty(); + System.out.println("Deleted"); } @AfterAll @@ -124,23 +143,6 @@ public static void teardown() { IOUtils.closeQuietly(fs); } - @Test - public void testFSORepairToolWithVolumeAndBucketFilter() throws Exception { - FSORepairTool.Report reportVol1Buck1 = buildDisconnectedTree("vol1", "bucket1", 10); - FSORepairTool.Report reportVol2Buck2 = buildDisconnectedTree("vol2", "bucket2", 10); - - FSORepairTool repairToolFiltered = new FSORepairTool( - getOmDB(), getOmDBLocation(), false, "vol1", "bucket1"); - FSORepairTool.Report filteredReport = repairToolFiltered.run(); - - Assertions.assertEquals(reportVol1Buck1, filteredReport, - "Filtered report should match the unreachable points in vol1/bucket1."); - Assertions.assertNotEquals(reportVol2Buck2, filteredReport, - "Filtered report should not include vol2/bucket2."); - - assertDisconnectedObjectsMarkedForDelete(1); - } - @Test public void testConnectedTreeOneBucket() throws Exception { org.apache.hadoop.ozone.repair.om.FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1"); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java index cdf1f749fee2..8700b5711a12 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java @@ -32,8 +32,7 @@ name = "fso-tree-repair", description = "Identify and repair a disconnected FSO tree, and mark " + "unreachable entries for deletion. OM should be " + - "stopped while this tool is run. Information will be logged at " + - "INFO and DEBUG levels." + "stopped while this tool is run." 
) @MetaInfServices(SubcommandWithParent.class) public class FSORepairCLI implements Callable, SubcommandWithParent { diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index 48d8a4cae9a5..4b1bbd32e596 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -179,11 +179,10 @@ public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOExc System.out.println("--bucket flag cannot be used without specifying --volume."); return null; } + if (volumeFilter != null) { - System.out.println("Looking up volume: " + volumeFilter); OmVolumeArgs volumeArgs = volumeTable.get(volumeFilter); if (volumeArgs == null) { - //Volume does not exist System.out.println("Volume '" + volumeFilter + "' does not exist."); return null; } @@ -203,6 +202,8 @@ public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOExc continue; } + System.out.println("Processing volume: " + volumeKey); + if (bucketFilter != null) { OmBucketInfo bucketInfo = bucketTable.get(volumeKey + "/" + bucketFilter); if (bucketInfo == null) { From 6cbf2d1d487ad088292df5b0a3b960bc567a2537 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Tue, 29 Oct 2024 17:04:50 +0530 Subject: [PATCH 10/28] Fixed findbug issue --- .../org/apache/hadoop/fs/ozone/TestFSORepairTool.java | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java index 1e4aa05a7f5c..5f9e8fb7c758 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -70,8 +70,6 @@ public class TestFSORepairTool { private static MiniOzoneHAClusterImpl cluster; private static FileSystem fs; private static OzoneClient client; - private static OzoneShell shell; - @BeforeAll public static void init() throws Exception { @@ -120,19 +118,14 @@ public static void init() throws Exception { @AfterEach public void cleanNamespace() throws Exception { - shell = new OzoneShell(); + OzoneShell shell = new OzoneShell(); String[] args1 = new String[]{"volume", "delete", "-r", "/vol1", "-y"}; String[] args2 = new String[]{"volume", "delete", "-r", "/vol2", "-y"}; - shell.execute(args1); - System.out.println("Deleted vol11"); shell.execute(args2); - System.out.println("Deleted vol12"); runDeletes(); - System.out.println("Deleted"); assertFileAndDirTablesEmpty(); - System.out.println("Deleted"); } @AfterAll From 7f3301f2c2770e12d07206fbeccd146d6bb54388 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Mon, 4 Nov 2024 12:29:02 +0530 Subject: [PATCH 11/28] Addressed comments --- .../hadoop/fs/ozone/TestFSORepairTool.java | 110 +++++++++++++++--- .../hadoop/ozone/repair/om/FSORepairCLI.java | 5 +- .../hadoop/ozone/repair/om/FSORepairTool.java | 46 ++------ 3 files changed, 107 insertions(+), 54 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java index 5f9e8fb7c758..004c2e8e2497 100644 --- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -23,6 +23,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.hdds.cli.GenericCli; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.utils.db.DBStore; import org.apache.hadoop.hdds.utils.db.Table; @@ -34,6 +35,7 @@ import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; +import org.apache.hadoop.ozone.ha.ConfUtils; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; @@ -50,15 +52,21 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import picocli.CommandLine; import java.io.IOException; +import java.io.PrintStream; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; +import static java.lang.System.err; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; +import static org.junit.jupiter.api.Assertions.assertEquals; /** * FSORepairTool test cases. @@ -67,14 +75,17 @@ public class TestFSORepairTool { public static final Logger LOG = LoggerFactory.getLogger(TestFSORepairTool.class); + private static final String DEFAULT_ENCODING = UTF_8.name(); private static MiniOzoneHAClusterImpl cluster; private static FileSystem fs; private static OzoneClient client; + private static OzoneConfiguration conf = null; + @BeforeAll public static void init() throws Exception { // Set configs. - OzoneConfiguration conf = new OzoneConfiguration(); + conf = new OzoneConfiguration(); // deletion services will be triggered manually. conf.setTimeDuration(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, 1_000_000, TimeUnit.SECONDS); @@ -98,36 +109,99 @@ public static void init() throws Exception { // Init ofs. 
final String rootPath = String.format("%s://%s/", OZONE_OFS_URI_SCHEME, cluster.getOzoneManager().getOMServiceId()); - System.out.println("Rootpath: " + rootPath); conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); fs = FileSystem.get(conf); client = OzoneClientFactory.getRpcClient("omservice", conf); } -// @AfterEach -// public void cleanNamespace() throws Exception { -// if (fs.exists(new Path("/vol1"))) { -// fs.delete(new Path("/vol1"), true); -// } -// if (fs.exists(new Path("/vol2"))) { -// fs.delete(new Path("/vol2"), true); -// } -// runDeletes(); -// assertFileAndDirTablesEmpty(); -// } - @AfterEach public void cleanNamespace() throws Exception { OzoneShell shell = new OzoneShell(); - String[] args1 = new String[]{"volume", "delete", "-r", "/vol1", "-y"}; - String[] args2 = new String[]{"volume", "delete", "-r", "/vol2", "-y"}; - shell.execute(args1); - shell.execute(args2); + + if (fs.exists(new Path("/vol1"))) { + String[] args1 = new String[]{"volume", "delete", "-r", "-y", "vol1"}; + int exitC = execute(shell, args1); + assertEquals(0, exitC); + } + + if (fs.exists(new Path("/vol2"))) { + String[] args1 = new String[]{"volume", "delete", "-r", "-y", "vol2"}; + int exitC = execute(shell, args1); + assertEquals(0, exitC); + } runDeletes(); assertFileAndDirTablesEmpty(); } + private int execute(GenericCli shell, String[] args) { + LOG.info("Executing shell command with args {}", Arrays.asList(args)); + CommandLine cmd = shell.getCmd(); + + CommandLine.IExecutionExceptionHandler exceptionHandler = + (ex, commandLine, parseResult) -> { + new PrintStream(err, true, DEFAULT_ENCODING).println(ex.getMessage()); + return commandLine.getCommandSpec().exitCodeOnExecutionException(); + }; + + // Since there is no elegant way to pass Ozone config to the shell, + // the idea is to use 'set' to place those OM HA configs. + String[] argsWithHAConf = getHASetConfStrings(args); + + cmd.setExecutionExceptionHandler(exceptionHandler); + return cmd.execute(argsWithHAConf); + } + private String getSetConfStringFromConf(String key) { + return String.format("--set=%s=%s", key, conf.get(key)); + } + + private String generateSetConfString(String key, String value) { + return String.format("--set=%s=%s", key, value); + } + + private String[] getHASetConfStrings(int numOfArgs) { + assert (numOfArgs >= 0); + String[] res = new String[1 + 1 + 1 + numOfArgs]; + final int indexOmServiceIds = 0; + final int indexOmNodes = 1; + final int indexOmAddressStart = 2; + + res[indexOmServiceIds] = getSetConfStringFromConf( + OMConfigKeys.OZONE_OM_SERVICE_IDS_KEY); + + String omNodesKey = ConfUtils.addKeySuffixes( + OMConfigKeys.OZONE_OM_NODES_KEY, "omservice"); + String omNodesVal = conf.get(omNodesKey); + res[indexOmNodes] = generateSetConfString(omNodesKey, omNodesVal); + + String[] omNodesArr = omNodesVal.split(","); + // Sanity check + assert (omNodesArr.length == 1); + for (int i = 0; i < 1; i++) { + res[indexOmAddressStart + i] = + getSetConfStringFromConf(ConfUtils.addKeySuffixes( + OMConfigKeys.OZONE_OM_ADDRESS_KEY, "omservice", omNodesArr[i])); + } + + return res; + } + + /** + * Helper function to create a new set of arguments that contains HA configs. + * @param existingArgs Existing arguments to be fed into OzoneShell command. + * @return String array. 
+ */ + private String[] getHASetConfStrings(String[] existingArgs) { + // Get a String array populated with HA configs first + String[] res = getHASetConfStrings(existingArgs.length); + + int indexCopyStart = res.length - existingArgs.length; + // Then copy the existing args to the returned String array + System.arraycopy(existingArgs, 0, res, indexCopyStart, + existingArgs.length); + return res; + } + @AfterAll public static void teardown() { if (cluster != null) { diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java index 8700b5711a12..d866bd42786d 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java @@ -46,8 +46,9 @@ public class FSORepairCLI implements Callable, SubcommandWithParent { private String dbPath; @CommandLine.Option(names = {"--dry-run"}, - description = "Mode to run the tool in. Read-mode will just log information about unreachable files or " + - "directories; otherwise the tool will move those files and directories to the deleted tables.") + defaultValue = "true", + description = "This tool will run in dry-run mode by default to log unreachable files or directories. " + + "Set the value to 'false' to move unreachable files and directories to the deleted tables.") private boolean dryRun; @CommandLine.Option(names = {"--volume"}, diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index 4b1bbd32e596..a0b917f255f3 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -181,7 +181,7 @@ public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOExc } if (volumeFilter != null) { - OmVolumeArgs volumeArgs = volumeTable.get(volumeFilter); + OmVolumeArgs volumeArgs = volumeTable.getIfExist(volumeFilter); if (volumeArgs == null) { System.out.println("Volume '" + volumeFilter + "' does not exist."); return null; @@ -205,7 +205,7 @@ public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOExc System.out.println("Processing volume: " + volumeKey); if (bucketFilter != null) { - OmBucketInfo bucketInfo = bucketTable.get(volumeKey + "/" + bucketFilter); + OmBucketInfo bucketInfo = bucketTable.getIfExist(volumeKey + "/" + bucketFilter); if (bucketInfo == null) { //Bucket does not exist in the volume System.out.println("Bucket '" + bucketFilter + "' does not exist in volume '" + volumeKey + "'."); @@ -213,16 +213,11 @@ public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOExc } if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) { - //LOG.debug("Skipping non-FSO bucket {}", bucketKey); System.out.println("Skipping non-FSO bucket " + bucketFilter); continue; } - dropReachableTableIfExists(); - createReachableTable(); - markReachableObjectsInBucket(volumeEntry.getValue(), bucketInfo); - handleUnreachableObjects(volumeEntry.getValue(), bucketInfo); - dropReachableTableIfExists(); + processBucket(volumeEntry.getValue(), bucketInfo); } else { // Iterate all buckets in the volume. 
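Without a bucket filter, the scan relies on the bucket table's sorted key order: seek to the volume key, then stop at the first key that no longer shares the volume's prefix. The same range-scan shape against a sorted in-memory map (a stand-in for the RocksDB-backed bucket table, with illustrative keys and layouts):

import java.util.Map;
import java.util.TreeMap;

public class PrefixScanSketch {
  public static void main(String[] args) {
    TreeMap<String, String> bucketTable = new TreeMap<>();
    bucketTable.put("/vol1/bucket1", "FILE_SYSTEM_OPTIMIZED");
    bucketTable.put("/vol1/bucket2", "LEGACY");
    bucketTable.put("/vol2/bucket1", "FILE_SYSTEM_OPTIMIZED");

    String volumeKey = "/vol1";
    // tailMap(volumeKey) plays the role of bucketIterator.seek(volumeKey).
    for (Map.Entry<String, String> entry : bucketTable.tailMap(volumeKey).entrySet()) {
      if (!entry.getKey().startsWith(volumeKey)) {
        break; // left the volume's key range, like the startsWith check in run()
      }
      System.out.println("Visiting bucket " + entry.getKey() + " (layout " + entry.getValue() + ")");
    }
  }
}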
@@ -236,7 +231,6 @@ public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOExc OmBucketInfo bucketInfo = bucketEntry.getValue(); if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) { - //LOG.debug("Skipping non-FSO bucket {}", bucketKey); System.out.println("Skipping non-FSO bucket " + bucketKey); continue; } @@ -247,14 +241,7 @@ public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOExc break; } - // Start with a fresh list of reachable files for this bucket. - // Also clears partial state if the tool failed on a previous run. - dropReachableTableIfExists(); - createReachableTable(); - // Process one bucket's FSO tree at a time. - markReachableObjectsInBucket(volumeEntry.getValue(), bucketInfo); - handleUnreachableObjects(volumeEntry.getValue(), bucketInfo); - dropReachableTableIfExists(); + processBucket(volumeEntry.getValue(), bucketInfo); } } } @@ -266,6 +253,14 @@ public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOExc return buildReportAndLog(); } + private void processBucket(OmVolumeArgs volume, OmBucketInfo bucketInfo) throws IOException { + dropReachableTableIfExists(); + createReachableTable(); + markReachableObjectsInBucket(volume, bucketInfo); + handleUnreachableObjects(volume, bucketInfo); + dropReachableTableIfExists(); + } + private Report buildReportAndLog() { Report report = new Report.Builder() .setReachableDirs(reachableDirs) @@ -276,14 +271,12 @@ private Report buildReportAndLog() { .setUnreachableBytes(unreachableBytes) .build(); - //LOG.info("\n{}", report); System.out.println("\n" + report); return report; } private void markReachableObjectsInBucket(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException { - //LOG.info("Processing bucket {}", bucket.getBucketName()); System.out.println("Processing bucket: " + volume.getVolume() + "/" + bucket.getBucketName()); // Only put directories in the stack. // Directory keys should have the form /volumeID/bucketID/parentID/name. @@ -303,8 +296,6 @@ private void markReachableObjectsInBucket(OmVolumeArgs volume, String currentDirKey = dirKeyStack.pop(); OmDirectoryInfo currentDir = directoryTable.get(currentDirKey); if (currentDir == null) { - //LOG.error("Directory key {} to be processed was not found in the " + - // "directory table", currentDirKey); System.out.println("Directory key" + currentDirKey + "to be processed was not found in the " + "directory table."); continue; @@ -339,12 +330,10 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) } if (!isReachable(dirKey)) { - //LOG.debug("Found unreachable directory: {}", dirKey); System.out.println("Found unreachable directory: " + dirKey); unreachableDirs++; if (dryRun) { - //LOG.debug("Marking unreachable directory {} for deletion.", dirKey); System.out.println("Marking unreachable directory " + dirKey + " for deletion."); OmDirectoryInfo dirInfo = dirEntry.getValue(); markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(), @@ -368,13 +357,11 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) OmKeyInfo fileInfo = fileEntry.getValue(); if (!isReachable(fileKey)) { - //LOG.debug("Found unreachable file: {}", fileKey); System.out.println("Found unreachable file: " + fileKey); unreachableBytes += fileInfo.getDataSize(); unreachableFiles++; if (dryRun) { - //LOG.debug("Marking unreachable file {} for deletion.", fileKey); System.out.println("Marking unreachable file " + fileKey + " for deletion." 
+ fileKey); markFileForDeletion(fileKey, fileInfo); } @@ -403,7 +390,6 @@ protected void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IO // is gone. The name of the key does not matter so just use IDs. deletedTable.putWithBatch(batch, fileKey, updatedRepeatedOmKeyInfo); - //LOG.debug("Added entry {} to open key table: {}", fileKey, updatedRepeatedOmKeyInfo); System.out.println("Added entry " + fileKey + " to open key table: " + updatedRepeatedOmKeyInfo); store.commitBatchOperation(batch); } @@ -519,7 +505,6 @@ private static String buildReachableParentKey(String fileOrDirKey) { private void openReachableDB() throws IOException { File reachableDBFile = new File(new File(omDBPath).getParentFile(), "reachable.db"); - //LOG.info("Creating database of reachable directories at {}", reachableDBFile); System.out.println("Creating database of reachable directories at " + reachableDBFile); // Delete the DB from the last run if it exists. if (reachableDBFile.exists()) { @@ -575,12 +560,6 @@ private void createReachableTable() throws IOException { new ColumnFamilyDescriptor(REACHABLE_TABLE_BYTES)); } - private void estimateReplicatedSize() { - int replicationFactor = 3; - long totalReplicatedSize = (reachableBytes + unreachableBytes) * replicationFactor; - System.out.println("Estimated replicated size: " + totalReplicatedSize + " bytes"); - } - /** * Define a Report to be created. */ @@ -669,7 +648,6 @@ public boolean equals(Object other) { FSORepairTool.Report report = (FSORepairTool.Report) other; // Useful for testing. - //LOG.debug("Comparing reports\nExpect:\n{}\nActual:\n{}", this, report); System.out.println("Comparing reports\nExpect:\n" + this + "\nActual:\n" + report); return reachableBytes == report.reachableBytes && From 576200d60b6b08b8412e6b5e1e8b803b0e11cd06 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Tue, 12 Nov 2024 12:29:19 +0530 Subject: [PATCH 12/28] Added repair and debug options, exclude buckets with snapshots --- .../hadoop/fs/ozone/TestFSORepairTool.java | 338 ++++++++++-------- .../hadoop/ozone/repair/om/FSORepairCLI.java | 19 +- .../hadoop/ozone/repair/om/FSORepairTool.java | 80 ++++- 3 files changed, 275 insertions(+), 162 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java index 004c2e8e2497..cd8ea84e163b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -41,9 +41,9 @@ import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; -import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.repair.om.FSORepairTool; import org.apache.hadoop.ozone.shell.OzoneShell; +import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; @@ -54,18 +54,18 @@ import org.slf4j.LoggerFactory; import picocli.CommandLine; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.PrintStream; import java.nio.charset.StandardCharsets; import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; import java.util.concurrent.TimeUnit; import static java.lang.System.err; import 
static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; +import static org.junit.jupiter.api.Assertions.fail; import static org.junit.jupiter.api.Assertions.assertEquals; /** @@ -80,23 +80,22 @@ public class TestFSORepairTool { private static FileSystem fs; private static OzoneClient client; private static OzoneConfiguration conf = null; - + private static FSORepairTool tool; @BeforeAll public static void init() throws Exception { // Set configs. conf = new OzoneConfiguration(); // deletion services will be triggered manually. - conf.setTimeDuration(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, - 1_000_000, TimeUnit.SECONDS); - conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 1_000_000, - TimeUnit.SECONDS); - conf.setInt(OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK, 10); - conf.setInt(OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK, 10); + conf.setInt(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, 2000); + conf.setInt(OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK, 5); + conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, + TimeUnit.MILLISECONDS); + conf.setInt(OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK, 20); conf.setBoolean(OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY, true); // Since delete services use RocksDB iterators, make sure the double // buffer is flushed between runs. - conf.setInt(OMConfigKeys.OZONE_OM_UNFLUSHED_TRANSACTION_MAX_COUNT, 1); + conf.setInt(OMConfigKeys.OZONE_OM_UNFLUSHED_TRANSACTION_MAX_COUNT, 2); // Build cluster. cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf) @@ -130,6 +129,7 @@ public void cleanNamespace() throws Exception { assertEquals(0, exitC); } + cluster.getOzoneManager().prepareOzoneManager(120L, 5L); runDeletes(); assertFileAndDirTablesEmpty(); } @@ -151,6 +151,7 @@ private int execute(GenericCli shell, String[] args) { cmd.setExecutionExceptionHandler(exceptionHandler); return cmd.execute(argsWithHAConf); } + private String getSetConfStringFromConf(String key) { return String.format("--set=%s=%s", key, conf.get(key)); } @@ -212,12 +213,12 @@ public static void teardown() { @Test public void testConnectedTreeOneBucket() throws Exception { - org.apache.hadoop.ozone.repair.om.FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1"); + FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1"); // Test the connected tree in debug mode. - FSORepairTool fsoTool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true, null, null); - FSORepairTool.Report debugReport = fsoTool.run(); + tool = new FSORepairTool(getOmDB(), + getOmDBLocation(), true, false, null, null); + FSORepairTool.Report debugReport = tool.run(); Assertions.assertEquals(expectedReport, debugReport); assertConnectedTreeReadable("vol1", "bucket1"); @@ -225,9 +226,9 @@ public void testConnectedTreeOneBucket() throws Exception { // Running again in repair mode should give same results since the tree // is connected. 
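With two adjacent booleans in the new constructor signature, the calls are easiest to read positionally. At this point in the series the signature is FSORepairTool(dbStore, dbPath, isDryRun, isRepair, volume, bucket), so the two passes in this test differ only in those flags (calls quoted in shape only, not runnable on their own):

// Debug pass: report findings, nothing is moved to the deleted tables.
new FSORepairTool(getOmDB(), getOmDBLocation(), true, false, null, null);
// Repair pass: unreachable entries are moved to the deleted tables.
new FSORepairTool(getOmDB(), getOmDBLocation(), false, true, null, null);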
- fsoTool = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), - getOmDBLocation(), false, null, null); - org.apache.hadoop.ozone.repair.om.FSORepairTool.Report repairReport = fsoTool.run(); + tool = new FSORepairTool(getOmDB(), + getOmDBLocation(), false, true, null, null); + FSORepairTool.Report repairReport = tool.run(); Assertions.assertEquals(expectedReport, repairReport); assertConnectedTreeReadable("vol1", "bucket1"); @@ -240,29 +241,103 @@ public void testReportedDataSize() throws Exception { FSORepairTool.Report report2 = buildConnectedTree("vol1", "bucket2", 10); FSORepairTool.Report expectedReport = new FSORepairTool.Report(report1, report2); - FSORepairTool - repair = new FSORepairTool(getOmDB(), - getOmDBLocation(), false, null, null); - FSORepairTool.Report debugReport = repair.run(); + tool = new FSORepairTool(getOmDB(), + getOmDBLocation(), false, true, null, null); + FSORepairTool.Report debugReport = tool.run(); Assertions.assertEquals(expectedReport, debugReport); } + /** + * Test to verify how the tool processes the volume and bucket + * filters. + */ + @Test + public void testVolumeAndBucketFilter() throws Exception { + FSORepairTool.Report report1 = buildDisconnectedTree("vol1", "bucket1", 10); + FSORepairTool.Report report2 = buildConnectedTree("vol2", "bucket2", 10); + FSORepairTool.Report expectedReport1 = new FSORepairTool.Report(report1); + FSORepairTool.Report expectedReport2 = new FSORepairTool.Report(report2); + + // When volume filter is passed + tool = new FSORepairTool(getOmDB(), + getOmDBLocation(), false, true, "/vol1", null); + FSORepairTool.Report result1 = tool.run(); + Assertions.assertEquals(expectedReport1, result1); + + // When both volume and bucket filters are passed + tool = new FSORepairTool(getOmDB(), + getOmDBLocation(), false, true, "/vol2", "bucket2"); + FSORepairTool.Report result2 = tool.run(); + Assertions.assertEquals(expectedReport2, result2); + + PrintStream originalOut = System.out; + + // When a non-existent bucket filter is passed + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(outputStream)) { + System.setOut(ps); + tool = new FSORepairTool(getOmDB(), + getOmDBLocation(), false, true, "/vol1", "bucket2"); + tool.run(); + String output = outputStream.toString(); + Assertions.assertTrue(output.contains("Bucket 'bucket2' does not exist in volume '/vol1'.")); + } finally { + System.setOut(originalOut); + } + + // When a non-existent volume filter is passed + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(outputStream)) { + System.setOut(ps); + tool = new FSORepairTool(getOmDB(), + getOmDBLocation(), false, true, "/vol3", "bucket2"); + tool.run(); + String output = outputStream.toString(); + Assertions.assertTrue(output.contains("Volume '/vol3' does not exist.")); + } finally { + System.setOut(originalOut); + } + + // When bucket filter is passed without the volume filter. 
+ try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(outputStream)) { + System.setOut(ps); + tool = new FSORepairTool(getOmDB(), + getOmDBLocation(), false, true, null, "bucket2"); + tool.run(); + String output = outputStream.toString(); + Assertions.assertTrue(output.contains("--bucket flag cannot be used without specifying --volume.")); + } finally { + System.setOut(originalOut); + } + + } + @Test public void testMultipleBucketsAndVolumes() throws Exception { + Table dirTable = + cluster.getOzoneManager().getMetadataManager().getDirectoryTable(); + Table keyTable = + cluster.getOzoneManager().getMetadataManager().getKeyTable(getFSOBucketLayout()); FSORepairTool.Report report1 = buildConnectedTree("vol1", "bucket1"); FSORepairTool.Report report2 = buildDisconnectedTree("vol2", "bucket2"); - FSORepairTool.Report expectedAggregateReport = new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report( + FSORepairTool.Report expectedAggregateReport = new FSORepairTool.Report( report1, report2); - org.apache.hadoop.ozone.repair.om.FSORepairTool - repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), - getOmDBLocation(), false, null, null); - org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); + tool = new FSORepairTool(getOmDB(), + getOmDBLocation(), false, true, null, null); + FSORepairTool.Report generatedReport = tool.run(); Assertions.assertEquals(generatedReport, expectedAggregateReport); assertConnectedTreeReadable("vol1", "bucket1"); assertDisconnectedTreePartiallyReadable("vol2", "bucket2"); - assertDisconnectedObjectsMarkedForDelete(1); + + // This assertion ensures that only specific directories and keys remain in the active tables, + // as the remaining entries are expected to be moved to the deleted tables by the background service. + // However, since the timing of the background deletion service is not predictable, + // assertions on the deleted tables themselves may lead to flaky tests. + assertEquals(4, countTableEntries(dirTable)); + assertEquals(5, countTableEntries(keyTable)); } /** @@ -271,6 +346,11 @@ public void testMultipleBucketsAndVolumes() throws Exception { */ @Test public void testDeleteOverwrite() throws Exception { + Table keyTable = + cluster.getOzoneManager().getMetadataManager().getKeyTable(getFSOBucketLayout()); + Table dirTable = + cluster.getOzoneManager().getMetadataManager().getDirectoryTable(); + // Create files and dirs under dir1. To make sure they are added to the // delete table, the keys must have data. 
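A key with no data carries no blocks, so moving it to the deleted table exercises none of the block-reclamation path; that is why the trees in this test are built with a non-zero fileSize. Per file, that amounts to something like the following (fs and the path are the test's own, the 10-byte payload is illustrative):

try (FSDataOutputStream out = fs.create(new Path("/vol1/bucket1/dir1/file1"))) {
  out.write(new byte[10]); // non-empty payload, so the key has blocks behind it
}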
buildConnectedTree("vol1", "bucket1", 10); @@ -289,25 +369,28 @@ public void testDeleteOverwrite() throws Exception { ContractTestUtils.touch(fs, new Path("/vol1/bucket1/dir1/file2")); disconnectDirectory("dir1"); - org.apache.hadoop.ozone.repair.om.FSORepairTool - repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), - getOmDBLocation(), false, null, null); - org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); + tool = new FSORepairTool(getOmDB(), + getOmDBLocation(), false, true, null, null); + FSORepairTool.Report generatedReport = tool.run(); Assertions.assertEquals(1, generatedReport.getUnreachableDirs()); Assertions.assertEquals(3, generatedReport.getUnreachableFiles()); - assertDisconnectedObjectsMarkedForDelete(2); + // This assertion ensures that only specific directories and keys remain in the active tables, + // as the remaining entries are expected to be moved to the deleted tables by the background service. + // However, since the timing of the background deletion service is not predictable, + // assertions on the deleted tables themselves may lead to flaky tests. + assertEquals(1, countTableEntries(keyTable)); + assertEquals(1, countTableEntries(dirTable)); } @Test public void testEmptyFileTrees() throws Exception { // Run when there are no file trees. - org.apache.hadoop.ozone.repair.om.FSORepairTool - repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), - getOmDBLocation(), false, null, null); - org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); - Assertions.assertEquals(generatedReport, new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report()); + tool = new FSORepairTool(getOmDB(), + getOmDBLocation(), false, true, null, null); + FSORepairTool.Report generatedReport = tool.run(); + Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); assertDeleteTablesEmpty(); // Create an empty volume and bucket. @@ -315,11 +398,10 @@ public void testEmptyFileTrees() throws Exception { fs.mkdirs(new Path("/vol2/bucket1")); // Run on an empty volume and bucket. - repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), - getOmDBLocation(), false, null, null); - generatedReport = repair.run(); - Assertions.assertEquals(generatedReport, new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report()); - assertDeleteTablesEmpty(); + tool = new FSORepairTool(getOmDB(), + getOmDBLocation(), false, true, null, null); + generatedReport = tool.run(); + Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); } @Test @@ -349,15 +431,14 @@ public void testNonFSOBucketsSkipped() throws Exception { legacyStream.close(); // Add an FSO bucket with data. - org.apache.hadoop.ozone.repair.om.FSORepairTool.Report connectReport = buildConnectedTree("vol1", "fso" + - "-bucket"); + FSORepairTool.Report connectReport = + buildConnectedTree("vol1", "fso-bucket"); // Even in repair mode there should be no action. legacy and obs buckets // will be skipped and FSO tree is connected. 
- org.apache.hadoop.ozone.repair.om.FSORepairTool - repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), - getOmDBLocation(), false, null, null); - org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); + tool = new FSORepairTool(getOmDB(), + getOmDBLocation(), false, true, null, null); + FSORepairTool.Report generatedReport = tool.run(); Assertions.assertEquals(connectReport, generatedReport); assertConnectedTreeReadable("vol1", "fso-bucket"); @@ -372,7 +453,7 @@ public void testNonFSOBucketsSkipped() throws Exception { } - private org.apache.hadoop.ozone.repair.om.FSORepairTool.Report buildConnectedTree(String volume, String bucket) + private FSORepairTool.Report buildConnectedTree(String volume, String bucket) throws Exception { return buildConnectedTree(volume, bucket, 0); } @@ -380,8 +461,7 @@ private org.apache.hadoop.ozone.repair.om.FSORepairTool.Report buildConnectedTre /** * Creates a tree with 3 reachable directories and 4 reachable files. */ - private org.apache.hadoop.ozone.repair.om.FSORepairTool.Report buildConnectedTree(String volume, String bucket, - int fileSize) + private FSORepairTool.Report buildConnectedTree(String volume, String bucket, int fileSize) throws Exception { Path bucketPath = new Path("/" + volume + "/" + bucket); Path dir1 = new Path(bucketPath, "dir1"); @@ -445,17 +525,17 @@ private void assertConnectedTreeReadable(String volume, String bucket) Assertions.assertTrue(fs.exists(file4)); } - private org.apache.hadoop.ozone.repair.om.FSORepairTool.Report buildDisconnectedTree(String volume, String bucket) + private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket) throws Exception { return buildDisconnectedTree(volume, bucket, 0); } /** - * Creates a tree with 2 reachable directories, 1 reachable file, 1 + * Creates a tree with 1 reachable directory, 1 reachable file, 1 * unreachable directory, and 3 unreachable files. */ - private org.apache.hadoop.ozone.repair.om.FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, - int fileSize) throws Exception { + private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, + int fileSize) throws Exception { buildConnectedTree(volume, bucket, fileSize); // Manually remove dir1. This should disconnect 3 of the files and 1 of @@ -516,91 +596,61 @@ private void assertDisconnectedTreePartiallyReadable( Assertions.assertTrue(fs.exists(file4)); } - /** - * Checks that the disconnected tree's unreachable objects are correctly - * moved to the delete table. If the tree was written and deleted multiple - * times, it makes sure the delete entries with the same name are preserved. - */ - private void assertDisconnectedObjectsMarkedForDelete(int numWrites) - throws Exception { - - Map pendingDeleteDirCounts = new HashMap<>(); - - // Check deleted directory table. + private void assertDeleteTablesEmpty() throws Exception { OzoneManager leader = cluster.getOMLeader(); - Table deletedDirTable = - leader.getMetadataManager().getDeletedDirTable(); - try (TableIterator> iterator = - deletedDirTable.iterator()) { - while (iterator.hasNext()) { - Table.KeyValue entry = iterator.next(); - String key = entry.getKey(); - OmKeyInfo value = entry.getValue(); - - String dirName = key.split("/")[4]; - LOG.info("In deletedDirTable, extracting directory name {} from DB " + - "key {}", dirName, key); - // Check that the correct dir info was added. 
- // FSO delete path will fill in the whole path to the key in the - // proto when it is deleted. Once the tree is disconnected that can't - // be done, so just make sure the dirName contained in the key name - // somewhere. - Assertions.assertTrue(value.getKeyName().contains(dirName)); - - int count = pendingDeleteDirCounts.getOrDefault(dirName, 0); - pendingDeleteDirCounts.put(dirName, count + 1); + GenericTestUtils.waitFor(() -> { + try { + return leader.getMetadataManager().getDeletedDirTable().isEmpty(); + } catch (Exception e) { + LOG.error("DB failure!", e); + fail("DB failure!"); + return false; } - } - - // 1 directory is disconnected in the tree. dir1 was totally deleted so - // the repair tool will not see it. - Assertions.assertEquals(1, pendingDeleteDirCounts.size()); - Assertions.assertEquals(numWrites, pendingDeleteDirCounts.get("dir2")); - - // Check that disconnected files were put in deleting tables. - Map pendingDeleteFileCounts = new HashMap<>(); - - Table deletedFileTable = - leader.getMetadataManager().getDeletedTable(); - try (TableIterator> iterator = - deletedFileTable.iterator()) { - while (iterator.hasNext()) { - Table.KeyValue entry = iterator.next(); - String key = entry.getKey(); - RepeatedOmKeyInfo value = entry.getValue(); - - String[] keyParts = key.split("/"); - String fileName = keyParts[keyParts.length - 1]; - - LOG.info("In deletedTable, extracting file name {} from DB " + - "key {}", fileName, key); - - for (OmKeyInfo fileInfo: value.getOmKeyInfoList()) { - // Check that the correct file info was added. - Assertions.assertTrue(fileInfo.getKeyName().contains(fileName)); + }, 1000, 120000); + GenericTestUtils.waitFor(() -> { + try { + return leader.getMetadataManager().getDeletedTable().isEmpty(); + } catch (Exception e) { + LOG.error("DB failure!", e); + fail("DB failure!"); + return false; + } + }, 1000, 120000); + } - int count = pendingDeleteFileCounts.getOrDefault(fileName, 0); - pendingDeleteFileCounts.put(fileName, count + 1); - } + private void assertFileAndDirTablesEmpty() throws Exception { + OzoneManager leader = cluster.getOMLeader(); + GenericTestUtils.waitFor(() -> { + try { + return leader.getMetadataManager().getDirectoryTable().isEmpty(); + } catch (Exception e) { + LOG.error("DB failure!", e); + fail("DB failure!"); + return false; } - } + }, 1000, 120000); + GenericTestUtils.waitFor(() -> { + try { + return leader.getMetadataManager().getFileTable().isEmpty(); + } catch (Exception e) { + LOG.error("DB failure!", e); + fail("DB failure!"); + return false; + } + }, 1000, 120000); + } - // 3 files are disconnected in the tree. - // TODO: dir2 ended up in here with count = 1. file3 also had count=1 - // Likely that the dir2/file3 entry got split in two. 
- Assertions.assertEquals(3, pendingDeleteFileCounts.size()); - Assertions.assertEquals(numWrites, pendingDeleteFileCounts.get("file1")); - Assertions.assertEquals(numWrites, pendingDeleteFileCounts.get("file2")); - Assertions.assertEquals(numWrites, pendingDeleteFileCounts.get("file3")); + private DBStore getOmDB() { + return cluster.getOMLeader().getMetadataManager().getStore(); } - private void assertDeleteTablesEmpty() throws IOException { - OzoneManager leader = cluster.getOMLeader(); - Assertions.assertTrue(leader.getMetadataManager().getDeletedDirTable().isEmpty()); - Assertions.assertTrue(leader.getMetadataManager().getDeletedTable().isEmpty()); + private String getOmDBLocation() { + return cluster.getOMLeader().getMetadataManager().getStore().getDbLocation().toString(); + } + + private static BucketLayout getFSOBucketLayout() { + return BucketLayout.FILE_SYSTEM_OPTIMIZED; } private void runDeletes() throws Exception { @@ -623,17 +673,15 @@ private void runDeletes() throws Exception { } } - private void assertFileAndDirTablesEmpty() throws Exception { - OzoneManager leader = cluster.getOMLeader(); - Assertions.assertTrue(leader.getMetadataManager().getDirectoryTable().isEmpty()); - Assertions.assertTrue(leader.getMetadataManager().getFileTable().isEmpty()); - } - - private DBStore getOmDB() { - return cluster.getOMLeader().getMetadataManager().getStore(); - } - - private String getOmDBLocation() { - return cluster.getOMLeader().getMetadataManager().getStore().getDbLocation().toString(); + private int countTableEntries(Table table) throws Exception { + int count = 0; + try (TableIterator> iterator = table.iterator()) { + while (iterator.hasNext()) { + iterator.next(); + count++; + } + } + System.out.println("Total number of entries: " + count); + return count; } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java index d866bd42786d..9a3418038a8d 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java @@ -47,12 +47,16 @@ public class FSORepairCLI implements Callable, SubcommandWithParent { @CommandLine.Option(names = {"--dry-run"}, defaultValue = "true", - description = "This tool will run in dry-run mode by default to log unreachable files or directories. " + - "Set the value to 'false' to move unreachable files and directories to the deleted tables.") + description = "Run in dry-run mode to log information about unreachable files or directories.") private boolean dryRun; + @CommandLine.Option(names = {"--repair"}, + defaultValue = "false", + description = "Run in repair mode to move unreachable files and directories to deleted tables.") + private boolean repair; + @CommandLine.Option(names = {"--volume"}, - description = "Filter by volume name") + description = "Filter by volume name. Add '/' before the volume name.") private String volume; @CommandLine.Option(names = {"--bucket"}, @@ -66,9 +70,15 @@ public class FSORepairCLI implements Callable, SubcommandWithParent { @Override public Void call() throws Exception { + if (repair) { + dryRun = false; //Disable dry-run if repair is passed. 
+ System.out.println("FSO Repair Tool is running in repair mode"); + } else { + System.out.println("FSO Repair Tool is running in debug mode"); + } try { FSORepairTool - repairTool = new FSORepairTool(dbPath, dryRun, volume, bucket); + repairTool = new FSORepairTool(dbPath, dryRun, repair, volume, bucket); repairTool.run(); } catch (Exception ex) { throw new IllegalArgumentException("FSO repair failed: " + ex.getMessage()); @@ -86,4 +96,3 @@ public Class getParentType() { return OzoneRepair.class; } } - diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index a0b917f255f3..08a6b4a50a23 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -36,6 +36,7 @@ import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.hadoop.ozone.om.helpers.WithObjectID; import org.apache.hadoop.ozone.om.request.file.OMFileRequest; import org.apache.ratis.util.Preconditions; @@ -95,6 +96,7 @@ public class FSORepairTool { private final Table fileTable; private final Table deletedDirectoryTable; private final Table deletedTable; + private final Table snapshotInfoTable; private final String volumeFilter; private final String bucketFilter; // The temporary DB is used to track which items have been seen. @@ -113,10 +115,10 @@ public class FSORepairTool { private long unreachableBytes; private long unreachableFiles; private long unreachableDirs; - private boolean dryRun; + private final boolean dryRun; - public FSORepairTool(String dbPath, boolean dryRun, String volume, String bucket) throws IOException { - this(getStoreFromPath(dbPath), dbPath, dryRun, volume, bucket); + public FSORepairTool(String dbPath, boolean dryRun, boolean repair, String volume, String bucket) throws IOException { + this(getStoreFromPath(dbPath), dbPath, dryRun, repair, volume, bucket); } /** @@ -124,7 +126,7 @@ public FSORepairTool(String dbPath, boolean dryRun, String volume, String bucket * class for testing. */ @VisibleForTesting - public FSORepairTool(DBStore dbStore, String dbPath, boolean isDryRun, String volume, String bucket) + public FSORepairTool(DBStore dbStore, String dbPath, boolean isDryRun, boolean isRepair, String volume, String bucket) throws IOException { dryRun = isDryRun; // Counters to track as we walk the tree. 
@@ -159,6 +161,10 @@ public FSORepairTool(DBStore dbStore, String dbPath, boolean isDryRun, String vo OmMetadataManagerImpl.DELETED_TABLE, String.class, RepeatedOmKeyInfo.class); + snapshotInfoTable = store.getTable( + OmMetadataManagerImpl.SNAPSHOT_INFO_TABLE, + String.class, + SnapshotInfo.class); } protected static DBStore getStoreFromPath(String dbPath) throws IOException { @@ -173,7 +179,6 @@ protected static DBStore getStoreFromPath(String dbPath) throws IOException { } public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOException { - System.out.println("Disclaimer: This tool currently does not support snapshots."); if (bucketFilter != null && volumeFilter == null) { System.out.println("--bucket flag cannot be used without specifying --volume."); @@ -217,6 +222,17 @@ public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOExc continue; } + // Check for snapshots in the specified bucket + if (checkIfSnapshotExistsForBucket(volumeFilter, bucketFilter)) { + if (dryRun) { + System.out.println("Snapshot detected in bucket '" + bucketFilter + "'"); + } else { + System.out.println("Snapshot exists in bucket '" + bucketFilter + "'. " + + "Repair is not allowed if snapshots exist."); + return null; + } + } + processBucket(volumeEntry.getValue(), bucketInfo); } else { @@ -253,7 +269,37 @@ public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOExc return buildReportAndLog(); } + private boolean checkIfSnapshotExistsForBucket(String volumeName, String bucketName) throws IOException { + if (snapshotInfoTable == null) { + return false; + } + + try (TableIterator> iterator = + snapshotInfoTable.iterator()) { + while (iterator.hasNext()) { + SnapshotInfo snapshotInfo = iterator.next().getValue(); + String snapshotPath = (volumeName + "/" + bucketName).replaceFirst("^/", ""); + if (snapshotInfo.getSnapshotPath().equals(snapshotPath)) { + return true; + } + } + } + return false; + } + private void processBucket(OmVolumeArgs volume, OmBucketInfo bucketInfo) throws IOException { + System.out.println("Processing bucket: " + volume.getVolume() + "/" + bucketInfo.getBucketName()); + if (checkIfSnapshotExistsForBucket(volume.getVolume(), bucketInfo.getBucketName())) { + if (dryRun) { + System.out.println( + "Snapshot detected in bucket '" + volume.getVolume() + "/" + bucketInfo.getBucketName() + "'. "); + } else { + System.out.println( + "Skipping repair for bucket '" + volume.getVolume() + "/" + bucketInfo.getBucketName() + "' " + + "due to snapshot presence."); + return; + } + } dropReachableTableIfExists(); createReachableTable(); markReachableObjectsInBucket(volume, bucketInfo); @@ -277,7 +323,6 @@ private Report buildReportAndLog() { private void markReachableObjectsInBucket(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException { - System.out.println("Processing bucket: " + volume.getVolume() + "/" + bucket.getBucketName()); // Only put directories in the stack. // Directory keys should have the form /volumeID/bucketID/parentID/name. 
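For a first-level directory, the parentID slot holds the bucket's own object ID, so the keys pushed onto the stack look like the following (IDs are illustrative, not real table contents):

// volumeID = -123, bucketID = -456, parentID = -456 (the bucket itself), name = "dir1"
String dirKey = "/" + -123L + "/" + -456L + "/dir1";
System.out.println(dirKey); // -> /-123/-456/-456/dir1 once the parent ID is included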
Stack dirKeyStack = new Stack<>(); @@ -335,6 +380,8 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) if (dryRun) { System.out.println("Marking unreachable directory " + dirKey + " for deletion."); + } else { + System.out.println("Deleting unreachable directory " + dirKey); OmDirectoryInfo dirInfo = dirEntry.getValue(); markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(), dirKey, dirInfo); @@ -363,6 +410,8 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) if (dryRun) { System.out.println("Marking unreachable file " + fileKey + " for deletion." + fileKey); + } else { + System.out.println("Deleting unreachable file " + fileKey); markFileForDeletion(fileKey, fileInfo); } } else { @@ -384,7 +433,7 @@ protected void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IO RepeatedOmKeyInfo updatedRepeatedOmKeyInfo = OmUtils.prepareKeyForDelete( fileInfo, fileInfo.getUpdateID(), true); // NOTE: The FSO code seems to write the open key entry with the whole - // path, using the object's names instead of their ID. This would onyl + // path, using the object's names instead of their ID. This would only // be possible when the file is deleted explicitly, and not part of a // directory delete. It is also not possible here if the file's parent // is gone. The name of the key does not matter so just use IDs. @@ -517,12 +566,19 @@ private void openReachableDB() throws IOException { private RocksDatabase buildReachableRocksDB(File reachableDBFile) throws IOException { DBProfile profile = new OzoneConfiguration().getEnum(HDDS_DB_PROFILE, HDDS_DEFAULT_DB_PROFILE); Set tableConfigs = new HashSet<>(); - tableConfigs.add(new TableConfig("default", profile.getColumnFamilyOptions())); - return RocksDatabase.open(reachableDBFile, - profile.getDBOptions(), - new ManagedWriteOptions(), - tableConfigs, false); + try { + tableConfigs.add(new TableConfig("default", profile.getColumnFamilyOptions())); + + return RocksDatabase.open(reachableDBFile, + profile.getDBOptions(), + new ManagedWriteOptions(), + tableConfigs, false); + } finally { + for (TableConfig config : tableConfigs) { + config.close(); + } + } } private void closeReachableDB() { From b63139e7604298660039639d39ff4a6d93f46351 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 14 Nov 2024 11:21:04 +0530 Subject: [PATCH 13/28] wrapped the stats in an obj, dry-run implicit --- .../repair}/TestFSORepairTool.java | 69 ++--- .../hadoop/ozone/repair/om/FSORepairCLI.java | 9 +- .../hadoop/ozone/repair/om/FSORepairTool.java | 237 ++++++++---------- 3 files changed, 138 insertions(+), 177 deletions(-) rename hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/{fs/ozone => ozone/repair}/TestFSORepairTool.java (92%) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/TestFSORepairTool.java similarity index 92% rename from hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java rename to hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/TestFSORepairTool.java index cd8ea84e163b..e73dfaef979c 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/TestFSORepairTool.java @@ -15,7 +15,7 @@ * See the License for the specific language governing 
permissions and * limitations under the License. */ -package org.apache.hadoop.fs.ozone; +package org.apache.hadoop.ozone.repair; import org.apache.commons.io.IOUtils; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -80,7 +80,7 @@ public class TestFSORepairTool { private static FileSystem fs; private static OzoneClient client; private static OzoneConfiguration conf = null; - private static FSORepairTool tool; + private FSORepairTool tool; @BeforeAll public static void init() throws Exception { @@ -217,7 +217,7 @@ public void testConnectedTreeOneBucket() throws Exception { // Test the connected tree in debug mode. tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true, false, null, null); + getOmDBLocation(), false, null, null); FSORepairTool.Report debugReport = tool.run(); Assertions.assertEquals(expectedReport, debugReport); @@ -227,7 +227,7 @@ public void testConnectedTreeOneBucket() throws Exception { // Running again in repair mode should give same results since the tree // is connected. tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), false, true, null, null); + getOmDBLocation(), true, null, null); FSORepairTool.Report repairReport = tool.run(); Assertions.assertEquals(expectedReport, repairReport); @@ -242,7 +242,7 @@ public void testReportedDataSize() throws Exception { FSORepairTool.Report expectedReport = new FSORepairTool.Report(report1, report2); tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), false, true, null, null); + getOmDBLocation(), true, null, null); FSORepairTool.Report debugReport = tool.run(); Assertions.assertEquals(expectedReport, debugReport); } @@ -260,13 +260,13 @@ public void testVolumeAndBucketFilter() throws Exception { // When volume filter is passed tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), false, true, "/vol1", null); + getOmDBLocation(), true, "/vol1", null); FSORepairTool.Report result1 = tool.run(); Assertions.assertEquals(expectedReport1, result1); // When both volume and bucket filters are passed tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), false, true, "/vol2", "bucket2"); + getOmDBLocation(), true, "/vol2", "bucket2"); FSORepairTool.Report result2 = tool.run(); Assertions.assertEquals(expectedReport2, result2); @@ -274,12 +274,12 @@ public void testVolumeAndBucketFilter() throws Exception { // When a non-existent bucket filter is passed try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - PrintStream ps = new PrintStream(outputStream)) { + PrintStream ps = new PrintStream(outputStream, true, DEFAULT_ENCODING)) { System.setOut(ps); tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), false, true, "/vol1", "bucket2"); + getOmDBLocation(), true, "/vol1", "bucket2"); tool.run(); - String output = outputStream.toString(); + String output = outputStream.toString(DEFAULT_ENCODING); Assertions.assertTrue(output.contains("Bucket 'bucket2' does not exist in volume '/vol1'.")); } finally { System.setOut(originalOut); @@ -287,12 +287,12 @@ public void testVolumeAndBucketFilter() throws Exception { // When a non-existent volume filter is passed try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - PrintStream ps = new PrintStream(outputStream)) { + PrintStream ps = new PrintStream(outputStream, true, DEFAULT_ENCODING)) { System.setOut(ps); tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), false, true, "/vol3", "bucket2"); + getOmDBLocation(), true, "/vol3", "bucket2"); tool.run(); - String output = outputStream.toString(); + String output = 
outputStream.toString(DEFAULT_ENCODING); Assertions.assertTrue(output.contains("Volume '/vol3' does not exist.")); } finally { System.setOut(originalOut); @@ -300,12 +300,12 @@ public void testVolumeAndBucketFilter() throws Exception { // When bucket filter is passed without the volume filter. try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - PrintStream ps = new PrintStream(outputStream)) { + PrintStream ps = new PrintStream(outputStream, true, DEFAULT_ENCODING)) { System.setOut(ps); tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), false, true, null, "bucket2"); + getOmDBLocation(), true, null, "bucket2"); tool.run(); - String output = outputStream.toString(); + String output = outputStream.toString(DEFAULT_ENCODING); Assertions.assertTrue(output.contains("--bucket flag cannot be used without specifying --volume.")); } finally { System.setOut(originalOut); @@ -325,7 +325,7 @@ public void testMultipleBucketsAndVolumes() throws Exception { report1, report2); tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), false, true, null, null); + getOmDBLocation(), true, null, null); FSORepairTool.Report generatedReport = tool.run(); Assertions.assertEquals(generatedReport, expectedAggregateReport); @@ -370,11 +370,11 @@ public void testDeleteOverwrite() throws Exception { disconnectDirectory("dir1"); tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), false, true, null, null); + getOmDBLocation(), true, null, null); FSORepairTool.Report generatedReport = tool.run(); - Assertions.assertEquals(1, generatedReport.getUnreachableDirs()); - Assertions.assertEquals(3, generatedReport.getUnreachableFiles()); + Assertions.assertEquals(1, generatedReport.getUnreachable().getDirs()); + Assertions.assertEquals(3, generatedReport.getUnreachable().getFiles()); // This assertion ensures that only specific directories and keys remain in the active tables, // as the remaining entries are expected to be moved to the deleted tables by the background service. @@ -388,7 +388,7 @@ public void testDeleteOverwrite() throws Exception { public void testEmptyFileTrees() throws Exception { // Run when there are no file trees. tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), false, true, null, null); + getOmDBLocation(), true, null, null); FSORepairTool.Report generatedReport = tool.run(); Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); assertDeleteTablesEmpty(); @@ -399,9 +399,10 @@ public void testEmptyFileTrees() throws Exception { // Run on an empty volume and bucket. tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), false, true, null, null); + getOmDBLocation(), true, null, null); generatedReport = tool.run(); Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); + assertDeleteTablesEmpty(); } @Test @@ -437,7 +438,7 @@ public void testNonFSOBucketsSkipped() throws Exception { // Even in repair mode there should be no action. legacy and obs buckets // will be skipped and FSO tree is connected. 
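
The filter checks above all share one stdout-capture idiom; in isolation it looks like the following sketch (assuming a JUnit test method declared with throws Exception, a tool field as in this test class, and the usual java.io and java.nio.charset imports):

    PrintStream originalOut = System.out;
    try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
         PrintStream ps = new PrintStream(outputStream, true, StandardCharsets.UTF_8.name())) {
      System.setOut(ps);              // redirect the tool's console output
      tool.run();
      String output = outputStream.toString(StandardCharsets.UTF_8.name());
      // assert on 'output' here, e.g. that it names the missing volume or bucket
    } finally {
      System.setOut(originalOut);     // always restore the real stdout
    }
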
tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), false, true, null, null); + getOmDBLocation(), true, null, null); FSORepairTool.Report generatedReport = tool.run(); Assertions.assertEquals(connectReport, generatedReport); @@ -496,10 +497,10 @@ private FSORepairTool.Report buildConnectedTree(String volume, String bucket, in assertConnectedTreeReadable(volume, bucket); + FSORepairTool.ReportStatistics reachableCount = + new FSORepairTool.ReportStatistics(3, 4, fileSize * 4L); return new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report.Builder() - .setReachableDirs(3) - .setReachableFiles(4) - .setReachableBytes(fileSize * 4L) + .setReachable(reachableCount) .build(); } @@ -544,15 +545,15 @@ private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, assertDisconnectedTreePartiallyReadable(volume, bucket); + // dir1 does not count towards the unreachable directories the tool + // will see. It was deleted completely so the tool will never see it. + FSORepairTool.ReportStatistics reachableCount = + new FSORepairTool.ReportStatistics(1, 1, fileSize); + FSORepairTool.ReportStatistics unreachableCount = + new FSORepairTool.ReportStatistics(1, 3, fileSize * 3L); return new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report.Builder() - .setReachableDirs(1) - .setReachableFiles(1) - .setReachableBytes(fileSize) - // dir1 does not count towards the unreachable directories the tool - // will see. It was deleted completely so the tool will never see it. - .setUnreachableDirs(1) - .setUnreachableFiles(3) - .setUnreachableBytes(fileSize * 3L) + .setReachable(reachableCount) + .setUnreachable(unreachableCount) .build(); } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java index 9a3418038a8d..65ab77fbe64c 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java @@ -45,11 +45,6 @@ public class FSORepairCLI implements Callable, SubcommandWithParent { description = "Path to OM RocksDB") private String dbPath; - @CommandLine.Option(names = {"--dry-run"}, - defaultValue = "true", - description = "Run in dry-run mode to log information about unreachable files or directories.") - private boolean dryRun; - @CommandLine.Option(names = {"--repair"}, defaultValue = "false", description = "Run in repair mode to move unreachable files and directories to deleted tables.") @@ -67,18 +62,16 @@ public class FSORepairCLI implements Callable, SubcommandWithParent { description = "More verbose output. ") private boolean verbose; - @Override public Void call() throws Exception { if (repair) { - dryRun = false; //Disable dry-run if repair is passed. 
System.out.println("FSO Repair Tool is running in repair mode"); } else { System.out.println("FSO Repair Tool is running in debug mode"); } try { FSORepairTool - repairTool = new FSORepairTool(dbPath, dryRun, repair, volume, bucket); + repairTool = new FSORepairTool(dbPath, repair, volume, bucket); repairTool.run(); } catch (Exception ex) { throw new IllegalArgumentException("FSO repair failed: " + ex.getMessage()); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index 08a6b4a50a23..23cac40416c2 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -109,16 +109,12 @@ public class FSORepairTool { private ColumnFamilyHandle reachableCFHandle; private RocksDatabase reachableDB; - private long reachableBytes; - private long reachableFiles; - private long reachableDirs; - private long unreachableBytes; - private long unreachableFiles; - private long unreachableDirs; - private final boolean dryRun; - - public FSORepairTool(String dbPath, boolean dryRun, boolean repair, String volume, String bucket) throws IOException { - this(getStoreFromPath(dbPath), dbPath, dryRun, repair, volume, bucket); + private final ReportStatistics reachableStats; + private final ReportStatistics unreachableStats; + private final boolean repair; + + public FSORepairTool(String dbPath, boolean repair, String volume, String bucket) throws IOException { + this(getStoreFromPath(dbPath), dbPath, repair, volume, bucket); } /** @@ -126,19 +122,14 @@ public FSORepairTool(String dbPath, boolean dryRun, boolean repair, String volum * class for testing. */ @VisibleForTesting - public FSORepairTool(DBStore dbStore, String dbPath, boolean isDryRun, boolean isRepair, String volume, String bucket) + public FSORepairTool(DBStore dbStore, String dbPath, boolean repair, String volume, String bucket) throws IOException { - dryRun = isDryRun; - // Counters to track as we walk the tree. - reachableBytes = 0; - reachableFiles = 0; - reachableDirs = 0; - unreachableBytes = 0; - unreachableFiles = 0; - unreachableDirs = 0; + this.reachableStats = new ReportStatistics(0, 0, 0); + this.unreachableStats = new ReportStatistics(0, 0, 0); this.store = dbStore; this.omDBPath = dbPath; + this.repair = repair; this.volumeFilter = volume; this.bucketFilter = bucket; volumeTable = store.getTable(OmMetadataManagerImpl.VOLUME_TABLE, @@ -224,7 +215,7 @@ public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOExc // Check for snapshots in the specified bucket if (checkIfSnapshotExistsForBucket(volumeFilter, bucketFilter)) { - if (dryRun) { + if (!repair) { System.out.println("Snapshot detected in bucket '" + bucketFilter + "'"); } else { System.out.println("Snapshot exists in bucket '" + bucketFilter + "'. " + @@ -290,7 +281,7 @@ private boolean checkIfSnapshotExistsForBucket(String volumeName, String bucketN private void processBucket(OmVolumeArgs volume, OmBucketInfo bucketInfo) throws IOException { System.out.println("Processing bucket: " + volume.getVolume() + "/" + bucketInfo.getBucketName()); if (checkIfSnapshotExistsForBucket(volume.getVolume(), bucketInfo.getBucketName())) { - if (dryRun) { + if (!repair) { System.out.println( "Snapshot detected in bucket '" + volume.getVolume() + "/" + bucketInfo.getBucketName() + "'. 
"); } else { @@ -309,12 +300,8 @@ private void processBucket(OmVolumeArgs volume, OmBucketInfo bucketInfo) throws private Report buildReportAndLog() { Report report = new Report.Builder() - .setReachableDirs(reachableDirs) - .setReachableFiles(reachableFiles) - .setReachableBytes(reachableBytes) - .setUnreachableDirs(unreachableDirs) - .setUnreachableFiles(unreachableFiles) - .setUnreachableBytes(unreachableBytes) + .setReachable(reachableStats) + .setUnreachable(unreachableStats) .build(); System.out.println("\n" + report); @@ -376,9 +363,9 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) if (!isReachable(dirKey)) { System.out.println("Found unreachable directory: " + dirKey); - unreachableDirs++; + unreachableStats.add(new ReportStatistics(1, 0, 0)); - if (dryRun) { + if (!repair) { System.out.println("Marking unreachable directory " + dirKey + " for deletion."); } else { System.out.println("Deleting unreachable directory " + dirKey); @@ -405,10 +392,9 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) OmKeyInfo fileInfo = fileEntry.getValue(); if (!isReachable(fileKey)) { System.out.println("Found unreachable file: " + fileKey); - unreachableBytes += fileInfo.getDataSize(); - unreachableFiles++; + unreachableStats.add(new ReportStatistics(0, 1, fileInfo.getDataSize())); - if (dryRun) { + if (!repair) { System.out.println("Marking unreachable file " + fileKey + " for deletion." + fileKey); } else { System.out.println("Deleting unreachable file " + fileKey); @@ -418,8 +404,7 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) // NOTE: We are deserializing the proto of every reachable file // just to log it's size. If we don't need this information we could // save time by skipping this step. - reachableBytes += fileInfo.getDataSize(); - reachableFiles++; + reachableStats.add(new ReportStatistics(0, 1, fileInfo.getDataSize())); } } } @@ -485,7 +470,7 @@ private Collection getChildDirectoriesAndMarkAsReachable(OmVolumeArgs vo // This directory was reached by search. addReachableEntry(volume, bucket, childDirEntry.getValue()); childDirs.add(childDirKey); - reachableDirs++; + reachableStats.add(new ReportStatistics(1, 0, 0)); } } @@ -620,77 +605,37 @@ private void createReachableTable() throws IOException { * Define a Report to be created. */ public static class Report { - private long reachableBytes; - private long reachableFiles; - private long reachableDirs; - private long unreachableBytes; - private long unreachableFiles; - private long unreachableDirs; + private final ReportStatistics reachable; + private final ReportStatistics unreachable; /** * Builds one report that is the aggregate of multiple others. */ public Report(org.apache.hadoop.ozone.repair.om.FSORepairTool.Report... 
reports) { - reachableBytes = 0; - reachableFiles = 0; - reachableDirs = 0; - unreachableBytes = 0; - unreachableFiles = 0; - unreachableDirs = 0; - - for (org.apache.hadoop.ozone.repair.om.FSORepairTool.Report report: reports) { - reachableBytes += report.reachableBytes; - reachableFiles += report.reachableFiles; - reachableDirs += report.reachableDirs; - unreachableBytes += report.unreachableBytes; - unreachableFiles += report.unreachableFiles; - unreachableDirs += report.unreachableDirs; + reachable = new ReportStatistics(); + unreachable = new ReportStatistics(); + + for (org.apache.hadoop.ozone.repair.om.FSORepairTool.Report report : reports) { + reachable.add(report.reachable); + unreachable.add(report.unreachable); } } private Report(org.apache.hadoop.ozone.repair.om.FSORepairTool.Report.Builder builder) { - reachableBytes = builder.reachableBytes; - reachableFiles = builder.reachableFiles; - reachableDirs = builder.reachableDirs; - unreachableBytes = builder.unreachableBytes; - unreachableFiles = builder.unreachableFiles; - unreachableDirs = builder.unreachableDirs; - } - - public long getReachableBytes() { - return reachableBytes; + this.reachable = builder.reachable; + this.unreachable = builder.unreachable; } - public long getReachableFiles() { - return reachableFiles; + public ReportStatistics getReachable() { + return reachable; } - public long getReachableDirs() { - return reachableDirs; + public ReportStatistics getUnreachable() { + return unreachable; } - public long getUnreachableBytes() { - return unreachableBytes; - } - - public long getUnreachableFiles() { - return unreachableFiles; - } - - public long getUnreachableDirs() { - return unreachableDirs; - } - - @Override public String toString() { - return "Reachable:" + - "\n\tDirectories: " + reachableDirs + - "\n\tFiles: " + reachableFiles + - "\n\tBytes: " + reachableBytes + - "\nUnreachable:" + - "\n\tDirectories: " + unreachableDirs + - "\n\tFiles: " + unreachableFiles + - "\n\tBytes: " + unreachableBytes; + return "Reachable: " + reachable + "\nUnreachable: " + unreachable; } @Override @@ -706,77 +651,99 @@ public boolean equals(Object other) { // Useful for testing. System.out.println("Comparing reports\nExpect:\n" + this + "\nActual:\n" + report); - return reachableBytes == report.reachableBytes && - reachableFiles == report.reachableFiles && - reachableDirs == report.reachableDirs && - unreachableBytes == report.unreachableBytes && - unreachableFiles == report.unreachableFiles && - unreachableDirs == report.unreachableDirs; + return reachable.equals(report.reachable) && unreachable.equals(report.unreachable); } @Override public int hashCode() { - return Objects.hash(reachableBytes, - reachableFiles, - reachableDirs, - unreachableBytes, - unreachableFiles, - unreachableDirs); + return Objects.hash(reachable, unreachable); } /** * Builder class for a Report. 
*/ public static final class Builder { - private long reachableBytes; - private long reachableFiles; - private long reachableDirs; - private long unreachableBytes; - private long unreachableFiles; - private long unreachableDirs; + private ReportStatistics reachable = new ReportStatistics(); + private ReportStatistics unreachable = new ReportStatistics(); public Builder() { } - @SuppressWarnings("checkstyle:hiddenfield") - public Builder setReachableBytes(long reachableBytes) { - this.reachableBytes = reachableBytes; + public Builder setReachable(ReportStatistics reachable) { + this.reachable = reachable; return this; } - @SuppressWarnings("checkstyle:hiddenfield") - public Builder setReachableFiles(long reachableFiles) { - this.reachableFiles = reachableFiles; + public Builder setUnreachable(ReportStatistics unreachable) { + this.unreachable = unreachable; return this; } - @SuppressWarnings("checkstyle:hiddenfield") - public Builder setReachableDirs(long reachableDirs) { - this.reachableDirs = reachableDirs; - return this; + public Report build() { + return new Report(this); } + } + } - @SuppressWarnings("checkstyle:hiddenfield") - public Builder setUnreachableBytes(long unreachableBytes) { - this.unreachableBytes = unreachableBytes; - return this; - } + /** + * Represents the statistics of reachable and unreachable data. + * This gives the count of dirs, files and bytes. + */ - @SuppressWarnings("checkstyle:hiddenfield") - public Builder setUnreachableFiles(long unreachableFiles) { - this.unreachableFiles = unreachableFiles; - return this; - } + public static class ReportStatistics { + private long dirs; + private long files; + private long bytes; - @SuppressWarnings("checkstyle:hiddenfield") - public Builder setUnreachableDirs(long unreachableDirs) { - this.unreachableDirs = unreachableDirs; - return this; - } + public ReportStatistics() { } - public Report build() { - return new Report(this); + public ReportStatistics(long dirs, long files, long bytes) { + this.dirs = dirs; + this.files = files; + this.bytes = bytes; + } + + public void add(ReportStatistics other) { + this.dirs += other.dirs; + this.files += other.files; + this.bytes += other.bytes; + } + + public long getDirs() { + return dirs; + } + + public long getFiles() { + return files; + } + + public long getBytes() { + return bytes; + } + + @Override + public String toString() { + return "\n\tDirectories: " + dirs + + "\n\tFiles: " + files + + "\n\tBytes: " + bytes; + } + + @Override + public boolean equals(Object other) { + if (other == this) { + return true; } + if (other == null || getClass() != other.getClass()) { + return false; + } + ReportStatistics stats = (ReportStatistics) other; + + return bytes == stats.bytes && files == stats.files && dirs == stats.dirs; + } + + @Override + public int hashCode() { + return Objects.hash(bytes, files, dirs); } } } From b189917b80728348b89bfa059301cffe56b8aa0e Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 14 Nov 2024 15:40:20 +0530 Subject: [PATCH 14/28] removed package name and added methods in ReportStatistics --- .../repair/{ => om}/TestFSORepairTool.java | 7 ++--- .../hadoop/ozone/repair/om/FSORepairTool.java | 29 ++++++++++++------- 2 files changed, 21 insertions(+), 15 deletions(-) rename hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/{ => om}/TestFSORepairTool.java (99%) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/TestFSORepairTool.java 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java similarity index 99% rename from hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/TestFSORepairTool.java rename to hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java index e73dfaef979c..61af2696bd3f 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.ozone.repair; +package org.apache.hadoop.ozone.repair.om; import org.apache.commons.io.IOUtils; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -41,7 +41,6 @@ import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; -import org.apache.hadoop.ozone.repair.om.FSORepairTool; import org.apache.hadoop.ozone.shell.OzoneShell; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterAll; @@ -499,7 +498,7 @@ private FSORepairTool.Report buildConnectedTree(String volume, String bucket, in FSORepairTool.ReportStatistics reachableCount = new FSORepairTool.ReportStatistics(3, 4, fileSize * 4L); - return new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report.Builder() + return new FSORepairTool.Report.Builder() .setReachable(reachableCount) .build(); } @@ -551,7 +550,7 @@ private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, new FSORepairTool.ReportStatistics(1, 1, fileSize); FSORepairTool.ReportStatistics unreachableCount = new FSORepairTool.ReportStatistics(1, 3, fileSize * 3L); - return new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report.Builder() + return new FSORepairTool.Report.Builder() .setReachable(reachableCount) .setUnreachable(unreachableCount) .build(); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index 23cac40416c2..291955d6f37f 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.ozone.repair.om; -import com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.utils.db.RocksDatabase; @@ -86,7 +85,7 @@ */ public class FSORepairTool { public static final Logger LOG = - LoggerFactory.getLogger(org.apache.hadoop.ozone.repair.om.FSORepairTool.class); + LoggerFactory.getLogger(FSORepairTool.class); private final String omDBPath; private final DBStore store; @@ -121,7 +120,6 @@ public FSORepairTool(String dbPath, boolean repair, String volume, String bucket * Allows passing RocksDB instance from a MiniOzoneCluster directly to this * class for testing. 
*/ - @VisibleForTesting public FSORepairTool(DBStore dbStore, String dbPath, boolean repair, String volume, String bucket) throws IOException { this.reachableStats = new ReportStatistics(0, 0, 0); @@ -169,7 +167,7 @@ protected static DBStore getStoreFromPath(String dbPath) throws IOException { new File(dbPath).getParentFile()); } - public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOException { + public FSORepairTool.Report run() throws IOException { if (bucketFilter != null && volumeFilter == null) { System.out.println("--bucket flag cannot be used without specifying --volume."); @@ -363,7 +361,7 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) if (!isReachable(dirKey)) { System.out.println("Found unreachable directory: " + dirKey); - unreachableStats.add(new ReportStatistics(1, 0, 0)); + unreachableStats.addDir(); if (!repair) { System.out.println("Marking unreachable directory " + dirKey + " for deletion."); @@ -392,7 +390,7 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) OmKeyInfo fileInfo = fileEntry.getValue(); if (!isReachable(fileKey)) { System.out.println("Found unreachable file: " + fileKey); - unreachableStats.add(new ReportStatistics(0, 1, fileInfo.getDataSize())); + unreachableStats.addFile(fileInfo.getDataSize()); if (!repair) { System.out.println("Marking unreachable file " + fileKey + " for deletion." + fileKey); @@ -404,7 +402,7 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) // NOTE: We are deserializing the proto of every reachable file // just to log it's size. If we don't need this information we could // save time by skipping this step. - reachableStats.add(new ReportStatistics(0, 1, fileInfo.getDataSize())); + reachableStats.addFile(fileInfo.getDataSize()); } } } @@ -470,7 +468,7 @@ private Collection getChildDirectoriesAndMarkAsReachable(OmVolumeArgs vo // This directory was reached by search. addReachableEntry(volume, bucket, childDirEntry.getValue()); childDirs.add(childDirKey); - reachableStats.add(new ReportStatistics(1, 0, 0)); + reachableStats.addDir(); } } @@ -611,17 +609,17 @@ public static class Report { /** * Builds one report that is the aggregate of multiple others. */ - public Report(org.apache.hadoop.ozone.repair.om.FSORepairTool.Report... reports) { + public Report(FSORepairTool.Report... 
reports) { reachable = new ReportStatistics(); unreachable = new ReportStatistics(); - for (org.apache.hadoop.ozone.repair.om.FSORepairTool.Report report : reports) { + for (FSORepairTool.Report report : reports) { reachable.add(report.reachable); unreachable.add(report.unreachable); } } - private Report(org.apache.hadoop.ozone.repair.om.FSORepairTool.Report.Builder builder) { + private Report(FSORepairTool.Report.Builder builder) { this.reachable = builder.reachable; this.unreachable = builder.unreachable; } @@ -745,5 +743,14 @@ public boolean equals(Object other) { public int hashCode() { return Objects.hash(bytes, files, dirs); } + + public void addDir() { + dirs++; + } + + public void addFile(long size) { + files++; + bytes += size; + } } } From b64593e4715f3999f05e53b5a5d5f2d2f7040397 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Fri, 15 Nov 2024 10:49:10 +0530 Subject: [PATCH 15/28] Addressed minor comments --- .../ozone/repair/om/TestFSORepairTool.java | 96 +++++---------- .../hadoop/ozone/repair/om/FSORepairCLI.java | 5 +- .../hadoop/ozone/repair/om/FSORepairTool.java | 116 ++++++------------ 3 files changed, 70 insertions(+), 147 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java index 61af2696bd3f..ac4242b60f67 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java @@ -71,8 +71,7 @@ * FSORepairTool test cases. */ public class TestFSORepairTool { - public static final Logger LOG = - LoggerFactory.getLogger(TestFSORepairTool.class); + public static final Logger LOG = LoggerFactory.getLogger(TestFSORepairTool.class); private static final String DEFAULT_ENCODING = UTF_8.name(); private static MiniOzoneHAClusterImpl cluster; @@ -85,16 +84,14 @@ public class TestFSORepairTool { public static void init() throws Exception { // Set configs. conf = new OzoneConfiguration(); - // deletion services will be triggered manually. conf.setInt(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, 2000); conf.setInt(OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK, 5); - conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, - TimeUnit.MILLISECONDS); + conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); conf.setInt(OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK, 20); conf.setBoolean(OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY, true); // Since delete services use RocksDB iterators, make sure the double // buffer is flushed between runs. - conf.setInt(OMConfigKeys.OZONE_OM_UNFLUSHED_TRANSACTION_MAX_COUNT, 2); + conf.setInt(OMConfigKeys.OZONE_OM_UNFLUSHED_TRANSACTION_MAX_COUNT, 1); // Build cluster. cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf) @@ -105,8 +102,7 @@ public static void init() throws Exception { cluster.waitForClusterToBeReady(); // Init ofs. 
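
Before the test updates continue, a quick illustration of how the ReportStatistics wrapper finished above composes; the counts are invented, the API is the one added in this patch and the previous one:

    // Hypothetical per-bucket reports; ReportStatistics takes (dirs, files, bytes).
    FSORepairTool.Report bucket1 = new FSORepairTool.Report.Builder()
        .setReachable(new FSORepairTool.ReportStatistics(3, 4, 4096))
        .build();
    FSORepairTool.Report bucket2 = new FSORepairTool.Report.Builder()
        .setUnreachable(new FSORepairTool.ReportStatistics(1, 3, 3072))
        .build();
    // The varargs constructor sums each side, so the totals are
    // reachable (3, 4, 4096) and unreachable (1, 3, 3072).
    FSORepairTool.Report total = new FSORepairTool.Report(bucket1, bucket2);
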
- final String rootPath = String.format("%s://%s/", - OZONE_OFS_URI_SCHEME, cluster.getOzoneManager().getOMServiceId()); + final String rootPath = String.format("%s://%s/", OZONE_OFS_URI_SCHEME, cluster.getOzoneManager().getOMServiceId()); conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); fs = FileSystem.get(conf); client = OzoneClientFactory.getRpcClient("omservice", conf); @@ -161,16 +157,14 @@ private String generateSetConfString(String key, String value) { private String[] getHASetConfStrings(int numOfArgs) { assert (numOfArgs >= 0); - String[] res = new String[1 + 1 + 1 + numOfArgs]; + String[] res = new String[3 + numOfArgs]; final int indexOmServiceIds = 0; final int indexOmNodes = 1; final int indexOmAddressStart = 2; - res[indexOmServiceIds] = getSetConfStringFromConf( - OMConfigKeys.OZONE_OM_SERVICE_IDS_KEY); + res[indexOmServiceIds] = getSetConfStringFromConf(OMConfigKeys.OZONE_OM_SERVICE_IDS_KEY); - String omNodesKey = ConfUtils.addKeySuffixes( - OMConfigKeys.OZONE_OM_NODES_KEY, "omservice"); + String omNodesKey = ConfUtils.addKeySuffixes(OMConfigKeys.OZONE_OM_NODES_KEY, "omservice"); String omNodesVal = conf.get(omNodesKey); res[indexOmNodes] = generateSetConfString(omNodesKey, omNodesVal); @@ -197,8 +191,7 @@ private String[] getHASetConfStrings(String[] existingArgs) { int indexCopyStart = res.length - existingArgs.length; // Then copy the existing args to the returned String array - System.arraycopy(existingArgs, 0, res, indexCopyStart, - existingArgs.length); + System.arraycopy(existingArgs, 0, res, indexCopyStart, existingArgs.length); return res; } @@ -215,8 +208,7 @@ public void testConnectedTreeOneBucket() throws Exception { FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1"); // Test the connected tree in debug mode. - tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), false, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), false, null, null); FSORepairTool.Report debugReport = tool.run(); Assertions.assertEquals(expectedReport, debugReport); @@ -225,8 +217,7 @@ public void testConnectedTreeOneBucket() throws Exception { // Running again in repair mode should give same results since the tree // is connected. 
- tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null); FSORepairTool.Report repairReport = tool.run(); Assertions.assertEquals(expectedReport, repairReport); @@ -240,8 +231,7 @@ public void testReportedDataSize() throws Exception { FSORepairTool.Report report2 = buildConnectedTree("vol1", "bucket2", 10); FSORepairTool.Report expectedReport = new FSORepairTool.Report(report1, report2); - tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null); FSORepairTool.Report debugReport = tool.run(); Assertions.assertEquals(expectedReport, debugReport); } @@ -258,14 +248,12 @@ public void testVolumeAndBucketFilter() throws Exception { FSORepairTool.Report expectedReport2 = new FSORepairTool.Report(report2); // When volume filter is passed - tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true, "/vol1", null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol1", null); FSORepairTool.Report result1 = tool.run(); Assertions.assertEquals(expectedReport1, result1); // When both volume and bucket filters are passed - tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true, "/vol2", "bucket2"); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol2", "bucket2"); FSORepairTool.Report result2 = tool.run(); Assertions.assertEquals(expectedReport2, result2); @@ -275,8 +263,7 @@ public void testVolumeAndBucketFilter() throws Exception { try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(outputStream, true, DEFAULT_ENCODING)) { System.setOut(ps); - tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true, "/vol1", "bucket2"); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol1", "bucket2"); tool.run(); String output = outputStream.toString(DEFAULT_ENCODING); Assertions.assertTrue(output.contains("Bucket 'bucket2' does not exist in volume '/vol1'.")); @@ -288,8 +275,7 @@ public void testVolumeAndBucketFilter() throws Exception { try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(outputStream, true, DEFAULT_ENCODING)) { System.setOut(ps); - tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true, "/vol3", "bucket2"); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol3", "bucket2"); tool.run(); String output = outputStream.toString(DEFAULT_ENCODING); Assertions.assertTrue(output.contains("Volume '/vol3' does not exist.")); @@ -301,8 +287,7 @@ public void testVolumeAndBucketFilter() throws Exception { try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(outputStream, true, DEFAULT_ENCODING)) { System.setOut(ps); - tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true, null, "bucket2"); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, "bucket2"); tool.run(); String output = outputStream.toString(DEFAULT_ENCODING); Assertions.assertTrue(output.contains("--bucket flag cannot be used without specifying --volume.")); @@ -320,11 +305,9 @@ public void testMultipleBucketsAndVolumes() throws Exception { cluster.getOzoneManager().getMetadataManager().getKeyTable(getFSOBucketLayout()); FSORepairTool.Report report1 = buildConnectedTree("vol1", "bucket1"); FSORepairTool.Report report2 = buildDisconnectedTree("vol2", "bucket2"); - FSORepairTool.Report 
expectedAggregateReport = new FSORepairTool.Report( - report1, report2); + FSORepairTool.Report expectedAggregateReport = new FSORepairTool.Report(report1, report2); - tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null); FSORepairTool.Report generatedReport = tool.run(); Assertions.assertEquals(generatedReport, expectedAggregateReport); @@ -368,8 +351,7 @@ public void testDeleteOverwrite() throws Exception { ContractTestUtils.touch(fs, new Path("/vol1/bucket1/dir1/file2")); disconnectDirectory("dir1"); - tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null); FSORepairTool.Report generatedReport = tool.run(); Assertions.assertEquals(1, generatedReport.getUnreachable().getDirs()); @@ -386,8 +368,7 @@ public void testDeleteOverwrite() throws Exception { @Test public void testEmptyFileTrees() throws Exception { // Run when there are no file trees. - tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null); FSORepairTool.Report generatedReport = tool.run(); Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); assertDeleteTablesEmpty(); @@ -397,8 +378,7 @@ public void testEmptyFileTrees() throws Exception { fs.mkdirs(new Path("/vol2/bucket1")); // Run on an empty volume and bucket. - tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null); generatedReport = tool.run(); Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); assertDeleteTablesEmpty(); @@ -431,13 +411,11 @@ public void testNonFSOBucketsSkipped() throws Exception { legacyStream.close(); // Add an FSO bucket with data. - FSORepairTool.Report connectReport = - buildConnectedTree("vol1", "fso-bucket"); + FSORepairTool.Report connectReport = buildConnectedTree("vol1", "fso-bucket"); // Even in repair mode there should be no action. legacy and obs buckets // will be skipped and FSO tree is connected. - tool = new FSORepairTool(getOmDB(), - getOmDBLocation(), true, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null); FSORepairTool.Report generatedReport = tool.run(); Assertions.assertEquals(connectReport, generatedReport); @@ -446,23 +424,20 @@ public void testNonFSOBucketsSkipped() throws Exception { } finally { // Need to manually delete obs bucket. It cannot be deleted with ofs as // part of the normal test cleanup. - store.getVolume("vol1").getBucket("obs-bucket") - .deleteKey("prefix/test-key"); + store.getVolume("vol1").getBucket("obs-bucket").deleteKey("prefix/test-key"); store.getVolume("vol1").deleteBucket("obs-bucket"); } } - private FSORepairTool.Report buildConnectedTree(String volume, String bucket) - throws Exception { + private FSORepairTool.Report buildConnectedTree(String volume, String bucket) throws Exception { return buildConnectedTree(volume, bucket, 0); } /** * Creates a tree with 3 reachable directories and 4 reachable files. 
*/ - private FSORepairTool.Report buildConnectedTree(String volume, String bucket, int fileSize) - throws Exception { + private FSORepairTool.Report buildConnectedTree(String volume, String bucket, int fileSize) throws Exception { Path bucketPath = new Path("/" + volume + "/" + bucket); Path dir1 = new Path(bucketPath, "dir1"); Path file1 = new Path(dir1, "file1"); @@ -503,8 +478,7 @@ private FSORepairTool.Report buildConnectedTree(String volume, String bucket, in .build(); } - private void assertConnectedTreeReadable(String volume, String bucket) - throws IOException { + private void assertConnectedTreeReadable(String volume, String bucket) throws IOException { Path bucketPath = new Path("/" + volume + "/" + bucket); Path dir1 = new Path(bucketPath, "dir1"); Path file1 = new Path(dir1, "file1"); @@ -525,8 +499,7 @@ private void assertConnectedTreeReadable(String volume, String bucket) Assertions.assertTrue(fs.exists(file4)); } - private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket) - throws Exception { + private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket) throws Exception { return buildDisconnectedTree(volume, bucket, 0); } @@ -534,8 +507,7 @@ private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket) * Creates a tree with 1 reachable directory, 1 reachable file, 1 * unreachable directory, and 3 unreachable files. */ - private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, - int fileSize) throws Exception { + private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, int fileSize) throws Exception { buildConnectedTree(volume, bucket, fileSize); // Manually remove dir1. This should disconnect 3 of the files and 1 of @@ -558,11 +530,8 @@ private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, private void disconnectDirectory(String dirName) throws Exception { OzoneManager leader = cluster.getOMLeader(); - Table dirTable = - leader.getMetadataManager().getDirectoryTable(); - try (TableIterator> iterator = - dirTable.iterator()) { + Table dirTable = leader.getMetadataManager().getDirectoryTable(); + try (TableIterator> iterator = dirTable.iterator()) { while (iterator.hasNext()) { Table.KeyValue entry = iterator.next(); String key = entry.getKey(); @@ -574,8 +543,7 @@ private void disconnectDirectory(String dirName) throws Exception { } } - private void assertDisconnectedTreePartiallyReadable( - String volume, String bucket) throws Exception { + private void assertDisconnectedTreePartiallyReadable(String volume, String bucket) throws Exception { Path bucketPath = new Path("/" + volume + "/" + bucket); Path dir1 = new Path(bucketPath, "dir1"); Path file1 = new Path(dir1, "file1"); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java index 65ab77fbe64c..8c73aae4dae3 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java @@ -30,9 +30,8 @@ */ @CommandLine.Command( name = "fso-tree-repair", - description = "Identify and repair a disconnected FSO tree, and mark " + - "unreachable entries for deletion. OM should be " + - "stopped while this tool is run." + description = "Identify and repair a disconnected FSO tree, and mark unreachable entries for deletion. 
" + + "OM should be stopped while this tool is run." ) @MetaInfServices(SubcommandWithParent.class) public class FSORepairCLI implements Callable, SubcommandWithParent { diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index 291955d6f37f..9735b4fcd988 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -65,27 +65,21 @@ * Base Tool to identify disconnected FSO trees in all buckets. * The tool will log information about unreachable files or directories. * If deletes are still in progress (the deleted directory table is not empty), the tool may - * report that the tree is disconnected, even though pending deletes would - * fix the issue. + * report that the tree is disconnected, even though pending deletes would fix the issue. * - * Before using the tool, make sure all OMs are stopped, - * and that all Ratis logs have been flushed to the OM DB. This can be - * done using `ozone admin prepare` before running the tool, and `ozone admin + * Before using the tool, make sure all OMs are stopped, and that all Ratis logs have been flushed to the OM DB. + * This can be done using `ozone admin prepare` before running the tool, and `ozone admin * cancelprepare` when done. * - * The tool will run a DFS from each bucket, and save all reachable - * directories as keys in a new temporary RocksDB instance called "reachable.db" - * In the same directory as om.db. - * will then scan the entire file and directory tables for each bucket to see - * if each object's parent is in the reachable table of reachable.db. The - * reachable table will be dropped and recreated for each bucket. - * The tool is idempotent. reachable.db will not be deleted automatically - * when the tool finishes, in case users want to manually inspect it. It can - * be safely deleted once the tool finishes. + * The tool will run a DFS from each bucket, and save all reachable directories as keys in a new temporary RocksDB + * instance called "reachable.db" in the same directory as om.db. + * It will then scan the entire file and directory tables for each bucket to see if each object's parent is in the + * reachable table of reachable.db. The reachable table will be dropped and recreated for each bucket. + * The tool is idempotent. reachable.db will not be deleted automatically when the tool finishes, + * in case users want to manually inspect it. It can be safely deleted once the tool finishes. */ public class FSORepairTool { - public static final Logger LOG = - LoggerFactory.getLogger(FSORepairTool.class); + public static final Logger LOG = LoggerFactory.getLogger(FSORepairTool.class); private final String omDBPath; private final DBStore store; @@ -99,15 +93,12 @@ public class FSORepairTool { private final String volumeFilter; private final String bucketFilter; // The temporary DB is used to track which items have been seen. - // Since usage of this DB is simple, use it directly from - // RocksDB. + // Since usage of this DB is simple, use it directly from RocksDB. 
private String reachableDBPath; private static final String REACHABLE_TABLE = "reachable"; - private static final byte[] REACHABLE_TABLE_BYTES = - REACHABLE_TABLE.getBytes(StandardCharsets.UTF_8); + private static final byte[] REACHABLE_TABLE_BYTES = REACHABLE_TABLE.getBytes(StandardCharsets.UTF_8); private ColumnFamilyHandle reachableCFHandle; private RocksDatabase reachableDB; - private final ReportStatistics reachableStats; private final ReportStatistics unreachableStats; private final boolean repair; @@ -117,8 +108,7 @@ public FSORepairTool(String dbPath, boolean repair, String volume, String bucket } /** - * Allows passing RocksDB instance from a MiniOzoneCluster directly to this - * class for testing. + * Allows passing RocksDB instance from a MiniOzoneCluster directly to this class for testing. */ public FSORepairTool(DBStore dbStore, String dbPath, boolean repair, String volume, String bucket) throws IOException { @@ -142,16 +132,13 @@ public FSORepairTool(DBStore dbStore, String dbPath, boolean repair, String volu fileTable = store.getTable(OmMetadataManagerImpl.FILE_TABLE, String.class, OmKeyInfo.class); - deletedDirectoryTable = store.getTable( - OmMetadataManagerImpl.DELETED_DIR_TABLE, + deletedDirectoryTable = store.getTable(OmMetadataManagerImpl.DELETED_DIR_TABLE, String.class, OmKeyInfo.class); - deletedTable = store.getTable( - OmMetadataManagerImpl.DELETED_TABLE, + deletedTable = store.getTable(OmMetadataManagerImpl.DELETED_TABLE, String.class, RepeatedOmKeyInfo.class); - snapshotInfoTable = store.getTable( - OmMetadataManagerImpl.SNAPSHOT_INFO_TABLE, + snapshotInfoTable = store.getTable(OmMetadataManagerImpl.SNAPSHOT_INFO_TABLE, String.class, SnapshotInfo.class); } @@ -163,8 +150,7 @@ protected static DBStore getStoreFromPath(String dbPath) throws IOException { "not exist or is not a RocksDB directory.", dbPath)); } // Load RocksDB and tables needed. - return OmMetadataManagerImpl.loadDB(new OzoneConfiguration(), - new File(dbPath).getParentFile()); + return OmMetadataManagerImpl.loadDB(new OzoneConfiguration(), new File(dbPath).getParentFile()); } public FSORepairTool.Report run() throws IOException { @@ -188,8 +174,7 @@ public FSORepairTool.Report run() throws IOException { openReachableDB(); while (volumeIterator.hasNext()) { - Table.KeyValue volumeEntry = - volumeIterator.next(); + Table.KeyValue volumeEntry = volumeIterator.next(); String volumeKey = volumeEntry.getKey(); if (volumeFilter != null && !volumeFilter.equals(volumeKey)) { @@ -211,17 +196,6 @@ public FSORepairTool.Report run() throws IOException { continue; } - // Check for snapshots in the specified bucket - if (checkIfSnapshotExistsForBucket(volumeFilter, bucketFilter)) { - if (!repair) { - System.out.println("Snapshot detected in bucket '" + bucketFilter + "'"); - } else { - System.out.println("Snapshot exists in bucket '" + bucketFilter + "'. 
" + - "Repair is not allowed if snapshots exist."); - return null; - } - } - processBucket(volumeEntry.getValue(), bucketInfo); } else { @@ -230,8 +204,7 @@ public FSORepairTool.Report run() throws IOException { bucketIterator = bucketTable.iterator()) { bucketIterator.seek(volumeKey); while (bucketIterator.hasNext()) { - Table.KeyValue bucketEntry = - bucketIterator.next(); + Table.KeyValue bucketEntry = bucketIterator.next(); String bucketKey = bucketEntry.getKey(); OmBucketInfo bucketInfo = bucketEntry.getValue(); @@ -306,8 +279,7 @@ private Report buildReportAndLog() { return report; } - private void markReachableObjectsInBucket(OmVolumeArgs volume, - OmBucketInfo bucket) throws IOException { + private void markReachableObjectsInBucket(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException { // Only put directories in the stack. // Directory keys should have the form /volumeID/bucketID/parentID/name. Stack dirKeyStack = new Stack<>(); @@ -317,8 +289,7 @@ private void markReachableObjectsInBucket(OmVolumeArgs volume, addReachableEntry(volume, bucket, bucket); // Initialize the stack with all immediate child directories of the // bucket, and mark them all as reachable. - Collection childDirs = - getChildDirectoriesAndMarkAsReachable(volume, bucket, bucket); + Collection childDirs = getChildDirectoriesAndMarkAsReachable(volume, bucket, bucket); dirKeyStack.addAll(childDirs); while (!dirKeyStack.isEmpty()) { @@ -326,15 +297,13 @@ private void markReachableObjectsInBucket(OmVolumeArgs volume, String currentDirKey = dirKeyStack.pop(); OmDirectoryInfo currentDir = directoryTable.get(currentDirKey); if (currentDir == null) { - System.out.println("Directory key" + currentDirKey + "to be processed was not found in the " + - "directory table."); + System.out.println("Directory key" + currentDirKey + "to be processed was not found in the directory table."); continue; } // TODO revisit this for a more memory efficient implementation, // possibly making better use of RocksDB iterators. - childDirs = getChildDirectoriesAndMarkAsReachable(volume, bucket, - currentDir); + childDirs = getChildDirectoriesAndMarkAsReachable(volume, bucket, currentDir); dirKeyStack.addAll(childDirs); } } @@ -346,8 +315,7 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) OM_KEY_PREFIX + bucket.getObjectID(); - try (TableIterator> dirIterator = + try (TableIterator> dirIterator = directoryTable.iterator()) { dirIterator.seek(bucketPrefix); while (dirIterator.hasNext()) { @@ -368,8 +336,7 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) } else { System.out.println("Deleting unreachable directory " + dirKey); OmDirectoryInfo dirInfo = dirEntry.getValue(); - markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(), - dirKey, dirInfo); + markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(), dirKey, dirInfo); } } } @@ -432,20 +399,17 @@ protected void markDirectoryForDeletion(String volumeName, String bucketName, try (BatchOperation batch = store.initBatchOperation()) { directoryTable.deleteWithBatch(batch, dirKeyName); // HDDS-7592: Make directory entries in deleted dir table unique. - String deleteDirKeyName = - dirKeyName + OM_KEY_PREFIX + dirInfo.getObjectID(); + String deleteDirKeyName = dirKeyName + OM_KEY_PREFIX + dirInfo.getObjectID(); // Convert the directory to OmKeyInfo for deletion. 
- OmKeyInfo dirAsKeyInfo = OMFileRequest.getOmKeyInfo( - volumeName, bucketName, dirInfo, dirInfo.getName()); + OmKeyInfo dirAsKeyInfo = OMFileRequest.getOmKeyInfo(volumeName, bucketName, dirInfo, dirInfo.getName()); deletedDirectoryTable.putWithBatch(batch, deleteDirKeyName, dirAsKeyInfo); store.commitBatchOperation(batch); } } - private Collection getChildDirectoriesAndMarkAsReachable(OmVolumeArgs volume, - OmBucketInfo bucket, + private Collection getChildDirectoriesAndMarkAsReachable(OmVolumeArgs volume, OmBucketInfo bucket, WithObjectID currentDir) throws IOException { Collection childDirs = new ArrayList<>(); @@ -457,8 +421,7 @@ private Collection getChildDirectoriesAndMarkAsReachable(OmVolumeArgs vo // prefix to get its immediate children. dirIterator.seek(dirPrefix); while (dirIterator.hasNext()) { - Table.KeyValue childDirEntry = - dirIterator.next(); + Table.KeyValue childDirEntry = dirIterator.next(); String childDirKey = childDirEntry.getKey(); // Stop processing once we have seen all immediate children of this // directory. @@ -479,10 +442,8 @@ private Collection getChildDirectoriesAndMarkAsReachable(OmVolumeArgs vo * Add the specified object to the reachable table, indicating it is part * of the connected FSO tree. */ - private void addReachableEntry(OmVolumeArgs volume, - OmBucketInfo bucket, WithObjectID object) throws IOException { - byte[] reachableKey = buildReachableKey(volume, bucket, object) - .getBytes(StandardCharsets.UTF_8); + private void addReachableEntry(OmVolumeArgs volume, OmBucketInfo bucket, WithObjectID object) throws IOException { + byte[] reachableKey = buildReachableKey(volume, bucket, object).getBytes(StandardCharsets.UTF_8); // No value is needed for this table. reachableDB.put(reachableCFHandle, reachableKey, new byte[]{}); } @@ -491,8 +452,7 @@ private void addReachableEntry(OmVolumeArgs volume, * Build an entry in the reachable table for the current object, which * could be a bucket, file or directory. */ - private static String buildReachableKey(OmVolumeArgs volume, - OmBucketInfo bucket, WithObjectID object) { + private static String buildReachableKey(OmVolumeArgs volume, OmBucketInfo bucket, WithObjectID object) { return OM_KEY_PREFIX + volume.getObjectID() + OM_KEY_PREFIX + @@ -507,8 +467,7 @@ private static String buildReachableKey(OmVolumeArgs volume, * @return true if the entry's parent is in the reachable table. */ protected boolean isReachable(String fileOrDirKey) throws IOException { - byte[] reachableParentKey = - buildReachableParentKey(fileOrDirKey).getBytes(StandardCharsets.UTF_8); + byte[] reachableParentKey = buildReachableParentKey(fileOrDirKey).getBytes(StandardCharsets.UTF_8); return reachableDB.get(reachableCFHandle, reachableParentKey, REACHABLE_TABLE) != null; } @@ -535,8 +494,7 @@ private static String buildReachableParentKey(String fileOrDirKey) { } private void openReachableDB() throws IOException { - File reachableDBFile = new File(new File(omDBPath).getParentFile(), - "reachable.db"); + File reachableDBFile = new File(new File(omDBPath).getParentFile(), "reachable.db"); System.out.println("Creating database of reachable directories at " + reachableDBFile); // Delete the DB from the last run if it exists. 
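
To make the reachable.db key scheme concrete: an entry records /volumeObjectID/bucketObjectID/objectID, and reachability of a file or directory is decided by looking up its parent's entry. A small sketch with fabricated IDs (buildReachableParentKey's body is not shown in this hunk; truncating the final path component is the presumed behavior):

    // File and directory keys have the form /volumeID/bucketID/parentID/name.
    String fileKey = "/-9223372036854775552/-9223372036854775040/12345/file1";
    // The parent's reachable.db entry is everything before the last separator:
    String parentKey = fileKey.substring(0, fileKey.lastIndexOf('/'));
    // parentKey -> "/-9223372036854775552/-9223372036854775040/12345"
    // The file is reachable iff an entry for parentKey exists in reachable.db.
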
if (reachableDBFile.exists()) { @@ -572,8 +530,7 @@ private void closeReachableDB() { private void dropReachableTableIfExists() throws IOException { try { - List - availableCFs = reachableDB.listColumnFamiliesEmptyOptions(reachableDBPath); + List availableCFs = reachableDB.listColumnFamiliesEmptyOptions(reachableDBPath); boolean cfFound = false; for (byte[] cfNameBytes: availableCFs) { if (new String(cfNameBytes, UTF_8).equals(new String(REACHABLE_TABLE_BYTES, UTF_8))) { @@ -595,8 +552,7 @@ private void dropReachableTableIfExists() throws IOException { } private void createReachableTable() throws IOException { - reachableCFHandle = reachableDB.createColumnFamily( - new ColumnFamilyDescriptor(REACHABLE_TABLE_BYTES)); + reachableCFHandle = reachableDB.createColumnFamily(new ColumnFamilyDescriptor(REACHABLE_TABLE_BYTES)); } /** From 28514b480056659de2cc8b86d7cc989126951cfa Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Fri, 15 Nov 2024 11:51:48 +0530 Subject: [PATCH 16/28] Revert pom.xml changes --- hadoop-ozone/tools/pom.xml | 195 ++++++++++++++++++++++++++++++++++++- 1 file changed, 193 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/tools/pom.xml b/hadoop-ozone/tools/pom.xml index 839d01f0fa84..924408122594 100644 --- a/hadoop-ozone/tools/pom.xml +++ b/hadoop-ozone/tools/pom.xml @@ -20,15 +20,55 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone - 1.5.0-SNAPSHOT + 2.0.0-SNAPSHOT ozone-tools - 1.5.0-SNAPSHOT + 2.0.0-SNAPSHOT Apache Ozone Tools Apache Ozone Tools jar + + false + + + + org.apache.ozone + hdds-client + + + org.apache.ozone + hdds-common + + + org.apache.ozone + hdds-config + + + org.apache.ozone + hdds-container-service + + + org.apache.ozone + hdds-interface-admin + + + org.apache.ozone + hdds-interface-client + + + org.apache.ozone + hdds-interface-server + + + org.apache.ozone + hdds-managed-rocksdb + + + org.apache.ozone + hdds-server-scm + org.apache.ozone ozone-manager @@ -45,10 +85,22 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.ozone ozone-client + + org.apache.ozone + ozone-filesystem-common + org.apache.ozone ozone-filesystem + + org.apache.ozone + ozone-interface-client + + + org.apache.ozone + ozone-interface-storage + org.apache.ozone hdds-server-framework @@ -68,6 +120,59 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> hdds-hadoop-dependency-server + + commons-codec + commons-codec + + + commons-io + commons-io + + + org.apache.commons + commons-lang3 + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.ratis + ratis-client + + + org.apache.ratis + ratis-common + + + org.apache.ratis + ratis-proto + + + org.apache.ratis + ratis-server-api + + + org.apache.ratis + ratis-thirdparty-misc + + + org.apache.ratis + ratis-tools + + + org.apache.ratis + ratis-shell + + + + info.picocli + picocli + jakarta.xml.bind jakarta.xml.bind-api @@ -76,6 +181,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.glassfish.jaxb jaxb-runtime + + jakarta.annotation + jakarta.annotation-api + jakarta.activation jakarta.activation-api @@ -84,6 +193,14 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> io.dropwizard.metrics metrics-core + + io.opentracing + opentracing-api + + + io.opentracing + opentracing-util + com.amazonaws aws-java-sdk-core @@ -92,10 +209,42 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> com.amazonaws aws-java-sdk-s3 + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + 
jackson-databind + + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + + + com.google.guava + guava + + + org.jooq + jooq + org.kohsuke.metainf-services metainf-services + + org.rocksdb + rocksdbjni + + + org.slf4j + slf4j-api + @@ -121,6 +270,48 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> 2048 + + org.apache.maven.plugins + maven-compiler-plugin + + + + org.apache.ozone + hdds-config + ${hdds.version} + + + org.kohsuke.metainf-services + metainf-services + ${metainf-services.version} + + + + org.apache.hadoop.hdds.conf.ConfigFileGenerator + org.kohsuke.metainf_services.AnnotationProcessorImpl + + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + ban-annotations + + + + Only selected annotation processors are enabled, see configuration of maven-compiler-plugin. + + org.apache.hadoop.ozone.om.request.validation.RequestFeatureValidator + org.apache.hadoop.hdds.scm.metadata.Replicate + + + + + + +
From 0e3c294d550027b71fd9f966fbf48cc99e9d7928 Mon Sep 17 00:00:00 2001
From: sarvekshayr
Date: Thu, 21 Nov 2024 17:13:32 +0530
Subject: [PATCH 18/28] Added 3 categories in report. Repair only unreferenced objects

---
 .../ozone/repair/om/TestFSORepairTool.java    |  38 +++---
 .../hadoop/ozone/repair/om/FSORepairCLI.java  |  14 +--
 .../hadoop/ozone/repair/om/FSORepairTool.java | 113 ++++++++++++------
 3 files changed, 107 insertions(+), 58 deletions(-)

diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
index ac4242b60f67..fae950689910 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
@@ -208,7 +208,7 @@ public void testConnectedTreeOneBucket() throws Exception {
     FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1");

     // Test the connected tree in debug mode.
-    tool = new FSORepairTool(getOmDB(), getOmDBLocation(), false, null, null);
+    tool = new FSORepairTool(getOmDB(), getOmDBLocation(), false, null, null, false);
     FSORepairTool.Report debugReport = tool.run();

     Assertions.assertEquals(expectedReport, debugReport);
@@ -217,7 +217,7 @@ public void testConnectedTreeOneBucket() throws Exception {

     // Running again in repair mode should give same results since the tree
     // is connected.
- tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null, false); FSORepairTool.Report repairReport = tool.run(); Assertions.assertEquals(expectedReport, repairReport); @@ -231,7 +231,7 @@ public void testReportedDataSize() throws Exception { FSORepairTool.Report report2 = buildConnectedTree("vol1", "bucket2", 10); FSORepairTool.Report expectedReport = new FSORepairTool.Report(report1, report2); - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null, false); FSORepairTool.Report debugReport = tool.run(); Assertions.assertEquals(expectedReport, debugReport); } @@ -248,12 +248,13 @@ public void testVolumeAndBucketFilter() throws Exception { FSORepairTool.Report expectedReport2 = new FSORepairTool.Report(report2); // When volume filter is passed - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol1", null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol1", null, false); FSORepairTool.Report result1 = tool.run(); Assertions.assertEquals(expectedReport1, result1); // When both volume and bucket filters are passed - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol2", "bucket2"); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol2", "bucket2", + false); FSORepairTool.Report result2 = tool.run(); Assertions.assertEquals(expectedReport2, result2); @@ -263,7 +264,8 @@ public void testVolumeAndBucketFilter() throws Exception { try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(outputStream, true, DEFAULT_ENCODING)) { System.setOut(ps); - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol1", "bucket2"); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol1", "bucket2", + false); tool.run(); String output = outputStream.toString(DEFAULT_ENCODING); Assertions.assertTrue(output.contains("Bucket 'bucket2' does not exist in volume '/vol1'.")); @@ -275,7 +277,8 @@ public void testVolumeAndBucketFilter() throws Exception { try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(outputStream, true, DEFAULT_ENCODING)) { System.setOut(ps); - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol3", "bucket2"); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol3", "bucket2", + false); tool.run(); String output = outputStream.toString(DEFAULT_ENCODING); Assertions.assertTrue(output.contains("Volume '/vol3' does not exist.")); @@ -287,7 +290,8 @@ public void testVolumeAndBucketFilter() throws Exception { try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(outputStream, true, DEFAULT_ENCODING)) { System.setOut(ps); - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, "bucket2"); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, "bucket2", + false); tool.run(); String output = outputStream.toString(DEFAULT_ENCODING); Assertions.assertTrue(output.contains("--bucket flag cannot be used without specifying --volume.")); @@ -307,7 +311,7 @@ public void testMultipleBucketsAndVolumes() throws Exception { FSORepairTool.Report report2 = buildDisconnectedTree("vol2", "bucket2"); FSORepairTool.Report expectedAggregateReport = new FSORepairTool.Report(report1, report2); - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), 
true, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null, false); FSORepairTool.Report generatedReport = tool.run(); Assertions.assertEquals(generatedReport, expectedAggregateReport); @@ -351,11 +355,11 @@ public void testDeleteOverwrite() throws Exception { ContractTestUtils.touch(fs, new Path("/vol1/bucket1/dir1/file2")); disconnectDirectory("dir1"); - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null, false); FSORepairTool.Report generatedReport = tool.run(); - Assertions.assertEquals(1, generatedReport.getUnreachable().getDirs()); - Assertions.assertEquals(3, generatedReport.getUnreachable().getFiles()); + Assertions.assertEquals(1, generatedReport.getUnreferenced().getDirs()); + Assertions.assertEquals(3, generatedReport.getUnreferenced().getFiles()); // This assertion ensures that only specific directories and keys remain in the active tables, // as the remaining entries are expected to be moved to the deleted tables by the background service. @@ -368,7 +372,7 @@ public void testDeleteOverwrite() throws Exception { @Test public void testEmptyFileTrees() throws Exception { // Run when there are no file trees. - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null, false); FSORepairTool.Report generatedReport = tool.run(); Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); assertDeleteTablesEmpty(); @@ -378,7 +382,7 @@ public void testEmptyFileTrees() throws Exception { fs.mkdirs(new Path("/vol2/bucket1")); // Run on an empty volume and bucket. - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null, false); generatedReport = tool.run(); Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); assertDeleteTablesEmpty(); @@ -415,7 +419,7 @@ public void testNonFSOBucketsSkipped() throws Exception { // Even in repair mode there should be no action. legacy and obs buckets // will be skipped and FSO tree is connected. - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null); + tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null, false); FSORepairTool.Report generatedReport = tool.run(); Assertions.assertEquals(connectReport, generatedReport); @@ -520,11 +524,11 @@ private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, // will see. It was deleted completely so the tool will never see it. FSORepairTool.ReportStatistics reachableCount = new FSORepairTool.ReportStatistics(1, 1, fileSize); - FSORepairTool.ReportStatistics unreachableCount = + FSORepairTool.ReportStatistics unreferencedCount = new FSORepairTool.ReportStatistics(1, 3, fileSize * 3L); return new FSORepairTool.Report.Builder() .setReachable(reachableCount) - .setUnreachable(unreachableCount) + .setUnreferenced(unreferencedCount) .build(); } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java index 8c73aae4dae3..81c5f3b71604 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java @@ -29,7 +29,7 @@ * Parser for scm.db file. 
*/ @CommandLine.Command( - name = "fso-tree-repair", + name = "fso-tree", description = "Identify and repair a disconnected FSO tree, and mark unreachable entries for deletion. " + "OM should be stopped while this tool is run." ) @@ -44,21 +44,21 @@ public class FSORepairCLI implements Callable, SubcommandWithParent { description = "Path to OM RocksDB") private String dbPath; - @CommandLine.Option(names = {"--repair"}, + @CommandLine.Option(names = {"-r", "--repair"}, defaultValue = "false", description = "Run in repair mode to move unreachable files and directories to deleted tables.") private boolean repair; - @CommandLine.Option(names = {"--volume"}, + @CommandLine.Option(names = {"-v", "--volume"}, description = "Filter by volume name. Add '/' before the volume name.") private String volume; - @CommandLine.Option(names = {"--bucket"}, + @CommandLine.Option(names = {"-b", "--bucket"}, description = "Filter by bucket name") private String bucket; @CommandLine.Option(names = {"--verbose"}, - description = "More verbose output. ") + description = "Verbose output. Show all intermediate steps and deleted keys info.") private boolean verbose; @Override @@ -70,14 +70,14 @@ public Void call() throws Exception { } try { FSORepairTool - repairTool = new FSORepairTool(dbPath, repair, volume, bucket); + repairTool = new FSORepairTool(dbPath, repair, volume, bucket, verbose); repairTool.run(); } catch (Exception ex) { throw new IllegalArgumentException("FSO repair failed: " + ex.getMessage()); } if (verbose) { - System.out.println("FSO repair finished. See client logs for results."); + System.out.println("FSO repair finished."); } return null; diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index 9735b4fcd988..88c4685373e5 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -62,15 +62,18 @@ import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; /** - * Base Tool to identify disconnected FSO trees in all buckets. - * The tool will log information about unreachable files or directories. - * If deletes are still in progress (the deleted directory table is not empty), the tool may - * report that the tree is disconnected, even though pending deletes would fix the issue. - * + * Base Tool to identify and repair disconnected FSO trees across all buckets. + * This tool logs information about reachable, unreachable and unreferenced files and directories in debug mode + * and moves these unreferenced files and directories to the deleted tables in repair mode. + + * If deletes are still in progress (the deleted directory table is not empty), the tool + * reports that the tree is unreachable, even though pending deletes would fix the issue. + * If not, the tool reports them as unreferenced and deletes them in repair mode. + * Before using the tool, make sure all OMs are stopped, and that all Ratis logs have been flushed to the OM DB. * This can be done using `ozone admin prepare` before running the tool, and `ozone admin * cancelprepare` when done. - * + * The tool will run a DFS from each bucket, and save all reachable directories as keys in a new temporary RocksDB * instance called "reachable.db" in the same directory as om.db. 
* It will then scan the entire file and directory tables for each bucket to see if each object's parent is in the @@ -101,25 +104,30 @@ public class FSORepairTool { private RocksDatabase reachableDB; private final ReportStatistics reachableStats; private final ReportStatistics unreachableStats; + private final ReportStatistics unreferencedStats; private final boolean repair; + private final boolean verbose; - public FSORepairTool(String dbPath, boolean repair, String volume, String bucket) throws IOException { - this(getStoreFromPath(dbPath), dbPath, repair, volume, bucket); + public FSORepairTool(String dbPath, boolean repair, String volume, String bucket, boolean verbose) + throws IOException { + this(getStoreFromPath(dbPath), dbPath, repair, volume, bucket, verbose); } /** * Allows passing RocksDB instance from a MiniOzoneCluster directly to this class for testing. */ - public FSORepairTool(DBStore dbStore, String dbPath, boolean repair, String volume, String bucket) + public FSORepairTool(DBStore dbStore, String dbPath, boolean repair, String volume, String bucket, boolean verbose) throws IOException { this.reachableStats = new ReportStatistics(0, 0, 0); this.unreachableStats = new ReportStatistics(0, 0, 0); + this.unreferencedStats = new ReportStatistics(0, 0, 0); this.store = dbStore; this.omDBPath = dbPath; this.repair = repair; this.volumeFilter = volume; this.bucketFilter = bucket; + this.verbose = verbose; volumeTable = store.getTable(OmMetadataManagerImpl.VOLUME_TABLE, String.class, OmVolumeArgs.class); @@ -150,10 +158,10 @@ protected static DBStore getStoreFromPath(String dbPath) throws IOException { "not exist or is not a RocksDB directory.", dbPath)); } // Load RocksDB and tables needed. - return OmMetadataManagerImpl.loadDB(new OzoneConfiguration(), new File(dbPath).getParentFile()); + return OmMetadataManagerImpl.loadDB(new OzoneConfiguration(), new File(dbPath).getParentFile(), -1); } - public FSORepairTool.Report run() throws IOException { + public FSORepairTool.Report run() throws Exception { if (bucketFilter != null && volumeFilter == null) { System.out.println("--bucket flag cannot be used without specifying --volume."); @@ -265,7 +273,7 @@ private void processBucket(OmVolumeArgs volume, OmBucketInfo bucketInfo) throws dropReachableTableIfExists(); createReachableTable(); markReachableObjectsInBucket(volume, bucketInfo); - handleUnreachableObjects(volume, bucketInfo); + handleUnreachableAndUnreferencedObjects(volume, bucketInfo); dropReachableTableIfExists(); } @@ -273,6 +281,7 @@ private Report buildReportAndLog() { Report report = new Report.Builder() .setReachable(reachableStats) .setUnreachable(unreachableStats) + .setUnreferenced(unreferencedStats) .build(); System.out.println("\n" + report); @@ -308,8 +317,16 @@ private void markReachableObjectsInBucket(OmVolumeArgs volume, OmBucketInfo buck } } - private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException { - // Check for unreachable directories in the bucket. + private boolean isDirectoryInDeletedDirTable(String dirKey) throws IOException { + return deletedDirectoryTable.isExist(dirKey); + } + + private boolean isFileKeyInDeletedTable(String fileKey) throws IOException { + return deletedTable.isExist(fileKey); + } + + private void handleUnreachableAndUnreferencedObjects(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException { + // Check for unreachable and unreferenced directories in the bucket. 
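  // Summary of the classification the method below applies (editor's sketch of
  // the logic, not code from the patch):
  //   isReachable(key)                            -> reachable
  //   !isReachable(key) && key in deleted tables  -> unreachable (pending delete; report only)
  //   !isReachable(key) && key not in deleted     -> unreferenced (moved to deleted tables when --repair is set)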
String bucketPrefix = OM_KEY_PREFIX + volume.getObjectID() + OM_KEY_PREFIX + @@ -328,21 +345,27 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) } if (!isReachable(dirKey)) { - System.out.println("Found unreachable directory: " + dirKey); - unreachableStats.addDir(); - - if (!repair) { - System.out.println("Marking unreachable directory " + dirKey + " for deletion."); + if (!isDirectoryInDeletedDirTable(dirKey)) { + System.out.println("Found unreferenced directory: " + dirKey); + unreferencedStats.addDir(); + + if (!repair) { + if (verbose) { + System.out.println("Marking unreferenced directory " + dirKey + " for deletion."); + } + } else { + System.out.println("Deleting unreferenced directory " + dirKey); + OmDirectoryInfo dirInfo = dirEntry.getValue(); + markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(), dirKey, dirInfo); + } } else { - System.out.println("Deleting unreachable directory " + dirKey); - OmDirectoryInfo dirInfo = dirEntry.getValue(); - markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(), dirKey, dirInfo); + unreachableStats.addDir(); } } } } - // Check for unreachable files + // Check for unreachable and unreferenced files try (TableIterator> fileIterator = fileTable.iterator()) { fileIterator.seek(bucketPrefix); @@ -356,14 +379,20 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) OmKeyInfo fileInfo = fileEntry.getValue(); if (!isReachable(fileKey)) { - System.out.println("Found unreachable file: " + fileKey); - unreachableStats.addFile(fileInfo.getDataSize()); + if (!isFileKeyInDeletedTable(fileKey)) { + System.out.println("Found unreferenced file: " + fileKey); + unreferencedStats.addFile(fileInfo.getDataSize()); - if (!repair) { - System.out.println("Marking unreachable file " + fileKey + " for deletion." + fileKey); + if (!repair) { + if (verbose) { + System.out.println("Marking unreferenced file " + fileKey + " for deletion." + fileKey); + } + } else { + System.out.println("Deleting unreferenced file " + fileKey); + markFileForDeletion(fileKey, fileInfo); + } } else { - System.out.println("Deleting unreachable file " + fileKey); - markFileForDeletion(fileKey, fileInfo); + unreachableStats.addFile(fileInfo.getDataSize()); } } else { // NOTE: We are deserializing the proto of every reachable file @@ -388,8 +417,9 @@ protected void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IO // directory delete. It is also not possible here if the file's parent // is gone. The name of the key does not matter so just use IDs. deletedTable.putWithBatch(batch, fileKey, updatedRepeatedOmKeyInfo); - - System.out.println("Added entry " + fileKey + " to open key table: " + updatedRepeatedOmKeyInfo); + if (verbose) { + System.out.println("Added entry " + fileKey + " to open key table: " + updatedRepeatedOmKeyInfo); + } store.commitBatchOperation(batch); } } @@ -561,6 +591,7 @@ private void createReachableTable() throws IOException { public static class Report { private final ReportStatistics reachable; private final ReportStatistics unreachable; + private final ReportStatistics unreferenced; /** * Builds one report that is the aggregate of multiple others. @@ -568,16 +599,19 @@ public static class Report { public Report(FSORepairTool.Report... 
reports) { reachable = new ReportStatistics(); unreachable = new ReportStatistics(); + unreferenced = new ReportStatistics(); for (FSORepairTool.Report report : reports) { reachable.add(report.reachable); unreachable.add(report.unreachable); + unreferenced.add(report.unreferenced); } } private Report(FSORepairTool.Report.Builder builder) { this.reachable = builder.reachable; this.unreachable = builder.unreachable; + this.unreferenced = builder.unreferenced; } public ReportStatistics getReachable() { @@ -588,8 +622,12 @@ public ReportStatistics getUnreachable() { return unreachable; } + public ReportStatistics getUnreferenced() { + return unreferenced; + } + public String toString() { - return "Reachable: " + reachable + "\nUnreachable: " + unreachable; + return "Reachable: " + reachable + "\nUnreachable: " + unreachable + "\nUnreferenced: " + unreferenced; } @Override @@ -605,12 +643,13 @@ public boolean equals(Object other) { // Useful for testing. System.out.println("Comparing reports\nExpect:\n" + this + "\nActual:\n" + report); - return reachable.equals(report.reachable) && unreachable.equals(report.unreachable); + return reachable.equals(report.reachable) && unreachable.equals(report.unreachable) && + unreferenced.equals(report.unreferenced); } @Override public int hashCode() { - return Objects.hash(reachable, unreachable); + return Objects.hash(reachable, unreachable, unreferenced); } /** @@ -619,6 +658,7 @@ public int hashCode() { public static final class Builder { private ReportStatistics reachable = new ReportStatistics(); private ReportStatistics unreachable = new ReportStatistics(); + private ReportStatistics unreferenced = new ReportStatistics(); public Builder() { } @@ -633,6 +673,11 @@ public Builder setUnreachable(ReportStatistics unreachable) { return this; } + public Builder setUnreferenced(ReportStatistics unreferenced) { + this.unreferenced = unreferenced; + return this; + } + public Report build() { return new Report(this); } From 4dce94a0caf63f2bacb8152da5547b0a34ca949d Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com> Date: Thu, 21 Nov 2024 19:47:49 +0100 Subject: [PATCH 19/28] Fix checkstyle --- .../java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index 88c4685373e5..af8a49f134cd 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -352,7 +352,7 @@ private void handleUnreachableAndUnreferencedObjects(OmVolumeArgs volume, OmBuck if (!repair) { if (verbose) { System.out.println("Marking unreferenced directory " + dirKey + " for deletion."); - } + } } else { System.out.println("Deleting unreferenced directory " + dirKey); OmDirectoryInfo dirInfo = dirEntry.getValue(); From 9aa705ff66e2741b783489cd046eb48e95da0f5f Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Mon, 25 Nov 2024 12:16:23 +0530 Subject: [PATCH 20/28] Simplified db management --- .../hadoop/hdds/utils/db/RocksDatabase.java | 44 ++------- .../hadoop/ozone/repair/om/FSORepairTool.java | 96 ++++--------------- 2 files changed, 26 insertions(+), 114 deletions(-) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java index e1d9b29c5bea..945138b8b8b3 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java @@ -139,7 +139,7 @@ public static List listColumnFamiliesEmptyOptions(final String path) } } - public static RocksDatabase open(File dbFile, ManagedDBOptions dbOptions, + static RocksDatabase open(File dbFile, ManagedDBOptions dbOptions, ManagedWriteOptions writeOptions, Set families, boolean readOnly) throws IOException { List descriptors = null; @@ -460,13 +460,8 @@ public void ingestExternalFile(ColumnFamily family, List files, public void put(ColumnFamily family, byte[] key, byte[] value) throws IOException { - put(family.getHandle(), key, value); - } - - public void put(ColumnFamilyHandle handle, byte[] key, byte[] value) - throws IOException { try (UncheckedAutoCloseable ignored = acquire()) { - db.get().put(handle, writeOptions, key, value); + db.get().put(family.getHandle(), writeOptions, key, value); } catch (RocksDBException e) { closeOnError(e); throw toIOException(this, "put " + bytes2String(key), e); @@ -626,14 +621,9 @@ RocksCheckpoint createCheckpoint() { */ Supplier keyMayExist(ColumnFamily family, byte[] key) throws IOException { - return keyMayExist(family.getHandle(), key); - } - - public Supplier keyMayExist(ColumnFamilyHandle handle, byte[] key) - throws IOException { try (UncheckedAutoCloseable ignored = acquire()) { final Holder out = new Holder<>(); - return db.get().keyMayExist(handle, key, out) ? + return db.get().keyMayExist(family.getHandle(), key, out) ? out::getValue : null; } } @@ -662,34 +652,12 @@ public Collection getExtraColumnFamilies() { return Collections.unmodifiableCollection(columnFamilies.values()); } - public void dropColumnFamily(ColumnFamilyHandle handle) throws IOException { - try (UncheckedAutoCloseable ignored = acquire()) { - db.get().dropColumnFamily(handle); - } catch (RocksDBException e) { - closeOnError(e); - throw toIOException(this, "dropColumnFamily", e); - } - } - - public ColumnFamilyHandle createColumnFamily(ColumnFamilyDescriptor descriptor) throws IOException { + byte[] get(ColumnFamily family, byte[] key) throws IOException { try (UncheckedAutoCloseable ignored = acquire()) { - return db.get().createColumnFamily(descriptor); + return db.get().get(family.getHandle(), key); } catch (RocksDBException e) { closeOnError(e); - throw toIOException(this, "createColumnFamily", e); - } - } - - public byte[] get(ColumnFamily family, byte[] key) throws IOException { - return get(family.getHandle(), key, family.getName()); - } - - public byte[] get(ColumnFamilyHandle handle, byte[] key, String familyName) throws IOException { - try (UncheckedAutoCloseable ignored = acquire()) { - return db.get().get(handle, key); - } catch (RocksDBException e) { - closeOnError(e); - final String message = "get " + bytes2String(key) + " from " + familyName; + final String message = "get " + bytes2String(key) + " from " + family; throw toIOException(this, message, e); } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index af8a49f134cd..68c5688b85ce 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ 
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -18,15 +18,13 @@ package org.apache.hadoop.ozone.repair.om; import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.utils.db.RocksDatabase; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.DBStoreBuilder; import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.hdds.utils.db.BatchOperation; -import org.apache.hadoop.hdds.utils.db.TableConfig; -import org.apache.hadoop.hdds.utils.db.DBProfile; -import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteOptions; import org.apache.hadoop.ozone.OmUtils; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.helpers.BucketLayout; @@ -39,26 +37,16 @@ import org.apache.hadoop.ozone.om.helpers.WithObjectID; import org.apache.hadoop.ozone.om.request.file.OMFileRequest; import org.apache.ratis.util.Preconditions; -import org.rocksdb.ColumnFamilyDescriptor; -import org.rocksdb.ColumnFamilyHandle; -import org.rocksdb.RocksDBException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; -import java.util.HashSet; -import java.util.List; import java.util.Objects; -import java.util.Set; import java.util.Stack; -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DB_PROFILE; -import static org.apache.hadoop.hdds.utils.db.DBStoreBuilder.HDDS_DEFAULT_DB_PROFILE; import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; /** @@ -95,13 +83,8 @@ public class FSORepairTool { private final Table snapshotInfoTable; private final String volumeFilter; private final String bucketFilter; - // The temporary DB is used to track which items have been seen. - // Since usage of this DB is simple, use it directly from RocksDB. - private String reachableDBPath; private static final String REACHABLE_TABLE = "reachable"; - private static final byte[] REACHABLE_TABLE_BYTES = REACHABLE_TABLE.getBytes(StandardCharsets.UTF_8); - private ColumnFamilyHandle reachableCFHandle; - private RocksDatabase reachableDB; + private DBStore reachableDB; private final ReportStatistics reachableStats; private final ReportStatistics unreachableStats; private final ReportStatistics unreferencedStats; @@ -270,11 +253,8 @@ private void processBucket(OmVolumeArgs volume, OmBucketInfo bucketInfo) throws return; } } - dropReachableTableIfExists(); - createReachableTable(); markReachableObjectsInBucket(volume, bucketInfo); handleUnreachableAndUnreferencedObjects(volume, bucketInfo); - dropReachableTableIfExists(); } private Report buildReportAndLog() { @@ -473,9 +453,9 @@ private Collection getChildDirectoriesAndMarkAsReachable(OmVolumeArgs vo * of the connected FSO tree. */ private void addReachableEntry(OmVolumeArgs volume, OmBucketInfo bucket, WithObjectID object) throws IOException { - byte[] reachableKey = buildReachableKey(volume, bucket, object).getBytes(StandardCharsets.UTF_8); + String reachableKey = buildReachableKey(volume, bucket, object); // No value is needed for this table. 
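  // Editor's sketch of the simplified pattern this patch adopts: a scratch DBStore
  // with a single table used as a persistent set ("reachable.db" and REACHABLE_TABLE
  // are names from this patch; conf and parentDir are placeholders):
  //   DBStore db = DBStoreBuilder.newBuilder(conf)
  //       .setName("reachable.db")
  //       .setPath(parentDir.toPath())
  //       .addTable(REACHABLE_TABLE)
  //       .build();
  //   Table<String, byte[]> reachable = db.getTable(REACHABLE_TABLE, String.class, byte[].class);
  //   reachable.put("/10/20/30", new byte[]{});            // empty value: presence alone marks membership
  //   boolean member = reachable.get("/10/20/30") != null; // single point lookup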
- reachableDB.put(reachableCFHandle, reachableKey, new byte[]{}); + reachableDB.getTable(REACHABLE_TABLE, String.class, byte[].class).put(reachableKey, new byte[]{}); } /** @@ -497,9 +477,9 @@ private static String buildReachableKey(OmVolumeArgs volume, OmBucketInfo bucket * @return true if the entry's parent is in the reachable table. */ protected boolean isReachable(String fileOrDirKey) throws IOException { - byte[] reachableParentKey = buildReachableParentKey(fileOrDirKey).getBytes(StandardCharsets.UTF_8); + String reachableParentKey = buildReachableParentKey(fileOrDirKey); - return reachableDB.get(reachableCFHandle, reachableParentKey, REACHABLE_TABLE) != null; + return reachableDB.getTable(REACHABLE_TABLE, String.class, byte[].class).get(reachableParentKey) != null; } /** @@ -523,68 +503,32 @@ private static String buildReachableParentKey(String fileOrDirKey) { parentID; } - private void openReachableDB() throws IOException { + private void openReachableDB() { File reachableDBFile = new File(new File(omDBPath).getParentFile(), "reachable.db"); System.out.println("Creating database of reachable directories at " + reachableDBFile); // Delete the DB from the last run if it exists. - if (reachableDBFile.exists()) { - FileUtils.deleteDirectory(reachableDBFile); - } - reachableDBPath = reachableDBFile.toString(); - reachableDB = buildReachableRocksDB(reachableDBFile); - } - - private RocksDatabase buildReachableRocksDB(File reachableDBFile) throws IOException { - DBProfile profile = new OzoneConfiguration().getEnum(HDDS_DB_PROFILE, HDDS_DEFAULT_DB_PROFILE); - Set tableConfigs = new HashSet<>(); - try { - tableConfigs.add(new TableConfig("default", profile.getColumnFamilyOptions())); - - return RocksDatabase.open(reachableDBFile, - profile.getDBOptions(), - new ManagedWriteOptions(), - tableConfigs, false); - } finally { - for (TableConfig config : tableConfigs) { - config.close(); + if (reachableDBFile.exists()) { + FileUtils.deleteDirectory(reachableDBFile); } + + ConfigurationSource conf = new OzoneConfiguration(); + reachableDB = DBStoreBuilder.newBuilder(conf) + .setName("reachable.db") + .setPath(reachableDBFile.getParentFile().toPath()) + .addTable(REACHABLE_TABLE) + .build(); + } catch (IOException e) { + System.out.println("Error creating reachable.db: " + e.getMessage()); } } - private void closeReachableDB() { + private void closeReachableDB() throws IOException { if (reachableDB != null) { reachableDB.close(); } } - private void dropReachableTableIfExists() throws IOException { - try { - List availableCFs = reachableDB.listColumnFamiliesEmptyOptions(reachableDBPath); - boolean cfFound = false; - for (byte[] cfNameBytes: availableCFs) { - if (new String(cfNameBytes, UTF_8).equals(new String(REACHABLE_TABLE_BYTES, UTF_8))) { - cfFound = true; - break; - } - } - - if (cfFound) { - reachableDB.dropColumnFamily(reachableCFHandle); - } - } catch (RocksDBException ex) { - throw new IOException(ex.getMessage(), ex); - } finally { - if (reachableCFHandle != null) { - reachableCFHandle.close(); - } - } - } - - private void createReachableTable() throws IOException { - reachableCFHandle = reachableDB.createColumnFamily(new ColumnFamilyDescriptor(REACHABLE_TABLE_BYTES)); - } - /** * Define a Report to be created. 
*/ From 561b9650f58c79f377cf4a86e68ef7d7f0467bea Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Mon, 25 Nov 2024 13:30:36 +0530 Subject: [PATCH 21/28] Added OM repair command --- .../hadoop/ozone/repair/om/FSORepairCLI.java | 5 +- .../hadoop/ozone/repair/om/OMRepair.java | 53 +++++++++++++++++++ 2 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java index 81c5f3b71604..ab24deb72374 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java @@ -19,7 +19,6 @@ package org.apache.hadoop.ozone.repair.om; import org.apache.hadoop.hdds.cli.SubcommandWithParent; -import org.apache.hadoop.ozone.repair.OzoneRepair; import org.kohsuke.MetaInfServices; import picocli.CommandLine; @@ -37,7 +36,7 @@ public class FSORepairCLI implements Callable, SubcommandWithParent { @CommandLine.ParentCommand - private OzoneRepair parent; + private OMRepair parent; @CommandLine.Option(names = {"--db"}, required = true, @@ -85,6 +84,6 @@ public Void call() throws Exception { @Override public Class getParentType() { - return OzoneRepair.class; + return OMRepair.class; } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java new file mode 100644 index 000000000000..6682ccda7474 --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.repair.om; + +import org.apache.hadoop.hdds.cli.GenericCli; +import org.apache.hadoop.hdds.cli.SubcommandWithParent; +import org.apache.hadoop.ozone.repair.OzoneRepair; +import org.kohsuke.MetaInfServices; +import picocli.CommandLine; + +import java.util.concurrent.Callable; + +/** + * Ozone Repair CLI for OM. 
+ */ +@CommandLine.Command(name = "om", + description = "Operational tool to repair OM.") +@MetaInfServices(SubcommandWithParent.class) +public class OMRepair implements Callable, SubcommandWithParent { + + @CommandLine.Spec + private CommandLine.Model.CommandSpec spec; + + @CommandLine.ParentCommand + private OzoneRepair parent; + + @Override + public Void call() { + GenericCli.missingSubcommand(spec); + return null; + } + + @Override + public Class getParentType() { + return OzoneRepair.class; + } +} From d943b11a79c1baf5e977f53ced518348a800dc7f Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Mon, 2 Dec 2024 18:41:58 +0530 Subject: [PATCH 22/28] Test using cli options and close all DB --- .../ozone/repair/om/TestFSORepairTool.java | 606 ++++++++---------- .../ozone/shell/TestOzoneRepairShell.java | 19 + .../hadoop/ozone/repair/om/FSORepairTool.java | 116 ++-- 3 files changed, 343 insertions(+), 398 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java index fae950689910..d83afc907ac4 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java @@ -23,30 +23,23 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.apache.hadoop.hdds.cli.GenericCli; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.utils.db.DBStore; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.MiniOzoneCluster; -import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; import org.apache.hadoop.ozone.client.BucketArgs; import org.apache.hadoop.ozone.client.ObjectStore; import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; -import org.apache.hadoop.ozone.ha.ConfUtils; -import org.apache.hadoop.ozone.om.OMConfigKeys; -import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.OMStorage; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; -import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; -import org.apache.hadoop.ozone.shell.OzoneShell; -import org.apache.ozone.test.GenericTestUtils; +import org.apache.hadoop.ozone.repair.OzoneRepair; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.slf4j.Logger; @@ -54,17 +47,15 @@ import picocli.CommandLine; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; import java.io.PrintStream; import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.concurrent.TimeUnit; -import static java.lang.System.err; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.OzoneConsts.OM_DB_NAME; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; -import static 
org.junit.jupiter.api.Assertions.fail; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; import static org.junit.jupiter.api.Assertions.assertEquals; /** @@ -72,127 +63,44 @@ */ public class TestFSORepairTool { public static final Logger LOG = LoggerFactory.getLogger(TestFSORepairTool.class); - + private final ByteArrayOutputStream out = new ByteArrayOutputStream(); + private final ByteArrayOutputStream err = new ByteArrayOutputStream(); + private static final PrintStream OLD_OUT = System.out; + private static final PrintStream OLD_ERR = System.err; private static final String DEFAULT_ENCODING = UTF_8.name(); - private static MiniOzoneHAClusterImpl cluster; + private static MiniOzoneCluster cluster; private static FileSystem fs; private static OzoneClient client; private static OzoneConfiguration conf = null; - private FSORepairTool tool; - @BeforeAll - public static void init() throws Exception { + @BeforeEach + public void init() throws Exception { // Set configs. conf = new OzoneConfiguration(); - conf.setInt(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, 2000); - conf.setInt(OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK, 5); - conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); - conf.setInt(OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK, 20); - conf.setBoolean(OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY, true); - // Since delete services use RocksDB iterators, make sure the double - // buffer is flushed between runs. - conf.setInt(OMConfigKeys.OZONE_OM_UNFLUSHED_TRANSACTION_MAX_COUNT, 1); // Build cluster. - cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf) - .setNumOfOzoneManagers(1) - .setOMServiceId("omservice") - .setNumDatanodes(3) - .build(); + cluster = MiniOzoneCluster.newBuilder(conf).build(); cluster.waitForClusterToBeReady(); // Init ofs. - final String rootPath = String.format("%s://%s/", OZONE_OFS_URI_SCHEME, cluster.getOzoneManager().getOMServiceId()); + final String rootPath = String.format("%s://%s/", OZONE_OFS_URI_SCHEME, conf.get(OZONE_OM_ADDRESS_KEY)); conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); fs = FileSystem.get(conf); - client = OzoneClientFactory.getRpcClient("omservice", conf); - } - - @AfterEach - public void cleanNamespace() throws Exception { - OzoneShell shell = new OzoneShell(); - - if (fs.exists(new Path("/vol1"))) { - String[] args1 = new String[]{"volume", "delete", "-r", "-y", "vol1"}; - int exitC = execute(shell, args1); - assertEquals(0, exitC); - } - - if (fs.exists(new Path("/vol2"))) { - String[] args1 = new String[]{"volume", "delete", "-r", "-y", "vol2"}; - int exitC = execute(shell, args1); - assertEquals(0, exitC); - } - - cluster.getOzoneManager().prepareOzoneManager(120L, 5L); - runDeletes(); - assertFileAndDirTablesEmpty(); - } - - private int execute(GenericCli shell, String[] args) { - LOG.info("Executing shell command with args {}", Arrays.asList(args)); - CommandLine cmd = shell.getCmd(); - - CommandLine.IExecutionExceptionHandler exceptionHandler = - (ex, commandLine, parseResult) -> { - new PrintStream(err, true, DEFAULT_ENCODING).println(ex.getMessage()); - return commandLine.getCommandSpec().exitCodeOnExecutionException(); - }; + client = OzoneClientFactory.getRpcClient(conf); - // Since there is no elegant way to pass Ozone config to the shell, - // the idea is to use 'set' to place those OM HA configs. 
- String[] argsWithHAConf = getHASetConfStrings(args); - - cmd.setExecutionExceptionHandler(exceptionHandler); - return cmd.execute(argsWithHAConf); - } - - private String getSetConfStringFromConf(String key) { - return String.format("--set=%s=%s", key, conf.get(key)); - } - - private String generateSetConfString(String key, String value) { - return String.format("--set=%s=%s", key, value); - } - - private String[] getHASetConfStrings(int numOfArgs) { - assert (numOfArgs >= 0); - String[] res = new String[3 + numOfArgs]; - final int indexOmServiceIds = 0; - final int indexOmNodes = 1; - final int indexOmAddressStart = 2; - - res[indexOmServiceIds] = getSetConfStringFromConf(OMConfigKeys.OZONE_OM_SERVICE_IDS_KEY); - - String omNodesKey = ConfUtils.addKeySuffixes(OMConfigKeys.OZONE_OM_NODES_KEY, "omservice"); - String omNodesVal = conf.get(omNodesKey); - res[indexOmNodes] = generateSetConfString(omNodesKey, omNodesVal); - - String[] omNodesArr = omNodesVal.split(","); - // Sanity check - assert (omNodesArr.length == 1); - for (int i = 0; i < 1; i++) { - res[indexOmAddressStart + i] = - getSetConfStringFromConf(ConfUtils.addKeySuffixes( - OMConfigKeys.OZONE_OM_ADDRESS_KEY, "omservice", omNodesArr[i])); - } - - return res; + System.setOut(new PrintStream(out, false, DEFAULT_ENCODING)); + System.setErr(new PrintStream(err, false, DEFAULT_ENCODING)); } - /** - * Helper function to create a new set of arguments that contains HA configs. - * @param existingArgs Existing arguments to be fed into OzoneShell command. - * @return String array. - */ - private String[] getHASetConfStrings(String[] existingArgs) { - // Get a String array populated with HA configs first - String[] res = getHASetConfStrings(existingArgs.length); - - int indexCopyStart = res.length - existingArgs.length; - // Then copy the existing args to the returned String array - System.arraycopy(existingArgs, 0, res, indexCopyStart, existingArgs.length); - return res; + @AfterEach + public void reset() throws IOException { + // reset stream after each unit test + out.reset(); + err.reset(); + + // restore system streams + System.setOut(OLD_OUT); + System.setErr(OLD_ERR); } @AfterAll @@ -205,35 +113,61 @@ public static void teardown() { @Test public void testConnectedTreeOneBucket() throws Exception { + CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair()) + .addSubcommand(new FSORepairCLI())); + String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath(); + FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1"); + String expectedOutput = serializeReport(expectedReport); // Test the connected tree in debug mode. - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), false, null, null, false); - FSORepairTool.Report debugReport = tool.run(); + cluster.getOzoneManager().stop(); + + String[] args = new String[] {"om", "fso-tree", "--db", dbPath}; + int exitCode = cmd.execute(args); + assertEquals(0, exitCode); + + String cliOutput = out.toString(DEFAULT_ENCODING); + String reportOutput = extractRelevantSection(cliOutput); + Assertions.assertEquals(expectedOutput, reportOutput); - Assertions.assertEquals(expectedReport, debugReport); - assertConnectedTreeReadable("vol1", "bucket1"); - assertDeleteTablesEmpty(); + out.reset(); + err.reset(); - // Running again in repair mode should give same results since the tree - // is connected. 
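  // Aside (editor's sketch): the rewritten tests drive the tool through picocli
  // instead of constructing FSORepairTool directly. At runtime the subcommands are
  // discovered via @MetaInfServices, so the explicit addSubcommand wiring is
  // test-only. Roughly:
  //   CommandLine cmd = new CommandLine(new OzoneRepair())
  //       .addSubcommand(new CommandLine(new OMRepair()).addSubcommand(new FSORepairCLI()));
  //   int exit = cmd.execute("om", "fso-tree", "--db", dbPath, "--repair");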
- tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null, false); - FSORepairTool.Report repairReport = tool.run(); + // Running again in repair mode should give same results since the tree is connected. + String[] args1 = new String[] {"om", "fso-tree", "--db", dbPath, "--repair"}; + int exitCode1 = cmd.execute(args1); + assertEquals(0, exitCode1); - Assertions.assertEquals(expectedReport, repairReport); - assertConnectedTreeReadable("vol1", "bucket1"); - assertDeleteTablesEmpty(); + String cliOutput1 = out.toString(DEFAULT_ENCODING); + String reportOutput1 = extractRelevantSection(cliOutput1); + Assertions.assertEquals(expectedOutput, reportOutput1); + + cluster.getOzoneManager().restart(); } @Test public void testReportedDataSize() throws Exception { + CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair()) + .addSubcommand(new FSORepairCLI())); + String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath(); + FSORepairTool.Report report1 = buildDisconnectedTree("vol1", "bucket1", 10); FSORepairTool.Report report2 = buildConnectedTree("vol1", "bucket2", 10); FSORepairTool.Report expectedReport = new FSORepairTool.Report(report1, report2); + String expectedOutput = serializeReport(expectedReport); + + cluster.getOzoneManager().stop(); - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null, false); - FSORepairTool.Report debugReport = tool.run(); - Assertions.assertEquals(expectedReport, debugReport); + String[] args = new String[] {"om", "fso-tree", "--db", dbPath, "--repair"}; + int exitCode = cmd.execute(args); + assertEquals(0, exitCode); + + String cliOutput = out.toString(DEFAULT_ENCODING); + String reportOutput = extractRelevantSection(cliOutput); + + Assertions.assertEquals(expectedOutput, reportOutput); + cluster.getOzoneManager().restart(); } /** @@ -242,88 +176,97 @@ public void testReportedDataSize() throws Exception { */ @Test public void testVolumeAndBucketFilter() throws Exception { + CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair()) + .addSubcommand(new FSORepairCLI())); + String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath(); + FSORepairTool.Report report1 = buildDisconnectedTree("vol1", "bucket1", 10); FSORepairTool.Report report2 = buildConnectedTree("vol2", "bucket2", 10); FSORepairTool.Report expectedReport1 = new FSORepairTool.Report(report1); FSORepairTool.Report expectedReport2 = new FSORepairTool.Report(report2); + cluster.getOzoneManager().stop(); + // When volume filter is passed - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol1", null, false); - FSORepairTool.Report result1 = tool.run(); - Assertions.assertEquals(expectedReport1, result1); + String[] args1 = new String[]{"om", "fso-tree", "--db", dbPath, "--volume", "/vol1"}; + int exitCode1 = cmd.execute(args1); + assertEquals(0, exitCode1); + + String cliOutput1 = out.toString(DEFAULT_ENCODING); + String reportOutput1 = extractRelevantSection(cliOutput1); + String expectedOutput1 = serializeReport(expectedReport1); + Assertions.assertEquals(expectedOutput1, reportOutput1); + + out.reset(); + err.reset(); // When both volume and bucket filters are passed - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol2", "bucket2", - false); - FSORepairTool.Report result2 = tool.run(); - Assertions.assertEquals(expectedReport2, result2); + String[] args2 = new String[]{"om", "fso-tree", "--db", dbPath, 
"--volume", "/vol2", + "--bucket", "bucket2"}; + int exitCode2 = cmd.execute(args2); + assertEquals(0, exitCode2); - PrintStream originalOut = System.out; + String cliOutput2 = out.toString(DEFAULT_ENCODING); + String reportOutput2 = extractRelevantSection(cliOutput2); + String expectedOutput2 = serializeReport(expectedReport2); + Assertions.assertEquals(expectedOutput2, reportOutput2); + + out.reset(); + err.reset(); // When a non-existent bucket filter is passed - try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - PrintStream ps = new PrintStream(outputStream, true, DEFAULT_ENCODING)) { - System.setOut(ps); - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol1", "bucket2", - false); - tool.run(); - String output = outputStream.toString(DEFAULT_ENCODING); - Assertions.assertTrue(output.contains("Bucket 'bucket2' does not exist in volume '/vol1'.")); - } finally { - System.setOut(originalOut); - } + String[] args3 = new String[]{"om", "fso-tree", "--db", dbPath, "--volume", "/vol1", + "--bucket", "bucket2"}; + int exitCode3 = cmd.execute(args3); + assertEquals(0, exitCode3); + String cliOutput3 = out.toString(DEFAULT_ENCODING); + Assertions.assertTrue(cliOutput3.contains("Bucket 'bucket2' does not exist in volume '/vol1'.")); + + out.reset(); + err.reset(); // When a non-existent volume filter is passed - try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - PrintStream ps = new PrintStream(outputStream, true, DEFAULT_ENCODING)) { - System.setOut(ps); - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, "/vol3", "bucket2", - false); - tool.run(); - String output = outputStream.toString(DEFAULT_ENCODING); - Assertions.assertTrue(output.contains("Volume '/vol3' does not exist.")); - } finally { - System.setOut(originalOut); - } + String[] args4 = new String[]{"om", "fso-tree", "--db", dbPath, "--volume", "/vol3"}; + int exitCode4 = cmd.execute(args4); + assertEquals(0, exitCode4); + String cliOutput4 = out.toString(DEFAULT_ENCODING); + Assertions.assertTrue(cliOutput4.contains("Volume '/vol3' does not exist.")); + + out.reset(); + err.reset(); // When bucket filter is passed without the volume filter. 
- try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - PrintStream ps = new PrintStream(outputStream, true, DEFAULT_ENCODING)) { - System.setOut(ps); - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, "bucket2", - false); - tool.run(); - String output = outputStream.toString(DEFAULT_ENCODING); - Assertions.assertTrue(output.contains("--bucket flag cannot be used without specifying --volume.")); - } finally { - System.setOut(originalOut); - } + String[] args5 = new String[]{"om", "fso-tree", "--db", dbPath}; + int exitCode5 = cmd.execute(args5); + assertEquals(1, exitCode5); + String cliOutput5 = out.toString(DEFAULT_ENCODING); + Assertions.assertTrue(cliOutput5.contains("--bucket flag cannot be used without specifying --volume.")); + cluster.getOzoneManager().restart(); } @Test public void testMultipleBucketsAndVolumes() throws Exception { - Table dirTable = - cluster.getOzoneManager().getMetadataManager().getDirectoryTable(); - Table keyTable = - cluster.getOzoneManager().getMetadataManager().getKeyTable(getFSOBucketLayout()); + CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair()) + .addSubcommand(new FSORepairCLI())); + String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath(); + FSORepairTool.Report report1 = buildConnectedTree("vol1", "bucket1"); FSORepairTool.Report report2 = buildDisconnectedTree("vol2", "bucket2"); FSORepairTool.Report expectedAggregateReport = new FSORepairTool.Report(report1, report2); + String expectedOutput = serializeReport(expectedAggregateReport); + + cluster.getOzoneManager().stop(); - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null, false); - FSORepairTool.Report generatedReport = tool.run(); + String[] args = new String[] {"om", "fso-tree", "--db", dbPath, "--repair"}; + int exitCode = cmd.execute(args); + assertEquals(0, exitCode); - Assertions.assertEquals(generatedReport, expectedAggregateReport); - assertConnectedTreeReadable("vol1", "bucket1"); - assertDisconnectedTreePartiallyReadable("vol2", "bucket2"); + String cliOutput = out.toString(DEFAULT_ENCODING); + String reportOutput = extractRelevantSection(cliOutput); + Assertions.assertEquals(expectedOutput, reportOutput); - // This assertion ensures that only specific directories and keys remain in the active tables, - // as the remaining entries are expected to be moved to the deleted tables by the background service. - // However, since the timing of the background deletion service is not predictable, - // assertions on the deleted tables themselves may lead to flaky tests. - assertEquals(4, countTableEntries(dirTable)); - assertEquals(5, countTableEntries(keyTable)); + cluster.getOzoneManager().restart(); } /** @@ -332,10 +275,9 @@ public void testMultipleBucketsAndVolumes() throws Exception { */ @Test public void testDeleteOverwrite() throws Exception { - Table keyTable = - cluster.getOzoneManager().getMetadataManager().getKeyTable(getFSOBucketLayout()); - Table dirTable = - cluster.getOzoneManager().getMetadataManager().getDirectoryTable(); + CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair()) + .addSubcommand(new FSORepairCLI())); + String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath(); // Create files and dirs under dir1. To make sure they are added to the // delete table, the keys must have data. 
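A note for reviewers on the pattern used throughout this test class: every case builds the repair command, executes it through picocli, and asserts on stdout captured via the swapped System.out/System.err streams. Below is a minimal, self-contained sketch of that capture pattern; the class and method names are illustrative only and are not part of the patch.

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import picocli.CommandLine;

final class CapturedCliRun {
  // Executes a picocli command while stdout is redirected into a buffer,
  // restores the real stream afterwards, and returns whatever was printed.
  static String run(CommandLine cmd, String... args) throws Exception {
    PrintStream oldOut = System.out;
    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    try (PrintStream ps = new PrintStream(buf, true, StandardCharsets.UTF_8.name())) {
      System.setOut(ps);
      int exitCode = cmd.execute(args);  // picocli returns the command's exit code
      if (exitCode != 0) {
        throw new AssertionError("command exited with code " + exitCode);
      }
    } finally {
      System.setOut(oldOut);  // restore even if the command throws
    }
    return buf.toString(StandardCharsets.UTF_8.name());
  }
}

The real tests split this across the init()/reset() fixtures and capture System.err as well, but the control flow is the same.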
@@ -355,89 +297,141 @@ public void testDeleteOverwrite() throws Exception { ContractTestUtils.touch(fs, new Path("/vol1/bucket1/dir1/file2")); disconnectDirectory("dir1"); - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null, false); - FSORepairTool.Report generatedReport = tool.run(); + cluster.getOzoneManager().stop(); + + String[] args = new String[]{"om", "fso-tree", "--db", dbPath, "--repair"}; + int exitCode = cmd.execute(args); + assertEquals(0, exitCode); - Assertions.assertEquals(1, generatedReport.getUnreferenced().getDirs()); - Assertions.assertEquals(3, generatedReport.getUnreferenced().getFiles()); + String cliOutput = out.toString(DEFAULT_ENCODING); + Assertions.assertTrue(cliOutput.contains("Unreachable:\n\tDirectories: 1\n\tFiles: 3\n")); - // This assertion ensures that only specific directories and keys remain in the active tables, - // as the remaining entries are expected to be moved to the deleted tables by the background service. - // However, since the timing of the background deletion service is not predictable, - // assertions on the deleted tables themselves may lead to flaky tests. - assertEquals(1, countTableEntries(keyTable)); - assertEquals(1, countTableEntries(dirTable)); + cluster.getOzoneManager().restart(); } @Test public void testEmptyFileTrees() throws Exception { + CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair()) + .addSubcommand(new FSORepairCLI())); + String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath(); + + FSORepairTool.Report emptyReport = buildEmptyTree(); + String expectedOutput = serializeReport(emptyReport); + + cluster.getOzoneManager().stop(); + // Run when there are no file trees. - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null, false); - FSORepairTool.Report generatedReport = tool.run(); - Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); - assertDeleteTablesEmpty(); + String[] args = new String[] {"om", "fso-tree", "--db", dbPath, "--repair"}; + int exitCode = cmd.execute(args); + assertEquals(0, exitCode); + + String cliOutput = out.toString(DEFAULT_ENCODING); + String reportOutput = extractRelevantSection(cliOutput); + Assertions.assertEquals(expectedOutput, reportOutput); + + out.reset(); + err.reset(); + cluster.getOzoneManager().restart(); // Create an empty volume and bucket. fs.mkdirs(new Path("/vol1")); fs.mkdirs(new Path("/vol2/bucket1")); + cluster.getOzoneManager().stop(); + // Run on an empty volume and bucket. - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null, false); - generatedReport = tool.run(); - Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); - assertDeleteTablesEmpty(); + String[] args1 = new String[] {"om", "fso-tree", "--db", dbPath, "--repair"}; + int exitCode1 = cmd.execute(args1); + assertEquals(0, exitCode1); + + String cliOutput2 = out.toString(DEFAULT_ENCODING); + String reportOutput2 = extractRelevantSection(cliOutput2); + Assertions.assertEquals(expectedOutput, reportOutput2); + + cluster.getOzoneManager().restart(); } @Test public void testNonFSOBucketsSkipped() throws Exception { ObjectStore store = client.getObjectStore(); - try { - // Create legacy and OBS buckets. 
- store.createVolume("vol1"); - store.getVolume("vol1").createBucket("obs-bucket", - BucketArgs.newBuilder().setBucketLayout(BucketLayout.OBJECT_STORE) - .build()); - store.getVolume("vol1").createBucket("legacy-bucket", - BucketArgs.newBuilder().setBucketLayout(BucketLayout.LEGACY) - .build()); - - // Put a key in the legacy and OBS buckets. - OzoneOutputStream obsStream = store.getVolume("vol1") - .getBucket("obs-bucket") - .createKey("prefix/test-key", 3); - obsStream.write(new byte[]{1, 1, 1}); - obsStream.close(); - - OzoneOutputStream legacyStream = store.getVolume("vol1") - .getBucket("legacy-bucket") - .createKey("prefix/test-key", 3); - legacyStream.write(new byte[]{1, 1, 1}); - legacyStream.close(); - - // Add an FSO bucket with data. - FSORepairTool.Report connectReport = buildConnectedTree("vol1", "fso-bucket"); - - // Even in repair mode there should be no action. legacy and obs buckets - // will be skipped and FSO tree is connected. - tool = new FSORepairTool(getOmDB(), getOmDBLocation(), true, null, null, false); - FSORepairTool.Report generatedReport = tool.run(); - - Assertions.assertEquals(connectReport, generatedReport); - assertConnectedTreeReadable("vol1", "fso-bucket"); - assertDeleteTablesEmpty(); - } finally { - // Need to manually delete obs bucket. It cannot be deleted with ofs as - // part of the normal test cleanup. - store.getVolume("vol1").getBucket("obs-bucket").deleteKey("prefix/test-key"); - store.getVolume("vol1").deleteBucket("obs-bucket"); - } - } + // Create legacy and OBS buckets. + store.createVolume("vol1"); + store.getVolume("vol1").createBucket("obs-bucket", + BucketArgs.newBuilder().setBucketLayout(BucketLayout.OBJECT_STORE) + .build()); + store.getVolume("vol1").createBucket("legacy-bucket", + BucketArgs.newBuilder().setBucketLayout(BucketLayout.LEGACY) + .build()); + + // Put a key in the legacy and OBS buckets. + OzoneOutputStream obsStream = store.getVolume("vol1") + .getBucket("obs-bucket") + .createKey("prefix/test-key", 3); + obsStream.write(new byte[]{1, 1, 1}); + obsStream.close(); + + OzoneOutputStream legacyStream = store.getVolume("vol1") + .getBucket("legacy-bucket") + .createKey("prefix/test-key", 3); + legacyStream.write(new byte[]{1, 1, 1}); + legacyStream.close(); + + CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair()) + .addSubcommand(new FSORepairCLI())); + String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath(); + + // Add an FSO bucket with data. + FSORepairTool.Report connectReport = buildConnectedTree("vol1", "fso-bucket"); + + cluster.getOzoneManager().stop(); + + // Even in repair mode there should be no action. legacy and obs buckets + // will be skipped and FSO tree is connected. 
+ String[] args = new String[] {"om", "fso-tree", "--db", dbPath, "--repair"}; + int exitCode = cmd.execute(args); + assertEquals(0, exitCode); + + String cliOutput = out.toString(DEFAULT_ENCODING); + String reportOutput = extractRelevantSection(cliOutput); + String expectedOutput = serializeReport(connectReport); + + Assertions.assertEquals(expectedOutput, reportOutput); + Assertions.assertTrue(cliOutput.contains("Skipping non-FSO bucket /vol1/obs-bucket")); + Assertions.assertTrue(cliOutput.contains("Skipping non-FSO bucket /vol1/legacy-bucket")); + + cluster.getOzoneManager().restart(); + } private FSORepairTool.Report buildConnectedTree(String volume, String bucket) throws Exception { return buildConnectedTree(volume, bucket, 0); } + private String extractRelevantSection(String cliOutput) { + int startIndex = cliOutput.indexOf("Reachable:"); + if (startIndex == -1) { + throw new AssertionError("Output does not contain 'Reachable' section."); + } + return cliOutput.substring(startIndex).trim(); + } + + private String serializeReport(FSORepairTool.Report report) { + return String.format( + "Reachable:\n\tDirectories: %d\n\tFiles: %d\n\tBytes: %d\n" + + "Unreachable:\n\tDirectories: %d\n\tFiles: %d\n\tBytes: %d\n" + + "Unreferenced:\n\tDirectories: %d\n\tFiles: %d\n\tBytes: %d", + report.getReachable().getDirs(), + report.getReachable().getFiles(), + report.getReachable().getBytes(), + report.getUnreachable().getDirs(), + report.getUnreachable().getFiles(), + report.getUnreachable().getBytes(), + report.getUnreferenced().getDirs(), + report.getUnreferenced().getFiles(), + report.getUnreferenced().getBytes() + ); + } + /** * Creates a tree with 3 reachable directories and 4 reachable files. */ @@ -482,6 +476,20 @@ private FSORepairTool.Report buildConnectedTree(String volume, String bucket, in .build(); } + private FSORepairTool.Report buildEmptyTree() { + FSORepairTool.ReportStatistics reachableCount = + new FSORepairTool.ReportStatistics(0, 0, 0); + FSORepairTool.ReportStatistics unreachableCount = + new FSORepairTool.ReportStatistics(0, 0, 0); + FSORepairTool.ReportStatistics unreferencedCount = + new FSORepairTool.ReportStatistics(0, 0, 0); + return new FSORepairTool.Report.Builder() + .setReachable(reachableCount) + .setUnreachable(unreachableCount) + .setUnreferenced(unreferencedCount) + .build(); + } + private void assertConnectedTreeReadable(String volume, String bucket) throws IOException { Path bucketPath = new Path("/" + volume + "/" + bucket); Path dir1 = new Path(bucketPath, "dir1"); @@ -533,8 +541,7 @@ private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, } private void disconnectDirectory(String dirName) throws Exception { - OzoneManager leader = cluster.getOMLeader(); - Table dirTable = leader.getMetadataManager().getDirectoryTable(); + Table dirTable = cluster.getOzoneManager().getMetadataManager().getDirectoryTable(); try (TableIterator> iterator = dirTable.iterator()) { while (iterator.hasNext()) { Table.KeyValue entry = iterator.next(); @@ -567,93 +574,4 @@ private void assertDisconnectedTreePartiallyReadable(String volume, String bucke Assertions.assertFalse(fs.exists(file3)); Assertions.assertTrue(fs.exists(file4)); } - - private void assertDeleteTablesEmpty() throws Exception { - OzoneManager leader = cluster.getOMLeader(); - - GenericTestUtils.waitFor(() -> { - try { - return leader.getMetadataManager().getDeletedDirTable().isEmpty(); - } catch (Exception e) { - LOG.error("DB failure!", e); - fail("DB failure!"); - return false; - } - }, 
1000, 120000); - GenericTestUtils.waitFor(() -> { - try { - return leader.getMetadataManager().getDeletedTable().isEmpty(); - } catch (Exception e) { - LOG.error("DB failure!", e); - fail("DB failure!"); - return false; - } - }, 1000, 120000); - } - - private void assertFileAndDirTablesEmpty() throws Exception { - OzoneManager leader = cluster.getOMLeader(); - GenericTestUtils.waitFor(() -> { - try { - return leader.getMetadataManager().getDirectoryTable().isEmpty(); - } catch (Exception e) { - LOG.error("DB failure!", e); - fail("DB failure!"); - return false; - } - }, 1000, 120000); - GenericTestUtils.waitFor(() -> { - try { - return leader.getMetadataManager().getFileTable().isEmpty(); - } catch (Exception e) { - LOG.error("DB failure!", e); - fail("DB failure!"); - return false; - } - }, 1000, 120000); - } - - private DBStore getOmDB() { - return cluster.getOMLeader().getMetadataManager().getStore(); - } - - private String getOmDBLocation() { - return cluster.getOMLeader().getMetadataManager().getStore().getDbLocation().toString(); - } - - private static BucketLayout getFSOBucketLayout() { - return BucketLayout.FILE_SYSTEM_OPTIMIZED; - } - - private void runDeletes() throws Exception { - OzoneManager leader = cluster.getOMLeader(); - - int i = 0; - while (!leader.getMetadataManager().getDeletedDirTable().isEmpty()) { - LOG.info("Running iteration {} of DirectoryDeletingService.", i++); - leader.getKeyManager().getDirDeletingService().runPeriodicalTaskNow(); - // Wait for work from this run to flush through the double buffer. - Thread.sleep(500); - } - - i = 0; - while (!leader.getMetadataManager().getDeletedTable().isEmpty()) { - LOG.info("Running iteration {} of KeyDeletingService.", i++); - leader.getKeyManager().getDeletingService().runPeriodicalTaskNow(); - // Wait for work from this run to flush through the double buffer. 
- Thread.sleep(500); - } - } - - private int countTableEntries(Table table) throws Exception { - int count = 0; - try (TableIterator> iterator = table.iterator()) { - while (iterator.hasNext()) { - iterator.next(); - count++; - } - } - System.out.println("Total number of entries: " + count); - return count; - } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneRepairShell.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneRepairShell.java index 9216c909ee4b..653c78314158 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneRepairShell.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneRepairShell.java @@ -25,6 +25,8 @@ import org.apache.hadoop.ozone.repair.OzoneRepair; import org.apache.hadoop.ozone.repair.RDBRepair; import org.apache.hadoop.ozone.repair.TransactionInfoRepair; +import org.apache.hadoop.ozone.repair.om.FSORepairCLI; +import org.apache.hadoop.ozone.repair.om.OMRepair; import org.apache.hadoop.ozone.repair.quota.QuotaRepair; import org.apache.hadoop.ozone.repair.quota.QuotaStatus; import org.apache.hadoop.ozone.repair.quota.QuotaTrigger; @@ -160,4 +162,21 @@ public void testQuotaRepair() throws Exception { return false; }, 1000, 10000); } + + @Test + public void testFSORepair() throws Exception { + CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair()) + .addSubcommand(new FSORepairCLI())); + String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath(); + + cluster.getOzoneManager().stop(); + + String[] args = new String[] {"om", "fso-tree", "--db", dbPath}; + int exitCode = cmd.execute(args); + + assertEquals(0, exitCode); + assertThat(out.toString(DEFAULT_ENCODING)).contains("Creating database of reachable directories at"); + + cluster.getOzoneManager().restart(); + } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index 68c5688b85ce..185b8d08ba1f 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -145,78 +145,82 @@ protected static DBStore getStoreFromPath(String dbPath) throws IOException { } public FSORepairTool.Report run() throws Exception { - - if (bucketFilter != null && volumeFilter == null) { - System.out.println("--bucket flag cannot be used without specifying --volume."); - return null; - } - - if (volumeFilter != null) { - OmVolumeArgs volumeArgs = volumeTable.getIfExist(volumeFilter); - if (volumeArgs == null) { - System.out.println("Volume '" + volumeFilter + "' does not exist."); + try { + if (bucketFilter != null && volumeFilter == null) { + System.out.println("--bucket flag cannot be used without specifying --volume."); return null; } - } - - // Iterate all volumes or a specific volume if specified - try (TableIterator> - volumeIterator = volumeTable.iterator()) { - openReachableDB(); - while (volumeIterator.hasNext()) { - Table.KeyValue volumeEntry = volumeIterator.next(); - String volumeKey = volumeEntry.getKey(); - - if (volumeFilter != null && !volumeFilter.equals(volumeKey)) { - continue; + if (volumeFilter != null) { + OmVolumeArgs volumeArgs = volumeTable.getIfExist(volumeFilter); + if (volumeArgs == null) { + System.out.println("Volume 
'" + volumeFilter + "' does not exist."); + return null; } + } - System.out.println("Processing volume: " + volumeKey); + // Iterate all volumes or a specific volume if specified + try (TableIterator> + volumeIterator = volumeTable.iterator()) { + openReachableDB(); + while (volumeIterator.hasNext()) { + Table.KeyValue volumeEntry = volumeIterator.next(); + String volumeKey = volumeEntry.getKey(); - if (bucketFilter != null) { - OmBucketInfo bucketInfo = bucketTable.getIfExist(volumeKey + "/" + bucketFilter); - if (bucketInfo == null) { - //Bucket does not exist in the volume - System.out.println("Bucket '" + bucketFilter + "' does not exist in volume '" + volumeKey + "'."); - return null; - } - - if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) { - System.out.println("Skipping non-FSO bucket " + bucketFilter); + if (volumeFilter != null && !volumeFilter.equals(volumeKey)) { continue; } - processBucket(volumeEntry.getValue(), bucketInfo); - } else { + System.out.println("Processing volume: " + volumeKey); - // Iterate all buckets in the volume. - try (TableIterator> - bucketIterator = bucketTable.iterator()) { - bucketIterator.seek(volumeKey); - while (bucketIterator.hasNext()) { - Table.KeyValue bucketEntry = bucketIterator.next(); - String bucketKey = bucketEntry.getKey(); - OmBucketInfo bucketInfo = bucketEntry.getValue(); - - if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) { - System.out.println("Skipping non-FSO bucket " + bucketKey); - continue; - } + if (bucketFilter != null) { + OmBucketInfo bucketInfo = bucketTable.getIfExist(volumeKey + "/" + bucketFilter); + if (bucketInfo == null) { + //Bucket does not exist in the volume + System.out.println("Bucket '" + bucketFilter + "' does not exist in volume '" + volumeKey + "'."); + return null; + } - // Stop this loop once we have seen all buckets in the current - // volume. - if (!bucketKey.startsWith(volumeKey)) { - break; - } + if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) { + System.out.println("Skipping non-FSO bucket " + bucketFilter); + continue; + } + + processBucket(volumeEntry.getValue(), bucketInfo); + } else { - processBucket(volumeEntry.getValue(), bucketInfo); + // Iterate all buckets in the volume. + try (TableIterator> + bucketIterator = bucketTable.iterator()) { + bucketIterator.seek(volumeKey); + while (bucketIterator.hasNext()) { + Table.KeyValue bucketEntry = bucketIterator.next(); + String bucketKey = bucketEntry.getKey(); + OmBucketInfo bucketInfo = bucketEntry.getValue(); + + if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) { + System.out.println("Skipping non-FSO bucket " + bucketKey); + continue; + } + + // Stop this loop once we have seen all buckets in the current + // volume. 
+              if (!bucketKey.startsWith(volumeKey)) {
+                break;
+              }
+
+              processBucket(volumeEntry.getValue(), bucketInfo);
+            }
           }
         }
       }
+    } catch (IOException e) {
+      System.out.println("An error occurred while processing: " + e.getMessage());
+      throw e;
     } finally {
       closeReachableDB();
+      store.close();
     }

     return buildReportAndLog();
@@ -527,6 +531,10 @@ private void closeReachableDB() throws IOException {
     if (reachableDB != null) {
       reachableDB.close();
     }
+    File reachableDBFile = new File(new File(omDBPath).getParentFile(), "reachable.db");
+    if (reachableDBFile.exists()) {
+      FileUtils.deleteDirectory(reachableDBFile);
+    }
   }

   /**

From 0730947abd6c527a3fbdb1244c9f3014e0513c41 Mon Sep 17 00:00:00 2001
From: sarvekshayr
Date: Mon, 2 Dec 2024 20:00:05 +0530
Subject: [PATCH 23/28] Fix bugs

---
 .../ozone/repair/om/TestFSORepairTool.java | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
index d83afc907ac4..469abea843c9 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
@@ -36,7 +36,6 @@
 import org.apache.hadoop.ozone.om.helpers.BucketLayout;
 import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo;
 import org.apache.hadoop.ozone.repair.OzoneRepair;
-import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeEach;
@@ -68,10 +67,10 @@ public class TestFSORepairTool {
   private static final PrintStream OLD_OUT = System.out;
   private static final PrintStream OLD_ERR = System.err;
   private static final String DEFAULT_ENCODING = UTF_8.name();
-  private static MiniOzoneCluster cluster;
-  private static FileSystem fs;
-  private static OzoneClient client;
-  private static OzoneConfiguration conf = null;
+  private MiniOzoneCluster cluster;
+  private FileSystem fs;
+  private OzoneClient client;
+  private OzoneConfiguration conf = null;

   @BeforeEach
   public void init() throws Exception {
@@ -101,10 +100,7 @@ public void reset() throws IOException {
     // restore system streams
     System.setOut(OLD_OUT);
     System.setErr(OLD_ERR);
-  }

-  @AfterAll
-  public static void teardown() {
     if (cluster != null) {
       cluster.shutdown();
     }
@@ -417,9 +413,9 @@ private String extractRelevantSection(String cliOutput) {

   private String serializeReport(FSORepairTool.Report report) {
     return String.format(
-        "Reachable:\n\tDirectories: %d\n\tFiles: %d\n\tBytes: %d\n" +
-        "Unreachable:\n\tDirectories: %d\n\tFiles: %d\n\tBytes: %d\n" +
-        "Unreferenced:\n\tDirectories: %d\n\tFiles: %d\n\tBytes: %d",
+        "Reachable:%n\tDirectories: %d%n\tFiles: %d%n\tBytes: %d%n" +
+        "Unreachable:%n\tDirectories: %d%n\tFiles: %d%n\tBytes: %d%n" +
+        "Unreferenced:%n\tDirectories: %d%n\tFiles: %d%n\tBytes: %d",
         report.getReachable().getDirs(),
         report.getReachable().getFiles(),
         report.getReachable().getBytes(),

From 56cec4ff0f982f1b58c83f10b102a7bc9ee2229c Mon Sep 17 00:00:00 2001
From: sarvekshayr
Date: Tue, 3 Dec 2024 09:46:38 +0530
Subject: [PATCH 24/28] Corrected the test cases

---
 .../ozone/repair/om/TestFSORepairTool.java    |  6 ++--
 .../hadoop/ozone/repair/om/FSORepairTool.java | 33 ++++++++++---------
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
index 469abea843c9..536f3b3b9f62 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
@@ -232,9 +232,9 @@ public void testVolumeAndBucketFilter() throws Exception {
     err.reset();

     // When bucket filter is passed without the volume filter.
-    String[] args5 = new String[]{"om", "fso-tree", "--db", dbPath};
+    String[] args5 = new String[]{"om", "fso-tree", "--db", dbPath, "--bucket", "bucket1"};
     int exitCode5 = cmd.execute(args5);
-    assertEquals(1, exitCode5);
+    assertEquals(0, exitCode5);
     String cliOutput5 = out.toString(DEFAULT_ENCODING);
     Assertions.assertTrue(cliOutput5.contains("--bucket flag cannot be used without specifying --volume."));

@@ -300,7 +300,7 @@ public void testDeleteOverwrite() throws Exception {
     assertEquals(0, exitCode);

     String cliOutput = out.toString(DEFAULT_ENCODING);
-    Assertions.assertTrue(cliOutput.contains("Unreachable:\n\tDirectories: 1\n\tFiles: 3\n"));
+    Assertions.assertTrue(cliOutput.contains("Unreferenced:\n\tDirectories: 1\n\tFiles: 3"));

     cluster.getOzoneManager().restart();
   }
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java
index 185b8d08ba1f..7e0fb23f5aad 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java
@@ -162,7 +162,12 @@ public FSORepairTool.Report run() throws Exception {
       // Iterate all volumes or a specific volume if specified
       try (TableIterator>
           volumeIterator = volumeTable.iterator()) {
-        openReachableDB();
+        try {
+          openReachableDB();
+        } catch (IOException e) {
+          System.out.println("Failed to open reachable database: " + e.getMessage());
+          throw e;
+        }
         while (volumeIterator.hasNext()) {
           Table.KeyValue volumeEntry = volumeIterator.next();
           String volumeKey = volumeEntry.getKey();
@@ -507,24 +512,20 @@ private static String buildReachableParentKey(String fileOrDirKey) {
         parentID;
   }

-  private void openReachableDB() {
+  private void openReachableDB() throws IOException {
     File reachableDBFile = new File(new File(omDBPath).getParentFile(), "reachable.db");
     System.out.println("Creating database of reachable directories at " + reachableDBFile);
     // Delete the DB from the last run if it exists.
-    try {
-      if (reachableDBFile.exists()) {
-        FileUtils.deleteDirectory(reachableDBFile);
-      }
-
-      ConfigurationSource conf = new OzoneConfiguration();
-      reachableDB = DBStoreBuilder.newBuilder(conf)
-          .setName("reachable.db")
-          .setPath(reachableDBFile.getParentFile().toPath())
-          .addTable(REACHABLE_TABLE)
-          .build();
-    } catch (IOException e) {
-      System.out.println("Error creating reachable.db: " + e.getMessage());
+    if (reachableDBFile.exists()) {
+      FileUtils.deleteDirectory(reachableDBFile);
     }
+
+    ConfigurationSource conf = new OzoneConfiguration();
+    reachableDB = DBStoreBuilder.newBuilder(conf)
+        .setName("reachable.db")
+        .setPath(reachableDBFile.getParentFile().toPath())
+        .addTable(REACHABLE_TABLE)
+        .build();
   }

   private void closeReachableDB() throws IOException {
@@ -579,7 +580,7 @@ public ReportStatistics getUnreferenced() {
     }

     public String toString() {
-      return "Reachable: " + reachable + "\nUnreachable: " + unreachable + "\nUnreferenced: " + unreferenced;
+      return "Reachable:" + reachable + "\nUnreachable:" + unreachable + "\nUnreferenced:" + unreferenced;
     }

     @Override

From aa48845aae870fa2defa8b03772a82255ccdf314 Mon Sep 17 00:00:00 2001
From: sarvekshayr
Date: Wed, 4 Dec 2024 17:32:21 +0530
Subject: [PATCH 25/28] Closed ozone client in the test

---
 .../org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
index 536f3b3b9f62..41ff582d54d4 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
@@ -104,6 +104,9 @@ public void reset() throws IOException {
     if (cluster != null) {
       cluster.shutdown();
     }
+    if (client != null) {
+      client.close();
+    }
     IOUtils.closeQuietly(fs);
   }

From c70d0b9afb4d4ee04381e21d5960253ed7baae05 Mon Sep 17 00:00:00 2001
From: sarvekshayr
Date: Thu, 5 Dec 2024 14:45:37 +0530
Subject: [PATCH 26/28] CommandLine improvements

---
 .../ozone/repair/om/TestFSORepairTool.java    | 21 +++++++------------
 .../ozone/shell/TestOzoneRepairShell.java     | 19 -----------------
 .../hadoop/ozone/repair/om/FSORepairCLI.java  | 13 +-----------
 .../hadoop/ozone/repair/om/OMRepair.java      |  8 +++----
 4 files changed, 12 insertions(+), 49 deletions(-)

diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
index 41ff582d54d4..4006ec6e822e 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java
@@ -112,8 +112,7 @@

   @Test
   public void testConnectedTreeOneBucket() throws Exception {
-    CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair())
-        .addSubcommand(new FSORepairCLI()));
+    CommandLine cmd = new OzoneRepair().getCmd();
     String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath();

     FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1");
@@ -146,8 +146,7 @@ public void testReportedDataSize() throws Exception {
-    CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair())
-        .addSubcommand(new FSORepairCLI()));
+    CommandLine cmd = new OzoneRepair().getCmd();
     String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath();

     FSORepairTool.Report report1 = buildDisconnectedTree("vol1", "bucket1", 10);
@@ -175,8 +173,7 @@ public void testReportedDataSize() throws Exception {
    */
   @Test
   public void testVolumeAndBucketFilter() throws Exception {
-    CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair())
-        .addSubcommand(new FSORepairCLI()));
+    CommandLine cmd = new OzoneRepair().getCmd();
     String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath();

     FSORepairTool.Report report1 = buildDisconnectedTree("vol1", "bucket1", 10);
@@ -246,8 +243,7 @@

   @Test
   public void testMultipleBucketsAndVolumes() throws Exception {
-    CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair())
-        .addSubcommand(new FSORepairCLI()));
+    CommandLine cmd = new OzoneRepair().getCmd();
     String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath();

     FSORepairTool.Report report1 = buildConnectedTree("vol1", "bucket1");
@@ -274,8 +270,7 @@
    */
   @Test
   public void testDeleteOverwrite() throws Exception {
-    CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair())
-        .addSubcommand(new FSORepairCLI()));
+    CommandLine cmd = new OzoneRepair().getCmd();
     String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath();

     // Create files and dirs under dir1. To make sure they are added to the
@@ -310,8 +305,7 @@

   @Test
   public void testEmptyFileTrees() throws Exception {
-    CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair())
-        .addSubcommand(new FSORepairCLI()));
+    CommandLine cmd = new OzoneRepair().getCmd();
     String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath();

     FSORepairTool.Report emptyReport = buildEmptyTree();
@@ -376,8 +370,7 @@ public void testNonFSOBucketsSkipped() throws Exception {
     legacyStream.close();

-    CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair())
-        .addSubcommand(new FSORepairCLI()));
+    CommandLine cmd = new OzoneRepair().getCmd();
     String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath();

     // Add an FSO bucket with data.
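The recurring change in this patch is worth a remark: each hand-assembled new CommandLine(new OzoneRepair()).addSubcommand(...) chain is replaced with new OzoneRepair().getCmd(), which is assumed here to return the fully wired root command. For reference, a minimal sketch of the two picocli registration styles being traded, using toy class names rather than the Ozone ones:

import picocli.CommandLine;
import picocli.CommandLine.Command;

@Command(name = "om")
class OmCmd implements Runnable {
  public void run() { }
}

// Declarative wiring: the subcommand tree lives in the annotation, so every
// entry point (shell and tests) sees the same tree.
@Command(name = "repair", subcommands = { OmCmd.class })
class RepairCmd implements Runnable {
  public void run() { }
}

// Programmatic wiring: the tree is assembled by hand at each call site,
// which is what these tests did before this patch.
@Command(name = "repair")
class BareRepairCmd implements Runnable {
  public void run() { }
}

class WiringDemo {
  public static void main(String[] args) {
    new CommandLine(new RepairCmd()).execute("om");
    new CommandLine(new BareRepairCmd())
        .addSubcommand("om", new CommandLine(new OmCmd()))
        .execute("om");
  }
}

Keeping the subcommand tree in one place is also what makes the duplicated testFSORepair in TestOzoneRepairShell, removed just below, safe to delete.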
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneRepairShell.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneRepairShell.java
index 3b8195f5b97b..1860d695f5e7 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneRepairShell.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneRepairShell.java
@@ -25,8 +25,6 @@
 import org.apache.hadoop.ozone.repair.OzoneRepair;
 import org.apache.hadoop.ozone.repair.ldb.RDBRepair;
 import org.apache.hadoop.ozone.repair.ldb.TransactionInfoRepair;
-import org.apache.hadoop.ozone.repair.om.FSORepairCLI;
-import org.apache.hadoop.ozone.repair.om.OMRepair;
 import org.apache.hadoop.ozone.repair.quota.QuotaRepair;
 import org.apache.hadoop.ozone.repair.quota.QuotaStatus;
 import org.apache.hadoop.ozone.repair.quota.QuotaTrigger;
@@ -162,21 +160,4 @@ public void testQuotaRepair() throws Exception {
       return false;
     }, 1000, 10000);
   }
-
-  @Test
-  public void testFSORepair() throws Exception {
-    CommandLine cmd = new CommandLine(new OzoneRepair()).addSubcommand(new CommandLine(new OMRepair())
-        .addSubcommand(new FSORepairCLI()));
-    String dbPath = new File(OMStorage.getOmDbDir(conf) + "/" + OM_DB_NAME).getPath();
-
-    cluster.getOzoneManager().stop();
-
-    String[] args = new String[] {"om", "fso-tree", "--db", dbPath};
-    int exitCode = cmd.execute(args);
-
-    assertEquals(0, exitCode);
-    assertThat(out.toString(DEFAULT_ENCODING)).contains("Creating database of reachable directories at");
-
-    cluster.getOzoneManager().restart();
-  }
 }
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java
index ab24deb72374..24d4442ad052 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java
@@ -18,8 +18,6 @@
 package org.apache.hadoop.ozone.repair.om;

-import org.apache.hadoop.hdds.cli.SubcommandWithParent;
-import org.kohsuke.MetaInfServices;
 import picocli.CommandLine;

 import java.util.concurrent.Callable;
@@ -32,11 +30,7 @@
     description = "Identify and repair a disconnected FSO tree, and mark unreachable entries for deletion. " +
         "OM should be stopped while this tool is run."
 )
-@MetaInfServices(SubcommandWithParent.class)
-public class FSORepairCLI implements Callable, SubcommandWithParent {
-
-  @CommandLine.ParentCommand
-  private OMRepair parent;
+public class FSORepairCLI implements Callable {

   @CommandLine.Option(names = {"--db"},
       required = true,
@@ -81,9 +75,4 @@ public Void call() throws Exception {

     return null;
   }
-
-  @Override
-  public Class getParentType() {
-    return OMRepair.class;
-  }
 }
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java
index 6682ccda7474..d8b759b327ac 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java
@@ -30,16 +30,16 @@
  * Ozone Repair CLI for OM.
  */
 @CommandLine.Command(name = "om",
-    description = "Operational tool to repair OM.")
+    subcommands = {
+        FSORepairCLI.class,
+    },
+    description = "Operational tool to repair OM.")
 @MetaInfServices(SubcommandWithParent.class)
 public class OMRepair implements Callable, SubcommandWithParent {

   @CommandLine.Spec
   private CommandLine.Model.CommandSpec spec;

-  @CommandLine.ParentCommand
-  private OzoneRepair parent;
-
   @Override
   public Void call() {
     GenericCli.missingSubcommand(spec);
     return null;
   }

From b40bf67136b09891b90f5f3db0a64fa1bdfcb98f Mon Sep 17 00:00:00 2001
From: "Doroszlai, Attila"
Date: Sat, 7 Dec 2024 09:51:54 +0100
Subject: [PATCH 27/28] implement RepairSubcommand instead of SubcommandWithParent

---
 .../org/apache/hadoop/ozone/repair/om/OMRepair.java | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java
index d8b759b327ac..56d42d23f494 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java
@@ -19,8 +19,7 @@
 package org.apache.hadoop.ozone.repair.om;

 import org.apache.hadoop.hdds.cli.GenericCli;
-import org.apache.hadoop.hdds.cli.SubcommandWithParent;
-import org.apache.hadoop.ozone.repair.OzoneRepair;
+import org.apache.hadoop.hdds.cli.RepairSubcommand;
 import org.kohsuke.MetaInfServices;
 import picocli.CommandLine;

@@ -34,8 +33,8 @@
         FSORepairCLI.class,
     },
     description = "Operational tool to repair OM.")
-@MetaInfServices(SubcommandWithParent.class)
-public class OMRepair implements Callable, SubcommandWithParent {
+@MetaInfServices(RepairSubcommand.class)
+public class OMRepair implements Callable, RepairSubcommand {

   @CommandLine.Spec
   private CommandLine.Model.CommandSpec spec;
@@ -45,9 +44,4 @@ public Void call() {
     GenericCli.missingSubcommand(spec);
     return null;
   }
-
-  @Override
-  public Class getParentType() {
-    return OzoneRepair.class;
-  }
 }

From 75ae9aaab611195473f04034fb6644ff04f23147 Mon Sep 17 00:00:00 2001
From: sarvekshayr
Date: Mon, 9 Dec 2024 12:10:21 +0530
Subject: [PATCH 28/28] Modified the cli description

---
 .../java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java
index 24d4442ad052..5a217e9f2de8 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java
@@ -27,7 +27,7 @@
  */
 @CommandLine.Command(
     name = "fso-tree",
-    description = "Identify and repair a disconnected FSO tree, and mark unreachable entries for deletion. " +
+    description = "Identify and repair a disconnected FSO tree by marking unreferenced entries for deletion. " +
         "OM should be stopped while this tool is run."
 )
 public class FSORepairCLI implements Callable {

@@ -39,7 +39,7 @@ public class FSORepairCLI implements Callable {

   @CommandLine.Option(names = {"-r", "--repair"},
       defaultValue = "false",
-      description = "Run in repair mode to move unreachable files and directories to deleted tables.")
+      description = "Run in repair mode to move unreferenced files and directories to deleted tables.")
   private boolean repair;

   @CommandLine.Option(names = {"-v", "--volume"},
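A closing note on the report text that several of these patches touch: patch 23 switched the test's serializeReport from literal \n to %n because String.format("%n") expands to the platform line separator, while "\n" is always a bare linefeed. A tiny sketch of when the two forms diverge (illustrative only, not part of the patch):

// Prints whether String.format("%n") matches "\n" on the current platform.
public class LineSeparatorDemo {
  public static void main(String[] args) {
    String literal = String.format("Reachable:\n\tDirectories: %d", 3);
    String portable = String.format("Reachable:%n\tDirectories: %d", 3);
    System.out.println(literal.equals(portable)
        ? "separator is \\n here; the two forms match"
        : "separator is \\r\\n here; only %n matches platform output");
  }
}

On Linux the two coincide, which is why the remaining \n-based contains(...) assertions, such as the one corrected in patch 24, still pass there.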