From f982ecebb8fcea2c1ba275667330fc7fe274a681 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Fri, 20 Jun 2025 11:56:18 -0700 Subject: [PATCH 1/3] HDDS-13309. Add keyIterator/valueIterator methods to Table. --- .../hadoop/hdds/utils/db/CodecTestUtil.java | 29 ++++ .../KeyValueContainerMetadataInspector.java | 14 +- .../background/BlockDeletingTask.java | 12 +- .../container/ozoneimpl/OzoneContainer.java | 7 +- .../utils/db/RDBStoreAbstractIterator.java | 2 +- .../utils/db/RDBStoreCodecBufferIterator.java | 6 +- .../apache/hadoop/hdds/utils/db/Table.java | 45 ++++-- .../hadoop/hdds/utils/db/TableIterator.java | 48 ++++++ .../db/TestRDBStoreByteArrayIterator.java | 6 - .../hadoop/hdds/utils/db/TestTypedTable.java | 146 ++++++++++++++---- .../container/ContainerStateManagerImpl.java | 6 +- .../hdds/scm/ha/SequenceIdGenerator.java | 20 +-- .../pipeline/PipelineStateManagerImpl.java | 7 +- .../ozone/om/service/QuotaRepairTask.java | 8 +- 14 files changed, 272 insertions(+), 84 deletions(-) diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/utils/db/CodecTestUtil.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/utils/db/CodecTestUtil.java index bdb02b991e24..14ebbd63db27 100644 --- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/utils/db/CodecTestUtil.java +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/utils/db/CodecTestUtil.java @@ -19,6 +19,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import java.lang.ref.WeakReference; @@ -99,4 +100,32 @@ public static void runTest(Codec codec, T original, wrapped.release(); assertEquals(original, fromWrappedArray); } + + public static Codec newCodecWithoutCodecBuffer(Codec codec) { + assertTrue(codec.supportCodecBuffer()); + final Codec newCodec = new Codec() { + 
@Override + public byte[] toPersistedFormat(T object) throws CodecException { + return codec.toPersistedFormat(object); + } + + @Override + public T fromPersistedFormat(byte[] rawData) throws CodecException { + return codec.fromPersistedFormat(rawData); + } + + @Override + public Class getTypeClass() { + return codec.getTypeClass(); + } + + @Override + public T copyObject(T object) { + return codec.copyObject(object); + } + }; + + assertFalse(newCodec.supportCodecBuffer()); + return newCodec; + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java index e1064039efe5..36f41ca982ff 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java @@ -17,7 +17,6 @@ package org.apache.hadoop.ozone.container.keyvalue; -import static org.apache.hadoop.hdds.utils.db.Table.KeyValueIterator.Type.VALUE_ONLY; import static org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil.isSameSchemaVersion; import com.fasterxml.jackson.databind.JsonNode; @@ -36,6 +35,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; import org.apache.hadoop.hdds.server.JsonUtils; import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.impl.ContainerData; @@ -500,9 +500,9 @@ static PendingDelete countPendingDeletesSchemaV2( Table delTxTable = schemaTwoStore.getDeleteTransactionTable(); - try 
(Table.KeyValueIterator iterator = delTxTable.iterator(VALUE_ONLY)) { + try (TableIterator iterator = delTxTable.valueIterator()) { while (iterator.hasNext()) { - DeletedBlocksTransaction txn = iterator.next().getValue(); + final DeletedBlocksTransaction txn = iterator.next(); final List localIDs = txn.getLocalIDList(); // In schema 2, pending delete blocks are stored in the // transaction object. Since the actual blocks still exist in the @@ -543,10 +543,10 @@ static PendingDelete countPendingDeletesSchemaV3( KeyValueContainerData containerData) throws IOException { long pendingDeleteBlockCountTotal = 0; long pendingDeleteBytes = 0; - try (Table.KeyValueIterator iter - = store.getDeleteTransactionTable().iterator(containerData.containerPrefix(), VALUE_ONLY)) { - while (iter.hasNext()) { - DeletedBlocksTransaction delTx = iter.next().getValue(); + try (TableIterator iterator + = store.getDeleteTransactionTable().valueIterator(containerData.containerPrefix())) { + while (iterator.hasNext()) { + final DeletedBlocksTransaction delTx = iterator.next(); final List localIDs = delTx.getLocalIDList(); pendingDeleteBlockCountTotal += localIDs.size(); pendingDeleteBytes += computePendingDeleteBytes( diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java index 411aab97b186..8a474a3773de 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java @@ -17,7 +17,6 @@ package org.apache.hadoop.ozone.container.keyvalue.statemachine.background; -import static org.apache.hadoop.hdds.utils.db.Table.KeyValueIterator.Type.VALUE_ONLY; import 
static org.apache.hadoop.ozone.OzoneConsts.SCHEMA_V1; import static org.apache.hadoop.ozone.OzoneConsts.SCHEMA_V2; import static org.apache.hadoop.ozone.OzoneConsts.SCHEMA_V3; @@ -40,6 +39,7 @@ import org.apache.hadoop.hdds.utils.MetadataKeyFilters.KeyPrefixFilter; import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.helpers.BlockDeletingServiceMetrics; import org.apache.hadoop.ozone.container.common.impl.BlockDeletingService; @@ -277,7 +277,7 @@ public ContainerBackgroundTaskResult deleteViaSchema2( Table deleteTxns = ((DeleteTransactionStore) meta.getStore()) .getDeleteTransactionTable(); - try (Table.KeyValueIterator iterator = deleteTxns.iterator(VALUE_ONLY)) { + try (TableIterator iterator = deleteTxns.valueIterator()) { return deleteViaTransactionStore( iterator, meta, container, dataDir, startTime, schema2Deleter); @@ -296,8 +296,8 @@ public ContainerBackgroundTaskResult deleteViaSchema3( Table deleteTxns = ((DeleteTransactionStore) meta.getStore()) .getDeleteTransactionTable(); - try (Table.KeyValueIterator iterator - = deleteTxns.iterator(containerData.containerPrefix(), VALUE_ONLY)) { + try (TableIterator iterator + = deleteTxns.valueIterator(containerData.containerPrefix())) { return deleteViaTransactionStore( iterator, meta, container, dataDir, startTime, schema3Deleter); @@ -305,7 +305,7 @@ public ContainerBackgroundTaskResult deleteViaSchema3( } private ContainerBackgroundTaskResult deleteViaTransactionStore( - Table.KeyValueIterator iter, DBHandle meta, Container container, File dataDir, + TableIterator iter, DBHandle meta, Container container, File dataDir, long startTime, Deleter deleter) throws IOException { ContainerBackgroundTaskResult crr = new ContainerBackgroundTaskResult(); if (!checkDataDir(dataDir)) { @@ -323,7 +323,7 @@ 
private ContainerBackgroundTaskResult deleteViaTransactionStore( List delBlocks = new ArrayList<>(); int numBlocks = 0; while (iter.hasNext() && (numBlocks < blocksToDelete)) { - DeletedBlocksTransaction delTx = iter.next().getValue(); + final DeletedBlocksTransaction delTx = iter.next(); numBlocks += delTx.getLocalIDList().size(); delBlocks.add(delTx); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index 4448d127ef5f..bbda98c65c39 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -17,7 +17,6 @@ package org.apache.hadoop.ozone.container.ozoneimpl; -import static org.apache.hadoop.hdds.utils.db.Table.KeyValueIterator.Type.KEY_ONLY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_TIMEOUT; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_WORKERS; @@ -65,7 +64,7 @@ import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.hdds.utils.IOUtils; -import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.container.common.DatanodeLayoutStorage; import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; @@ -346,10 +345,10 @@ public void buildContainerSet() throws IOException { for (Thread volumeThread : volumeThreads) { volumeThread.join(); } - try (Table.KeyValueIterator itr = 
containerSet.getContainerIdsTable().iterator(KEY_ONLY)) { + try (TableIterator itr = containerSet.getContainerIdsTable().keyIterator()) { final Map containerIds = new HashMap<>(); while (itr.hasNext()) { - containerIds.put(itr.next().getKey(), 0L); + containerIds.put(itr.next(), 0L); } containerSet.buildMissingContainerSetAndValidate(containerIds, ContainerID::getId); } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStoreAbstractIterator.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStoreAbstractIterator.java index 31162a67d82a..81bdb9887f77 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStoreAbstractIterator.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStoreAbstractIterator.java @@ -47,7 +47,7 @@ abstract class RDBStoreAbstractIterator this.rocksDBIterator = iterator; this.rocksDBTable = table; this.prefix = prefix; - this.type = this.prefix == null ? type : type.addKey(); // it has to read key for matching prefix. + this.type = type; } Type getType() { diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStoreCodecBufferIterator.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStoreCodecBufferIterator.java index 6bed6dfee840..abcb361ecba4 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStoreCodecBufferIterator.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStoreCodecBufferIterator.java @@ -38,7 +38,8 @@ class RDBStoreCodecBufferIterator extends RDBStoreAbstractIterator final String name = table != null ? table.getName() : null; this.keyBuffer = new Buffer( new CodecBuffer.Capacity(name + "-iterator-key", 1 << 10), - getType().readKey() ? buffer -> getRocksDBIterator().get().key(buffer) : null); + // it has to read key for matching prefix. + getType().readKey() || prefix != null ? 
buffer -> getRocksDBIterator().get().key(buffer) : null); this.valueBuffer = new Buffer( new CodecBuffer.Capacity(name + "-iterator-value", 4 << 10), getType().readValue() ? buffer -> getRocksDBIterator().get().value(buffer) : null); @@ -58,7 +59,8 @@ CodecBuffer key() { @Override Table.KeyValue getKeyValue() { assertOpen(); - return Table.newKeyValue(key(), valueBuffer.getFromDb()); + final CodecBuffer key = getType().readKey() ? key() : CodecBuffer.getEmptyBuffer(); + return Table.newKeyValue(key, valueBuffer.getFromDb()); } @Override diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java index 4e5a64055079..8660d70961da 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java @@ -160,17 +160,48 @@ default KeyValueIterator iterator(KeyValueIterator.Type type) /** * Iterate the elements in this table. + *

+ * Using a more restrictive type may improve performance + * since the unrequired data will not be read from the DB. + * Note that, when the prefix is non-empty, + * using a non-key type may not improve performance + * since it has to read keys for matching the prefix. * * @param prefix The prefix of the elements to be iterated. * @param type Specify whether key and/or value are required. - * When the prefix is non-empty, it has to read keys for matching the prefix. - * The type will be automatically changed to including keys; - * see {@link KeyValueIterator.Type#addKey()}. * @return an iterator. */ KeyValueIterator iterator(KEY prefix, KeyValueIterator.Type type) throws RocksDatabaseException, CodecException; + /** + * @param prefix The prefix of the elements to be iterated. + * @return a key-only iterator + */ + default TableIterator keyIterator(KEY prefix) throws RocksDatabaseException, CodecException { + final KeyValueIterator i = iterator(prefix, KeyValueIterator.Type.KEY_ONLY); + return TableIterator.convert(i, KeyValue::getKey); + } + + /** The same as keyIterator(null). */ + default TableIterator keyIterator() throws RocksDatabaseException, CodecException { + return keyIterator(null); + } + + /** + * @param prefix The prefix of the elements to be iterated. + * @return a value-only iterator. + */ + default TableIterator valueIterator(KEY prefix) throws RocksDatabaseException, CodecException { + final KeyValueIterator i = iterator(prefix, KeyValueIterator.Type.VALUE_ONLY); + return TableIterator.convert(i, KeyValue::getValue); + } + + /** The same as valueIterator(null). */ + default TableIterator valueIterator() throws RocksDatabaseException, CodecException { + return valueIterator(null); + } + /** * Returns the Name of this Table. * @return - Table Name. 
@@ -355,8 +386,8 @@ public boolean equals(Object obj) { return false; } final KeyValue that = (KeyValue) obj; - return this.getKey().equals(that.getKey()) - && this.getValue().equals(that.getValue()); + return Objects.equals(this.getKey(), that.getKey()) + && Objects.equals(this.getValue(), that.getValue()); } @Override @@ -395,10 +426,6 @@ boolean readKey() { boolean readValue() { return (this.ordinal() & VALUE_ONLY.ordinal()) != 0; } - - Type addKey() { - return values()[ordinal() | KEY_ONLY.ordinal()]; - } } } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TableIterator.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TableIterator.java index 85c02fa95300..5fcf5e1ff703 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TableIterator.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TableIterator.java @@ -19,6 +19,8 @@ import java.io.Closeable; import java.util.Iterator; +import java.util.function.Function; +import org.apache.hadoop.hdds.utils.db.Table.KeyValueIterator; /** * To iterate a {@link Table}. @@ -54,4 +56,50 @@ public interface TableIterator extends Iterator, Closeable { */ void removeFromDB() throws RocksDatabaseException, CodecException; + /** + * Convert the given {@link KeyValueIterator} to a {@link TableIterator} using the given converter. 
+ * + * @param The key type of both the input and the output iterators + * @param The value type of the input iterator + * @param The value type of the output iterator + */ + static TableIterator convert(KeyValueIterator i, + Function, OUTPUT> converter) throws RocksDatabaseException, CodecException { + return new TableIterator() { + @Override + public boolean hasNext() { + return i.hasNext(); + } + + @Override + public OUTPUT next() { + return converter.apply(i.next()); + } + + @Override + public void close() throws RocksDatabaseException { + i.close(); + } + + @Override + public void seekToFirst() { + i.seekToFirst(); + } + + @Override + public void seekToLast() { + i.seekToLast(); + } + + @Override + public OUTPUT seek(K key) throws RocksDatabaseException, CodecException { + return converter.apply(i.seek(key)); + } + + @Override + public void removeFromDB() throws RocksDatabaseException, CodecException { + i.removeFromDB(); + } + }; + } } diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStoreByteArrayIterator.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStoreByteArrayIterator.java index 436eee4006c6..0a7da423879a 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStoreByteArrayIterator.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStoreByteArrayIterator.java @@ -22,7 +22,6 @@ import static org.apache.hadoop.hdds.utils.db.Table.KeyValueIterator.Type.NEITHER; import static org.apache.hadoop.hdds.utils.db.Table.KeyValueIterator.Type.VALUE_ONLY; import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -316,10 +315,5 @@ public void testIteratorType() { 
assertTrue(KEY_AND_VALUE.readKey()); assertTrue(KEY_AND_VALUE.readValue()); - - assertEquals(KEY_ONLY, NEITHER.addKey()); - assertEquals(KEY_ONLY, KEY_ONLY.addKey()); - assertEquals(KEY_AND_VALUE, VALUE_ONLY.addKey()); - assertEquals(KEY_AND_VALUE, KEY_AND_VALUE.addKey()); } } diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestTypedTable.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestTypedTable.java index 8b68f8e57269..cedcc42b1647 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestTypedTable.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestTypedTable.java @@ -34,6 +34,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.TreeMap; import java.util.concurrent.ThreadLocalRandom; import java.util.function.LongFunction; import java.util.stream.Collectors; @@ -55,7 +56,7 @@ */ public class TestTypedTable { private final List families = Arrays.asList(StringUtils.bytes2String(RocksDB.DEFAULT_COLUMN_FAMILY), - "First", "Second"); + "First", "Second", "Third", "Fourth", "Fifth", "Sixth"); private RDBStore rdb; private final List closeables = new ArrayList<>(); @@ -95,7 +96,7 @@ static V put(Map map, long key, LongFunction constructor) { return map.put(key, constructor.apply(key)); } - static Map newMap(LongFunction constructor) { + static Map newMap(int numRandom, LongFunction constructor) { final Map map = new HashMap<>(); for (long n = 1; n > 0; n <<= 1) { put(map, n, constructor); @@ -103,7 +104,7 @@ static Map newMap(LongFunction constructor) { put(map, n + 1, constructor); } put(map, Long.MAX_VALUE, constructor); - for (int i = 0; i < 1000; i++) { + for (int i = 0; i < numRandom; i++) { final long key = ThreadLocalRandom.current().nextLong(Long.MAX_VALUE) + 1; put(map, key, constructor); } @@ -113,7 +114,7 @@ static Map newMap(LongFunction constructor) { @Test public void testContainerIDvsLong() 
throws Exception { - final Map keys = newMap(ContainerID::valueOf); + final Map keys = newMap(1000, ContainerID::valueOf); // Table 1: ContainerID -> String // Table 2: Long -> String @@ -146,34 +147,73 @@ public void testContainerIDvsLong() throws Exception { final String longValue = longTable2.get(n); assertEquals(expected, longValue); } + } + + @Test + public void testIteratorWithoutPrefixByteArray() throws Exception { + final Codec keyCodec = CodecTestUtil.newCodecWithoutCodecBuffer(LongCodec.get()); + assertFalse(keyCodec.supportCodecBuffer()); + runTestIteratorWithoutPrefix(3, keyCodec); + } + + @Test + public void testIteratorWithoutPrefixCodecBuffer() throws Exception { + final LongCodec keyCodec = LongCodec.get(); + assertTrue(keyCodec.supportCodecBuffer()); + runTestIteratorWithoutPrefix(4, keyCodec); + } + + void runTestIteratorWithoutPrefix(int tableIndex, Codec keyCodec) throws Exception { + final Map keys = newMap(10000, ContainerID::valueOf); + final TypedTable table = newTypedTable(tableIndex, keyCodec, ContainerID.getCodec()); + for (Map.Entry e : keys.entrySet()) { + table.put(e.getKey(), e.getValue()); + } + runTestIterators(null, keys, table); + } + + static void runTestIterators(K prefix, Map map, TypedTable table) + throws Exception { + try (Table.KeyValueIterator neither = table.iterator(prefix, NEITHER); + Table.KeyValueIterator keyOnly = table.iterator(prefix, KEY_ONLY); + Table.KeyValueIterator valueOnly = table.iterator(prefix, VALUE_ONLY); + Table.KeyValueIterator keyAndValue = table.iterator(prefix, KEY_AND_VALUE); + TableIterator keyIterator = table.keyIterator(prefix); + TableIterator valueIterator = table.valueIterator(prefix)) { + while (keyAndValue.hasNext()) { + final KeyValue keyValue = keyAndValue.next(); + final K expectedKey = Objects.requireNonNull(keyValue.getKey()); + assertIterator(expectedKey, keyIterator); + + final ContainerID expectedValue = map.remove(expectedKey); + assertEquals(expectedValue, 
Objects.requireNonNull(keyValue.getValue())); + assertIterator(expectedValue, valueIterator); + + final int expectedValueSize = keyValue.getValueByteSize(); + assertEquals(ContainerID.getCodec().toPersistedFormat(expectedValue).length, expectedValueSize); + + assertIterator(expectedKey, null, 0, keyOnly); + assertIterator(null, expectedValue, expectedValueSize, valueOnly); + assertIterator(null, null, 0, neither); + } - // test iterator type - final TypedTable longTable3 = newTypedTable(1, LongCodec.get(), StringCodec.get()); - final Table.KeyValueIterator neither = longTable3.iterator(NEITHER); - final Table.KeyValueIterator keyOnly = longTable3.iterator(KEY_ONLY); - final Table.KeyValueIterator valueOnly = longTable3.iterator(VALUE_ONLY); - final Table.KeyValueIterator keyAndValue = longTable3.iterator(KEY_AND_VALUE); - while (keyAndValue.hasNext()) { - final Table.KeyValue keyValue = keyAndValue.next(); - final Long expectedKey = Objects.requireNonNull(keyValue.getKey()); - - final String expectedValue = Objects.requireNonNull(keyValue.getValue()); - assertEquals(keys.get(expectedKey).toString(), expectedValue); - - final int expectedValueSize = keyValue.getValueByteSize(); - assertEquals(expectedValue.length(), expectedValueSize); - - assertKeyValue(expectedKey, null, 0, keyOnly); - assertKeyValue(null, expectedValue, expectedValueSize, valueOnly); - assertKeyValue(null, null, 0, neither); + assertFalse(keyIterator.hasNext()); + assertFalse(valueIterator.hasNext()); + assertFalse(keyOnly.hasNext()); + assertFalse(valueOnly.hasNext()); + assertFalse(neither.hasNext()); } - assertFalse(keyOnly.hasNext()); - assertFalse(valueOnly.hasNext()); - assertFalse(neither.hasNext()); + assertEquals(0, map.size()); } - static void assertKeyValue(K expectedKey, V expectedValue, int expectedValueSize, + static void assertIterator(V expected, TableIterator iterator) { + assertTrue(iterator.hasNext()); + final V computed = iterator.next(); + assertEquals(expected, computed); + 
} + + static void assertIterator(K expectedKey, V expectedValue, int expectedValueSize, Table.KeyValueIterator iterator) { assertTrue(iterator.hasNext()); final KeyValue computed = iterator.next(); @@ -181,4 +221,56 @@ static void assertKeyValue(K expectedKey, V expectedValue, int expectedVa assertEquals(expectedValue, computed.getValue()); assertEquals(expectedValueSize, computed.getValueByteSize()); } + + @Test + public void testIteratorWithPrefixCodecBuffer() throws Exception { + final StringCodec keyCodec = StringCodec.get(); + assertTrue(keyCodec.supportCodecBuffer()); + runTestIteratorWithPrefix(5, keyCodec); + } + + @Test + public void testIteratorWithPrefixByteArray() throws Exception { + final Codec keyCodec = CodecTestUtil.newCodecWithoutCodecBuffer(StringCodec.get()); + assertFalse(keyCodec.supportCodecBuffer()); + runTestIteratorWithPrefix(6, keyCodec); + } + + void runTestIteratorWithPrefix(int tableIndex, Codec keyCodec) throws Exception { + final TypedTable table = newTypedTable(tableIndex, keyCodec, ContainerID.getCodec()); + final Map keys = newMap(10_000, ContainerID::valueOf); + for (Map.Entry e : keys.entrySet()) { + final ContainerID id = e.getValue(); + final String key = id.toString(); + table.put(key, id); + } + + for (int numDigits = 1; numDigits < 10; numDigits++) { + runTestIteratorWithPrefix(numDigits, keys, table); + } + } + + static void runTestIteratorWithPrefix(int prefixLength, Map keys, + TypedTable table) throws Exception { + final Map> prefixMap = new TreeMap<>(); + int shortIdCount = 0; + for (Map.Entry e : keys.entrySet()) { + final ContainerID id = e.getValue(); + final String key = id.toString(); + if (key.length() < prefixLength) { + shortIdCount++; + } else { + final String prefix = key.substring(0, prefixLength); + prefixMap.computeIfAbsent(prefix, k -> new TreeMap<>()).put(key, id); + } + } + + // check size + final int size = prefixMap.values().stream().map(Map::size).reduce(0, Integer::sum); + assertEquals(keys.size(), 
size + shortIdCount); + + for (Map.Entry> e : prefixMap.entrySet()) { + runTestIterators(e.getKey(), e.getValue(), table); + } + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index e0e587eea04c..4b4578894a61 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -31,7 +31,6 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.QUASI_CLOSED; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_LOCK_STRIPE_SIZE; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_LOCK_STRIPE_SIZE_DEFAULT; -import static org.apache.hadoop.hdds.utils.db.Table.KeyValueIterator.Type.VALUE_ONLY; import com.google.common.util.concurrent.Striped; import java.io.IOException; @@ -65,6 +64,7 @@ import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException; import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException; import org.apache.hadoop.ozone.common.statemachine.StateMachine; import org.apache.ratis.util.AutoCloseableLock; @@ -233,10 +233,10 @@ private long getConfiguredContainerSize(final Configuration conf) { * @throws IOException in case of error while loading the containers */ private void initialize() throws IOException { - try (Table.KeyValueIterator iterator = containerStore.iterator(VALUE_ONLY)) { + try (TableIterator iterator = containerStore.valueIterator()) { while (iterator.hasNext()) { - final ContainerInfo container = iterator.next().getValue(); + final 
ContainerInfo container = iterator.next(); Objects.requireNonNull(container, "container == null"); containers.addContainer(container); if (container.getState() == LifeCycleState.OPEN) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java index 296a86047918..78561cbc2d4d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java @@ -20,7 +20,6 @@ import static org.apache.hadoop.hdds.protocol.proto.SCMRatisProtocol.RequestType.SEQUENCE_ID; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_SEQUENCE_ID_BATCH_SIZE; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_SEQUENCE_ID_BATCH_SIZE_DEFAULT; -import static org.apache.hadoop.hdds.utils.db.Table.KeyValueIterator.Type.VALUE_ONLY; import com.google.common.base.Preconditions; import java.io.IOException; @@ -43,6 +42,7 @@ import org.apache.hadoop.hdds.scm.metadata.SCMMetadataStore; import org.apache.hadoop.hdds.utils.UniqueId; import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -366,10 +366,10 @@ public static void upgradeToSequenceId(SCMMetadataStore scmMetadataStore) // upgrade containerId if (sequenceIdTable.get(CONTAINER_ID) == null) { long largestContainerId = 0; - try (Table.KeyValueIterator iterator = - scmMetadataStore.getContainerTable().iterator(VALUE_ONLY)) { + try (TableIterator iterator + = scmMetadataStore.getContainerTable().valueIterator()) { while (iterator.hasNext()) { - ContainerInfo containerInfo = iterator.next().getValue(); + final ContainerInfo containerInfo = iterator.next(); largestContainerId = Long.max(containerInfo.getContainerID(), largestContainerId); } @@ -392,19 
+392,19 @@ public static void upgradeToCertificateSequenceId( // Start from ID 2. // ID 1 - root certificate, ID 2 - first SCM certificate. long largestCertId = BigInteger.ONE.add(BigInteger.ONE).longValueExact(); - try (Table.KeyValueIterator iterator = - scmMetadataStore.getValidSCMCertsTable().iterator(VALUE_ONLY)) { + try (TableIterator iterator + = scmMetadataStore.getValidSCMCertsTable().valueIterator()) { while (iterator.hasNext()) { - X509Certificate cert = iterator.next().getValue(); + final X509Certificate cert = iterator.next(); largestCertId = Long.max(cert.getSerialNumber().longValueExact(), largestCertId); } } - try (Table.KeyValueIterator iterator = - scmMetadataStore.getValidCertsTable().iterator(VALUE_ONLY)) { + try (TableIterator iterator + = scmMetadataStore.getValidCertsTable().valueIterator()) { while (iterator.hasNext()) { - X509Certificate cert = iterator.next().getValue(); + final X509Certificate cert = iterator.next(); largestCertId = Long.max( cert.getSerialNumber().longValueExact(), largestCertId); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateManagerImpl.java index 0a12bda00b80..ad76f41c2df8 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineStateManagerImpl.java @@ -17,8 +17,6 @@ package org.apache.hadoop.hdds.scm.pipeline; -import static org.apache.hadoop.hdds.utils.db.Table.KeyValueIterator.Type.VALUE_ONLY; - import com.google.common.base.Preconditions; import java.io.IOException; import java.util.Collection; @@ -35,6 +33,7 @@ import org.apache.hadoop.hdds.scm.metadata.DBTransactionBuffer; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.utils.db.Table; +import 
org.apache.hadoop.hdds.utils.db.TableIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -76,9 +75,9 @@ private void initialize() throws IOException { LOG.info("No pipeline exists in current db"); return; } - try (Table.KeyValueIterator iterator = pipelineStore.iterator(VALUE_ONLY)) { + try (TableIterator iterator = pipelineStore.valueIterator()) { while (iterator.hasNext()) { - Pipeline pipeline = iterator.next().getValue(); + final Pipeline pipeline = iterator.next(); pipelineStateMap.addPipeline(pipeline); nodeManager.addPipeline(pipeline); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/QuotaRepairTask.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/QuotaRepairTask.java index 9f1a27e9ff9f..272278da52b0 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/QuotaRepairTask.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/QuotaRepairTask.java @@ -19,7 +19,6 @@ import static org.apache.hadoop.hdds.utils.db.Table.KeyValueIterator.Type.KEY_AND_VALUE; import static org.apache.hadoop.hdds.utils.db.Table.KeyValueIterator.Type.KEY_ONLY; -import static org.apache.hadoop.hdds.utils.db.Table.KeyValueIterator.Type.VALUE_ONLY; import static org.apache.hadoop.ozone.OzoneConsts.OLD_QUOTA_DEFAULT; import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; @@ -50,6 +49,7 @@ import org.apache.hadoop.hdds.server.JsonUtils; import org.apache.hadoop.hdds.utils.db.DBCheckpoint; import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OzoneManager; @@ -240,11 +240,9 @@ private void prepareAllBucketInfo( } return; } - try (Table.KeyValueIterator iterator - = metadataManager.getBucketTable().iterator(VALUE_ONLY)) { + try 
(TableIterator iterator = metadataManager.getBucketTable().valueIterator()) { while (iterator.hasNext()) { - Table.KeyValue entry = iterator.next(); - OmBucketInfo bucketInfo = entry.getValue(); + final OmBucketInfo bucketInfo = iterator.next(); populateBucket(nameBucketInfoMap, idBucketInfoMap, oriBucketInfoMap, metadataManager, bucketInfo); } } From 9202b7b59a7fc0e4104fd08831086307da6c4f7f Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Sat, 21 Jun 2025 14:04:02 -0700 Subject: [PATCH 2/3] remove iterator(KeyValueIterator.Type type) --- .../java/org/apache/hadoop/hdds/utils/db/Table.java | 13 ++++--------- .../hadoop/ozone/om/service/QuotaRepairTask.java | 2 +- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java index 8660d70961da..cdb366c4ce52 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java @@ -152,18 +152,13 @@ default KeyValueIterator iterator(KEY prefix) throws RocksDatabaseEx return iterator(prefix, KeyValueIterator.Type.KEY_AND_VALUE); } - /** The same as iterator(null, type). */ - default KeyValueIterator iterator(KeyValueIterator.Type type) - throws RocksDatabaseException, CodecException { - return iterator(null, type); - } - /** * Iterate the elements in this table. *
<p>
- * Using a more restrictive type may improve performance - * since the unrequired data will not be read from the DB. - * Note that, when the prefix is non-empty, + * Note that using a more restrictive type may improve performance + * since the unrequired data may not be read from the DB. + *
<p>
+ * Note also that, when the prefix is non-empty, * using a non-key type may not improve performance * since it has to read keys for matching the prefix. * diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/QuotaRepairTask.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/QuotaRepairTask.java index 272278da52b0..81cf9b362034 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/QuotaRepairTask.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/QuotaRepairTask.java @@ -351,7 +351,7 @@ private void recalculateUsages( int count = 0; long startTime = Time.monotonicNow(); try (Table.KeyValueIterator keyIter - = table.iterator(haveValue ? KEY_AND_VALUE : KEY_ONLY)) { + = table.iterator(null, haveValue ? KEY_AND_VALUE : KEY_ONLY)) { while (keyIter.hasNext()) { count++; kvList.add(keyIter.next()); From b2e7156ab699e498cb4981e32ce734fdad9a3399 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Mon, 23 Jun 2025 11:22:25 -0700 Subject: [PATCH 3/3] Fix merge conflict --- .../java/org/apache/hadoop/hdds/utils/db/TestTypedTable.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestTypedTable.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestTypedTable.java index 5aca5a7a5603..35d468e3b5da 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestTypedTable.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestTypedTable.java @@ -263,9 +263,9 @@ static void runTestIterators(K prefix, Map map, TypedTable