From 57ffd270b31638541365063aad97c45ebd2387d0 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Tue, 9 Jan 2024 16:14:59 -0800 Subject: [PATCH 1/7] Simplified snapshotCache by using single reentrant lock instead multiple locks --- .../ozone/om/snapshot/SnapshotCache.java | 286 ++++++++++-------- 1 file changed, 154 insertions(+), 132 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java index d8932c0e7e0a..94a0506c6f61 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java @@ -20,6 +20,8 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.cache.CacheLoader; +import com.google.common.util.concurrent.Striped; +import org.apache.hadoop.hdds.utils.SimpleStriped; import org.apache.hadoop.ozone.om.IOmMetadataReader; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OmSnapshotManager; @@ -28,12 +30,14 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.FILE_NOT_FOUND; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE; @@ -44,47 +48,48 @@ public class SnapshotCache implements ReferenceCountedCallback { static final Logger LOG = LoggerFactory.getLogger(SnapshotCache.class); - // Snapshot cache internal hash map. // Key: DB snapshot table key // Value: OmSnapshot instance, each holds a DB instance handle inside // TODO: [SNAPSHOT] Consider wrapping SoftReference<> around IOmMetadataReader - private final ConcurrentHashMap> dbMap; + private final Map> dbMap; - // Linked hash set that holds OmSnapshot instances whose reference count + // Linked a hash set that holds OmSnapshot instances whose reference count // has reached zero. Those entries are eligible to be evicted and closed. // Sorted in last used order. - // Least-recently-used entry located at the beginning. - private final Set< - ReferenceCounted> pendingEvictionList; + // Least-recently used entry located at the beginning. + private final Set> pendingEvictionList; private final OmSnapshotManager omSnapshotManager; private final CacheLoader cacheLoader; // Soft-limit of the total number of snapshot DB instances allowed to be // opened on the OM. private final int cacheSizeLimit; + private final Striped striped; public SnapshotCache( OmSnapshotManager omSnapshotManager, CacheLoader cacheLoader, int cacheSizeLimit) { this.dbMap = new ConcurrentHashMap<>(); - this.pendingEvictionList = - Collections.synchronizedSet(new LinkedHashSet<>()); + this.pendingEvictionList = new LinkedHashSet<>(); this.omSnapshotManager = omSnapshotManager; this.cacheLoader = cacheLoader; this.cacheSizeLimit = cacheSizeLimit; + this.striped = SimpleStriped.readWriteLock(cacheSizeLimit, true); + } + + private Lock getLock(String key) { + ReentrantReadWriteLock readWriteLock = (ReentrantReadWriteLock) striped.get(key); + return readWriteLock.writeLock(); } @VisibleForTesting - ConcurrentHashMap> getDbMap() { + Map> getDbMap() { return dbMap; } @VisibleForTesting - Set> getPendingEvictionList() { + Set> getPendingEvictionList() { return pendingEvictionList; } @@ -104,17 +109,22 @@ public int getPendingEvictionListSize() { * @param key DB snapshot table key */ public void invalidate(String key) throws IOException { - ReferenceCounted - rcOmSnapshot = dbMap.get(key); - if (rcOmSnapshot != null) { - pendingEvictionList.remove(rcOmSnapshot); - try { - ((OmSnapshot) rcOmSnapshot.get()).close(); - } catch (IOException e) { - throw new IllegalStateException("Failed to close snapshot: " + key, e); + Lock lock = getLock(key); + lock.lock(); + try { + ReferenceCounted rcOmSnapshot = dbMap.get(key); + if (rcOmSnapshot != null) { + pendingEvictionList.remove(rcOmSnapshot); + try { + ((OmSnapshot) rcOmSnapshot.get()).close(); + } catch (IOException e) { + throw new IllegalStateException("Failed to close snapshot: " + key, e); + } + // Remove the entry from map + dbMap.remove(key); } - // Remove the entry from map - dbMap.remove(key); + } finally { + lock.unlock(); } } @@ -122,13 +132,11 @@ public void invalidate(String key) throws IOException { * Immediately invalidate all entries and close their DB instances in cache. */ public void invalidateAll() { - Iterator< - Map.Entry>> + Iterator>> it = dbMap.entrySet().iterator(); while (it.hasNext()) { - Map.Entry> - entry = it.next(); + Map.Entry> entry = it.next(); pendingEvictionList.remove(entry.getValue()); OmSnapshot omSnapshot = (OmSnapshot) entry.getValue().get(); try { @@ -152,8 +160,7 @@ public enum Reason { GARBAGE_COLLECTION_WRITE } - public ReferenceCounted get(String key) - throws IOException { + public ReferenceCounted get(String key) throws IOException { return get(key, false); } @@ -163,66 +170,68 @@ public ReferenceCounted get(String key) * @param key snapshot table key * @return an OmSnapshot instance, or null on error */ - public ReferenceCounted get(String key, - boolean skipActiveCheck) throws IOException { - // Atomic operation to initialize the OmSnapshot instance (once) if the key - // does not exist, and increment the reference count on the instance. - ReferenceCounted rcOmSnapshot = - dbMap.compute(key, (k, v) -> { - if (v == null) { - LOG.info("Loading snapshot. Table key: {}", k); - try { - v = new ReferenceCounted<>(cacheLoader.load(k), false, this); - } catch (OMException omEx) { - // Return null if the snapshot is no longer active - if (!omEx.getResult().equals(FILE_NOT_FOUND)) { - throw new IllegalStateException(omEx); + public ReferenceCounted get(String key, boolean skipActiveCheck) + throws IOException { + Lock lock = getLock(key); + lock.lock(); + ReferenceCounted rcOmSnapshot; + try { + rcOmSnapshot = + dbMap.compute(key, (k, v) -> { + if (v == null) { + LOG.info("Loading snapshot. Table key: {}", k); + try { + v = new ReferenceCounted<>(cacheLoader.load(k), false, this); + } catch (OMException omEx) { + // Return null if the snapshot is no longer active + if (!omEx.getResult().equals(FILE_NOT_FOUND)) { + throw new IllegalStateException(omEx); + } + } catch (IOException ioEx) { + // Failed to load snapshot DB + throw new IllegalStateException(ioEx); + } catch (Exception ex) { + // Unexpected and unknown exception thrown from CacheLoader#load + throw new IllegalStateException(ex); } - } catch (IOException ioEx) { - // Failed to load snapshot DB - throw new IllegalStateException(ioEx); - } catch (Exception ex) { - // Unexpected and unknown exception thrown from CacheLoader#load - throw new IllegalStateException(ex); } - } - return v; - }); - - if (rcOmSnapshot == null) { - // The only exception that would fall through the loader logic above - // is OMException with FILE_NOT_FOUND. - throw new OMException("Snapshot table key '" + key + "' not found, " - + "or the snapshot is no longer active", - OMException.ResultCodes.FILE_NOT_FOUND); - } else { - // When RC OmSnapshot is successfully loaded - rcOmSnapshot.incrementRefCount(); - } + if (v != null) { + // When RC OmSnapshot is successfully loaded + v.incrementRefCount(); + } + return v; + }); + if (rcOmSnapshot == null) { + // The only exception that would fall through the loader logic above + // is OMException with FILE_NOT_FOUND. + throw new OMException("Snapshot table key '" + key + "' not found, " + + "or the snapshot is no longer active", + OMException.ResultCodes.FILE_NOT_FOUND); + } - // If the snapshot is already loaded in cache, the check inside the loader - // above is ignored. But we would still want to reject all get()s except - // when called from SDT (and some) if the snapshot is not active anymore. - if (!skipActiveCheck && - !omSnapshotManager.isSnapshotStatus(key, SNAPSHOT_ACTIVE)) { - // Ref count was incremented. Need to decrement on exception here. - rcOmSnapshot.decrementRefCount(); - throw new OMException("Unable to load snapshot. " + - "Snapshot with table key '" + key + "' is no longer active", - FILE_NOT_FOUND); - } + // If the snapshot is already loaded in cache, the check inside the loader + // above is ignored. But we would still want to reject all get()s except + // when called from SDT (and some) if the snapshot is not active anymore. + if (!skipActiveCheck && + !omSnapshotManager.isSnapshotStatus(key, SNAPSHOT_ACTIVE)) { + // Ref count was incremented. Need to decrement on exception here. + rcOmSnapshot.decrementRefCount(); + throw new OMException("Unable to load snapshot. " + + "Snapshot with table key '" + key + "' is no longer active", + FILE_NOT_FOUND); + } - synchronized (pendingEvictionList) { - // Remove instance from clean up list when it exists. pendingEvictionList.remove(rcOmSnapshot); - } - // Check if any entries can be cleaned up. - // At this point, cache size might temporarily exceed cacheSizeLimit - // even if there are entries that can be evicted, which is fine since it - // is a soft limit. - cleanup(); + // Check if any entries can be cleaned up. + // At this point, cache size might temporarily exceed cacheSizeLimit + // even if there are entries that can be evicted, which is fine since it + // is a soft limit. + cleanup(); + } finally { + lock.unlock(); + } return rcOmSnapshot; } @@ -231,23 +240,25 @@ public ReferenceCounted get(String key, * @param key snapshot table key */ public void release(String key) { - ReferenceCounted - rcOmSnapshot = dbMap.get(key); - if (rcOmSnapshot == null) { - throw new IllegalArgumentException( - "Key '" + key + "' does not exist in cache"); - } + Lock lock = getLock(key); + lock.lock(); + try { + ReferenceCounted rcOmSnapshot = dbMap.get(key); + if (rcOmSnapshot == null) { + throw new IllegalArgumentException("Key '" + key + "' does not exist in cache"); + } - if (rcOmSnapshot.decrementRefCount() == 0L) { - synchronized (pendingEvictionList) { + if (rcOmSnapshot.decrementRefCount() == 0L) { // v is eligible to be evicted and closed pendingEvictionList.add(rcOmSnapshot); } - } - // The cache size might have already exceed the soft limit - // Thus triggering cleanup() to check and evict if applicable - cleanup(); + // The cache size might have already exceed the soft limit + // Thus triggering cleanup() to check and evict if applicable + cleanup(); + } finally { + lock.unlock(); + } } /** @@ -256,7 +267,13 @@ public void release(String key) { */ public void release(OmSnapshot omSnapshot) { final String snapshotTableKey = omSnapshot.getSnapshotTableKey(); - release(snapshotTableKey); + Lock lock = getLock(snapshotTableKey); + lock.lock(); + try { + release(snapshotTableKey); + } finally { + lock.unlock(); + } } /** @@ -266,7 +283,12 @@ public void release(OmSnapshot omSnapshot) { */ @Override public void callback(ReferenceCounted referenceCounted) { - synchronized (pendingEvictionList) { + ReferenceCounted rcOmSnapshot = + (ReferenceCounted) referenceCounted; + String key = ((OmSnapshot) rcOmSnapshot.get()).getSnapshotTableKey(); + Lock lock = getLock(key); + lock.lock(); + try { if (referenceCounted.getTotalRefCount() == 0L) { // Reference count reaches zero, add to pendingEvictionList Preconditions.checkState( @@ -277,6 +299,8 @@ public void callback(ReferenceCounted referenceCounted) { } else if (referenceCounted.getTotalRefCount() == 1L) { pendingEvictionList.remove(referenceCounted); } + } finally { + lock.unlock(); } } @@ -284,18 +308,7 @@ public void callback(ReferenceCounted referenceCounted) { * Wrapper for cleanupInternal() that is synchronized to prevent multiple * threads from interleaving into the cleanup method. */ - private synchronized void cleanup() { - synchronized (pendingEvictionList) { - cleanupInternal(); - } - } - - /** - * If cache size exceeds soft limit, attempt to clean up and close the - * instances that has zero reference count. - * TODO: [SNAPSHOT] Add new ozone debug CLI command to trigger this directly. - */ - private void cleanupInternal() { + private void cleanup() { long numEntriesToEvict = (long) dbMap.size() - cacheSizeLimit; while (numEntriesToEvict > 0L && pendingEvictionList.size() > 0) { // Get the first instance in the clean up list @@ -304,31 +317,41 @@ private void cleanupInternal() { OmSnapshot omSnapshot = (OmSnapshot) rcOmSnapshot.get(); LOG.debug("Evicting OmSnapshot instance {} with table key {}", rcOmSnapshot, omSnapshot.getSnapshotTableKey()); - // Sanity check - Preconditions.checkState(rcOmSnapshot.getTotalRefCount() == 0L, - "Illegal state: OmSnapshot reference count non-zero (" - + rcOmSnapshot.getTotalRefCount() + ") but shows up in the " - + "clean up list"); - final String key = omSnapshot.getSnapshotTableKey(); - final ReferenceCounted result = - dbMap.remove(key); - // Sanity check - Preconditions.checkState(rcOmSnapshot == result, - "Cache map entry removal failure. The cache is in an inconsistent " - + "state. Expected OmSnapshot instance: " + rcOmSnapshot - + ", actual: " + result + " for key: " + key); + Lock lock = getLock(key); + lock.lock(); + try { + // Sanity check + Preconditions.checkState(rcOmSnapshot.getTotalRefCount() == 0L, + "Illegal state: OmSnapshot reference count non-zero (" + + rcOmSnapshot.getTotalRefCount() + ") but shows up in the " + + "clean up list"); + final ReferenceCounted result = dbMap.remove(key); + // Sanity check + Preconditions.checkState(rcOmSnapshot == result, + "Cache map entry removal failure. The cache is in an inconsistent " + + "state. Expected OmSnapshot instance: " + rcOmSnapshot + + ", actual: " + result + " for key: " + key); + + if (result.getTotalRefCount() != 0) { + LOG.warn("Snapshot {} is still being referenced ({}), skipping its clean up", + key, rcOmSnapshot.getTotalRefCount()); + continue; + } - pendingEvictionList.remove(result); + pendingEvictionList.remove(result); - // Close the instance, which also closes its DB handle. - try { - ((OmSnapshot) rcOmSnapshot.get()).close(); - } catch (IOException ex) { - throw new IllegalStateException("Error while closing snapshot DB", ex); - } + // Close the instance, which also closes its DB handle. + try { + ((OmSnapshot) rcOmSnapshot.get()).close(); + } catch (IOException ex) { + throw new IllegalStateException("Error while closing snapshot DB", ex); + } - --numEntriesToEvict; + --numEntriesToEvict; + } finally { + lock.unlock(); + } } // Print warning message if actual cache size is exceeding the soft limit @@ -383,7 +406,7 @@ public boolean isConsistent() { pendingEvictionList.forEach(v -> { // Objects in pendingEvictionList should still be in dbMap - if (!dbMap.contains(v)) { + if (!dbMap.containsValue(v)) { LOG.error("Instance '{}' is in pendingEvictionList but not in " + "dbMap", v); } @@ -396,5 +419,4 @@ public boolean isConsistent() { return true; } - } From 5daa3a96649ebe1979701fd26eecb53ae5daab48 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Tue, 9 Jan 2024 18:05:22 -0800 Subject: [PATCH 2/7] Fixed unit test --- .../org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java | 5 ++--- .../hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java index 94a0506c6f61..1b64320f690b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java @@ -30,11 +30,11 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.Map; import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -70,7 +70,7 @@ public SnapshotCache( OmSnapshotManager omSnapshotManager, CacheLoader cacheLoader, int cacheSizeLimit) { - this.dbMap = new ConcurrentHashMap<>(); + this.dbMap = new HashMap<>(); this.pendingEvictionList = new LinkedHashSet<>(); this.omSnapshotManager = omSnapshotManager; this.cacheLoader = cacheLoader; @@ -228,7 +228,6 @@ public ReferenceCounted get(String key, boolea // even if there are entries that can be evicted, which is fine since it // is a soft limit. cleanup(); - } finally { lock.unlock(); } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java index 54d0a116ef7d..a24e1faf361f 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java @@ -417,6 +417,7 @@ private OmSnapshot getMockedOmSnapshot(String snapshot) { when(omSnapshot.getName()).thenReturn(snapshot); when(omSnapshot.getMetadataManager()).thenReturn(omMetadataManager); when(omMetadataManager.getStore()).thenReturn(dbStore); + when(omSnapshot.getSnapshotTableKey()).thenReturn(snapshot); return omSnapshot; } From 369df59898a17f6cbc0b3f515229b76af452f010 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Tue, 9 Jan 2024 18:40:23 -0800 Subject: [PATCH 3/7] Removed pendingEvictionList and its usage from snapshotCache --- .../ozone/om/TestSnapshotDeletingService.java | 1 - .../ozone/om/snapshot/SnapshotCache.java | 150 +++--------------- .../ozone/om/snapshot/TestSnapshotCache.java | 77 +-------- 3 files changed, 28 insertions(+), 200 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestSnapshotDeletingService.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestSnapshotDeletingService.java index a14255d3c15e..12844c23cd7b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestSnapshotDeletingService.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestSnapshotDeletingService.java @@ -194,7 +194,6 @@ public void testMultipleSnapshotKeyReclaim() throws Exception { // /vol1/bucket2/bucket2snap1 has been cleaned up from cache map SnapshotCache snapshotCache = om.getOmSnapshotManager().getSnapshotCache(); assertEquals(2, snapshotCache.size()); - assertEquals(2, snapshotCache.getPendingEvictionListSize()); } @SuppressWarnings("checkstyle:MethodLength") diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java index 1b64320f690b..628e38700e75 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java @@ -18,7 +18,6 @@ package org.apache.hadoop.ozone.om.snapshot; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.cache.CacheLoader; import com.google.common.util.concurrent.Striped; import org.apache.hadoop.hdds.utils.SimpleStriped; @@ -32,9 +31,7 @@ import java.io.IOException; import java.util.HashMap; import java.util.Iterator; -import java.util.LinkedHashSet; import java.util.Map; -import java.util.Set; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -54,11 +51,6 @@ public class SnapshotCache implements ReferenceCountedCallback { // TODO: [SNAPSHOT] Consider wrapping SoftReference<> around IOmMetadataReader private final Map> dbMap; - // Linked a hash set that holds OmSnapshot instances whose reference count - // has reached zero. Those entries are eligible to be evicted and closed. - // Sorted in last used order. - // Least-recently used entry located at the beginning. - private final Set> pendingEvictionList; private final OmSnapshotManager omSnapshotManager; private final CacheLoader cacheLoader; // Soft-limit of the total number of snapshot DB instances allowed to be @@ -71,7 +63,6 @@ public SnapshotCache( CacheLoader cacheLoader, int cacheSizeLimit) { this.dbMap = new HashMap<>(); - this.pendingEvictionList = new LinkedHashSet<>(); this.omSnapshotManager = omSnapshotManager; this.cacheLoader = cacheLoader; this.cacheSizeLimit = cacheSizeLimit; @@ -88,11 +79,6 @@ Map> getDbMap() { return dbMap; } - @VisibleForTesting - Set> getPendingEvictionList() { - return pendingEvictionList; - } - /** * @return number of DB instances currently held in cache. */ @@ -100,10 +86,6 @@ public int size() { return dbMap.size(); } - public int getPendingEvictionListSize() { - return pendingEvictionList.size(); - } - /** * Immediately invalidate an entry. * @param key DB snapshot table key @@ -114,7 +96,6 @@ public void invalidate(String key) throws IOException { try { ReferenceCounted rcOmSnapshot = dbMap.get(key); if (rcOmSnapshot != null) { - pendingEvictionList.remove(rcOmSnapshot); try { ((OmSnapshot) rcOmSnapshot.get()).close(); } catch (IOException e) { @@ -137,7 +118,6 @@ public void invalidateAll() { while (it.hasNext()) { Map.Entry> entry = it.next(); - pendingEvictionList.remove(entry.getValue()); OmSnapshot omSnapshot = (OmSnapshot) entry.getValue().get(); try { // TODO: If wrapped with SoftReference<>, omSnapshot could be null? @@ -221,8 +201,6 @@ public ReferenceCounted get(String key, boolea FILE_NOT_FOUND); } - pendingEvictionList.remove(rcOmSnapshot); - // Check if any entries can be cleaned up. // At this point, cache size might temporarily exceed cacheSizeLimit // even if there are entries that can be evicted, which is fine since it @@ -246,13 +224,8 @@ public void release(String key) { if (rcOmSnapshot == null) { throw new IllegalArgumentException("Key '" + key + "' does not exist in cache"); } - - if (rcOmSnapshot.decrementRefCount() == 0L) { - // v is eligible to be evicted and closed - pendingEvictionList.add(rcOmSnapshot); - } - - // The cache size might have already exceed the soft limit + rcOmSnapshot.decrementRefCount(); + // The cache size might have already exceeded the soft limit // Thus triggering cleanup() to check and evict if applicable cleanup(); } finally { @@ -278,6 +251,7 @@ public void release(OmSnapshot omSnapshot) { /** * Callback method used to enqueue or dequeue ReferenceCounted from * pendingEvictionList. + * * @param referenceCounted ReferenceCounted object */ @Override @@ -288,16 +262,7 @@ public void callback(ReferenceCounted referenceCounted) { Lock lock = getLock(key); lock.lock(); try { - if (referenceCounted.getTotalRefCount() == 0L) { - // Reference count reaches zero, add to pendingEvictionList - Preconditions.checkState( - !pendingEvictionList.contains(referenceCounted), - "SnapshotCache is inconsistent. Entry should not be in the " - + "pendingEvictionList when ref count just reached zero."); - pendingEvictionList.add(referenceCounted); - } else if (referenceCounted.getTotalRefCount() == 1L) { - pendingEvictionList.remove(referenceCounted); - } + referenceCounted.getTotalRefCount(); } finally { lock.unlock(); } @@ -308,38 +273,34 @@ public void callback(ReferenceCounted referenceCounted) { * threads from interleaving into the cleanup method. */ private void cleanup() { - long numEntriesToEvict = (long) dbMap.size() - cacheSizeLimit; - while (numEntriesToEvict > 0L && pendingEvictionList.size() > 0) { + if (dbMap.size() > cacheSizeLimit) { + cleanupInternal(); + } + } + + /** + * If cache size exceeds soft limit, attempt to clean up and close the + * instances that has zero reference count. + * TODO: [SNAPSHOT] Add new ozone debug CLI command to trigger this directly. + */ + private void cleanupInternal() { + Iterator>> iterator = + dbMap.entrySet().iterator(); + while (iterator.hasNext()) { + Map.Entry> entry = iterator.next(); // Get the first instance in the clean up list - ReferenceCounted rcOmSnapshot = - pendingEvictionList.iterator().next(); + ReferenceCounted rcOmSnapshot = entry.getValue(); OmSnapshot omSnapshot = (OmSnapshot) rcOmSnapshot.get(); - LOG.debug("Evicting OmSnapshot instance {} with table key {}", - rcOmSnapshot, omSnapshot.getSnapshotTableKey()); final String key = omSnapshot.getSnapshotTableKey(); Lock lock = getLock(key); lock.lock(); try { - // Sanity check - Preconditions.checkState(rcOmSnapshot.getTotalRefCount() == 0L, - "Illegal state: OmSnapshot reference count non-zero (" - + rcOmSnapshot.getTotalRefCount() + ") but shows up in the " - + "clean up list"); - final ReferenceCounted result = dbMap.remove(key); - // Sanity check - Preconditions.checkState(rcOmSnapshot == result, - "Cache map entry removal failure. The cache is in an inconsistent " - + "state. Expected OmSnapshot instance: " + rcOmSnapshot - + ", actual: " + result + " for key: " + key); - - if (result.getTotalRefCount() != 0) { - LOG.warn("Snapshot {} is still being referenced ({}), skipping its clean up", + if (rcOmSnapshot.getTotalRefCount() != 0) { + LOG.debug("Snapshot {} is still being referenced ({}), skipping its clean up", key, rcOmSnapshot.getTotalRefCount()); continue; } - pendingEvictionList.remove(result); - // Close the instance, which also closes its DB handle. try { ((OmSnapshot) rcOmSnapshot.get()).close(); @@ -347,75 +308,10 @@ private void cleanup() { throw new IllegalStateException("Error while closing snapshot DB", ex); } - --numEntriesToEvict; + iterator.remove(); } finally { lock.unlock(); } } - - // Print warning message if actual cache size is exceeding the soft limit - // even after the cleanup procedure above. - if ((long) dbMap.size() > cacheSizeLimit) { - LOG.warn("Current snapshot cache size ({}) is exceeding configured " - + "soft-limit ({}) after possible evictions.", - dbMap.size(), cacheSizeLimit); - - Preconditions.checkState(pendingEvictionList.size() == 0); - } - } - - /** - * Check cache consistency. - * @return true if the cache internal structure is consistent to the best of - * its knowledge, false if found to be inconsistent and details logged. - */ - @VisibleForTesting - public boolean isConsistent() { - // Uses dbMap as the source of truth for this check, whether dbMap entries - // are in OM DB's snapshotInfoTable is out of the scope of this check. - - LOG.info("dbMap has {} entries", dbMap.size()); - LOG.info("pendingEvictionList has {} entries", - pendingEvictionList.size()); - - // pendingEvictionList must be empty if cache size exceeds limit - if (dbMap.size() > cacheSizeLimit) { - if (pendingEvictionList.size() != 0) { - // cleanup() is not functioning correctly - LOG.error("pendingEvictionList is not empty even when cache size" - + "exceeds limit"); - } - } - - dbMap.forEach((k, v) -> { - if (v.getTotalRefCount() == 0L) { - long threadRefCount = v.getCurrentThreadRefCount(); - if (threadRefCount != 0L) { - LOG.error("snapshotTableKey='{}' instance has inconsistent " - + "ref count. Total ref count is 0 but thread " - + "ref count is {}", k, threadRefCount); - } - // Zero ref count values in dbMap must be in pendingEvictionList - if (!pendingEvictionList.contains(v)) { - LOG.error("snapshotTableKey='{}' instance has zero ref count but " - + "not in pendingEvictionList", k); - } - } - }); - - pendingEvictionList.forEach(v -> { - // Objects in pendingEvictionList should still be in dbMap - if (!dbMap.containsValue(v)) { - LOG.error("Instance '{}' is in pendingEvictionList but not in " - + "dbMap", v); - } - // Instances in pendingEvictionList must have ref count equals 0 - if (v.getTotalRefCount() != 0L) { - LOG.error("Instance '{}' is in pendingEvictionList but ref count " - + "is not zero", v); - } - }); - - return true; } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java index f8c939297781..621baae106c5 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java @@ -39,7 +39,6 @@ import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; @@ -102,7 +101,6 @@ void testGet() throws IOException { assertNotNull(omSnapshot.get()); assertInstanceOf(OmSnapshot.class, omSnapshot.get()); assertEquals(1, snapshotCache.size()); - assertTrue(snapshotCache.isConsistent()); } @Test @@ -113,7 +111,6 @@ void testGetTwice() throws IOException { snapshotCache.get(dbKey1); assertNotNull(omSnapshot1); assertEquals(1, snapshotCache.size()); - assertTrue(snapshotCache.isConsistent()); ReferenceCounted omSnapshot1again = snapshotCache.get(dbKey1); @@ -121,7 +118,6 @@ void testGetTwice() throws IOException { assertEquals(omSnapshot1, omSnapshot1again); assertEquals(omSnapshot1.get(), omSnapshot1again.get()); assertEquals(1, snapshotCache.size()); - assertTrue(snapshotCache.isConsistent()); } @Test @@ -133,15 +129,10 @@ void testReleaseByDbKey() throws IOException { assertNotNull(omSnapshot1); assertNotNull(omSnapshot1.get()); assertEquals(1, snapshotCache.size()); - assertEquals(0, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); snapshotCache.release(dbKey1); // Entry will not be immediately evicted assertEquals(1, snapshotCache.size()); - // Entry is queued for eviction as its ref count reaches zero - assertEquals(1, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); } @Test @@ -152,15 +143,10 @@ void testReleaseByOmSnapshotInstance() throws IOException { snapshotCache.get(dbKey1); assertNotNull(omSnapshot1); assertEquals(1, snapshotCache.size()); - assertEquals(0, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); snapshotCache.release((OmSnapshot) omSnapshot1.get()); // Entry will not be immediately evicted assertEquals(1, snapshotCache.size()); - // Entry is queued for eviction as its ref count reaches zero - assertEquals(1, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); } @Test @@ -171,16 +157,13 @@ void testInvalidate() throws IOException { snapshotCache.get(dbKey1); assertNotNull(omSnapshot); assertEquals(1, snapshotCache.size()); - assertTrue(snapshotCache.isConsistent()); snapshotCache.release(dbKey1); // Entry will not be immediately evicted assertEquals(1, snapshotCache.size()); - assertTrue(snapshotCache.isConsistent()); snapshotCache.invalidate(dbKey1); assertEquals(0, snapshotCache.size()); - assertTrue(snapshotCache.isConsistent()); } @Test @@ -191,7 +174,6 @@ void testInvalidateAll() throws IOException { snapshotCache.get(dbKey1); assertNotNull(omSnapshot1); assertEquals(1, snapshotCache.size()); - assertTrue(snapshotCache.isConsistent()); final String dbKey2 = "dbKey2"; ReferenceCounted omSnapshot2 = @@ -200,31 +182,22 @@ void testInvalidateAll() throws IOException { assertEquals(2, snapshotCache.size()); // Should be difference omSnapshot instances assertNotEquals(omSnapshot1, omSnapshot2); - assertTrue(snapshotCache.isConsistent()); final String dbKey3 = "dbKey3"; ReferenceCounted omSnapshot3 = snapshotCache.get(dbKey3); assertNotNull(omSnapshot3); assertEquals(3, snapshotCache.size()); - assertEquals(0, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); snapshotCache.release(dbKey1); // Entry will not be immediately evicted assertEquals(3, snapshotCache.size()); - assertEquals(1, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); snapshotCache.invalidate(dbKey1); assertEquals(2, snapshotCache.size()); - assertEquals(0, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); snapshotCache.invalidateAll(); assertEquals(0, snapshotCache.size()); - assertEquals(0, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); } private void assertEntryExistence(String key, boolean shouldExist) { @@ -248,39 +221,27 @@ void testEviction1() throws IOException { final String dbKey1 = "dbKey1"; snapshotCache.get(dbKey1); assertEquals(1, snapshotCache.size()); - assertTrue(snapshotCache.isConsistent()); snapshotCache.release(dbKey1); assertEquals(1, snapshotCache.size()); - assertEquals(1, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); final String dbKey2 = "dbKey2"; snapshotCache.get(dbKey2); assertEquals(2, snapshotCache.size()); - assertTrue(snapshotCache.isConsistent()); snapshotCache.release(dbKey2); assertEquals(2, snapshotCache.size()); - assertEquals(2, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); final String dbKey3 = "dbKey3"; snapshotCache.get(dbKey3); assertEquals(3, snapshotCache.size()); - assertTrue(snapshotCache.isConsistent()); snapshotCache.release(dbKey3); assertEquals(3, snapshotCache.size()); - assertEquals(3, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); final String dbKey4 = "dbKey4"; snapshotCache.get(dbKey4); - // dbKey1 would have been evicted by the end of the last get() because - // it was release()d. - assertEquals(3, snapshotCache.size()); - assertTrue(snapshotCache.isConsistent()); + // dbKey1, dbKey2 and dbKey3 would have been evicted by the end of the last get() because + // those were release()d. + assertEquals(1, snapshotCache.size()); assertEntryExistence(dbKey1, false); - assertEquals(2, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); } @Test @@ -290,25 +251,20 @@ void testEviction2() throws IOException { final String dbKey1 = "dbKey1"; snapshotCache.get(dbKey1); assertEquals(1, snapshotCache.size()); - assertTrue(snapshotCache.isConsistent()); final String dbKey2 = "dbKey2"; snapshotCache.get(dbKey2); assertEquals(2, snapshotCache.size()); - assertTrue(snapshotCache.isConsistent()); final String dbKey3 = "dbKey3"; snapshotCache.get(dbKey3); assertEquals(3, snapshotCache.size()); - assertTrue(snapshotCache.isConsistent()); final String dbKey4 = "dbKey4"; snapshotCache.get(dbKey4); // dbKey1 would not have been evicted because it is not release()d assertEquals(4, snapshotCache.size()); assertEntryExistence(dbKey1, true); - assertEquals(0, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); // Releasing dbKey2 at this point should immediately trigger its eviction // because the cache size exceeded the soft limit @@ -316,8 +272,6 @@ void testEviction2() throws IOException { assertEquals(3, snapshotCache.size()); assertEntryExistence(dbKey2, false); assertEntryExistence(dbKey1, true); - assertEquals(0, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); } @Test @@ -329,67 +283,46 @@ void testEviction3WithClose() throws IOException { snapshotCache.get(dbKey1)) { assertEquals(1L, rcOmSnapshot.getTotalRefCount()); assertEquals(1, snapshotCache.size()); - assertEquals(0, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); } // ref count should have been decreased because it would be close()d // upon exiting try-with-resources. assertEquals(0L, snapshotCache.getDbMap().get(dbKey1).getTotalRefCount()); assertEquals(1, snapshotCache.size()); - assertEquals(1, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); final String dbKey2 = "dbKey2"; try (ReferenceCounted rcOmSnapshot = snapshotCache.get(dbKey2)) { assertEquals(1L, rcOmSnapshot.getTotalRefCount()); assertEquals(2, snapshotCache.size()); - assertEquals(1, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); // Get dbKey2 entry a second time try (ReferenceCounted rcOmSnapshot2 = snapshotCache.get(dbKey2)) { assertEquals(2L, rcOmSnapshot.getTotalRefCount()); assertEquals(2L, rcOmSnapshot2.getTotalRefCount()); assertEquals(2, snapshotCache.size()); - assertEquals(1, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); } assertEquals(1L, rcOmSnapshot.getTotalRefCount()); - assertTrue(snapshotCache.isConsistent()); } assertEquals(0L, snapshotCache.getDbMap().get(dbKey2).getTotalRefCount()); assertEquals(2, snapshotCache.size()); - assertEquals(2, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); final String dbKey3 = "dbKey3"; try (ReferenceCounted rcOmSnapshot = snapshotCache.get(dbKey3)) { assertEquals(1L, rcOmSnapshot.getTotalRefCount()); assertEquals(3, snapshotCache.size()); - assertEquals(2, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); } assertEquals(0L, snapshotCache.getDbMap().get(dbKey3).getTotalRefCount()); assertEquals(3, snapshotCache.size()); - assertEquals(3, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); final String dbKey4 = "dbKey4"; try (ReferenceCounted rcOmSnapshot = snapshotCache.get(dbKey4)) { assertEquals(1L, rcOmSnapshot.getTotalRefCount()); - assertEquals(3, snapshotCache.size()); - // An entry has been evicted at this point - assertEquals(2, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); + assertEquals(1, snapshotCache.size()); } assertEquals(0L, snapshotCache.getDbMap().get(dbKey4).getTotalRefCount()); - // Reached cache size limit - assertEquals(3, snapshotCache.size()); - assertEquals(3, snapshotCache.getPendingEvictionList().size()); - assertTrue(snapshotCache.isConsistent()); + assertEquals(1, snapshotCache.size()); } } From 7b0285a335b21fec3e94789478928335e15b27c5 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Wed, 10 Jan 2024 09:20:56 -0800 Subject: [PATCH 4/7] Removed ReferenceCountedCallback --- .../ozone/om/snapshot/ReferenceCounted.java | 18 ++---------- .../om/snapshot/ReferenceCountedCallback.java | 25 ----------------- .../ozone/om/snapshot/SnapshotCache.java | 28 +++---------------- 3 files changed, 7 insertions(+), 64 deletions(-) delete mode 100644 hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/ReferenceCountedCallback.java diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/ReferenceCounted.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/ReferenceCounted.java index e064812de8ef..30210d7a532a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/ReferenceCounted.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/ReferenceCounted.java @@ -25,8 +25,7 @@ /** * Add reference counter to an object instance. */ -public class ReferenceCounted - implements AutoCloseable { +public class ReferenceCounted implements AutoCloseable { /** * Object that is being reference counted. e.g. OmSnapshot @@ -95,12 +94,6 @@ public long incrementRefCount() { long newValTotal = refCount.incrementAndGet(); Preconditions.checkState(newValTotal > 0L, "Total reference count overflown"); - - if (refCount.get() == 1L) { - // ref count increased to one (from zero), remove from - // pendingEvictionList if added - parentWithCallback.callback(this); - } } return refCount.get(); @@ -131,11 +124,6 @@ public long decrementRefCount() { long newValTotal = refCount.decrementAndGet(); Preconditions.checkState(newValTotal >= 0L, "Total reference count underflow"); - - if (refCount.get() == 0L) { - // ref count decreased to zero, add to pendingEvictionList - parentWithCallback.callback(this); - } } return refCount.get(); @@ -153,7 +141,7 @@ public long getTotalRefCount() { } /** - * @return Number of times current thread has held reference to the object. + * @return Number of times the current thread has held reference to the object. */ public long getCurrentThreadRefCount() { if (refCount == null || threadMap == null) { @@ -166,7 +154,7 @@ public long getCurrentThreadRefCount() { @Override public void close() { - // Decrease ref count by 1 when close() is called on this object + // Decrease ref count by 1 when close() is called on this object, // so it is eligible to be used with try-with-resources. decrementRefCount(); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/ReferenceCountedCallback.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/ReferenceCountedCallback.java deleted file mode 100644 index d63f5783b808..000000000000 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/ReferenceCountedCallback.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.ozone.om.snapshot; - -/** - * Callback interface for ReferenceCounted. - */ -public interface ReferenceCountedCallback { - void callback(ReferenceCounted referenceCounted); -} diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java index 628e38700e75..a3659698fa51 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java @@ -42,7 +42,7 @@ /** * Thread-safe custom unbounded LRU cache to manage open snapshot DB instances. */ -public class SnapshotCache implements ReferenceCountedCallback { +public class SnapshotCache { static final Logger LOG = LoggerFactory.getLogger(SnapshotCache.class); // Snapshot cache internal hash map. @@ -248,31 +248,11 @@ public void release(OmSnapshot omSnapshot) { } } - /** - * Callback method used to enqueue or dequeue ReferenceCounted from - * pendingEvictionList. - * - * @param referenceCounted ReferenceCounted object - */ - @Override - public void callback(ReferenceCounted referenceCounted) { - ReferenceCounted rcOmSnapshot = - (ReferenceCounted) referenceCounted; - String key = ((OmSnapshot) rcOmSnapshot.get()).getSnapshotTableKey(); - Lock lock = getLock(key); - lock.lock(); - try { - referenceCounted.getTotalRefCount(); - } finally { - lock.unlock(); - } - } - /** * Wrapper for cleanupInternal() that is synchronized to prevent multiple * threads from interleaving into the cleanup method. */ - private void cleanup() { + private synchronized void cleanup() { if (dbMap.size() > cacheSizeLimit) { cleanupInternal(); } @@ -288,14 +268,14 @@ private void cleanupInternal() { dbMap.entrySet().iterator(); while (iterator.hasNext()) { Map.Entry> entry = iterator.next(); - // Get the first instance in the clean up list + // Get the first instance in the clean-up list ReferenceCounted rcOmSnapshot = entry.getValue(); OmSnapshot omSnapshot = (OmSnapshot) rcOmSnapshot.get(); final String key = omSnapshot.getSnapshotTableKey(); Lock lock = getLock(key); lock.lock(); try { - if (rcOmSnapshot.getTotalRefCount() != 0) { + if (rcOmSnapshot.getTotalRefCount() > 0) { LOG.debug("Snapshot {} is still being referenced ({}), skipping its clean up", key, rcOmSnapshot.getTotalRefCount()); continue; From 684fd31034c9dcded6d7afea4e19f249ddf3673c Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Wed, 10 Jan 2024 16:25:40 -0800 Subject: [PATCH 5/7] Moved out cleanup in get function out of the lock --- .../hadoop/ozone/om/snapshot/SnapshotCache.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java index a3659698fa51..074e8916ea5a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java @@ -200,15 +200,14 @@ public ReferenceCounted get(String key, boolea "Snapshot with table key '" + key + "' is no longer active", FILE_NOT_FOUND); } - - // Check if any entries can be cleaned up. - // At this point, cache size might temporarily exceed cacheSizeLimit - // even if there are entries that can be evicted, which is fine since it - // is a soft limit. - cleanup(); } finally { lock.unlock(); } + + // Check if any entries can be cleaned up. + // At this point, cache size might temporarily exceed cacheSizeLimit + // even if there are entries that can be evicted, which is fine since it is a soft limit. + cleanup(); return rcOmSnapshot; } From 98a771b12ad93b2dbdeed8acdd0e888c66269a3d Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Wed, 10 Jan 2024 16:35:54 -0800 Subject: [PATCH 6/7] Moved out cleanup in release function out of the lock --- .../org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java index 074e8916ea5a..a100133c630f 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java @@ -224,12 +224,12 @@ public void release(String key) { throw new IllegalArgumentException("Key '" + key + "' does not exist in cache"); } rcOmSnapshot.decrementRefCount(); - // The cache size might have already exceeded the soft limit - // Thus triggering cleanup() to check and evict if applicable - cleanup(); } finally { lock.unlock(); } + // The cache size might have already exceeded the soft limit + // Thus triggering cleanup() to check and evict if applicable + cleanup(); } /** From dee9b7ae2915e04b1a53798ec960d2a74e29a858 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Wed, 10 Jan 2024 16:43:25 -0800 Subject: [PATCH 7/7] minor fix --- .../org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java index a100133c630f..e7507a0fbb00 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java @@ -94,15 +94,14 @@ public void invalidate(String key) throws IOException { Lock lock = getLock(key); lock.lock(); try { - ReferenceCounted rcOmSnapshot = dbMap.get(key); + // Remove the entry from the map. + ReferenceCounted rcOmSnapshot = dbMap.remove(key); if (rcOmSnapshot != null) { try { ((OmSnapshot) rcOmSnapshot.get()).close(); } catch (IOException e) { throw new IllegalStateException("Failed to close snapshot: " + key, e); } - // Remove the entry from map - dbMap.remove(key); } } finally { lock.unlock();