From 740f8f5ebed137f1cfe0b6b332b5b4668b3bd491 Mon Sep 17 00:00:00 2001 From: Vinayak Hegde Date: Thu, 2 May 2024 18:25:16 +0530 Subject: [PATCH 1/7] Integrated Data Tiering to cache compacted blocks on write --- .../hadoop/hbase/io/hfile/BlockCache.java | 13 ++ .../hadoop/hbase/io/hfile/BlockCacheKey.java | 15 ++ .../hbase/io/hfile/CombinedBlockCache.java | 17 +- .../apache/hadoop/hbase/io/hfile/HFile.java | 5 + .../hbase/io/hfile/HFileWriterImpl.java | 52 ++++- .../hbase/io/hfile/bucket/BucketCache.java | 18 ++ .../regionserver/DataTieringManager.java | 33 +++- .../hbase/regionserver/HRegionFileSystem.java | 25 +++ .../hbase/regionserver/TimeRangeTracker.java | 4 +- .../regionserver/TestDataTieringManager.java | 183 +++++++++++++++--- 10 files changed, 330 insertions(+), 35 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java index ac83af1053a0..c1e72939a138 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java @@ -207,6 +207,19 @@ default Optional shouldCacheFile(HFileInfo hFileInfo, Configuration con return Optional.empty(); } + /** + * Checks whether the block represented by the given key should be cached or not. This method may + * not be overridden by all implementing classes. In such cases, the returned Optional will be + * empty. For subclasses implementing this logic, the returned Optional would contain the boolean + * value reflecting if the passed block should indeed be cached. + * @param key The key representing the block to check if it should be cached. + * @return An empty Optional if this method is not supported; otherwise, the returned Optional + * contains the boolean value indicating if the block should be cached. + */ + default Optional shouldCacheBlock(BlockCacheKey key) { + return Optional.empty(); + } + /** * Checks whether the block for the passed key is already cached. This method may not be * overridden by all implementing classes. In such cases, the returned Optional will be empty. For diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java index bf22d38e373b..dcce80abdb51 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.io.hfile; +import java.util.Optional; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.util.ClassSize; @@ -33,6 +34,7 @@ public class BlockCacheKey implements HeapSize, java.io.Serializable { private BlockType blockType; private final boolean isPrimaryReplicaBlock; private Path filePath; + private Optional maxTimestamp = Optional.empty(); /** * Construct a new BlockCacheKey @@ -59,6 +61,16 @@ public BlockCacheKey(Path hfilePath, long offset, boolean isPrimaryReplica, Bloc this.blockType = blockType; } + public BlockCacheKey(Path hfilePath, long offset, boolean isPrimaryReplica, BlockType blockType, + long maxTimestamp) { + this.filePath = hfilePath; + this.isPrimaryReplicaBlock = isPrimaryReplica; + this.hfileName = hfilePath.getName(); + this.offset = offset; + this.blockType = blockType; + this.maxTimestamp = Optional.of(maxTimestamp); + } + @Override public int hashCode() { return hfileName.hashCode() * 127 + (int) (offset ^ (offset >>> 32)); @@ -117,4 +129,7 @@ public Path getFilePath() { return filePath; } + public Optional getMaxTimestamp() { + return maxTimestamp; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java index b12510cdccd0..ba0b07f9d7d3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java @@ -484,11 +484,20 @@ public Optional blockFitsIntoTheCache(HFileBlock block) { @Override public Optional shouldCacheFile(HFileInfo hFileInfo, Configuration conf) { - Optional l1Result = l1Cache.shouldCacheFile(hFileInfo, conf); - Optional l2Result = l2Cache.shouldCacheFile(hFileInfo, conf); + return combineCacheResults(l1Cache.shouldCacheFile(hFileInfo, conf), + l2Cache.shouldCacheFile(hFileInfo, conf)); + } + + @Override + public Optional shouldCacheBlock(BlockCacheKey key) { + return combineCacheResults(l1Cache.shouldCacheBlock(key), l2Cache.shouldCacheBlock(key)); + } + + private Optional combineCacheResults(Optional result1, + Optional result2) { final Mutable combinedResult = new MutableBoolean(true); - l1Result.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue())); - l2Result.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue())); + result1.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue())); + result2.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue())); return Optional.of(combinedResult.getValue()); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java index ae79ad857244..2c3908aa33f3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java @@ -212,6 +212,11 @@ public interface Writer extends Closeable, CellSink, ShipperListener { /** Add an element to the file info map. */ void appendFileInfo(byte[] key, byte[] value) throws IOException; + /** + * Add TimestampRange and earliest put timestamp to Metadata + */ + void appendTrackedTimestampsToMetadata() throws IOException; + /** Returns the path to this {@link HFile} */ Path getPath(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java index d2dfaf62106a..26dd7a73b7e2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hbase.io.hfile; import static org.apache.hadoop.hbase.io.hfile.BlockCompressedSizePredicator.MAX_BLOCK_SIZE_UNCOMPRESSED; +import static org.apache.hadoop.hbase.regionserver.HStoreFile.EARLIEST_PUT_TS; +import static org.apache.hadoop.hbase.regionserver.HStoreFile.TIMERANGE_KEY; import java.io.DataOutput; import java.io.DataOutputStream; @@ -26,6 +28,7 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -45,6 +48,7 @@ import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding; import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable; +import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; import org.apache.hadoop.hbase.security.EncryptionUtil; import org.apache.hadoop.hbase.security.User; import org.apache.hadoop.hbase.util.BloomFilterWriter; @@ -170,12 +174,16 @@ public class HFileWriterImpl implements HFile.Writer { protected long maxMemstoreTS = 0; + private final TimeRangeTracker timeRangeTracker; + private long earliestPutTs = HConstants.LATEST_TIMESTAMP; + public HFileWriterImpl(final Configuration conf, CacheConfig cacheConf, Path path, FSDataOutputStream outputStream, HFileContext fileContext) { this.outputStream = outputStream; this.path = path; this.name = path != null ? path.getName() : outputStream.toString(); this.hFileContext = fileContext; + this.timeRangeTracker = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC); DataBlockEncoding encoding = hFileContext.getDataBlockEncoding(); if (encoding != DataBlockEncoding.NONE) { this.blockEncoder = new HFileDataBlockEncoderImpl(encoding); @@ -555,9 +563,12 @@ private void writeInlineBlocks(boolean closing) throws IOException { private void doCacheOnWrite(long offset) { cacheConf.getBlockCache().ifPresent(cache -> { HFileBlock cacheFormatBlock = blockWriter.getBlockForCaching(cacheConf); + BlockCacheKey key = buildBlockCacheKey(offset, cacheFormatBlock); + if (!shouldCacheBlock(cache, key)) { + return; + } try { - cache.cacheBlock(new BlockCacheKey(name, offset, true, cacheFormatBlock.getBlockType()), - cacheFormatBlock, cacheConf.isInMemory(), true); + cache.cacheBlock(key, cacheFormatBlock, cacheConf.isInMemory(), true); } finally { // refCnt will auto increase when block add to Cache, see RAMCache#putIfAbsent cacheFormatBlock.release(); @@ -565,6 +576,20 @@ private void doCacheOnWrite(long offset) { }); } + private BlockCacheKey buildBlockCacheKey(long offset, HFileBlock cacheFormatBlock) { + if (path != null && timeRangeTracker.getMax() != TimeRangeTracker.INITIAL_MAX_TIMESTAMP) { + return new BlockCacheKey(path, offset, true, cacheFormatBlock.getBlockType(), + timeRangeTracker.getMax()); + } else { + return new BlockCacheKey(name, offset, true, cacheFormatBlock.getBlockType()); + } + } + + private boolean shouldCacheBlock(BlockCache cache, BlockCacheKey key) { + Optional result = cache.shouldCacheBlock(key); + return result.orElse(true); + } + /** * Ready a new block for writing. */ @@ -767,6 +792,8 @@ public void append(final Cell cell) throws IOException { if (tagsLength > this.maxTagsLength) { this.maxTagsLength = tagsLength; } + + trackTimestamps(cell); } @Override @@ -859,4 +886,25 @@ protected void finishClose(FixedFileTrailer trailer) throws IOException { outputStream = null; } } + + /** + * Add TimestampRange and earliest put timestamp to Metadata + */ + public void appendTrackedTimestampsToMetadata() throws IOException { + // TODO: The StoreFileReader always converts the byte[] to TimeRange + // via TimeRangeTracker, so we should write the serialization data of TimeRange directly. + appendFileInfo(TIMERANGE_KEY, TimeRangeTracker.toByteArray(timeRangeTracker)); + appendFileInfo(EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs)); + } + + /** + * Record the earliest Put timestamp. If the timeRangeTracker is not set, update TimeRangeTracker + * to include the timestamp of this key + */ + private void trackTimestamps(final Cell cell) { + if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) { + earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp()); + } + timeRangeTracker.includeTimestamp(cell); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java index 0b53d0479902..d49a06d63649 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java @@ -77,6 +77,7 @@ import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hadoop.hbase.nio.RefCnt; import org.apache.hadoop.hbase.protobuf.ProtobufMagic; +import org.apache.hadoop.hbase.regionserver.DataTieringException; import org.apache.hadoop.hbase.regionserver.DataTieringManager; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; @@ -2203,6 +2204,23 @@ public Optional shouldCacheFile(HFileInfo hFileInfo, Configuration conf return Optional.of(!fullyCachedFiles.containsKey(fileName)); } + @Override + public Optional shouldCacheBlock(BlockCacheKey key) { + try { + DataTieringManager dataTieringManager = DataTieringManager.getInstance(); + if (!dataTieringManager.isHotData(key)) { + LOG.debug("Data tiering is enabled for file: '{}' and it is not hot data", + key.getHfileName()); + return Optional.of(false); + } + } catch (IllegalStateException e) { + LOG.warn("Error while getting DataTieringManager instance: {}", e.getMessage()); + } catch (DataTieringException e) { + LOG.warn("Error while checking hotness of the block: {}", e.getMessage()); + } + return Optional.of(true); + } + @Override public Optional isAlreadyCached(BlockCacheKey key) { return Optional.of(getBackingMap().containsKey(key)); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java index 952b4d4938d7..f2e62779eae8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java @@ -128,6 +128,10 @@ public boolean isHotData(BlockCacheKey key) throws DataTieringException { if (hFilePath == null) { throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path"); } + + if (key.getMaxTimestamp().isPresent()) { + return isHotData(hFilePath, key.getMaxTimestamp().get()); + } return isHotData(hFilePath); } @@ -151,6 +155,27 @@ public boolean isHotData(Path hFilePath) throws DataTieringException { return true; } + /** + * Determines whether the data in the HFile at the given path is considered hot based on the + * configured data tiering type and hot data age. If the data tiering type is set to + * {@link DataTieringType#TIME_RANGE}, it validates the data against the provided maximum + * timestamp. + * @param hFilePath the path to the HFile + * @param maxTimestamp the maximum timestamp to validate against + * @return {@code true} if the data is hot, {@code false} otherwise + * @throws DataTieringException if there is an error retrieving data tiering information + */ + public boolean isHotData(Path hFilePath, long maxTimestamp) throws DataTieringException { + Configuration configuration = getConfiguration(hFilePath); + DataTieringType dataTieringType = getDataTieringType(configuration); + + if (dataTieringType.equals(DataTieringType.TIME_RANGE)) { + return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration)); + } + // DataTieringType.NONE or other types are considered hot by default + return true; + } + /** * Determines whether the data in the HFile being read is considered hot based on the configured * data tiering type and hot data age. If the data tiering type is set to @@ -231,10 +256,12 @@ public Set getColdDataFiles(Set allCachedBlocks) } private HRegion getHRegion(Path hFilePath) throws DataTieringException { - if (hFilePath.getParent() == null || hFilePath.getParent().getParent() == null) { - throw new DataTieringException("Incorrect HFile Path: " + hFilePath); + String regionId; + try { + regionId = HRegionFileSystem.getRegionId(hFilePath); + } catch (IOException e) { + throw new DataTieringException(e.getMessage()); } - String regionId = hFilePath.getParent().getParent().getName(); HRegion hRegion = this.onlineRegions.get(regionId); if (hRegion == null) { throw new DataTieringException("HRegion corresponding to " + hFilePath + " doesn't exist"); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java index 6fccccfc8203..88b612e4ac57 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java @@ -1067,6 +1067,31 @@ public static void deleteRegionFromFileSystem(final Configuration conf, final Fi } } + /** + * Retrieves the Region ID from the given HFile path. + * @param hFilePath The path of the HFile. + * @return The Region ID extracted from the HFile path. + * @throws IOException If an I/O error occurs or if the HFile path is incorrect. + */ + public static String getRegionId(Path hFilePath) throws IOException { + if (hFilePath.getParent() == null || hFilePath.getParent().getParent() == null) { + throw new IOException("Incorrect HFile Path: " + hFilePath); + } + Path dir = hFilePath.getParent().getParent(); + if (isTemporaryDirectoryName(dir.getName())) { + if (dir.getParent() == null) { + throw new IOException("Incorrect HFile Path: " + hFilePath); + } + return dir.getParent().getName(); + } + return dir.getName(); + } + + private static boolean isTemporaryDirectoryName(String dirName) { + return REGION_MERGES_DIR.equals(dirName) || REGION_SPLITS_DIR.equals(dirName) + || REGION_TEMP_DIR.equals(dirName); + } + /** * Creates a directory. Assumes the user has already checked for this directory existence. * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java index 51807658f2a8..af15ebcc1263 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java @@ -53,8 +53,8 @@ public enum Type { SYNC } - static final long INITIAL_MIN_TIMESTAMP = Long.MAX_VALUE; - static final long INITIAL_MAX_TIMESTAMP = -1L; + public static final long INITIAL_MIN_TIMESTAMP = Long.MAX_VALUE; + public static final long INITIAL_MAX_TIMESTAMP = -1L; public static TimeRangeTracker create(Type type) { switch (type) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java index f999a73c4732..fbd88a6f58b7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java @@ -32,6 +32,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Random; import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -97,6 +98,7 @@ public class TestDataTieringManager { private static final Logger LOG = LoggerFactory.getLogger(TestDataTieringManager.class); private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); + private static final long DAY = 24 * 60 * 60 * 1000; private static Configuration defaultConf; private static FileSystem fs; private static BlockCache blockCache; @@ -107,20 +109,27 @@ public class TestDataTieringManager { private static DataTieringManager dataTieringManager; private static final List hStoreFiles = new ArrayList<>(); + /** + * Represents the current lexicographically increasing string used as a row key when writing + * HFiles. It is incremented each time {@link #nextString()} is called to generate unique row + * keys. + */ + private static String rowKeyString; + @BeforeClass public static void setupBeforeClass() throws Exception { testDir = TEST_UTIL.getDataTestDir(TestDataTieringManager.class.getSimpleName()); defaultConf = TEST_UTIL.getConfiguration(); - defaultConf.setBoolean(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, true); - defaultConf.setStrings(HConstants.BUCKET_CACHE_IOENGINE_KEY, "offheap"); - defaultConf.setLong(BUCKET_CACHE_SIZE_KEY, 32); - defaultConf.setBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY, true); - fs = HFileSystem.get(defaultConf); - blockCache = BlockCacheFactory.createBlockCache(defaultConf); - cacheConf = new CacheConfig(defaultConf, blockCache); + updateCommonConfigurations(); assertTrue(DataTieringManager.instantiate(defaultConf, testOnlineRegions)); - setupOnlineRegions(); dataTieringManager = DataTieringManager.getInstance(); + rowKeyString = ""; + } + + private static void updateCommonConfigurations() { + defaultConf.setBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY, true); + defaultConf.setStrings(HConstants.BUCKET_CACHE_IOENGINE_KEY, "offheap"); + defaultConf.setLong(BUCKET_CACHE_SIZE_KEY, 32); } @FunctionalInterface @@ -134,7 +143,8 @@ interface DataTieringMethodCallerWithKey { } @Test - public void testDataTieringEnabledWithKey() { + public void testDataTieringEnabledWithKey() throws IOException { + initializeTestEnvironment(); DataTieringMethodCallerWithKey methodCallerWithKey = DataTieringManager::isDataTieringEnabled; // Test with valid key @@ -152,7 +162,8 @@ public void testDataTieringEnabledWithKey() { } @Test - public void testDataTieringEnabledWithPath() { + public void testDataTieringEnabledWithPath() throws IOException { + initializeTestEnvironment(); DataTieringMethodCallerWithPath methodCallerWithPath = DataTieringManager::isDataTieringEnabled; // Test with valid path @@ -182,7 +193,8 @@ public void testDataTieringEnabledWithPath() { } @Test - public void testHotDataWithKey() { + public void testHotDataWithKey() throws IOException { + initializeTestEnvironment(); DataTieringMethodCallerWithKey methodCallerWithKey = DataTieringManager::isHotData; // Test with valid key @@ -195,7 +207,8 @@ public void testHotDataWithKey() { } @Test - public void testHotDataWithPath() { + public void testHotDataWithPath() throws IOException { + initializeTestEnvironment(); DataTieringMethodCallerWithPath methodCallerWithPath = DataTieringManager::isHotData; // Test with valid path @@ -213,6 +226,8 @@ public void testHotDataWithPath() { @Test public void testPrefetchWhenDataTieringEnabled() throws IOException { + setPrefetchBlocksOnOpen(); + initializeTestEnvironment(); // Evict blocks from cache by closing the files and passing evict on close. // Then initialize the reader again. Since Prefetch on open is set to true, it should prefetch // those blocks. @@ -224,12 +239,17 @@ public void testPrefetchWhenDataTieringEnabled() throws IOException { // Since we have one cold file among four files, only three should get prefetched. Optional>> fullyCachedFiles = blockCache.getFullyCachedFiles(); assertTrue("We should get the fully cached files from the cache", fullyCachedFiles.isPresent()); - Waiter.waitFor(defaultConf, 60000, () -> fullyCachedFiles.get().size() == 3); + Waiter.waitFor(defaultConf, 10000, () -> fullyCachedFiles.get().size() == 3); assertEquals("Number of fully cached files are incorrect", 3, fullyCachedFiles.get().size()); } + private void setPrefetchBlocksOnOpen() { + defaultConf.setBoolean(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, true); + } + @Test - public void testColdDataFiles() { + public void testColdDataFiles() throws IOException { + initializeTestEnvironment(); Set allCachedBlocks = new HashSet<>(); for (HStoreFile file : hStoreFiles) { allCachedBlocks.add(new BlockCacheKey(file.getPath(), 0, true, BlockType.DATA)); @@ -255,7 +275,71 @@ public void testColdDataFiles() { } @Test - public void testPickColdDataFiles() { + public void testCacheCompactedBlocksOnWriteDataTieringDisabled() throws IOException { + setCacheCompactBlocksOnWrite(); + initializeTestEnvironment(); + + HRegion region = createHRegion("table3"); + testCacheCompactedBlocksOnWrite(region, true); + } + + @Test + public void testCacheCompactedBlocksOnWriteWithHotData() throws IOException { + setCacheCompactBlocksOnWrite(); + initializeTestEnvironment(); + + HRegion region = createHRegion("table3", getConfWithTimeRangeDataTieringEnabled(5 * DAY)); + testCacheCompactedBlocksOnWrite(region, true); + } + + @Test + public void testCacheCompactedBlocksOnWriteWithColdData() throws IOException { + setCacheCompactBlocksOnWrite(); + initializeTestEnvironment(); + + HRegion region = createHRegion("table3", getConfWithTimeRangeDataTieringEnabled(DAY)); + testCacheCompactedBlocksOnWrite(region, false); + } + + private void setCacheCompactBlocksOnWrite() { + defaultConf.setBoolean(CacheConfig.CACHE_COMPACTED_BLOCKS_ON_WRITE_KEY, true); + } + + private void testCacheCompactedBlocksOnWrite(HRegion region, boolean expectDataBlocksCached) + throws IOException { + HStore hStore = createHStore(region, "cf1"); + createTestFilesForCompaction(hStore); + hStore.refreshStoreFiles(); + + region.stores.put(Bytes.toBytes("cf1"), hStore); + testOnlineRegions.put(region.getRegionInfo().getEncodedName(), region); + + long initialStoreFilesCount = hStore.getStorefilesCount(); + long initialCacheDataBlockCount = blockCache.getDataBlockCount(); + assertEquals(3, initialStoreFilesCount); + assertEquals(0, initialCacheDataBlockCount); + + region.compact(true); + + long compactedStoreFilesCount = hStore.getStorefilesCount(); + long compactedCacheDataBlockCount = blockCache.getDataBlockCount(); + assertEquals(1, compactedStoreFilesCount); + assertEquals(expectDataBlocksCached, compactedCacheDataBlockCount > 0); + } + + private void createTestFilesForCompaction(HStore hStore) throws IOException { + long currentTime = System.currentTimeMillis(); + Path storeDir = hStore.getStoreContext().getFamilyStoreDirectoryPath(); + Configuration configuration = hStore.getReadOnlyConfiguration(); + + createHStoreFile(storeDir, configuration, currentTime - 2 * DAY); + createHStoreFile(storeDir, configuration, currentTime - 3 * DAY); + createHStoreFile(storeDir, configuration, currentTime - 4 * DAY); + } + + @Test + public void testPickColdDataFiles() throws IOException { + initializeTestEnvironment(); Map coldDataFiles = dataTieringManager.getColdFilesList(); assertEquals(1, coldDataFiles.size()); // hStoreFiles[3] is the cold file. @@ -268,6 +352,7 @@ public void testPickColdDataFiles() { */ @Test public void testBlockEvictions() throws Exception { + initializeTestEnvironment(); long capacitySize = 40 * 1024; int writeThreads = 3; int writerQLen = 64; @@ -318,6 +403,7 @@ public void testBlockEvictions() throws Exception { */ @Test public void testBlockEvictionsAllColdBlocks() throws Exception { + initializeTestEnvironment(); long capacitySize = 40 * 1024; int writeThreads = 3; int writerQLen = 64; @@ -365,6 +451,7 @@ public void testBlockEvictionsAllColdBlocks() throws Exception { */ @Test public void testBlockEvictionsHotBlocks() throws Exception { + initializeTestEnvironment(); long capacitySize = 40 * 1024; int writeThreads = 3; int writerQLen = 64; @@ -412,6 +499,8 @@ public void testBlockEvictionsHotBlocks() throws Exception { public void testFeatureKeyDisabled() throws Exception { DataTieringManager.resetForTestingOnly(); defaultConf.setBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY, false); + initializeTestEnvironment(); + try { assertFalse(DataTieringManager.instantiate(defaultConf, testOnlineRegions)); // Verify that the DataaTieringManager instance is not instantiated in the @@ -544,7 +633,20 @@ private void testDataTieringMethodWithKeyNoException(DataTieringMethodCallerWith testDataTieringMethodWithKey(caller, key, expectedResult, null); } + private static void initializeTestEnvironment() throws IOException { + setupFileSystemAndCache(); + setupOnlineRegions(); + } + + private static void setupFileSystemAndCache() throws IOException { + fs = HFileSystem.get(defaultConf); + blockCache = BlockCacheFactory.createBlockCache(defaultConf); + cacheConf = new CacheConfig(defaultConf, blockCache); + } + private static void setupOnlineRegions() throws IOException { + testOnlineRegions.clear(); + hStoreFiles.clear(); long day = 24 * 60 * 60 * 1000; long currentTime = System.currentTimeMillis(); @@ -604,7 +706,12 @@ private static HRegion createHRegion(String table, Configuration conf) throws IO HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs, CommonFSUtils.getTableDir(testDir, hri.getTable()), hri); - return new HRegion(regionFs, null, conf, htd, null); + HRegion region = new HRegion(regionFs, null, conf, htd, null); + // Manually sets the BlockCache for the HRegion instance. + // This is necessary because the region server is not started within this method, + // and therefore the BlockCache needs to be explicitly configured. + region.setBlockCache(blockCache); + return region; } private static HStore createHStore(HRegion region, String columnFamily) throws IOException { @@ -637,24 +744,52 @@ private static HStoreFile createHStoreFile(Path storeDir, Configuration conf, lo StoreFileWriter storeFileWriter = new StoreFileWriter.Builder(conf, cacheConf, fs) .withOutputDir(storeDir).withFileContext(new HFileContextBuilder().build()).build(); - writeStoreFileRandomData(storeFileWriter, Bytes.toBytes(columnFamily), Bytes.toBytes("random"), - timestamp); + writeStoreFileRandomData(storeFileWriter, Bytes.toBytes(columnFamily), timestamp); return new HStoreFile(fs, storeFileWriter.getPath(), conf, cacheConf, BloomType.NONE, true); } + /** + * Writes random data to a store file with rows arranged in lexicographically increasing order. + * Each row is generated using the {@link #nextString()} method, ensuring that each subsequent row + * is lexicographically larger than the previous one. + */ private static void writeStoreFileRandomData(final StoreFileWriter writer, byte[] columnFamily, - byte[] qualifier, long timestamp) throws IOException { + long timestamp) throws IOException { + int cellsPerFile = 10; + byte[] qualifier = Bytes.toBytes("qualifier"); + byte[] value = generateRandomBytes(4 * 1024); try { - for (char d = 'a'; d <= 'z'; d++) { - for (char e = 'a'; e <= 'z'; e++) { - byte[] b = new byte[] { (byte) d, (byte) e }; - writer.append(new KeyValue(b, columnFamily, qualifier, timestamp, b)); - } + for (int i = 0; i < cellsPerFile; i++) { + byte[] row = Bytes.toBytes(nextString()); + writer.append(new KeyValue(row, columnFamily, qualifier, timestamp, value)); } } finally { writer.appendTrackedTimestampsToMetadata(); writer.close(); } } + + private static byte[] generateRandomBytes(int sizeInBytes) { + Random random = new Random(); + byte[] randomBytes = new byte[sizeInBytes]; + random.nextBytes(randomBytes); + return randomBytes; + } + + /** + * Returns the lexicographically larger string every time it's called. + */ + private static String nextString() { + if (rowKeyString == null || rowKeyString.isEmpty()) { + rowKeyString = "a"; + } + char lastChar = rowKeyString.charAt(rowKeyString.length() - 1); + if (lastChar < 'z') { + rowKeyString = rowKeyString.substring(0, rowKeyString.length() - 1) + (char) (lastChar + 1); + } else { + rowKeyString = rowKeyString + "a"; + } + return rowKeyString; + } } From f6480810ad221bcdc0a1cd0f96d449d9896eca21 Mon Sep 17 00:00:00 2001 From: Vinayak Hegde Date: Thu, 2 May 2024 20:24:12 +0530 Subject: [PATCH 2/7] removed spotless bugs --- .../apache/hadoop/hbase/io/hfile/BlockCacheKey.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java index dcce80abdb51..23914788478d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java @@ -20,6 +20,7 @@ import java.util.Optional; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.io.HeapSize; +import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; import org.apache.hadoop.hbase.util.ClassSize; import org.apache.yetus.audience.InterfaceAudience; @@ -34,7 +35,7 @@ public class BlockCacheKey implements HeapSize, java.io.Serializable { private BlockType blockType; private final boolean isPrimaryReplicaBlock; private Path filePath; - private Optional maxTimestamp = Optional.empty(); + private long maxTimestamp = TimeRangeTracker.INITIAL_MAX_TIMESTAMP; /** * Construct a new BlockCacheKey @@ -68,7 +69,7 @@ public BlockCacheKey(Path hfilePath, long offset, boolean isPrimaryReplica, Bloc this.hfileName = hfilePath.getName(); this.offset = offset; this.blockType = blockType; - this.maxTimestamp = Optional.of(maxTimestamp); + this.maxTimestamp = maxTimestamp; } @Override @@ -130,6 +131,9 @@ public Path getFilePath() { } public Optional getMaxTimestamp() { - return maxTimestamp; + if (maxTimestamp == TimeRangeTracker.INITIAL_MAX_TIMESTAMP) { + return Optional.empty(); + } + return Optional.of(maxTimestamp); } } From e17df62ce64fd6ad03158e277e379a7b98e02faf Mon Sep 17 00:00:00 2001 From: Vinayak Hegde Date: Fri, 3 May 2024 09:04:12 +0530 Subject: [PATCH 3/7] removed some errors --- .../org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java index d49a06d63649..88c3dd2b034d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java @@ -2208,13 +2208,11 @@ public Optional shouldCacheFile(HFileInfo hFileInfo, Configuration conf public Optional shouldCacheBlock(BlockCacheKey key) { try { DataTieringManager dataTieringManager = DataTieringManager.getInstance(); - if (!dataTieringManager.isHotData(key)) { + if (dataTieringManager != null && !dataTieringManager.isHotData(key)) { LOG.debug("Data tiering is enabled for file: '{}' and it is not hot data", key.getHfileName()); return Optional.of(false); } - } catch (IllegalStateException e) { - LOG.warn("Error while getting DataTieringManager instance: {}", e.getMessage()); } catch (DataTieringException e) { LOG.warn("Error while checking hotness of the block: {}", e.getMessage()); } From d7d15eb48fb9f07e5f8cfddeffb48e22f2f0f78b Mon Sep 17 00:00:00 2001 From: Vinayak Hegde Date: Wed, 8 May 2024 22:59:01 +0530 Subject: [PATCH 4/7] refactored code according to review comments --- .../hadoop/hbase/io/hfile/BlockCache.java | 5 ++-- .../hbase/io/hfile/CombinedBlockCache.java | 5 ++-- .../hbase/io/hfile/HFileWriterImpl.java | 5 +++- .../hbase/io/hfile/bucket/BucketCache.java | 4 ++-- .../regionserver/DataTieringManager.java | 24 ++++++++++++++++--- 5 files changed, 33 insertions(+), 10 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java index c1e72939a138..d87abcb885c1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java @@ -212,11 +212,12 @@ default Optional shouldCacheFile(HFileInfo hFileInfo, Configuration con * not be overridden by all implementing classes. In such cases, the returned Optional will be * empty. For subclasses implementing this logic, the returned Optional would contain the boolean * value reflecting if the passed block should indeed be cached. - * @param key The key representing the block to check if it should be cached. + * @param key The key representing the block to check if it should be cached. + * @param conf The configuration object to use for determining caching behavior. * @return An empty Optional if this method is not supported; otherwise, the returned Optional * contains the boolean value indicating if the block should be cached. */ - default Optional shouldCacheBlock(BlockCacheKey key) { + default Optional shouldCacheBlock(BlockCacheKey key, Configuration conf) { return Optional.empty(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java index ba0b07f9d7d3..ff7312e84463 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java @@ -489,8 +489,9 @@ public Optional shouldCacheFile(HFileInfo hFileInfo, Configuration conf } @Override - public Optional shouldCacheBlock(BlockCacheKey key) { - return combineCacheResults(l1Cache.shouldCacheBlock(key), l2Cache.shouldCacheBlock(key)); + public Optional shouldCacheBlock(BlockCacheKey key, Configuration conf) { + return combineCacheResults(l1Cache.shouldCacheBlock(key, conf), + l2Cache.shouldCacheBlock(key, conf)); } private Optional combineCacheResults(Optional result1, diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java index 26dd7a73b7e2..f83de13bb9b7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java @@ -121,6 +121,8 @@ public class HFileWriterImpl implements HFile.Writer { /** May be null if we were passed a stream. */ protected final Path path; + protected final Configuration conf; + /** Cache configuration for caching data on write. */ protected final CacheConfig cacheConf; @@ -198,6 +200,7 @@ public HFileWriterImpl(final Configuration conf, CacheConfig cacheConf, Path pat } closeOutputStream = path != null; this.cacheConf = cacheConf; + this.conf = conf; float encodeBlockSizeRatio = conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 0f); this.encodedBlockSizeLimit = (int) (hFileContext.getBlocksize() * encodeBlockSizeRatio); @@ -586,7 +589,7 @@ private BlockCacheKey buildBlockCacheKey(long offset, HFileBlock cacheFormatBloc } private boolean shouldCacheBlock(BlockCache cache, BlockCacheKey key) { - Optional result = cache.shouldCacheBlock(key); + Optional result = cache.shouldCacheBlock(key, conf); return result.orElse(true); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java index 88c3dd2b034d..4bdf61a0a122 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java @@ -2205,10 +2205,10 @@ public Optional shouldCacheFile(HFileInfo hFileInfo, Configuration conf } @Override - public Optional shouldCacheBlock(BlockCacheKey key) { + public Optional shouldCacheBlock(BlockCacheKey key, Configuration conf) { try { DataTieringManager dataTieringManager = DataTieringManager.getInstance(); - if (dataTieringManager != null && !dataTieringManager.isHotData(key)) { + if (dataTieringManager != null && !dataTieringManager.isHotData(key, conf)) { LOG.debug("Data tiering is enabled for file: '{}' and it is not hot data", key.getHfileName()); return Optional.of(false); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java index f2e62779eae8..c801d202b64a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java @@ -128,11 +128,29 @@ public boolean isHotData(BlockCacheKey key) throws DataTieringException { if (hFilePath == null) { throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path"); } + return isHotData(hFilePath); + } - if (key.getMaxTimestamp().isPresent()) { - return isHotData(hFilePath, key.getMaxTimestamp().get()); + /** + * Determines whether the data associated with the given block cache key is considered hot. If the + * data tiering type is set to {@link DataTieringType#TIME_RANGE} and maximum timestamp is not + * present, it considers {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by + * default. + * @param key the block cache key + * @param conf The configuration object to use for determining hot data criteria. + * @return {@code true} if the data is hot, {@code false} otherwise + * @throws DataTieringException if there is an error retrieving data tiering information + */ + public boolean isHotData(BlockCacheKey key, Configuration conf) throws DataTieringException { + DataTieringType dataTieringType = getDataTieringType(conf); + if (dataTieringType.equals(DataTieringType.TIME_RANGE)) { + if (key.getMaxTimestamp().isPresent()) { + return hotDataValidator(key.getMaxTimestamp().get(), getDataTieringHotDataAge(conf)); + } + return isHotData(key); } - return isHotData(hFilePath); + // DataTieringType.NONE or other types are considered hot by default + return true; } /** From d757ecc60838efb41a4f1a2fd5fe7cbacb4f4951 Mon Sep 17 00:00:00 2001 From: Vinayak Hegde Date: Fri, 17 May 2024 12:13:21 +0530 Subject: [PATCH 5/7] enhance the code according to PR comments --- .../hadoop/hbase/io/hfile/BlockCache.java | 9 ++++--- .../hadoop/hbase/io/hfile/BlockCacheKey.java | 20 --------------- .../hbase/io/hfile/CombinedBlockCache.java | 8 +++--- .../hbase/io/hfile/HFileWriterImpl.java | 13 ++-------- .../hbase/io/hfile/bucket/BucketCache.java | 19 ++++++-------- .../regionserver/DataTieringManager.java | 25 +++++++++---------- 6 files changed, 33 insertions(+), 61 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java index d87abcb885c1..922ac5dd144c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java @@ -22,6 +22,7 @@ import java.util.Optional; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; import org.apache.hadoop.hbase.util.Pair; import org.apache.yetus.audience.InterfaceAudience; @@ -212,12 +213,14 @@ default Optional shouldCacheFile(HFileInfo hFileInfo, Configuration con * not be overridden by all implementing classes. In such cases, the returned Optional will be * empty. For subclasses implementing this logic, the returned Optional would contain the boolean * value reflecting if the passed block should indeed be cached. - * @param key The key representing the block to check if it should be cached. - * @param conf The configuration object to use for determining caching behavior. + * @param key The key representing the block to check if it should be cached. + * @param timeRangeTracker the time range tracker containing the timestamps + * @param conf The configuration object to use for determining caching behavior. * @return An empty Optional if this method is not supported; otherwise, the returned Optional * contains the boolean value indicating if the block should be cached. */ - default Optional shouldCacheBlock(BlockCacheKey key, Configuration conf) { + default Optional shouldCacheBlock(BlockCacheKey key, TimeRangeTracker timeRangeTracker, + Configuration conf) { return Optional.empty(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java index 23914788478d..bcc1f58ba5e2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java @@ -17,10 +17,8 @@ */ package org.apache.hadoop.hbase.io.hfile; -import java.util.Optional; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.io.HeapSize; -import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; import org.apache.hadoop.hbase.util.ClassSize; import org.apache.yetus.audience.InterfaceAudience; @@ -35,7 +33,6 @@ public class BlockCacheKey implements HeapSize, java.io.Serializable { private BlockType blockType; private final boolean isPrimaryReplicaBlock; private Path filePath; - private long maxTimestamp = TimeRangeTracker.INITIAL_MAX_TIMESTAMP; /** * Construct a new BlockCacheKey @@ -62,16 +59,6 @@ public BlockCacheKey(Path hfilePath, long offset, boolean isPrimaryReplica, Bloc this.blockType = blockType; } - public BlockCacheKey(Path hfilePath, long offset, boolean isPrimaryReplica, BlockType blockType, - long maxTimestamp) { - this.filePath = hfilePath; - this.isPrimaryReplicaBlock = isPrimaryReplica; - this.hfileName = hfilePath.getName(); - this.offset = offset; - this.blockType = blockType; - this.maxTimestamp = maxTimestamp; - } - @Override public int hashCode() { return hfileName.hashCode() * 127 + (int) (offset ^ (offset >>> 32)); @@ -129,11 +116,4 @@ public void setBlockType(BlockType blockType) { public Path getFilePath() { return filePath; } - - public Optional getMaxTimestamp() { - if (maxTimestamp == TimeRangeTracker.INITIAL_MAX_TIMESTAMP) { - return Optional.empty(); - } - return Optional.of(maxTimestamp); - } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java index ff7312e84463..c29ed1ecf31e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache; +import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; import org.apache.hadoop.hbase.util.Pair; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; @@ -489,9 +490,10 @@ public Optional shouldCacheFile(HFileInfo hFileInfo, Configuration conf } @Override - public Optional shouldCacheBlock(BlockCacheKey key, Configuration conf) { - return combineCacheResults(l1Cache.shouldCacheBlock(key, conf), - l2Cache.shouldCacheBlock(key, conf)); + public Optional shouldCacheBlock(BlockCacheKey key, TimeRangeTracker timeRangeTracker, + Configuration conf) { + return combineCacheResults(l1Cache.shouldCacheBlock(key, timeRangeTracker, conf), + l2Cache.shouldCacheBlock(key, timeRangeTracker, conf)); } private Optional combineCacheResults(Optional result1, diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java index f83de13bb9b7..ca1fecbb8a00 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java @@ -566,7 +566,7 @@ private void writeInlineBlocks(boolean closing) throws IOException { private void doCacheOnWrite(long offset) { cacheConf.getBlockCache().ifPresent(cache -> { HFileBlock cacheFormatBlock = blockWriter.getBlockForCaching(cacheConf); - BlockCacheKey key = buildBlockCacheKey(offset, cacheFormatBlock); + BlockCacheKey key = new BlockCacheKey(name, offset, true, cacheFormatBlock.getBlockType()); if (!shouldCacheBlock(cache, key)) { return; } @@ -579,17 +579,8 @@ private void doCacheOnWrite(long offset) { }); } - private BlockCacheKey buildBlockCacheKey(long offset, HFileBlock cacheFormatBlock) { - if (path != null && timeRangeTracker.getMax() != TimeRangeTracker.INITIAL_MAX_TIMESTAMP) { - return new BlockCacheKey(path, offset, true, cacheFormatBlock.getBlockType(), - timeRangeTracker.getMax()); - } else { - return new BlockCacheKey(name, offset, true, cacheFormatBlock.getBlockType()); - } - } - private boolean shouldCacheBlock(BlockCache cache, BlockCacheKey key) { - Optional result = cache.shouldCacheBlock(key, conf); + Optional result = cache.shouldCacheBlock(key, timeRangeTracker, conf); return result.orElse(true); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java index 4bdf61a0a122..c14cf76ae94f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java @@ -77,8 +77,8 @@ import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hadoop.hbase.nio.RefCnt; import org.apache.hadoop.hbase.protobuf.ProtobufMagic; -import org.apache.hadoop.hbase.regionserver.DataTieringException; import org.apache.hadoop.hbase.regionserver.DataTieringManager; +import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.IdReadWriteLock; @@ -2205,16 +2205,13 @@ public Optional shouldCacheFile(HFileInfo hFileInfo, Configuration conf } @Override - public Optional shouldCacheBlock(BlockCacheKey key, Configuration conf) { - try { - DataTieringManager dataTieringManager = DataTieringManager.getInstance(); - if (dataTieringManager != null && !dataTieringManager.isHotData(key, conf)) { - LOG.debug("Data tiering is enabled for file: '{}' and it is not hot data", - key.getHfileName()); - return Optional.of(false); - } - } catch (DataTieringException e) { - LOG.warn("Error while checking hotness of the block: {}", e.getMessage()); + public Optional shouldCacheBlock(BlockCacheKey key, TimeRangeTracker timeRangeTracker, + Configuration conf) { + DataTieringManager dataTieringManager = DataTieringManager.getInstance(); + if (dataTieringManager != null && !dataTieringManager.isHotData(timeRangeTracker, conf)) { + LOG.debug("Data tiering is enabled for file: '{}' and it is not hot data", + key.getHfileName()); + return Optional.of(false); } return Optional.of(true); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java index c801d202b64a..aa56e3f64445 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java @@ -132,22 +132,21 @@ public boolean isHotData(BlockCacheKey key) throws DataTieringException { } /** - * Determines whether the data associated with the given block cache key is considered hot. If the - * data tiering type is set to {@link DataTieringType#TIME_RANGE} and maximum timestamp is not - * present, it considers {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by - * default. - * @param key the block cache key - * @param conf The configuration object to use for determining hot data criteria. + * Determines whether the data associated with the given time range tracker is considered hot. If + * the data tiering type is set to {@link DataTieringType#TIME_RANGE}, it uses the maximum + * timestamp from the time range tracker to determine if the data is hot. Otherwise, it considers + * the data as hot by default. + * @param timeRangeTracker the time range tracker containing the timestamps + * @param conf The configuration object to use for determining hot data criteria. * @return {@code true} if the data is hot, {@code false} otherwise - * @throws DataTieringException if there is an error retrieving data tiering information */ - public boolean isHotData(BlockCacheKey key, Configuration conf) throws DataTieringException { + public boolean isHotData(TimeRangeTracker timeRangeTracker, Configuration conf) { DataTieringType dataTieringType = getDataTieringType(conf); - if (dataTieringType.equals(DataTieringType.TIME_RANGE)) { - if (key.getMaxTimestamp().isPresent()) { - return hotDataValidator(key.getMaxTimestamp().get(), getDataTieringHotDataAge(conf)); - } - return isHotData(key); + if ( + dataTieringType.equals(DataTieringType.TIME_RANGE) + && timeRangeTracker.getMax() != TimeRangeTracker.INITIAL_MAX_TIMESTAMP + ) { + return hotDataValidator(timeRangeTracker.getMax(), getDataTieringHotDataAge(conf)); } // DataTieringType.NONE or other types are considered hot by default return true; From 3da7b432e7d998067cc335fd671a264b9d5f2247 Mon Sep 17 00:00:00 2001 From: Vinayak Hegde Date: Mon, 20 May 2024 13:41:27 +0530 Subject: [PATCH 6/7] add path BlockCacheKey if present --- .../apache/hadoop/hbase/io/hfile/HFileWriterImpl.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java index ca1fecbb8a00..cb7d0509cd9e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java @@ -566,7 +566,7 @@ private void writeInlineBlocks(boolean closing) throws IOException { private void doCacheOnWrite(long offset) { cacheConf.getBlockCache().ifPresent(cache -> { HFileBlock cacheFormatBlock = blockWriter.getBlockForCaching(cacheConf); - BlockCacheKey key = new BlockCacheKey(name, offset, true, cacheFormatBlock.getBlockType()); + BlockCacheKey key = buildCacheBlockKey(offset, cacheFormatBlock.getBlockType()); if (!shouldCacheBlock(cache, key)) { return; } @@ -579,6 +579,13 @@ private void doCacheOnWrite(long offset) { }); } + private BlockCacheKey buildCacheBlockKey(long offset, BlockType blockType) { + if (path != null) { + return new BlockCacheKey(path, offset, true, blockType); + } + return new BlockCacheKey(name, offset, true, blockType); + } + private boolean shouldCacheBlock(BlockCache cache, BlockCacheKey key) { Optional result = cache.shouldCacheBlock(key, timeRangeTracker, conf); return result.orElse(true); From 9ea998d461cf8970d66db87986779a0e274af878 Mon Sep 17 00:00:00 2001 From: Vinayak Hegde Date: Tue, 21 May 2024 12:53:49 +0530 Subject: [PATCH 7/7] rebased to HBASE-28463 --- .../hbase/regionserver/StoreFileWriter.java | 23 +------------------ 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java index 67fa2244e957..2f0fd4cfe547 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java @@ -22,13 +22,11 @@ import static org.apache.hadoop.hbase.regionserver.HStoreFile.BLOOM_FILTER_TYPE_KEY; import static org.apache.hadoop.hbase.regionserver.HStoreFile.COMPACTION_EVENT_KEY; import static org.apache.hadoop.hbase.regionserver.HStoreFile.DELETE_FAMILY_COUNT; -import static org.apache.hadoop.hbase.regionserver.HStoreFile.EARLIEST_PUT_TS; import static org.apache.hadoop.hbase.regionserver.HStoreFile.HISTORICAL_KEY; import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAJOR_COMPACTION_KEY; import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAX_SEQ_ID_KEY; import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_CELLS_COUNT; import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_FILE_REFS; -import static org.apache.hadoop.hbase.regionserver.HStoreFile.TIMERANGE_KEY; import static org.apache.hadoop.hbase.regionserver.StoreEngine.STORE_ENGINE_CLASS_KEY; import java.io.IOException; @@ -51,7 +49,6 @@ import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.PrivateCellUtil; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; @@ -495,11 +492,9 @@ private static class SingleStoreFileWriter { private final BloomFilterWriter deleteFamilyBloomFilterWriter; private final BloomType bloomType; private byte[] bloomParam = null; - private long earliestPutTs = HConstants.LATEST_TIMESTAMP; private long deleteFamilyCnt = 0; private BloomContext bloomContext = null; private BloomContext deleteFamilyBloomContext = null; - private final TimeRangeTracker timeRangeTracker; private final Supplier> compactedFilesSupplier; private HFile.Writer writer; @@ -523,7 +518,6 @@ private SingleStoreFileWriter(FileSystem fs, Path path, final Configuration conf HFileContext fileContext, boolean shouldDropCacheBehind, Supplier> compactedFilesSupplier) throws IOException { this.compactedFilesSupplier = compactedFilesSupplier; - this.timeRangeTracker = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC); // TODO : Change all writers to be specifically created for compaction context writer = HFile.getWriterFactory(conf, cacheConf).withPath(fs, path).withFavoredNodes(favoredNodes) @@ -665,21 +659,7 @@ private void appendMobMetadata(SetMultimap mobRefSet) throws * Add TimestampRange and earliest put timestamp to Metadata */ private void appendTrackedTimestampsToMetadata() throws IOException { - // TODO: The StoreFileReader always converts the byte[] to TimeRange - // via TimeRangeTracker, so we should write the serialization data of TimeRange directly. - appendFileInfo(TIMERANGE_KEY, TimeRangeTracker.toByteArray(timeRangeTracker)); - appendFileInfo(EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs)); - } - - /** - * Record the earlest Put timestamp. If the timeRangeTracker is not set, update TimeRangeTracker - * to include the timestamp of this key - */ - private void trackTimestamps(final Cell cell) { - if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) { - earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp()); - } - timeRangeTracker.includeTimestamp(cell); + writer.appendTrackedTimestampsToMetadata(); } private void appendGeneralBloomfilter(final Cell cell) throws IOException { @@ -710,7 +690,6 @@ private void append(final Cell cell) throws IOException { appendGeneralBloomfilter(cell); appendDeleteFamilyBloomFilter(cell); writer.append(cell); - trackTimestamps(cell); } private void beforeShipped() throws IOException {