-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Fix race between canHandle() and addSegment() in StorageLocation #8114
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
a21655a
Fix race between canHandle() and addSegment() in StorageLocation
jihoonson d75b8de
add comment
jihoonson d7903f5
add comments
jihoonson 5a1acaa
fix test
jihoonson a8e894f
address comments
jihoonson 9ca05b6
remove <p> tag from javadoc
jihoonson f674682
address comments
jihoonson 6532313
comparingLong
jihoonson File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,33 +19,51 @@ | |
|
|
||
| package org.apache.druid.segment.loading; | ||
|
|
||
| import com.google.common.annotations.VisibleForTesting; | ||
| import org.apache.commons.io.FileUtils; | ||
| import org.apache.druid.java.util.common.ISE; | ||
| import org.apache.druid.java.util.common.logger.Logger; | ||
| import org.apache.druid.timeline.DataSegment; | ||
| import org.apache.druid.timeline.SegmentId; | ||
|
|
||
| import javax.annotation.Nullable; | ||
| import javax.annotation.concurrent.GuardedBy; | ||
| import java.io.File; | ||
| import java.util.HashSet; | ||
| import java.util.Set; | ||
|
|
||
| /** | ||
| * This class is a very simple logical representation of a local path. It keeps track of files stored under the | ||
| * {@link #path} via {@link #reserve}, so that the total size of stored files doesn't exceed the {@link #maxSizeBytes} | ||
| * and available space is always kept smaller than {@link #freeSpaceToKeep}. | ||
| * | ||
| * This class is thread-safe, so that multiple threads can update its state at the same time. | ||
| * One example usage is that a historical can use multiple threads to load different segments in parallel | ||
| * from deep storage. | ||
| */ | ||
| public class StorageLocation | ||
| { | ||
| private static final Logger log = new Logger(StorageLocation.class); | ||
|
|
||
| private final File path; | ||
| private final long maxSize; | ||
| private final long maxSizeBytes; | ||
| private final long freeSpaceToKeep; | ||
|
|
||
| /** | ||
| * Set of files stored under the {@link #path}. | ||
| */ | ||
| @GuardedBy("this") | ||
| private final Set<File> files = new HashSet<>(); | ||
|
|
||
| private volatile long currSize = 0; | ||
| /** | ||
| * Current total size of files in bytes. | ||
| */ | ||
| @GuardedBy("this") | ||
| private long currSizeBytes = 0; | ||
|
|
||
| public StorageLocation(File path, long maxSize, @Nullable Double freeSpacePercent) | ||
| public StorageLocation(File path, long maxSizeBytes, @Nullable Double freeSpacePercent) | ||
| { | ||
| this.path = path; | ||
| this.maxSize = maxSize; | ||
| this.maxSizeBytes = maxSizeBytes; | ||
|
|
||
| if (freeSpacePercent != null) { | ||
| long totalSpaceInPartition = path.getTotalSpace(); | ||
|
|
@@ -66,73 +84,86 @@ public File getPath() | |
| return path; | ||
| } | ||
|
|
||
| public long getMaxSize() | ||
| { | ||
| return maxSize; | ||
| } | ||
|
|
||
| /** | ||
| * Add a new file to this location. The given file argument must be a file rather than directory. | ||
| * Remove a segment file from this location. The given file argument must be a file rather than directory. | ||
| */ | ||
| public synchronized void addFile(File file) | ||
| public synchronized void removeFile(File file) | ||
| { | ||
| if (file.isDirectory()) { | ||
| throw new ISE("[%s] must be a file. Use a"); | ||
| } | ||
| if (files.add(file)) { | ||
| currSize += FileUtils.sizeOf(file); | ||
| if (files.remove(file)) { | ||
| currSizeBytes -= FileUtils.sizeOf(file); | ||
| } else { | ||
| log.warn("File[%s] is not found under this location[%s]", file, path); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Add a new segment dir to this location. The segment size is added to currSize. | ||
| * Remove a segment dir from this location. The segment size is subtracted from currSizeBytes. | ||
| */ | ||
| public synchronized void addSegmentDir(File segmentDir, DataSegment segment) | ||
| public synchronized void removeSegmentDir(File segmentDir, DataSegment segment) | ||
| { | ||
| if (files.add(segmentDir)) { | ||
| currSize += segment.getSize(); | ||
| if (files.remove(segmentDir)) { | ||
| currSizeBytes -= segment.getSize(); | ||
| } else { | ||
| log.warn("SegmentDir[%s] is not found under this location[%s]", segmentDir, path); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Remove a segment file from this location. The given file argument must be a file rather than directory. | ||
| * Reserves space to store the given segment. The segment size is added to currSizeBytes. | ||
| * If it succeeds, it returns a file for the given segmentDir in this storage location. Returns null otherwise. | ||
| */ | ||
| public synchronized void removeFile(File file) | ||
| @Nullable | ||
| public synchronized File reserve(String segmentDir, DataSegment segment) | ||
| { | ||
| if (files.remove(file)) { | ||
| currSize -= FileUtils.sizeOf(file); | ||
| } | ||
| return reserve(segmentDir, segment.getId(), segment.getSize()); | ||
| } | ||
|
|
||
| /** | ||
| * Remove a segment dir from this location. The segment size is subtracted from currSize. | ||
| * Reserves space to store the given segment. | ||
| * If it succeeds, it returns a file for the given segmentFilePathToAdd in this storage location. | ||
| * Returns null otherwise. | ||
| */ | ||
| public synchronized void removeSegmentDir(File segmentDir, DataSegment segment) | ||
| @Nullable | ||
| public synchronized File reserve(String segmentFilePathToAdd, SegmentId segmentId, long segmentSize) | ||
| { | ||
| if (files.remove(segmentDir)) { | ||
| currSize -= segment.getSize(); | ||
| final File segmentFileToAdd = new File(path, segmentFilePathToAdd); | ||
| if (files.contains(segmentFileToAdd)) { | ||
| return null; | ||
| } | ||
| if (canHandle(segmentId, segmentSize)) { | ||
| files.add(segmentFileToAdd); | ||
| currSizeBytes += segmentSize; | ||
| return segmentFileToAdd; | ||
| } else { | ||
| return null; | ||
| } | ||
| } | ||
|
|
||
| public boolean canHandle(DataSegment segment) | ||
| /** | ||
| * This method is only package-private to use it in unit tests. Production code must not call this method directly. | ||
| * Use {@link #reserve} instead. | ||
| */ | ||
| @VisibleForTesting | ||
| @GuardedBy("this") | ||
| boolean canHandle(SegmentId segmentId, long segmentSize) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This method should be annotated
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added. |
||
| { | ||
| if (available() < segment.getSize()) { | ||
| if (availableSizeBytes() < segmentSize) { | ||
| log.warn( | ||
| "Segment[%s:%,d] too large for storage[%s:%,d]. Check your druid.segmentCache.locations maxSize param", | ||
| segment.getId(), segment.getSize(), getPath(), available() | ||
| segmentId, segmentSize, getPath(), availableSizeBytes() | ||
| ); | ||
| return false; | ||
| } | ||
|
|
||
| if (freeSpaceToKeep > 0) { | ||
| long currFreeSpace = path.getFreeSpace(); | ||
| if ((freeSpaceToKeep + segment.getSize()) > currFreeSpace) { | ||
| if ((freeSpaceToKeep + segmentSize) > currFreeSpace) { | ||
| log.warn( | ||
| "Segment[%s:%,d] too large for storage[%s:%,d] to maintain suggested freeSpace[%d], current freeSpace is [%d].", | ||
| segment.getId(), | ||
| segment.getSize(), | ||
| segmentId, | ||
| segmentSize, | ||
| getPath(), | ||
| available(), | ||
| availableSizeBytes(), | ||
| freeSpaceToKeep, | ||
| currFreeSpace | ||
| ); | ||
|
|
@@ -143,8 +174,8 @@ public boolean canHandle(DataSegment segment) | |
| return true; | ||
| } | ||
|
|
||
| public synchronized long available() | ||
| public synchronized long availableSizeBytes() | ||
| { | ||
| return maxSize - currSize; | ||
| return maxSizeBytes - currSizeBytes; | ||
| } | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After thinking about it a bit, it doesn't seem particularly beneficial for
StorageLocationto have to be aware ofDataSegmentandSegmentId, sinceDataSegmentis only used as a shorthand to get the size andSegmentId, and theSegmentIdis only used for logging. It seems like reworking this might make the implementation ofStorageLocationa bit more simple and allowIntermediaryDataManagerandSegmentLoaderLocalCacheManagerto use the samereserveandremovemethods. I would consider this optional however, up to you if you want to investigate.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I already looked into that way. The thing is, the way to use
StorageLocationis different inIntermediaryDataManagerandSegmentLoaderLocalCacheManager. InIntermediaryDataManager, the compressed segment files are stored and registered inStorageLocation. InSegmentLoaderLocalCacheManager, the uncompressed segment directories are registered inStorageLocation. This led to use different size computations inremoveFile(File)andremoveSegmentDir(File, DataSegment). I wanted to make sure that the caller must be aware of what it's registering and call the right method.I guess we could have an abstract
StorageLocationclass and its two implementations for different use cases. But I'm not sure it's super beneficial at this point becauseStorageLocationis still pretty simple.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the explanation, sgtm 👍