-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Allow using composed storage for SuperSorter intermediate data #13368
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3a24539
246620a
e1cda34
9a04190
b052196
3553f9e
159c5d7
98c91c7
55bddeb
7b41c0b
5855d69
5ea228d
8ac983a
fddcb0b
95393c9
8b82b15
5bfe457
cc96a2b
7b5dadf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -65,7 +65,7 @@ public interface StorageConnector | |
| boolean pathExists(String path) throws IOException; | ||
|
|
||
| /** | ||
| * Reads the data present at the path the underlying storage system. Most implementations prepend the input path | ||
| * Reads the data present at the path in the underlying storage system. Most implementations prepend the input path | ||
| * with a basePath. | ||
| * The caller should take care of closing the stream when done or in case of error. | ||
| * | ||
|
|
@@ -75,6 +75,19 @@ public interface StorageConnector | |
| */ | ||
| InputStream read(String path) throws IOException; | ||
|
|
||
| /** | ||
| * Reads the data present for a given range at the path in the underlying storage system. | ||
| * Most implementations prepend the input path with a basePath. | ||
| * The caller should take care of closing the stream when done or in case of error. Further, the caller must ensure | ||
| * that the start offset and the size of the read are valid parameters for the given path for correct behavior. | ||
| * @param path The path to read data from | ||
| * @param from Start offset of the read in the path | ||
| * @param size Length of the read to be done | ||
| * @return InputStream starting from the given offset limited by the given size | ||
| * @throws IOException if the path is not present or if unable to read the data present on the path | ||
| */ | ||
| InputStream readRange(String path, long from, long size) throws IOException; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should the behaviour of the implementations be defined here under the following conditions:
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Have added some validation checks for inputs in the local and S3 connectors. Haven't done the size-based check in the S3 connector yet since it'll require a status API call for the object first - I'm not sure if it is necessary to do that check as of now. |
||
|
|
||
| /** | ||
| * Open an {@link OutputStream} for writing data to the path in the underlying storage system. | ||
| * Most implementations prepend the input path with a basePath. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,17 +19,22 @@ | |
|
|
||
| package org.apache.druid.storage.local; | ||
|
|
||
| import org.apache.commons.io.input.BoundedInputStream; | ||
| import org.apache.druid.java.util.common.FileUtils; | ||
| import org.apache.druid.java.util.common.IAE; | ||
| import org.apache.druid.java.util.common.ISE; | ||
| import org.apache.druid.java.util.common.StringUtils; | ||
| import org.apache.druid.storage.StorageConnector; | ||
|
|
||
| import java.io.File; | ||
| import java.io.FileNotFoundException; | ||
| import java.io.IOException; | ||
| import java.io.InputStream; | ||
| import java.io.OutputStream; | ||
| import java.nio.channels.Channels; | ||
| import java.nio.channels.FileChannel; | ||
| import java.nio.file.Files; | ||
| import java.nio.file.StandardOpenOption; | ||
| import java.util.Arrays; | ||
| import java.util.List; | ||
| import java.util.stream.Collectors; | ||
|
|
@@ -55,16 +60,32 @@ public boolean pathExists(String path) | |
| return fileWithBasePath(path).exists(); | ||
| } | ||
|
|
||
| /** | ||
| * Reads the file present as basePath + path. Will throw an IO exception in case the file is not present. | ||
| * Closing of the stream is the responsibility of the caller. | ||
| */ | ||
| @Override | ||
| public InputStream read(String path) throws IOException | ||
| { | ||
| return Files.newInputStream(fileWithBasePath(path).toPath()); | ||
| } | ||
|
|
||
| @Override | ||
| public InputStream readRange(String path, long from, long size) throws IOException | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: `from` -> `start` seems better. Feel free to ignore this. |
||
| { | ||
| if (!pathExists(path)) { | ||
| throw new FileNotFoundException("Unable to find file " + fileWithBasePath(path).toPath() + " for reading"); | ||
| } | ||
| long length = fileWithBasePath(path).length(); | ||
| if (from < 0 || size < 0 || (from + size) > length) { | ||
| throw new IAE( | ||
| "Invalid arguments for reading %s. from = %d, readSize = %d, fileSize = %d", | ||
| fileWithBasePath(path).toPath(), | ||
| from, | ||
| size, | ||
| length | ||
| ); | ||
| } | ||
| FileChannel fileChannel = FileChannel.open(fileWithBasePath(path).toPath(), StandardOpenOption.READ); | ||
| return new BoundedInputStream(Channels.newInputStream(fileChannel.position(from)), size); | ||
| } | ||
|
|
||
| /** | ||
| * Writes the file present with the materialized location as basePath + path. | ||
| * In case the parent directory does not exist, we create the parent dir recursively. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,6 +53,7 @@ | |
| import org.apache.druid.frame.key.SortColumn; | ||
| import org.apache.druid.frame.processor.FrameProcessorExecutor; | ||
| import org.apache.druid.frame.processor.FrameProcessors; | ||
| import org.apache.druid.frame.util.DurableStorageUtils; | ||
| import org.apache.druid.indexer.TaskState; | ||
| import org.apache.druid.indexer.TaskStatus; | ||
| import org.apache.druid.indexing.common.LockGranularity; | ||
|
|
@@ -153,7 +154,6 @@ | |
| import org.apache.druid.msq.querykit.groupby.GroupByQueryKit; | ||
| import org.apache.druid.msq.querykit.scan.ScanQueryKit; | ||
| import org.apache.druid.msq.shuffle.DurableStorageInputChannelFactory; | ||
| import org.apache.druid.msq.shuffle.DurableStorageUtils; | ||
| import org.apache.druid.msq.shuffle.WorkerInputChannelFactory; | ||
| import org.apache.druid.msq.statistics.PartialKeyStatisticsInformation; | ||
| import org.apache.druid.msq.util.DimensionSchemaUtils; | ||
|
|
@@ -293,6 +293,9 @@ public ControllerImpl( | |
| { | ||
| this.task = task; | ||
| this.context = context; | ||
| this.isDurableStorageEnabled = MultiStageQueryContext.isDurableStorageEnabled( | ||
| task.getQuerySpec().getQuery().context() | ||
| ); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -587,6 +590,25 @@ private QueryDefinition initializeQueryDefAndState(final Closer closer) | |
| .orElse(MSQWarnings.DEFAULT_MAX_PARSE_EXCEPTIONS_ALLOWED); | ||
| } | ||
|
|
||
| ImmutableMap.Builder<String, Object> taskContextOverridesBuilder = ImmutableMap.builder(); | ||
| taskContextOverridesBuilder | ||
| .put( | ||
| MultiStageQueryContext.CTX_DURABLE_SHUFFLE_STORAGE, | ||
| isDurableStorageEnabled | ||
| ).put( | ||
| MultiStageQueryContext.CTX_COMPOSED_INTERMEDIATE_SUPER_SORTER_STORAGE, | ||
| MultiStageQueryContext.isComposedIntermediateSuperSorterStorageEnabled( | ||
| task.getQuerySpec().getQuery().context() | ||
| ) | ||
| ).put( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think they can work independently - the local bytes limit just enforces a resource limit on the local storage, and the composed storage enables joining local disk and durable storage together.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should throw an error if one sets CTX_COMPOSED_INTERMEDIATE_SUPER_SORTER_STORAGE and CTX_INTERMEDIATE_SUPER_SORTER_STORAGE_MAX_LOCAL_BYTES is not set. What will happen is the users will set this flag thinking stuff is going to work but they would be proved wrong.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I agree with the documentation part but setting |
||
| MultiStageQueryContext.CTX_INTERMEDIATE_SUPER_SORTER_STORAGE_MAX_LOCAL_BYTES, | ||
| MultiStageQueryContext.getIntermediateSuperSorterStorageMaxLocalBytes( | ||
| task.getQuerySpec().getQuery().context() | ||
| ) | ||
| ).put( | ||
| MSQWarnings.CTX_MAX_PARSE_EXCEPTIONS_ALLOWED, | ||
| maxParseExceptions | ||
| ); | ||
| this.workerTaskLauncher = new MSQWorkerTaskLauncher( | ||
| id(), | ||
| task.getDataSource(), | ||
|
|
@@ -600,8 +622,7 @@ private QueryDefinition initializeQueryDefAndState(final Closer closer) | |
| } | ||
| }); | ||
| }, | ||
| isDurableStorageEnabled, | ||
| maxParseExceptions, | ||
| taskContextOverridesBuilder.build(), | ||
| // 10 minutes +- 2 minutes jitter | ||
| TimeUnit.SECONDS.toMillis(600 + ThreadLocalRandom.current().nextInt(-4, 5) * 30L) | ||
| ); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this change would be orthogonal to the PR no?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, this is needed since
SuperSorterTest runs for > 10 minutes sometimes and doesn't produce any log in a successful case. So, the wait for the test to finish is increased to 15 minutes.