-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Do persist IncrementalIndex in another thread in IndexGeneratorReducer #2149
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,9 +29,15 @@ | |
| import com.google.common.hash.HashFunction; | ||
| import com.google.common.hash.Hashing; | ||
| import com.google.common.primitives.Longs; | ||
| import com.google.common.util.concurrent.Futures; | ||
| import com.google.common.util.concurrent.ListenableFuture; | ||
| import com.google.common.util.concurrent.ListeningExecutorService; | ||
| import com.google.common.util.concurrent.MoreExecutors; | ||
| import com.metamx.common.IAE; | ||
| import com.metamx.common.ISE; | ||
| import com.metamx.common.logger.Logger; | ||
| import io.druid.common.guava.ThreadRenamingRunnable; | ||
| import io.druid.concurrent.Execs; | ||
| import io.druid.data.input.InputRow; | ||
| import io.druid.data.input.Row; | ||
| import io.druid.data.input.Rows; | ||
|
|
@@ -73,6 +79,15 @@ | |
| import java.util.LinkedHashSet; | ||
| import java.util.List; | ||
| import java.util.Set; | ||
| import java.util.concurrent.BlockingQueue; | ||
| import java.util.concurrent.ExecutionException; | ||
| import java.util.concurrent.ExecutorService; | ||
| import java.util.concurrent.RejectedExecutionException; | ||
| import java.util.concurrent.RejectedExecutionHandler; | ||
| import java.util.concurrent.SynchronousQueue; | ||
| import java.util.concurrent.ThreadPoolExecutor; | ||
| import java.util.concurrent.TimeUnit; | ||
| import java.util.concurrent.TimeoutException; | ||
|
|
||
| /** | ||
| */ | ||
|
|
@@ -531,6 +546,8 @@ protected void reduce( | |
|
|
||
| final Interval interval = config.getGranularitySpec().bucketInterval(bucket.time).get(); | ||
|
|
||
| ListeningExecutorService persistExecutor = null; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we need ListeningExecutorService, can't see the need for listeners ?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Most places where new executor services are created are using listener services right now, mostly because they provide more functionality and realizing later that you should have made it a listening service is a pain. Plus handy things like Futures.allAsList only work on listenable futures. So in essence the practice of always using a listening service makes the code easier to add stuff to later at the cost of depending on an external library instead of just java.* classes. Is there any reason you know of why using listening executor services by default is a bad idea?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. dint realize that we use ListeningExecutorService by default, ok then. |
||
| List<ListenableFuture<?>> persistFutures = Lists.newArrayList(); | ||
| IncrementalIndex index = makeIncrementalIndex( | ||
| bucket, | ||
| combiningAggs, | ||
|
|
@@ -550,6 +567,35 @@ protected void reduce( | |
|
|
||
| Set<String> allDimensionNames = Sets.newLinkedHashSet(); | ||
| final ProgressIndicator progressIndicator = makeProgressIndicator(context); | ||
| int persistBackgroundCount = config.getSchema().getTuningConfig().getPersistBackgroundCount(); | ||
| if (persistBackgroundCount > 0) { | ||
| final BlockingQueue<Runnable> queue = new SynchronousQueue<>(); | ||
| ExecutorService executorService = new ThreadPoolExecutor( | ||
| persistBackgroundCount, | ||
| persistBackgroundCount, | ||
| 0L, | ||
| TimeUnit.MILLISECONDS, | ||
| queue, | ||
| Execs.makeThreadFactory("IndexGeneratorJob_persist_%d"), | ||
| new RejectedExecutionHandler() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you use Execs.newBlockingSingleThreaded(..) instead ?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Execs.newBlockingSingleThreaded(..) only have one background thread to persist incremental Index, so i have not use it.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok |
||
| { | ||
| @Override | ||
| public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) | ||
| { | ||
| try { | ||
| executor.getQueue().put(r); | ||
| } | ||
| catch (InterruptedException e) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thread.currentThread.interrupt() to reset interrupted flag status? |
||
| Thread.currentThread().interrupt(); | ||
| throw new RejectedExecutionException("Got Interrupted while adding to the Queue"); | ||
| } | ||
| } | ||
| } | ||
| ); | ||
| persistExecutor = MoreExecutors.listeningDecorator(executorService); | ||
| } else { | ||
| persistExecutor = MoreExecutors.sameThreadExecutor(); | ||
| } | ||
|
|
||
| for (final BytesWritable bw : values) { | ||
| context.progress(); | ||
|
|
@@ -575,9 +621,29 @@ protected void reduce( | |
| toMerge.add(file); | ||
|
|
||
| context.progress(); | ||
| persist(index, interval, file, progressIndicator); | ||
| // close this index and make a new one, reusing same buffer | ||
| index.close(); | ||
| final IncrementalIndex persistIndex = index; | ||
| persistFutures.add( | ||
| persistExecutor.submit( | ||
| new ThreadRenamingRunnable(String.format("%s-persist", file.getName())) | ||
| { | ||
| @Override | ||
| public void doRun() | ||
| { | ||
| try { | ||
| persist(persistIndex, interval, file, progressIndicator); | ||
| } | ||
| catch (Exception e) { | ||
| log.error("persist index error", e); | ||
| throw Throwables.propagate(e); | ||
| } | ||
| finally { | ||
| // close this index | ||
| persistIndex.close(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After the runnable exits, does the future still hold a non-weak reference to persistIndex?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @drcrallen How to get persistIndex from the future? I do not understand.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After the runnable exits, the future do not hold a non-weak reference to persistIndex, see When runnable exits, the future will nulls out callable.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cool. Thanks for digging in on that
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the runnable is still held somewhere though ( in the future?), which would hold onto a persistIndex. Is there a way you can check to make sure you don't accidentally hold onto all incremental indices in the futures?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The future hold a reference to the runnable which hold onto a persistIndex. |
||
| } | ||
| } | ||
| } | ||
| ) | ||
| ); | ||
|
|
||
| index = makeIncrementalIndex( | ||
| bucket, | ||
|
|
@@ -611,6 +677,9 @@ protected void reduce( | |
| toMerge.add(finalFile); | ||
| } | ||
|
|
||
| Futures.allAsList(persistFutures).get(1, TimeUnit.HOURS); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need to hardcode a timeout here? wouldn't hadoop automatically kill the containers if no progress is being made.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. on second thought, it is probably ok as 1 hr should be practically infinite in this case. |
||
| persistExecutor.shutdown(); | ||
|
|
||
| for (File file : toMerge) { | ||
| indexes.add(HadoopDruidIndexerConfig.INDEX_IO.loadIndex(file)); | ||
| } | ||
|
|
@@ -665,8 +734,17 @@ indexes, aggregators, new File(baseFlushFile, "merged"), progressIndicator | |
| FileUtils.deleteDirectory(file); | ||
| } | ||
| } | ||
| catch (ExecutionException e) { | ||
| throw Throwables.propagate(e); | ||
| } | ||
| catch (TimeoutException e) { | ||
| throw Throwables.propagate(e); | ||
| } | ||
| finally { | ||
| index.close(); | ||
| if (persistExecutor != null) { | ||
| persistExecutor.shutdownNow(); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -265,6 +265,7 @@ public DeterminePartitionsJobTest( | |
| false, | ||
| false, | ||
| null, | ||
| null, | ||
| null | ||
| ) | ||
| ) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -208,6 +208,7 @@ public void testHashedBucketSelection() | |
| false, | ||
| false, | ||
| null, | ||
| null, | ||
| null | ||
| ) | ||
| ); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -116,6 +116,7 @@ public void setup() throws Exception | |
| false, | ||
| false, | ||
| null, | ||
| null, | ||
| null | ||
| ) | ||
| ) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -202,6 +202,7 @@ public InputStream openStream() throws IOException | |
| false, | ||
| false, | ||
| null, | ||
| null, | ||
| null | ||
| ) | ||
| ) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
numBackgroundPersistThreadswould be more consistent with our other properties, such asdruid.processing.numThreads. We should try to keep property naming consistent.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@xvrl, this has been merged, file a new PR to rename the properties?