-
Notifications
You must be signed in to change notification settings - Fork 4.5k
[BEAM-22] Return a map of CommittedBundle to Consumers from handleResult #249
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -800,15 +800,19 @@ public TransformWatermarks getWatermarks(AppliedPTransform<?, ?, ?> transform) { | |
| * | ||
| * @param completed the input that has completed | ||
| * @param transform the transform that has completed processing the input | ||
| * @param outputs the bundles the transform has output | ||
| * @param timerUpdate the timers that fired to produce this update, plus the timers that were | ||
| * added or removed as part of processing this update | ||
| * @param outputs the CommittedBundles that were output by processing the input bundle, and the | ||
| * PTransforms that the bundles will be consumed by. Elements in each output bundle | ||
| * become pending on each AppliedPTransform that will consume them | ||
| * @param earliestHold the earliest watermark hold in the transform's state. {@code null} if there | ||
| * is no hold | ||
| */ | ||
| public void updateWatermarks( | ||
| @Nullable CommittedBundle<?> completed, | ||
| AppliedPTransform<?, ?, ?> transform, | ||
| TimerUpdate timerUpdate, | ||
| Iterable<? extends CommittedBundle<?>> outputs, | ||
| Map<? extends CommittedBundle<?>, Collection<AppliedPTransform<?, ?, ?>>> outputs, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
| @Nullable Instant earliestHold) { | ||
| updatePending(completed, transform, timerUpdate, outputs); | ||
| TransformWatermarks transformWms = transformToWatermarks.get(transform); | ||
|
|
@@ -836,20 +840,30 @@ private void refreshWatermarks(AppliedPTransform<?, ?, ?> transform) { | |
| * and removes all deleted timers. Removes all elements consumed by the input bundle from the | ||
| * {@link PTransform PTransforms} collection of pending elements, and adds all elements produced | ||
| * by the {@link PTransform} to the pending queue of each consumer. | ||
| * | ||
| * @param input the CommittedBundle that produced this update | ||
| * @param transform the AppliedPTransform that consumed the input to produce the outputs | ||
| * @param timerUpdate the timers that fired to produce this update, plus the timers that were | ||
| * added or removed as part of processing this update | ||
| * @param outputs the CommittedBundles that were output by processing the input bundle, and the | ||
| * PTransforms that the bundles will be consumed by. Elements in each output bundle | ||
| * become pending on each AppliedPTransform that will consume them | ||
| */ | ||
| private void updatePending( | ||
| CommittedBundle<?> input, | ||
| AppliedPTransform<?, ?, ?> transform, | ||
| TimerUpdate timerUpdate, | ||
| Iterable<? extends CommittedBundle<?>> outputs) { | ||
| Map<? extends CommittedBundle<?>, Collection<AppliedPTransform<?, ?, ?>>> outputs) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ditto
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
| TransformWatermarks completedTransform = transformToWatermarks.get(transform); | ||
| completedTransform.updateTimers(timerUpdate); | ||
| if (input != null) { | ||
| completedTransform.removePending(input); | ||
| } | ||
|
|
||
| for (CommittedBundle<?> bundle : outputs) { | ||
| for (AppliedPTransform<?, ?, ?> consumer : consumers.get(bundle.getPCollection())) { | ||
| for (Map.Entry<? extends CommittedBundle<?>, Collection<AppliedPTransform<?, ?, ?>>> | ||
| outputEntry : outputs.entrySet()) { | ||
| CommittedBundle<?> bundle = outputEntry.getKey(); | ||
| for (AppliedPTransform<?, ?, ?> consumer : outputEntry.getValue()) { | ||
| TransformWatermarks watermarks = transformToWatermarks.get(consumer); | ||
| watermarks.addPending(bundle); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,7 +42,7 @@ | |
| import org.apache.beam.sdk.values.PCollectionView; | ||
| import org.apache.beam.sdk.values.PValue; | ||
|
|
||
| import com.google.common.collect.ImmutableList; | ||
| import com.google.common.collect.ImmutableMap; | ||
| import com.google.common.collect.Iterables; | ||
|
|
||
| import java.util.Collection; | ||
|
|
@@ -74,6 +74,11 @@ | |
| class InProcessEvaluationContext { | ||
| /** The step name for each {@link AppliedPTransform} in the {@link Pipeline}. */ | ||
| private final Map<AppliedPTransform<?, ?, ?>, String> stepNames; | ||
| /** | ||
| * The mapping from each {@link PValue} contained within the {@link Pipeline} to each | ||
| * {@link AppliedPTransform} that consumes it. | ||
| */ | ||
| private final Map<PValue, Collection<AppliedPTransform<?, ?, ?>>> valueToConsumers; | ||
|
|
||
| /** The options that were used to create this {@link Pipeline}. */ | ||
| private final InProcessPipelineOptions options; | ||
|
|
@@ -114,7 +119,7 @@ private InProcessEvaluationContext( | |
| this.options = checkNotNull(options); | ||
| this.bundleFactory = checkNotNull(bundleFactory); | ||
| checkNotNull(rootTransforms); | ||
| checkNotNull(valueToConsumers); | ||
| this.valueToConsumers = checkNotNull(valueToConsumers); | ||
| checkNotNull(stepNames); | ||
| checkNotNull(views); | ||
| this.stepNames = stepNames; | ||
|
|
@@ -143,13 +148,14 @@ private InProcessEvaluationContext( | |
| * @param completedTimers the timers that were delivered to produce the {@code completedBundle}, | ||
| * or an empty iterable if no timers were delivered | ||
| * @param result the result of evaluating the input bundle | ||
| * @return the committed bundles contained within the handled {@code result} | ||
| * @return a mapping from the Committed {@link UncommittedBundle bundles} contained within the | ||
| * result to each {@link AppliedPTransform} that will consume them | ||
| */ | ||
| public synchronized Iterable<? extends CommittedBundle<?>> handleResult( | ||
| public synchronized Map<CommittedBundle<?>, Collection<AppliedPTransform<?, ?, ?>>> handleResult( | ||
| @Nullable CommittedBundle<?> completedBundle, | ||
| Iterable<TimerData> completedTimers, | ||
| InProcessTransformResult result) { | ||
| Iterable<? extends CommittedBundle<?>> committedBundles = | ||
| Map<CommittedBundle<?>, Collection<AppliedPTransform<?, ?, ?>>> committedBundles = | ||
| commitBundles(result.getOutputBundles()); | ||
| // Update watermarks and timers | ||
| watermarkManager.updateWatermarks( | ||
|
|
@@ -179,10 +185,11 @@ public synchronized Iterable<? extends CommittedBundle<?>> handleResult( | |
| return committedBundles; | ||
| } | ||
|
|
||
| private Iterable<? extends CommittedBundle<?>> commitBundles( | ||
| Iterable<? extends UncommittedBundle<?>> bundles) { | ||
| ImmutableList.Builder<CommittedBundle<?>> completed = ImmutableList.builder(); | ||
| for (UncommittedBundle<?> inProgress : bundles) { | ||
| private Map<CommittedBundle<?>, Collection<AppliedPTransform<?, ?, ?>>> commitBundles( | ||
| Iterable<? extends UncommittedBundle<?>> outputBundles) { | ||
| ImmutableMap.Builder<CommittedBundle<?>, Collection<AppliedPTransform<?, ?, ?>>> outputs | ||
| = ImmutableMap.builder(); | ||
| for (UncommittedBundle<?> inProgress : outputBundles) { | ||
| AppliedPTransform<?, ?, ?> producing = | ||
| inProgress.getPCollection().getProducingTransformInternal(); | ||
| TransformWatermarks watermarks = watermarkManager.getWatermarks(producing); | ||
|
|
@@ -191,10 +198,10 @@ private Iterable<? extends CommittedBundle<?>> commitBundles( | |
| // Empty bundles don't impact watermarks and shouldn't trigger downstream execution, so | ||
| // filter them out | ||
| if (!Iterables.isEmpty(committed.getElements())) { | ||
| completed.add(committed); | ||
| outputs.put(committed, valueToConsumers.get(committed.getPCollection())); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This changes in the immediate follow-up to this CL, which allows a TransformEvaluator to return some elements as "unprocessed". Those elements are added to the result map here, but should only be consumed by the producing transform. Ex: |
||
| } | ||
| } | ||
| return completed.build(); | ||
| return outputs.build(); | ||
| } | ||
|
|
||
| private void fireAllAvailableCallbacks() { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Comment here about the invariant that this is either/or where two things are null together.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.