-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Enhance Compaction task to be able to write to a different/new datasource #18612
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -87,7 +87,7 @@ public class CompactionTaskTest extends CompactionTestBase | |
| () -> TaskBuilder | ||
| .ofTypeCompact() | ||
| .context("storeCompactionState", true) | ||
| .ioConfig(new CompactionIntervalSpec(Intervals.of("2013-08-31/2013-09-02"), null), false); | ||
| .ioConfig(new CompactionIntervalSpec(Intervals.of("2013-08-31/2013-09-02"), null, null), false); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's add tests where this isn't null |
||
| private static final Supplier<TaskBuilder.Compact> PARALLEL_COMPACTION_TASK = | ||
| () -> COMPACTION_TASK.get().tuningConfig( | ||
| t -> t.withPartitionsSpec(new HashedPartitionsSpec(null, null, null)) | ||
|
|
@@ -98,7 +98,7 @@ public class CompactionTaskTest extends CompactionTestBase | |
| () -> TaskBuilder | ||
| .ofTypeCompact() | ||
| .context("storeCompactionState", true) | ||
| .ioConfig(new CompactionIntervalSpec(Intervals.of("2013-08-31/2013-09-02"), null), true); | ||
| .ioConfig(new CompactionIntervalSpec(Intervals.of("2013-08-31/2013-09-02"), null, null), true); | ||
|
|
||
| private static final Supplier<TaskBuilder.Index> INDEX_TASK_WITH_TIMESTAMP = | ||
| () -> MoreResources.Task.INDEX_TASK_WITH_AGGREGATORS.get().dimensions( | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -51,4 +51,9 @@ public interface CompactionInputSpec | |||||
| * @param latestSegments most recent published segments in the interval returned by {@link #findInterval} | ||||||
| */ | ||||||
| boolean validateSegments(LockGranularity lockGranularityInUse, List<DataSegment> latestSegments); | ||||||
|
|
||||||
| /** | ||||||
| * Return the datasource to be used as input to the compaction task. | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| */ | ||||||
| String getDataSource(); | ||||||
| } | ||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -212,7 +212,7 @@ public CompactionTask( | |||||
| if (ioConfig != null) { | ||||||
| this.ioConfig = ioConfig; | ||||||
| } else if (interval != null) { | ||||||
| this.ioConfig = new CompactionIOConfig(new CompactionIntervalSpec(interval, null), false, null); | ||||||
| this.ioConfig = new CompactionIOConfig(new CompactionIntervalSpec(interval, null, null), false, null); | ||||||
| } else { | ||||||
| // We already checked segments is not null or empty above. | ||||||
| //noinspection ConstantConditions | ||||||
|
|
@@ -242,7 +242,10 @@ public CompactionTask( | |||||
| } | ||||||
| this.projections = projections; | ||||||
| this.tuningConfig = tuningConfig != null ? getTuningConfig(tuningConfig) : null; | ||||||
| this.segmentProvider = new SegmentProvider(dataSource, this.ioConfig.getInputSpec()); | ||||||
| this.segmentProvider = new SegmentProvider( | ||||||
| this.ioConfig.getInputSpec().getDataSource() == null ? dataSource : this.ioConfig.getInputSpec().getDataSource(), | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| this.ioConfig.getInputSpec() | ||||||
| ); | ||||||
| // Note: The default compactionRunnerType used here should match the default runner used in CompactSegments#run | ||||||
| // when no runner is detected in the returned compactionTaskQuery. | ||||||
| this.compactionRunner = compactionRunner == null | ||||||
|
|
@@ -517,6 +520,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception | |||||
| emitMetric(toolbox.getEmitter(), "ingest/count", 1); | ||||||
|
|
||||||
| final Map<Interval, DataSchema> intervalDataSchemas = createDataSchemasForIntervals( | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Given we are now ingesting into potentially a new datasource, have we checked whether all associated tasks for ingestion (e.g. lock acquisition, segment alloc) are now working? |
||||||
| getDataSource(), | ||||||
| toolbox, | ||||||
| getTaskLockHelper().getLockGranularityToUse(), | ||||||
| segmentProvider, | ||||||
|
|
@@ -548,6 +552,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception | |||||
| */ | ||||||
| @VisibleForTesting | ||||||
| static Map<Interval, DataSchema> createDataSchemasForIntervals( | ||||||
| final String dataSource, | ||||||
| final TaskToolbox toolbox, | ||||||
| final LockGranularity lockGranularityInUse, | ||||||
| final SegmentProvider segmentProvider, | ||||||
|
|
@@ -613,7 +618,7 @@ static Map<Interval, DataSchema> createDataSchemasForIntervals( | |||||
| final DataSchema dataSchema = createDataSchema( | ||||||
| toolbox.getEmitter(), | ||||||
| metricBuilder, | ||||||
| segmentProvider.dataSource, | ||||||
| dataSource, | ||||||
| interval, | ||||||
| lazyFetchSegments(segmentsToCompact, toolbox.getSegmentCacheManager()), | ||||||
| dimensionsSpec, | ||||||
|
|
@@ -633,7 +638,7 @@ static Map<Interval, DataSchema> createDataSchemasForIntervals( | |||||
| final DataSchema dataSchema = createDataSchema( | ||||||
| toolbox.getEmitter(), | ||||||
| metricBuilder, | ||||||
| segmentProvider.dataSource, | ||||||
| dataSource, | ||||||
| JodaUtils.umbrellaInterval( | ||||||
| Iterables.transform( | ||||||
| timelineSegments, | ||||||
|
|
@@ -1275,7 +1280,7 @@ public Builder( | |||||
|
|
||||||
| public Builder interval(Interval interval) | ||||||
| { | ||||||
| return inputSpec(new CompactionIntervalSpec(interval, null)); | ||||||
| return inputSpec(new CompactionIntervalSpec(interval, null, null)); | ||||||
| } | ||||||
|
|
||||||
| public Builder segments(List<DataSegment> segments) | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,6 +28,7 @@ | |
| import org.apache.druid.timeline.SegmentId; | ||
| import org.joda.time.Interval; | ||
|
|
||
| import javax.annotation.Nullable; | ||
| import java.util.Collections; | ||
| import java.util.List; | ||
| import java.util.Objects; | ||
|
|
@@ -39,18 +40,26 @@ public class SpecificSegmentsSpec implements CompactionInputSpec | |
|
|
||
| private final List<String> segments; | ||
|
|
||
| @Nullable | ||
| private final String dataSource; | ||
|
|
||
| public static SpecificSegmentsSpec fromSegments(List<DataSegment> segments) | ||
| { | ||
| Preconditions.checkArgument(!segments.isEmpty(), "Empty segment list"); | ||
| return new SpecificSegmentsSpec( | ||
| segments.stream().map(segment -> segment.getId().toString()).collect(Collectors.toList()) | ||
| segments.stream().map(segment -> segment.getId().toString()).collect(Collectors.toList()), | ||
| null | ||
| ); | ||
| } | ||
|
|
||
| @JsonCreator | ||
| public SpecificSegmentsSpec(@JsonProperty("segments") List<String> segments) | ||
| public SpecificSegmentsSpec( | ||
| @JsonProperty("segments") List<String> segments, | ||
| @JsonProperty("dataSource") @Nullable String dataSource | ||
| ) | ||
| { | ||
| this.segments = segments; | ||
| this.dataSource = dataSource; | ||
| // Sort segments to use in validateSegments. | ||
| Collections.sort(this.segments); | ||
| } | ||
|
|
@@ -61,6 +70,14 @@ public List<String> getSegments() | |
| return segments; | ||
| } | ||
|
|
||
| @Override | ||
| @Nullable | ||
| @JsonProperty | ||
| public String getDataSource() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: let's add some javadoc to this |
||
| { | ||
| return dataSource; | ||
| } | ||
|
|
||
| @Override | ||
| public Interval findInterval(String dataSource) | ||
| { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's avoid writing this if possible