-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Update TableInputSpec to be able to handle specific segments.
#18922
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,10 +23,12 @@ | |
| import com.fasterxml.jackson.annotation.JsonInclude; | ||
| import com.fasterxml.jackson.annotation.JsonProperty; | ||
| import com.fasterxml.jackson.annotation.JsonTypeName; | ||
| import org.apache.druid.java.util.common.IAE; | ||
| import org.apache.druid.java.util.common.Intervals; | ||
| import org.apache.druid.msq.input.InputSpec; | ||
| import org.apache.druid.msq.input.LoadableSegment; | ||
| import org.apache.druid.msq.input.PhysicalInputSlice; | ||
| import org.apache.druid.query.SegmentDescriptor; | ||
| import org.apache.druid.query.filter.DimFilter; | ||
| import org.joda.time.Interval; | ||
|
|
||
|
|
@@ -44,6 +46,9 @@ public class TableInputSpec implements InputSpec | |
| private final String dataSource; | ||
| private final List<Interval> intervals; | ||
|
|
||
| @Nullable | ||
| private final List<SegmentDescriptor> segments; | ||
|
|
||
| @Nullable | ||
| private final DimFilter filter; | ||
|
|
||
|
|
@@ -58,6 +63,8 @@ public class TableInputSpec implements InputSpec | |
| * meaning that when this spec is sliced and read, the returned {@link LoadableSegment} | ||
| * from {@link PhysicalInputSlice#getLoadableSegments()} are clipped to these intervals using | ||
| * {@link LoadableSegment#descriptor()}. | ||
| * @param segments specific segments to read, or null to read all segments in the intervals. If provided, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does there need to be documentation around what happens with non-null intervals?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This would be the same as requesting year-2025 data when only year-2026 data is available. MSQ input allows it, although the compaction IO config doesn't allow this (that is not in this PR, though). |
||
| * only these segments will be read. Must not be empty if non-null. | ||
| * @param filter other filters to use for pruning, or null if no pruning is desired. Pruning filters are | ||
| * *not strict*, which means that processors must re-apply them when processing the returned | ||
| * {@link LoadableSegment} from {@link PhysicalInputSlice#getLoadableSegments()}. This matches how | ||
|
|
@@ -69,16 +76,35 @@ public class TableInputSpec implements InputSpec | |
| public TableInputSpec( | ||
| @JsonProperty("dataSource") String dataSource, | ||
| @JsonProperty("intervals") @Nullable List<Interval> intervals, | ||
| @JsonProperty("segments") @Nullable List<SegmentDescriptor> segments, | ||
| @JsonProperty("filter") @Nullable DimFilter filter, | ||
| @JsonProperty("filterFields") @Nullable Set<String> filterFields | ||
| ) | ||
| { | ||
| this.dataSource = dataSource; | ||
| this.intervals = intervals == null ? Intervals.ONLY_ETERNITY : intervals; | ||
| if (segments != null && segments.isEmpty()) { | ||
| throw new IAE("Can not supply empty segments as input, please use either null or non-empty segments."); | ||
| } | ||
| this.segments = segments; | ||
| this.filter = filter; | ||
| this.filterFields = filterFields; | ||
| } | ||
|
|
||
| /** | ||
| * @deprecated Use {@link #TableInputSpec(String, List, List, DimFilter, Set)} with explicit null for segments instead. | ||
| */ | ||
| @Deprecated | ||
| public TableInputSpec( | ||
| String dataSource, | ||
| @Nullable List<Interval> intervals, | ||
| @Nullable DimFilter filter, | ||
| @Nullable Set<String> filterFields | ||
| ) | ||
| { | ||
| this(dataSource, intervals, null, filter, filterFields); | ||
| } | ||
|
|
||
| @JsonProperty | ||
| public String getDataSource() | ||
| { | ||
|
|
@@ -99,6 +125,14 @@ private List<Interval> getIntervalsForSerialization() | |
| return intervals.equals(Intervals.ONLY_ETERNITY) ? null : intervals; | ||
| } | ||
|
|
||
| @JsonProperty | ||
| @JsonInclude(JsonInclude.Include.NON_NULL) | ||
| @Nullable | ||
| public List<SegmentDescriptor> getSegments() | ||
| { | ||
| return segments; | ||
| } | ||
|
|
||
| @JsonProperty | ||
| @JsonInclude(JsonInclude.Include.NON_NULL) | ||
| @Nullable | ||
|
|
@@ -127,14 +161,15 @@ public boolean equals(Object o) | |
| TableInputSpec that = (TableInputSpec) o; | ||
| return Objects.equals(dataSource, that.dataSource) | ||
| && Objects.equals(intervals, that.intervals) | ||
| && Objects.equals(segments, that.segments) | ||
| && Objects.equals(filter, that.filter) | ||
| && Objects.equals(filterFields, that.filterFields); | ||
| } | ||
|
|
||
| @Override | ||
| public int hashCode() | ||
| { | ||
| return Objects.hash(dataSource, intervals, filter, filterFields); | ||
| return Objects.hash(dataSource, intervals, segments, filter, filterFields); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -143,6 +178,7 @@ public String toString() | |
| return "TableInputSpec{" + | ||
| "dataSource='" + dataSource + '\'' + | ||
| ", intervals=" + intervals + | ||
| (segments == null ? "" : ", segments=" + segments) + | ||
| (filter == null ? "" : ", filter=" + filter) + | ||
| (filterFields == null ? "" : ", filterFields=" + filterFields) + | ||
| '}'; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think I fully follow this comment.
`A segment outside search intervals`. Is that referring to a segment that is in
`tableInputSpec.getSegments()` but ends up not overlapping any search intervals and thus does not get found? Same idea regarding the
`0 time` comment. Is that just saying that even though a segment was in `tableInputSpec.getSegments()`, it may not appear in the iterator? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Think of it as a combination of
`MultipleIntervalSegmentSpec` and `MultipleSpecificSegmentSpec`; on top of that, this spec also has filter stuff. The `0 time` is actually not a change. Even before this change, if we have two intervals, day0 and day1, we can have segments from both days, so we could build a
`DataSegmentWithInterval` with a segment from day0 and interval day1, but it doesn't matter in the end. We usually only have one interval, though, so this doesn't happen often. I guess the change here is that segments can now be user input, so they could be anything.