-
Notifications
You must be signed in to change notification settings - Fork 3.8k
ability to build and load custom segment in realtime node #4448
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,10 +21,12 @@ | |
|
|
||
| import com.fasterxml.jackson.annotation.JsonCreator; | ||
| import com.fasterxml.jackson.annotation.JsonProperty; | ||
| import io.druid.segment.IndexMerger; | ||
| import io.druid.segment.IndexSpec; | ||
| import io.druid.segment.indexing.RealtimeTuningConfig; | ||
| import io.druid.segment.indexing.TuningConfig; | ||
| import io.druid.segment.realtime.appenderator.AppenderatorConfig; | ||
| import io.druid.segment.realtime.plumber.SinkFactory; | ||
| import org.joda.time.Period; | ||
|
|
||
| import java.io.File; | ||
|
|
@@ -44,6 +46,8 @@ public class KafkaTuningConfig implements TuningConfig, AppenderatorConfig | |
| @Deprecated | ||
| private final long handoffConditionTimeout; | ||
| private final boolean resetOffsetAutomatically; | ||
| private final SinkFactory sinkFactory; | ||
| private final IndexMerger customIndexMerger; | ||
|
|
||
| @JsonCreator | ||
| public KafkaTuningConfig( | ||
|
|
@@ -57,7 +61,9 @@ public KafkaTuningConfig( | |
| @JsonProperty("buildV9Directly") Boolean buildV9Directly, | ||
| @JsonProperty("reportParseExceptions") Boolean reportParseExceptions, | ||
| @JsonProperty("handoffConditionTimeout") Long handoffConditionTimeout, | ||
| @JsonProperty("resetOffsetAutomatically") Boolean resetOffsetAutomatically | ||
| @JsonProperty("resetOffsetAutomatically") Boolean resetOffsetAutomatically, | ||
| @JsonProperty("sinkFactory") SinkFactory sinkFactory, | ||
| @JsonProperty("customIndexMerger") IndexMerger customIndexMerger | ||
| ) | ||
| { | ||
| // Cannot be a static because default basePersistDirectory is unique per-instance | ||
|
|
@@ -80,6 +86,8 @@ public KafkaTuningConfig( | |
| this.resetOffsetAutomatically = resetOffsetAutomatically == null | ||
| ? DEFAULT_RESET_OFFSET_AUTOMATICALLY | ||
| : resetOffsetAutomatically; | ||
| this.sinkFactory = sinkFactory == null ? defaults.getSinkFactory() : sinkFactory; | ||
| this.customIndexMerger = customIndexMerger; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to handle null here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The caller will handle a null customIndexMerger, as in IndexTask: customIndexMerger is a customized IndexMerger, which can be null if the user doesn't need it. |
||
| } | ||
|
|
||
| public static KafkaTuningConfig copyOf(KafkaTuningConfig config) | ||
|
|
@@ -94,7 +102,9 @@ public static KafkaTuningConfig copyOf(KafkaTuningConfig config) | |
| true, | ||
| config.reportParseExceptions, | ||
| config.handoffConditionTimeout, | ||
| config.resetOffsetAutomatically | ||
| config.resetOffsetAutomatically, | ||
| config.sinkFactory, | ||
| config.customIndexMerger | ||
| ); | ||
| } | ||
|
|
||
|
|
@@ -125,6 +135,20 @@ public File getBasePersistDirectory() | |
| return basePersistDirectory; | ||
| } | ||
|
|
||
| @Override | ||
| @JsonProperty | ||
| public SinkFactory getSinkFactory() | ||
| { | ||
| return sinkFactory; | ||
| } | ||
|
|
||
| @Override | ||
| @JsonProperty | ||
| public IndexMerger getCustomIndexMerger() | ||
| { | ||
| return customIndexMerger; | ||
| } | ||
|
|
||
| @Override | ||
| @JsonProperty | ||
| public int getMaxPendingPersists() | ||
|
|
@@ -181,7 +205,9 @@ public KafkaTuningConfig withBasePersistDirectory(File dir) | |
| true, | ||
| reportParseExceptions, | ||
| handoffConditionTimeout, | ||
| resetOffsetAutomatically | ||
| resetOffsetAutomatically, | ||
| sinkFactory, | ||
| customIndexMerger | ||
| ); | ||
| } | ||
|
|
||
|
|
@@ -197,7 +223,9 @@ public KafkaTuningConfig withMaxRowsInMemory(int rows) | |
| true, | ||
| reportParseExceptions, | ||
| handoffConditionTimeout, | ||
| resetOffsetAutomatically | ||
| resetOffsetAutomatically, | ||
| sinkFactory, | ||
| customIndexMerger | ||
| ); | ||
| } | ||
|
|
||
|
|
@@ -241,8 +269,15 @@ public boolean equals(Object o) | |
| : that.basePersistDirectory != null) { | ||
| return false; | ||
| } | ||
| return indexSpec != null ? indexSpec.equals(that.indexSpec) : that.indexSpec == null; | ||
|
|
||
| if (indexSpec != null ? !indexSpec.equals(that.indexSpec) : that.indexSpec != null) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Refactor to use Objects.equals()
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's generated by IntelliJ IDEA, but sure, I will modify it.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can generate the forms that I suggested with IntelliJ as well; you need to choose another option in the generation dialog. Once you choose it, it will be the default later. |
||
| return false; | ||
| } | ||
| if (sinkFactory != null ? !sinkFactory.equals(that.sinkFactory) : that.sinkFactory != null) { | ||
| return false; | ||
| } | ||
| return customIndexMerger != null | ||
| ? customIndexMerger.equals(that.customIndexMerger) | ||
| : that.customIndexMerger == null; | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -257,6 +292,8 @@ public int hashCode() | |
| result = 31 * result + (reportParseExceptions ? 1 : 0); | ||
| result = 31 * result + (int) (handoffConditionTimeout ^ (handoffConditionTimeout >>> 32)); | ||
| result = 31 * result + (resetOffsetAutomatically ? 1 : 0); | ||
| result = 31 * result + (sinkFactory != null ? sinkFactory.hashCode() : 0); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Refactor to use Objects.hash()
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure |
||
| result = 31 * result + (customIndexMerger != null ? customIndexMerger.hashCode() : 0); | ||
| return result; | ||
| } | ||
|
|
||
|
|
@@ -273,6 +310,8 @@ public String toString() | |
| ", reportParseExceptions=" + reportParseExceptions + | ||
| ", handoffConditionTimeout=" + handoffConditionTimeout + | ||
| ", resetOffsetAutomatically=" + resetOffsetAutomatically + | ||
| ", sinkFactory=" + sinkFactory + | ||
| ", customIndexMerger=" + customIndexMerger + | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IndexMerger doesn't provide proper toString() method
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IndexMerger is an interface now; implementations of this interface should implement toString. |
||
| '}'; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -36,6 +36,7 @@ | |
| import io.druid.query.Query; | ||
| import io.druid.query.QueryRunner; | ||
| import io.druid.segment.IndexIO; | ||
| import io.druid.segment.IndexMerger; | ||
| import io.druid.segment.IndexMergerV9; | ||
| import io.druid.segment.QueryableIndex; | ||
| import io.druid.segment.SegmentUtils; | ||
|
|
@@ -98,7 +99,7 @@ public Plumber findPlumber( | |
| ) | ||
| { | ||
| // There can be only one. | ||
| final Sink theSink = new Sink( | ||
| final Sink theSink = config.getSinkFactory().create( | ||
| interval, | ||
| schema, | ||
| config.getShardSpec(), | ||
|
|
@@ -113,6 +114,10 @@ public Plumber findPlumber( | |
| // Set of spilled segments. Will be merged at the end. | ||
| final Set<File> spilled = Sets.newHashSet(); | ||
|
|
||
| // IndexMerger implementation. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not useful comment |
||
| final IndexMerger theIndexMerger = config.getCustomIndexMerger() != null | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Regarding null handling - doesn't Jackson handle it up front, when you specify
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Usually the default implementation is provided in the class context, like |
||
| ? config.getCustomIndexMerger() : indexMergerV9; | ||
|
|
||
| return new Plumber() | ||
| { | ||
| @Override | ||
|
|
@@ -181,7 +186,7 @@ public void finishJob() | |
| } | ||
|
|
||
| fileToUpload = new File(tmpSegmentDir, "merged"); | ||
| indexMergerV9.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), fileToUpload, config.getIndexSpec()); | ||
| theIndexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), fileToUpload, config.getIndexSpec()); | ||
| } | ||
|
|
||
| // Map merged segment so we can extract dimensions | ||
|
|
@@ -226,7 +231,7 @@ private void spillIfSwappable() | |
| log.info("Spilling index[%d] with rows[%d] to: %s", indexToPersist.getCount(), rowsToPersist, dirToPersist); | ||
|
|
||
| try { | ||
| indexMergerV9.persist( | ||
| theIndexMerger.persist( | ||
| indexToPersist.getIndex(), | ||
| dirToPersist, | ||
| config.getIndexSpec() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please extract an umbrella abstraction like "SegmentStrategy" with createSinkFactory() and createIndexMerger() methods, and inject/serialize/deserialize only it, here and in other places.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure.