-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Add export capabilities to MSQ with SQL syntax #15689
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
51639bc
158ba0c
9e1ef17
06e9d92
6ff6747
53ff841
6daf530
550cc8f
9ab7b37
5fd7ded
e6c75ab
4c9d4cc
529d14b
58f1d13
3db7a1b
f3ebe05
c45c357
8ed422c
6e7262d
f75188e
e571a0a
1247461
d5d3693
55a4aed
0063767
7cbdef1
6f46c41
3f8d715
7c00062
6e9f53b
9bbda77
20103ca
aa3ce05
0501106
ebfc53e
941605b
eb73cc2
1867cce
9ff0cd4
8e21576
62c2c04
2c9e87b
c79c496
81ee2a3
f9873a6
b4a2223
c7f8234
cf15323
2ff3410
c71cc5a
180f132
5206e90
10217a5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -106,6 +106,7 @@ | |
| import org.apache.druid.msq.indexing.client.ControllerChatHandler; | ||
| import org.apache.druid.msq.indexing.destination.DataSourceMSQDestination; | ||
| import org.apache.druid.msq.indexing.destination.DurableStorageMSQDestination; | ||
| import org.apache.druid.msq.indexing.destination.ExportMSQDestination; | ||
| import org.apache.druid.msq.indexing.destination.MSQSelectDestination; | ||
| import org.apache.druid.msq.indexing.destination.TaskReportMSQDestination; | ||
| import org.apache.druid.msq.indexing.error.CanceledFault; | ||
|
|
@@ -165,9 +166,9 @@ | |
| import org.apache.druid.msq.querykit.MultiQueryKit; | ||
| import org.apache.druid.msq.querykit.QueryKit; | ||
| import org.apache.druid.msq.querykit.QueryKitUtils; | ||
| import org.apache.druid.msq.querykit.ShuffleSpecFactories; | ||
| import org.apache.druid.msq.querykit.ShuffleSpecFactory; | ||
| import org.apache.druid.msq.querykit.groupby.GroupByQueryKit; | ||
| import org.apache.druid.msq.querykit.results.ExportResultsFrameProcessorFactory; | ||
| import org.apache.druid.msq.querykit.results.QueryResultFrameProcessorFactory; | ||
| import org.apache.druid.msq.querykit.scan.ScanQueryKit; | ||
| import org.apache.druid.msq.shuffle.input.DurableStorageInputChannelFactory; | ||
|
|
@@ -201,6 +202,8 @@ | |
| import org.apache.druid.sql.calcite.planner.ColumnMapping; | ||
| import org.apache.druid.sql.calcite.planner.ColumnMappings; | ||
| import org.apache.druid.sql.calcite.rel.DruidQuery; | ||
| import org.apache.druid.sql.http.ResultFormat; | ||
| import org.apache.druid.storage.ExportStorageProvider; | ||
| import org.apache.druid.timeline.DataSegment; | ||
| import org.apache.druid.timeline.SegmentTimeline; | ||
| import org.apache.druid.timeline.partition.DimensionRangeShardSpec; | ||
|
|
@@ -220,6 +223,7 @@ | |
| import java.util.Comparator; | ||
| import java.util.HashMap; | ||
| import java.util.HashSet; | ||
| import java.util.Iterator; | ||
| import java.util.LinkedHashSet; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
|
|
@@ -1756,7 +1760,8 @@ private static QueryDefinition makeQueryDefinition( | |
| final ShuffleSpecFactory shuffleSpecFactory; | ||
|
|
||
| if (MSQControllerTask.isIngestion(querySpec)) { | ||
| shuffleSpecFactory = ShuffleSpecFactories.getGlobalSortWithTargetSize(tuningConfig.getRowsPerSegment()); | ||
| shuffleSpecFactory = querySpec.getDestination() | ||
| .getShuffleSpecFactory(tuningConfig.getRowsPerSegment()); | ||
|
|
||
| if (!columnMappings.hasUniqueOutputColumnNames()) { | ||
| // We do not expect to hit this case in production, because the SQL validator checks that column names | ||
|
|
@@ -1777,16 +1782,10 @@ private static QueryDefinition makeQueryDefinition( | |
| } else { | ||
| queryToPlan = querySpec.getQuery(); | ||
| } | ||
| } else if (querySpec.getDestination() instanceof TaskReportMSQDestination) { | ||
| shuffleSpecFactory = ShuffleSpecFactories.singlePartition(); | ||
| queryToPlan = querySpec.getQuery(); | ||
| } else if (querySpec.getDestination() instanceof DurableStorageMSQDestination) { | ||
| shuffleSpecFactory = ShuffleSpecFactories.getGlobalSortWithTargetSize( | ||
| MultiStageQueryContext.getRowsPerPage(querySpec.getQuery().context()) | ||
| ); | ||
| queryToPlan = querySpec.getQuery(); | ||
| } else { | ||
| throw new ISE("Unsupported destination [%s]", querySpec.getDestination()); | ||
| shuffleSpecFactory = querySpec.getDestination() | ||
| .getShuffleSpecFactory(MultiStageQueryContext.getRowsPerPage(querySpec.getQuery().context())); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for the refactor. It's much cleaner now.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do you mean a comment everywhere the function is being called? We don't pass the whole context to getShuffleSpecFactory(), just the integer, so would this need to be specifically mentioned somewhere? |
||
| queryToPlan = querySpec.getQuery(); | ||
| } | ||
|
|
||
| final QueryDefinition queryDef; | ||
|
|
@@ -1877,6 +1876,43 @@ private static QueryDefinition makeQueryDefinition( | |
| } else { | ||
| return queryDef; | ||
| } | ||
| } else if (querySpec.getDestination() instanceof ExportMSQDestination) { | ||
| final ExportMSQDestination exportMSQDestination = (ExportMSQDestination) querySpec.getDestination(); | ||
| final ExportStorageProvider exportStorageProvider = exportMSQDestination.getExportStorageProvider(); | ||
|
|
||
| try { | ||
| // Check that the export destination is empty as a sanity check. We want to avoid modifying any other files with export. | ||
| Iterator<String> filesIterator = exportStorageProvider.get().listDir(""); | ||
| if (filesIterator.hasNext()) { | ||
| throw DruidException.forPersona(DruidException.Persona.USER) | ||
| .ofCategory(DruidException.Category.RUNTIME_FAILURE) | ||
| .build("Found files at provided export destination[%s]. Export is only allowed to " | ||
| + "an empty path. Please provide an empty path/subdirectory or move the existing files.", | ||
| exportStorageProvider.getBasePath()); | ||
| } | ||
| } | ||
| catch (IOException e) { | ||
| throw DruidException.forPersona(DruidException.Persona.USER) | ||
| .ofCategory(DruidException.Category.RUNTIME_FAILURE) | ||
| .build(e, "Exception occurred while connecting to export destination."); | ||
| } | ||
|
|
||
|
|
||
| final ResultFormat resultFormat = exportMSQDestination.getResultFormat(); | ||
| final QueryDefinitionBuilder builder = QueryDefinition.builder(); | ||
| builder.addAll(queryDef); | ||
| builder.add(StageDefinition.builder(queryDef.getNextStageNumber()) | ||
| .inputs(new StageInputSpec(queryDef.getFinalStageDefinition().getStageNumber())) | ||
| .maxWorkerCount(tuningConfig.getMaxNumWorkers()) | ||
| .signature(queryDef.getFinalStageDefinition().getSignature()) | ||
| .shuffleSpec(null) | ||
| .processorFactory(new ExportResultsFrameProcessorFactory( | ||
| queryId, | ||
| exportStorageProvider, | ||
| resultFormat | ||
| )) | ||
| ); | ||
| return builder.build(); | ||
| } else { | ||
| throw new ISE("Unsupported destination [%s]", querySpec.getDestination()); | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `AS` clause would not be an argument to extern; it's present elsewhere in the query. Would it be confusing to call it an argument?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about the change I just made?