From 51639bc0a0c886bbacc298bf5bb4b025a8318726 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 10 Jan 2024 12:23:45 +0530 Subject: [PATCH 01/50] Add test --- .../druid/sql/calcite/CalciteExportTest.java | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java new file mode 100644 index 000000000000..d1edf2823216 --- /dev/null +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.sql.calcite; + +import org.apache.druid.query.Druids; +import org.apache.druid.query.scan.ScanQuery; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.sql.calcite.filtration.Filtration; +import org.junit.Test; + +public class CalciteExportTest extends CalciteIngestionDmlTest +{ + + @Test + public void name() + { + testIngestionQuery() + .sql("REPLACE OVERWRITE TO EXTERNAL('{\"type\":\"hdfs\",\"uri\":\"hdfs://localhost:9090/outputdirectory/\"}') AS 'CSV' SELECT dim2 FROM foo") + .expectQuery( + Druids.newScanQueryBuilder() + .dataSource( + "foo" + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .build() + ) + .expectResources(dataSourceRead("foo"), dataSourceWrite("extern")) + .expectTarget("extern", RowSignature.builder().add("dim2", ColumnType.STRING).build()) + .verify(); + } +} From 158ba0ce8793eb37c752b176578e0440c50a38c3 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Mon, 15 Jan 2024 12:12:12 +0530 Subject: [PATCH 02/50] Parser changes to support export statements --- .../destination/ExportMSQDestination.java | 71 ++++++++++++ .../indexing/destination/MSQDestination.java | 1 + .../destination/MSQSelectDestination.java | 4 + .../druid/msq/sql/MSQTaskQueryMaker.java | 14 ++- .../druid/msq/sql/MSQTaskSqlEngine.java | 5 +- .../destination/ExportMSQDestinationTest.java | 43 +++++++ .../model/table/IngestDestination.java | 38 ++++++ .../model/table/export/ExportDestination.java | 26 +++++ .../table/export/S3ExportDestination.java | 85 ++++++++++++++ .../model/table/export/TableDestination.java | 70 +++++++++++ .../table/export/S3ExportDestinationTest.java | 40 +++++++ .../table/export/TableDestinationTest.java | 40 +++++++ sql/src/main/codegen/config.fmpp | 11 ++ sql/src/main/codegen/includes/common.ftl | 49 ++++++++ 
sql/src/main/codegen/includes/insert.ftl | 109 +++++++++--------- sql/src/main/codegen/includes/replace.ftl | 45 +++++--- .../sql/calcite/parser/DruidSqlIngest.java | 12 +- .../sql/calcite/parser/DruidSqlInsert.java | 6 +- .../sql/calcite/parser/DruidSqlReplace.java | 12 +- .../ExternalDestinationSqlIdentifier.java | 70 +++++++++++ .../calcite/planner/ExplainAttributes.java | 7 +- .../sql/calcite/planner/IngestHandler.java | 30 +++-- .../sql/calcite/run/NativeSqlEngine.java | 3 +- .../druid/sql/calcite/run/SqlEngine.java | 5 +- .../druid/sql/calcite/view/ViewSqlEngine.java | 3 +- .../druid/sql/calcite/CalciteExportTest.java | 7 +- .../sql/calcite/CalciteScanSignatureTest.java | 3 +- .../sql/calcite/IngestionTestSqlEngine.java | 5 +- .../sql/calcite/TestInsertQueryMaker.java | 9 +- .../calcite/parser/DruidSqlUnparseTest.java | 16 +++ .../planner/ExplainAttributesTest.java | 25 ++-- 31 files changed, 746 insertions(+), 118 deletions(-) create mode 100644 extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java create mode 100644 extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java create mode 100644 server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java create mode 100644 server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java create mode 100644 server/src/main/java/org/apache/druid/catalog/model/table/export/S3ExportDestination.java create mode 100644 server/src/main/java/org/apache/druid/catalog/model/table/export/TableDestination.java create mode 100644 server/src/test/java/org/apache/druid/catalog/model/table/export/S3ExportDestinationTest.java create mode 100644 server/src/test/java/org/apache/druid/catalog/model/table/export/TableDestinationTest.java create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java diff --git 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java new file mode 100644 index 000000000000..e21b6dabf142 --- /dev/null +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.msq.indexing.destination; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.catalog.model.table.export.ExportDestination; + +import java.util.Objects; + +public class ExportMSQDestination implements MSQDestination +{ + public static final String TYPE = "export"; + private final ExportDestination exportDestination; + + @JsonCreator + public ExportMSQDestination(@JsonProperty("exportDestination") ExportDestination exportDestination) + { + this.exportDestination = exportDestination; + } + + @JsonProperty("exportDestination") + public ExportDestination getExportDestination() + { + return exportDestination; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ExportMSQDestination that = (ExportMSQDestination) o; + return Objects.equals(exportDestination, that.exportDestination); + } + + @Override + public int hashCode() + { + return Objects.hash(exportDestination); + } + + @Override + public String toString() + { + return "ExportMSQDestination{" + + "exportDestination=" + exportDestination + + '}'; + } +} diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQDestination.java index 52489d15a343..be58a48bb9ed 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQDestination.java @@ -26,6 +26,7 @@ @JsonSubTypes(value = { @JsonSubTypes.Type(name = DataSourceMSQDestination.TYPE, value = DataSourceMSQDestination.class), @JsonSubTypes.Type(name = TaskReportMSQDestination.TYPE, value = TaskReportMSQDestination.class), + 
@JsonSubTypes.Type(name = ExportMSQDestination.TYPE, value = ExportMSQDestination.class), @JsonSubTypes.Type(name = DurableStorageMSQDestination.TYPE, value = DurableStorageMSQDestination.class) }) public interface MSQDestination diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQSelectDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQSelectDestination.java index db57fdc5dc07..719e571ab6ca 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQSelectDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQSelectDestination.java @@ -30,6 +30,10 @@ public enum MSQSelectDestination * Writes all the results directly to the report. */ TASKREPORT("taskReport", false), + /** + * Writes the results as rows to a location. + */ + EXPORT("export", false), /** * Writes the results as frame files to durable storage. Task report can be truncated to a preview. 
*/ diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java index d38fa1a8dc64..c9874cbde659 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java @@ -25,6 +25,9 @@ import org.apache.calcite.runtime.Hook; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.Pair; +import org.apache.druid.catalog.model.table.IngestDestination; +import org.apache.druid.catalog.model.table.export.ExportDestination; +import org.apache.druid.catalog.model.table.export.TableDestination; import org.apache.druid.common.guava.FutureUtils; import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidInput; @@ -39,6 +42,7 @@ import org.apache.druid.msq.indexing.MSQTuningConfig; import org.apache.druid.msq.indexing.destination.DataSourceMSQDestination; import org.apache.druid.msq.indexing.destination.DurableStorageMSQDestination; +import org.apache.druid.msq.indexing.destination.ExportMSQDestination; import org.apache.druid.msq.indexing.destination.MSQDestination; import org.apache.druid.msq.indexing.destination.MSQSelectDestination; import org.apache.druid.msq.indexing.destination.TaskReportMSQDestination; @@ -80,7 +84,7 @@ public class MSQTaskQueryMaker implements QueryMaker private static final Granularity DEFAULT_SEGMENT_GRANULARITY = Granularities.ALL; - private final String targetDataSource; + private final IngestDestination targetDataSource; private final OverlordClient overlordClient; private final PlannerContext plannerContext; private final ObjectMapper jsonMapper; @@ -88,7 +92,7 @@ public class MSQTaskQueryMaker implements QueryMaker MSQTaskQueryMaker( - @Nullable final String targetDataSource, + @Nullable final IngestDestination 
targetDataSource, final OverlordClient overlordClient, final PlannerContext plannerContext, final ObjectMapper jsonMapper, @@ -203,7 +207,9 @@ public QueryResponse runQuery(final DruidQuery druidQuery) final MSQDestination destination; - if (targetDataSource != null) { + if (targetDataSource instanceof ExportDestination) { + destination = new ExportMSQDestination((ExportDestination) targetDataSource); + } else if (targetDataSource instanceof TableDestination) { Granularity segmentGranularityObject; try { segmentGranularityObject = jsonMapper.readValue((String) segmentGranularity, Granularity.class); @@ -227,7 +233,7 @@ public QueryResponse runQuery(final DruidQuery druidQuery) ); final DataSourceMSQDestination dataSourceMSQDestination = new DataSourceMSQDestination( - targetDataSource, + targetDataSource.getDestinationName(), segmentGranularityObject, segmentSortOrder, replaceTimeChunks diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java index cb331760ca34..e9f0d1704932 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java @@ -31,6 +31,7 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.Pair; +import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidInput; import org.apache.druid.error.InvalidSqlInput; @@ -153,7 +154,7 @@ public OverlordClient overlordClient() @Override public QueryMaker buildQueryMakerForInsert( - final String targetDataSource, + final IngestDestination targetDestination, final RelRoot relRoot, final PlannerContext plannerContext ) @@ -161,7 +162,7 @@ public QueryMaker 
buildQueryMakerForInsert( validateInsert(relRoot.rel, relRoot.fields, plannerContext); return new MSQTaskQueryMaker( - targetDataSource, + targetDestination, overlordClient, plannerContext, jsonMapper, diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java new file mode 100644 index 000000000000..60e40179dd52 --- /dev/null +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.msq.indexing.destination; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.catalog.model.table.export.S3ExportDestination; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; + +public class ExportMSQDestinationTest +{ + + @Test + public void testSerde() throws IOException + { + ExportMSQDestination exportDestination = new ExportMSQDestination(new S3ExportDestination("hdfs://localhost:9090/outputdirectory/", null)); + ObjectMapper objectMapper = new DefaultObjectMapper(); + String s = objectMapper.writeValueAsString(exportDestination); + + ExportMSQDestination newDest = objectMapper.readValue(s, ExportMSQDestination.class); + Assert.assertEquals(exportDestination, newDest); + } +} diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java b/server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java new file mode 100644 index 000000000000..d52462997cd1 --- /dev/null +++ b/server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.catalog.model.table; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import org.apache.druid.catalog.model.table.export.S3ExportDestination; +import org.apache.druid.catalog.model.table.export.TableDestination; +import org.apache.druid.guice.annotations.UnstableApi; + +@UnstableApi +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = IngestDestination.TYPE_PROPERTY) +@JsonSubTypes(value = { + @JsonSubTypes.Type(name = TableDestination.TYPE_KEY, value = TableDestination.class), + @JsonSubTypes.Type(name = S3ExportDestination.TYPE_KEY, value = S3ExportDestination.class) +}) +public interface IngestDestination +{ + String TYPE_PROPERTY = "type"; + String getDestinationName(); +} diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java b/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java new file mode 100644 index 000000000000..d9f61c2a1d19 --- /dev/null +++ b/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.catalog.model.table.export; + +import org.apache.druid.catalog.model.table.IngestDestination; + +public interface ExportDestination extends IngestDestination +{ +} diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/export/S3ExportDestination.java b/server/src/main/java/org/apache/druid/catalog/model/table/export/S3ExportDestination.java new file mode 100644 index 000000000000..63ced77e4904 --- /dev/null +++ b/server/src/main/java/org/apache/druid/catalog/model/table/export/S3ExportDestination.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.catalog.model.table.export; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Map; +import java.util.Objects; + +public class S3ExportDestination implements ExportDestination +{ + public static final String TYPE_KEY = "s3"; + + private final String uri; + private final String username; + + public S3ExportDestination(Map properties) + { + this(properties.get("uri"), properties.get("username")); + } + + @JsonCreator + public S3ExportDestination(@JsonProperty("uri") String uri, @JsonProperty("username") String username) + { + this.uri = uri; + this.username = username; + } + + @JsonProperty("uri") + public String getUri() + { + return uri; + } + + @JsonProperty("username") + public String getUsername() + { + return username; + } + + @Override + @JsonIgnore + public String getDestinationName() + { + return TYPE_KEY; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + S3ExportDestination that = (S3ExportDestination) o; + return Objects.equals(uri, that.uri) && Objects.equals(username, that.username); + } + + @Override + public int hashCode() + { + return Objects.hash(uri, username); + } +} diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/export/TableDestination.java b/server/src/main/java/org/apache/druid/catalog/model/table/export/TableDestination.java new file mode 100644 index 000000000000..019bfa017ec1 --- /dev/null +++ b/server/src/main/java/org/apache/druid/catalog/model/table/export/TableDestination.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.catalog.model.table.export; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.catalog.model.table.IngestDestination; + +import java.util.Objects; + +public class TableDestination implements IngestDestination +{ + public static final String TYPE_KEY = "table"; + String tableName; + + @JsonCreator + public TableDestination(@JsonProperty("tableName") String tableName) + { + this.tableName = tableName; + } + + @Override + @JsonProperty("tableName") + public String getDestinationName() + { + return tableName; + } + + @Override + public String toString() + { + return tableName; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + TableDestination that = (TableDestination) o; + return Objects.equals(tableName, that.tableName); + } + + @Override + public int hashCode() + { + return Objects.hash(tableName); + } +} diff --git a/server/src/test/java/org/apache/druid/catalog/model/table/export/S3ExportDestinationTest.java b/server/src/test/java/org/apache/druid/catalog/model/table/export/S3ExportDestinationTest.java new file mode 100644 index 000000000000..b97fd1e65275 --- /dev/null +++ 
b/server/src/test/java/org/apache/druid/catalog/model/table/export/S3ExportDestinationTest.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.catalog.model.table.export; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; + +public class S3ExportDestinationTest +{ + @Test + public void testSerde() throws IOException + { + S3ExportDestination exportDestination = new S3ExportDestination("uri", "username"); + ObjectMapper objectMapper = new DefaultObjectMapper(); + byte[] bytes = objectMapper.writeValueAsBytes(exportDestination); + S3ExportDestination newDest = objectMapper.readValue(bytes, S3ExportDestination.class); + Assert.assertEquals(exportDestination, newDest); + } +} \ No newline at end of file diff --git a/server/src/test/java/org/apache/druid/catalog/model/table/export/TableDestinationTest.java b/server/src/test/java/org/apache/druid/catalog/model/table/export/TableDestinationTest.java new file mode 100644 index 000000000000..c1995476f405 --- /dev/null +++ 
b/server/src/test/java/org/apache/druid/catalog/model/table/export/TableDestinationTest.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.catalog.model.table.export; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; + +public class TableDestinationTest +{ + @Test + public void testSerde() throws IOException + { + TableDestination exportDestination = new TableDestination("tableName"); + ObjectMapper objectMapper = new DefaultObjectMapper(); + byte[] bytes = objectMapper.writeValueAsBytes(exportDestination); + TableDestination newDest = objectMapper.readValue(bytes, TableDestination.class); + Assert.assertEquals(exportDestination, newDest); + } +} \ No newline at end of file diff --git a/sql/src/main/codegen/config.fmpp b/sql/src/main/codegen/config.fmpp index 87195131b574..ee4adaed2e02 100644 --- a/sql/src/main/codegen/config.fmpp +++ b/sql/src/main/codegen/config.fmpp @@ -51,12 +51,17 @@ data: { "java.util.List" "org.apache.calcite.sql.SqlNode" "org.apache.calcite.sql.SqlInsert" + "org.apache.calcite.sql.SqlNodeList" + 
"org.apache.calcite.sql.SqlBasicCall" "org.apache.druid.java.util.common.granularity.Granularity" "org.apache.druid.java.util.common.granularity.Granularities" "org.apache.druid.sql.calcite.parser.DruidSqlInsert" "org.apache.druid.sql.calcite.parser.DruidSqlParserUtils" "org.apache.druid.sql.calcite.external.ExtendOperator" "org.apache.druid.sql.calcite.external.ParameterizeOperator" + "org.apache.druid.sql.calcite.parser.ExternalDestinationSqlIdentifier" + "org.apache.druid.catalog.model.table.export.S3ExportDestination" + "java.util.HashMap" ] # List of new keywords. Example: "DATABASES", "TABLES". If the keyword is not a reserved @@ -65,10 +70,16 @@ data: { "CLUSTERED" "OVERWRITE" "PARTITIONED" + "EXTERN" + "S3" + "CSV" ] nonReservedKeywordsToAdd: [ "OVERWRITE" + "EXTERN" + "S3" + "CSV" ] # List of methods for parsing custom SQL statements. diff --git a/sql/src/main/codegen/includes/common.ftl b/sql/src/main/codegen/includes/common.ftl index 59915bf09ed4..81b21b30ba47 100644 --- a/sql/src/main/codegen/includes/common.ftl +++ b/sql/src/main/codegen/includes/common.ftl @@ -106,3 +106,52 @@ SqlTypeNameSpec DruidType() : return new SqlUserDefinedTypeNameSpec(typeName, span().pos()); } } + +SqlIdentifier ExternalDestination() : +{ + final Span s; + Map properties = new HashMap(); +} +{ + ( + [ properties = ExternProperties() ] + { + s = span(); + return new ExternalDestinationSqlIdentifier( + org.apache.druid.catalog.model.table.export.S3ExportDestination.TYPE_KEY, + s.pos(), + new S3ExportDestination(properties), + properties + ); + } + ) +} + +Map ExternProperties() : +{ + final Span s; + final Map properties = new HashMap(); + SqlNodeList commaList = null; +} +{ + commaList = ExpressionCommaList(span(), ExprContext.ACCEPT_NON_QUERY) + { + for (SqlNode sqlNode : commaList) { + List sqlNodeList = ((SqlBasicCall) sqlNode).getOperandList(); + properties.put(((SqlIdentifier) sqlNodeList.get(0)).getSimple(), ((SqlIdentifier) sqlNodeList.get(1)).getSimple()); + } + 
return properties; + } +} + +// Parses the supported file formats for export. +String FileFormat() : +{} +{ + ( + + { + return "CSV"; + } + ) +} \ No newline at end of file diff --git a/sql/src/main/codegen/includes/insert.ftl b/sql/src/main/codegen/includes/insert.ftl index 00133496d240..51d647b47ec2 100644 --- a/sql/src/main/codegen/includes/insert.ftl +++ b/sql/src/main/codegen/includes/insert.ftl @@ -21,68 +21,68 @@ * Parses an INSERT statement. This function is copied from SqlInsert in core/src/main/codegen/templates/Parser.jj, * with some changes to allow a custom error message if an OVERWRITE clause is present. */ -SqlNode DruidSqlInsert() : +// Using fully qualified name for Pair class, since Calcite also has a same class name being used in the Parser.jj +SqlNode DruidSqlInsertEof() : { - final List keywords = new ArrayList(); - final SqlNodeList keywordList; - final SqlIdentifier tableName; - SqlNode tableRef; - SqlNode source; - final SqlNodeList columnList; - final Span s; - final Pair p; + SqlNode insertNode; + final List keywords = new ArrayList(); + final SqlNodeList keywordList; + final SqlIdentifier destination; + SqlNode tableRef = null; + SqlNode source; + final SqlNodeList columnList; + final Span s; + final Pair p; + org.apache.druid.java.util.common.Pair partitionedBy = new org.apache.druid.java.util.common.Pair(null, null); + SqlNodeList clusteredBy = null; + String exportFileFormat = null; } { - ( - + ( + | - { keywords.add(SqlInsertKeyword.UPSERT.symbol(getPos())); } - ) - { s = span(); } - SqlInsertKeywords(keywords) { - keywordList = new SqlNodeList(keywords, s.addAll(keywords).pos()); - } - tableName = CompoundTableIdentifier() - ( tableRef = TableHints(tableName) | { tableRef = tableName; } ) + { keywords.add(SqlInsertKeyword.UPSERT.symbol(getPos())); } + ) + { s = span(); } + SqlInsertKeywords(keywords) { + keywordList = new SqlNodeList(keywords, s.addAll(keywords).pos()); + } + + ( + LOOKAHEAD(2) + destination = ExternalDestination() + 
| + destination = CompoundTableIdentifier() + ( tableRef = TableHints(destination) | { tableRef = destination; } ) [ LOOKAHEAD(5) tableRef = ExtendTable(tableRef) ] - ( - LOOKAHEAD(2) - p = ParenthesizedCompoundIdentifierList() { - if (p.right.size() > 0) { - tableRef = extend(tableRef, p.right); - } - if (p.left.size() > 0) { - columnList = p.left; - } else { - columnList = null; - } - } - | { columnList = null; } - ) - ( + ) + ( + LOOKAHEAD(2) + p = ParenthesizedCompoundIdentifierList() { + if (p.right.size() > 0) { + tableRef = extend(tableRef, p.right); + } + if (p.left.size() > 0) { + columnList = p.left; + } else { + columnList = null; + } + } + | { columnList = null; } + ) + [ + exportFileFormat = FileFormat() + ] + ( { - throw org.apache.druid.sql.calcite.parser.DruidSqlParserUtils.problemParsing( - "An OVERWRITE clause is not allowed with INSERT statements. Use REPLACE statements if overwriting existing segments is required or remove the OVERWRITE clause." - ); + throw org.apache.druid.sql.calcite.parser.DruidSqlParserUtils.problemParsing( + "An OVERWRITE clause is not allowed with INSERT statements. Use REPLACE statements if overwriting existing segments is required or remove the OVERWRITE clause." + ); } | - source = OrderedQueryOrExpr(ExprContext.ACCEPT_QUERY) { - return new SqlInsert(s.end(source), keywordList, tableRef, source, - columnList); - } - ) -} - -// Using fully qualified name for Pair class, since Calcite also has a same class name being used in the Parser.jj -SqlNode DruidSqlInsertEof() : -{ - SqlNode insertNode; - org.apache.druid.java.util.common.Pair partitionedBy = new org.apache.druid.java.util.common.Pair(null, null); - SqlNodeList clusteredBy = null; -} -{ - insertNode = DruidSqlInsert() + source = OrderedQueryOrExpr(ExprContext.ACCEPT_QUERY) + ) // PARTITIONED BY is necessary, but is kept optional in the grammar. It is asserted that it is not missing in the // DruidSqlInsert constructor so that we can return a custom error message. 
[ @@ -105,12 +105,13 @@ SqlNode DruidSqlInsertEof() : // actual error message. { + insertNode = new SqlInsert(s.end(source), keywordList, tableRef, source, columnList); if (!(insertNode instanceof SqlInsert)) { // This shouldn't be encountered, but done as a defensive practice. SqlInsert() always returns a node of type // SqlInsert return insertNode; } SqlInsert sqlInsert = (SqlInsert) insertNode; - return new DruidSqlInsert(sqlInsert, partitionedBy.lhs, partitionedBy.rhs, clusteredBy); + return new DruidSqlInsert(sqlInsert, partitionedBy.lhs, partitionedBy.rhs, clusteredBy, exportFileFormat); } } diff --git a/sql/src/main/codegen/includes/replace.ftl b/sql/src/main/codegen/includes/replace.ftl index f3ea3a567610..b2c5e55ff956 100644 --- a/sql/src/main/codegen/includes/replace.ftl +++ b/sql/src/main/codegen/includes/replace.ftl @@ -20,33 +20,52 @@ // Taken from syntax of SqlInsert statement from calcite parser, edited for replace syntax SqlNode DruidSqlReplaceEof() : { - SqlNode table; + final SqlIdentifier destination; SqlNode source; SqlNodeList columnList = null; final Span s; + SqlNode tableRef = null; SqlInsert sqlInsert; // Using fully qualified name for Pair class, since Calcite also has a same class name being used in the Parser.jj org.apache.druid.java.util.common.Pair partitionedBy = new org.apache.druid.java.util.common.Pair(null, null); SqlNodeList clusteredBy = null; final Pair p; SqlNode replaceTimeQuery = null; + String exportFileFormat = null; } { { s = span(); } - table = CompoundIdentifier() - [ - p = ParenthesizedCompoundIdentifierList() { - if (p.left.size() > 0) { - columnList = p.left; - } + ( + LOOKAHEAD(2) + destination = ExternalDestination() + | + destination = CompoundTableIdentifier() + ( tableRef = TableHints(destination) | { tableRef = destination; } ) + [ LOOKAHEAD(5) tableRef = ExtendTable(tableRef) ] + ) + ( + LOOKAHEAD(2) + p = ParenthesizedCompoundIdentifierList() { + if (p.right.size() > 0) { + tableRef = extend(tableRef, 
p.right); } + if (p.left.size() > 0) { + columnList = p.left; + } else { + columnList = null; + } + } + | { columnList = null; } + ) + [ + exportFileFormat = FileFormat() ] [ - - [ - replaceTimeQuery = ReplaceTimeQuery() - ] + + [ + replaceTimeQuery = ReplaceTimeQuery() + ] ] source = OrderedQueryOrExpr(ExprContext.ACCEPT_QUERY) // PARTITIONED BY is necessary, but is kept optional in the grammar. It is asserted that it is not missing in the @@ -71,8 +90,8 @@ SqlNode DruidSqlReplaceEof() : // actual error message. { - sqlInsert = new SqlInsert(s.end(source), SqlNodeList.EMPTY, table, source, columnList); - return new DruidSqlReplace(sqlInsert, partitionedBy.lhs, partitionedBy.rhs, clusteredBy, replaceTimeQuery); + sqlInsert = new SqlInsert(s.end(source), SqlNodeList.EMPTY, destination, source, columnList); + return new DruidSqlReplace(sqlInsert, partitionedBy.lhs, partitionedBy.rhs, clusteredBy, replaceTimeQuery, exportFileFormat); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlIngest.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlIngest.java index 146d13673bde..a95db5d03ec6 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlIngest.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlIngest.java @@ -41,6 +41,8 @@ public abstract class DruidSqlIngest extends SqlInsert @Nullable protected final SqlNodeList clusteredBy; + @Nullable + private final String exportFileFormat; public DruidSqlIngest( SqlParserPos pos, @@ -50,7 +52,8 @@ public DruidSqlIngest( SqlNodeList columnList, @Nullable Granularity partitionedBy, @Nullable String partitionedByStringForUnparse, - @Nullable SqlNodeList clusteredBy + @Nullable SqlNodeList clusteredBy, + @Nullable String exportFileFormat ) { super(pos, keywords, targetTable, source, columnList); @@ -58,6 +61,7 @@ public DruidSqlIngest( this.partitionedByStringForUnparse = partitionedByStringForUnparse; this.partitionedBy = partitionedBy; 
this.clusteredBy = clusteredBy; + this.exportFileFormat = exportFileFormat; } public Granularity getPartitionedBy() @@ -70,4 +74,10 @@ public SqlNodeList getClusteredBy() { return clusteredBy; } + + @Nullable + public String getExportFileFormat() + { + return exportFileFormat; + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlInsert.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlInsert.java index c2eeb2ed1e4d..41a3bf23d152 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlInsert.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlInsert.java @@ -51,7 +51,8 @@ public DruidSqlInsert( @Nonnull SqlInsert insertNode, @Nullable Granularity partitionedBy, @Nullable String partitionedByStringForUnparse, - @Nullable SqlNodeList clusteredBy + @Nullable SqlNodeList clusteredBy, + @Nullable String exportFileFormat ) { super( @@ -62,7 +63,8 @@ public DruidSqlInsert( insertNode.getTargetColumnList(), partitionedBy, partitionedByStringForUnparse, - clusteredBy + clusteredBy, + exportFileFormat ); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlReplace.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlReplace.java index d527a08b59ec..cec9996d460d 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlReplace.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlReplace.java @@ -56,7 +56,8 @@ public DruidSqlReplace( @Nullable Granularity partitionedBy, @Nullable String partitionedByStringForUnparse, @Nullable SqlNodeList clusteredBy, - @Nullable SqlNode replaceTimeQuery + @Nullable SqlNode replaceTimeQuery, + @Nullable String exportFileFormat ) { super( @@ -67,7 +68,8 @@ public DruidSqlReplace( insertNode.getTargetColumnList(), partitionedBy, partitionedByStringForUnparse, - clusteredBy + clusteredBy, + exportFileFormat ); this.replaceTimeQuery = replaceTimeQuery; @@ -99,6 +101,12 @@ public void 
unparse(SqlWriter writer, int leftPrec, int rightPrec) } writer.newlineAndIndent(); + if (getExportFileFormat() != null) { + writer.keyword("AS"); + writer.print(getExportFileFormat()); + writer.newlineAndIndent(); + } + writer.keyword("OVERWRITE"); if (replaceTimeQuery instanceof SqlLiteral) { writer.keyword("ALL"); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java new file mode 100644 index 000000000000..842a724dcb48 --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.parser; + +import com.google.common.collect.Iterables; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlWriter; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.druid.catalog.model.table.export.ExportDestination; + +import java.util.Map; + +/** + * Extends the {@link SqlIdentifier} to hold parameters for an external table destination. 
This contains information + * required for a task to write to a destination. + */ +public class ExternalDestinationSqlIdentifier extends SqlIdentifier +{ + private final ExportDestination exportDestination; + private final Map propertiesForUnparse; + + public ExternalDestinationSqlIdentifier( + String name, + SqlParserPos pos, + ExportDestination exportDestination, + Map propertiesForUnparse + ) + { + super(name, pos); + this.exportDestination = exportDestination; + this.propertiesForUnparse = propertiesForUnparse; + } + + public ExportDestination getExportDestination() + { + return exportDestination; + } + + @Override + public void unparse(SqlWriter writer, int leftPrec, int rightPrec) + { + SqlWriter.Frame externFrame = writer.startFunCall("EXTERN"); + SqlWriter.Frame frame = writer.startFunCall(Iterables.getOnlyElement(names)); + for (Map.Entry property : propertiesForUnparse.entrySet()) { + writer.sep(","); + writer.keyword(property.getKey()); + writer.print("="); + writer.identifier(property.getValue(), false); + } + writer.endFunCall(frame); + writer.endFunCall(externFrame); + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/ExplainAttributes.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/ExplainAttributes.java index e2ae4fa7a10c..0535463e4039 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/ExplainAttributes.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/ExplainAttributes.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.java.util.common.granularity.Granularity; import javax.annotation.Nullable; @@ -34,7 +35,7 @@ public final class ExplainAttributes private final String statementType; @Nullable - private final String targetDataSource; + private final IngestDestination targetDataSource; @Nullable private final 
Granularity partitionedBy; @@ -47,7 +48,7 @@ public final class ExplainAttributes public ExplainAttributes( @JsonProperty("statementType") final String statementType, - @JsonProperty("targetDataSource") @Nullable final String targetDataSource, + @JsonProperty("targetDataSource") @Nullable final IngestDestination targetDataSource, @JsonProperty("partitionedBy") @Nullable final Granularity partitionedBy, @JsonProperty("clusteredBy") @Nullable final List clusteredBy, @JsonProperty("replaceTimeChunks") @Nullable final String replaceTimeChunks @@ -76,7 +77,7 @@ public String getStatementType() @Nullable @JsonProperty @JsonInclude(JsonInclude.Include.NON_NULL) - public String getTargetDataSource() + public IngestDestination getTargetDataSource() { return targetDataSource; } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java index 3d38c6b3f2c1..b65a152be27b 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java @@ -34,6 +34,8 @@ import org.apache.calcite.sql.SqlOrderBy; import org.apache.calcite.tools.ValidationException; import org.apache.calcite.util.Pair; +import org.apache.druid.catalog.model.table.IngestDestination; +import org.apache.druid.catalog.model.table.export.TableDestination; import org.apache.druid.common.utils.IdUtils; import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidSqlInput; @@ -42,6 +44,7 @@ import org.apache.druid.server.security.Resource; import org.apache.druid.server.security.ResourceAction; import org.apache.druid.server.security.ResourceType; +import org.apache.druid.sql.calcite.parser.ExternalDestinationSqlIdentifier; import org.apache.druid.sql.calcite.parser.DruidSqlIngest; import org.apache.druid.sql.calcite.parser.DruidSqlInsert; import 
org.apache.druid.sql.calcite.parser.DruidSqlParserUtils; @@ -57,7 +60,7 @@ public abstract class IngestHandler extends QueryHandler private static final Pattern UNNAMED_COLUMN_PATTERN = Pattern.compile("^EXPR\\$\\d+$", Pattern.CASE_INSENSITIVE); protected final Granularity ingestionGranularity; - protected String targetDatasource; + protected IngestDestination targetDatasource; IngestHandler( HandlerContext handlerContext, @@ -135,7 +138,6 @@ public void validate() ); } targetDatasource = validateAndGetDataSourceForIngest(); - resourceActions.add(new ResourceAction(new Resource(targetDatasource, ResourceType.DATASOURCE), Action.WRITE)); } @Override @@ -149,10 +151,12 @@ protected RelDataType returnedRowType() } /** - * Extract target datasource from a {@link SqlInsert}, and also validate that the ingestion is of a form we support. - * Expects the target datasource to be either an unqualified name, or a name qualified by the default schema. + * Extract target destination from a {@link SqlInsert}, validates that the ingestion is of a form we support, and + * adds the resource action required (if the destination is a druid datasource). + * Expects the target datasource to be an unqualified name, a name qualified by the default schema or an external + * destination. */ - private String validateAndGetDataSourceForIngest() + private IngestDestination validateAndGetDataSourceForIngest() { final SqlInsert insert = ingestNode(); if (insert.isUpsert()) { @@ -168,23 +172,31 @@ private String validateAndGetDataSourceForIngest() } final SqlIdentifier tableIdentifier = (SqlIdentifier) insert.getTargetTable(); - final String dataSource; + final IngestDestination dataSource; if (tableIdentifier.names.isEmpty()) { // I don't think this can happen, but include a branch for it just in case. 
throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.DEFENSIVE) .build("Operation [%s] requires a target table", operationName()); + } else if (tableIdentifier instanceof ExternalDestinationSqlIdentifier) { + dataSource = ((ExternalDestinationSqlIdentifier) tableIdentifier).getExportDestination(); } else if (tableIdentifier.names.size() == 1) { // Unqualified name. - dataSource = Iterables.getOnlyElement(tableIdentifier.names); + String tableName = Iterables.getOnlyElement(tableIdentifier.names); + IdUtils.validateId("table", tableName); + dataSource = new TableDestination(tableName); + resourceActions.add(new ResourceAction(new Resource(tableName, ResourceType.DATASOURCE), Action.WRITE)); } else { // Qualified name. final String defaultSchemaName = Iterables.getOnlyElement(CalciteSchema.from(handlerContext.defaultSchema()).path(null)); if (tableIdentifier.names.size() == 2 && defaultSchemaName.equals(tableIdentifier.names.get(0))) { - dataSource = tableIdentifier.names.get(1); + String tableName = tableIdentifier.names.get(1); + IdUtils.validateId("table", tableName); + dataSource = new TableDestination(tableName); + resourceActions.add(new ResourceAction(new Resource(tableName, ResourceType.DATASOURCE), Action.WRITE)); } else { throw InvalidSqlInput.exception( "Table [%s] does not support operation [%s] because it is not a Druid datasource", @@ -194,8 +206,6 @@ private String validateAndGetDataSourceForIngest() } } - IdUtils.validateId("table", dataSource); - return dataSource; } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java index 164e02a0ca8d..db630156d33e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java @@ -25,6 +25,7 @@ import org.apache.calcite.rel.RelRoot; import 
org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.error.InvalidSqlInput; import org.apache.druid.guice.LazySingleton; import org.apache.druid.query.groupby.GroupByQuery; @@ -133,7 +134,7 @@ public QueryMaker buildQueryMakerForSelect(final RelRoot relRoot, final PlannerC @Override public QueryMaker buildQueryMakerForInsert( - final String targetDataSource, + final IngestDestination targetDestination, final RelRoot relRoot, final PlannerContext plannerContext ) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java index 678ded23e9da..b0b138432741 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java @@ -23,6 +23,7 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.tools.ValidationException; +import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.sql.calcite.planner.PlannerContext; import java.util.Map; @@ -82,7 +83,7 @@ public interface SqlEngine /** * Create a {@link QueryMaker} for an INSERT ... SELECT query. 
* - * @param targetDataSource datasource for the INSERT portion of the query + * @param targetDestination destination for the INSERT portion of the query * @param relRoot planned and validated rel for the SELECT portion of the query * @param plannerContext context for this query * @@ -92,7 +93,7 @@ public interface SqlEngine */ @SuppressWarnings("RedundantThrows") QueryMaker buildQueryMakerForInsert( - String targetDataSource, + IngestDestination targetDestination, RelRoot relRoot, PlannerContext plannerContext ) throws ValidationException; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java index e2ce813a37f7..9414074e8667 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java @@ -22,6 +22,7 @@ import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.run.EngineFeature; import org.apache.druid.sql.calcite.run.QueryMaker; @@ -109,7 +110,7 @@ public QueryMaker buildQueryMakerForSelect(RelRoot relRoot, PlannerContext plann } @Override - public QueryMaker buildQueryMakerForInsert(String targetDataSource, RelRoot relRoot, PlannerContext plannerContext) + public QueryMaker buildQueryMakerForInsert(IngestDestination targetDestination, RelRoot relRoot, PlannerContext plannerContext) { // Can't have views of INSERT or REPLACE statements. 
throw new UnsupportedOperationException(); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index d1edf2823216..ece96f18bf20 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -28,12 +28,11 @@ public class CalciteExportTest extends CalciteIngestionDmlTest { - @Test - public void name() + public void testReplaceIntoExtern() { testIngestionQuery() - .sql("REPLACE OVERWRITE TO EXTERNAL('{\"type\":\"hdfs\",\"uri\":\"hdfs://localhost:9090/outputdirectory/\"}') AS 'CSV' SELECT dim2 FROM foo") + .sql("REPLACE INTO EXTERN(s3(uri=\"s3://druid-data/exportdest/\",username=\"user1\")) AS CSV OVERWRITE ALL SELECT dim2 FROM foo PARTITIONED BY ALL") .expectQuery( Druids.newScanQueryBuilder() .dataSource( @@ -45,7 +44,7 @@ public void name() .legacy(false) .build() ) - .expectResources(dataSourceRead("foo"), dataSourceWrite("extern")) + .expectResources(dataSourceRead("foo")) .expectTarget("extern", RowSignature.builder().add("dim2", ColumnType.STRING).build()) .verify(); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java index 424a2e8895e9..45cc06ecafc3 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java @@ -26,6 +26,7 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.tools.ValidationException; +import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.query.scan.ScanQuery; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.server.QueryLifecycleFactory; @@ -168,7 +169,7 @@ public QueryMaker 
buildQueryMakerForSelect(RelRoot relRoot, PlannerContext plann } @Override - public QueryMaker buildQueryMakerForInsert(String targetDataSource, RelRoot relRoot, PlannerContext plannerContext) + public QueryMaker buildQueryMakerForInsert(IngestDestination targetDestination, RelRoot relRoot, PlannerContext plannerContext) { throw new UnsupportedOperationException(); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java b/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java index b0bf0bd7b29d..e41c34088c93 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java @@ -24,6 +24,7 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.run.EngineFeature; @@ -104,13 +105,13 @@ public QueryMaker buildQueryMakerForSelect(RelRoot relRoot, PlannerContext plann } @Override - public QueryMaker buildQueryMakerForInsert(String targetDataSource, RelRoot relRoot, PlannerContext plannerContext) + public QueryMaker buildQueryMakerForInsert(IngestDestination targetDestination, RelRoot relRoot, PlannerContext plannerContext) { final RowSignature signature = RowSignatures.fromRelDataType( relRoot.validatedRowType.getFieldNames(), relRoot.validatedRowType ); - return new TestInsertQueryMaker(targetDataSource, signature); + return new TestInsertQueryMaker(targetDestination, signature); } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java b/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java index 8562ce29bec5..7b59285617d1 100644 --- 
a/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java @@ -21,6 +21,7 @@ import com.google.common.collect.ImmutableList; import org.apache.calcite.runtime.Hook; +import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.java.util.common.guava.Sequences; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.server.QueryResponse; @@ -32,15 +33,15 @@ */ public class TestInsertQueryMaker implements QueryMaker { - private final String targetDataSource; + private final IngestDestination targetDestination; private final RowSignature signature; public TestInsertQueryMaker( - final String targetDataSource, + final IngestDestination targetDestination, final RowSignature signature ) { - this.targetDataSource = targetDataSource; + this.targetDestination = targetDestination; this.signature = signature; } @@ -54,7 +55,7 @@ public QueryResponse runQuery(final DruidQuery druidQuery) // 2) Return the dataSource and signature of the insert operation, so tests can confirm they are correct. 
return QueryResponse.withEmptyContext( - Sequences.simple(ImmutableList.of(new Object[]{targetDataSource, signature})) + Sequences.simple(ImmutableList.of(new Object[]{targetDestination.getDestinationName(), signature})) ); } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java index 0e67fdbe9641..2df46ed88a6e 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java @@ -95,4 +95,20 @@ private static DruidSqlParserImpl createTestParser(String parseString) druidSqlParser.setIdentifierMaxLength(20); return druidSqlParser; } + + @Test + public void testUnparseExternalSqlIdentifier() throws ParseException + { + String sqlQuery = "REPLACE INTO EXTERN(s3(uri = s3url, user = user1)) AS CSV OVERWRITE ALL SELECT dim2 FROM foo PARTITIONED BY ALL"; + String prettySqlQuery = "REPLACE INTO EXTERN(S3(URI =\"s3url\", USER =\"user1\"))\n" + + "AS CSV\n" + + "OVERWRITE ALL\n" + + "SELECT \"dim2\"\n" + + " FROM \"foo\"\n" + + "PARTITIONED BY ALL"; + DruidSqlParserImpl druidSqlParser = createTestParser(sqlQuery); + DruidSqlReplace druidSqlReplace = (DruidSqlReplace) druidSqlParser.DruidSqlReplaceEof(); + druidSqlReplace.unparse(sqlWriter, 0, 0); + assertEquals(prettySqlQuery, sqlWriter.toSqlString().getSql()); + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/planner/ExplainAttributesTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/planner/ExplainAttributesTest.java index 67f97d64215d..c72cb0d37e70 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/planner/ExplainAttributesTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/planner/ExplainAttributesTest.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; +import 
org.apache.druid.catalog.model.table.export.TableDestination; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.granularity.Granularities; import org.junit.Assert; @@ -64,14 +65,14 @@ public void testSerializeInsertAttributes() throws JsonProcessingException { ExplainAttributes insertAttributes = new ExplainAttributes( "INSERT", - "foo", + new TableDestination("foo"), Granularities.DAY, null, null ); final String expectedAttributes = "{" + "\"statementType\":\"INSERT\"," - + "\"targetDataSource\":\"foo\"," + + "\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"foo\"}," + "\"partitionedBy\":\"DAY\"" + "}"; Assert.assertEquals(expectedAttributes, DEFAULT_OBJECT_MAPPER.writeValueAsString(insertAttributes)); @@ -82,14 +83,14 @@ public void testSerializeInsertAllAttributes() throws JsonProcessingException { ExplainAttributes insertAttributes = new ExplainAttributes( "INSERT", - "foo", + new TableDestination("foo"), Granularities.ALL, null, null ); final String expectedAttributes = "{" + "\"statementType\":\"INSERT\"," - + "\"targetDataSource\":\"foo\"," + + "\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"foo\"}," + "\"partitionedBy\":{\"type\":\"all\"}" + "}"; Assert.assertEquals(expectedAttributes, DEFAULT_OBJECT_MAPPER.writeValueAsString(insertAttributes)); @@ -100,14 +101,14 @@ public void testSerializeReplaceAttributes() throws JsonProcessingException { ExplainAttributes replaceAttributes1 = new ExplainAttributes( "REPLACE", - "foo", + new TableDestination("foo"), Granularities.HOUR, null, "ALL" ); final String expectedAttributes1 = "{" + "\"statementType\":\"REPLACE\"," - + "\"targetDataSource\":\"foo\"," + + "\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"foo\"}," + "\"partitionedBy\":\"HOUR\"," + "\"replaceTimeChunks\":\"ALL\"" + "}"; @@ -116,14 +117,14 @@ public void testSerializeReplaceAttributes() throws JsonProcessingException ExplainAttributes replaceAttributes2 = new ExplainAttributes( 
"REPLACE", - "foo", + new TableDestination("foo"), Granularities.HOUR, null, "2019-08-25T02:00:00.000Z/2019-08-25T03:00:00.000Z" ); final String expectedAttributes2 = "{" + "\"statementType\":\"REPLACE\"," - + "\"targetDataSource\":\"foo\"," + + "\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"foo\"}," + "\"partitionedBy\":\"HOUR\"," + "\"replaceTimeChunks\":\"2019-08-25T02:00:00.000Z/2019-08-25T03:00:00.000Z\"" + "}"; @@ -135,14 +136,14 @@ public void testSerializeReplaceWithClusteredByAttributes() throws JsonProcessin { ExplainAttributes replaceAttributes1 = new ExplainAttributes( "REPLACE", - "foo", + new TableDestination("foo"), Granularities.HOUR, Arrays.asList("foo", "CEIL(`f2`)"), "ALL" ); final String expectedAttributes1 = "{" + "\"statementType\":\"REPLACE\"," - + "\"targetDataSource\":\"foo\"," + + "\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"foo\"}," + "\"partitionedBy\":\"HOUR\"," + "\"clusteredBy\":[\"foo\",\"CEIL(`f2`)\"]," + "\"replaceTimeChunks\":\"ALL\"" @@ -152,14 +153,14 @@ public void testSerializeReplaceWithClusteredByAttributes() throws JsonProcessin ExplainAttributes replaceAttributes2 = new ExplainAttributes( "REPLACE", - "foo", + new TableDestination("foo"), Granularities.HOUR, Arrays.asList("foo", "boo"), "2019-08-25T02:00:00.000Z/2019-08-25T03:00:00.000Z" ); final String expectedAttributes2 = "{" + "\"statementType\":\"REPLACE\"," - + "\"targetDataSource\":\"foo\"," + + "\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"foo\"}," + "\"partitionedBy\":\"HOUR\"," + "\"clusteredBy\":[\"foo\",\"boo\"]," + "\"replaceTimeChunks\":\"2019-08-25T02:00:00.000Z/2019-08-25T03:00:00.000Z\"" From 9e1ef177ddcc0eddbdf949071f3bcdc4ffd18dcb Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 16 Jan 2024 20:32:03 +0530 Subject: [PATCH 03/50] Fix builds --- .../model/table/export/S3ExportDestinationTest.java | 2 +- .../model/table/export/TableDestinationTest.java | 2 +- .../parser/ExternalDestinationSqlIdentifier.java | 7 
+++++++ .../druid/sql/calcite/planner/IngestHandler.java | 2 +- .../apache/druid/sql/calcite/CalciteInsertDmlTest.java | 10 +++++----- .../druid/sql/calcite/CalciteReplaceDmlTest.java | 6 +++--- 6 files changed, 18 insertions(+), 11 deletions(-) diff --git a/server/src/test/java/org/apache/druid/catalog/model/table/export/S3ExportDestinationTest.java b/server/src/test/java/org/apache/druid/catalog/model/table/export/S3ExportDestinationTest.java index b97fd1e65275..5eb0eecb0e6a 100644 --- a/server/src/test/java/org/apache/druid/catalog/model/table/export/S3ExportDestinationTest.java +++ b/server/src/test/java/org/apache/druid/catalog/model/table/export/S3ExportDestinationTest.java @@ -37,4 +37,4 @@ public void testSerde() throws IOException S3ExportDestination newDest = objectMapper.readValue(bytes, S3ExportDestination.class); Assert.assertEquals(exportDestination, newDest); } -} \ No newline at end of file +} diff --git a/server/src/test/java/org/apache/druid/catalog/model/table/export/TableDestinationTest.java b/server/src/test/java/org/apache/druid/catalog/model/table/export/TableDestinationTest.java index c1995476f405..891a8d1b5091 100644 --- a/server/src/test/java/org/apache/druid/catalog/model/table/export/TableDestinationTest.java +++ b/server/src/test/java/org/apache/druid/catalog/model/table/export/TableDestinationTest.java @@ -37,4 +37,4 @@ public void testSerde() throws IOException TableDestination newDest = objectMapper.readValue(bytes, TableDestination.class); Assert.assertEquals(exportDestination, newDest); } -} \ No newline at end of file +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java index 842a724dcb48..c1af53c02df5 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java +++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java @@ -21,6 +21,7 @@ import com.google.common.collect.Iterables; import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlWriter; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.druid.catalog.model.table.export.ExportDestination; @@ -67,4 +68,10 @@ public void unparse(SqlWriter writer, int leftPrec, int rightPrec) writer.endFunCall(frame); writer.endFunCall(externFrame); } + + @Override + public SqlNode clone(SqlParserPos pos) + { + return new ExternalDestinationSqlIdentifier(Iterables.getOnlyElement(names), pos, exportDestination, propertiesForUnparse); + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java index b65a152be27b..f4efb1790f2b 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java @@ -44,11 +44,11 @@ import org.apache.druid.server.security.Resource; import org.apache.druid.server.security.ResourceAction; import org.apache.druid.server.security.ResourceType; -import org.apache.druid.sql.calcite.parser.ExternalDestinationSqlIdentifier; import org.apache.druid.sql.calcite.parser.DruidSqlIngest; import org.apache.druid.sql.calcite.parser.DruidSqlInsert; import org.apache.druid.sql.calcite.parser.DruidSqlParserUtils; import org.apache.druid.sql.calcite.parser.DruidSqlReplace; +import org.apache.druid.sql.calcite.parser.ExternalDestinationSqlIdentifier; import org.apache.druid.sql.calcite.run.EngineFeature; import org.apache.druid.sql.calcite.run.QueryMaker; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java index a34c93ce9c92..35256c96de59 100644 --- 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java @@ -657,7 +657,7 @@ public void testExplainPlanInsertWithClusteredBy() throws JsonProcessingExceptio skipVectorize(); final String resources = "[{\"name\":\"dst\",\"type\":\"DATASOURCE\"},{\"name\":\"foo\",\"type\":\"DATASOURCE\"}]"; - final String attributes = "{\"statementType\":\"INSERT\",\"targetDataSource\":\"dst\",\"partitionedBy\":\"DAY\",\"clusteredBy\":[\"floor_m1\",\"dim1\",\"CEIL(\\\"m2\\\")\"]}"; + final String attributes = "{\"statementType\":\"INSERT\",\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"dst\"},\"partitionedBy\":\"DAY\",\"clusteredBy\":[\"floor_m1\",\"dim1\",\"CEIL(\\\"m2\\\")\"]}"; final String sql = "EXPLAIN PLAN FOR INSERT INTO druid.dst " + "SELECT __time, FLOOR(m1) as floor_m1, dim1, CEIL(m2) as ceil_m2 FROM foo " @@ -761,7 +761,7 @@ public void testExplainPlanInsertWithAsSubQueryClusteredBy() skipVectorize(); final String resources = "[{\"name\":\"EXTERNAL\",\"type\":\"EXTERNAL\"},{\"name\":\"foo\",\"type\":\"DATASOURCE\"}]"; - final String attributes = "{\"statementType\":\"INSERT\",\"targetDataSource\":\"foo\",\"partitionedBy\":{\"type\":\"all\"},\"clusteredBy\":[\"namespace\",\"country\"]}"; + final String attributes = "{\"statementType\":\"INSERT\",\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"foo\"},\"partitionedBy\":{\"type\":\"all\"},\"clusteredBy\":[\"namespace\",\"country\"]}"; final String sql = "EXPLAIN PLAN FOR\n" + "INSERT INTO \"foo\"\n" @@ -859,7 +859,7 @@ public void testExplainPlanInsertJoinQuery() skipVectorize(); final String resources = "[{\"name\":\"EXTERNAL\",\"type\":\"EXTERNAL\"},{\"name\":\"my_table\",\"type\":\"DATASOURCE\"}]"; - final String attributes = "{\"statementType\":\"INSERT\",\"targetDataSource\":\"my_table\",\"partitionedBy\":\"HOUR\",\"clusteredBy\":[\"__time\",\"isRobotAlias\",\"countryCapital\",\"regionName\"]}"; + final 
String attributes = "{\"statementType\":\"INSERT\",\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"my_table\"},\"partitionedBy\":\"HOUR\",\"clusteredBy\":[\"__time\",\"isRobotAlias\",\"countryCapital\",\"regionName\"]}"; final String sql = "EXPLAIN PLAN FOR\n" + "INSERT INTO my_table\n" @@ -1225,7 +1225,7 @@ public void testExplainInsertFromExternal() throws IOException + "}]"; final String resources = "[{\"name\":\"EXTERNAL\",\"type\":\"EXTERNAL\"},{\"name\":\"dst\",\"type\":\"DATASOURCE\"}]"; - final String attributes = "{\"statementType\":\"INSERT\",\"targetDataSource\":\"dst\",\"partitionedBy\":{\"type\":\"all\"}}"; + final String attributes = "{\"statementType\":\"INSERT\",\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"dst\"},\"partitionedBy\":{\"type\":\"all\"}}"; // Use testQuery for EXPLAIN (not testIngestionQuery). testQuery( @@ -1329,7 +1329,7 @@ public void testExplainPlanForInsertWithClusteredBy() throws JsonProcessingExcep + "}]"; final String resources = "[{\"name\":\"dst\",\"type\":\"DATASOURCE\"},{\"name\":\"foo\",\"type\":\"DATASOURCE\"}]"; - final String attributes = "{\"statementType\":\"INSERT\",\"targetDataSource\":\"dst\",\"partitionedBy\":\"DAY\",\"clusteredBy\":[\"floor_m1\",\"dim1\",\"CEIL(\\\"m2\\\")\"]}"; + final String attributes = "{\"statementType\":\"INSERT\",\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"dst\"},\"partitionedBy\":\"DAY\",\"clusteredBy\":[\"floor_m1\",\"dim1\",\"CEIL(\\\"m2\\\")\"]}"; // Use testQuery for EXPLAIN (not testIngestionQuery). 
testQuery( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteReplaceDmlTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteReplaceDmlTest.java index 2970330e82fc..dbad37496f7c 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteReplaceDmlTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteReplaceDmlTest.java @@ -654,7 +654,7 @@ public void testExplainReplaceFromExternal() throws IOException + "\"columnMappings\":[{\"queryColumn\":\"x\",\"outputColumn\":\"x\"},{\"queryColumn\":\"y\",\"outputColumn\":\"y\"},{\"queryColumn\":\"z\",\"outputColumn\":\"z\"}]}]"; final String resources = "[{\"name\":\"EXTERNAL\",\"type\":\"EXTERNAL\"},{\"name\":\"dst\",\"type\":\"DATASOURCE\"}]"; - final String attributes = "{\"statementType\":\"REPLACE\",\"targetDataSource\":\"dst\",\"partitionedBy\":{\"type\":\"all\"},\"replaceTimeChunks\":\"all\"}"; + final String attributes = "{\"statementType\":\"REPLACE\",\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"dst\"},\"partitionedBy\":{\"type\":\"all\"},\"replaceTimeChunks\":\"all\"}"; // Use testQuery for EXPLAIN (not testIngestionQuery). 
testQuery( @@ -732,7 +732,7 @@ public void testExplainReplaceTimeChunksWithPartitioningAndClustering() throws I final String explanation = "[{\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"orderBy\":[{\"columnName\":\"dim1\",\"order\":\"ascending\"}],\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"sqlInsertSegmentGranularity\":\"\\\"DAY\\\"\",\"sqlQueryId\":\"dummy\",\"sqlReplaceTimeChunks\":\"2000-01-01T00:00:00.000Z/2000-01-02T00:00:00.000Z\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}},\"signature\":[{\"name\":\"__time\",\"type\":\"LONG\"},{\"name\":\"dim1\",\"type\":\"STRING\"},{\"name\":\"dim2\",\"type\":\"STRING\"},{\"name\":\"dim3\",\"type\":\"STRING\"},{\"name\":\"cnt\",\"type\":\"LONG\"},{\"name\":\"m1\",\"type\":\"FLOAT\"},{\"name\":\"m2\",\"type\":\"DOUBLE\"},{\"name\":\"unique_dim1\",\"type\":\"COMPLEX\"}],\"columnMappings\":[{\"queryColumn\":\"__time\",\"outputColumn\":\"__time\"},{\"queryColumn\":\"dim1\",\"outputColumn\":\"dim1\"},{\"queryColumn\":\"dim2\",\"outputColumn\":\"dim2\"},{\"queryColumn\":\"dim3\",\"outputColumn\":\"dim3\"},{\"queryColumn\":\"cnt\",\"outputColumn\":\"cnt\"},{\"queryColumn\":\"m1\",\"outputColumn\":\"m1\"},{\"queryColumn\":\"m2\",\"outputColumn\":\"m2\"},{\"queryColumn\":\"unique_dim1\",\"outputColumn\":\"unique_dim1\"}]}]"; final String resources = "[{\"name\":\"dst\",\"type\":\"DATASOURCE\"},{\"name\":\"foo\",\"type\":\"DATASOURCE\"}]"; - final String attributes = "{\"statementType\":\"REPLACE\",\"targetDataSource\":\"dst\",\"partitionedBy\":\"DAY\",\"clusteredBy\":[\"dim1\"],\"replaceTimeChunks\":\"2000-01-01T00:00:00.000Z/2000-01-02T00:00:00.000Z\"}"; + final String attributes = 
"{\"statementType\":\"REPLACE\",\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"dst\"},\"partitionedBy\":\"DAY\",\"clusteredBy\":[\"dim1\"],\"replaceTimeChunks\":\"2000-01-01T00:00:00.000Z/2000-01-02T00:00:00.000Z\"}"; final String sql = "EXPLAIN PLAN FOR" + " REPLACE INTO dst" @@ -833,7 +833,7 @@ public void testExplainReplaceWithLimitAndClusteredByOrdinals() throws IOExcepti + "{\"queryColumn\":\"dim2\",\"outputColumn\":\"dim2\"},{\"queryColumn\":\"dim3\",\"outputColumn\":\"dim3\"},{\"queryColumn\":\"cnt\",\"outputColumn\":\"cnt\"}," + "{\"queryColumn\":\"m1\",\"outputColumn\":\"m1\"},{\"queryColumn\":\"m2\",\"outputColumn\":\"m2\"},{\"queryColumn\":\"unique_dim1\",\"outputColumn\":\"unique_dim1\"}]}]"; final String resources = "[{\"name\":\"dst\",\"type\":\"DATASOURCE\"},{\"name\":\"foo\",\"type\":\"DATASOURCE\"}]"; - final String attributes = "{\"statementType\":\"REPLACE\",\"targetDataSource\":\"dst\",\"partitionedBy\":\"HOUR\"," + final String attributes = "{\"statementType\":\"REPLACE\",\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"dst\"},\"partitionedBy\":\"HOUR\"," + "\"clusteredBy\":[\"__time\",\"dim1\",\"dim3\",\"dim2\"],\"replaceTimeChunks\":\"2000-01-01T00:00:00.000Z/2000-01-02T00:00:00.000Z\"}"; final String sql = "EXPLAIN PLAN FOR" From 06e9d92d4bf3a12881ca004c236cffff138512f7 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 17 Jan 2024 14:43:54 +0530 Subject: [PATCH 04/50] Address comments --- .../java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java | 4 ++-- .../calcite/parser/ExternalDestinationSqlIdentifier.java | 7 +++++++ .../org/apache/druid/sql/calcite/run/NativeSqlEngine.java | 2 +- .../java/org/apache/druid/sql/calcite/run/SqlEngine.java | 4 ++-- .../org/apache/druid/sql/calcite/view/ViewSqlEngine.java | 2 +- .../org/apache/druid/sql/calcite/CalciteExportTest.java | 2 +- .../druid/sql/calcite/CalciteScanSignatureTest.java | 2 +- .../apache/druid/sql/calcite/IngestionTestSqlEngine.java | 4 ++-- 
.../apache/druid/sql/calcite/TestInsertQueryMaker.java | 8 ++++---- 9 files changed, 21 insertions(+), 14 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java index e9f0d1704932..9bc8dfa35c7e 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java @@ -154,7 +154,7 @@ public OverlordClient overlordClient() @Override public QueryMaker buildQueryMakerForInsert( - final IngestDestination targetDestination, + final IngestDestination destination, final RelRoot relRoot, final PlannerContext plannerContext ) @@ -162,7 +162,7 @@ public QueryMaker buildQueryMakerForInsert( validateInsert(relRoot.rel, relRoot.fields, plannerContext); return new MSQTaskQueryMaker( - targetDestination, + destination, overlordClient, plannerContext, jsonMapper, diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java index c1af53c02df5..7dc3aaafa303 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java @@ -74,4 +74,11 @@ public SqlNode clone(SqlParserPos pos) { return new ExternalDestinationSqlIdentifier(Iterables.getOnlyElement(names), pos, exportDestination, propertiesForUnparse); } + + @Override + @Deprecated + public Object clone() + { + throw new UnsupportedOperationException("Function is deprecated, please use clone(SqlNode) instead."); + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java 
b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java index db630156d33e..ade7c7895b47 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java @@ -134,7 +134,7 @@ public QueryMaker buildQueryMakerForSelect(final RelRoot relRoot, final PlannerC @Override public QueryMaker buildQueryMakerForInsert( - final IngestDestination targetDestination, + final IngestDestination destination, final RelRoot relRoot, final PlannerContext plannerContext ) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java index b0b138432741..980fb25d13bb 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java @@ -83,7 +83,7 @@ public interface SqlEngine /** * Create a {@link QueryMaker} for an INSERT ... SELECT query. 
* - * @param targetDestination destination for the INSERT portion of the query + * @param destination destination for the INSERT portion of the query * @param relRoot planned and validated rel for the SELECT portion of the query * @param plannerContext context for this query * @@ -93,7 +93,7 @@ public interface SqlEngine */ @SuppressWarnings("RedundantThrows") QueryMaker buildQueryMakerForInsert( - IngestDestination targetDestination, + IngestDestination destination, RelRoot relRoot, PlannerContext plannerContext ) throws ValidationException; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java index 9414074e8667..3cce507714a9 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java @@ -110,7 +110,7 @@ public QueryMaker buildQueryMakerForSelect(RelRoot relRoot, PlannerContext plann } @Override - public QueryMaker buildQueryMakerForInsert(IngestDestination targetDestination, RelRoot relRoot, PlannerContext plannerContext) + public QueryMaker buildQueryMakerForInsert(IngestDestination destination, RelRoot relRoot, PlannerContext plannerContext) { // Can't have views of INSERT or REPLACE statements. 
throw new UnsupportedOperationException(); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index ece96f18bf20..ed059401ccc0 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -45,7 +45,7 @@ public void testReplaceIntoExtern() .build() ) .expectResources(dataSourceRead("foo")) - .expectTarget("extern", RowSignature.builder().add("dim2", ColumnType.STRING).build()) + .expectTarget("s3", RowSignature.builder().add("dim2", ColumnType.STRING).build()) .verify(); } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java index 45cc06ecafc3..bed56819ccde 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java @@ -169,7 +169,7 @@ public QueryMaker buildQueryMakerForSelect(RelRoot relRoot, PlannerContext plann } @Override - public QueryMaker buildQueryMakerForInsert(IngestDestination targetDestination, RelRoot relRoot, PlannerContext plannerContext) + public QueryMaker buildQueryMakerForInsert(IngestDestination destination, RelRoot relRoot, PlannerContext plannerContext) { throw new UnsupportedOperationException(); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java b/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java index e41c34088c93..f3c68be36d35 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java @@ -105,13 +105,13 @@ public QueryMaker buildQueryMakerForSelect(RelRoot relRoot, PlannerContext plann } @Override - public QueryMaker 
buildQueryMakerForInsert(IngestDestination targetDestination, RelRoot relRoot, PlannerContext plannerContext) + public QueryMaker buildQueryMakerForInsert(IngestDestination destination, RelRoot relRoot, PlannerContext plannerContext) { final RowSignature signature = RowSignatures.fromRelDataType( relRoot.validatedRowType.getFieldNames(), relRoot.validatedRowType ); - return new TestInsertQueryMaker(targetDestination, signature); + return new TestInsertQueryMaker(destination, signature); } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java b/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java index 7b59285617d1..1da300493a6f 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java @@ -33,15 +33,15 @@ */ public class TestInsertQueryMaker implements QueryMaker { - private final IngestDestination targetDestination; + private final IngestDestination destination; private final RowSignature signature; public TestInsertQueryMaker( - final IngestDestination targetDestination, + final IngestDestination destination, final RowSignature signature ) { - this.targetDestination = targetDestination; + this.destination = destination; this.signature = signature; } @@ -55,7 +55,7 @@ public QueryResponse runQuery(final DruidQuery druidQuery) // 2) Return the dataSource and signature of the insert operation, so tests can confirm they are correct. 
return QueryResponse.withEmptyContext( - Sequences.simple(ImmutableList.of(new Object[]{targetDestination.getDestinationName(), signature})) + Sequences.simple(ImmutableList.of(new Object[]{destination.getDestinationName(), signature})) ); } } From 6ff6747c898310defc5e121845e3dc26dc8dea52 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Sun, 21 Jan 2024 17:29:47 +0530 Subject: [PATCH 05/50] Add frame processor --- .../apache/druid/msq/exec/ControllerImpl.java | 27 +++ .../druid/msq/guice/MSQIndexingModule.java | 2 + .../destination/ExportMSQDestination.java | 33 ++-- .../results/ExportResultsFrameProcessor.java | 162 ++++++++++++++++++ .../ExportResultsFrameProcessorFactory.java | 123 +++++++++++++ .../druid/msq/sql/MSQTaskQueryMaker.java | 22 ++- .../druid/msq/sql/MSQTaskSqlEngine.java | 1 + .../destination/ExportMSQDestinationTest.java | 13 +- .../local/LocalFileStorageConnector.java | 5 +- .../LocalFileStorageConnectorProvider.java | 22 ++- .../model/table/IngestDestination.java | 10 -- .../model/table/export/ExportDestination.java | 21 ++- .../table/export/S3ExportDestination.java | 85 --------- .../table/export/S3ExportDestinationTest.java | 40 ----- sql/src/main/codegen/config.fmpp | 1 - sql/src/main/codegen/includes/common.ftl | 38 +--- .../sql/calcite/parser/DruidSqlIngest.java | 4 + .../ExternalDestinationSqlIdentifier.java | 28 +-- .../sql/calcite/planner/IngestHandler.java | 29 +++- .../druid/sql/calcite/run/EngineFeature.java | 6 +- .../sql/calcite/run/NativeSqlEngine.java | 1 + .../druid/sql/calcite/view/ViewSqlEngine.java | 1 + .../druid/sql/calcite/CalciteExportTest.java | 4 +- .../sql/calcite/IngestionTestSqlEngine.java | 1 + .../calcite/parser/DruidSqlUnparseTest.java | 4 +- 25 files changed, 472 insertions(+), 211 deletions(-) create mode 100644 extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java create mode 100644 
extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java delete mode 100644 server/src/main/java/org/apache/druid/catalog/model/table/export/S3ExportDestination.java delete mode 100644 server/src/test/java/org/apache/druid/catalog/model/table/export/S3ExportDestinationTest.java diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 9a1dd089cfc5..5cebb34a327a 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -106,6 +106,7 @@ import org.apache.druid.msq.indexing.client.ControllerChatHandler; import org.apache.druid.msq.indexing.destination.DataSourceMSQDestination; import org.apache.druid.msq.indexing.destination.DurableStorageMSQDestination; +import org.apache.druid.msq.indexing.destination.ExportMSQDestination; import org.apache.druid.msq.indexing.destination.MSQSelectDestination; import org.apache.druid.msq.indexing.destination.TaskReportMSQDestination; import org.apache.druid.msq.indexing.error.CanceledFault; @@ -168,6 +169,7 @@ import org.apache.druid.msq.querykit.ShuffleSpecFactories; import org.apache.druid.msq.querykit.ShuffleSpecFactory; import org.apache.druid.msq.querykit.groupby.GroupByQueryKit; +import org.apache.druid.msq.querykit.results.ExportResultsFrameProcessorFactory; import org.apache.druid.msq.querykit.results.QueryResultFrameProcessorFactory; import org.apache.druid.msq.querykit.scan.ScanQueryKit; import org.apache.druid.msq.shuffle.input.DurableStorageInputChannelFactory; @@ -201,6 +203,8 @@ import org.apache.druid.sql.calcite.planner.ColumnMapping; import org.apache.druid.sql.calcite.planner.ColumnMappings; import org.apache.druid.sql.calcite.rel.DruidQuery; +import 
org.apache.druid.sql.http.ResultFormat; +import org.apache.druid.storage.StorageConnectorProvider; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentTimeline; import org.apache.druid.timeline.partition.DimensionRangeShardSpec; @@ -1780,6 +1784,11 @@ private static QueryDefinition makeQueryDefinition( MultiStageQueryContext.getRowsPerPage(querySpec.getQuery().context()) ); queryToPlan = querySpec.getQuery(); + } else if (querySpec.getDestination() instanceof ExportMSQDestination) { + shuffleSpecFactory = ShuffleSpecFactories.getGlobalSortWithTargetSize( + MultiStageQueryContext.getRowsPerPage(querySpec.getQuery().context()) + ); + queryToPlan = querySpec.getQuery(); } else { throw new ISE("Unsupported destination [%s]", querySpec.getDestination()); } @@ -1872,6 +1881,24 @@ private static QueryDefinition makeQueryDefinition( } else { return queryDef; } + } else if (querySpec.getDestination() instanceof ExportMSQDestination) { + ExportMSQDestination exportMSQDestination = (ExportMSQDestination) querySpec.getDestination(); + StorageConnectorProvider storageConnectorProvider = exportMSQDestination.getStorageConnectorProvider(); + ResultFormat resultFormat = exportMSQDestination.getResultFormat(); + + final QueryDefinitionBuilder builder = QueryDefinition.builder(); + builder.addAll(queryDef); + builder.add(StageDefinition.builder(queryDef.getNextStageNumber()) + .inputs(new StageInputSpec(queryDef.getFinalStageDefinition().getStageNumber())) + .maxWorkerCount(tuningConfig.getMaxNumWorkers()) + .signature(queryDef.getFinalStageDefinition().getSignature()) + .shuffleSpec(null) + .processorFactory(new ExportResultsFrameProcessorFactory( + storageConnectorProvider, + resultFormat + )) + ); + return builder.build(); } else { throw new ISE("Unsupported destination [%s]", querySpec.getDestination()); } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java index 4af832705c3f..c2b50f172677 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java @@ -88,6 +88,7 @@ import org.apache.druid.msq.querykit.common.SortMergeJoinFrameProcessorFactory; import org.apache.druid.msq.querykit.groupby.GroupByPostShuffleFrameProcessorFactory; import org.apache.druid.msq.querykit.groupby.GroupByPreShuffleFrameProcessorFactory; +import org.apache.druid.msq.querykit.results.ExportResultsFrameProcessorFactory; import org.apache.druid.msq.querykit.results.QueryResultFrameProcessorFactory; import org.apache.druid.msq.querykit.scan.ScanQueryFrameProcessorFactory; import org.apache.druid.msq.util.PassthroughAggregatorFactory; @@ -158,6 +159,7 @@ public List getJacksonModules() NilExtraInfoHolder.class, SortMergeJoinFrameProcessorFactory.class, QueryResultFrameProcessorFactory.class, + ExportResultsFrameProcessorFactory.class, // DataSource classes (note: ExternalDataSource is in MSQSqlModule) InputNumberDataSource.class, diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java index e21b6dabf142..cd56f1f2cab5 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java @@ -21,25 +21,36 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.druid.catalog.model.table.export.ExportDestination; +import org.apache.druid.sql.http.ResultFormat; 
+import org.apache.druid.storage.StorageConnectorProvider; import java.util.Objects; public class ExportMSQDestination implements MSQDestination { public static final String TYPE = "export"; - private final ExportDestination exportDestination; + private final StorageConnectorProvider storageConnectorProvider; + private final ResultFormat resultFormat; @JsonCreator - public ExportMSQDestination(@JsonProperty("exportDestination") ExportDestination exportDestination) + public ExportMSQDestination(@JsonProperty("storageConnectorProvider") StorageConnectorProvider storageConnectorProvider, + @JsonProperty("resultFormat") ResultFormat resultFormat + ) { - this.exportDestination = exportDestination; + this.storageConnectorProvider = storageConnectorProvider; + this.resultFormat = resultFormat; } - @JsonProperty("exportDestination") - public ExportDestination getExportDestination() + @JsonProperty("storageConnectorProvider") + public StorageConnectorProvider getStorageConnectorProvider() { - return exportDestination; + return storageConnectorProvider; + } + + @JsonProperty("resultFormat") + public ResultFormat getResultFormat() + { + return resultFormat; } @Override @@ -52,20 +63,22 @@ public boolean equals(Object o) return false; } ExportMSQDestination that = (ExportMSQDestination) o; - return Objects.equals(exportDestination, that.exportDestination); + return Objects.equals(storageConnectorProvider, that.storageConnectorProvider) + && resultFormat == that.resultFormat; } @Override public int hashCode() { - return Objects.hash(exportDestination); + return Objects.hash(storageConnectorProvider, resultFormat); } @Override public String toString() { return "ExportMSQDestination{" + - "exportDestination=" + exportDestination + + "storageConnectorProvider=" + storageConnectorProvider + + ", resultFormat=" + resultFormat + '}'; } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java new file mode 100644 index 000000000000..958d2eedf5bd --- /dev/null +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.msq.querykit.results; + +import com.fasterxml.jackson.databind.ObjectMapper; +import it.unimi.dsi.fastutil.ints.IntSet; +import org.apache.druid.frame.Frame; +import org.apache.druid.frame.channel.ReadableFrameChannel; +import org.apache.druid.frame.channel.WritableFrameChannel; +import org.apache.druid.frame.processor.FrameProcessor; +import org.apache.druid.frame.processor.FrameProcessors; +import org.apache.druid.frame.processor.ReturnOrAwait; +import org.apache.druid.frame.read.FrameReader; +import org.apache.druid.frame.segment.FrameStorageAdapter; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.Unit; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.msq.counters.ChannelCounters; +import org.apache.druid.msq.util.SequenceUtils; +import org.apache.druid.segment.BaseObjectColumnValueSelector; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.Cursor; +import org.apache.druid.segment.VirtualColumns; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.sql.http.ResultFormat; +import org.apache.druid.storage.StorageConnector; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +public class ExportResultsFrameProcessor implements FrameProcessor +{ + private final ReadableFrameChannel inputChannel; + private final ResultFormat exportFormat; + private final FrameReader frameReader; + private final StorageConnector storageConnector; + private final ObjectMapper jsonMapper; + private final int partitionNumber; + private final int workerNumber; + private final ChannelCounters channelCounter; + + public ExportResultsFrameProcessor( + ReadableFrameChannel inputChannel, + ResultFormat exportFormat, + FrameReader frameReader, + 
StorageConnector storageConnector, + ObjectMapper jsonMapper, + int partitionNumber, + int workerNumber, + ChannelCounters channelCounter + ) + { + this.inputChannel = inputChannel; + this.exportFormat = exportFormat; + this.frameReader = frameReader; + this.storageConnector = storageConnector; + this.jsonMapper = jsonMapper; + this.partitionNumber = partitionNumber; + this.workerNumber = workerNumber; + this.channelCounter = channelCounter; + } + + @Override + public List inputChannels() + { + return Collections.singletonList(inputChannel); + } + + @Override + public List outputChannels() + { + return Collections.emptyList(); + } + + @Override + public ReturnOrAwait runIncrementally(IntSet readableInputs) throws InterruptedException, IOException + { + if (readableInputs.isEmpty()) { + return ReturnOrAwait.awaitAll(1); + } + + if (inputChannel.isFinished()) { + return ReturnOrAwait.returnObject(Unit.instance()); + } else { + addFrame(inputChannel.read()); + return ReturnOrAwait.awaitAll(1); + } + } + + private void addFrame(final Frame frame) throws IOException + { + final RowSignature signature = frameReader.signature(); + + final Sequence cursorSequence = + new FrameStorageAdapter(frame, frameReader, Intervals.ETERNITY) + .makeCursors(null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null); + + try (OutputStream stream = storageConnector.write(workerNumber + "/" + partitionNumber)) { + ResultFormat.Writer formatter = exportFormat.createFormatter(stream, jsonMapper); + + SequenceUtils.forEach( + cursorSequence, + cursor -> { + try { + formatter.writeResponseStart(); + final ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); + + //noinspection rawtypes + @SuppressWarnings("rawtypes") + final List selectors = + frameReader.signature() + .getColumnNames() + .stream() + .map(columnSelectorFactory::makeColumnValueSelector) + .collect(Collectors.toList()); + + while (!cursor.isDone()) { + formatter.writeRowStart(); + 
for (int j = 0; j < signature.size(); j++) { + formatter.writeRowField(signature.getColumnName(j), selectors.get(j).getObject()); + } + channelCounter.incrementRowCount(); + formatter.writeRowEnd(); + cursor.advance(); + } + formatter.writeResponseEnd(); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + ); + } + } + + @Override + public void cleanup() throws IOException + { + FrameProcessors.closeAll(inputChannels(), outputChannels()); + } +} diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java new file mode 100644 index 000000000000..561606256542 --- /dev/null +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.msq.querykit.results; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; +import com.google.common.collect.Iterables; +import org.apache.druid.frame.processor.FrameProcessor; +import org.apache.druid.frame.processor.OutputChannelFactory; +import org.apache.druid.frame.processor.OutputChannels; +import org.apache.druid.frame.processor.manager.ProcessorManagers; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.java.util.common.guava.Sequences; +import org.apache.druid.msq.counters.ChannelCounters; +import org.apache.druid.msq.counters.CounterNames; +import org.apache.druid.msq.counters.CounterTracker; +import org.apache.druid.msq.input.InputSlice; +import org.apache.druid.msq.input.InputSliceReader; +import org.apache.druid.msq.input.ReadableInput; +import org.apache.druid.msq.input.stage.StageInputSlice; +import org.apache.druid.msq.kernel.FrameContext; +import org.apache.druid.msq.kernel.ProcessorsAndChannels; +import org.apache.druid.msq.kernel.StageDefinition; +import org.apache.druid.msq.querykit.BaseFrameProcessorFactory; +import org.apache.druid.sql.http.ResultFormat; +import org.apache.druid.storage.StorageConnectorProvider; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.util.List; +import java.util.function.Consumer; + +@JsonTypeName("exportResults") +public class ExportResultsFrameProcessorFactory extends BaseFrameProcessorFactory +{ + + private final StorageConnectorProvider storageConnectorProvider; + private final ResultFormat exportFormat; + + @JsonCreator + public ExportResultsFrameProcessorFactory( + @JsonProperty("storageConnectorProvider") StorageConnectorProvider storageConnectorProvider, + @JsonProperty("exportFormat") ResultFormat exportFormat + ) + { + this.storageConnectorProvider = storageConnectorProvider; + this.exportFormat = 
exportFormat; + } + + @JsonProperty("exportFormat") + public ResultFormat getExportFormat() + { + return exportFormat; + } + + @JsonProperty("storageConnectorProvider") + public StorageConnectorProvider getStorageConnectorProvider() + { + return storageConnectorProvider; + } + + @Override + public ProcessorsAndChannels makeProcessors( + StageDefinition stageDefinition, + int workerNumber, + List inputSlices, + InputSliceReader inputSliceReader, + @Nullable Object extra, + OutputChannelFactory outputChannelFactory, + FrameContext frameContext, + int maxOutstandingProcessors, + CounterTracker counters, + Consumer warningPublisher + ) throws IOException + { + // Expecting a single input slice from some prior stage. + final StageInputSlice slice = (StageInputSlice) Iterables.getOnlyElement(inputSlices); + + if (inputSliceReader.numReadableInputs(slice) == 0) { + return new ProcessorsAndChannels<>(ProcessorManagers.none(), OutputChannels.none()); + } + + ChannelCounters channelCounter = counters.channel(CounterNames.outputChannel()); + final Sequence readableInputs = + Sequences.simple(inputSliceReader.attach(0, slice, counters, warningPublisher)); + + final Sequence> processors = readableInputs.map( + readableInput -> new ExportResultsFrameProcessor( + readableInput.getChannel(), + exportFormat, + readableInput.getChannelFrameReader(), + storageConnectorProvider.get(), + frameContext.jsonMapper(), + readableInput.getStagePartition().getPartitionNumber(), + workerNumber, + channelCounter + ) + ); + + return new ProcessorsAndChannels<>( + ProcessorManagers.of(processors), + OutputChannels.none() + ); + } +} diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java index c9874cbde659..ea0e0e252495 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java +++ 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java @@ -55,6 +55,7 @@ import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.server.QueryResponse; +import org.apache.druid.sql.calcite.parser.DruidSqlIngest; import org.apache.druid.sql.calcite.parser.DruidSqlInsert; import org.apache.druid.sql.calcite.parser.DruidSqlReplace; import org.apache.druid.sql.calcite.planner.ColumnMapping; @@ -66,6 +67,8 @@ import org.apache.druid.sql.calcite.run.QueryMaker; import org.apache.druid.sql.calcite.run.SqlResults; import org.apache.druid.sql.calcite.table.RowSignatures; +import org.apache.druid.sql.http.ResultFormat; +import org.apache.druid.storage.StorageConnectorProvider; import org.joda.time.Interval; import javax.annotation.Nullable; @@ -208,7 +211,24 @@ public QueryResponse runQuery(final DruidQuery druidQuery) final MSQDestination destination; if (targetDataSource instanceof ExportDestination) { - destination = new ExportMSQDestination((ExportDestination) targetDataSource); + String exportDestination = ((ExportDestination) targetDataSource).getExportDestinationString(); + exportDestination = exportDestination.substring(1, exportDestination.length() - 1); + ResultFormat format = ResultFormat.fromString(sqlQueryContext.getString(DruidSqlIngest.SQL_EXPORT_FILE_FORMAT)); + try { + StorageConnectorProvider storageConnectorProvider = jsonMapper.readValue( + exportDestination, + StorageConnectorProvider.class + ); + destination = new ExportMSQDestination(storageConnectorProvider, format); + } + catch (Exception e) { + throw DruidException.defensive() + .build( + e, + "Unable to deserialize the external destination: [%s].", + exportDestination + ); + } } else if (targetDataSource instanceof TableDestination) { Granularity segmentGranularityObject; try { diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java index 9bc8dfa35c7e..84a5b5cb6849 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java @@ -122,6 +122,7 @@ public boolean featureAvailable(EngineFeature feature, PlannerContext plannerCon case CAN_INSERT: case CAN_REPLACE: case READ_EXTERNAL_DATA: + case WRITE_EXTERNAL_DATA: case SCAN_ORDER_BY_NON_TIME: case SCAN_NEEDS_SIGNATURE: return true; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java index 60e40179dd52..91cc41523821 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java @@ -20,24 +20,27 @@ package org.apache.druid.msq.indexing.destination; import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.druid.catalog.model.table.export.S3ExportDestination; import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.sql.http.ResultFormat; +import org.apache.druid.storage.StorageConnectorModule; +import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; import org.junit.Assert; import org.junit.Test; +import java.io.File; import java.io.IOException; public class ExportMSQDestinationTest { - @Test public void testSerde() throws IOException { - ExportMSQDestination exportDestination = new ExportMSQDestination(new S3ExportDestination("hdfs://localhost:9090/outputdirectory/", null)); + ExportMSQDestination exportDestination = new ExportMSQDestination(new 
LocalFileStorageConnectorProvider(new File("/path")), ResultFormat.CSV); ObjectMapper objectMapper = new DefaultObjectMapper(); - String s = objectMapper.writeValueAsString(exportDestination); + new StorageConnectorModule().getJacksonModules().forEach(objectMapper::registerModule); + String string = objectMapper.writeValueAsString(exportDestination); - ExportMSQDestination newDest = objectMapper.readValue(s, ExportMSQDestination.class); + ExportMSQDestination newDest = objectMapper.readValue(string, ExportMSQDestination.class); Assert.assertEquals(exportDestination, newDest); } } diff --git a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnector.java b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnector.java index 3d96f8d43b1d..ea308654acf5 100644 --- a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnector.java +++ b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnector.java @@ -39,6 +39,9 @@ import java.util.Arrays; import java.util.Iterator; +import static java.nio.file.StandardOpenOption.APPEND; +import static java.nio.file.StandardOpenOption.CREATE; + /** * Implementation that uses local filesystem. All paths are appended with the base path, in such a way that it is not visible * to the users of this class. 
@@ -101,7 +104,7 @@ public OutputStream write(String path) throws IOException { File toWrite = fileWithBasePath(path); FileUtils.mkdirp(toWrite.getParentFile()); - return Files.newOutputStream(toWrite.toPath()); + return Files.newOutputStream(toWrite.toPath(), CREATE, APPEND); } /** diff --git a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java index 07966378c455..b9215da8c8ca 100644 --- a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java +++ b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java @@ -28,12 +28,13 @@ import java.io.File; import java.io.IOException; +import java.util.Objects; @JsonTypeName("local") public class LocalFileStorageConnectorProvider implements StorageConnectorProvider { @JsonProperty - File basePath; + final File basePath; @JsonCreator public LocalFileStorageConnectorProvider(@JsonProperty(value = "basePath", required = true) File basePath) @@ -56,4 +57,23 @@ public StorageConnector get() ); } } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LocalFileStorageConnectorProvider that = (LocalFileStorageConnectorProvider) o; + return Objects.equals(basePath, that.basePath); + } + + @Override + public int hashCode() + { + return Objects.hash(basePath); + } } diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java b/server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java index d52462997cd1..251e5b91ad51 100644 --- a/server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java +++ b/server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java @@ -19,20 +19,10 @@ package org.apache.druid.catalog.model.table; 
-import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonTypeInfo; -import org.apache.druid.catalog.model.table.export.S3ExportDestination; -import org.apache.druid.catalog.model.table.export.TableDestination; import org.apache.druid.guice.annotations.UnstableApi; @UnstableApi -@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = IngestDestination.TYPE_PROPERTY) -@JsonSubTypes(value = { - @JsonSubTypes.Type(name = TableDestination.TYPE_KEY, value = TableDestination.class), - @JsonSubTypes.Type(name = S3ExportDestination.TYPE_KEY, value = S3ExportDestination.class) -}) public interface IngestDestination { - String TYPE_PROPERTY = "type"; String getDestinationName(); } diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java b/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java index d9f61c2a1d19..ec78384cad3f 100644 --- a/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java +++ b/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java @@ -20,7 +20,26 @@ package org.apache.druid.catalog.model.table.export; import org.apache.druid.catalog.model.table.IngestDestination; +import org.apache.druid.storage.StorageConnectorProvider; -public interface ExportDestination extends IngestDestination +public class ExportDestination implements IngestDestination { + private final String exportDestinationString; + + public ExportDestination(String exportDestinationString) + { + + this.exportDestinationString = exportDestinationString; + } + + public String getExportDestinationString() + { + return exportDestinationString; + } + + @Override + public String getDestinationName() + { + return "EXTERN"; + } } diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/export/S3ExportDestination.java b/server/src/main/java/org/apache/druid/catalog/model/table/export/S3ExportDestination.java 
deleted file mode 100644 index 63ced77e4904..000000000000 --- a/server/src/main/java/org/apache/druid/catalog/model/table/export/S3ExportDestination.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.catalog.model.table.export; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Map; -import java.util.Objects; - -public class S3ExportDestination implements ExportDestination -{ - public static final String TYPE_KEY = "s3"; - - private final String uri; - private final String username; - - public S3ExportDestination(Map properties) - { - this(properties.get("uri"), properties.get("username")); - } - - @JsonCreator - public S3ExportDestination(@JsonProperty("uri") String uri, @JsonProperty("username") String username) - { - this.uri = uri; - this.username = username; - } - - @JsonProperty("uri") - public String getUri() - { - return uri; - } - - @JsonProperty("username") - public String getUsername() - { - return username; - } - - @Override - @JsonIgnore - public String getDestinationName() - { - return TYPE_KEY; - } - - 
@Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - S3ExportDestination that = (S3ExportDestination) o; - return Objects.equals(uri, that.uri) && Objects.equals(username, that.username); - } - - @Override - public int hashCode() - { - return Objects.hash(uri, username); - } -} diff --git a/server/src/test/java/org/apache/druid/catalog/model/table/export/S3ExportDestinationTest.java b/server/src/test/java/org/apache/druid/catalog/model/table/export/S3ExportDestinationTest.java deleted file mode 100644 index 5eb0eecb0e6a..000000000000 --- a/server/src/test/java/org/apache/druid/catalog/model/table/export/S3ExportDestinationTest.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.catalog.model.table.export; - -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.druid.jackson.DefaultObjectMapper; -import org.junit.Assert; -import org.junit.Test; - -import java.io.IOException; - -public class S3ExportDestinationTest -{ - @Test - public void testSerde() throws IOException - { - S3ExportDestination exportDestination = new S3ExportDestination("uri", "username"); - ObjectMapper objectMapper = new DefaultObjectMapper(); - byte[] bytes = objectMapper.writeValueAsBytes(exportDestination); - S3ExportDestination newDest = objectMapper.readValue(bytes, S3ExportDestination.class); - Assert.assertEquals(exportDestination, newDest); - } -} diff --git a/sql/src/main/codegen/config.fmpp b/sql/src/main/codegen/config.fmpp index ee4adaed2e02..88fa826a3a8e 100644 --- a/sql/src/main/codegen/config.fmpp +++ b/sql/src/main/codegen/config.fmpp @@ -60,7 +60,6 @@ data: { "org.apache.druid.sql.calcite.external.ExtendOperator" "org.apache.druid.sql.calcite.external.ParameterizeOperator" "org.apache.druid.sql.calcite.parser.ExternalDestinationSqlIdentifier" - "org.apache.druid.catalog.model.table.export.S3ExportDestination" "java.util.HashMap" ] diff --git a/sql/src/main/codegen/includes/common.ftl b/sql/src/main/codegen/includes/common.ftl index 81b21b30ba47..d5a4599c4f7e 100644 --- a/sql/src/main/codegen/includes/common.ftl +++ b/sql/src/main/codegen/includes/common.ftl @@ -110,38 +110,18 @@ SqlTypeNameSpec DruidType() : SqlIdentifier ExternalDestination() : { final Span s; - Map properties = new HashMap(); + final SqlNode key; } { - ( - [ properties = ExternProperties() ] - { - s = span(); - return new ExternalDestinationSqlIdentifier( - org.apache.druid.catalog.model.table.export.S3ExportDestination.TYPE_KEY, + key = StringLiteral() + { + s = span(); + return new ExternalDestinationSqlIdentifier( + "EXTERN", s.pos(), - new S3ExportDestination(properties), - properties - ); - } - ) -} - -Map ExternProperties() : -{ 
- final Span s; - final Map properties = new HashMap(); - SqlNodeList commaList = null; -} -{ - commaList = ExpressionCommaList(span(), ExprContext.ACCEPT_NON_QUERY) - { - for (SqlNode sqlNode : commaList) { - List sqlNodeList = ((SqlBasicCall) sqlNode).getOperandList(); - properties.put(((SqlIdentifier) sqlNodeList.get(0)).getSimple(), ((SqlIdentifier) sqlNodeList.get(1)).getSimple()); - } - return properties; - } + key + ); + } } // Parses the supported file formats for export. diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlIngest.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlIngest.java index a95db5d03ec6..56c2766b0fa7 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlIngest.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlIngest.java @@ -34,6 +34,9 @@ */ public abstract class DruidSqlIngest extends SqlInsert { + public static final String SQL_EXPORT_FILE_FORMAT = "__exportFileFormat"; + + @Nullable protected final Granularity partitionedBy; // Used in the unparse function to generate the original query since we convert the string to an enum @@ -64,6 +67,7 @@ public DruidSqlIngest( this.exportFileFormat = exportFileFormat; } + @Nullable public Granularity getPartitionedBy() { return partitionedBy; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java index 7dc3aaafa303..e845780ae5d7 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java @@ -20,13 +20,11 @@ package org.apache.druid.sql.calcite.parser; import com.google.common.collect.Iterables; +import org.apache.calcite.sql.SqlCharStringLiteral; import org.apache.calcite.sql.SqlIdentifier; import 
org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlWriter; import org.apache.calcite.sql.parser.SqlParserPos; -import org.apache.druid.catalog.model.table.export.ExportDestination; - -import java.util.Map; /** * Extends the {@link SqlIdentifier} to hold parameters for an external table destination. This contains information @@ -34,45 +32,35 @@ */ public class ExternalDestinationSqlIdentifier extends SqlIdentifier { - private final ExportDestination exportDestination; - private final Map propertiesForUnparse; + private final SqlCharStringLiteral exportDestinationString; public ExternalDestinationSqlIdentifier( String name, SqlParserPos pos, - ExportDestination exportDestination, - Map propertiesForUnparse + SqlNode exportDestinationString ) { super(name, pos); - this.exportDestination = exportDestination; - this.propertiesForUnparse = propertiesForUnparse; + this.exportDestinationString = (SqlCharStringLiteral) exportDestinationString; } - public ExportDestination getExportDestination() + public String getExportDestinationString() { - return exportDestination; + return exportDestinationString.toString(); } @Override public void unparse(SqlWriter writer, int leftPrec, int rightPrec) { SqlWriter.Frame externFrame = writer.startFunCall("EXTERN"); - SqlWriter.Frame frame = writer.startFunCall(Iterables.getOnlyElement(names)); - for (Map.Entry property : propertiesForUnparse.entrySet()) { - writer.sep(","); - writer.keyword(property.getKey()); - writer.print("="); - writer.identifier(property.getValue(), false); - } - writer.endFunCall(frame); + writer.print(exportDestinationString.toString()); writer.endFunCall(externFrame); } @Override public SqlNode clone(SqlParserPos pos) { - return new ExternalDestinationSqlIdentifier(Iterables.getOnlyElement(names), pos, exportDestination, propertiesForUnparse); + return new ExternalDestinationSqlIdentifier(Iterables.getOnlyElement(names), pos, exportDestinationString); } @Override diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java index f4efb1790f2b..c4d0bfbcf773 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java @@ -35,6 +35,7 @@ import org.apache.calcite.tools.ValidationException; import org.apache.calcite.util.Pair; import org.apache.druid.catalog.model.table.IngestDestination; +import org.apache.druid.catalog.model.table.export.ExportDestination; import org.apache.druid.catalog.model.table.export.TableDestination; import org.apache.druid.common.utils.IdUtils; import org.apache.druid.error.DruidException; @@ -109,12 +110,35 @@ protected String operationName() @Override public void validate() { - if (ingestNode().getPartitionedBy() == null) { + if (ingestNode().getTargetTable() instanceof ExternalDestinationSqlIdentifier) { + if (!handlerContext.plannerContext().featureAvailable(EngineFeature.WRITE_EXTERNAL_DATA)) { + throw InvalidSqlInput.exception( + "Writing to external sources is not supported by the requested SQL engine [%s]; consider using MSQ.", + handlerContext.engine().name() + ); + } + } else if (ingestNode().getPartitionedBy() == null) { throw InvalidSqlInput.exception( "Operation [%s] requires a PARTITIONED BY to be explicitly defined, but none was found.", operationName() ); } + + String exportFileFormat = ingestNode().getExportFileFormat(); + if (ingestNode().getTargetTable() instanceof ExternalDestinationSqlIdentifier) { + if (exportFileFormat == null) { + throw InvalidSqlInput.exception( + "External write statements require an AS clause to specify the format, but none was found.", + operationName() + ); + } else { + handlerContext.plannerContext().queryContextMap().put( + DruidSqlIngest.SQL_EXPORT_FILE_FORMAT, + exportFileFormat + ); + } + } + try { PlannerContext plannerContext = handlerContext.plannerContext(); 
if (ingestionGranularity != null) { @@ -180,7 +204,8 @@ private IngestDestination validateAndGetDataSourceForIngest() .ofCategory(DruidException.Category.DEFENSIVE) .build("Operation [%s] requires a target table", operationName()); } else if (tableIdentifier instanceof ExternalDestinationSqlIdentifier) { - dataSource = ((ExternalDestinationSqlIdentifier) tableIdentifier).getExportDestination(); + String exportDestinationString = ((ExternalDestinationSqlIdentifier) tableIdentifier).getExportDestinationString(); + dataSource = new ExportDestination(exportDestinationString); } else if (tableIdentifier.names.size() == 1) { // Unqualified name. String tableName = Iterables.getOnlyElement(tableIdentifier.names); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java index 778c7ec03b6f..b514ffe12b65 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java @@ -118,5 +118,9 @@ public enum EngineFeature * and cannot concat the results together (as * the result for broker is the query id). Therefore, we don't get the * correct result back, while the MSQ engine is executing the partial query */ - ALLOW_TOP_LEVEL_UNION_ALL; + ALLOW_TOP_LEVEL_UNION_ALL, + /** + * Queries can write to an {@link ExternalDataSource}. 
+ */ + WRITE_EXTERNAL_DATA; } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java index ade7c7895b47..0fd6b6a02494 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java @@ -113,6 +113,7 @@ public boolean featureAvailable(EngineFeature feature, PlannerContext plannerCon case CAN_INSERT: case CAN_REPLACE: case READ_EXTERNAL_DATA: + case WRITE_EXTERNAL_DATA: case SCAN_ORDER_BY_NON_TIME: case SCAN_NEEDS_SIGNATURE: return false; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java index 3cce507714a9..82ea2416815a 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java @@ -60,6 +60,7 @@ public boolean featureAvailable(EngineFeature feature, PlannerContext plannerCon case CAN_SELECT: case ALLOW_BINDABLE_PLAN: case READ_EXTERNAL_DATA: + case WRITE_EXTERNAL_DATA: case SCAN_ORDER_BY_NON_TIME: case GROUPING_SETS: case WINDOW_FUNCTIONS: diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index ed059401ccc0..53957ac2bcaf 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -32,7 +32,7 @@ public class CalciteExportTest extends CalciteIngestionDmlTest public void testReplaceIntoExtern() { testIngestionQuery() - .sql("REPLACE INTO EXTERN(s3(uri=\"s3://druid-data/exportdest/\",username=\"user1\")) AS CSV OVERWRITE ALL SELECT dim2 FROM foo PARTITIONED BY ALL") + .sql("REPLACE INTO 
EXTERN('{\"type\":\"s3\",\"bucket\":\"bucket1\",\"prefix\":\"prefix1\",\"tempDir\":\"/tempdir\",\"chunkSize\":5242880,\"maxRetry\":1}') AS CSV OVERWRITE ALL SELECT dim2 FROM foo PARTITIONED BY ALL") .expectQuery( Druids.newScanQueryBuilder() .dataSource( @@ -45,7 +45,7 @@ public void testReplaceIntoExtern() .build() ) .expectResources(dataSourceRead("foo")) - .expectTarget("s3", RowSignature.builder().add("dim2", ColumnType.STRING).build()) + .expectTarget("EXTERN", RowSignature.builder().add("dim2", ColumnType.STRING).build()) .verify(); } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java b/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java index f3c68be36d35..5ef762d81cd2 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java @@ -89,6 +89,7 @@ public boolean featureAvailable(final EngineFeature feature, final PlannerContex case CAN_INSERT: case CAN_REPLACE: case READ_EXTERNAL_DATA: + case WRITE_EXTERNAL_DATA: case SCAN_ORDER_BY_NON_TIME: case ALLOW_BROADCAST_RIGHTY_JOIN: case ALLOW_TOP_LEVEL_UNION_ALL: diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java index 2df46ed88a6e..1c6606a68aa4 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java @@ -99,8 +99,8 @@ private static DruidSqlParserImpl createTestParser(String parseString) @Test public void testUnparseExternalSqlIdentifier() throws ParseException { - String sqlQuery = "REPLACE INTO EXTERN(s3(uri = s3url, user = user1)) AS CSV OVERWRITE ALL SELECT dim2 FROM foo PARTITIONED BY ALL"; - String prettySqlQuery = "REPLACE INTO EXTERN(S3(URI =\"s3url\", USER =\"user1\"))\n" + String sqlQuery = "REPLACE 
INTO EXTERN('{\"type\":\"s3\",\"bucket\":\"bucket1\",\"prefix\":\"prefix1\",\"tempDir\":\"/tempdir\",\"chunkSize\":5242880,\"maxRetry\":1}') AS CSV OVERWRITE ALL SELECT dim2 FROM foo PARTITIONED BY ALL"; + String prettySqlQuery = "REPLACE INTO EXTERN('{\"type\":\"s3\",\"bucket\":\"bucket1\",\"prefix\":\"prefix1\",\"tempDir\":\"/tempdir\",\"chunkSize\":5242880,\"maxRetry\":1}')\n" + "AS CSV\n" + "OVERWRITE ALL\n" + "SELECT \"dim2\"\n" From 53ff841acbef3e4046aad05212fc71f70d3a3579 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Mon, 22 Jan 2024 17:07:18 +0530 Subject: [PATCH 06/50] Address review comments --- .../destination/ExportMSQDestination.java | 32 +++++++- .../destination/MSQSelectDestination.java | 2 +- .../druid/msq/sql/MSQTaskQueryMaker.java | 2 +- .../destination/ExportMSQDestinationTest.java | 7 +- .../local/LocalFileStorageConnector.java | 5 +- ...LocalFileStorageConnectorProviderTest.java | 35 +++++++++ .../model/table/IngestDestination.java | 2 + .../model/table/export/ExportDestination.java | 4 +- .../model/table/export/TableDestination.java | 2 + sql/src/main/codegen/includes/insert.ftl | 2 +- .../ExternalDestinationSqlIdentifier.java | 14 ++-- .../sql/calcite/planner/IngestHandler.java | 5 ++ .../druid/sql/calcite/CalciteExportTest.java | 77 ++++++++++++++++++- 13 files changed, 171 insertions(+), 18 deletions(-) create mode 100644 processing/src/test/java/org/apache/druid/storage/local/LocalFileStorageConnectorProviderTest.java diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java index cd56f1f2cab5..501ad8797311 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java +++ 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java @@ -20,10 +20,16 @@ package org.apache.druid.msq.indexing.destination; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.sql.http.ResultFormat; import org.apache.druid.storage.StorageConnectorProvider; +import org.joda.time.Interval; +import javax.annotation.Nullable; +import java.util.List; import java.util.Objects; public class ExportMSQDestination implements MSQDestination @@ -31,14 +37,24 @@ public class ExportMSQDestination implements MSQDestination public static final String TYPE = "export"; private final StorageConnectorProvider storageConnectorProvider; private final ResultFormat resultFormat; + @Nullable + private final List replaceTimeChunks; @JsonCreator public ExportMSQDestination(@JsonProperty("storageConnectorProvider") StorageConnectorProvider storageConnectorProvider, - @JsonProperty("resultFormat") ResultFormat resultFormat + @JsonProperty("resultFormat") ResultFormat resultFormat, + @JsonProperty("replaceTimeChunks") @Nullable List replaceTimeChunks ) { this.storageConnectorProvider = storageConnectorProvider; this.resultFormat = resultFormat; + if (replaceTimeChunks == null || Intervals.ONLY_ETERNITY.equals(replaceTimeChunks)) { + this.replaceTimeChunks = replaceTimeChunks; + } else { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.UNSUPPORTED) + .build("Currently export only works with "); + } } @JsonProperty("storageConnectorProvider") @@ -53,6 +69,14 @@ public ResultFormat getResultFormat() return resultFormat; } + @Nullable + @JsonProperty("replaceTimeChunks") + @JsonInclude(JsonInclude.Include.NON_NULL) + public List getReplaceTimeChunks() + { 
+ return replaceTimeChunks; + } + @Override public boolean equals(Object o) { @@ -64,13 +88,14 @@ public boolean equals(Object o) } ExportMSQDestination that = (ExportMSQDestination) o; return Objects.equals(storageConnectorProvider, that.storageConnectorProvider) - && resultFormat == that.resultFormat; + && resultFormat == that.resultFormat + && Objects.equals(replaceTimeChunks, that.replaceTimeChunks); } @Override public int hashCode() { - return Objects.hash(storageConnectorProvider, resultFormat); + return Objects.hash(storageConnectorProvider, resultFormat, replaceTimeChunks); } @Override @@ -79,6 +104,7 @@ public String toString() return "ExportMSQDestination{" + "storageConnectorProvider=" + storageConnectorProvider + ", resultFormat=" + resultFormat + + ", replaceTimeChunks=" + replaceTimeChunks + '}'; } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQSelectDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQSelectDestination.java index 719e571ab6ca..e32705462470 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQSelectDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQSelectDestination.java @@ -31,7 +31,7 @@ public enum MSQSelectDestination */ TASKREPORT("taskReport", false), /** - * Writes the results as rows to a location. + * Writes all the results as files in a specified format to an external location outside druid. 
*/ EXPORT("export", false), /** diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java index ea0e0e252495..c2aec222beed 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java @@ -219,7 +219,7 @@ public QueryResponse runQuery(final DruidQuery druidQuery) exportDestination, StorageConnectorProvider.class ); - destination = new ExportMSQDestination(storageConnectorProvider, format); + destination = new ExportMSQDestination(storageConnectorProvider, format, replaceTimeChunks); } catch (Exception e) { throw DruidException.defensive() diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java index 91cc41523821..211986f712c6 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.sql.http.ResultFormat; import org.apache.druid.storage.StorageConnectorModule; import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; @@ -35,7 +36,11 @@ public class ExportMSQDestinationTest @Test public void testSerde() throws IOException { - ExportMSQDestination exportDestination = new ExportMSQDestination(new LocalFileStorageConnectorProvider(new File("/path")), ResultFormat.CSV); + 
ExportMSQDestination exportDestination = new ExportMSQDestination( + new LocalFileStorageConnectorProvider(new File("/path")), + ResultFormat.CSV, + Intervals.ONLY_ETERNITY + ); ObjectMapper objectMapper = new DefaultObjectMapper(); new StorageConnectorModule().getJacksonModules().forEach(objectMapper::registerModule); String string = objectMapper.writeValueAsString(exportDestination); diff --git a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnector.java b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnector.java index ea308654acf5..225f3eb85373 100644 --- a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnector.java +++ b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnector.java @@ -39,9 +39,6 @@ import java.util.Arrays; import java.util.Iterator; -import static java.nio.file.StandardOpenOption.APPEND; -import static java.nio.file.StandardOpenOption.CREATE; - /** * Implementation that uses local filesystem. All paths are appended with the base path, in such a way that it is not visible * to the users of this class. @@ -104,7 +101,7 @@ public OutputStream write(String path) throws IOException { File toWrite = fileWithBasePath(path); FileUtils.mkdirp(toWrite.getParentFile()); - return Files.newOutputStream(toWrite.toPath(), CREATE, APPEND); + return Files.newOutputStream(toWrite.toPath(), StandardOpenOption.CREATE, StandardOpenOption.APPEND); } /** diff --git a/processing/src/test/java/org/apache/druid/storage/local/LocalFileStorageConnectorProviderTest.java b/processing/src/test/java/org/apache/druid/storage/local/LocalFileStorageConnectorProviderTest.java new file mode 100644 index 000000000000..5b09022e363f --- /dev/null +++ b/processing/src/test/java/org/apache/druid/storage/local/LocalFileStorageConnectorProviderTest.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.storage.local; + +import nl.jqno.equalsverifier.EqualsVerifier; +import org.junit.Test; + +public class LocalFileStorageConnectorProviderTest +{ + @Test + public void testEqualsAndHashCode() + { + EqualsVerifier.forClass(LocalFileStorageConnectorProvider.class) + .withNonnullFields("basePath") + .usingGetClass() + .verify(); + } +} \ No newline at end of file diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java b/server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java index 251e5b91ad51..830d46ef354d 100644 --- a/server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java +++ b/server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java @@ -19,9 +19,11 @@ package org.apache.druid.catalog.model.table; +import com.fasterxml.jackson.annotation.JsonTypeInfo; import org.apache.druid.guice.annotations.UnstableApi; @UnstableApi +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") public interface IngestDestination { String getDestinationName(); diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java b/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java 
index ec78384cad3f..73ab2f541138 100644 --- a/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java +++ b/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java @@ -19,11 +19,13 @@ package org.apache.druid.catalog.model.table.export; +import com.fasterxml.jackson.annotation.JsonTypeName; import org.apache.druid.catalog.model.table.IngestDestination; -import org.apache.druid.storage.StorageConnectorProvider; +@JsonTypeName(ExportDestination.TYPE_KEY) public class ExportDestination implements IngestDestination { + public static final String TYPE_KEY = "external"; private final String exportDestinationString; public ExportDestination(String exportDestinationString) diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/export/TableDestination.java b/server/src/main/java/org/apache/druid/catalog/model/table/export/TableDestination.java index 019bfa017ec1..5e5d50c5f3ea 100644 --- a/server/src/main/java/org/apache/druid/catalog/model/table/export/TableDestination.java +++ b/server/src/main/java/org/apache/druid/catalog/model/table/export/TableDestination.java @@ -21,10 +21,12 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; import org.apache.druid.catalog.model.table.IngestDestination; import java.util.Objects; +@JsonTypeName(TableDestination.TYPE_KEY) public class TableDestination implements IngestDestination { public static final String TYPE_KEY = "table"; diff --git a/sql/src/main/codegen/includes/insert.ftl b/sql/src/main/codegen/includes/insert.ftl index 51d647b47ec2..1e74cd3e6bf8 100644 --- a/sql/src/main/codegen/includes/insert.ftl +++ b/sql/src/main/codegen/includes/insert.ftl @@ -105,7 +105,7 @@ SqlNode DruidSqlInsertEof() : // actual error message. 
{ - insertNode = new SqlInsert(s.end(source), keywordList, tableRef, source, columnList); + insertNode = new SqlInsert(s.end(source), keywordList, destination, source, columnList); if (!(insertNode instanceof SqlInsert)) { // This shouldn't be encountered, but done as a defensive practice. SqlInsert() always returns a node of type // SqlInsert diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java index e845780ae5d7..1a6a1466e6ab 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java @@ -19,16 +19,16 @@ package org.apache.druid.sql.calcite.parser; -import com.google.common.collect.Iterables; import org.apache.calcite.sql.SqlCharStringLiteral; import org.apache.calcite.sql.SqlIdentifier; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlWriter; import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.druid.error.DruidException; +import org.apache.druid.utils.CollectionUtils; /** - * Extends the {@link SqlIdentifier} to hold parameters for an external table destination. This contains information - * required for a task to write to a destination. + * Extends the {@link SqlIdentifier} to hold parameters for an external destination. 
*/ public class ExternalDestinationSqlIdentifier extends SqlIdentifier { @@ -60,13 +60,17 @@ public void unparse(SqlWriter writer, int leftPrec, int rightPrec) @Override public SqlNode clone(SqlParserPos pos) { - return new ExternalDestinationSqlIdentifier(Iterables.getOnlyElement(names), pos, exportDestinationString); + final String name = CollectionUtils.getOnlyElement( + names, + x -> DruidException.defensive("Expected single name in identifier [%s], but got [%s]", names) + ); + return new ExternalDestinationSqlIdentifier(name, pos, exportDestinationString); } @Override @Deprecated public Object clone() { - throw new UnsupportedOperationException("Function is deprecated, please use clone(SqlNode) instead."); + throw DruidException.defensive("Function is deprecated, please use clone(SqlNode) instead."); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java index c4d0bfbcf773..c9f85e3fd154 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java @@ -117,6 +117,11 @@ public void validate() handlerContext.engine().name() ); } + if (ingestNode().getPartitionedBy() != null) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.UNSUPPORTED) + .build("Export statements do not currently support a PARTITIONED BY or CLUSTERED BY clause."); + } } else if (ingestNode().getPartitionedBy() == null) { throw InvalidSqlInput.exception( "Operation [%s] requires a PARTITIONED BY to be explicitly defined, but none was found.", diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index 53957ac2bcaf..70b26b43decc 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -19,6 +19,7 @@ package org.apache.druid.sql.calcite; +import org.apache.druid.error.DruidException; import org.apache.druid.query.Druids; import org.apache.druid.query.scan.ScanQuery; import org.apache.druid.segment.column.ColumnType; @@ -32,7 +33,10 @@ public class CalciteExportTest extends CalciteIngestionDmlTest public void testReplaceIntoExtern() { testIngestionQuery() - .sql("REPLACE INTO EXTERN('{\"type\":\"s3\",\"bucket\":\"bucket1\",\"prefix\":\"prefix1\",\"tempDir\":\"/tempdir\",\"chunkSize\":5242880,\"maxRetry\":1}') AS CSV OVERWRITE ALL SELECT dim2 FROM foo PARTITIONED BY ALL") + .sql("REPLACE INTO EXTERN('{\"type\":\"s3\",\"bucket\":\"bucket1\",\"prefix\":\"prefix1\",\"tempDir\":\"/tempdir\",\"chunkSize\":5242880,\"maxRetry\":1}') " + + "AS CSV " + + "OVERWRITE ALL " + + "SELECT dim2 FROM foo") .expectQuery( Druids.newScanQueryBuilder() .dataSource( @@ -48,4 +52,75 @@ public void testReplaceIntoExtern() .expectTarget("EXTERN", RowSignature.builder().add("dim2", ColumnType.STRING).build()) .verify(); } + + @Test + public void testExportWithPartitionedBy() + { + testIngestionQuery() + .sql("REPLACE INTO EXTERN('{\"type\":\"s3\",\"bucket\":\"bucket1\",\"prefix\":\"prefix1\",\"tempDir\":\"/tempdir\",\"chunkSize\":5242880,\"maxRetry\":1}') " + + "AS CSV " + + "OVERWRITE ALL " + + "SELECT dim2 FROM foo " + + "PARTITIONED BY ALL") + .expectValidationError( + DruidException.class, + "Export statements do not currently support a PARTITIONED BY or CLUSTERED BY clause." 
+ ) + .verify(); + } + + @Test + public void testInsertIntoExtern() + { + testIngestionQuery() + .sql("INSERT INTO EXTERN('{\"type\":\"s3\",\"bucket\":\"bucket1\",\"prefix\":\"prefix1\",\"tempDir\":\"/tempdir\",\"chunkSize\":5242880,\"maxRetry\":1}') " + + "AS CSV " + + "SELECT dim2 FROM foo") + .expectQuery( + Druids.newScanQueryBuilder() + .dataSource( + "foo" + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .build() + ) + .expectResources(dataSourceRead("foo")) + .expectTarget("EXTERN", RowSignature.builder().add("dim2", ColumnType.STRING).build()) + .verify(); + } + + @Test + public void testExportWithoutFormat() + { + testIngestionQuery() + .sql("INSERT INTO EXTERN('{\"type\":\"s3\",\"bucket\":\"bucket1\",\"prefix\":\"prefix1\",\"tempDir\":\"/tempdir\",\"chunkSize\":5242880,\"maxRetry\":1}') " + + "SELECT dim2 FROM foo") + .expectValidationError( + DruidException.class, + "External write statemetns requires a AS clause to specify the format, but none was found." 
+ ) + .verify(); + } + + @Test + public void testSelectFromTableNamedExport() + { + testIngestionQuery() + .sql("INSERT INTO csv SELECT dim2 FROM foo PARTITIONED BY ALL") + .expectQuery( + Druids.newScanQueryBuilder() + .dataSource("foo") + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .build() + ) + .expectResources(dataSourceRead("foo"), dataSourceWrite("csv")) + .expectTarget("csv", RowSignature.builder().add("dim2", ColumnType.STRING).build()) + .verify(); + } } From 6daf530a7689fd43c72ed5198e2b742e20963eca Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Mon, 22 Jan 2024 18:20:00 +0530 Subject: [PATCH 07/50] Fix builds --- .../org/apache/druid/msq/exec/ControllerImpl.java | 14 ++++++++++++++ .../results/ExportResultsFrameProcessor.java | 2 +- .../ExportResultsFrameProcessorFactory.java | 3 +-- .../LocalFileStorageConnectorProviderTest.java | 2 +- .../parser/ExternalDestinationSqlIdentifier.java | 2 +- .../druid/sql/calcite/IngestTableFunctionTest.java | 2 +- 6 files changed, 19 insertions(+), 6 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 5cebb34a327a..2b703efbf807 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -204,6 +204,7 @@ import org.apache.druid.sql.calcite.planner.ColumnMappings; import org.apache.druid.sql.calcite.rel.DruidQuery; import org.apache.druid.sql.http.ResultFormat; +import org.apache.druid.storage.StorageConnector; import org.apache.druid.storage.StorageConnectorProvider; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentTimeline; @@ -1886,6 +1887,19 @@ 
private static QueryDefinition makeQueryDefinition( StorageConnectorProvider storageConnectorProvider = exportMSQDestination.getStorageConnectorProvider(); ResultFormat resultFormat = exportMSQDestination.getResultFormat(); + // If the statement is a REPLACE, delete the existing files at the destination. + if (Intervals.ONLY_ETERNITY.equals(exportMSQDestination.getReplaceTimeChunks())) { + StorageConnector storageConnector = storageConnectorProvider.get(); + try { + storageConnector.deleteRecursively(""); + } + catch (IOException e) { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.RUNTIME_FAILURE) + .build(e, "Count not delete existing files from "); + } + } + final QueryDefinitionBuilder builder = QueryDefinition.builder(); builder.addAll(queryDef); builder.add(StageDefinition.builder(queryDef.getNextStageNumber()) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java index 958d2eedf5bd..d9da52520b0e 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java @@ -94,7 +94,7 @@ public List outputChannels() } @Override - public ReturnOrAwait runIncrementally(IntSet readableInputs) throws InterruptedException, IOException + public ReturnOrAwait runIncrementally(IntSet readableInputs) throws IOException { if (readableInputs.isEmpty()) { return ReturnOrAwait.awaitAll(1); diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java index 561606256542..c3754ca69c9c 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java @@ -44,7 +44,6 @@ import org.apache.druid.storage.StorageConnectorProvider; import javax.annotation.Nullable; -import java.io.IOException; import java.util.List; import java.util.function.Consumer; @@ -89,7 +88,7 @@ public ProcessorsAndChannels makeProcessors( int maxOutstandingProcessors, CounterTracker counters, Consumer warningPublisher - ) throws IOException + ) { // Expecting a single input slice from some prior stage. final StageInputSlice slice = (StageInputSlice) Iterables.getOnlyElement(inputSlices); diff --git a/processing/src/test/java/org/apache/druid/storage/local/LocalFileStorageConnectorProviderTest.java b/processing/src/test/java/org/apache/druid/storage/local/LocalFileStorageConnectorProviderTest.java index 5b09022e363f..eb4c4242f96b 100644 --- a/processing/src/test/java/org/apache/druid/storage/local/LocalFileStorageConnectorProviderTest.java +++ b/processing/src/test/java/org/apache/druid/storage/local/LocalFileStorageConnectorProviderTest.java @@ -32,4 +32,4 @@ public void testEqualsAndHashCode() .usingGetClass() .verify(); } -} \ No newline at end of file +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java index 1a6a1466e6ab..5fab3ae207d7 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java @@ -62,7 +62,7 @@ public SqlNode 
clone(SqlParserPos pos) { final String name = CollectionUtils.getOnlyElement( names, - x -> DruidException.defensive("Expected single name in identifier [%s], but got [%s]", names) + x -> DruidException.defensive("Expected single name in external destination identifier, but got [%s]", names) ); return new ExternalDestinationSqlIdentifier(name, pos, exportDestinationString); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/IngestTableFunctionTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/IngestTableFunctionTest.java index ff29a8743242..976e0aff59bc 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/IngestTableFunctionTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/IngestTableFunctionTest.java @@ -322,7 +322,7 @@ public void testExplainHttpFn() "\"signature\":[{\"name\":\"x\",\"type\":\"STRING\"},{\"name\":\"y\",\"type\":\"STRING\"},{\"name\":\"z\",\"type\":\"LONG\"}]," + "\"columnMappings\":[{\"queryColumn\":\"x\",\"outputColumn\":\"x\"},{\"queryColumn\":\"y\",\"outputColumn\":\"y\"},{\"queryColumn\":\"z\",\"outputColumn\":\"z\"}]}]"; final String resources = "[{\"name\":\"EXTERNAL\",\"type\":\"EXTERNAL\"},{\"name\":\"dst\",\"type\":\"DATASOURCE\"}]"; - final String attributes = "{\"statementType\":\"INSERT\",\"targetDataSource\":\"dst\",\"partitionedBy\":{\"type\":\"all\"}}"; + final String attributes = "{\"statementType\":\"INSERT\",\"targetDataSource\":{\"type\":\"table\",\"tableName\":\"dst\"},\"partitionedBy\":{\"type\":\"all\"}}"; testQuery( PLANNER_CONFIG_NATIVE_QUERY_EXPLAIN, From 550cc8f47ac987b2938f179e8700776d1b73ea5a Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 23 Jan 2024 15:56:17 +0530 Subject: [PATCH 08/50] Update syntax --- .../apache/druid/msq/exec/ControllerImpl.java | 17 +++-- .../druid/msq/exec/ExportSourceHandlers.java | 40 ++++++++++++ .../destination/ExportMSQDestination.java | 35 +++++++---- .../druid/msq/sql/MSQTaskQueryMaker.java | 25 +++----- 
.../destination/ExportMSQDestinationTest.java | 6 +- .../storage/s3/S3StorageDruidModule.java | 4 ++ .../s3/output/S3StorageConnectorProvider.java | 14 +++++ .../s3/output/S3StorageExportConfig.java | 44 +++++++++++++ .../model/table/export/ExportDestination.java | 23 +++++-- .../table/export/ExportSourceConfig.java | 30 +++++++++ .../export/LocalStorageExportConfig.java | 36 +++++++++++ .../apache/druid/curator/CuratorModule.java | 5 ++ sql/src/main/codegen/includes/common.ftl | 63 ++++++++++++++----- .../ExternalDestinationSqlIdentifier.java | 32 +++++++--- .../sql/calcite/planner/IngestHandler.java | 4 +- .../druid/sql/calcite/CalciteExportTest.java | 8 +-- .../calcite/parser/DruidSqlUnparseTest.java | 4 +- 17 files changed, 314 insertions(+), 76 deletions(-) create mode 100644 extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportSourceHandlers.java create mode 100644 extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageExportConfig.java create mode 100644 server/src/main/java/org/apache/druid/catalog/model/table/export/ExportSourceConfig.java create mode 100644 server/src/main/java/org/apache/druid/catalog/model/table/export/LocalStorageExportConfig.java diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 2b703efbf807..5c2d2f32c01c 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -33,6 +33,7 @@ import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import com.google.common.util.concurrent.SettableFuture; +import com.google.inject.Injector; import it.unimi.dsi.fastutil.ints.Int2ObjectAVLTreeMap; import 
it.unimi.dsi.fastutil.ints.Int2ObjectMap; import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; @@ -40,6 +41,7 @@ import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntSet; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.druid.catalog.model.table.export.ExportSourceConfig; import org.apache.druid.client.ImmutableSegmentLoadInfo; import org.apache.druid.common.guava.FutureUtils; import org.apache.druid.data.input.StringTuple; @@ -655,7 +657,8 @@ private QueryDefinition initializeQueryDefAndState(final Closer closer) id(), makeQueryControllerToolKit(), task.getQuerySpec(), - context.jsonMapper() + context.jsonMapper(), + context.injector() ); QueryValidator.validateQueryDef(queryDef); @@ -1747,7 +1750,8 @@ private static QueryDefinition makeQueryDefinition( final String queryId, @SuppressWarnings("rawtypes") final QueryKit toolKit, final MSQSpec querySpec, - final ObjectMapper jsonMapper + final ObjectMapper jsonMapper, + final Injector injector ) { final MSQTuningConfig tuningConfig = querySpec.getTuningConfig(); @@ -1884,10 +1888,13 @@ private static QueryDefinition makeQueryDefinition( } } else if (querySpec.getDestination() instanceof ExportMSQDestination) { ExportMSQDestination exportMSQDestination = (ExportMSQDestination) querySpec.getDestination(); - StorageConnectorProvider storageConnectorProvider = exportMSQDestination.getStorageConnectorProvider(); - ResultFormat resultFormat = exportMSQDestination.getResultFormat(); + final ExportSourceHandlers exportSourceHandlers = injector.getInstance(ExportSourceHandlers.class); + final ExportSourceConfig outputConfig = exportSourceHandlers.getConnectorProviderMap() + .get(exportMSQDestination.getStorageConnectorType()); + final StorageConnectorProvider storageConnectorProvider = outputConfig.get(exportMSQDestination.getProperties(), injector); + final ResultFormat resultFormat = exportMSQDestination.getResultFormat(); - // If the statement is a REPLACE, delete the 
existing files at the destination. + // If the statement is a REPLACE statement, delete the existing files at the destination. if (Intervals.ONLY_ETERNITY.equals(exportMSQDestination.getReplaceTimeChunks())) { StorageConnector storageConnector = storageConnectorProvider.get(); try { diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportSourceHandlers.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportSourceHandlers.java new file mode 100644 index 000000000000..368760a35f1f --- /dev/null +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportSourceHandlers.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.msq.exec; + +import com.google.inject.Inject; +import org.apache.druid.catalog.model.table.export.ExportSourceConfig; + +import java.util.Map; + +public class ExportSourceHandlers +{ + public Map getConnectorProviderMap() + { + return connectorProviderMap; + } + + private final Map connectorProviderMap; + + @Inject + public ExportSourceHandlers(Map connectorProviderMap) { + this.connectorProviderMap = connectorProviderMap; + } +} diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java index 501ad8797311..0308c8f9b8c3 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java @@ -25,28 +25,31 @@ import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.sql.http.ResultFormat; -import org.apache.druid.storage.StorageConnectorProvider; import org.joda.time.Interval; import javax.annotation.Nullable; import java.util.List; +import java.util.Map; import java.util.Objects; public class ExportMSQDestination implements MSQDestination { public static final String TYPE = "export"; - private final StorageConnectorProvider storageConnectorProvider; + private final String storageConnectorType; + private final Map properties; private final ResultFormat resultFormat; @Nullable private final List replaceTimeChunks; @JsonCreator - public ExportMSQDestination(@JsonProperty("storageConnectorProvider") StorageConnectorProvider storageConnectorProvider, + public ExportMSQDestination(@JsonProperty("storageConnectorType") String storageConnectorType, + @JsonProperty("properties") Map properties, 
@JsonProperty("resultFormat") ResultFormat resultFormat, @JsonProperty("replaceTimeChunks") @Nullable List replaceTimeChunks ) { - this.storageConnectorProvider = storageConnectorProvider; + this.storageConnectorType = storageConnectorType; + this.properties = properties; this.resultFormat = resultFormat; if (replaceTimeChunks == null || Intervals.ONLY_ETERNITY.equals(replaceTimeChunks)) { this.replaceTimeChunks = replaceTimeChunks; @@ -57,10 +60,16 @@ public ExportMSQDestination(@JsonProperty("storageConnectorProvider") StorageCon } } - @JsonProperty("storageConnectorProvider") - public StorageConnectorProvider getStorageConnectorProvider() + @JsonProperty("storageConnectorType") + public String getStorageConnectorType() { - return storageConnectorProvider; + return storageConnectorType; + } + + @JsonProperty("properties") + public Map getProperties() + { + return properties; } @JsonProperty("resultFormat") @@ -87,22 +96,24 @@ public boolean equals(Object o) return false; } ExportMSQDestination that = (ExportMSQDestination) o; - return Objects.equals(storageConnectorProvider, that.storageConnectorProvider) - && resultFormat == that.resultFormat - && Objects.equals(replaceTimeChunks, that.replaceTimeChunks); + return Objects.equals(storageConnectorType, that.storageConnectorType) && Objects.equals( + properties, + that.properties + ) && resultFormat == that.resultFormat && Objects.equals(replaceTimeChunks, that.replaceTimeChunks); } @Override public int hashCode() { - return Objects.hash(storageConnectorProvider, resultFormat, replaceTimeChunks); + return Objects.hash(storageConnectorType, properties, resultFormat, replaceTimeChunks); } @Override public String toString() { return "ExportMSQDestination{" + - "storageConnectorProvider=" + storageConnectorProvider + + "storageConnectorType='" + storageConnectorType + '\'' + + ", properties=" + properties + ", resultFormat=" + resultFormat + ", replaceTimeChunks=" + replaceTimeChunks + '}'; diff --git 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java index c2aec222beed..7897658ceac4 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java @@ -68,7 +68,6 @@ import org.apache.druid.sql.calcite.run.SqlResults; import org.apache.druid.sql.calcite.table.RowSignatures; import org.apache.druid.sql.http.ResultFormat; -import org.apache.druid.storage.StorageConnectorProvider; import org.joda.time.Interval; import javax.annotation.Nullable; @@ -211,24 +210,14 @@ public QueryResponse runQuery(final DruidQuery druidQuery) final MSQDestination destination; if (targetDataSource instanceof ExportDestination) { - String exportDestination = ((ExportDestination) targetDataSource).getExportDestinationString(); - exportDestination = exportDestination.substring(1, exportDestination.length() - 1); + ExportDestination exportDestination = ((ExportDestination) targetDataSource); ResultFormat format = ResultFormat.fromString(sqlQueryContext.getString(DruidSqlIngest.SQL_EXPORT_FILE_FORMAT)); - try { - StorageConnectorProvider storageConnectorProvider = jsonMapper.readValue( - exportDestination, - StorageConnectorProvider.class - ); - destination = new ExportMSQDestination(storageConnectorProvider, format, replaceTimeChunks); - } - catch (Exception e) { - throw DruidException.defensive() - .build( - e, - "Unable to deserialize the external destination: [%s].", - exportDestination - ); - } + destination = new ExportMSQDestination( + exportDestination.getDestinationType(), + exportDestination.getProperties(), + format, + replaceTimeChunks + ); } else if (targetDataSource instanceof TableDestination) { Granularity segmentGranularityObject; try { diff --git 
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java index 211986f712c6..62ede446b4e0 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java @@ -20,15 +20,14 @@ package org.apache.druid.msq.indexing.destination; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.sql.http.ResultFormat; import org.apache.druid.storage.StorageConnectorModule; -import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; import org.junit.Assert; import org.junit.Test; -import java.io.File; import java.io.IOException; public class ExportMSQDestinationTest @@ -37,7 +36,8 @@ public class ExportMSQDestinationTest public void testSerde() throws IOException { ExportMSQDestination exportDestination = new ExportMSQDestination( - new LocalFileStorageConnectorProvider(new File("/path")), + "local", + ImmutableMap.of("basePath", "/path"), ResultFormat.CSV, Intervals.ONLY_ETERNITY ); diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java index 3747088aeb6e..7dc0f18a5d03 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java @@ -35,6 +35,7 @@ import com.google.inject.Provides; import 
com.google.inject.multibindings.MapBinder; import org.apache.commons.lang.StringUtils; +import org.apache.druid.catalog.model.table.export.ExportSourceConfig; import org.apache.druid.common.aws.AWSClientConfig; import org.apache.druid.common.aws.AWSEndpointConfig; import org.apache.druid.common.aws.AWSProxyConfig; @@ -44,6 +45,7 @@ import org.apache.druid.guice.LazySingleton; import org.apache.druid.initialization.DruidModule; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.storage.s3.output.S3StorageExportConfig; import java.util.List; @@ -115,6 +117,8 @@ public void configure(Binder binder) JsonConfigProvider.bind(binder, "druid.storage", S3StorageConfig.class); JsonConfigProvider.bind(binder, "druid.storage.sse.kms", S3SSEKmsConfig.class); JsonConfigProvider.bind(binder, "druid.storage.sse.custom", S3SSECustomConfig.class); + MapBinder mapbinder = MapBinder.newMapBinder(binder, String.class, ExportSourceConfig.class); + mapbinder.addBinding("s3").to(S3StorageExportConfig.class); Binders.taskLogsBinder(binder).addBinding(SCHEME).to(S3TaskLogs.class); JsonConfigProvider.bind(binder, "druid.indexer.logs", S3TaskLogsConfig.class); diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java index 7f4b43a0ede8..38b8cada1976 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java @@ -24,6 +24,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; +import com.google.inject.Injector; import org.apache.druid.java.util.common.HumanReadableBytes; import 
org.apache.druid.storage.StorageConnector; import org.apache.druid.storage.StorageConnectorProvider; @@ -50,6 +51,19 @@ public S3StorageConnectorProvider( super(bucket, prefix, tempDir, chunkSize, maxRetry); } + public S3StorageConnectorProvider( + String bucket, + String prefix, + File tempDir, + HumanReadableBytes chunkSize, + Integer maxRetry, + Injector injector + ) + { + super(bucket, prefix, tempDir, chunkSize, maxRetry); + this.s3 = injector.getInstance(ServerSideEncryptingAmazonS3.class); + } + @Override public StorageConnector get() { diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageExportConfig.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageExportConfig.java new file mode 100644 index 000000000000..20fa64684399 --- /dev/null +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageExportConfig.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.storage.s3.output; + +import com.google.inject.Injector; +import org.apache.druid.catalog.model.table.export.ExportSourceConfig; +import org.apache.druid.java.util.common.HumanReadableBytes; +import org.apache.druid.storage.StorageConnectorProvider; + +import java.io.File; +import java.util.Map; + +public class S3StorageExportConfig implements ExportSourceConfig +{ + @Override + public StorageConnectorProvider get(Map properties, Injector injector) + { + return new S3StorageConnectorProvider( + properties.get("bucket"), + properties.get("prefix"), + new File(properties.get("tempDir")), + HumanReadableBytes.valueOf(Integer.parseInt(properties.get("chunkSize"))), + Integer.parseInt(properties.get("maxRetry")), + injector + ); + } +} diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java b/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java index 73ab2f541138..b79a2c5ecd59 100644 --- a/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java +++ b/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java @@ -19,27 +19,40 @@ package org.apache.druid.catalog.model.table.export; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; import org.apache.druid.catalog.model.table.IngestDestination; +import java.util.Map; + @JsonTypeName(ExportDestination.TYPE_KEY) public class ExportDestination implements IngestDestination { public static final String TYPE_KEY = "external"; - private final String exportDestinationString; + private final String destinationType; + private final Map properties; - public ExportDestination(String exportDestinationString) + public ExportDestination(@JsonProperty("destinationType") String destinationType, @JsonProperty("properties") Map properties) { + 
this.destinationType = destinationType; + this.properties = properties; + } - this.exportDestinationString = exportDestinationString; + @JsonProperty("destinationType") + public String getDestinationType() + { + return destinationType; } - public String getExportDestinationString() + @JsonProperty("properties") + public Map getProperties() { - return exportDestinationString; + return properties; } @Override + @JsonIgnore public String getDestinationName() { return "EXTERN"; diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportSourceConfig.java b/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportSourceConfig.java new file mode 100644 index 000000000000..b5ceb2ae55a4 --- /dev/null +++ b/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportSourceConfig.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.catalog.model.table.export; + +import com.google.inject.Injector; +import org.apache.druid.storage.StorageConnectorProvider; + +import java.util.Map; + +public interface ExportSourceConfig +{ + StorageConnectorProvider get(Map properties, Injector injector); +} diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/export/LocalStorageExportConfig.java b/server/src/main/java/org/apache/druid/catalog/model/table/export/LocalStorageExportConfig.java new file mode 100644 index 000000000000..9992fbfd65fb --- /dev/null +++ b/server/src/main/java/org/apache/druid/catalog/model/table/export/LocalStorageExportConfig.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.catalog.model.table.export; + +import com.google.inject.Injector; +import org.apache.druid.storage.StorageConnectorProvider; +import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; + +import java.io.File; +import java.util.Map; + +public class LocalStorageExportConfig implements ExportSourceConfig +{ + @Override + public StorageConnectorProvider get(Map properties, Injector injector) + { + return new LocalFileStorageConnectorProvider(new File(properties.get("basePath"))); + } +} diff --git a/server/src/main/java/org/apache/druid/curator/CuratorModule.java b/server/src/main/java/org/apache/druid/curator/CuratorModule.java index a7e76af474a3..62ea2ac7544f 100644 --- a/server/src/main/java/org/apache/druid/curator/CuratorModule.java +++ b/server/src/main/java/org/apache/druid/curator/CuratorModule.java @@ -22,6 +22,7 @@ import com.google.inject.Binder; import com.google.inject.Module; import com.google.inject.Provides; +import com.google.inject.multibindings.MapBinder; import org.apache.curator.RetryPolicy; import org.apache.curator.ensemble.fixed.FixedEnsembleProvider; import org.apache.curator.framework.CuratorFramework; @@ -30,6 +31,8 @@ import org.apache.curator.framework.imps.DefaultACLProvider; import org.apache.curator.retry.BoundedExponentialBackoffRetry; import org.apache.curator.shaded.com.google.common.base.Strings; +import org.apache.druid.catalog.model.table.export.LocalStorageExportConfig; +import org.apache.druid.catalog.model.table.export.ExportSourceConfig; import org.apache.druid.concurrent.Threads; import org.apache.druid.guice.JsonConfigProvider; import org.apache.druid.guice.LazySingleton; @@ -75,6 +78,8 @@ public void configure(Binder binder) JsonConfigProvider.bind(binder, CuratorConfig.CONFIG_PREFIX, ZkEnablementConfig.class); JsonConfigProvider.bind(binder, CuratorConfig.CONFIG_PREFIX, CuratorConfig.class); MetricsModule.register(binder, DruidConnectionStateListener.class); + MapBinder 
mapbinder = MapBinder.newMapBinder(binder, String.class, ExportSourceConfig.class); + mapbinder.addBinding("local").to(LocalStorageExportConfig.class); } /** diff --git a/sql/src/main/codegen/includes/common.ftl b/sql/src/main/codegen/includes/common.ftl index d5a4599c4f7e..cdd0900a2774 100644 --- a/sql/src/main/codegen/includes/common.ftl +++ b/sql/src/main/codegen/includes/common.ftl @@ -107,23 +107,6 @@ SqlTypeNameSpec DruidType() : } } -SqlIdentifier ExternalDestination() : -{ - final Span s; - final SqlNode key; -} -{ - key = StringLiteral() - { - s = span(); - return new ExternalDestinationSqlIdentifier( - "EXTERN", - s.pos(), - key - ); - } -} - // Parses the supported file formats for export. String FileFormat() : {} @@ -134,4 +117,50 @@ String FileFormat() : return "CSV"; } ) +} + +SqlIdentifier ExternalDestination() : +{ + final Span s; + Map properties = new HashMap(); +} +{ + ( + [ [properties = ExternProperties()] ] + { + s = span(); + return new ExternalDestinationSqlIdentifier( + "s3", + s.pos(), + properties + ); + } + | + [ [properties = ExternProperties()] ] + { + s = span(); + return new ExternalDestinationSqlIdentifier( + "local", + s.pos(), + properties + ); + } + ) +} + +Map ExternProperties() : +{ + final Span s; + final Map properties = new HashMap(); + SqlNodeList commaList = SqlNodeList.EMPTY; +} +{ + commaList = ExpressionCommaList(span(), ExprContext.ACCEPT_NON_QUERY) + { + for (SqlNode sqlNode : commaList) { + List sqlNodeList = ((SqlBasicCall) sqlNode).getOperandList(); + properties.put(((SqlIdentifier) sqlNodeList.get(0)).getSimple(), ((SqlIdentifier) sqlNodeList.get(1)).getSimple()); + } + return properties; + } } \ No newline at end of file diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java index 5fab3ae207d7..833acea70505 100644 --- 
a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java @@ -19,7 +19,6 @@ package org.apache.druid.sql.calcite.parser; -import org.apache.calcite.sql.SqlCharStringLiteral; import org.apache.calcite.sql.SqlIdentifier; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlWriter; @@ -27,33 +26,50 @@ import org.apache.druid.error.DruidException; import org.apache.druid.utils.CollectionUtils; +import java.util.Map; + /** * Extends the {@link SqlIdentifier} to hold parameters for an external destination. */ public class ExternalDestinationSqlIdentifier extends SqlIdentifier { - private final SqlCharStringLiteral exportDestinationString; + private final Map properties; public ExternalDestinationSqlIdentifier( String name, SqlParserPos pos, - SqlNode exportDestinationString + Map properties ) { super(name, pos); - this.exportDestinationString = (SqlCharStringLiteral) exportDestinationString; + this.properties = properties; + } + + public String getDestinationType() + { + return CollectionUtils.getOnlyElement( + names, + x -> DruidException.defensive("Expected single name in external destination identifier, but got [%s]", names) + ); } - public String getExportDestinationString() + public Map getProperties() { - return exportDestinationString.toString(); + return properties; } @Override public void unparse(SqlWriter writer, int leftPrec, int rightPrec) { SqlWriter.Frame externFrame = writer.startFunCall("EXTERN"); - writer.print(exportDestinationString.toString()); + SqlWriter.Frame frame = writer.startFunCall(getDestinationType()); + for (Map.Entry property : properties.entrySet()) { + writer.sep(","); + writer.print(property.getKey()); + writer.print(" = "); + writer.identifier(property.getValue(), false); + } + writer.endFunCall(frame); writer.endFunCall(externFrame); } @@ -64,7 +80,7 @@ public SqlNode 
clone(SqlParserPos pos) names, x -> DruidException.defensive("Expected single name in external destination identifier, but got [%s]", names) ); - return new ExternalDestinationSqlIdentifier(name, pos, exportDestinationString); + return new ExternalDestinationSqlIdentifier(name, pos, properties); } @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java index c9f85e3fd154..9450f998af1a 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java @@ -209,8 +209,8 @@ private IngestDestination validateAndGetDataSourceForIngest() .ofCategory(DruidException.Category.DEFENSIVE) .build("Operation [%s] requires a target table", operationName()); } else if (tableIdentifier instanceof ExternalDestinationSqlIdentifier) { - String exportDestinationString = ((ExternalDestinationSqlIdentifier) tableIdentifier).getExportDestinationString(); - dataSource = new ExportDestination(exportDestinationString); + ExternalDestinationSqlIdentifier externalDestination = ((ExternalDestinationSqlIdentifier) tableIdentifier); + dataSource = new ExportDestination(externalDestination.getDestinationType(), externalDestination.getProperties()); } else if (tableIdentifier.names.size() == 1) { // Unqualified name. 
String tableName = Iterables.getOnlyElement(tableIdentifier.names); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index 70b26b43decc..7b099a048cbe 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -33,7 +33,7 @@ public class CalciteExportTest extends CalciteIngestionDmlTest public void testReplaceIntoExtern() { testIngestionQuery() - .sql("REPLACE INTO EXTERN('{\"type\":\"s3\",\"bucket\":\"bucket1\",\"prefix\":\"prefix1\",\"tempDir\":\"/tempdir\",\"chunkSize\":5242880,\"maxRetry\":1}') " + .sql("REPLACE INTO EXTERN(s3(bucket=\"bucket1\",prefix=\"prefix1\",tempDir=\"/tempdir\",chunkSize=\"5242880\",maxRetry=\"1\")) " + "AS CSV " + "OVERWRITE ALL " + "SELECT dim2 FROM foo") @@ -57,7 +57,7 @@ public void testReplaceIntoExtern() public void testExportWithPartitionedBy() { testIngestionQuery() - .sql("REPLACE INTO EXTERN('{\"type\":\"s3\",\"bucket\":\"bucket1\",\"prefix\":\"prefix1\",\"tempDir\":\"/tempdir\",\"chunkSize\":5242880,\"maxRetry\":1}') " + .sql("REPLACE INTO EXTERN(s3(bucket=\"bucket1\",prefix=\"prefix1\",tempDir=\"/tempdir\",chunkSize=\"5242880\",maxRetry=\"1\")) " + "AS CSV " + "OVERWRITE ALL " + "SELECT dim2 FROM foo " @@ -73,7 +73,7 @@ public void testExportWithPartitionedBy() public void testInsertIntoExtern() { testIngestionQuery() - .sql("INSERT INTO EXTERN('{\"type\":\"s3\",\"bucket\":\"bucket1\",\"prefix\":\"prefix1\",\"tempDir\":\"/tempdir\",\"chunkSize\":5242880,\"maxRetry\":1}') " + .sql("INSERT INTO EXTERN(s3(bucket=\"bucket1\",prefix=\"prefix1\",tempDir=\"/tempdir\",chunkSize=\"5242880\",maxRetry=\"1\")) " + "AS CSV " + "SELECT dim2 FROM foo") .expectQuery( @@ -96,7 +96,7 @@ public void testInsertIntoExtern() public void testExportWithoutFormat() { testIngestionQuery() - .sql("INSERT INTO 
EXTERN('{\"type\":\"s3\",\"bucket\":\"bucket1\",\"prefix\":\"prefix1\",\"tempDir\":\"/tempdir\",\"chunkSize\":5242880,\"maxRetry\":1}') " + .sql("INSERT INTO EXTERN(s3(bucket=\"bucket1\",prefix=\"prefix1\",tempDir=\"/tempdir\",chunkSize=\"5242880\",maxRetry=\"1\")) " + "SELECT dim2 FROM foo") .expectValidationError( DruidException.class, diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java index 1c6606a68aa4..168e4222051c 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java @@ -99,8 +99,8 @@ private static DruidSqlParserImpl createTestParser(String parseString) @Test public void testUnparseExternalSqlIdentifier() throws ParseException { - String sqlQuery = "REPLACE INTO EXTERN('{\"type\":\"s3\",\"bucket\":\"bucket1\",\"prefix\":\"prefix1\",\"tempDir\":\"/tempdir\",\"chunkSize\":5242880,\"maxRetry\":1}') AS CSV OVERWRITE ALL SELECT dim2 FROM foo PARTITIONED BY ALL"; - String prettySqlQuery = "REPLACE INTO EXTERN('{\"type\":\"s3\",\"bucket\":\"bucket1\",\"prefix\":\"prefix1\",\"tempDir\":\"/tempdir\",\"chunkSize\":5242880,\"maxRetry\":1}')\n" + String sqlQuery = "REPLACE INTO EXTERN( s3(bucket=bucket1,prefix=prefix1) ) AS CSV OVERWRITE ALL SELECT dim2 FROM foo PARTITIONED BY ALL"; + String prettySqlQuery = "REPLACE INTO EXTERN(S3(bucket = \"bucket1\", prefix = \"prefix1\"))\n" + "AS CSV\n" + "OVERWRITE ALL\n" + "SELECT \"dim2\"\n" From 9ab7b371fe1f56ad1108be96d0f7db04d039ebfe Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 23 Jan 2024 19:00:41 +0530 Subject: [PATCH 09/50] Webconsole workaround --- .../main/java/org/apache/druid/curator/CuratorModule.java | 2 +- .../src/views/workbench-view/query-tab/query-tab.tsx | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git 
a/server/src/main/java/org/apache/druid/curator/CuratorModule.java b/server/src/main/java/org/apache/druid/curator/CuratorModule.java index 62ea2ac7544f..7bf181a936e2 100644 --- a/server/src/main/java/org/apache/druid/curator/CuratorModule.java +++ b/server/src/main/java/org/apache/druid/curator/CuratorModule.java @@ -31,8 +31,8 @@ import org.apache.curator.framework.imps.DefaultACLProvider; import org.apache.curator.retry.BoundedExponentialBackoffRetry; import org.apache.curator.shaded.com.google.common.base.Strings; -import org.apache.druid.catalog.model.table.export.LocalStorageExportConfig; import org.apache.druid.catalog.model.table.export.ExportSourceConfig; +import org.apache.druid.catalog.model.table.export.LocalStorageExportConfig; import org.apache.druid.concurrent.Threads; import org.apache.druid.guice.JsonConfigProvider; import org.apache.druid.guice.LazySingleton; diff --git a/web-console/src/views/workbench-view/query-tab/query-tab.tsx b/web-console/src/views/workbench-view/query-tab/query-tab.tsx index 8a4129fc67bf..22d2c79e8992 100644 --- a/web-console/src/views/workbench-view/query-tab/query-tab.tsx +++ b/web-console/src/views/workbench-view/query-tab/query-tab.tsx @@ -428,7 +428,12 @@ export const QueryTab = React.memo(function QueryTab(props: QueryTabProps) { )} ) : ( -
Unknown query execution state
+ onDetails(statsTaskId!, 'error')} + onWarningClick={() => onDetails(statsTaskId!, 'warnings')} + goToTask={goToTask} + /> ))} {executionState.error && ( Date: Wed, 24 Jan 2024 11:38:35 +0530 Subject: [PATCH 10/50] Refactor --- .../apache/druid/msq/exec/ControllerImpl.java | 9 ++-- ...a => ExportStorageConnectorFactories.java} | 11 ++--- .../results/ExportResultsFrameProcessor.java | 2 +- .../druid/msq/sql/MSQTaskQueryMaker.java | 6 +-- .../druid/msq/sql/MSQTaskSqlEngine.java | 2 +- .../storage/s3/S3StorageDruidModule.java | 8 ++-- ...a => S3ExportStorageConnectorFactory.java} | 4 +- .../druid/storage/StorageConnectorModule.java | 6 ++- .../export/ExportStorageConnectorFactory.java | 4 +- .../LocalExportStorageConnectorFactory.java | 4 +- .../apache/druid/curator/CuratorModule.java | 5 --- .../apache/druid/sql/avatica/DruidMeta.java | 1 - .../calcite/planner/DruidOperatorTable.java | 1 - .../calcite/planner/ExplainAttributes.java | 2 +- .../sql/calcite/planner/IngestHandler.java | 6 +-- .../druid/sql/calcite/rel/InputAccessor.java | 1 - .../rule/DruidUnionDataSourceRule.java | 1 - .../druid/sql/calcite/run/EngineFeature.java | 2 +- .../sql/calcite/run/NativeSqlEngine.java | 2 +- .../druid/sql/calcite/run/SqlEngine.java | 2 +- .../druid/sql/calcite/view/ViewSqlEngine.java | 2 +- .../sql/destination}/ExportDestination.java | 3 +- .../sql/destination}/IngestDestination.java | 5 ++- .../sql/destination}/TableDestination.java | 3 +- .../sql/calcite/BaseCalciteQueryTest.java | 1 - .../sql/calcite/CalciteScanSignatureTest.java | 2 +- .../sql/calcite/DrillWindowQueryTest.java | 1 - .../sql/calcite/IngestionTestSqlEngine.java | 2 +- .../sql/calcite/TestInsertQueryMaker.java | 2 +- .../planner/ExplainAttributesTest.java | 2 +- .../druid/sql/calcite/rel/DruidRelsTest.java | 1 - .../destination/ExportDestinationTest.java | 43 +++++++++++++++++++ .../destination}/TableDestinationTest.java | 12 +++--- 33 files changed, 99 insertions(+), 59 deletions(-) rename 
extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/{ExportSourceHandlers.java => ExportStorageConnectorFactories.java} (72%) rename extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/{S3StorageExportConfig.java => S3ExportStorageConnectorFactory.java} (90%) rename server/src/main/java/org/apache/druid/catalog/model/table/export/ExportSourceConfig.java => processing/src/main/java/org/apache/druid/storage/export/ExportStorageConnectorFactory.java (91%) rename server/src/main/java/org/apache/druid/catalog/model/table/export/LocalStorageExportConfig.java => processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java (90%) rename {server/src/main/java/org/apache/druid/catalog/model/table/export => sql/src/main/java/org/apache/druid/sql/destination}/ExportDestination.java (93%) rename {server/src/main/java/org/apache/druid/catalog/model/table => sql/src/main/java/org/apache/druid/sql/destination}/IngestDestination.java (89%) rename {server/src/main/java/org/apache/druid/catalog/model/table/export => sql/src/main/java/org/apache/druid/sql/destination}/TableDestination.java (94%) create mode 100644 sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java rename {server/src/test/java/org/apache/druid/catalog/model/table/export => sql/src/test/java/org/apache/druid/sql/destination}/TableDestinationTest.java (77%) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 5c2d2f32c01c..b8d8fe76276d 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -41,7 +41,7 @@ import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntSet; import 
org.apache.calcite.sql.type.SqlTypeName; -import org.apache.druid.catalog.model.table.export.ExportSourceConfig; +import org.apache.druid.storage.export.ExportStorageConnectorFactory; import org.apache.druid.client.ImmutableSegmentLoadInfo; import org.apache.druid.common.guava.FutureUtils; import org.apache.druid.data.input.StringTuple; @@ -1888,9 +1888,10 @@ private static QueryDefinition makeQueryDefinition( } } else if (querySpec.getDestination() instanceof ExportMSQDestination) { ExportMSQDestination exportMSQDestination = (ExportMSQDestination) querySpec.getDestination(); - final ExportSourceHandlers exportSourceHandlers = injector.getInstance(ExportSourceHandlers.class); - final ExportSourceConfig outputConfig = exportSourceHandlers.getConnectorProviderMap() - .get(exportMSQDestination.getStorageConnectorType()); + final ExportStorageConnectorFactories exportStorageConnectorFactories = injector.getInstance( + ExportStorageConnectorFactories.class); + final ExportStorageConnectorFactory outputConfig = exportStorageConnectorFactories.getConnectorProviderMap() + .get(exportMSQDestination.getStorageConnectorType()); final StorageConnectorProvider storageConnectorProvider = outputConfig.get(exportMSQDestination.getProperties(), injector); final ResultFormat resultFormat = exportMSQDestination.getResultFormat(); diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportSourceHandlers.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java similarity index 72% rename from extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportSourceHandlers.java rename to extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java index 368760a35f1f..ea30b46bfe3f 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportSourceHandlers.java +++ 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java @@ -20,21 +20,22 @@ package org.apache.druid.msq.exec; import com.google.inject.Inject; -import org.apache.druid.catalog.model.table.export.ExportSourceConfig; +import org.apache.druid.storage.export.ExportStorageConnectorFactory; import java.util.Map; -public class ExportSourceHandlers +public class ExportStorageConnectorFactories { - public Map getConnectorProviderMap() + public Map getConnectorProviderMap() { return connectorProviderMap; } - private final Map connectorProviderMap; + private final Map connectorProviderMap; @Inject - public ExportSourceHandlers(Map connectorProviderMap) { + public ExportStorageConnectorFactories(Map connectorProviderMap) + { this.connectorProviderMap = connectorProviderMap; } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java index d9da52520b0e..8d1780a25b70 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java @@ -116,7 +116,7 @@ private void addFrame(final Frame frame) throws IOException new FrameStorageAdapter(frame, frameReader, Intervals.ETERNITY) .makeCursors(null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null); - try (OutputStream stream = storageConnector.write(workerNumber + "/" + partitionNumber)) { + try (OutputStream stream = storageConnector.write(workerNumber + "/" + partitionNumber + "." 
+ exportFormat.toString())) { ResultFormat.Writer formatter = exportFormat.createFormatter(stream, jsonMapper); SequenceUtils.forEach( diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java index 7897658ceac4..578b97a934f3 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java @@ -25,9 +25,9 @@ import org.apache.calcite.runtime.Hook; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.Pair; -import org.apache.druid.catalog.model.table.IngestDestination; -import org.apache.druid.catalog.model.table.export.ExportDestination; -import org.apache.druid.catalog.model.table.export.TableDestination; +import org.apache.druid.sql.destination.IngestDestination; +import org.apache.druid.sql.destination.ExportDestination; +import org.apache.druid.sql.destination.TableDestination; import org.apache.druid.common.guava.FutureUtils; import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidInput; diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java index 84a5b5cb6849..b49eabfff2b3 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java @@ -31,7 +31,7 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.Pair; -import org.apache.druid.catalog.model.table.IngestDestination; +import org.apache.druid.sql.destination.IngestDestination; import 
org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidInput; import org.apache.druid.error.InvalidSqlInput; diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java index 7dc0f18a5d03..2ea8856b0245 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java @@ -35,7 +35,7 @@ import com.google.inject.Provides; import com.google.inject.multibindings.MapBinder; import org.apache.commons.lang.StringUtils; -import org.apache.druid.catalog.model.table.export.ExportSourceConfig; +import org.apache.druid.storage.export.ExportStorageConnectorFactory; import org.apache.druid.common.aws.AWSClientConfig; import org.apache.druid.common.aws.AWSEndpointConfig; import org.apache.druid.common.aws.AWSProxyConfig; @@ -45,7 +45,7 @@ import org.apache.druid.guice.LazySingleton; import org.apache.druid.initialization.DruidModule; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.storage.s3.output.S3StorageExportConfig; +import org.apache.druid.storage.s3.output.S3ExportStorageConnectorFactory; import java.util.List; @@ -117,8 +117,8 @@ public void configure(Binder binder) JsonConfigProvider.bind(binder, "druid.storage", S3StorageConfig.class); JsonConfigProvider.bind(binder, "druid.storage.sse.kms", S3SSEKmsConfig.class); JsonConfigProvider.bind(binder, "druid.storage.sse.custom", S3SSECustomConfig.class); - MapBinder mapbinder = MapBinder.newMapBinder(binder, String.class, ExportSourceConfig.class); - mapbinder.addBinding("s3").to(S3StorageExportConfig.class); + MapBinder mapbinder = MapBinder.newMapBinder(binder, String.class, ExportStorageConnectorFactory.class); + 
mapbinder.addBinding("s3").to(S3ExportStorageConnectorFactory.class); Binders.taskLogsBinder(binder).addBinding(SCHEME).to(S3TaskLogs.class); JsonConfigProvider.bind(binder, "druid.indexer.logs", S3TaskLogsConfig.class); diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageExportConfig.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java similarity index 90% rename from extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageExportConfig.java rename to extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java index 20fa64684399..090f1a2c2aa5 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageExportConfig.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java @@ -20,14 +20,14 @@ package org.apache.druid.storage.s3.output; import com.google.inject.Injector; -import org.apache.druid.catalog.model.table.export.ExportSourceConfig; +import org.apache.druid.storage.export.ExportStorageConnectorFactory; import org.apache.druid.java.util.common.HumanReadableBytes; import org.apache.druid.storage.StorageConnectorProvider; import java.io.File; import java.util.Map; -public class S3StorageExportConfig implements ExportSourceConfig +public class S3ExportStorageConnectorFactory implements ExportStorageConnectorFactory { @Override public StorageConnectorProvider get(Map properties, Injector injector) diff --git a/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java b/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java index a0bf3a91f0cc..fce9b6a61b1b 100644 --- a/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java +++ 
b/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java @@ -23,7 +23,10 @@ import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.common.collect.ImmutableList; import com.google.inject.Binder; +import com.google.inject.multibindings.MapBinder; import org.apache.druid.initialization.DruidModule; +import org.apache.druid.storage.export.ExportStorageConnectorFactory; +import org.apache.druid.storage.export.LocalExportStorageConnectorFactory; import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; import java.util.List; @@ -42,6 +45,7 @@ public List getJacksonModules() @Override public void configure(Binder binder) { - + MapBinder mapbinder = MapBinder.newMapBinder(binder, String.class, ExportStorageConnectorFactory.class); + mapbinder.addBinding("local").to(LocalExportStorageConnectorFactory.class); } } diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportSourceConfig.java b/processing/src/main/java/org/apache/druid/storage/export/ExportStorageConnectorFactory.java similarity index 91% rename from server/src/main/java/org/apache/druid/catalog/model/table/export/ExportSourceConfig.java rename to processing/src/main/java/org/apache/druid/storage/export/ExportStorageConnectorFactory.java index b5ceb2ae55a4..6f1c4dcadd2f 100644 --- a/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportSourceConfig.java +++ b/processing/src/main/java/org/apache/druid/storage/export/ExportStorageConnectorFactory.java @@ -17,14 +17,14 @@ * under the License. 
*/ -package org.apache.druid.catalog.model.table.export; +package org.apache.druid.storage.export; import com.google.inject.Injector; import org.apache.druid.storage.StorageConnectorProvider; import java.util.Map; -public interface ExportSourceConfig +public interface ExportStorageConnectorFactory { StorageConnectorProvider get(Map properties, Injector injector); } diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/export/LocalStorageExportConfig.java b/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java similarity index 90% rename from server/src/main/java/org/apache/druid/catalog/model/table/export/LocalStorageExportConfig.java rename to processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java index 9992fbfd65fb..be989aae4244 100644 --- a/server/src/main/java/org/apache/druid/catalog/model/table/export/LocalStorageExportConfig.java +++ b/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.catalog.model.table.export; +package org.apache.druid.storage.export; import com.google.inject.Injector; import org.apache.druid.storage.StorageConnectorProvider; @@ -26,7 +26,7 @@ import java.io.File; import java.util.Map; -public class LocalStorageExportConfig implements ExportSourceConfig +public class LocalExportStorageConnectorFactory implements ExportStorageConnectorFactory { @Override public StorageConnectorProvider get(Map properties, Injector injector) diff --git a/server/src/main/java/org/apache/druid/curator/CuratorModule.java b/server/src/main/java/org/apache/druid/curator/CuratorModule.java index 7bf181a936e2..a7e76af474a3 100644 --- a/server/src/main/java/org/apache/druid/curator/CuratorModule.java +++ b/server/src/main/java/org/apache/druid/curator/CuratorModule.java @@ -22,7 +22,6 @@ import com.google.inject.Binder; import com.google.inject.Module; import com.google.inject.Provides; -import com.google.inject.multibindings.MapBinder; import org.apache.curator.RetryPolicy; import org.apache.curator.ensemble.fixed.FixedEnsembleProvider; import org.apache.curator.framework.CuratorFramework; @@ -31,8 +30,6 @@ import org.apache.curator.framework.imps.DefaultACLProvider; import org.apache.curator.retry.BoundedExponentialBackoffRetry; import org.apache.curator.shaded.com.google.common.base.Strings; -import org.apache.druid.catalog.model.table.export.ExportSourceConfig; -import org.apache.druid.catalog.model.table.export.LocalStorageExportConfig; import org.apache.druid.concurrent.Threads; import org.apache.druid.guice.JsonConfigProvider; import org.apache.druid.guice.LazySingleton; @@ -78,8 +75,6 @@ public void configure(Binder binder) JsonConfigProvider.bind(binder, CuratorConfig.CONFIG_PREFIX, ZkEnablementConfig.class); JsonConfigProvider.bind(binder, CuratorConfig.CONFIG_PREFIX, CuratorConfig.class); MetricsModule.register(binder, DruidConnectionStateListener.class); - MapBinder mapbinder = MapBinder.newMapBinder(binder, 
String.class, ExportSourceConfig.class); - mapbinder.addBinding("local").to(LocalStorageExportConfig.class); } /** diff --git a/sql/src/main/java/org/apache/druid/sql/avatica/DruidMeta.java b/sql/src/main/java/org/apache/druid/sql/avatica/DruidMeta.java index fa3dbdfa22cc..0dde72e4830c 100644 --- a/sql/src/main/java/org/apache/druid/sql/avatica/DruidMeta.java +++ b/sql/src/main/java/org/apache/druid/sql/avatica/DruidMeta.java @@ -56,7 +56,6 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; - import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java index 145886686f4d..d1851c511c2d 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java @@ -128,7 +128,6 @@ import org.apache.druid.sql.calcite.planner.convertlet.DruidConvertletTable; import javax.annotation.Nullable; - import java.util.ArrayList; import java.util.HashMap; import java.util.List; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/ExplainAttributes.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/ExplainAttributes.java index 0535463e4039..49c1861f6ab5 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/ExplainAttributes.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/ExplainAttributes.java @@ -21,8 +21,8 @@ import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.sql.destination.IngestDestination; import javax.annotation.Nullable; import java.util.List; diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java index 9450f998af1a..2eff30f0171a 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java @@ -34,9 +34,6 @@ import org.apache.calcite.sql.SqlOrderBy; import org.apache.calcite.tools.ValidationException; import org.apache.calcite.util.Pair; -import org.apache.druid.catalog.model.table.IngestDestination; -import org.apache.druid.catalog.model.table.export.ExportDestination; -import org.apache.druid.catalog.model.table.export.TableDestination; import org.apache.druid.common.utils.IdUtils; import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidSqlInput; @@ -52,6 +49,9 @@ import org.apache.druid.sql.calcite.parser.ExternalDestinationSqlIdentifier; import org.apache.druid.sql.calcite.run.EngineFeature; import org.apache.druid.sql.calcite.run.QueryMaker; +import org.apache.druid.sql.destination.ExportDestination; +import org.apache.druid.sql.destination.IngestDestination; +import org.apache.druid.sql.destination.TableDestination; import java.util.List; import java.util.regex.Pattern; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/InputAccessor.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/InputAccessor.java index 57b81c685368..12c81d887567 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/InputAccessor.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/InputAccessor.java @@ -28,7 +28,6 @@ import org.apache.druid.sql.calcite.expression.Expressions; import javax.annotation.Nullable; - import java.util.List; import java.util.stream.Collectors; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnionDataSourceRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnionDataSourceRule.java index 
e4a72776315d..31a6d38785de 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnionDataSourceRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnionDataSourceRule.java @@ -36,7 +36,6 @@ import org.apache.druid.sql.calcite.table.DruidTable; import javax.annotation.Nullable; - import java.util.ArrayList; import java.util.Collections; import java.util.List; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java index b514ffe12b65..79883d505931 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java @@ -120,7 +120,7 @@ public enum EngineFeature */ ALLOW_TOP_LEVEL_UNION_ALL, /** - * Queries can write to an {@link ExternalDataSource}. + * Queries can write to an external datasource using {@link org.apache.druid.sql.destination.ExportDestination} */ WRITE_EXTERNAL_DATA; } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java index 0fd6b6a02494..e7fdf9f7c33f 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java @@ -25,7 +25,6 @@ import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.error.InvalidSqlInput; import org.apache.druid.guice.LazySingleton; import org.apache.druid.query.groupby.GroupByQuery; @@ -36,6 +35,7 @@ import org.apache.druid.sql.calcite.planner.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.DruidQuery; +import org.apache.druid.sql.destination.IngestDestination; import 
java.util.Map; import java.util.Set; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java index 980fb25d13bb..1ff52f84d0c5 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java @@ -23,8 +23,8 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.tools.ValidationException; -import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.sql.calcite.planner.PlannerContext; +import org.apache.druid.sql.destination.IngestDestination; import java.util.Map; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java index 82ea2416815a..ae4cf9639549 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java @@ -22,12 +22,12 @@ import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.run.EngineFeature; import org.apache.druid.sql.calcite.run.QueryMaker; import org.apache.druid.sql.calcite.run.SqlEngine; import org.apache.druid.sql.calcite.run.SqlEngines; +import org.apache.druid.sql.destination.IngestDestination; import java.util.Map; diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java b/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java similarity index 93% rename from server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java rename to 
sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java index b79a2c5ecd59..42e486c93a9c 100644 --- a/server/src/main/java/org/apache/druid/catalog/model/table/export/ExportDestination.java +++ b/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java @@ -17,12 +17,11 @@ * under the License. */ -package org.apache.druid.catalog.model.table.export; +package org.apache.druid.sql.destination; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; -import org.apache.druid.catalog.model.table.IngestDestination; import java.util.Map; diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java b/sql/src/main/java/org/apache/druid/sql/destination/IngestDestination.java similarity index 89% rename from server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java rename to sql/src/main/java/org/apache/druid/sql/destination/IngestDestination.java index 830d46ef354d..710362fef7ac 100644 --- a/server/src/main/java/org/apache/druid/catalog/model/table/IngestDestination.java +++ b/sql/src/main/java/org/apache/druid/sql/destination/IngestDestination.java @@ -17,11 +17,14 @@ * under the License. */ -package org.apache.druid.catalog.model.table; +package org.apache.druid.sql.destination; import com.fasterxml.jackson.annotation.JsonTypeInfo; import org.apache.druid.guice.annotations.UnstableApi; +/** + * Represents the destination to which the ingested data is written to. 
+ */ @UnstableApi @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") public interface IngestDestination diff --git a/server/src/main/java/org/apache/druid/catalog/model/table/export/TableDestination.java b/sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java similarity index 94% rename from server/src/main/java/org/apache/druid/catalog/model/table/export/TableDestination.java rename to sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java index 5e5d50c5f3ea..cb4b8e66e6c1 100644 --- a/server/src/main/java/org/apache/druid/catalog/model/table/export/TableDestination.java +++ b/sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java @@ -17,12 +17,11 @@ * under the License. */ -package org.apache.druid.catalog.model.table.export; +package org.apache.druid.sql.destination; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; -import org.apache.druid.catalog.model.table.IngestDestination; import java.util.Objects; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java index eb2528a6d1cf..ac47ccc5ce7d 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java @@ -122,7 +122,6 @@ import org.junit.rules.TemporaryFolder; import javax.annotation.Nullable; - import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java index bed56819ccde..954eda85b546 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java +++ 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java @@ -26,7 +26,6 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.tools.ValidationException; -import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.query.scan.ScanQuery; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.server.QueryLifecycleFactory; @@ -37,6 +36,7 @@ import org.apache.druid.sql.calcite.run.QueryMaker; import org.apache.druid.sql.calcite.run.SqlEngine; import org.apache.druid.sql.calcite.util.CalciteTests; +import org.apache.druid.sql.destination.IngestDestination; import org.junit.Test; import java.util.HashMap; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java index 5d2098b760e1..a72ae48edf29 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java @@ -70,7 +70,6 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; - import java.io.File; import java.io.IOException; import java.io.InputStream; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java b/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java index 5ef762d81cd2..0a18eb47f46a 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java @@ -24,7 +24,6 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.sql.type.SqlTypeName; -import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.planner.PlannerContext; import 
org.apache.druid.sql.calcite.run.EngineFeature; @@ -32,6 +31,7 @@ import org.apache.druid.sql.calcite.run.SqlEngine; import org.apache.druid.sql.calcite.run.SqlEngines; import org.apache.druid.sql.calcite.table.RowSignatures; +import org.apache.druid.sql.destination.IngestDestination; import java.util.Map; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java b/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java index 1da300493a6f..0e0d6e53e197 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java @@ -21,12 +21,12 @@ import com.google.common.collect.ImmutableList; import org.apache.calcite.runtime.Hook; -import org.apache.druid.catalog.model.table.IngestDestination; import org.apache.druid.java.util.common.guava.Sequences; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.server.QueryResponse; import org.apache.druid.sql.calcite.rel.DruidQuery; import org.apache.druid.sql.calcite.run.QueryMaker; +import org.apache.druid.sql.destination.IngestDestination; /** * QueryMaker used by {@link CalciteInsertDmlTest}. 
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/planner/ExplainAttributesTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/planner/ExplainAttributesTest.java index c72cb0d37e70..052a57c6d4e5 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/planner/ExplainAttributesTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/planner/ExplainAttributesTest.java @@ -21,9 +21,9 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.druid.catalog.model.table.export.TableDestination; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.sql.destination.TableDestination; import org.junit.Assert; import org.junit.Test; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/rel/DruidRelsTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/rel/DruidRelsTest.java index 199d259d92f6..e5daa3471e16 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/rel/DruidRelsTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/rel/DruidRelsTest.java @@ -32,7 +32,6 @@ import org.junit.Test; import javax.annotation.Nullable; - import java.util.List; import java.util.function.Consumer; diff --git a/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java b/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java new file mode 100644 index 000000000000..d54dc47b6a5f --- /dev/null +++ b/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.destination; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; + +public class ExportDestinationTest +{ + @Test + public void testSerde() throws IOException + { + ExportDestination exportDestination = new ExportDestination("s3", ImmutableMap.of("bucketName", "bucket1", "prefix", "basepath/export")); + + ObjectMapper objectMapper = new DefaultObjectMapper(); + byte[] bytes = objectMapper.writeValueAsBytes(exportDestination); + + ExportDestination deserialized = objectMapper.readValue(bytes, ExportDestination.class); + Assert.assertEquals(exportDestination, deserialized); + } +} diff --git a/server/src/test/java/org/apache/druid/catalog/model/table/export/TableDestinationTest.java b/sql/src/test/java/org/apache/druid/sql/destination/TableDestinationTest.java similarity index 77% rename from server/src/test/java/org/apache/druid/catalog/model/table/export/TableDestinationTest.java rename to sql/src/test/java/org/apache/druid/sql/destination/TableDestinationTest.java index 891a8d1b5091..7af4ad2eb4d6 100644 --- a/server/src/test/java/org/apache/druid/catalog/model/table/export/TableDestinationTest.java +++ 
b/sql/src/test/java/org/apache/druid/sql/destination/TableDestinationTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.catalog.model.table.export; +package org.apache.druid.sql.destination; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.druid.jackson.DefaultObjectMapper; @@ -31,10 +31,12 @@ public class TableDestinationTest @Test public void testSerde() throws IOException { - TableDestination exportDestination = new TableDestination("tableName"); + TableDestination tableDestination = new TableDestination("tableName"); + ObjectMapper objectMapper = new DefaultObjectMapper(); - byte[] bytes = objectMapper.writeValueAsBytes(exportDestination); - TableDestination newDest = objectMapper.readValue(bytes, TableDestination.class); - Assert.assertEquals(exportDestination, newDest); + byte[] bytes = objectMapper.writeValueAsBytes(tableDestination); + + TableDestination deserialized = objectMapper.readValue(bytes, TableDestination.class); + Assert.assertEquals(tableDestination, deserialized); } } From e6c75abf4180fdc97b181a9a0fa435ae2bce39b4 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 24 Jan 2024 12:25:26 +0530 Subject: [PATCH 11/50] Refactor --- .../apache/druid/msq/exec/ControllerImpl.java | 16 ++++++++-------- .../exec/ExportStorageConnectorFactories.java | 10 +++++----- .../destination/ExportMSQDestination.java | 10 +--------- .../results/ExportResultsFrameProcessor.java | 12 +++++++++--- .../ExportResultsFrameProcessorFactory.java | 1 - .../apache/druid/msq/sql/MSQTaskQueryMaker.java | 13 ++++++++++--- .../apache/druid/msq/sql/MSQTaskSqlEngine.java | 2 +- 7 files changed, 34 insertions(+), 30 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index b8d8fe76276d..ea09bfc79722 100644 --- 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -41,7 +41,6 @@ import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntSet; import org.apache.calcite.sql.type.SqlTypeName; -import org.apache.druid.storage.export.ExportStorageConnectorFactory; import org.apache.druid.client.ImmutableSegmentLoadInfo; import org.apache.druid.common.guava.FutureUtils; import org.apache.druid.data.input.StringTuple; @@ -208,6 +207,7 @@ import org.apache.druid.sql.http.ResultFormat; import org.apache.druid.storage.StorageConnector; import org.apache.druid.storage.StorageConnectorProvider; +import org.apache.druid.storage.export.ExportStorageConnectorFactory; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentTimeline; import org.apache.druid.timeline.partition.DimensionRangeShardSpec; @@ -1887,12 +1887,12 @@ private static QueryDefinition makeQueryDefinition( return queryDef; } } else if (querySpec.getDestination() instanceof ExportMSQDestination) { - ExportMSQDestination exportMSQDestination = (ExportMSQDestination) querySpec.getDestination(); - final ExportStorageConnectorFactories exportStorageConnectorFactories = injector.getInstance( - ExportStorageConnectorFactories.class); - final ExportStorageConnectorFactory outputConfig = exportStorageConnectorFactories.getConnectorProviderMap() - .get(exportMSQDestination.getStorageConnectorType()); - final StorageConnectorProvider storageConnectorProvider = outputConfig.get(exportMSQDestination.getProperties(), injector); + final ExportMSQDestination exportMSQDestination = (ExportMSQDestination) querySpec.getDestination(); + final ExportStorageConnectorFactory storageConnectorFactory = injector.getInstance(ExportStorageConnectorFactories.class) + .getFactories() + .get(exportMSQDestination.getStorageConnectorType()); + final 
StorageConnectorProvider storageConnectorProvider = + storageConnectorFactory.get(exportMSQDestination.getProperties(), injector); final ResultFormat resultFormat = exportMSQDestination.getResultFormat(); // If the statement is a REPLACE statement, delete the existing files at the destination. @@ -1904,7 +1904,7 @@ private static QueryDefinition makeQueryDefinition( catch (IOException e) { throw DruidException.forPersona(DruidException.Persona.OPERATOR) .ofCategory(DruidException.Category.RUNTIME_FAILURE) - .build(e, "Count not delete existing files from "); + .build(e, "Count not delete existing files from the export destination."); } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java index ea30b46bfe3f..039924de65b2 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java @@ -26,16 +26,16 @@ public class ExportStorageConnectorFactories { - public Map getConnectorProviderMap() + public Map getFactories() { - return connectorProviderMap; + return factoriesMap; } - private final Map connectorProviderMap; + private final Map factoriesMap; @Inject - public ExportStorageConnectorFactories(Map connectorProviderMap) + public ExportStorageConnectorFactories(Map factoriesMap) { - this.connectorProviderMap = connectorProviderMap; + this.factoriesMap = factoriesMap; } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java index 0308c8f9b8c3..25f56e0ba628 100644 --- 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java @@ -22,8 +22,6 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.druid.error.DruidException; -import org.apache.druid.java.util.common.Intervals; import org.apache.druid.sql.http.ResultFormat; import org.joda.time.Interval; @@ -51,13 +49,7 @@ public ExportMSQDestination(@JsonProperty("storageConnectorType") String storage this.storageConnectorType = storageConnectorType; this.properties = properties; this.resultFormat = resultFormat; - if (replaceTimeChunks == null || Intervals.ONLY_ETERNITY.equals(replaceTimeChunks)) { - this.replaceTimeChunks = replaceTimeChunks; - } else { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.UNSUPPORTED) - .build("Currently export only works with "); - } + this.replaceTimeChunks = replaceTimeChunks; } @JsonProperty("storageConnectorType") diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java index 8d1780a25b70..9c2a7d02ccb1 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java @@ -30,6 +30,7 @@ import org.apache.druid.frame.read.FrameReader; import org.apache.druid.frame.segment.FrameStorageAdapter; import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; import 
org.apache.druid.java.util.common.Unit; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; @@ -103,12 +104,12 @@ public ReturnOrAwait runIncrementally(IntSet readableInputs) throws IOEx if (inputChannel.isFinished()) { return ReturnOrAwait.returnObject(Unit.instance()); } else { - addFrame(inputChannel.read()); + exportFrame(inputChannel.read()); return ReturnOrAwait.awaitAll(1); } } - private void addFrame(final Frame frame) throws IOException + private void exportFrame(final Frame frame) throws IOException { final RowSignature signature = frameReader.signature(); @@ -116,7 +117,7 @@ private void addFrame(final Frame frame) throws IOException new FrameStorageAdapter(frame, frameReader, Intervals.ETERNITY) .makeCursors(null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null); - try (OutputStream stream = storageConnector.write(workerNumber + "/" + partitionNumber + "." + exportFormat.toString())) { + try (OutputStream stream = storageConnector.write(getExportFilePath(workerNumber, partitionNumber, exportFormat))) { ResultFormat.Writer formatter = exportFormat.createFormatter(stream, jsonMapper); SequenceUtils.forEach( @@ -154,6 +155,11 @@ private void addFrame(final Frame frame) throws IOException } } + private static String getExportFilePath(int workerNumber, int partitionNumber, ResultFormat exportFormat) + { + return StringUtils.format("%s/%s.%s", workerNumber, partitionNumber, exportFormat.toString()); + } + @Override public void cleanup() throws IOException { diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java index c3754ca69c9c..2bd9126f787e 100644 --- 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java @@ -90,7 +90,6 @@ public ProcessorsAndChannels makeProcessors( Consumer warningPublisher ) { - // Expecting a single input slice from some prior stage. final StageInputSlice slice = (StageInputSlice) Iterables.getOnlyElement(inputSlices); if (inputSliceReader.numReadableInputs(slice) == 0) { diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java index 578b97a934f3..a572e583019c 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java @@ -25,9 +25,6 @@ import org.apache.calcite.runtime.Hook; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.Pair; -import org.apache.druid.sql.destination.IngestDestination; -import org.apache.druid.sql.destination.ExportDestination; -import org.apache.druid.sql.destination.TableDestination; import org.apache.druid.common.guava.FutureUtils; import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidInput; @@ -67,6 +64,9 @@ import org.apache.druid.sql.calcite.run.QueryMaker; import org.apache.druid.sql.calcite.run.SqlResults; import org.apache.druid.sql.calcite.table.RowSignatures; +import org.apache.druid.sql.destination.ExportDestination; +import org.apache.druid.sql.destination.IngestDestination; +import org.apache.druid.sql.destination.TableDestination; import org.apache.druid.sql.http.ResultFormat; import org.joda.time.Interval; @@ -212,6 +212,13 @@ public QueryResponse runQuery(final DruidQuery druidQuery) if 
(targetDataSource instanceof ExportDestination) { ExportDestination exportDestination = ((ExportDestination) targetDataSource); ResultFormat format = ResultFormat.fromString(sqlQueryContext.getString(DruidSqlIngest.SQL_EXPORT_FILE_FORMAT)); + + if (replaceTimeChunks != null && !Intervals.ONLY_ETERNITY.equals(replaceTimeChunks)) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.UNSUPPORTED) + .build("Currently export only works with OVERWRITE ALL clause"); + } + destination = new ExportMSQDestination( exportDestination.getDestinationType(), exportDestination.getProperties(), diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java index b49eabfff2b3..4c46d3c7fc13 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java @@ -31,7 +31,6 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.Pair; -import org.apache.druid.sql.destination.IngestDestination; import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidInput; import org.apache.druid.error.InvalidSqlInput; @@ -49,6 +48,7 @@ import org.apache.druid.sql.calcite.run.QueryMaker; import org.apache.druid.sql.calcite.run.SqlEngine; import org.apache.druid.sql.calcite.run.SqlEngines; +import org.apache.druid.sql.destination.IngestDestination; import java.util.HashSet; import java.util.List; From 4c9d4cc894d2db9f6fc6d0e28facebde37a8dd9a Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 24 Jan 2024 16:46:57 +0530 Subject: [PATCH 12/50] Change export file path --- .../apache/druid/msq/exec/ControllerImpl.java | 24 ++++++++++++------- 
.../exec/ExportStorageConnectorFactories.java | 13 ++++++---- .../destination/ExportMSQDestination.java | 9 +++++++ .../results/ExportResultsFrameProcessor.java | 10 +++++--- 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index ea09bfc79722..173575605695 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1896,15 +1896,21 @@ private static QueryDefinition makeQueryDefinition( final ResultFormat resultFormat = exportMSQDestination.getResultFormat(); // If the statement is a REPLACE statement, delete the existing files at the destination. - if (Intervals.ONLY_ETERNITY.equals(exportMSQDestination.getReplaceTimeChunks())) { - StorageConnector storageConnector = storageConnectorProvider.get(); - try { - storageConnector.deleteRecursively(""); - } - catch (IOException e) { - throw DruidException.forPersona(DruidException.Persona.OPERATOR) - .ofCategory(DruidException.Category.RUNTIME_FAILURE) - .build(e, "Count not delete existing files from the export destination."); + if (exportMSQDestination.getReplaceTimeChunks() != null) { + if (Intervals.ONLY_ETERNITY.equals(exportMSQDestination.getReplaceTimeChunks())) { + StorageConnector storageConnector = storageConnectorProvider.get(); + try { + storageConnector.deleteRecursively(""); + } + catch (IOException e) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.RUNTIME_FAILURE) + .build(e, "Exception occurred while deleting existing files from export destination."); + } + } else { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.UNSUPPORTED) + .build("Currently 
export only works with OVERWRITE ALL clause."); } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java index 039924de65b2..806ef77d9459 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java @@ -24,13 +24,11 @@ import java.util.Map; +/** + * Holder for injected {@link ExportStorageConnectorFactory}. + */ public class ExportStorageConnectorFactories { - public Map getFactories() - { - return factoriesMap; - } - private final Map factoriesMap; @Inject @@ -38,4 +36,9 @@ public ExportStorageConnectorFactories(Map getFactories() + { + return factoriesMap; + } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java index 25f56e0ba628..d701e997fa38 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java @@ -30,6 +30,15 @@ import java.util.Map; import java.util.Objects; +/** + * Destination used by tasks that write the results as files to an external destination. {@link #resultFormat} denotes + * the format of the file created and {@link #storageConnectorType} and {@link #properties} denotes the type of external + * destination. + *
+ * {@link #replaceTimeChunks} denotes how existing files should be handled. + * - If the value is null, the results are appended to the existing files. + * - If the value is present, existing files will be deleted according to time intervals. + */ public class ExportMSQDestination implements MSQDestination { public static final String TYPE = "export"; diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java index 9c2a7d02ccb1..bcc6c749f720 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import it.unimi.dsi.fastutil.ints.IntSet; +import org.apache.druid.error.DruidException; import org.apache.druid.frame.Frame; import org.apache.druid.frame.channel.ReadableFrameChannel; import org.apache.druid.frame.channel.WritableFrameChannel; @@ -117,7 +118,8 @@ private void exportFrame(final Frame frame) throws IOException new FrameStorageAdapter(frame, frameReader, Intervals.ETERNITY) .makeCursors(null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null); - try (OutputStream stream = storageConnector.write(getExportFilePath(workerNumber, partitionNumber, exportFormat))) { + final String exportFilePath = getExportFilePath(workerNumber, partitionNumber, exportFormat); + try (OutputStream stream = storageConnector.write(exportFilePath)) { ResultFormat.Writer formatter = exportFormat.createFormatter(stream, jsonMapper); SequenceUtils.forEach( @@ -148,7 +150,9 @@ private void exportFrame(final Frame frame) throws IOException formatter.writeResponseEnd(); } catch (IOException e) { - throw new 
RuntimeException(e); + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.RUNTIME_FAILURE) + .build(e, "Exception occurred while writing file to the export location [%s].", exportFilePath); } } ); @@ -157,7 +161,7 @@ private void exportFrame(final Frame frame) throws IOException private static String getExportFilePath(int workerNumber, int partitionNumber, ResultFormat exportFormat) { - return StringUtils.format("%s/%s.%s", workerNumber, partitionNumber, exportFormat.toString()); + return StringUtils.format("worker%s/partition%s.%s", workerNumber, partitionNumber, exportFormat.toString()); } @Override From 58f1d130ff7842e0b4ac32301f2ef8af51c78580 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 25 Jan 2024 12:10:15 +0530 Subject: [PATCH 13/50] Update docs --- docs/multi-stage-query/concepts.md | 11 ++++ docs/multi-stage-query/reference.md | 62 ++++++++++++++++++- .../ExportResultsFrameProcessorFactory.java | 9 ++- .../storage/s3/S3StorageDruidModule.java | 4 +- .../S3ExportStorageConnectorFactory.java | 2 +- .../druid/storage/StorageConnectorModule.java | 2 +- .../LocalFileStorageConnectorProvider.java | 4 +- sql/src/main/codegen/includes/common.ftl | 2 +- .../sql/calcite/planner/IngestHandler.java | 7 ++- .../sql/destination/ExportDestination.java | 35 +++++++++++ .../sql/destination/TableDestination.java | 5 +- .../druid/sql/calcite/CalciteExportTest.java | 18 +++++- .../workbench-view/query-tab/query-tab.tsx | 6 +- 13 files changed, 149 insertions(+), 18 deletions(-) diff --git a/docs/multi-stage-query/concepts.md b/docs/multi-stage-query/concepts.md index 7100e14d01cf..a2daf9ee957d 100644 --- a/docs/multi-stage-query/concepts.md +++ b/docs/multi-stage-query/concepts.md @@ -115,6 +115,17 @@ When deciding whether to use `REPLACE` or `INSERT`, keep in mind that segments g with dimension-based pruning but those generated with `INSERT` cannot. 
For more information about the requirements for dimension-based pruning, see [Clustering](#clustering). +### Write to an external destination with `EXTERN` + +Query tasks can write data to an external destination through the `EXTERN` function, when it is used with the `INTO` +clause, such as `REPLACE INTO EXTERN(...)` + +The EXTERN function takes arguments which specifies where to the files should be created. + +The format can be specified using an `AS` clause. + +For more information about the syntax, see [`EXTERN`](./reference.md#extern-function). + ### Primary timestamp Druid tables always include a primary timestamp named `__time`. diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index db003a683047..321aa6c83847 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -45,8 +45,11 @@ making it easy to reuse the same SQL statement for each ingest: just specify the ### `EXTERN` Function -Use the `EXTERN` function to read external data. The function has two variations. +Use the `EXTERN` function to read external data or write to an external source. +#### `EXTERN` as an input source + +The function has two variations. Function variation 1, with the input schema expressed as JSON: ```sql @@ -90,6 +93,63 @@ can precede the column list: `EXTEND (timestamp VARCHAR...)`. For more information, see [Read external data with EXTERN](concepts.md#read-external-data-with-extern). +#### `EXTERN` to export to a destination + +`EXTERN` can be used as a destination, which will export the data to the specified location and format. EXTERN when +used in this way accepts one argument. Please note that partitioning (`PARTITIONED BY`) and clustering (`CLUSTERED BY`) +is not currently supported with export statements. + +INSERT statements and REPLACE statements are both supported with an `EXTERN` destination. The statments require an `AS` +clause that determines the format. 
+Currently, only `CSV` is supported as a format. + +INSERT statements append the results to the existing files at the destination. +```sql +INSERT INTO + EXTERN() +AS CSV +SELECT + +FROM +``` + +REPLACE statements have an additional OVERWRITE clause. As partitioning is not yet supported, only `OVERWRITE ALL` +is allowed. REPLACE deletes any existing files at the destination and creates new files with the results of the query. + +```sql +REPLACE INTO + EXTERN() +AS CSV +OVERWRITE ALL +SELECT + +FROM
+``` + +Exporting is currently supported to Amazon S3 storage. The S3 extension is required to be loaded for this. +This can be done passing the function `S3` to the `EXTERN` function. + +```sql +INSERT INTO + EXTERN(S3(bucket=<...>, prefix=<...>, tempDir=<...>)) +AS CSV +SELECT + +FROM
+``` + +Supported arguments to the function: + +| Parameter | Required | Description | Default | +|-------------|---------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --| +| `bucket` | Yes | The S3 bucket to which the files are exported to. | n/a | +| `prefix` | Yes | Path prepended to all the paths uploaded to the bucket to namespace the connector's files. Provide a unique value for the prefix and do not share the same prefix between different clusters. If the location includes other files or directories, then they might get cleaned up as well. | n/a | +| `tempDir` | Yes | Directory path on the local disk to store temporary files required while uploading the data | n/a | +| `maxRetry` | No | Defines the max number times to attempt S3 API calls to avoid failures due to transient errors. | 10 | +| `chunkSize` | No | Defines the size of each chunk to temporarily store in `tempDir`. The chunk size must be between 5 MiB and 5 GiB. A large chunk size reduces the API calls to S3, however it requires more disk space to store the temporary chunks. | 100MiB | + +For more information, see [Read external data with EXTERN](concepts.md#write-to-an-external-destination-with-extern). + ### `INSERT` Use the `INSERT` statement to insert data. 
diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java index 2bd9126f787e..030f020f25b6 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java @@ -22,7 +22,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; -import com.google.common.collect.Iterables; +import org.apache.druid.error.DruidException; import org.apache.druid.frame.processor.FrameProcessor; import org.apache.druid.frame.processor.OutputChannelFactory; import org.apache.druid.frame.processor.OutputChannels; @@ -42,6 +42,7 @@ import org.apache.druid.msq.querykit.BaseFrameProcessorFactory; import org.apache.druid.sql.http.ResultFormat; import org.apache.druid.storage.StorageConnectorProvider; +import org.apache.druid.utils.CollectionUtils; import javax.annotation.Nullable; import java.util.List; @@ -50,7 +51,6 @@ @JsonTypeName("exportResults") public class ExportResultsFrameProcessorFactory extends BaseFrameProcessorFactory { - private final StorageConnectorProvider storageConnectorProvider; private final ResultFormat exportFormat; @@ -90,7 +90,10 @@ public ProcessorsAndChannels makeProcessors( Consumer warningPublisher ) { - final StageInputSlice slice = (StageInputSlice) Iterables.getOnlyElement(inputSlices); + final StageInputSlice slice = (StageInputSlice) CollectionUtils.getOnlyElement( + inputSlices, + x -> DruidException.defensive().build("Expected only a single input slice but found [%s]", inputSlices) + ); if (inputSliceReader.numReadableInputs(slice) == 0) { 
return new ProcessorsAndChannels<>(ProcessorManagers.none(), OutputChannels.none()); diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java index 2ea8856b0245..9626f6107312 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java @@ -35,7 +35,6 @@ import com.google.inject.Provides; import com.google.inject.multibindings.MapBinder; import org.apache.commons.lang.StringUtils; -import org.apache.druid.storage.export.ExportStorageConnectorFactory; import org.apache.druid.common.aws.AWSClientConfig; import org.apache.druid.common.aws.AWSEndpointConfig; import org.apache.druid.common.aws.AWSProxyConfig; @@ -45,6 +44,7 @@ import org.apache.druid.guice.LazySingleton; import org.apache.druid.initialization.DruidModule; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.storage.export.ExportStorageConnectorFactory; import org.apache.druid.storage.s3.output.S3ExportStorageConnectorFactory; import java.util.List; @@ -118,7 +118,7 @@ public void configure(Binder binder) JsonConfigProvider.bind(binder, "druid.storage.sse.kms", S3SSEKmsConfig.class); JsonConfigProvider.bind(binder, "druid.storage.sse.custom", S3SSECustomConfig.class); MapBinder mapbinder = MapBinder.newMapBinder(binder, String.class, ExportStorageConnectorFactory.class); - mapbinder.addBinding("s3").to(S3ExportStorageConnectorFactory.class); + mapbinder.addBinding(SCHEME).to(S3ExportStorageConnectorFactory.class); Binders.taskLogsBinder(binder).addBinding(SCHEME).to(S3TaskLogs.class); JsonConfigProvider.bind(binder, "druid.indexer.logs", S3TaskLogsConfig.class); diff --git 
a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java index 090f1a2c2aa5..0c6622db9867 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java @@ -20,9 +20,9 @@ package org.apache.druid.storage.s3.output; import com.google.inject.Injector; -import org.apache.druid.storage.export.ExportStorageConnectorFactory; import org.apache.druid.java.util.common.HumanReadableBytes; import org.apache.druid.storage.StorageConnectorProvider; +import org.apache.druid.storage.export.ExportStorageConnectorFactory; import java.io.File; import java.util.Map; diff --git a/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java b/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java index fce9b6a61b1b..f86c6e938898 100644 --- a/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java +++ b/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java @@ -46,6 +46,6 @@ public List getJacksonModules() public void configure(Binder binder) { MapBinder mapbinder = MapBinder.newMapBinder(binder, String.class, ExportStorageConnectorFactory.class); - mapbinder.addBinding("local").to(LocalExportStorageConnectorFactory.class); + mapbinder.addBinding(LocalFileStorageConnectorProvider.TYPE_NAME).to(LocalExportStorageConnectorFactory.class); } } diff --git a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java index b9215da8c8ca..82d1623f8404 100644 --- 
a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java +++ b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java @@ -30,9 +30,11 @@ import java.io.IOException; import java.util.Objects; -@JsonTypeName("local") +@JsonTypeName(LocalFileStorageConnectorProvider.TYPE_NAME) public class LocalFileStorageConnectorProvider implements StorageConnectorProvider { + public static final String TYPE_NAME = "local"; + @JsonProperty final File basePath; diff --git a/sql/src/main/codegen/includes/common.ftl b/sql/src/main/codegen/includes/common.ftl index cdd0900a2774..47c0f68c3113 100644 --- a/sql/src/main/codegen/includes/common.ftl +++ b/sql/src/main/codegen/includes/common.ftl @@ -163,4 +163,4 @@ Map ExternProperties() : } return properties; } -} \ No newline at end of file +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java index 2eff30f0171a..9e1566210894 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java @@ -133,7 +133,7 @@ public void validate() if (ingestNode().getTargetTable() instanceof ExternalDestinationSqlIdentifier) { if (exportFileFormat == null) { throw InvalidSqlInput.exception( - "External write statemetns requires a AS clause to specify the format, but none was found.", + "Exporting rows into an EXTERN destination requires an AS clause to specify the format, but none was found.", operationName() ); } else { @@ -142,6 +142,11 @@ public void validate() exportFileFormat ); } + } else if (exportFileFormat != null) { + throw InvalidSqlInput.exception( + "The AS clause should only be specified while exporting rows into an EXTERN destination.", + operationName() + ); } try { diff --git a/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java 
b/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java index 42e486c93a9c..f6cccece3d11 100644 --- a/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java +++ b/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java @@ -24,7 +24,11 @@ import com.fasterxml.jackson.annotation.JsonTypeName; import java.util.Map; +import java.util.Objects; +/** + * Destination that represents an ingestion to an external source. + */ @JsonTypeName(ExportDestination.TYPE_KEY) public class ExportDestination implements IngestDestination { @@ -56,4 +60,35 @@ public String getDestinationName() { return "EXTERN"; } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ExportDestination that = (ExportDestination) o; + return Objects.equals(destinationType, that.destinationType) && Objects.equals( + properties, + that.properties + ); + } + + @Override + public int hashCode() + { + return Objects.hash(destinationType, properties); + } + + @Override + public String toString() + { + return "ExportDestination{" + + "destinationType='" + destinationType + '\'' + + ", properties=" + properties + + '}'; + } } diff --git a/sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java b/sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java index cb4b8e66e6c1..4914cd331de8 100644 --- a/sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java +++ b/sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java @@ -25,11 +25,14 @@ import java.util.Objects; +/** + * Destination that represents an ingestion to a druid table. 
+ */ @JsonTypeName(TableDestination.TYPE_KEY) public class TableDestination implements IngestDestination { public static final String TYPE_KEY = "table"; - String tableName; + private final String tableName; @JsonCreator public TableDestination(@JsonProperty("tableName") String tableName) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index 7b099a048cbe..21038ad2ecd7 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -100,7 +100,7 @@ public void testExportWithoutFormat() + "SELECT dim2 FROM foo") .expectValidationError( DruidException.class, - "External write statemetns requires a AS clause to specify the format, but none was found." + "Exporting rows into an EXTERN destination requires an AS clause to specify the format, but none was found." ) .verify(); } @@ -123,4 +123,20 @@ public void testSelectFromTableNamedExport() .expectTarget("csv", RowSignature.builder().add("dim2", ColumnType.STRING).build()) .verify(); } + + @Test + public void testNormalInsertWithFormat() + { + testIngestionQuery() + .sql("REPLACE INTO testTable " + + "AS CSV " + + "OVERWRITE ALL " + + "SELECT dim2 FROM foo " + + "PARTITIONED BY ALL") + .expectValidationError( + DruidException.class, + "The AS clause should only be specified while exporting rows into an EXTERN destination." 
+ ) + .verify(); + } } diff --git a/web-console/src/views/workbench-view/query-tab/query-tab.tsx b/web-console/src/views/workbench-view/query-tab/query-tab.tsx index 22d2c79e8992..ee06d52d2571 100644 --- a/web-console/src/views/workbench-view/query-tab/query-tab.tsx +++ b/web-console/src/views/workbench-view/query-tab/query-tab.tsx @@ -428,11 +428,7 @@ export const QueryTab = React.memo(function QueryTab(props: QueryTabProps) { )} ) : ( - onDetails(statsTaskId!, 'error')} - onWarningClick={() => onDetails(statsTaskId!, 'warnings')} - goToTask={goToTask} +
Unknown query execution state
/> ))} {executionState.error && ( From 3db7a1b55c0812c8c16bfe5ff849fe264d621b30 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 25 Jan 2024 12:20:06 +0530 Subject: [PATCH 14/50] Remove webconsole changes --- web-console/src/views/workbench-view/query-tab/query-tab.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/web-console/src/views/workbench-view/query-tab/query-tab.tsx b/web-console/src/views/workbench-view/query-tab/query-tab.tsx index ee06d52d2571..8a4129fc67bf 100644 --- a/web-console/src/views/workbench-view/query-tab/query-tab.tsx +++ b/web-console/src/views/workbench-view/query-tab/query-tab.tsx @@ -429,7 +429,6 @@ export const QueryTab = React.memo(function QueryTab(props: QueryTabProps) { ) : (
Unknown query execution state
- /> ))} {executionState.error && ( Date: Thu, 25 Jan 2024 12:55:31 +0530 Subject: [PATCH 15/50] Fix spelling mistake --- docs/multi-stage-query/reference.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 321aa6c83847..ad601fb0c6dd 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -99,7 +99,7 @@ For more information, see [Read external data with EXTERN](concepts.md#read-exte used in this way accepts one argument. Please note that partitioning (`PARTITIONED BY`) and clustering (`CLUSTERED BY`) is not currently supported with export statements. -INSERT statements and REPLACE statements are both supported with an `EXTERN` destination. The statments require an `AS` +INSERT statements and REPLACE statements are both supported with an `EXTERN` destination. The statements require an `AS` clause that determines the format. Currently, only `CSV` is supported as a format. 
From c45c35793ed175ec82b9ab3fe76476b37fcee061 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 25 Jan 2024 17:54:02 +0530 Subject: [PATCH 16/50] Parser changes, add tests --- .../apache/druid/msq/exec/ControllerImpl.java | 15 +++-- .../apache/druid/msq/exec/MSQExportTest.java | 59 +++++++++++++++++++ .../apache/druid/msq/test/MSQTestBase.java | 8 ++- .../LocalExportStorageConnectorFactory.java | 6 ++ sql/src/main/codegen/config.fmpp | 4 -- sql/src/main/codegen/includes/common.ftl | 53 ++++++++--------- .../druid/sql/calcite/CalciteExportTest.java | 16 +++++ 7 files changed, 124 insertions(+), 37 deletions(-) create mode 100644 extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 173575605695..f37944b3f73c 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1888,11 +1888,18 @@ private static QueryDefinition makeQueryDefinition( } } else if (querySpec.getDestination() instanceof ExportMSQDestination) { final ExportMSQDestination exportMSQDestination = (ExportMSQDestination) querySpec.getDestination(); - final ExportStorageConnectorFactory storageConnectorFactory = injector.getInstance(ExportStorageConnectorFactories.class) - .getFactories() - .get(exportMSQDestination.getStorageConnectorType()); + final Map storageConnectorFactories = injector.getInstance(ExportStorageConnectorFactories.class) + .getFactories(); + if (!storageConnectorFactories.containsKey(exportMSQDestination.getStorageConnectorType())) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.RUNTIME_FAILURE) + .build("No storage connector found 
for storage connector type:[%s].", exportMSQDestination.getStorageConnectorType()); + } + final StorageConnectorProvider storageConnectorProvider = - storageConnectorFactory.get(exportMSQDestination.getProperties(), injector); + storageConnectorFactories.get(exportMSQDestination.getStorageConnectorType()) + .get(exportMSQDestination.getProperties(), injector); + final ResultFormat resultFormat = exportMSQDestination.getResultFormat(); // If the statement is a REPLACE statement, delete the existing files at the destination. diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java new file mode 100644 index 000000000000..1e947303cde0 --- /dev/null +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java @@ -0,0 +1,59 @@ +package org.apache.druid.msq.exec; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.msq.test.MSQTestBase; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.hamcrest.CoreMatchers; +import org.junit.Test; +import org.junit.internal.matchers.ThrowableMessageMatcher; + +import java.io.File; +import java.io.IOException; + +public class MSQExportTest extends MSQTestBase +{ + @Test + public void testExport() throws IOException + { + RowSignature rowSignature = RowSignature.builder() + .add("__time", ColumnType.LONG) + .add("dim1", ColumnType.STRING) + .add("cnt", ColumnType.LONG).build(); + + File exportDir = temporaryFolder.newFolder("export/"); + testIngestQuery().setSql( + "insert into extern(local(basePath = \"" + exportDir.getAbsolutePath() + "\")) as csv select __time, dim1 from foo") + .setExpectedDataSource("foo1") + .setQueryContext(DEFAULT_MSQ_CONTEXT) + 
.setExpectedRowSignature(rowSignature) + .setExpectedSegment(ImmutableSet.of()) + .setExpectedResultRows(ImmutableList.of()) + .verifyResults(); + } + + @Test + public void testWithUnsupportedStorageConnector() + { + RowSignature rowSignature = RowSignature.builder() + .add("__time", ColumnType.LONG) + .add("dim1", ColumnType.STRING) + .add("cnt", ColumnType.LONG).build(); + + testIngestQuery().setSql( + "insert into extern(hdfs(basePath = \"/var\")) as csv select __time, dim1 from foo") + .setExpectedDataSource("foo1") + .setQueryContext(DEFAULT_MSQ_CONTEXT) + .setExpectedRowSignature(rowSignature) + .setExpectedSegment(ImmutableSet.of()) + .setExpectedResultRows(ImmutableList.of()) + .setExpectedExecutionErrorMatcher(CoreMatchers.allOf( + CoreMatchers.instanceOf(ISE.class), + ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString( + "No storage connector found for storage connector type:[hdfs]." + ))) + ).verifyExecutionError(); + } +} diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index 0146fcf9bd8b..939bffa16bb3 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java @@ -34,6 +34,7 @@ import com.google.inject.Key; import com.google.inject.Module; import com.google.inject.TypeLiteral; +import com.google.inject.multibindings.MapBinder; import com.google.inject.util.Modules; import com.google.inject.util.Providers; import org.apache.druid.client.ImmutableSegmentLoadInfo; @@ -176,7 +177,10 @@ import org.apache.druid.sql.guice.SqlBindings; import org.apache.druid.storage.StorageConnector; import org.apache.druid.storage.StorageConnectorProvider; +import org.apache.druid.storage.export.ExportStorageConnectorFactory; +import 
org.apache.druid.storage.export.LocalExportStorageConnectorFactory; import org.apache.druid.storage.local.LocalFileStorageConnector; +import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.PruneLoadSpec; import org.apache.druid.timeline.SegmentId; @@ -476,6 +480,8 @@ public String getFormatString() } binder.bind(DataSegment.PruneSpecsHolder.class).toInstance(DataSegment.PruneSpecsHolder.DEFAULT); + MapBinder mapbinder = MapBinder.newMapBinder(binder, String.class, ExportStorageConnectorFactory.class); + mapbinder.addBinding(LocalFileStorageConnectorProvider.TYPE_NAME).to(LocalExportStorageConnectorFactory.class); }, // Requirement of WorkerMemoryParameters.createProductionInstanceForWorker(injector) binder -> binder.bind(AppenderatorsManager.class).toProvider(() -> null), @@ -895,7 +901,7 @@ public Builder setExpectedRowSignature(RowSignature expectedRowSignature) public Builder setExpectedSegment(Set expectedSegments) { - Preconditions.checkArgument(!expectedSegments.isEmpty(), "Segments cannot be empty"); + Preconditions.checkArgument(expectedSegments != null, "Segments cannot be null"); this.expectedSegments = expectedSegments; return asBuilder(); } diff --git a/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java b/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java index be989aae4244..7bbd75e765c3 100644 --- a/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java +++ b/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java @@ -26,6 +26,12 @@ import java.io.File; import java.util.Map; +/** + * Provides a {@link StorageConnectorProvider} which allows writing to the local machine. Not meaningful for production, + * and is used for tests or debugging purposes. + *
+ * Not to be bound in Guice modules. + */ public class LocalExportStorageConnectorFactory implements ExportStorageConnectorFactory { @Override diff --git a/sql/src/main/codegen/config.fmpp b/sql/src/main/codegen/config.fmpp index 88fa826a3a8e..19ff322406eb 100644 --- a/sql/src/main/codegen/config.fmpp +++ b/sql/src/main/codegen/config.fmpp @@ -70,15 +70,11 @@ data: { "OVERWRITE" "PARTITIONED" "EXTERN" - "S3" - "CSV" ] nonReservedKeywordsToAdd: [ "OVERWRITE" "EXTERN" - "S3" - "CSV" ] # List of methods for parsing custom SQL statements. diff --git a/sql/src/main/codegen/includes/common.ftl b/sql/src/main/codegen/includes/common.ftl index 47c0f68c3113..b3fe2b118d65 100644 --- a/sql/src/main/codegen/includes/common.ftl +++ b/sql/src/main/codegen/includes/common.ftl @@ -109,43 +109,32 @@ SqlTypeNameSpec DruidType() : // Parses the supported file formats for export. String FileFormat() : -{} { - ( - - { - return "CSV"; - } - ) + SqlNode format; +} +{ + format = SimpleIdentifier() + { + return format.toString(); + } } SqlIdentifier ExternalDestination() : { final Span s; + SqlIdentifier destinationType; Map properties = new HashMap(); } { - ( - [ [properties = ExternProperties()] ] - { - s = span(); - return new ExternalDestinationSqlIdentifier( - "s3", - s.pos(), - properties - ); - } - | - [ [properties = ExternProperties()] ] - { - s = span(); - return new ExternalDestinationSqlIdentifier( - "local", - s.pos(), - properties - ); - } - ) + destinationType = SimpleIdentifier() [ [properties = ExternProperties()] ] + { + s = span(); + return new ExternalDestinationSqlIdentifier( + destinationType.toString(), + s.pos(), + properties + ); + } } Map ExternProperties() : @@ -164,3 +153,11 @@ Map ExternProperties() : return properties; } } + +SqlNode testRule(): +{ + final SqlNode e; +} +{ + e = SimpleIdentifier() { return e; } +} \ No newline at end of file diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index 21038ad2ecd7..122f7baacea4 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -139,4 +139,20 @@ public void testNormalInsertWithFormat() ) .verify(); } + + @Test + public void testUnsupportedExportFormat() + { + testIngestionQuery() + .sql("REPLACE INTO testTable " + + "AS JSON " + + "OVERWRITE ALL " + + "SELECT dim2 FROM foo " + + "PARTITIONED BY ALL") + .expectValidationError( + DruidException.class, + "The AS clause should only be specified while exporting rows into an EXTERN destination." + ) + .verify(); + } } From 8ed422c90c73991526d8b0bdfe4afcf0cc3469ee Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 25 Jan 2024 19:55:00 +0530 Subject: [PATCH 17/50] Parser changes, resolve build warnings --- .../apache/druid/msq/exec/MSQExportTest.java | 4 ++-- .../S3ExportStorageConnectorFactory.java | 17 +++++++++----- .../s3/output/S3StorageConnectorProvider.java | 5 +++++ .../LocalExportStorageConnectorFactory.java | 2 +- .../LocalFileStorageConnectorProvider.java | 5 +++-- sql/src/main/codegen/includes/common.ftl | 22 ++++++++++++++----- .../druid/sql/calcite/CalciteExportTest.java | 8 +++---- 7 files changed, 42 insertions(+), 21 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java index 1e947303cde0..bea6bc726f72 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java @@ -25,7 +25,7 @@ public void testExport() throws IOException File exportDir = temporaryFolder.newFolder("export/"); testIngestQuery().setSql( - "insert into extern(local(basePath = \"" + 
exportDir.getAbsolutePath() + "\")) as csv select __time, dim1 from foo") + "insert into extern(localStorage(basePath = '" + exportDir.getAbsolutePath() + "')) as csv select __time, dim1 from foo") .setExpectedDataSource("foo1") .setQueryContext(DEFAULT_MSQ_CONTEXT) .setExpectedRowSignature(rowSignature) @@ -43,7 +43,7 @@ public void testWithUnsupportedStorageConnector() .add("cnt", ColumnType.LONG).build(); testIngestQuery().setSql( - "insert into extern(hdfs(basePath = \"/var\")) as csv select __time, dim1 from foo") + "insert into extern(hdfs(basePath = '/var')) as csv select __time, dim1 from foo") .setExpectedDataSource("foo1") .setQueryContext(DEFAULT_MSQ_CONTEXT) .setExpectedRowSignature(rowSignature) diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java index 0c6622db9867..0ffc98b9eaf0 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java @@ -20,7 +20,7 @@ package org.apache.druid.storage.s3.output; import com.google.inject.Injector; -import org.apache.druid.java.util.common.HumanReadableBytes; +import org.apache.druid.query.QueryContexts; import org.apache.druid.storage.StorageConnectorProvider; import org.apache.druid.storage.export.ExportStorageConnectorFactory; @@ -32,12 +32,17 @@ public class S3ExportStorageConnectorFactory implements ExportStorageConnectorFa @Override public StorageConnectorProvider get(Map properties, Injector injector) { + String chunkSize = properties.getOrDefault(S3StorageConnectorProvider.CHUNK_SIZE_FIELD_NAME, null); + Integer maxRetries = QueryContexts.getAsInt( + S3StorageConnectorProvider.MAX_RETRY_FIELD_NAME, + 
properties.getOrDefault(S3StorageConnectorProvider.MAX_RETRY_FIELD_NAME, null)); + return new S3StorageConnectorProvider( - properties.get("bucket"), - properties.get("prefix"), - new File(properties.get("tempDir")), - HumanReadableBytes.valueOf(Integer.parseInt(properties.get("chunkSize"))), - Integer.parseInt(properties.get("maxRetry")), + properties.get(S3StorageConnectorProvider.BUCKET_FIELD_NAME), + properties.get(S3StorageConnectorProvider.PREFIX_FIELD_NAME), + new File(properties.get(S3StorageConnectorProvider.TEMP_DIR_FIELD_NAME)), + QueryContexts.getAsHumanReadableBytes(S3StorageConnectorProvider.CHUNK_SIZE_FIELD_NAME, chunkSize, null), + maxRetries, injector ); } diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java index 38b8cada1976..c7be1e26e27e 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java @@ -36,6 +36,11 @@ @JsonTypeName(S3StorageDruidModule.SCHEME) public class S3StorageConnectorProvider extends S3OutputConfig implements StorageConnectorProvider { + public static final String BUCKET_FIELD_NAME = "bucket"; + public static final String PREFIX_FIELD_NAME = "prefix"; + public static final String TEMP_DIR_FIELD_NAME = "tempDir"; + public static final String CHUNK_SIZE_FIELD_NAME = "chunkSize"; + public static final String MAX_RETRY_FIELD_NAME = "maxRetry"; @JacksonInject ServerSideEncryptingAmazonS3 s3; diff --git a/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java b/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java index 7bbd75e765c3..ffcc37f300a2 100644 --- 
a/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java +++ b/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java @@ -37,6 +37,6 @@ public class LocalExportStorageConnectorFactory implements ExportStorageConnecto @Override public StorageConnectorProvider get(Map properties, Injector injector) { - return new LocalFileStorageConnectorProvider(new File(properties.get("basePath"))); + return new LocalFileStorageConnectorProvider(new File(properties.get(LocalFileStorageConnectorProvider.BASE_PATH_FIELD_NAME))); } } diff --git a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java index 82d1623f8404..3fe76096c30d 100644 --- a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java +++ b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java @@ -33,13 +33,14 @@ @JsonTypeName(LocalFileStorageConnectorProvider.TYPE_NAME) public class LocalFileStorageConnectorProvider implements StorageConnectorProvider { - public static final String TYPE_NAME = "local"; + public static final String TYPE_NAME = "localStorage"; + public static final String BASE_PATH_FIELD_NAME = "basePath"; @JsonProperty final File basePath; @JsonCreator - public LocalFileStorageConnectorProvider(@JsonProperty(value = "basePath", required = true) File basePath) + public LocalFileStorageConnectorProvider(@JsonProperty(value = BASE_PATH_FIELD_NAME, required = true) File basePath) { this.basePath = basePath; } diff --git a/sql/src/main/codegen/includes/common.ftl b/sql/src/main/codegen/includes/common.ftl index b3fe2b118d65..1607b818d00a 100644 --- a/sql/src/main/codegen/includes/common.ftl +++ b/sql/src/main/codegen/includes/common.ftl @@ -126,7 +126,7 @@ SqlIdentifier ExternalDestination() : Map properties = 
new HashMap(); } { - destinationType = SimpleIdentifier() [ [properties = ExternProperties()] ] + destinationType = SimpleIdentifier() [ properties = ExternProperties() ] { s = span(); return new ExternalDestinationSqlIdentifier( @@ -141,15 +141,25 @@ Map ExternProperties() : { final Span s; final Map properties = new HashMap(); + SqlIdentifier identifier; + String value; SqlNodeList commaList = SqlNodeList.EMPTY; } { - commaList = ExpressionCommaList(span(), ExprContext.ACCEPT_NON_QUERY) - { - for (SqlNode sqlNode : commaList) { - List sqlNodeList = ((SqlBasicCall) sqlNode).getOperandList(); - properties.put(((SqlIdentifier) sqlNodeList.get(0)).getSimple(), ((SqlIdentifier) sqlNodeList.get(1)).getSimple()); + ( + identifier = SimpleIdentifier() value = SimpleStringLiteral() + { + properties.put(identifier.toString(), value); } + ) + ( + + identifier = SimpleIdentifier() value = SimpleStringLiteral() + { + properties.put(identifier.toString(), value); + } + )* + { return properties; } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index 122f7baacea4..ca2fd2e200d8 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -33,7 +33,7 @@ public class CalciteExportTest extends CalciteIngestionDmlTest public void testReplaceIntoExtern() { testIngestionQuery() - .sql("REPLACE INTO EXTERN(s3(bucket=\"bucket1\",prefix=\"prefix1\",tempDir=\"/tempdir\",chunkSize=\"5242880\",maxRetry=\"1\")) " + .sql("REPLACE INTO EXTERN(s3(bucket='bucket1',prefix='prefix1',tempDir='/tempdir',chunkSize='5242880',maxRetry='1')) " + "AS CSV " + "OVERWRITE ALL " + "SELECT dim2 FROM foo") @@ -57,7 +57,7 @@ public void testReplaceIntoExtern() public void testExportWithPartitionedBy() { testIngestionQuery() - .sql("REPLACE INTO 
EXTERN(s3(bucket=\"bucket1\",prefix=\"prefix1\",tempDir=\"/tempdir\",chunkSize=\"5242880\",maxRetry=\"1\")) " + .sql("REPLACE INTO EXTERN(s3(bucket='bucket1',prefix='prefix1',tempDir='/tempdir',chunkSize='5242880',maxRetry='1')) " + "AS CSV " + "OVERWRITE ALL " + "SELECT dim2 FROM foo " @@ -73,7 +73,7 @@ public void testExportWithPartitionedBy() public void testInsertIntoExtern() { testIngestionQuery() - .sql("INSERT INTO EXTERN(s3(bucket=\"bucket1\",prefix=\"prefix1\",tempDir=\"/tempdir\",chunkSize=\"5242880\",maxRetry=\"1\")) " + .sql("INSERT INTO EXTERN(s3(bucket='bucket1',prefix='prefix1',tempDir='/tempdir',chunkSize='5242880',maxRetry='1')) " + "AS CSV " + "SELECT dim2 FROM foo") .expectQuery( @@ -96,7 +96,7 @@ public void testInsertIntoExtern() public void testExportWithoutFormat() { testIngestionQuery() - .sql("INSERT INTO EXTERN(s3(bucket=\"bucket1\",prefix=\"prefix1\",tempDir=\"/tempdir\",chunkSize=\"5242880\",maxRetry=\"1\")) " + .sql("INSERT INTO EXTERN(s3(bucket='bucket1',prefix='prefix1',tempDir='/tempdir',chunkSize='5242880',maxRetry='1')) " + "SELECT dim2 FROM foo") .expectValidationError( DruidException.class, From 6e7262d90a63562fd30d664661f91929b2405327 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 25 Jan 2024 20:44:33 +0530 Subject: [PATCH 18/50] Fix failing test --- .../apache/druid/msq/exec/MSQExportTest.java | 19 +++++++++++++++++++ .../calcite/parser/DruidSqlUnparseTest.java | 4 ++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java index bea6bc726f72..a08540574690 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.druid.msq.exec; import com.google.common.collect.ImmutableList; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java index 168e4222051c..31ddc8d9d6be 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java @@ -99,9 +99,9 @@ private static DruidSqlParserImpl createTestParser(String parseString) @Test public void testUnparseExternalSqlIdentifier() throws ParseException { - String sqlQuery = "REPLACE INTO EXTERN( s3(bucket=bucket1,prefix=prefix1) ) AS CSV OVERWRITE ALL SELECT dim2 FROM foo PARTITIONED BY ALL"; + String sqlQuery = "REPLACE INTO EXTERN( s3(bucket='bucket1',prefix='prefix1') ) AS CSV OVERWRITE ALL SELECT dim2 FROM foo PARTITIONED BY ALL"; String prettySqlQuery = "REPLACE INTO EXTERN(S3(bucket = \"bucket1\", prefix = \"prefix1\"))\n" - + "AS CSV\n" + + "AS csv\n" + "OVERWRITE ALL\n" + "SELECT \"dim2\"\n" + " FROM \"foo\"\n" From f75188eed08f3665fdcc9a8c8cff4b36ac577dfe Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 25 Jan 2024 
22:32:04 +0530 Subject: [PATCH 19/50] Fix failing test --- .../org/apache/druid/storage/StorageConnectorModuleTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/storage/StorageConnectorModuleTest.java b/processing/src/test/java/org/apache/druid/storage/StorageConnectorModuleTest.java index df9a88d4813e..2773a95edc51 100644 --- a/processing/src/test/java/org/apache/druid/storage/StorageConnectorModuleTest.java +++ b/processing/src/test/java/org/apache/druid/storage/StorageConnectorModuleTest.java @@ -31,12 +31,12 @@ public class StorageConnectorModuleTest { private static final String JSON = "{\n" - + " \"type\": \"local\",\n" + + " \"type\": \"localStorage\",\n" + " \"basePath\": \"/tmp\"\n" + "}"; private static final String JSON_WITHOUT_PATH = "{\n" - + " \"type\": \"local\"\n" + + " \"type\": \"localStorage\"\n" + "}"; final ObjectMapper objectMapper = new ObjectMapper() From e571a0a838ae311697cb10ca11c35f8e88a49161 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Fri, 26 Jan 2024 20:23:58 +0530 Subject: [PATCH 20/50] Fix IT tests --- .../src/test/java/org/apache/druid/msq/test/MSQTestBase.java | 3 +-- .../org/apache/druid/storage/StorageConnectorModule.java | 5 ----- .../storage/export/LocalExportStorageConnectorFactory.java | 1 + .../storage/local/LocalFileStorageConnectorProvider.java | 2 +- .../org/apache/druid/storage/StorageConnectorModuleTest.java | 4 ++-- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index 939bffa16bb3..9fd0160fa6ed 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java @@ -180,7 +180,6 @@ import 
org.apache.druid.storage.export.ExportStorageConnectorFactory; import org.apache.druid.storage.export.LocalExportStorageConnectorFactory; import org.apache.druid.storage.local.LocalFileStorageConnector; -import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.PruneLoadSpec; import org.apache.druid.timeline.SegmentId; @@ -481,7 +480,7 @@ public String getFormatString() binder.bind(DataSegment.PruneSpecsHolder.class).toInstance(DataSegment.PruneSpecsHolder.DEFAULT); MapBinder mapbinder = MapBinder.newMapBinder(binder, String.class, ExportStorageConnectorFactory.class); - mapbinder.addBinding(LocalFileStorageConnectorProvider.TYPE_NAME).to(LocalExportStorageConnectorFactory.class); + mapbinder.addBinding(LocalExportStorageConnectorFactory.TYPE_NAME).to(LocalExportStorageConnectorFactory.class); }, // Requirement of WorkerMemoryParameters.createProductionInstanceForWorker(injector) binder -> binder.bind(AppenderatorsManager.class).toProvider(() -> null), diff --git a/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java b/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java index f86c6e938898..4792a5139c6b 100644 --- a/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java +++ b/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java @@ -23,10 +23,7 @@ import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.common.collect.ImmutableList; import com.google.inject.Binder; -import com.google.inject.multibindings.MapBinder; import org.apache.druid.initialization.DruidModule; -import org.apache.druid.storage.export.ExportStorageConnectorFactory; -import org.apache.druid.storage.export.LocalExportStorageConnectorFactory; import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; import java.util.List; @@ -45,7 +42,5 @@ public List getJacksonModules() 
@Override public void configure(Binder binder) { - MapBinder mapbinder = MapBinder.newMapBinder(binder, String.class, ExportStorageConnectorFactory.class); - mapbinder.addBinding(LocalFileStorageConnectorProvider.TYPE_NAME).to(LocalExportStorageConnectorFactory.class); } } diff --git a/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java b/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java index ffcc37f300a2..3f45a2a5ff93 100644 --- a/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java +++ b/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java @@ -34,6 +34,7 @@ */ public class LocalExportStorageConnectorFactory implements ExportStorageConnectorFactory { + public static final String TYPE_NAME = "localStorage"; @Override public StorageConnectorProvider get(Map properties, Injector injector) { diff --git a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java index 3fe76096c30d..ae95804ff1c9 100644 --- a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java +++ b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java @@ -33,7 +33,7 @@ @JsonTypeName(LocalFileStorageConnectorProvider.TYPE_NAME) public class LocalFileStorageConnectorProvider implements StorageConnectorProvider { - public static final String TYPE_NAME = "localStorage"; + public static final String TYPE_NAME = "local"; public static final String BASE_PATH_FIELD_NAME = "basePath"; @JsonProperty diff --git a/processing/src/test/java/org/apache/druid/storage/StorageConnectorModuleTest.java b/processing/src/test/java/org/apache/druid/storage/StorageConnectorModuleTest.java index 2773a95edc51..df9a88d4813e 100644 --- 
a/processing/src/test/java/org/apache/druid/storage/StorageConnectorModuleTest.java +++ b/processing/src/test/java/org/apache/druid/storage/StorageConnectorModuleTest.java @@ -31,12 +31,12 @@ public class StorageConnectorModuleTest { private static final String JSON = "{\n" - + " \"type\": \"localStorage\",\n" + + " \"type\": \"local\",\n" + " \"basePath\": \"/tmp\"\n" + "}"; private static final String JSON_WITHOUT_PATH = "{\n" - + " \"type\": \"localStorage\"\n" + + " \"type\": \"local\"\n" + "}"; final ObjectMapper objectMapper = new ObjectMapper() From 12474613aec18b683e76d32ea6eca85a44710136 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Fri, 26 Jan 2024 22:03:46 +0530 Subject: [PATCH 21/50] Add tests --- .../apache/druid/msq/exec/MSQExportTest.java | 41 +++++++- .../export/TestExportStorageConnector.java | 93 +++++++++++++++++++ .../apache/druid/msq/test/MSQTestBase.java | 10 +- sql/src/main/codegen/includes/common.ftl | 2 +- .../druid/sql/calcite/CalciteExportTest.java | 23 +++++ 5 files changed, 161 insertions(+), 8 deletions(-) create mode 100644 extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnector.java diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java index a08540574690..93ae6680264b 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java @@ -22,15 +22,21 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.msq.export.TestExportStorageConnector; import org.apache.druid.msq.test.MSQTestBase; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; 
+import org.apache.druid.sql.http.ResultFormat; import org.hamcrest.CoreMatchers; +import org.junit.Assert; import org.junit.Test; import org.junit.internal.matchers.ThrowableMessageMatcher; -import java.io.File; +import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.List; public class MSQExportTest extends MSQTestBase { @@ -42,15 +48,19 @@ public void testExport() throws IOException .add("dim1", ColumnType.STRING) .add("cnt", ColumnType.LONG).build(); - File exportDir = temporaryFolder.newFolder("export/"); testIngestQuery().setSql( - "insert into extern(localStorage(basePath = '" + exportDir.getAbsolutePath() + "')) as csv select __time, dim1 from foo") + "insert into extern(" + TestExportStorageConnector.TYPE + "()) as csv select cnt, dim1 from foo") .setExpectedDataSource("foo1") .setQueryContext(DEFAULT_MSQ_CONTEXT) .setExpectedRowSignature(rowSignature) .setExpectedSegment(ImmutableSet.of()) .setExpectedResultRows(ImmutableList.of()) .verifyResults(); + + Assert.assertEquals( + expectedFooFileContents(), + new String(testExportStorageConnector.getByteArrayOutputStream().toByteArray(), Charset.defaultCharset()) + ); } @Test @@ -75,4 +85,29 @@ public void testWithUnsupportedStorageConnector() ))) ).verifyExecutionError(); } + + private String expectedFooFileContents() throws IOException + { + List expectedRows = new ArrayList<>(ImmutableList.of( + new Object[]{0, "1", null}, + new Object[]{1, "1", 10.1}, + new Object[]{2, "1", 2}, + new Object[]{3, "1", 1}, + new Object[]{4, "1", "def"}, + new Object[]{5, "1", "abc"} + )); + + ByteArrayOutputStream expectedResult = new ByteArrayOutputStream(); + ResultFormat.Writer formatter = ResultFormat.CSV.createFormatter(expectedResult, objectMapper); + formatter.writeResponseStart(); + for (Object[] row : expectedRows) { + formatter.writeRowStart(); + for (Object object : row) { + formatter.writeRowField("", object); + } + 
formatter.writeRowEnd(); + } + formatter.writeResponseEnd(); + return new String(expectedResult.toByteArray(), Charset.defaultCharset()); + } } diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnector.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnector.java new file mode 100644 index 000000000000..f8105ac94207 --- /dev/null +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnector.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.msq.export; + +import org.apache.druid.storage.StorageConnector; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Iterator; + +public class TestExportStorageConnector implements StorageConnector +{ + public static final String TYPE = "testStorage"; + private final ByteArrayOutputStream byteArrayOutputStream; + + public TestExportStorageConnector() + { + this.byteArrayOutputStream = new ByteArrayOutputStream(); + } + + public ByteArrayOutputStream getByteArrayOutputStream() + { + return byteArrayOutputStream; + } + + @Override + public boolean pathExists(String path) + { + throw new UnsupportedOperationException(); + } + + @Override + public InputStream read(String path) throws IOException + { + throw new UnsupportedOperationException(); + } + + @Override + public InputStream readRange(String path, long from, long size) + { + throw new UnsupportedOperationException(); + } + + @Override + public OutputStream write(String path) throws IOException + { + + return byteArrayOutputStream; + } + + @Override + public void deleteFile(String path) + { + throw new UnsupportedOperationException(); + } + + @Override + public void deleteFiles(Iterable paths) + { + throw new UnsupportedOperationException(); + } + + @Override + public void deleteRecursively(String path) + { + throw new UnsupportedOperationException(); + } + + @Override + public Iterator listDir(String dirName) + { + throw new UnsupportedOperationException(); + } +} diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index 9fd0160fa6ed..ff22c4bd389a 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java 
@@ -88,6 +88,7 @@ import org.apache.druid.msq.exec.LoadedSegmentDataProvider; import org.apache.druid.msq.exec.LoadedSegmentDataProviderFactory; import org.apache.druid.msq.exec.WorkerMemoryParameters; +import org.apache.druid.msq.export.TestExportStorageConnector; import org.apache.druid.msq.guice.MSQDurableStorageModule; import org.apache.druid.msq.guice.MSQExternalDataSourceModule; import org.apache.druid.msq.guice.MSQIndexingModule; @@ -178,7 +179,6 @@ import org.apache.druid.storage.StorageConnector; import org.apache.druid.storage.StorageConnectorProvider; import org.apache.druid.storage.export.ExportStorageConnectorFactory; -import org.apache.druid.storage.export.LocalExportStorageConnectorFactory; import org.apache.druid.storage.local.LocalFileStorageConnector; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.PruneLoadSpec; @@ -309,7 +309,7 @@ public class MSQTestBase extends BaseCalciteQueryTest protected SqlStatementFactory sqlStatementFactory; protected AuthorizerMapper authorizerMapper; private IndexIO indexIO; - + protected TestExportStorageConnector testExportStorageConnector = new TestExportStorageConnector(); // Contains the metadata of loaded segments protected List loadedSegmentsMetadata = new ArrayList<>(); // Mocks the return of data from data servers @@ -479,8 +479,10 @@ public String getFormatString() } binder.bind(DataSegment.PruneSpecsHolder.class).toInstance(DataSegment.PruneSpecsHolder.DEFAULT); - MapBinder mapbinder = MapBinder.newMapBinder(binder, String.class, ExportStorageConnectorFactory.class); - mapbinder.addBinding(LocalExportStorageConnectorFactory.TYPE_NAME).to(LocalExportStorageConnectorFactory.class); + + MapBinder mapbinder + = MapBinder.newMapBinder(binder, String.class, ExportStorageConnectorFactory.class); + mapbinder.addBinding(TestExportStorageConnector.TYPE).toInstance((properties, injector) -> () -> testExportStorageConnector); }, // Requirement of 
WorkerMemoryParameters.createProductionInstanceForWorker(injector) binder -> binder.bind(AppenderatorsManager.class).toProvider(() -> null), diff --git a/sql/src/main/codegen/includes/common.ftl b/sql/src/main/codegen/includes/common.ftl index 1607b818d00a..aa8db161de0b 100644 --- a/sql/src/main/codegen/includes/common.ftl +++ b/sql/src/main/codegen/includes/common.ftl @@ -126,7 +126,7 @@ SqlIdentifier ExternalDestination() : Map properties = new HashMap(); } { - destinationType = SimpleIdentifier() [ properties = ExternProperties() ] + destinationType = SimpleIdentifier() [ [ properties = ExternProperties() ] ] { s = span(); return new ExternalDestinationSqlIdentifier( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index ca2fd2e200d8..788f9843e528 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -105,6 +105,29 @@ public void testExportWithoutFormat() .verify(); } + @Test + public void testExportSourceWithNoArguments() + { + testIngestionQuery() + .sql("INSERT INTO EXTERN(testLocal()) " + + "AS CSV " + + "SELECT dim2 FROM foo") + .expectQuery( + Druids.newScanQueryBuilder() + .dataSource( + "foo" + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("dim2") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .build() + ) + .expectResources(dataSourceRead("foo")) + .expectTarget("EXTERN", RowSignature.builder().add("dim2", ColumnType.STRING).build()) + .verify(); + } + @Test public void testSelectFromTableNamedExport() { From d5d369366587964fa14bc4813f886578a1ba3551 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Fri, 26 Jan 2024 23:02:16 +0530 Subject: [PATCH 22/50] Cleanup --- .../java/org/apache/druid/msq/exec/MSQExportTest.java | 2 +- .../druid/msq/export/TestExportStorageConnector.java | 8 
+++----- .../test/java/org/apache/druid/msq/test/MSQTestBase.java | 4 +++- .../export/LocalExportStorageConnectorFactory.java | 1 + 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java index 93ae6680264b..f49836f5a694 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java @@ -49,7 +49,7 @@ public void testExport() throws IOException .add("cnt", ColumnType.LONG).build(); testIngestQuery().setSql( - "insert into extern(" + TestExportStorageConnector.TYPE + "()) as csv select cnt, dim1 from foo") + "insert into extern(" + TestExportStorageConnector.TYPE_NAME + "()) as csv select cnt, dim1 from foo") .setExpectedDataSource("foo1") .setQueryContext(DEFAULT_MSQ_CONTEXT) .setExpectedRowSignature(rowSignature) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnector.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnector.java index f8105ac94207..2399b916c7c1 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnector.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnector.java @@ -22,14 +22,13 @@ import org.apache.druid.storage.StorageConnector; import java.io.ByteArrayOutputStream; -import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.Iterator; public class TestExportStorageConnector implements StorageConnector { - public static final String TYPE = "testStorage"; + public static final String TYPE_NAME = "testStorage"; private final ByteArrayOutputStream 
byteArrayOutputStream; public TestExportStorageConnector() @@ -49,7 +48,7 @@ public boolean pathExists(String path) } @Override - public InputStream read(String path) throws IOException + public InputStream read(String path) { throw new UnsupportedOperationException(); } @@ -61,9 +60,8 @@ public InputStream readRange(String path, long from, long size) } @Override - public OutputStream write(String path) throws IOException + public OutputStream write(String path) { - return byteArrayOutputStream; } diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index ff22c4bd389a..b36590a29839 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java @@ -179,6 +179,7 @@ import org.apache.druid.storage.StorageConnector; import org.apache.druid.storage.StorageConnectorProvider; import org.apache.druid.storage.export.ExportStorageConnectorFactory; +import org.apache.druid.storage.export.LocalExportStorageConnectorFactory; import org.apache.druid.storage.local.LocalFileStorageConnector; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.PruneLoadSpec; @@ -482,7 +483,8 @@ public String getFormatString() MapBinder mapbinder = MapBinder.newMapBinder(binder, String.class, ExportStorageConnectorFactory.class); - mapbinder.addBinding(TestExportStorageConnector.TYPE).toInstance((properties, injector) -> () -> testExportStorageConnector); + mapbinder.addBinding(LocalExportStorageConnectorFactory.TYPE_NAME).to(LocalExportStorageConnectorFactory.class); + mapbinder.addBinding(TestExportStorageConnector.TYPE_NAME).toInstance((properties, injector) -> () -> testExportStorageConnector); }, // Requirement of WorkerMemoryParameters.createProductionInstanceForWorker(injector) binder -> 
binder.bind(AppenderatorsManager.class).toProvider(() -> null), diff --git a/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java b/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java index 3f45a2a5ff93..d3aca9b79475 100644 --- a/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java +++ b/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java @@ -35,6 +35,7 @@ public class LocalExportStorageConnectorFactory implements ExportStorageConnectorFactory { public static final String TYPE_NAME = "localStorage"; + @Override public StorageConnectorProvider get(Map properties, Injector injector) { From 55a4aed38ff9bb3bd4a1716215954c38f867c887 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Sat, 27 Jan 2024 06:54:43 +0530 Subject: [PATCH 23/50] Fix unparse --- .../sql/calcite/parser/ExternalDestinationSqlIdentifier.java | 4 +--- .../apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java index 833acea70505..2e8ad8b74f65 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java @@ -65,9 +65,7 @@ public void unparse(SqlWriter writer, int leftPrec, int rightPrec) SqlWriter.Frame frame = writer.startFunCall(getDestinationType()); for (Map.Entry property : properties.entrySet()) { writer.sep(","); - writer.print(property.getKey()); - writer.print(" = "); - writer.identifier(property.getValue(), false); + writer.print(String.format("%s = '%s'", property.getKey(), property.getValue())); } writer.endFunCall(frame); 
writer.endFunCall(externFrame); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java index 31ddc8d9d6be..71c001653601 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java @@ -100,7 +100,7 @@ private static DruidSqlParserImpl createTestParser(String parseString) public void testUnparseExternalSqlIdentifier() throws ParseException { String sqlQuery = "REPLACE INTO EXTERN( s3(bucket='bucket1',prefix='prefix1') ) AS CSV OVERWRITE ALL SELECT dim2 FROM foo PARTITIONED BY ALL"; - String prettySqlQuery = "REPLACE INTO EXTERN(S3(bucket = \"bucket1\", prefix = \"prefix1\"))\n" + String prettySqlQuery = "REPLACE INTO EXTERN(S3(bucket = 'bucket1', prefix = 'prefix1'))\n" + "AS csv\n" + "OVERWRITE ALL\n" + "SELECT \"dim2\"\n" From 006376789cf2d3b68ba18d496940c116afdf95e0 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Sat, 27 Jan 2024 17:00:17 +0530 Subject: [PATCH 24/50] Fix forbidden API --- .../sql/calcite/parser/ExternalDestinationSqlIdentifier.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java index 2e8ad8b74f65..fc5a67e6b794 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java @@ -24,6 +24,7 @@ import org.apache.calcite.sql.SqlWriter; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.utils.CollectionUtils; import java.util.Map; @@ -65,7 +66,7 @@ public 
void unparse(SqlWriter writer, int leftPrec, int rightPrec) SqlWriter.Frame frame = writer.startFunCall(getDestinationType()); for (Map.Entry property : properties.entrySet()) { writer.sep(","); - writer.print(String.format("%s = '%s'", property.getKey(), property.getValue())); + writer.print(StringUtils.format("%s = '%s'", property.getKey(), property.getValue())); } writer.endFunCall(frame); writer.endFunCall(externFrame); From 6f46c4137728e79b2f21ad8e65159d31a84b44d9 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Mon, 29 Jan 2024 10:40:36 +0530 Subject: [PATCH 25/50] Update docs --- docs/multi-stage-query/reference.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index ad601fb0c6dd..d569a60a3b05 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -103,6 +103,9 @@ INSERT statements and REPLACE statements are both supported with an `EXTERN` des clause that determines the format. Currently, only `CSV` is supported as a format. +Export statements support the context parameter `rowsPerPage` for the number of rows in each exported file. The default value +is 100,000. + INSERT statements append the results to the existing files at the destination. ```sql INSERT INTO @@ -127,7 +130,7 @@ FROM
``` Exporting is currently supported to Amazon S3 storage. The S3 extension is required to be loaded for this. -This can be done passing the function `S3` to the `EXTERN` function. +This can be done passing the function `S3()` as an argument to the `EXTERN` function. ```sql INSERT INTO From 3f8d7157c78a9ef380dc651b5d9bd011e45fc10b Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Mon, 29 Jan 2024 19:00:28 +0530 Subject: [PATCH 26/50] Update docs --- docs/multi-stage-query/concepts.md | 5 +---- docs/multi-stage-query/reference.md | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/docs/multi-stage-query/concepts.md b/docs/multi-stage-query/concepts.md index a2daf9ee957d..f9a1bc734b2b 100644 --- a/docs/multi-stage-query/concepts.md +++ b/docs/multi-stage-query/concepts.md @@ -118,10 +118,7 @@ for dimension-based pruning, see [Clustering](#clustering). ### Write to an external destination with `EXTERN` Query tasks can write data to an external destination through the `EXTERN` function, when it is used with the `INTO` -clause, such as `REPLACE INTO EXTERN(...)` - -The EXTERN function takes arguments which specifies where to the files should be created. - +clause, such as `REPLACE INTO EXTERN(...)` The EXTERN function takes arguments which specifies where to the files should be created. The format can be specified using an `AS` clause. For more information about the syntax, see [`EXTERN`](./reference.md#extern-function). diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index d569a60a3b05..502c1256fc28 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -45,7 +45,7 @@ making it easy to reuse the same SQL statement for each ingest: just specify the ### `EXTERN` Function -Use the `EXTERN` function to read external data or write to an external source. +Use the `EXTERN` function to read external data or write to an external location. 
#### `EXTERN` as an input source @@ -95,13 +95,13 @@ For more information, see [Read external data with EXTERN](concepts.md#read-exte #### `EXTERN` to export to a destination -`EXTERN` can be used as a destination, which will export the data to the specified location and format. EXTERN when -used in this way accepts one argument. Please note that partitioning (`PARTITIONED BY`) and clustering (`CLUSTERED BY`) -is not currently supported with export statements. +`EXTERN` can be used to specify a destination, where the data needs to be exported. +This variation of EXTERN requires one argument, the details of the destination as specified below. +This variation additionally requires an `AS` clause to specify the format of the exported rows. -INSERT statements and REPLACE statements are both supported with an `EXTERN` destination. The statements require an `AS` -clause that determines the format. -Currently, only `CSV` is supported as a format. +INSERT statements and REPLACE statements are both supported with an `EXTERN` destination. +Only `CSV` format is supported at the moment. +Please note that partitioning (`PARTITIONED BY`) and clustering (`CLUSTERED BY`) is not currently supported with export statements. Export statements support the context parameter `rowsPerPage` for the number of rows in each exported file. The default value is 100,000. @@ -117,7 +117,8 @@ FROM
``` REPLACE statements have an additional OVERWRITE clause. As partitioning is not yet supported, only `OVERWRITE ALL` -is allowed. REPLACE deletes any existing files at the destination and creates new files with the results of the query. +is allowed. REPLACE deletes any currently existing files at the specified directory, and creates new files with the results of the query. + ```sql REPLACE INTO @@ -129,8 +130,7 @@ SELECT FROM
``` -Exporting is currently supported to Amazon S3 storage. The S3 extension is required to be loaded for this. -This can be done passing the function `S3()` as an argument to the `EXTERN` function. +Exporting is currently supported for Amazon S3 storage. This can be done passing the function `S3()` as an argument to the `EXTERN` function. The `druid-s3-extensions` should be loaded. ```sql INSERT INTO From 7c00062ab537f4fc840f4a73726f7de16b58e5f0 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 30 Jan 2024 14:55:39 +0530 Subject: [PATCH 27/50] Address review comments --- .../apache/druid/msq/exec/ControllerImpl.java | 19 ++---- .../exec/ExportStorageConnectorFactories.java | 44 -------------- .../results/ExportResultsFrameProcessor.java | 11 ++-- .../druid/msq/sql/MSQTaskQueryMaker.java | 4 +- .../apache/druid/msq/exec/MSQExportTest.java | 58 ++++++++++++++++--- .../TestExportStorageConnectorProvider.java | 16 +++-- .../apache/druid/msq/test/MSQTestBase.java | 19 +++--- .../storage/s3/S3StorageDruidModule.java | 4 -- .../S3ExportStorageConnectorFactory.java | 49 ---------------- .../s3/output/S3StorageConnectorProvider.java | 19 ------ .../s3/S3StorageConnectorProviderTest.java | 7 +++ .../LocalExportStorageConnectorFactory.java | 44 -------------- .../ExternalDestinationSqlIdentifier.java | 1 + .../sql/calcite/planner/IngestHandler.java | 2 +- .../sql/destination/ExportDestination.java | 24 ++++---- .../sql/destination/IngestDestination.java | 4 +- .../sql/destination/TableDestination.java | 3 +- .../druid/sql/calcite/CalciteExportTest.java | 2 +- .../sql/calcite/TestInsertQueryMaker.java | 2 +- 19 files changed, 105 insertions(+), 227 deletions(-) delete mode 100644 extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java rename processing/src/main/java/org/apache/druid/storage/export/ExportStorageConnectorFactory.java => 
extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnectorProvider.java (71%) delete mode 100644 extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java delete mode 100644 processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index f37944b3f73c..8efc42b1fd6d 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -207,7 +207,6 @@ import org.apache.druid.sql.http.ResultFormat; import org.apache.druid.storage.StorageConnector; import org.apache.druid.storage.StorageConnectorProvider; -import org.apache.druid.storage.export.ExportStorageConnectorFactory; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentTimeline; import org.apache.druid.timeline.partition.DimensionRangeShardSpec; @@ -1888,21 +1887,13 @@ private static QueryDefinition makeQueryDefinition( } } else if (querySpec.getDestination() instanceof ExportMSQDestination) { final ExportMSQDestination exportMSQDestination = (ExportMSQDestination) querySpec.getDestination(); - final Map storageConnectorFactories = injector.getInstance(ExportStorageConnectorFactories.class) - .getFactories(); - if (!storageConnectorFactories.containsKey(exportMSQDestination.getStorageConnectorType())) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.RUNTIME_FAILURE) - .build("No storage connector found for storage connector type:[%s].", exportMSQDestination.getStorageConnectorType()); - } - - final StorageConnectorProvider storageConnectorProvider = - 
storageConnectorFactories.get(exportMSQDestination.getStorageConnectorType()) - .get(exportMSQDestination.getProperties(), injector); - + final StorageConnectorProvider storageConnectorProvider = jsonMapper.convertValue( + exportMSQDestination.getProperties(), + StorageConnectorProvider.class + ); final ResultFormat resultFormat = exportMSQDestination.getResultFormat(); - // If the statement is a REPLACE statement, delete the existing files at the destination. + // If the statement is a 'REPLACE' statement, delete the existing files at the destination. if (exportMSQDestination.getReplaceTimeChunks() != null) { if (Intervals.ONLY_ETERNITY.equals(exportMSQDestination.getReplaceTimeChunks())) { StorageConnector storageConnector = storageConnectorProvider.get(); diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java deleted file mode 100644 index 806ef77d9459..000000000000 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportStorageConnectorFactories.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.msq.exec; - -import com.google.inject.Inject; -import org.apache.druid.storage.export.ExportStorageConnectorFactory; - -import java.util.Map; - -/** - * Holder for injected {@link ExportStorageConnectorFactory}. - */ -public class ExportStorageConnectorFactories -{ - private final Map factoriesMap; - - @Inject - public ExportStorageConnectorFactories(Map factoriesMap) - { - this.factoriesMap = factoriesMap; - } - - public Map getFactories() - { - return factoriesMap; - } -} diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java index bcc6c749f720..ad68330663c2 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java @@ -58,9 +58,8 @@ public class ExportResultsFrameProcessor implements FrameProcessor private final FrameReader frameReader; private final StorageConnector storageConnector; private final ObjectMapper jsonMapper; - private final int partitionNumber; - private final int workerNumber; private final ChannelCounters channelCounter; + final String exportFilePath; public ExportResultsFrameProcessor( ReadableFrameChannel inputChannel, @@ -78,9 +77,8 @@ public ExportResultsFrameProcessor( this.frameReader = frameReader; this.storageConnector = storageConnector; this.jsonMapper = jsonMapper; - this.partitionNumber = partitionNumber; - this.workerNumber = workerNumber; this.channelCounter = channelCounter; + this.exportFilePath = getExportFilePath(workerNumber, partitionNumber, exportFormat); } @Override @@ -118,15 +116,14 @@ private 
void exportFrame(final Frame frame) throws IOException new FrameStorageAdapter(frame, frameReader, Intervals.ETERNITY) .makeCursors(null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null); - final String exportFilePath = getExportFilePath(workerNumber, partitionNumber, exportFormat); try (OutputStream stream = storageConnector.write(exportFilePath)) { ResultFormat.Writer formatter = exportFormat.createFormatter(stream, jsonMapper); + formatter.writeResponseStart(); SequenceUtils.forEach( cursorSequence, cursor -> { try { - formatter.writeResponseStart(); final ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); //noinspection rawtypes @@ -147,7 +144,6 @@ private void exportFrame(final Frame frame) throws IOException formatter.writeRowEnd(); cursor.advance(); } - formatter.writeResponseEnd(); } catch (IOException e) { throw DruidException.forPersona(DruidException.Persona.USER) @@ -156,6 +152,7 @@ private void exportFrame(final Frame frame) throws IOException } } ); + formatter.writeResponseEnd(); } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java index a572e583019c..7053e9461bda 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java @@ -220,7 +220,7 @@ public QueryResponse runQuery(final DruidQuery druidQuery) } destination = new ExportMSQDestination( - exportDestination.getDestinationType(), + exportDestination.getDestination(), exportDestination.getProperties(), format, replaceTimeChunks @@ -249,7 +249,7 @@ public QueryResponse runQuery(final DruidQuery druidQuery) ); final DataSourceMSQDestination dataSourceMSQDestination = new DataSourceMSQDestination( - targetDataSource.getDestinationName(), + 
targetDataSource.getType(), segmentGranularityObject, segmentSortOrder, replaceTimeChunks diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java index f49836f5a694..207acd20f85b 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java @@ -21,9 +21,10 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; -import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.msq.export.TestExportStorageConnector; import org.apache.druid.msq.test.MSQTestBase; +import org.apache.druid.msq.util.MultiStageQueryContext; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.http.ResultFormat; @@ -33,16 +34,22 @@ import org.junit.internal.matchers.ThrowableMessageMatcher; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; import java.nio.charset.Charset; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.Objects; public class MSQExportTest extends MSQTestBase { @Test public void testExport() throws IOException { + TestExportStorageConnector storageConnector = (TestExportStorageConnector) exportStorageConnectorProvider.get(); + RowSignature rowSignature = RowSignature.builder() .add("__time", ColumnType.LONG) .add("dim1", ColumnType.STRING) @@ -57,9 +64,40 @@ public void testExport() throws IOException .setExpectedResultRows(ImmutableList.of()) .verifyResults(); + List objects = expectedFooFileContents(); + + Assert.assertEquals( + convertResultsToString(objects), + new 
String(storageConnector.getByteArrayOutputStream().toByteArray(), Charset.defaultCharset()) + ); + } + + @Test + public void testNumberOfRowsPerFile() throws IOException + { + RowSignature rowSignature = RowSignature.builder() + .add("__time", ColumnType.LONG) + .add("dim1", ColumnType.STRING) + .add("cnt", ColumnType.LONG).build(); + + File exportDir = temporaryFolder.newFolder("export/"); + + Map queryContext = new HashMap<>(DEFAULT_MSQ_CONTEXT); + queryContext.put(MultiStageQueryContext.CTX_ROWS_PER_PAGE, 1); + + testIngestQuery().setSql( + StringUtils.format("insert into extern(localStorage(basePath='%s')) as csv select cnt, dim1 from foo", exportDir.getAbsolutePath()) + ) + .setExpectedDataSource("foo1") + .setQueryContext(queryContext) + .setExpectedRowSignature(rowSignature) + .setExpectedSegment(ImmutableSet.of()) + .setExpectedResultRows(ImmutableList.of()) + .verifyResults(); + Assert.assertEquals( - expectedFooFileContents(), - new String(testExportStorageConnector.getByteArrayOutputStream().toByteArray(), Charset.defaultCharset()) + expectedFooFileContents().size(), + Objects.requireNonNull(new File(exportDir.getAbsolutePath(), "worker0").listFiles()).length ); } @@ -78,17 +116,16 @@ public void testWithUnsupportedStorageConnector() .setExpectedRowSignature(rowSignature) .setExpectedSegment(ImmutableSet.of()) .setExpectedResultRows(ImmutableList.of()) - .setExpectedExecutionErrorMatcher(CoreMatchers.allOf( - CoreMatchers.instanceOf(ISE.class), + .setExpectedExecutionErrorMatcher( ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString( - "No storage connector found for storage connector type:[hdfs]." 
- ))) + "Could not resolve type id 'hdfs' as a subtype" + )) ).verifyExecutionError(); } - private String expectedFooFileContents() throws IOException + private List expectedFooFileContents() { - List expectedRows = new ArrayList<>(ImmutableList.of( + return new ArrayList<>(ImmutableList.of( new Object[]{0, "1", null}, new Object[]{1, "1", 10.1}, new Object[]{2, "1", 2}, @@ -96,7 +133,10 @@ private String expectedFooFileContents() throws IOException new Object[]{4, "1", "def"}, new Object[]{5, "1", "abc"} )); + } + private String convertResultsToString(List expectedRows) throws IOException + { ByteArrayOutputStream expectedResult = new ByteArrayOutputStream(); ResultFormat.Writer formatter = ResultFormat.CSV.createFormatter(expectedResult, objectMapper); formatter.writeResponseStart(); diff --git a/processing/src/main/java/org/apache/druid/storage/export/ExportStorageConnectorFactory.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnectorProvider.java similarity index 71% rename from processing/src/main/java/org/apache/druid/storage/export/ExportStorageConnectorFactory.java rename to extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnectorProvider.java index 6f1c4dcadd2f..ab5d43238810 100644 --- a/processing/src/main/java/org/apache/druid/storage/export/ExportStorageConnectorFactory.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnectorProvider.java @@ -17,14 +17,18 @@ * under the License. 
*/ -package org.apache.druid.storage.export; +package org.apache.druid.msq.export; -import com.google.inject.Injector; +import org.apache.druid.storage.StorageConnector; import org.apache.druid.storage.StorageConnectorProvider; -import java.util.Map; - -public interface ExportStorageConnectorFactory +public class TestExportStorageConnectorProvider implements StorageConnectorProvider { - StorageConnectorProvider get(Map properties, Injector injector); + private static final StorageConnector STORAGE_CONNECTOR = new TestExportStorageConnector(); + + @Override + public StorageConnector get() + { + return STORAGE_CONNECTOR; + } } diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index b36590a29839..75356a95e858 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java @@ -34,7 +34,6 @@ import com.google.inject.Key; import com.google.inject.Module; import com.google.inject.TypeLiteral; -import com.google.inject.multibindings.MapBinder; import com.google.inject.util.Modules; import com.google.inject.util.Providers; import org.apache.druid.client.ImmutableSegmentLoadInfo; @@ -89,6 +88,7 @@ import org.apache.druid.msq.exec.LoadedSegmentDataProviderFactory; import org.apache.druid.msq.exec.WorkerMemoryParameters; import org.apache.druid.msq.export.TestExportStorageConnector; +import org.apache.druid.msq.export.TestExportStorageConnectorProvider; import org.apache.druid.msq.guice.MSQDurableStorageModule; import org.apache.druid.msq.guice.MSQExternalDataSourceModule; import org.apache.druid.msq.guice.MSQIndexingModule; @@ -178,9 +178,8 @@ import org.apache.druid.sql.guice.SqlBindings; import org.apache.druid.storage.StorageConnector; import org.apache.druid.storage.StorageConnectorProvider; 
-import org.apache.druid.storage.export.ExportStorageConnectorFactory; -import org.apache.druid.storage.export.LocalExportStorageConnectorFactory; import org.apache.druid.storage.local.LocalFileStorageConnector; +import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.PruneLoadSpec; import org.apache.druid.timeline.SegmentId; @@ -310,7 +309,7 @@ public class MSQTestBase extends BaseCalciteQueryTest protected SqlStatementFactory sqlStatementFactory; protected AuthorizerMapper authorizerMapper; private IndexIO indexIO; - protected TestExportStorageConnector testExportStorageConnector = new TestExportStorageConnector(); + protected TestExportStorageConnectorProvider exportStorageConnectorProvider = new TestExportStorageConnectorProvider(); // Contains the metadata of loaded segments protected List loadedSegmentsMetadata = new ArrayList<>(); // Mocks the return of data from data servers @@ -480,11 +479,6 @@ public String getFormatString() } binder.bind(DataSegment.PruneSpecsHolder.class).toInstance(DataSegment.PruneSpecsHolder.DEFAULT); - - MapBinder mapbinder - = MapBinder.newMapBinder(binder, String.class, ExportStorageConnectorFactory.class); - mapbinder.addBinding(LocalExportStorageConnectorFactory.TYPE_NAME).to(LocalExportStorageConnectorFactory.class); - mapbinder.addBinding(TestExportStorageConnector.TYPE_NAME).toInstance((properties, injector) -> () -> testExportStorageConnector); }, // Requirement of WorkerMemoryParameters.createProductionInstanceForWorker(injector) binder -> binder.bind(AppenderatorsManager.class).toProvider(() -> null), @@ -517,6 +511,13 @@ public String getFormatString() .build(); objectMapper = setupObjectMapper(injector); + objectMapper.registerModule( + new SimpleModule(StorageConnector.class.getSimpleName()) + .registerSubtypes( + new NamedType(TestExportStorageConnectorProvider.class, TestExportStorageConnector.TYPE_NAME), + new 
NamedType(LocalFileStorageConnectorProvider.class, "localStorage") + ) + ); objectMapper.registerModules(sqlModule.getJacksonModules()); doReturn(mock(Request.class)).when(brokerClient).makeRequest(any(), anyString()); diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java index 9626f6107312..3747088aeb6e 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3StorageDruidModule.java @@ -44,8 +44,6 @@ import org.apache.druid.guice.LazySingleton; import org.apache.druid.initialization.DruidModule; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.storage.export.ExportStorageConnectorFactory; -import org.apache.druid.storage.s3.output.S3ExportStorageConnectorFactory; import java.util.List; @@ -117,8 +115,6 @@ public void configure(Binder binder) JsonConfigProvider.bind(binder, "druid.storage", S3StorageConfig.class); JsonConfigProvider.bind(binder, "druid.storage.sse.kms", S3SSEKmsConfig.class); JsonConfigProvider.bind(binder, "druid.storage.sse.custom", S3SSECustomConfig.class); - MapBinder mapbinder = MapBinder.newMapBinder(binder, String.class, ExportStorageConnectorFactory.class); - mapbinder.addBinding(SCHEME).to(S3ExportStorageConnectorFactory.class); Binders.taskLogsBinder(binder).addBinding(SCHEME).to(S3TaskLogs.class); JsonConfigProvider.bind(binder, "druid.indexer.logs", S3TaskLogsConfig.class); diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java deleted file mode 100644 index 0ffc98b9eaf0..000000000000 --- 
a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageConnectorFactory.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.storage.s3.output; - -import com.google.inject.Injector; -import org.apache.druid.query.QueryContexts; -import org.apache.druid.storage.StorageConnectorProvider; -import org.apache.druid.storage.export.ExportStorageConnectorFactory; - -import java.io.File; -import java.util.Map; - -public class S3ExportStorageConnectorFactory implements ExportStorageConnectorFactory -{ - @Override - public StorageConnectorProvider get(Map properties, Injector injector) - { - String chunkSize = properties.getOrDefault(S3StorageConnectorProvider.CHUNK_SIZE_FIELD_NAME, null); - Integer maxRetries = QueryContexts.getAsInt( - S3StorageConnectorProvider.MAX_RETRY_FIELD_NAME, - properties.getOrDefault(S3StorageConnectorProvider.MAX_RETRY_FIELD_NAME, null)); - - return new S3StorageConnectorProvider( - properties.get(S3StorageConnectorProvider.BUCKET_FIELD_NAME), - properties.get(S3StorageConnectorProvider.PREFIX_FIELD_NAME), - new File(properties.get(S3StorageConnectorProvider.TEMP_DIR_FIELD_NAME)), - 
QueryContexts.getAsHumanReadableBytes(S3StorageConnectorProvider.CHUNK_SIZE_FIELD_NAME, chunkSize, null), - maxRetries, - injector - ); - } -} diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java index c7be1e26e27e..7f4b43a0ede8 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; -import com.google.inject.Injector; import org.apache.druid.java.util.common.HumanReadableBytes; import org.apache.druid.storage.StorageConnector; import org.apache.druid.storage.StorageConnectorProvider; @@ -36,11 +35,6 @@ @JsonTypeName(S3StorageDruidModule.SCHEME) public class S3StorageConnectorProvider extends S3OutputConfig implements StorageConnectorProvider { - public static final String BUCKET_FIELD_NAME = "bucket"; - public static final String PREFIX_FIELD_NAME = "prefix"; - public static final String TEMP_DIR_FIELD_NAME = "tempDir"; - public static final String CHUNK_SIZE_FIELD_NAME = "chunkSize"; - public static final String MAX_RETRY_FIELD_NAME = "maxRetry"; @JacksonInject ServerSideEncryptingAmazonS3 s3; @@ -56,19 +50,6 @@ public S3StorageConnectorProvider( super(bucket, prefix, tempDir, chunkSize, maxRetry); } - public S3StorageConnectorProvider( - String bucket, - String prefix, - File tempDir, - HumanReadableBytes chunkSize, - Integer maxRetry, - Injector injector - ) - { - super(bucket, prefix, tempDir, chunkSize, maxRetry); - this.s3 = injector.getInstance(ServerSideEncryptingAmazonS3.class); - } - @Override public StorageConnector 
get() { diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageConnectorProviderTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageConnectorProviderTest.java index 9f9d632f6181..0a23c006917c 100644 --- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageConnectorProviderTest.java +++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageConnectorProviderTest.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.InjectableValues; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; import com.google.inject.Binder; import com.google.inject.Injector; import com.google.inject.Key; @@ -114,6 +115,12 @@ public void createS3StorageFactoryWithMissingTempDir() ); } + @Test + public void name() + { + ImmutableMap stringStringImmutableMap = ImmutableMap.of("type", "local", "basePath", "/path"); + } + private StorageConnectorProvider getStorageConnectorProvider(Properties properties) { StartupInjectorBuilder startupInjectorBuilder = new StartupInjectorBuilder().add( diff --git a/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java b/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java deleted file mode 100644 index d3aca9b79475..000000000000 --- a/processing/src/main/java/org/apache/druid/storage/export/LocalExportStorageConnectorFactory.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.storage.export; - -import com.google.inject.Injector; -import org.apache.druid.storage.StorageConnectorProvider; -import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; - -import java.io.File; -import java.util.Map; - -/** - * Provides a {@link StorageConnectorProvider} which allows writing to the local machine. Not meaningful for production, - * and is used for tests or debugging purposes. - *
- * Not to be bound in Guice modules. - */ -public class LocalExportStorageConnectorFactory implements ExportStorageConnectorFactory -{ - public static final String TYPE_NAME = "localStorage"; - - @Override - public StorageConnectorProvider get(Map properties, Injector injector) - { - return new LocalFileStorageConnectorProvider(new File(properties.get(LocalFileStorageConnectorProvider.BASE_PATH_FIELD_NAME))); - } -} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java index fc5a67e6b794..56457db7162e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java @@ -43,6 +43,7 @@ public ExternalDestinationSqlIdentifier( ) { super(name, pos); + properties.put("type", getDestinationType()); this.properties = properties; } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java index 9e1566210894..2f532356d80d 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java @@ -120,7 +120,7 @@ public void validate() if (ingestNode().getPartitionedBy() != null) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.UNSUPPORTED) - .build("Export statements do not currently support a PARTITIONED BY or CLUSTERED BY clause."); + .build("Export statements do not support a PARTITIONED BY or CLUSTERED BY clause."); } } else if (ingestNode().getPartitionedBy() == null) { throw InvalidSqlInput.exception( diff --git a/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java 
b/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java index f6cccece3d11..c4c8638b62d0 100644 --- a/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java +++ b/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java @@ -19,7 +19,6 @@ package org.apache.druid.sql.destination; -import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; @@ -33,19 +32,19 @@ public class ExportDestination implements IngestDestination { public static final String TYPE_KEY = "external"; - private final String destinationType; + private final String destination; private final Map properties; - public ExportDestination(@JsonProperty("destinationType") String destinationType, @JsonProperty("properties") Map properties) + public ExportDestination(@JsonProperty("destination") String destination, @JsonProperty("properties") Map properties) { - this.destinationType = destinationType; + this.destination = destination; this.properties = properties; } - @JsonProperty("destinationType") - public String getDestinationType() + @JsonProperty("destination") + public String getDestination() { - return destinationType; + return destination; } @JsonProperty("properties") @@ -55,10 +54,9 @@ public Map getProperties() } @Override - @JsonIgnore - public String getDestinationName() + public String getType() { - return "EXTERN"; + return TYPE_KEY; } @Override @@ -71,7 +69,7 @@ public boolean equals(Object o) return false; } ExportDestination that = (ExportDestination) o; - return Objects.equals(destinationType, that.destinationType) && Objects.equals( + return Objects.equals(destination, that.destination) && Objects.equals( properties, that.properties ); @@ -80,14 +78,14 @@ public boolean equals(Object o) @Override public int hashCode() { - return Objects.hash(destinationType, properties); + return Objects.hash(destination, properties); } @Override 
public String toString() { return "ExportDestination{" + - "destinationType='" + destinationType + '\'' + + "destination='" + destination + '\'' + ", properties=" + properties + '}'; } diff --git a/sql/src/main/java/org/apache/druid/sql/destination/IngestDestination.java b/sql/src/main/java/org/apache/druid/sql/destination/IngestDestination.java index 710362fef7ac..0b1931c847d4 100644 --- a/sql/src/main/java/org/apache/druid/sql/destination/IngestDestination.java +++ b/sql/src/main/java/org/apache/druid/sql/destination/IngestDestination.java @@ -23,11 +23,11 @@ import org.apache.druid.guice.annotations.UnstableApi; /** - * Represents the destination to which the ingested data is written to. + * Represents the destination where the data is ingested. */ @UnstableApi @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") public interface IngestDestination { - String getDestinationName(); + String getType(); } diff --git a/sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java b/sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java index 4914cd331de8..972add963dd1 100644 --- a/sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java +++ b/sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java @@ -41,8 +41,7 @@ public TableDestination(@JsonProperty("tableName") String tableName) } @Override - @JsonProperty("tableName") - public String getDestinationName() + public String getType() { return tableName; } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index 788f9843e528..de1a6ecc64ca 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -64,7 +64,7 @@ public void testExportWithPartitionedBy() + "PARTITIONED BY ALL") .expectValidationError( DruidException.class, - "Export statements do 
not currently support a PARTITIONED BY or CLUSTERED BY clause." + "Export statements do not support a PARTITIONED BY or CLUSTERED BY clause." ) .verify(); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java b/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java index 0e0d6e53e197..ba1eec29d796 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java @@ -55,7 +55,7 @@ public QueryResponse runQuery(final DruidQuery druidQuery) // 2) Return the dataSource and signature of the insert operation, so tests can confirm they are correct. return QueryResponse.withEmptyContext( - Sequences.simple(ImmutableList.of(new Object[]{destination.getDestinationName(), signature})) + Sequences.simple(ImmutableList.of(new Object[]{destination.getType(), signature})) ); } } From 6e9f53b16728c1e6120010f0f81c5c554e0516eb Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 30 Jan 2024 16:51:09 +0530 Subject: [PATCH 28/50] Address review comments --- .../apache/druid/msq/exec/ControllerImpl.java | 17 +++++++++++++---- .../apache/druid/msq/exec/MSQExportTest.java | 10 ++++++---- .../s3/S3StorageConnectorProviderTest.java | 7 ------- .../druid/sql/destination/TableDestination.java | 6 ++++++ 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 8efc42b1fd6d..f971166e7cb9 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1887,10 +1887,19 @@ private static QueryDefinition makeQueryDefinition( } } else if (querySpec.getDestination() instanceof ExportMSQDestination) { 
final ExportMSQDestination exportMSQDestination = (ExportMSQDestination) querySpec.getDestination(); - final StorageConnectorProvider storageConnectorProvider = jsonMapper.convertValue( - exportMSQDestination.getProperties(), - StorageConnectorProvider.class - ); + final StorageConnectorProvider storageConnectorProvider; + try { + storageConnectorProvider = jsonMapper.convertValue( + exportMSQDestination.getProperties(), + StorageConnectorProvider.class + ); + } + catch (IllegalArgumentException e) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.RUNTIME_FAILURE) + .build("No storage connector found for storage connector type:[%s].", exportMSQDestination.getStorageConnectorType()); + } + final ResultFormat resultFormat = exportMSQDestination.getResultFormat(); // If the statement is a 'REPLACE' statement, delete the existing files at the destination. diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java index 207acd20f85b..a722ef015a6e 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java @@ -21,6 +21,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.msq.export.TestExportStorageConnector; import org.apache.druid.msq.test.MSQTestBase; @@ -116,11 +117,12 @@ public void testWithUnsupportedStorageConnector() .setExpectedRowSignature(rowSignature) .setExpectedSegment(ImmutableSet.of()) .setExpectedResultRows(ImmutableList.of()) - .setExpectedExecutionErrorMatcher( + .setExpectedExecutionErrorMatcher(CoreMatchers.allOf( + 
CoreMatchers.instanceOf(ISE.class), ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString( - "Could not resolve type id 'hdfs' as a subtype" - )) - ).verifyExecutionError(); + "No storage connector found for storage connector type:[hdfs]." + )))) + .verifyExecutionError(); } private List expectedFooFileContents() diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageConnectorProviderTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageConnectorProviderTest.java index 0a23c006917c..9f9d632f6181 100644 --- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageConnectorProviderTest.java +++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/S3StorageConnectorProviderTest.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.databind.InjectableValues; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableMap; import com.google.inject.Binder; import com.google.inject.Injector; import com.google.inject.Key; @@ -115,12 +114,6 @@ public void createS3StorageFactoryWithMissingTempDir() ); } - @Test - public void name() - { - ImmutableMap stringStringImmutableMap = ImmutableMap.of("type", "local", "basePath", "/path"); - } - private StorageConnectorProvider getStorageConnectorProvider(Properties properties) { StartupInjectorBuilder startupInjectorBuilder = new StartupInjectorBuilder().add( diff --git a/sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java b/sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java index 972add963dd1..587622ee2c4d 100644 --- a/sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java +++ b/sql/src/main/java/org/apache/druid/sql/destination/TableDestination.java @@ -46,6 +46,12 @@ public String getType() return tableName; } + @JsonProperty("tableName") + public String getTableName() + { + return tableName; + } + 
@Override public String toString() { From 9bbda77a3b6f885ba03d85a3cb9572c19af8c61e Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 30 Jan 2024 17:55:52 +0530 Subject: [PATCH 29/50] Fix tests --- .../org/apache/druid/sql/calcite/CalciteExportTest.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index de1a6ecc64ca..cf58b6d55c61 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -25,6 +25,7 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.filtration.Filtration; +import org.apache.druid.sql.destination.ExportDestination; import org.junit.Test; public class CalciteExportTest extends CalciteIngestionDmlTest @@ -49,7 +50,7 @@ public void testReplaceIntoExtern() .build() ) .expectResources(dataSourceRead("foo")) - .expectTarget("EXTERN", RowSignature.builder().add("dim2", ColumnType.STRING).build()) + .expectTarget(ExportDestination.TYPE_KEY, RowSignature.builder().add("dim2", ColumnType.STRING).build()) .verify(); } @@ -88,7 +89,7 @@ public void testInsertIntoExtern() .build() ) .expectResources(dataSourceRead("foo")) - .expectTarget("EXTERN", RowSignature.builder().add("dim2", ColumnType.STRING).build()) + .expectTarget(ExportDestination.TYPE_KEY, RowSignature.builder().add("dim2", ColumnType.STRING).build()) .verify(); } @@ -124,7 +125,7 @@ public void testExportSourceWithNoArguments() .build() ) .expectResources(dataSourceRead("foo")) - .expectTarget("EXTERN", RowSignature.builder().add("dim2", ColumnType.STRING).build()) + .expectTarget(ExportDestination.TYPE_KEY, RowSignature.builder().add("dim2", ColumnType.STRING).build()) .verify(); } From 20103caa5fe240ec7d7e88f39ce3a0f754908e09 Mon 
Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 30 Jan 2024 21:19:56 +0530 Subject: [PATCH 30/50] Address review comments --- .../apache/druid/msq/exec/ControllerImpl.java | 41 +++-------- .../destination/DataSourceMSQDestination.java | 8 +++ .../DurableStorageMSQDestination.java | 7 ++ .../destination/ExportMSQDestination.java | 49 +++++++------ .../indexing/destination/MSQDestination.java | 3 +- .../destination/TaskReportMSQDestination.java | 7 ++ .../druid/msq/sql/MSQTaskQueryMaker.java | 3 +- .../druid/msq/sql/MSQTaskSqlEngine.java | 2 + .../apache/druid/msq/exec/MSQExportTest.java | 28 +------- .../destination/ExportMSQDestinationTest.java | 6 +- .../apache/druid/msq/test/MSQTestBase.java | 4 +- .../ExternalDestinationSqlIdentifier.java | 24 ++++++- .../sql/calcite/planner/IngestHandler.java | 68 +++++++++++-------- .../sql/destination/ExportDestination.java | 32 +++------ .../druid/sql/calcite/CalciteExportTest.java | 38 +++++------ .../sql/calcite/export/TestExportModule.java | 50 ++++++++++++++ .../export/TestExportStorageConnector.java | 2 +- .../TestExportStorageConnectorProvider.java | 2 +- .../destination/ExportDestinationTest.java | 7 +- 19 files changed, 212 insertions(+), 169 deletions(-) create mode 100644 sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportModule.java rename {extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq => sql/src/test/java/org/apache/druid/sql/calcite}/export/TestExportStorageConnector.java (98%) rename {extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq => sql/src/test/java/org/apache/druid/sql/calcite}/export/TestExportStorageConnectorProvider.java (96%) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index f971166e7cb9..55273e685780 100644 --- 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -33,7 +33,6 @@ import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import com.google.common.util.concurrent.SettableFuture; -import com.google.inject.Injector; import it.unimi.dsi.fastutil.ints.Int2ObjectAVLTreeMap; import it.unimi.dsi.fastutil.ints.Int2ObjectMap; import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; @@ -167,7 +166,6 @@ import org.apache.druid.msq.querykit.MultiQueryKit; import org.apache.druid.msq.querykit.QueryKit; import org.apache.druid.msq.querykit.QueryKitUtils; -import org.apache.druid.msq.querykit.ShuffleSpecFactories; import org.apache.druid.msq.querykit.ShuffleSpecFactory; import org.apache.druid.msq.querykit.groupby.GroupByQueryKit; import org.apache.druid.msq.querykit.results.ExportResultsFrameProcessorFactory; @@ -656,8 +654,7 @@ private QueryDefinition initializeQueryDefAndState(final Closer closer) id(), makeQueryControllerToolKit(), task.getQuerySpec(), - context.jsonMapper(), - context.injector() + context.jsonMapper() ); QueryValidator.validateQueryDef(queryDef); @@ -1749,8 +1746,7 @@ private static QueryDefinition makeQueryDefinition( final String queryId, @SuppressWarnings("rawtypes") final QueryKit toolKit, final MSQSpec querySpec, - final ObjectMapper jsonMapper, - final Injector injector + final ObjectMapper jsonMapper ) { final MSQTuningConfig tuningConfig = querySpec.getTuningConfig(); @@ -1759,7 +1755,8 @@ private static QueryDefinition makeQueryDefinition( final ShuffleSpecFactory shuffleSpecFactory; if (MSQControllerTask.isIngestion(querySpec)) { - shuffleSpecFactory = ShuffleSpecFactories.getGlobalSortWithTargetSize(tuningConfig.getRowsPerSegment()); + shuffleSpecFactory = querySpec.getDestination() + 
.getShuffleSpecFactory(tuningConfig.getRowsPerSegment()); if (!columnMappings.hasUniqueOutputColumnNames()) { // We do not expect to hit this case in production, because the SQL validator checks that column names @@ -1780,21 +1777,10 @@ private static QueryDefinition makeQueryDefinition( } else { queryToPlan = querySpec.getQuery(); } - } else if (querySpec.getDestination() instanceof TaskReportMSQDestination) { - shuffleSpecFactory = ShuffleSpecFactories.singlePartition(); - queryToPlan = querySpec.getQuery(); - } else if (querySpec.getDestination() instanceof DurableStorageMSQDestination) { - shuffleSpecFactory = ShuffleSpecFactories.getGlobalSortWithTargetSize( - MultiStageQueryContext.getRowsPerPage(querySpec.getQuery().context()) - ); - queryToPlan = querySpec.getQuery(); - } else if (querySpec.getDestination() instanceof ExportMSQDestination) { - shuffleSpecFactory = ShuffleSpecFactories.getGlobalSortWithTargetSize( - MultiStageQueryContext.getRowsPerPage(querySpec.getQuery().context()) - ); - queryToPlan = querySpec.getQuery(); } else { - throw new ISE("Unsupported destination [%s]", querySpec.getDestination()); + shuffleSpecFactory = querySpec.getDestination() + .getShuffleSpecFactory(MultiStageQueryContext.getRowsPerPage(querySpec.getQuery().context())); + queryToPlan = querySpec.getQuery(); } final QueryDefinition queryDef; @@ -1887,18 +1873,7 @@ private static QueryDefinition makeQueryDefinition( } } else if (querySpec.getDestination() instanceof ExportMSQDestination) { final ExportMSQDestination exportMSQDestination = (ExportMSQDestination) querySpec.getDestination(); - final StorageConnectorProvider storageConnectorProvider; - try { - storageConnectorProvider = jsonMapper.convertValue( - exportMSQDestination.getProperties(), - StorageConnectorProvider.class - ); - } - catch (IllegalArgumentException e) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.RUNTIME_FAILURE) - .build("No storage connector 
found for storage connector type:[%s].", exportMSQDestination.getStorageConnectorType()); - } + final StorageConnectorProvider storageConnectorProvider = exportMSQDestination.getStorageConnectorProvider(); final ResultFormat resultFormat = exportMSQDestination.getResultFormat(); diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DataSourceMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DataSourceMSQDestination.java index 5bf033a1aa03..7d086b6277ed 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DataSourceMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DataSourceMSQDestination.java @@ -26,6 +26,8 @@ import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.msq.querykit.ShuffleSpecFactories; +import org.apache.druid.msq.querykit.ShuffleSpecFactory; import org.joda.time.Interval; import javax.annotation.Nullable; @@ -160,4 +162,10 @@ public String toString() ", replaceTimeChunks=" + replaceTimeChunks + '}'; } + + @Override + public ShuffleSpecFactory getShuffleSpecFactory(int targetSize) + { + return ShuffleSpecFactories.getGlobalSortWithTargetSize(targetSize); + } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DurableStorageMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DurableStorageMSQDestination.java index c401d2eee6e4..abebc5fa106a 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DurableStorageMSQDestination.java +++ 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DurableStorageMSQDestination.java @@ -20,6 +20,8 @@ package org.apache.druid.msq.indexing.destination; import com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.druid.msq.querykit.ShuffleSpecFactories; +import org.apache.druid.msq.querykit.ShuffleSpecFactory; public class DurableStorageMSQDestination implements MSQDestination { @@ -45,4 +47,9 @@ public String toString() return "DurableStorageDestination{}"; } + @Override + public ShuffleSpecFactory getShuffleSpecFactory(int targetSize) + { + return ShuffleSpecFactories.getGlobalSortWithTargetSize(targetSize); + } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java index d701e997fa38..a6ed88d8a987 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java @@ -22,17 +22,19 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.msq.querykit.ShuffleSpecFactories; +import org.apache.druid.msq.querykit.ShuffleSpecFactory; import org.apache.druid.sql.http.ResultFormat; +import org.apache.druid.storage.StorageConnectorProvider; import org.joda.time.Interval; import javax.annotation.Nullable; import java.util.List; -import java.util.Map; import java.util.Objects; /** * Destination used by tasks that write the results as files to an external destination. 
{@link #resultFormat} denotes - * the format of the file created and {@link #storageConnectorType} and {@link #properties} denotes the type of external + * the format of the file created and {@link #storageConnectorProvider} denotes the type of external * destination. *
* {@link #replaceTimeChunks} denotes how existing files should be handled. @@ -42,35 +44,28 @@ public class ExportMSQDestination implements MSQDestination { public static final String TYPE = "export"; - private final String storageConnectorType; - private final Map properties; + private final StorageConnectorProvider storageConnectorProvider; private final ResultFormat resultFormat; @Nullable private final List replaceTimeChunks; @JsonCreator - public ExportMSQDestination(@JsonProperty("storageConnectorType") String storageConnectorType, - @JsonProperty("properties") Map properties, - @JsonProperty("resultFormat") ResultFormat resultFormat, - @JsonProperty("replaceTimeChunks") @Nullable List replaceTimeChunks + public ExportMSQDestination( + @JsonProperty("storageConnectorProvider") StorageConnectorProvider storageConnectorProvider, + @JsonProperty("resultFormat") ResultFormat resultFormat, + @JsonProperty("replaceTimeChunks") @Nullable List replaceTimeChunks ) { - this.storageConnectorType = storageConnectorType; - this.properties = properties; + this.storageConnectorProvider = storageConnectorProvider; this.resultFormat = resultFormat; this.replaceTimeChunks = replaceTimeChunks; } - @JsonProperty("storageConnectorType") - public String getStorageConnectorType() - { - return storageConnectorType; - } - @JsonProperty("properties") - public Map getProperties() + @JsonProperty("storageConnectorProvider") + public StorageConnectorProvider getStorageConnectorProvider() { - return properties; + return storageConnectorProvider; } @JsonProperty("resultFormat") @@ -97,26 +92,30 @@ public boolean equals(Object o) return false; } ExportMSQDestination that = (ExportMSQDestination) o; - return Objects.equals(storageConnectorType, that.storageConnectorType) && Objects.equals( - properties, - that.properties - ) && resultFormat == that.resultFormat && Objects.equals(replaceTimeChunks, that.replaceTimeChunks); + return Objects.equals(storageConnectorProvider, 
that.storageConnectorProvider) + && resultFormat == that.resultFormat + && Objects.equals(replaceTimeChunks, that.replaceTimeChunks); } @Override public int hashCode() { - return Objects.hash(storageConnectorType, properties, resultFormat, replaceTimeChunks); + return Objects.hash(storageConnectorProvider, resultFormat, replaceTimeChunks); } @Override public String toString() { return "ExportMSQDestination{" + - "storageConnectorType='" + storageConnectorType + '\'' + - ", properties=" + properties + + "storageConnectorProvider=" + storageConnectorProvider + ", resultFormat=" + resultFormat + ", replaceTimeChunks=" + replaceTimeChunks + '}'; } + + @Override + public ShuffleSpecFactory getShuffleSpecFactory(int targetSize) + { + return ShuffleSpecFactories.getGlobalSortWithTargetSize(targetSize); + } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQDestination.java index be58a48bb9ed..f7bc2d1ec6fb 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQDestination.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; +import org.apache.druid.msq.querykit.ShuffleSpecFactory; @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") @JsonSubTypes(value = { @@ -31,5 +32,5 @@ }) public interface MSQDestination { - // No methods. Just a marker interface for deserialization. 
+ ShuffleSpecFactory getShuffleSpecFactory(int targetSize); } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/TaskReportMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/TaskReportMSQDestination.java index ac1254abfe33..388f0179c289 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/TaskReportMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/TaskReportMSQDestination.java @@ -20,6 +20,8 @@ package org.apache.druid.msq.indexing.destination; import com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.druid.msq.querykit.ShuffleSpecFactories; +import org.apache.druid.msq.querykit.ShuffleSpecFactory; public class TaskReportMSQDestination implements MSQDestination { @@ -43,4 +45,9 @@ public String toString() return "TaskReportMSQDestination{}"; } + @Override + public ShuffleSpecFactory getShuffleSpecFactory(int targetSize) + { + return ShuffleSpecFactories.singlePartition(); + } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java index 7053e9461bda..d40cea44841b 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java @@ -220,8 +220,7 @@ public QueryResponse runQuery(final DruidQuery druidQuery) } destination = new ExportMSQDestination( - exportDestination.getDestination(), - exportDestination.getProperties(), + exportDestination.getStorageConnectorProvider(), format, replaceTimeChunks ); diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java index 4c46d3c7fc13..6f4f109ffa4b 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java @@ -40,6 +40,7 @@ import org.apache.druid.msq.util.MultiStageQueryContext; import org.apache.druid.rpc.indexing.OverlordClient; import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.sql.calcite.parser.DruidSqlIngest; import org.apache.druid.sql.calcite.parser.DruidSqlInsert; import org.apache.druid.sql.calcite.planner.Calcites; import org.apache.druid.sql.calcite.planner.PlannerContext; @@ -61,6 +62,7 @@ public class MSQTaskSqlEngine implements SqlEngine ImmutableSet.builder() .addAll(NativeSqlEngine.SYSTEM_CONTEXT_PARAMETERS) .add(QueryKitUtils.CTX_TIME_COLUMN_NAME) + .add(DruidSqlIngest.SQL_EXPORT_FILE_FORMAT) .add(MultiStageQueryContext.CTX_IS_REINDEX) .build(); diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java index a722ef015a6e..aaa402fa733f 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java @@ -21,18 +21,15 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; -import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.msq.export.TestExportStorageConnector; import org.apache.druid.msq.test.MSQTestBase; import org.apache.druid.msq.util.MultiStageQueryContext; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; +import 
org.apache.druid.sql.calcite.export.TestExportStorageConnector; import org.apache.druid.sql.http.ResultFormat; -import org.hamcrest.CoreMatchers; import org.junit.Assert; import org.junit.Test; -import org.junit.internal.matchers.ThrowableMessageMatcher; import java.io.ByteArrayOutputStream; import java.io.File; @@ -102,29 +99,6 @@ public void testNumberOfRowsPerFile() throws IOException ); } - @Test - public void testWithUnsupportedStorageConnector() - { - RowSignature rowSignature = RowSignature.builder() - .add("__time", ColumnType.LONG) - .add("dim1", ColumnType.STRING) - .add("cnt", ColumnType.LONG).build(); - - testIngestQuery().setSql( - "insert into extern(hdfs(basePath = '/var')) as csv select __time, dim1 from foo") - .setExpectedDataSource("foo1") - .setQueryContext(DEFAULT_MSQ_CONTEXT) - .setExpectedRowSignature(rowSignature) - .setExpectedSegment(ImmutableSet.of()) - .setExpectedResultRows(ImmutableList.of()) - .setExpectedExecutionErrorMatcher(CoreMatchers.allOf( - CoreMatchers.instanceOf(ISE.class), - ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString( - "No storage connector found for storage connector type:[hdfs]." 
- )))) - .verifyExecutionError(); - } - private List expectedFooFileContents() { return new ArrayList<>(ImmutableList.of( diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java index 62ede446b4e0..211986f712c6 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java @@ -20,14 +20,15 @@ package org.apache.druid.msq.indexing.destination; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableMap; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.sql.http.ResultFormat; import org.apache.druid.storage.StorageConnectorModule; +import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; import org.junit.Assert; import org.junit.Test; +import java.io.File; import java.io.IOException; public class ExportMSQDestinationTest @@ -36,8 +37,7 @@ public class ExportMSQDestinationTest public void testSerde() throws IOException { ExportMSQDestination exportDestination = new ExportMSQDestination( - "local", - ImmutableMap.of("basePath", "/path"), + new LocalFileStorageConnectorProvider(new File("/path")), ResultFormat.CSV, Intervals.ONLY_ETERNITY ); diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index 75356a95e858..0d24b0c0747c 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ 
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java @@ -87,8 +87,6 @@ import org.apache.druid.msq.exec.LoadedSegmentDataProvider; import org.apache.druid.msq.exec.LoadedSegmentDataProviderFactory; import org.apache.druid.msq.exec.WorkerMemoryParameters; -import org.apache.druid.msq.export.TestExportStorageConnector; -import org.apache.druid.msq.export.TestExportStorageConnectorProvider; import org.apache.druid.msq.guice.MSQDurableStorageModule; import org.apache.druid.msq.guice.MSQExternalDataSourceModule; import org.apache.druid.msq.guice.MSQIndexingModule; @@ -160,6 +158,8 @@ import org.apache.druid.sql.SqlStatementFactory; import org.apache.druid.sql.SqlToolbox; import org.apache.druid.sql.calcite.BaseCalciteQueryTest; +import org.apache.druid.sql.calcite.export.TestExportStorageConnector; +import org.apache.druid.sql.calcite.export.TestExportStorageConnectorProvider; import org.apache.druid.sql.calcite.external.ExternalDataSource; import org.apache.druid.sql.calcite.external.ExternalOperatorConversion; import org.apache.druid.sql.calcite.planner.CalciteRulesManager; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java index 56457db7162e..797a98d5ea20 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java @@ -19,14 +19,17 @@ package org.apache.druid.sql.calcite.parser; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.calcite.sql.SqlIdentifier; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlWriter; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.StringUtils; +import 
org.apache.druid.storage.StorageConnectorProvider; import org.apache.druid.utils.CollectionUtils; +import java.util.HashMap; import java.util.Map; /** @@ -43,7 +46,6 @@ public ExternalDestinationSqlIdentifier( ) { super(name, pos); - properties.put("type", getDestinationType()); this.properties = properties; } @@ -89,4 +91,24 @@ public Object clone() { throw DruidException.defensive("Function is deprecated, please use clone(SqlNode) instead."); } + + public StorageConnectorProvider toStorageConnectorProvider(ObjectMapper objectMapper) + { + final HashMap storageConnectorProperties = new HashMap<>(properties); + storageConnectorProperties.put("type", getDestinationType()); + + final StorageConnectorProvider storageConnectorProvider; + try { + storageConnectorProvider = objectMapper.convertValue( + storageConnectorProperties, + StorageConnectorProvider.class + ); + } + catch (IllegalArgumentException e) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.RUNTIME_FAILURE) + .build("No storage connector found for storage connector type:[%s].", getDestinationType()); + } + return storageConnectorProvider; + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java index 2f532356d80d..520df939dd80 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java @@ -52,6 +52,7 @@ import org.apache.druid.sql.destination.ExportDestination; import org.apache.druid.sql.destination.IngestDestination; import org.apache.druid.sql.destination.TableDestination; +import org.apache.druid.storage.StorageConnectorProvider; import java.util.List; import java.util.regex.Pattern; @@ -107,46 +108,54 @@ protected String operationName() protected abstract DruidSqlIngest ingestNode(); - @Override - public void validate() + private 
void validateExport() { - if (ingestNode().getTargetTable() instanceof ExternalDestinationSqlIdentifier) { - if (!handlerContext.plannerContext().featureAvailable(EngineFeature.WRITE_EXTERNAL_DATA)) { - throw InvalidSqlInput.exception( - "Writing to external sources are not supported by requested SQL engine [%s], consider using MSQ.", - handlerContext.engine().name() - ); - } - if (ingestNode().getPartitionedBy() != null) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.UNSUPPORTED) - .build("Export statements do not support a PARTITIONED BY or CLUSTERED BY clause."); - } - } else if (ingestNode().getPartitionedBy() == null) { + if (!handlerContext.plannerContext().featureAvailable(EngineFeature.WRITE_EXTERNAL_DATA)) { + throw InvalidSqlInput.exception( + "Writing to external sources are not supported by requested SQL engine [%s], consider using MSQ.", + handlerContext.engine().name() + ); + } + + if (ingestNode().getPartitionedBy() != null) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.UNSUPPORTED) + .build("Export statements do not support a PARTITIONED BY or CLUSTERED BY clause."); + } + + final String exportFileFormat = ingestNode().getExportFileFormat(); + if (exportFileFormat == null) { throw InvalidSqlInput.exception( - "Operation [%s] requires a PARTITIONED BY to be explicitly defined, but none was found.", + "Exporting rows into an EXTERN destination requires an AS clause to specify the format, but none was found.", operationName() ); + } else { + handlerContext.plannerContext().queryContextMap().put( + DruidSqlIngest.SQL_EXPORT_FILE_FORMAT, + exportFileFormat + ); } + } - String exportFileFormat = ingestNode().getExportFileFormat(); + @Override + public void validate() + { if (ingestNode().getTargetTable() instanceof ExternalDestinationSqlIdentifier) { - if (exportFileFormat == null) { + validateExport(); + } else { + if 
(ingestNode().getPartitionedBy() == null) { throw InvalidSqlInput.exception( - "Exporting rows into an EXTERN destination requires an AS clause to specify the format, but none was found.", + "Operation [%s] requires a PARTITIONED BY to be explicitly defined, but none was found.", operationName() ); - } else { - handlerContext.plannerContext().queryContextMap().put( - DruidSqlIngest.SQL_EXPORT_FILE_FORMAT, - exportFileFormat + } + + if (ingestNode().getExportFileFormat() != null) { + throw InvalidSqlInput.exception( + "The AS clause should only be specified while exporting rows into an EXTERN destination.", + operationName() ); } - } else if (exportFileFormat != null) { - throw InvalidSqlInput.exception( - "The AS clause should only be specified while exporting rows into an EXTERN destination.", - operationName() - ); } try { @@ -215,7 +224,8 @@ private IngestDestination validateAndGetDataSourceForIngest() .build("Operation [%s] requires a target table", operationName()); } else if (tableIdentifier instanceof ExternalDestinationSqlIdentifier) { ExternalDestinationSqlIdentifier externalDestination = ((ExternalDestinationSqlIdentifier) tableIdentifier); - dataSource = new ExportDestination(externalDestination.getDestinationType(), externalDestination.getProperties()); + StorageConnectorProvider storageConnectorProvider = externalDestination.toStorageConnectorProvider(handlerContext.jsonMapper()); + dataSource = new ExportDestination(storageConnectorProvider); } else if (tableIdentifier.names.size() == 1) { // Unqualified name. 
String tableName = Iterables.getOnlyElement(tableIdentifier.names); diff --git a/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java b/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java index c4c8638b62d0..dd08535c7835 100644 --- a/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java +++ b/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java @@ -21,8 +21,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; +import org.apache.druid.storage.StorageConnectorProvider; -import java.util.Map; import java.util.Objects; /** @@ -32,25 +32,17 @@ public class ExportDestination implements IngestDestination { public static final String TYPE_KEY = "external"; - private final String destination; - private final Map properties; + private final StorageConnectorProvider storageConnectorProvider; - public ExportDestination(@JsonProperty("destination") String destination, @JsonProperty("properties") Map properties) + public ExportDestination(@JsonProperty("storageConnectorProvider") StorageConnectorProvider storageConnectorProvider) { - this.destination = destination; - this.properties = properties; + this.storageConnectorProvider = storageConnectorProvider; } - @JsonProperty("destination") - public String getDestination() + @JsonProperty("storageConnectorProvider") + public StorageConnectorProvider getStorageConnectorProvider() { - return destination; - } - - @JsonProperty("properties") - public Map getProperties() - { - return properties; + return storageConnectorProvider; } @Override @@ -69,24 +61,20 @@ public boolean equals(Object o) return false; } ExportDestination that = (ExportDestination) o; - return Objects.equals(destination, that.destination) && Objects.equals( - properties, - that.properties - ); + return Objects.equals(storageConnectorProvider, that.storageConnectorProvider); } @Override public int hashCode() { - return 
Objects.hash(destination, properties); + return Objects.hash(storageConnectorProvider); } @Override public String toString() { return "ExportDestination{" + - "destination='" + destination + '\'' + - ", properties=" + properties + + "storageConnectorProvider=" + storageConnectorProvider + '}'; } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index cf58b6d55c61..22ff73be6263 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -20,21 +20,30 @@ package org.apache.druid.sql.calcite; import org.apache.druid.error.DruidException; +import org.apache.druid.guice.DruidInjectorBuilder; import org.apache.druid.query.Druids; import org.apache.druid.query.scan.ScanQuery; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.sql.calcite.export.TestExportModule; import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.destination.ExportDestination; import org.junit.Test; public class CalciteExportTest extends CalciteIngestionDmlTest { + @Override + public void configureGuice(DruidInjectorBuilder builder) + { + super.configureGuice(builder); + builder.addModule(new TestExportModule()); + } + @Test public void testReplaceIntoExtern() { testIngestionQuery() - .sql("REPLACE INTO EXTERN(s3(bucket='bucket1',prefix='prefix1',tempDir='/tempdir',chunkSize='5242880',maxRetry='1')) " + .sql("REPLACE INTO EXTERN(testStorage()) " + "AS CSV " + "OVERWRITE ALL " + "SELECT dim2 FROM foo") @@ -58,7 +67,7 @@ public void testReplaceIntoExtern() public void testExportWithPartitionedBy() { testIngestionQuery() - .sql("REPLACE INTO EXTERN(s3(bucket='bucket1',prefix='prefix1',tempDir='/tempdir',chunkSize='5242880',maxRetry='1')) " + .sql("REPLACE INTO EXTERN(testStorage()) " + "AS 
CSV " + "OVERWRITE ALL " + "SELECT dim2 FROM foo " @@ -74,7 +83,7 @@ public void testExportWithPartitionedBy() public void testInsertIntoExtern() { testIngestionQuery() - .sql("INSERT INTO EXTERN(s3(bucket='bucket1',prefix='prefix1',tempDir='/tempdir',chunkSize='5242880',maxRetry='1')) " + .sql("INSERT INTO EXTERN(testStorage()) " + "AS CSV " + "SELECT dim2 FROM foo") .expectQuery( @@ -97,7 +106,7 @@ public void testInsertIntoExtern() public void testExportWithoutFormat() { testIngestionQuery() - .sql("INSERT INTO EXTERN(s3(bucket='bucket1',prefix='prefix1',tempDir='/tempdir',chunkSize='5242880',maxRetry='1')) " + .sql("INSERT INTO EXTERN(testStorage(bucket='bucket1',prefix='prefix1',tempDir='/tempdir',chunkSize='5242880',maxRetry='1')) " + "SELECT dim2 FROM foo") .expectValidationError( DruidException.class, @@ -107,25 +116,14 @@ public void testExportWithoutFormat() } @Test - public void testExportSourceWithNoArguments() + public void testWithUnsupportedStorageConnector() { testIngestionQuery() - .sql("INSERT INTO EXTERN(testLocal()) " - + "AS CSV " - + "SELECT dim2 FROM foo") - .expectQuery( - Druids.newScanQueryBuilder() - .dataSource( - "foo" - ) - .intervals(querySegmentSpec(Filtration.eternity())) - .columns("dim2") - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .legacy(false) - .build() + .sql("insert into extern(nonExistent()) as csv select __time, dim1 from foo") + .expectValidationError( + DruidException.class, + "No storage connector found for storage connector type:[nonExistent]." 
) - .expectResources(dataSourceRead("foo")) - .expectTarget(ExportDestination.TYPE_KEY, RowSignature.builder().add("dim2", ColumnType.STRING).build()) .verify(); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportModule.java b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportModule.java new file mode 100644 index 000000000000..c92028d85f16 --- /dev/null +++ b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportModule.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.sql.calcite.export; + +import com.fasterxml.jackson.databind.Module; +import com.fasterxml.jackson.databind.jsontype.NamedType; +import com.fasterxml.jackson.databind.module.SimpleModule; +import com.google.common.collect.ImmutableList; +import com.google.inject.Binder; +import org.apache.druid.initialization.DruidModule; +import org.apache.druid.storage.StorageConnectorProvider; + +import java.util.List; + +public class TestExportModule implements DruidModule +{ + @Override + public List getJacksonModules() + { + return ImmutableList.of( + new SimpleModule(StorageConnectorProvider.class.getSimpleName()) + .registerSubtypes( + new NamedType(TestExportStorageConnectorProvider.class, TestExportStorageConnector.TYPE_NAME) + ) + ); + } + + @Override + public void configure(Binder binder) + { + + } +} diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnector.java b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnector.java similarity index 98% rename from extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnector.java rename to sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnector.java index 2399b916c7c1..e0048a4dda86 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnector.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnector.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.msq.export; +package org.apache.druid.sql.calcite.export; import org.apache.druid.storage.StorageConnector; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnectorProvider.java b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java similarity index 96% rename from extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnectorProvider.java rename to sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java index ab5d43238810..9cf73fac1f96 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/export/TestExportStorageConnectorProvider.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.msq.export; +package org.apache.druid.sql.calcite.export; import org.apache.druid.storage.StorageConnector; import org.apache.druid.storage.StorageConnectorProvider; diff --git a/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java b/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java index d54dc47b6a5f..ec56654c8785 100644 --- a/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java +++ b/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java @@ -20,11 +20,13 @@ package org.apache.druid.sql.destination; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableMap; import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.storage.StorageConnectorModule; +import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; import org.junit.Assert; import org.junit.Test; +import java.io.File; import java.io.IOException; public class ExportDestinationTest @@ -32,9 +34,10 @@ public 
class ExportDestinationTest @Test public void testSerde() throws IOException { - ExportDestination exportDestination = new ExportDestination("s3", ImmutableMap.of("bucketName", "bucket1", "prefix", "basepath/export")); + ExportDestination exportDestination = new ExportDestination(new LocalFileStorageConnectorProvider(new File("/basepath/export"))); ObjectMapper objectMapper = new DefaultObjectMapper(); + objectMapper.registerModules(new StorageConnectorModule().getJacksonModules()); byte[] bytes = objectMapper.writeValueAsBytes(exportDestination); ExportDestination deserialized = objectMapper.readValue(bytes, ExportDestination.class); From aa3ce05435f27cd3ce793fbafaac9508528c8adc Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 31 Jan 2024 08:54:06 +0530 Subject: [PATCH 31/50] Fix insert unparse --- .../results/ExportResultsFrameProcessor.java | 20 +++++++++------ .../apache/druid/msq/exec/MSQExportTest.java | 12 ++++----- .../sql/calcite/parser/DruidSqlInsert.java | 25 ++++++++++++++++--- .../sql/calcite/parser/DruidSqlReplace.java | 6 +++-- .../calcite/parser/DruidSqlUnparseTest.java | 24 ++++++++++++++---- 5 files changed, 63 insertions(+), 24 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java index ad68330663c2..84cb93ab49c9 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java @@ -36,6 +36,7 @@ import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.msq.counters.ChannelCounters; +import org.apache.druid.msq.querykit.QueryKitUtils; import 
org.apache.druid.msq.util.SequenceUtils; import org.apache.druid.segment.BaseObjectColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; @@ -62,14 +63,14 @@ public class ExportResultsFrameProcessor implements FrameProcessor final String exportFilePath; public ExportResultsFrameProcessor( - ReadableFrameChannel inputChannel, - ResultFormat exportFormat, - FrameReader frameReader, - StorageConnector storageConnector, - ObjectMapper jsonMapper, - int partitionNumber, - int workerNumber, - ChannelCounters channelCounter + final ReadableFrameChannel inputChannel, + final ResultFormat exportFormat, + final FrameReader frameReader, + final StorageConnector storageConnector, + final ObjectMapper jsonMapper, + final int partitionNumber, + final int workerNumber, + final ChannelCounters channelCounter ) { this.inputChannel = inputChannel; @@ -138,6 +139,9 @@ private void exportFrame(final Frame frame) throws IOException while (!cursor.isDone()) { formatter.writeRowStart(); for (int j = 0; j < signature.size(); j++) { + if (QueryKitUtils.PARTITION_BOOST_COLUMN.equals(signature.getColumnName(j))) { + continue; + } formatter.writeRowField(signature.getColumnName(j), selectors.get(j).getObject()); } channelCounter.incrementRowCount(); diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java index aaa402fa733f..b465425f3eb7 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java @@ -102,12 +102,12 @@ public void testNumberOfRowsPerFile() throws IOException private List expectedFooFileContents() { return new ArrayList<>(ImmutableList.of( - new Object[]{0, "1", null}, - new Object[]{1, "1", 10.1}, - new Object[]{2, "1", 2}, - new Object[]{3, "1", 1}, - new Object[]{4, "1", 
"def"}, - new Object[]{5, "1", "abc"} + new Object[]{"1", null}, + new Object[]{"1", 10.1}, + new Object[]{"1", 2}, + new Object[]{"1", 1}, + new Object[]{"1", "def"}, + new Object[]{"1", "abc"} )); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlInsert.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlInsert.java index 41a3bf23d152..148caeb6d4bd 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlInsert.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlInsert.java @@ -78,9 +78,28 @@ public SqlOperator getOperator() @Override public void unparse(SqlWriter writer, int leftPrec, int rightPrec) { - super.unparse(writer, leftPrec, rightPrec); - writer.keyword("PARTITIONED BY"); - writer.keyword(partitionedByStringForUnparse); + writer.startList(SqlWriter.FrameTypeEnum.SELECT); + writer.sep(isUpsert() ? "UPSERT INTO" : "INSERT INTO"); + final int opLeft = getOperator().getLeftPrec(); + final int opRight = getOperator().getRightPrec(); + getTargetTable().unparse(writer, opLeft, opRight); + if (getTargetColumnList() != null) { + getTargetColumnList().unparse(writer, opLeft, opRight); + } + writer.newlineAndIndent(); + if (getExportFileFormat() != null) { + writer.keyword("AS"); + writer.print(getExportFileFormat()); + writer.newlineAndIndent(); + } + getSource().unparse(writer, 0, 0); + writer.newlineAndIndent(); + + if (partitionedByStringForUnparse != null) { + writer.keyword("PARTITIONED BY"); + writer.keyword(partitionedByStringForUnparse); + } + if (getClusteredBy() != null) { writer.keyword("CLUSTERED BY"); SqlWriter.Frame frame = writer.startList("", ""); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlReplace.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlReplace.java index cec9996d460d..543300b97f56 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlReplace.java +++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/parser/DruidSqlReplace.java @@ -118,8 +118,10 @@ public void unparse(SqlWriter writer, int leftPrec, int rightPrec) getSource().unparse(writer, 0, 0); writer.newlineAndIndent(); - writer.keyword("PARTITIONED BY"); - writer.keyword(partitionedByStringForUnparse); + if (partitionedByStringForUnparse != null) { + writer.keyword("PARTITIONED BY"); + writer.keyword(partitionedByStringForUnparse); + } if (getClusteredBy() != null) { writer.keyword("CLUSTERED BY"); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java index 71c001653601..b2bb9d2863f4 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java @@ -41,7 +41,8 @@ public void testUnparseInsert() throws ParseException String sqlQuery = "INSERT INTO dst SELECT * FROM foo PARTITIONED BY ALL TIME"; String prettySqlQuery = "INSERT INTO \"dst\"\n" + "SELECT *\n" - + " FROM \"foo\" PARTITIONED BY ALL TIME"; + + " FROM \"foo\"\n" + + "PARTITIONED BY ALL TIME"; DruidSqlParserImpl druidSqlParser = createTestParser(sqlQuery); DruidSqlInsert druidSqlReplace = (DruidSqlInsert) druidSqlParser.DruidSqlInsertEof(); @@ -97,18 +98,31 @@ private static DruidSqlParserImpl createTestParser(String parseString) } @Test - public void testUnparseExternalSqlIdentifier() throws ParseException + public void testUnparseExternalSqlIdentifierReplace() throws ParseException { - String sqlQuery = "REPLACE INTO EXTERN( s3(bucket='bucket1',prefix='prefix1') ) AS CSV OVERWRITE ALL SELECT dim2 FROM foo PARTITIONED BY ALL"; + String sqlQuery = "REPLACE INTO EXTERN( s3(bucket='bucket1',prefix='prefix1') ) AS CSV OVERWRITE ALL SELECT dim2 FROM foo"; String prettySqlQuery = "REPLACE INTO EXTERN(S3(bucket = 'bucket1', prefix = 'prefix1'))\n" + "AS csv\n" + "OVERWRITE 
ALL\n" + "SELECT \"dim2\"\n" - + " FROM \"foo\"\n" - + "PARTITIONED BY ALL"; + + " FROM \"foo\"\n"; DruidSqlParserImpl druidSqlParser = createTestParser(sqlQuery); DruidSqlReplace druidSqlReplace = (DruidSqlReplace) druidSqlParser.DruidSqlReplaceEof(); druidSqlReplace.unparse(sqlWriter, 0, 0); assertEquals(prettySqlQuery, sqlWriter.toSqlString().getSql()); } + + @Test + public void testUnparseExternalSqlIdentifierInsert() throws ParseException + { + String sqlQuery = "INSERT INTO EXTERN( s3(bucket='bucket1',prefix='prefix1') ) AS CSV SELECT dim2 FROM foo"; + String prettySqlQuery = "INSERT INTO EXTERN(S3(bucket = 'bucket1', prefix = 'prefix1'))\n" + + "AS csv\n" + + "SELECT \"dim2\"\n" + + " FROM \"foo\"\n"; + DruidSqlParserImpl druidSqlParser = createTestParser(sqlQuery); + DruidSqlInsert druidSqlInsert = (DruidSqlInsert) druidSqlParser.DruidSqlInsertEof(); + druidSqlInsert.unparse(sqlWriter, 0, 0); + assertEquals(prettySqlQuery, sqlWriter.toSqlString().getSql()); + } } From 0501106aa933a374a3e00eff230c1ea3ff406420 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 31 Jan 2024 12:09:57 +0530 Subject: [PATCH 32/50] Add external write resource action --- .../sql/calcite/planner/IngestHandler.java | 1 + .../druid/sql/calcite/CalciteExportTest.java | 41 ++++++++++++------- .../sql/calcite/export/TestExportModule.java | 4 +- .../sql/calcite/util/CalciteTestBase.java | 5 +++ .../druid/sql/calcite/util/CalciteTests.java | 7 ++++ 5 files changed, 43 insertions(+), 15 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java index 520df939dd80..f25ab92ecdfe 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java @@ -226,6 +226,7 @@ private IngestDestination validateAndGetDataSourceForIngest() ExternalDestinationSqlIdentifier 
externalDestination = ((ExternalDestinationSqlIdentifier) tableIdentifier); StorageConnectorProvider storageConnectorProvider = externalDestination.toStorageConnectorProvider(handlerContext.jsonMapper()); dataSource = new ExportDestination(storageConnectorProvider); + resourceActions.add(new ResourceAction(new Resource(externalDestination.getDestinationType(), ResourceType.EXTERNAL), Action.WRITE)); } else if (tableIdentifier.names.size() == 1) { // Unqualified name. String tableName = Iterables.getOnlyElement(tableIdentifier.names); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index 22ff73be6263..c0a63902c743 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -21,12 +21,16 @@ import org.apache.druid.error.DruidException; import org.apache.druid.guice.DruidInjectorBuilder; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.Druids; import org.apache.druid.query.scan.ScanQuery; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.server.security.ForbiddenException; import org.apache.druid.sql.calcite.export.TestExportModule; +import org.apache.druid.sql.calcite.export.TestExportStorageConnector; import org.apache.druid.sql.calcite.filtration.Filtration; +import org.apache.druid.sql.calcite.util.CalciteTests; import org.apache.druid.sql.destination.ExportDestination; import org.junit.Test; @@ -43,10 +47,10 @@ public void configureGuice(DruidInjectorBuilder builder) public void testReplaceIntoExtern() { testIngestionQuery() - .sql("REPLACE INTO EXTERN(testStorage()) " - + "AS CSV " - + "OVERWRITE ALL " - + "SELECT dim2 FROM foo") + .sql(StringUtils.format("REPLACE INTO EXTERN(%s()) " + + "AS CSV " + + "OVERWRITE ALL " + + "SELECT dim2 FROM 
foo", TestExportStorageConnector.TYPE_NAME)) .expectQuery( Druids.newScanQueryBuilder() .dataSource( @@ -58,7 +62,7 @@ public void testReplaceIntoExtern() .legacy(false) .build() ) - .expectResources(dataSourceRead("foo")) + .expectResources(dataSourceRead("foo"), externalWrite(TestExportStorageConnector.TYPE_NAME)) .expectTarget(ExportDestination.TYPE_KEY, RowSignature.builder().add("dim2", ColumnType.STRING).build()) .verify(); } @@ -67,11 +71,11 @@ public void testReplaceIntoExtern() public void testExportWithPartitionedBy() { testIngestionQuery() - .sql("REPLACE INTO EXTERN(testStorage()) " - + "AS CSV " - + "OVERWRITE ALL " - + "SELECT dim2 FROM foo " - + "PARTITIONED BY ALL") + .sql(StringUtils.format("REPLACE INTO EXTERN(%s()) " + + "AS CSV " + + "OVERWRITE ALL " + + "SELECT dim2 FROM foo " + + "PARTITIONED BY ALL", TestExportStorageConnector.TYPE_NAME)) .expectValidationError( DruidException.class, "Export statements do not support a PARTITIONED BY or CLUSTERED BY clause." @@ -83,9 +87,9 @@ public void testExportWithPartitionedBy() public void testInsertIntoExtern() { testIngestionQuery() - .sql("INSERT INTO EXTERN(testStorage()) " - + "AS CSV " - + "SELECT dim2 FROM foo") + .sql(StringUtils.format("INSERT INTO EXTERN(%s()) " + + "AS CSV " + + "SELECT dim2 FROM foo", TestExportStorageConnector.TYPE_NAME)) .expectQuery( Druids.newScanQueryBuilder() .dataSource( @@ -97,7 +101,7 @@ public void testInsertIntoExtern() .legacy(false) .build() ) - .expectResources(dataSourceRead("foo")) + .expectResources(dataSourceRead("foo"), externalWrite(TestExportStorageConnector.TYPE_NAME)) .expectTarget(ExportDestination.TYPE_KEY, RowSignature.builder().add("dim2", ColumnType.STRING).build()) .verify(); } @@ -127,6 +131,15 @@ public void testWithUnsupportedStorageConnector() .verify(); } + @Test + public void testWithForbiddenDestination() + { + testIngestionQuery() + .sql(StringUtils.format("insert into extern(%s()) as csv select __time, dim1 from foo", 
CalciteTests.FORBIDDEN_DESTINATION)) + .expectValidationError(ForbiddenException.class) + .verify(); + } + @Test public void testSelectFromTableNamedExport() { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportModule.java b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportModule.java index c92028d85f16..b6969f4c165c 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportModule.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportModule.java @@ -25,6 +25,7 @@ import com.google.common.collect.ImmutableList; import com.google.inject.Binder; import org.apache.druid.initialization.DruidModule; +import org.apache.druid.sql.calcite.util.CalciteTests; import org.apache.druid.storage.StorageConnectorProvider; import java.util.List; @@ -37,7 +38,8 @@ public List getJacksonModules() return ImmutableList.of( new SimpleModule(StorageConnectorProvider.class.getSimpleName()) .registerSubtypes( - new NamedType(TestExportStorageConnectorProvider.class, TestExportStorageConnector.TYPE_NAME) + new NamedType(TestExportStorageConnectorProvider.class, TestExportStorageConnector.TYPE_NAME), + new NamedType(TestExportStorageConnectorProvider.class, CalciteTests.FORBIDDEN_DESTINATION) ) ); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTestBase.java b/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTestBase.java index 7e990887dd10..a10a56d31e34 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTestBase.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTestBase.java @@ -124,4 +124,9 @@ protected static ResourceAction externalRead(final String inputSourceType) { return new ResourceAction(new Resource(inputSourceType, ResourceType.EXTERNAL), Action.READ); } + + protected static ResourceAction externalWrite(final String inputSourceType) + { + return new ResourceAction(new Resource(inputSourceType, ResourceType.EXTERNAL), 
Action.WRITE); + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java b/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java index 0bb5130c148c..ff8a7278a6ef 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java @@ -115,6 +115,7 @@ public class CalciteTests public static final String DATASOURCE5 = "lotsocolumns"; public static final String BROADCAST_DATASOURCE = "broadcast"; public static final String FORBIDDEN_DATASOURCE = "forbiddenDatasource"; + public static final String FORBIDDEN_DESTINATION = "forbiddenDestination"; public static final String SOME_DATASOURCE = "some_datasource"; public static final String SOME_DATSOURCE_ESCAPED = "some\\_datasource"; public static final String SOMEXDATASOURCE = "somexdatasource"; @@ -149,6 +150,12 @@ public Authorizer getAuthorizer(String name) } case ResourceType.QUERY_CONTEXT: return Access.OK; + case ResourceType.EXTERNAL: + if (FORBIDDEN_DESTINATION.equals(resource.getName())) { + return new Access(false); + } else { + return Access.OK; + } default: return new Access(false); } From ebfc53e02fa8055ec09056148877ec1cf41f1ad5 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 31 Jan 2024 13:23:57 +0530 Subject: [PATCH 33/50] Fix tests --- .../apache/druid/sql/calcite/util/CalciteTests.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java b/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java index ff8a7278a6ef..a81953bc47cf 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java @@ -63,6 +63,7 @@ import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker; import org.apache.druid.server.coordination.DruidServerMetadata; import 
org.apache.druid.server.security.Access; +import org.apache.druid.server.security.Action; import org.apache.druid.server.security.AllowAllAuthenticator; import org.apache.druid.server.security.AuthConfig; import org.apache.druid.server.security.AuthenticationResult; @@ -151,11 +152,14 @@ public Authorizer getAuthorizer(String name) case ResourceType.QUERY_CONTEXT: return Access.OK; case ResourceType.EXTERNAL: - if (FORBIDDEN_DESTINATION.equals(resource.getName())) { - return new Access(false); - } else { - return Access.OK; + if (Action.WRITE.equals(action)) { + if (FORBIDDEN_DESTINATION.equals(resource.getName())) { + return new Access(false); + } else { + return Access.OK; + } } + return new Access(false); default: return new Access(false); } From 941605b9c607a783d4effe523f5c8f90239bc674 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 31 Jan 2024 16:41:09 +0530 Subject: [PATCH 34/50] Add resource check to overlord resource --- .../azure/output/AzureStorageConnectorProvider.java | 6 ++++++ .../google/output/GoogleStorageConnectorProvider.java | 5 +++++ .../org/apache/druid/msq/indexing/MSQControllerTask.java | 8 ++++++++ .../indexing/destination/DataSourceMSQDestination.java | 9 +++++++++ .../destination/DurableStorageMSQDestination.java | 9 +++++++++ .../msq/indexing/destination/ExportMSQDestination.java | 9 +++++++++ .../druid/msq/indexing/destination/MSQDestination.java | 5 +++++ .../indexing/destination/TaskReportMSQDestination.java | 9 +++++++++ .../storage/s3/output/S3StorageConnectorProvider.java | 6 ++++++ .../java/org/apache/druid/indexing/common/task/Task.java | 8 ++++++++ .../druid/indexing/overlord/http/OverlordResource.java | 4 ++-- .../apache/druid/storage/StorageConnectorProvider.java | 1 + .../storage/local/LocalFileStorageConnectorProvider.java | 6 ++++++ .../export/TestExportStorageConnectorProvider.java | 6 ++++++ 14 files changed, 89 insertions(+), 2 deletions(-) diff --git 
a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/output/AzureStorageConnectorProvider.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/output/AzureStorageConnectorProvider.java index 79be724c17f7..be39b49b2486 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/output/AzureStorageConnectorProvider.java +++ b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/output/AzureStorageConnectorProvider.java @@ -58,4 +58,10 @@ public StorageConnector get() { return new AzureStorageConnector(this, azureStorage); } + + @Override + public String getType() + { + return AzureStorageDruidModule.SCHEME; + } } diff --git a/extensions-core/google-extensions/src/main/java/org/apache/druid/storage/google/output/GoogleStorageConnectorProvider.java b/extensions-core/google-extensions/src/main/java/org/apache/druid/storage/google/output/GoogleStorageConnectorProvider.java index f33a3b1f44db..c5f5abe28377 100644 --- a/extensions-core/google-extensions/src/main/java/org/apache/druid/storage/google/output/GoogleStorageConnectorProvider.java +++ b/extensions-core/google-extensions/src/main/java/org/apache/druid/storage/google/output/GoogleStorageConnectorProvider.java @@ -61,4 +61,9 @@ public StorageConnector get() return new GoogleStorageConnector(this, googleStorage, googleInputDataConfig); } + @Override + public String getType() + { + return GoogleStorageDruidModule.SCHEME; + } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java index 3cdf706ba163..7eb455ca8424 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java @@ -57,6 +57,7 
@@ import org.apache.druid.rpc.StandardRetryPolicy; import org.apache.druid.rpc.indexing.OverlordClient; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.server.security.Resource; import org.apache.druid.server.security.ResourceAction; import org.apache.druid.sql.calcite.run.SqlResults; import org.joda.time.Interval; @@ -65,6 +66,7 @@ import javax.annotation.Nullable; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; @JsonTypeName(MSQControllerTask.TYPE) @@ -273,6 +275,12 @@ private static String getDataSourceForTaskMetadata(final MSQSpec querySpec) } } + @Override + public Optional getDestinationResource() + { + return querySpec.getDestination().getDestinationResource(); + } + public static boolean isIngestion(final MSQSpec querySpec) { return querySpec.getDestination() instanceof DataSourceMSQDestination; diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DataSourceMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DataSourceMSQDestination.java index 7d086b6277ed..0854582a733c 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DataSourceMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DataSourceMSQDestination.java @@ -28,12 +28,15 @@ import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.msq.querykit.ShuffleSpecFactories; import org.apache.druid.msq.querykit.ShuffleSpecFactory; +import org.apache.druid.server.security.Resource; +import org.apache.druid.server.security.ResourceType; import org.joda.time.Interval; import javax.annotation.Nullable; import java.util.Collections; import java.util.List; import java.util.Objects; +import java.util.Optional; public class DataSourceMSQDestination implements MSQDestination { @@ -168,4 
+171,10 @@ public ShuffleSpecFactory getShuffleSpecFactory(int targetSize) { return ShuffleSpecFactories.getGlobalSortWithTargetSize(targetSize); } + + @Override + public Optional getDestinationResource() + { + return Optional.of(new Resource(getDataSource(), ResourceType.DATASOURCE)); + } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DurableStorageMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DurableStorageMSQDestination.java index abebc5fa106a..7db459b5b105 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DurableStorageMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DurableStorageMSQDestination.java @@ -22,6 +22,9 @@ import com.fasterxml.jackson.annotation.JsonCreator; import org.apache.druid.msq.querykit.ShuffleSpecFactories; import org.apache.druid.msq.querykit.ShuffleSpecFactory; +import org.apache.druid.server.security.Resource; + +import java.util.Optional; public class DurableStorageMSQDestination implements MSQDestination { @@ -52,4 +55,10 @@ public ShuffleSpecFactory getShuffleSpecFactory(int targetSize) { return ShuffleSpecFactories.getGlobalSortWithTargetSize(targetSize); } + + @Override + public Optional getDestinationResource() + { + return Optional.empty(); + } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java index a6ed88d8a987..6a08ad14dd7b 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java @@ -24,6 +24,8 @@ import 
com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.msq.querykit.ShuffleSpecFactories; import org.apache.druid.msq.querykit.ShuffleSpecFactory; +import org.apache.druid.server.security.Resource; +import org.apache.druid.server.security.ResourceType; import org.apache.druid.sql.http.ResultFormat; import org.apache.druid.storage.StorageConnectorProvider; import org.joda.time.Interval; @@ -31,6 +33,7 @@ import javax.annotation.Nullable; import java.util.List; import java.util.Objects; +import java.util.Optional; /** * Destination used by tasks that write the results as files to an external destination. {@link #resultFormat} denotes @@ -118,4 +121,10 @@ public ShuffleSpecFactory getShuffleSpecFactory(int targetSize) { return ShuffleSpecFactories.getGlobalSortWithTargetSize(targetSize); } + + @Override + public Optional getDestinationResource() + { + return Optional.of(new Resource(getStorageConnectorProvider().getType(), ResourceType.EXTERNAL)); + } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQDestination.java index f7bc2d1ec6fb..39460b15194c 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/MSQDestination.java @@ -22,6 +22,9 @@ import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; import org.apache.druid.msq.querykit.ShuffleSpecFactory; +import org.apache.druid.server.security.Resource; + +import java.util.Optional; @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") @JsonSubTypes(value = { @@ -33,4 +36,6 @@ public interface MSQDestination { ShuffleSpecFactory getShuffleSpecFactory(int targetSize); + + Optional getDestinationResource(); } diff --git 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/TaskReportMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/TaskReportMSQDestination.java index 388f0179c289..f5e166d92d9d 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/TaskReportMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/TaskReportMSQDestination.java @@ -22,6 +22,9 @@ import com.fasterxml.jackson.annotation.JsonCreator; import org.apache.druid.msq.querykit.ShuffleSpecFactories; import org.apache.druid.msq.querykit.ShuffleSpecFactory; +import org.apache.druid.server.security.Resource; + +import java.util.Optional; public class TaskReportMSQDestination implements MSQDestination { @@ -50,4 +53,10 @@ public ShuffleSpecFactory getShuffleSpecFactory(int targetSize) { return ShuffleSpecFactories.singlePartition(); } + + @Override + public Optional getDestinationResource() + { + return Optional.empty(); + } } diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java index 7f4b43a0ede8..da40bb5b3871 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java @@ -55,4 +55,10 @@ public StorageConnector get() { return new S3StorageConnector(this, s3); } + + @Override + public String getType() + { + return S3StorageDruidModule.SCHEME; + } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java index 81a55aae1b4c..a5f34f9bbbfe 
100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java @@ -41,10 +41,13 @@ import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.Query; import org.apache.druid.query.QueryRunner; +import org.apache.druid.server.security.Resource; import org.apache.druid.server.security.ResourceAction; +import org.apache.druid.server.security.ResourceType; import javax.annotation.Nonnull; import java.util.Map; +import java.util.Optional; import java.util.Set; /** @@ -297,6 +300,11 @@ default Map addToContextIfAbsent(String key, Object val) Map getContext(); + default Optional getDestinationResource() + { + return Optional.of(new Resource(getDataSource(), ResourceType.DATASOURCE)); + } + default ContextValueType getContextValue(String key) { return (ContextValueType) getContext().get(key); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/http/OverlordResource.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/http/OverlordResource.java index b589dc5ffd81..b529aa45854c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/http/OverlordResource.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/http/OverlordResource.java @@ -1115,9 +1115,9 @@ private Response asLeaderWith(Optional x, Function f) @VisibleForTesting Set getNeededResourceActionsForTask(Task task) throws UOE { - final String dataSource = task.getDataSource(); final Set resourceActions = new HashSet<>(); - resourceActions.add(new ResourceAction(new Resource(dataSource, ResourceType.DATASOURCE), Action.WRITE)); + java.util.Optional destinationResource = task.getDestinationResource(); + destinationResource.ifPresent(resource -> resourceActions.add(new ResourceAction(resource, Action.WRITE))); if (authConfig.isEnableInputSourceSecurity()) { 
resourceActions.addAll(task.getInputSourceResources()); } diff --git a/processing/src/main/java/org/apache/druid/storage/StorageConnectorProvider.java b/processing/src/main/java/org/apache/druid/storage/StorageConnectorProvider.java index 9fece71eab8e..28fee208e5b7 100644 --- a/processing/src/main/java/org/apache/druid/storage/StorageConnectorProvider.java +++ b/processing/src/main/java/org/apache/druid/storage/StorageConnectorProvider.java @@ -25,4 +25,5 @@ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") public interface StorageConnectorProvider extends Provider { + String getType(); } diff --git a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java index ae95804ff1c9..cccc3c383c1a 100644 --- a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java +++ b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java @@ -79,4 +79,10 @@ public int hashCode() { return Objects.hash(basePath); } + + @Override + public String getType() + { + return LocalFileStorageConnectorProvider.TYPE_NAME; + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java index 9cf73fac1f96..1e0c9fbfe483 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java @@ -31,4 +31,10 @@ public StorageConnector get() { return STORAGE_CONNECTOR; } + + @Override + public String getType() + { + return TestExportStorageConnector.TYPE_NAME; + } } From eb73cc21d7af81ff6394743f76b1497c446ab0a1 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 31 Jan 2024 17:40:47 +0530 Subject: [PATCH 
35/50] Fix tests --- .../druid/indexing/overlord/http/OverlordResourceTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordResourceTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordResourceTest.java index 6897e69c26b9..8c1b6765431c 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordResourceTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordResourceTest.java @@ -1699,6 +1699,7 @@ public void testResourceActionsForTaskWithInputTypeAndInputSecurityEnabled() EasyMock.expect(authConfig.isEnableInputSourceSecurity()).andReturn(true); EasyMock.expect(task.getDataSource()).andReturn(dataSource); + EasyMock.expect(task.getDestinationResource()).andReturn(java.util.Optional.of(new Resource(dataSource, ResourceType.DATASOURCE))); EasyMock.expect(task.getInputSourceResources()) .andReturn(ImmutableSet.of(new ResourceAction( new Resource(inputSourceType, ResourceType.EXTERNAL), @@ -1735,6 +1736,7 @@ public void testResourceActionsForTaskWithFirehoseAndInputSecurityEnabled() EasyMock.expect(authConfig.isEnableInputSourceSecurity()).andReturn(true); EasyMock.expect(task.getId()).andReturn("taskId"); EasyMock.expect(task.getDataSource()).andReturn(dataSource); + EasyMock.expect(task.getDestinationResource()).andReturn(java.util.Optional.of(new Resource(dataSource, ResourceType.DATASOURCE))); EasyMock.expect(task.getInputSourceResources()).andThrow(expectedException); EasyMock.replay( @@ -1767,6 +1769,7 @@ public void testResourceActionsForTaskWithInputTypeAndInputSecurityDisabled() EasyMock.expect(authConfig.isEnableInputSourceSecurity()).andReturn(false); EasyMock.expect(task.getDataSource()).andReturn(dataSource); + EasyMock.expect(task.getDestinationResource()).andReturn(java.util.Optional.of(new Resource(dataSource, ResourceType.DATASOURCE))); 
EasyMock.expect(task.getInputSourceResources()) .andReturn(ImmutableSet.of(new ResourceAction( new Resource(inputSourceType, ResourceType.EXTERNAL), From 1867cce04a8e157c48b484bfa3eb99ed9566b6ef Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Thu, 1 Feb 2024 20:24:07 +0530 Subject: [PATCH 36/50] Add IT --- .../druid/testsEx/config/Initializer.java | 2 + .../druid/testsEx/msq/ITMultiStageQuery.java | 91 +++++++++++++++++++ .../druid/storage/StorageConnectorModule.java | 2 + 3 files changed, 95 insertions(+) diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/config/Initializer.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/config/Initializer.java index ac3f8415488c..6f1e336c935f 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/config/Initializer.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/config/Initializer.java @@ -71,6 +71,7 @@ import org.apache.druid.metadata.storage.mysql.MySQLConnectorDriverConfig; import org.apache.druid.metadata.storage.mysql.MySQLConnectorSslConfig; import org.apache.druid.metadata.storage.mysql.MySQLMetadataStorageModule; +import org.apache.druid.msq.guice.MSQExternalDataSourceModule; import org.apache.druid.server.DruidNode; import org.apache.druid.testing.IntegrationTestingConfig; import org.apache.druid.testing.IntegrationTestingConfigProvider; @@ -499,6 +500,7 @@ private static Injector makeInjector( new LegacyBrokerParallelMergeConfigModule(), // Dependencies from other modules new StorageNodeModule(), + new MSQExternalDataSourceModule(), // Test-specific items, including bits copy/pasted // from modules that don't play well in a client setting. 
diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java index b70329bd5683..8f0525713c41 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java @@ -19,8 +19,15 @@ package org.apache.druid.testsEx.msq; +import com.google.api.client.util.Preconditions; +import com.google.common.collect.ImmutableList; import com.google.inject.Inject; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.guava.Yielder; +import org.apache.druid.msq.indexing.report.MSQResultsReport; +import org.apache.druid.msq.indexing.report.MSQTaskReport; +import org.apache.druid.msq.indexing.report.MSQTaskReportPayload; import org.apache.druid.msq.sql.SqlTaskStatus; import org.apache.druid.testing.clients.CoordinatorResourceTestClient; import org.apache.druid.testing.utils.DataLoaderHelper; @@ -33,6 +40,11 @@ import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + @RunWith(DruidTestRunner.class) @Category(MultiStageQuery.class) public class ITMultiStageQuery @@ -176,4 +188,83 @@ public void testMsqIngestionAndQueryingWithLocalFn() throws Exception msqHelper.testQueriesFromFile(QUERY_FILE, datasource); } + + @Test + public void testExport() throws Exception + { + String exportQuery = + StringUtils.format( + "REPLACE INTO extern(%s(basePath = '%s'))\n" + + "AS CSV\n" + + "OVERWRITE ALL\n" + + "SELECT page, added, delta\n" + + "FROM TABLE(\n" + + " EXTERN(\n" + + " '{\"type\":\"local\",\"files\":[\"/resources/data/batch_index/json/wikipedia_index_data1.json\"]}',\n" + + " '{\"type\":\"json\"}',\n" 
+ + " '[{\"type\":\"string\",\"name\":\"timestamp\"},{\"type\":\"string\",\"name\":\"isRobot\"},{\"type\":\"string\",\"name\":\"diffUrl\"},{\"type\":\"long\",\"name\":\"added\"},{\"type\":\"string\",\"name\":\"countryIsoCode\"},{\"type\":\"string\",\"name\":\"regionName\"},{\"type\":\"string\",\"name\":\"channel\"},{\"type\":\"string\",\"name\":\"flags\"},{\"type\":\"long\",\"name\":\"delta\"},{\"type\":\"string\",\"name\":\"isUnpatrolled\"},{\"type\":\"string\",\"name\":\"isNew\"},{\"type\":\"double\",\"name\":\"deltaBucket\"},{\"type\":\"string\",\"name\":\"isMinor\"},{\"type\":\"string\",\"name\":\"isAnonymous\"},{\"type\":\"long\",\"name\":\"deleted\"},{\"type\":\"string\",\"name\":\"cityName\"},{\"type\":\"long\",\"name\":\"metroCode\"},{\"type\":\"string\",\"name\":\"namespace\"},{\"type\":\"string\",\"name\":\"comment\"},{\"type\":\"string\",\"name\":\"page\"},{\"type\":\"long\",\"name\":\"commentLength\"},{\"type\":\"string\",\"name\":\"countryName\"},{\"type\":\"string\",\"name\":\"user\"},{\"type\":\"string\",\"name\":\"regionIsoCode\"}]'\n" + + " )\n" + + ")\n", + "localStorage", "/shared/export" + ); + + SqlTaskStatus exportTask = msqHelper.submitMsqTask(exportQuery); + + msqHelper.pollTaskIdForSuccess(exportTask.getTaskId()); + + if (exportTask.getState().isFailure()) { + Assert.fail(StringUtils.format( + "Unable to start the task successfully.\nPossible exception: %s", + exportTask.getError() + )); + } + + String resultQuery = "SELECT page, added, delta\n" + + " FROM TABLE(\n" + + " EXTERN(\n" + + " '{\"type\":\"local\",\"baseDir\":\"/shared/export/worker0/\",\"filter\":\"*.csv\"}',\n" + + " '{\"type\":\"csv\",\"findColumnsFromHeader\":false,\"columns\":[\"delta\",\"added\",\"page\"]}'\n" + + " )\n" + + " ) EXTEND (\"delta\" BIGINT, \"added\" BIGINT, \"page\" VARCHAR)\n" + + " WHERE delta != 0\n" + + " ORDER BY page"; + + SqlTaskStatus resultTaskStatus = msqHelper.submitMsqTask(resultQuery); + + 
msqHelper.pollTaskIdForSuccess(resultTaskStatus.getTaskId()); + + Map statusReport = msqHelper.fetchStatusReports(resultTaskStatus.getTaskId()); + MSQTaskReport taskReport = statusReport.get(MSQTaskReport.REPORT_KEY); + if (taskReport == null) { + throw new ISE("Unable to fetch the status report for the task [%]", resultTaskStatus.getTaskId()); + } + MSQTaskReportPayload taskReportPayload = Preconditions.checkNotNull( + taskReport.getPayload(), + "payload" + ); + MSQResultsReport resultsReport = Preconditions.checkNotNull( + taskReportPayload.getResults(), + "Results report for the task id is empty" + ); + + Yielder yielder = resultsReport.getResultYielder(); + List> actualResults = new ArrayList<>(); + + while (!yielder.isDone()) { + Object[] row = yielder.get(); + actualResults.add(Arrays.asList(row)); + yielder = yielder.next(null); + } + + ImmutableList> expectedResults = ImmutableList.of( + ImmutableList.of("Cherno Alpha", 111, 123), + ImmutableList.of("Gypsy Danger", -143, 57), + ImmutableList.of("Striker Eureka", 330, 459) + ); + + Assert.assertEquals( + expectedResults, + actualResults + ); + } } diff --git a/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java b/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java index 4792a5139c6b..d8e52a9e8fcd 100644 --- a/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java +++ b/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java @@ -20,6 +20,7 @@ package org.apache.druid.storage; import com.fasterxml.jackson.databind.Module; +import com.fasterxml.jackson.databind.jsontype.NamedType; import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.common.collect.ImmutableList; import com.google.inject.Binder; @@ -36,6 +37,7 @@ public List getJacksonModules() return ImmutableList.of( new SimpleModule(StorageConnector.class.getSimpleName()) .registerSubtypes(LocalFileStorageConnectorProvider.class) + 
.registerSubtypes(new NamedType(LocalFileStorageConnectorProvider.class, "localStorage")) ); } From 9ff0cd4dddb65d094d3a60302f3f3a23727e6f7d Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Fri, 2 Feb 2024 15:35:21 +0530 Subject: [PATCH 37/50] Update syntax --- docs/multi-stage-query/reference.md | 50 +++++++++++++++---- .../apache/druid/msq/exec/MSQExportTest.java | 11 ++-- .../apache/druid/msq/test/MSQTestBase.java | 6 +-- .../druid/testsEx/msq/ITMultiStageQuery.java | 2 +- .../druid/storage/StorageConnectorModule.java | 2 - sql/src/main/codegen/includes/common.ftl | 35 +++++++++---- .../ExternalDestinationSqlIdentifier.java | 11 +--- .../druid/sql/calcite/CalciteExportTest.java | 32 ++++++++++-- 8 files changed, 105 insertions(+), 44 deletions(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 502c1256fc28..03191cc13072 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -130,11 +130,47 @@ SELECT FROM
``` -Exporting is currently supported for Amazon S3 storage. This can be done passing the function `S3()` as an argument to the `EXTERN` function. The `druid-s3-extensions` should be loaded. +Exporting is currently supported for Amazon S3 storage and local storage. + +##### S3 + +Exporting results to S3 can be done by passing the function `S3()` as an argument to the `EXTERN` function. The `druid-s3-extensions` should be loaded. +The `S3()` function is a druid function which configures the connection. Arguments to `S3()` should be passed as named parameters with the value in single quotes like the example below. + +```sql +INSERT INTO + EXTERN( + S3(bucket => 's3://your_bucket', prefix => 'prefix/to/files', tempDir => '/tmp/export') + ) +AS CSV +SELECT + +FROM
+``` + +Supported arguments to the function: + +| Parameter | Required | Description | Default | +|-------------|---------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --| +| `bucket` | Yes | The S3 bucket to which the files are exported to. | n/a | +| `prefix` | Yes | Path where the exported files would be created. If the location includes other files or directories, then they might get cleaned up as well. | n/a | +| `tempDir` | Yes | Directory path on the local disk to store temporary files required while uploading the data | n/a | +| `maxRetry` | No | Defines the max number times to attempt S3 API calls to avoid failures due to transient errors. | 10 | +| `chunkSize` | No | Defines the size of each chunk to temporarily store in `tempDir`. The chunk size must be between 5 MiB and 5 GiB. A large chunk size reduces the API calls to S3, however it requires more disk space to store the temporary chunks. | 100MiB | + +##### LOCAL + +Exporting is also supported to the local storage, which exports the results to the filesystem of the MSQ worker. +This is useful in a single node setup or for testing, and is not suitable for production use cases. + +This can be done by passing the function `LOCAL()` as an argument to the `EXTERN FUNCTION`. +Arguments to `LOCAL()` should be passed as named parameters with the value in single quotes like the example below. ```sql INSERT INTO - EXTERN(S3(bucket=<...>, prefix=<...>, tempDir=<...>)) + EXTERN( + local(basePath => '/tmp/exportLocation') + ) AS CSV SELECT @@ -143,13 +179,9 @@ FROM
Supported arguments to the function: -| Parameter | Required | Description | Default | -|-------------|---------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --| -| `bucket` | Yes | The S3 bucket to which the files are exported to. | n/a | -| `prefix` | Yes | Path prepended to all the paths uploaded to the bucket to namespace the connector's files. Provide a unique value for the prefix and do not share the same prefix between different clusters. If the location includes other files or directories, then they might get cleaned up as well. | n/a | -| `tempDir` | Yes | Directory path on the local disk to store temporary files required while uploading the data | n/a | -| `maxRetry` | No | Defines the max number times to attempt S3 API calls to avoid failures due to transient errors. | 10 | -| `chunkSize` | No | Defines the size of each chunk to temporarily store in `tempDir`. The chunk size must be between 5 MiB and 5 GiB. A large chunk size reduces the API calls to S3, however it requires more disk space to store the temporary chunks. | 100MiB | +| Parameter | Required | Description | Default | +|-------------|--------------------------------|-----------------------------------------------------------------| --| +| `basePath` | Yes | The file system path where the exported files would be created. If the location includes other files or directories, then they might get cleaned up as well.| n/a | For more information, see [Read external data with EXTERN](concepts.md#write-to-an-external-destination-with-extern). 
diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java index b465425f3eb7..54fb723ee672 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java @@ -53,8 +53,9 @@ public void testExport() throws IOException .add("dim1", ColumnType.STRING) .add("cnt", ColumnType.LONG).build(); - testIngestQuery().setSql( - "insert into extern(" + TestExportStorageConnector.TYPE_NAME + "()) as csv select cnt, dim1 from foo") + final String sql = StringUtils.format("insert into extern(%s()) as csv select cnt, dim1 from foo", TestExportStorageConnector.TYPE_NAME); + + testIngestQuery().setSql(sql) .setExpectedDataSource("foo1") .setQueryContext(DEFAULT_MSQ_CONTEXT) .setExpectedRowSignature(rowSignature) @@ -83,9 +84,9 @@ public void testNumberOfRowsPerFile() throws IOException Map queryContext = new HashMap<>(DEFAULT_MSQ_CONTEXT); queryContext.put(MultiStageQueryContext.CTX_ROWS_PER_PAGE, 1); - testIngestQuery().setSql( - StringUtils.format("insert into extern(localStorage(basePath='%s')) as csv select cnt, dim1 from foo", exportDir.getAbsolutePath()) - ) + final String sql = StringUtils.format("insert into extern(local(basePath=>'%s')) as csv select cnt, dim1 from foo", exportDir.getAbsolutePath()); + + testIngestQuery().setSql(sql) .setExpectedDataSource("foo1") .setQueryContext(queryContext) .setExpectedRowSignature(rowSignature) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index 0d24b0c0747c..ce89d2b7846c 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ 
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java @@ -177,9 +177,9 @@ import org.apache.druid.sql.calcite.view.InProcessViewManager; import org.apache.druid.sql.guice.SqlBindings; import org.apache.druid.storage.StorageConnector; +import org.apache.druid.storage.StorageConnectorModule; import org.apache.druid.storage.StorageConnectorProvider; import org.apache.druid.storage.local.LocalFileStorageConnector; -import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.PruneLoadSpec; import org.apache.druid.timeline.SegmentId; @@ -514,10 +514,10 @@ public String getFormatString() objectMapper.registerModule( new SimpleModule(StorageConnector.class.getSimpleName()) .registerSubtypes( - new NamedType(TestExportStorageConnectorProvider.class, TestExportStorageConnector.TYPE_NAME), - new NamedType(LocalFileStorageConnectorProvider.class, "localStorage") + new NamedType(TestExportStorageConnectorProvider.class, TestExportStorageConnector.TYPE_NAME) ) ); + objectMapper.registerModules(new StorageConnectorModule().getJacksonModules()); objectMapper.registerModules(sqlModule.getJacksonModules()); doReturn(mock(Request.class)).when(brokerClient).makeRequest(any(), anyString()); diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java index 8f0525713c41..df6665ce3d65 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java @@ -194,7 +194,7 @@ public void testExport() throws Exception { String exportQuery = StringUtils.format( - "REPLACE INTO extern(%s(basePath = '%s'))\n" + "REPLACE INTO extern(%s(basePath => '%s'))\n" + "AS CSV\n" + "OVERWRITE ALL\n" + 
"SELECT page, added, delta\n" diff --git a/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java b/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java index d8e52a9e8fcd..4792a5139c6b 100644 --- a/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java +++ b/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java @@ -20,7 +20,6 @@ package org.apache.druid.storage; import com.fasterxml.jackson.databind.Module; -import com.fasterxml.jackson.databind.jsontype.NamedType; import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.common.collect.ImmutableList; import com.google.inject.Binder; @@ -37,7 +36,6 @@ public List getJacksonModules() return ImmutableList.of( new SimpleModule(StorageConnector.class.getSimpleName()) .registerSubtypes(LocalFileStorageConnectorProvider.class) - .registerSubtypes(new NamedType(LocalFileStorageConnectorProvider.class, "localStorage")) ); } diff --git a/sql/src/main/codegen/includes/common.ftl b/sql/src/main/codegen/includes/common.ftl index aa8db161de0b..757a869003ed 100644 --- a/sql/src/main/codegen/includes/common.ftl +++ b/sql/src/main/codegen/includes/common.ftl @@ -122,19 +122,34 @@ String FileFormat() : SqlIdentifier ExternalDestination() : { final Span s; - SqlIdentifier destinationType; + SqlIdentifier destinationType = null; + String destinationTypeString = null; Map properties = new HashMap(); } { - destinationType = SimpleIdentifier() [ [ properties = ExternProperties() ] ] + ( + destinationType = SimpleIdentifier() { - s = span(); - return new ExternalDestinationSqlIdentifier( - destinationType.toString(), - s.pos(), - properties - ); + destinationTypeString = destinationType.toString(); + } + | + + { + // local is a reserved keyword in calcite. 
However, local is also a supported input source / destination and + // keeping the name is preferred for consistency in other places, and so that permission checks are applied + // correctly, so this is handled as a special case. + destinationTypeString = "local"; } + ) + [ [ properties = ExternProperties() ] ] + { + s = span(); + return new ExternalDestinationSqlIdentifier( + destinationTypeString, + s.pos(), + properties + ); + } } Map ExternProperties() : @@ -147,14 +162,14 @@ Map ExternProperties() : } { ( - identifier = SimpleIdentifier() value = SimpleStringLiteral() + identifier = SimpleIdentifier() value = SimpleStringLiteral() { properties.put(identifier.toString(), value); } ) ( - identifier = SimpleIdentifier() value = SimpleStringLiteral() + identifier = SimpleIdentifier() value = SimpleStringLiteral() { properties.put(identifier.toString(), value); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java index 797a98d5ea20..82065877be29 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java @@ -97,18 +97,9 @@ public StorageConnectorProvider toStorageConnectorProvider(ObjectMapper objectMa final HashMap storageConnectorProperties = new HashMap<>(properties); storageConnectorProperties.put("type", getDestinationType()); - final StorageConnectorProvider storageConnectorProvider; - try { - storageConnectorProvider = objectMapper.convertValue( + return objectMapper.convertValue( storageConnectorProperties, StorageConnectorProvider.class ); - } - catch (IllegalArgumentException e) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.RUNTIME_FAILURE) - .build("No storage connector found for storage connector type:[%s].", 
getDestinationType()); - } - return storageConnectorProvider; } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index c0a63902c743..6796a07b3a79 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -32,7 +32,11 @@ import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.util.CalciteTests; import org.apache.druid.sql.destination.ExportDestination; +import org.apache.druid.storage.StorageConnectorModule; +import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; +import org.hamcrest.CoreMatchers; import org.junit.Test; +import org.junit.internal.matchers.ThrowableMessageMatcher; public class CalciteExportTest extends CalciteIngestionDmlTest { @@ -40,6 +44,7 @@ public class CalciteExportTest extends CalciteIngestionDmlTest public void configureGuice(DruidInjectorBuilder builder) { super.configureGuice(builder); + builder.addModule(new StorageConnectorModule()); builder.addModule(new TestExportModule()); } @@ -47,7 +52,7 @@ public void configureGuice(DruidInjectorBuilder builder) public void testReplaceIntoExtern() { testIngestionQuery() - .sql(StringUtils.format("REPLACE INTO EXTERN(%s()) " + .sql(StringUtils.format("REPLACE INTO EXTERN(%s(basePath => 'export')) " + "AS CSV " + "OVERWRITE ALL " + "SELECT dim2 FROM foo", TestExportStorageConnector.TYPE_NAME)) @@ -67,6 +72,23 @@ public void testReplaceIntoExtern() .verify(); } + @Test + public void testReplaceWithoutRequiredParameter() + { + testIngestionQuery() + .sql(StringUtils.format("REPLACE INTO EXTERN(%s()) " + + "AS CSV " + + "OVERWRITE ALL " + + "SELECT dim2 FROM foo", LocalFileStorageConnectorProvider.TYPE_NAME)) + .expectValidationError( + CoreMatchers.allOf( + CoreMatchers.instanceOf(IllegalArgumentException.class), + 
ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Missing required creator property 'basePath'")) + ) + ) + .verify(); + } + @Test public void testExportWithPartitionedBy() { @@ -110,7 +132,7 @@ public void testInsertIntoExtern() public void testExportWithoutFormat() { testIngestionQuery() - .sql("INSERT INTO EXTERN(testStorage(bucket='bucket1',prefix='prefix1',tempDir='/tempdir',chunkSize='5242880',maxRetry='1')) " + .sql("INSERT INTO EXTERN(testStorage(bucket=>'bucket1',prefix=>'prefix1',tempDir=>'/tempdir',chunkSize=>'5242880',maxRetry=>'1')) " + "SELECT dim2 FROM foo") .expectValidationError( DruidException.class, @@ -125,8 +147,10 @@ public void testWithUnsupportedStorageConnector() testIngestionQuery() .sql("insert into extern(nonExistent()) as csv select __time, dim1 from foo") .expectValidationError( - DruidException.class, - "No storage connector found for storage connector type:[nonExistent]." + CoreMatchers.allOf( + CoreMatchers.instanceOf(IllegalArgumentException.class), + ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Could not resolve type id 'nonExistent' as a subtype")) + ) ) .verify(); } From 8e21576c1f00e35973af3c27cb4e7bb7a919ea00 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Fri, 2 Feb 2024 16:27:51 +0530 Subject: [PATCH 38/50] Update tests --- .../calcite/parser/ExternalDestinationSqlIdentifier.java | 2 +- .../druid/sql/calcite/parser/DruidSqlUnparseTest.java | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java index 82065877be29..fef328de588e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java @@ -69,7 +69,7 @@ public void unparse(SqlWriter writer, int 
leftPrec, int rightPrec) SqlWriter.Frame frame = writer.startFunCall(getDestinationType()); for (Map.Entry property : properties.entrySet()) { writer.sep(","); - writer.print(StringUtils.format("%s = '%s'", property.getKey(), property.getValue())); + writer.print(StringUtils.format("%s => '%s'", property.getKey(), property.getValue())); } writer.endFunCall(frame); writer.endFunCall(externFrame); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java index b2bb9d2863f4..0e843b4ca2ea 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/parser/DruidSqlUnparseTest.java @@ -100,8 +100,8 @@ private static DruidSqlParserImpl createTestParser(String parseString) @Test public void testUnparseExternalSqlIdentifierReplace() throws ParseException { - String sqlQuery = "REPLACE INTO EXTERN( s3(bucket='bucket1',prefix='prefix1') ) AS CSV OVERWRITE ALL SELECT dim2 FROM foo"; - String prettySqlQuery = "REPLACE INTO EXTERN(S3(bucket = 'bucket1', prefix = 'prefix1'))\n" + String sqlQuery = "REPLACE INTO EXTERN( s3(bucket=>'bucket1',prefix=>'prefix1') ) AS CSV OVERWRITE ALL SELECT dim2 FROM foo"; + String prettySqlQuery = "REPLACE INTO EXTERN(S3(bucket => 'bucket1', prefix => 'prefix1'))\n" + "AS csv\n" + "OVERWRITE ALL\n" + "SELECT \"dim2\"\n" @@ -115,8 +115,8 @@ public void testUnparseExternalSqlIdentifierReplace() throws ParseException @Test public void testUnparseExternalSqlIdentifierInsert() throws ParseException { - String sqlQuery = "INSERT INTO EXTERN( s3(bucket='bucket1',prefix='prefix1') ) AS CSV SELECT dim2 FROM foo"; - String prettySqlQuery = "INSERT INTO EXTERN(S3(bucket = 'bucket1', prefix = 'prefix1'))\n" + String sqlQuery = "INSERT INTO EXTERN( s3(bucket=>'bucket1',prefix=>'prefix1') ) AS CSV SELECT dim2 FROM foo"; + String prettySqlQuery = "INSERT INTO 
EXTERN(S3(bucket => 'bucket1', prefix => 'prefix1'))\n" + "AS csv\n" + "SELECT \"dim2\"\n" + " FROM \"foo\"\n"; From 62c2c0415eea735b15369a8e3411d6579cb8af42 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Fri, 2 Feb 2024 16:39:20 +0530 Subject: [PATCH 39/50] Update permission --- .../indexing/destination/DurableStorageMSQDestination.java | 4 +++- .../msq/indexing/destination/TaskReportMSQDestination.java | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DurableStorageMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DurableStorageMSQDestination.java index 7db459b5b105..e522243b60d2 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DurableStorageMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/DurableStorageMSQDestination.java @@ -20,9 +20,11 @@ package org.apache.druid.msq.indexing.destination; import com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.druid.msq.indexing.MSQControllerTask; import org.apache.druid.msq.querykit.ShuffleSpecFactories; import org.apache.druid.msq.querykit.ShuffleSpecFactory; import org.apache.druid.server.security.Resource; +import org.apache.druid.server.security.ResourceType; import java.util.Optional; @@ -59,6 +61,6 @@ public ShuffleSpecFactory getShuffleSpecFactory(int targetSize) @Override public Optional getDestinationResource() { - return Optional.empty(); + return Optional.of(new Resource(MSQControllerTask.DUMMY_DATASOURCE_FOR_SELECT, ResourceType.DATASOURCE)); } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/TaskReportMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/TaskReportMSQDestination.java index 
f5e166d92d9d..3f199255ac76 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/TaskReportMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/TaskReportMSQDestination.java @@ -20,9 +20,11 @@ package org.apache.druid.msq.indexing.destination; import com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.druid.msq.indexing.MSQControllerTask; import org.apache.druid.msq.querykit.ShuffleSpecFactories; import org.apache.druid.msq.querykit.ShuffleSpecFactory; import org.apache.druid.server.security.Resource; +import org.apache.druid.server.security.ResourceType; import java.util.Optional; @@ -57,6 +59,6 @@ public ShuffleSpecFactory getShuffleSpecFactory(int targetSize) @Override public Optional getDestinationResource() { - return Optional.empty(); + return Optional.of(new Resource(MSQControllerTask.DUMMY_DATASOURCE_FOR_SELECT, ResourceType.DATASOURCE)); } } From 2c9e87b6318a6e0e708780f26c9d15455a5fba7b Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Sat, 3 Feb 2024 23:08:18 +0530 Subject: [PATCH 40/50] Address review comments --- .../apache/druid/msq/exec/ControllerImpl.java | 34 +++++++++--------- .../results/ExportResultsFrameProcessor.java | 35 +++++++++++-------- .../ExportResultsFrameProcessorFactory.java | 20 +++++++++-- .../apache/druid/msq/exec/MSQExportTest.java | 2 +- .../druid/testsEx/msq/ITMultiStageQuery.java | 24 ++++++------- .../sql/calcite/planner/IngestHandler.java | 5 +++ .../druid/sql/http/ArrayLinesWriter.java | 10 ++++++ .../apache/druid/sql/http/ArrayWriter.java | 27 ++++++++++++++ .../org/apache/druid/sql/http/CsvWriter.java | 16 +++++++++ .../druid/sql/http/ObjectLinesWriter.java | 10 ++++++ .../apache/druid/sql/http/ObjectWriter.java | 34 ++++++++++++++++++ .../apache/druid/sql/http/ResultFormat.java | 3 ++ .../druid/sql/calcite/CalciteExportTest.java | 30 +++++++++++++--- .../export/TestExportStorageConnector.java | 5 
+-- 14 files changed, 201 insertions(+), 54 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 55273e685780..81c4a3ad02f9 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -203,7 +203,6 @@ import org.apache.druid.sql.calcite.planner.ColumnMappings; import org.apache.druid.sql.calcite.rel.DruidQuery; import org.apache.druid.sql.http.ResultFormat; -import org.apache.druid.storage.StorageConnector; import org.apache.druid.storage.StorageConnectorProvider; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentTimeline; @@ -224,6 +223,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -1875,27 +1875,24 @@ private static QueryDefinition makeQueryDefinition( final ExportMSQDestination exportMSQDestination = (ExportMSQDestination) querySpec.getDestination(); final StorageConnectorProvider storageConnectorProvider = exportMSQDestination.getStorageConnectorProvider(); - final ResultFormat resultFormat = exportMSQDestination.getResultFormat(); - - // If the statement is a 'REPLACE' statement, delete the existing files at the destination. 
- if (exportMSQDestination.getReplaceTimeChunks() != null) { - if (Intervals.ONLY_ETERNITY.equals(exportMSQDestination.getReplaceTimeChunks())) { - StorageConnector storageConnector = storageConnectorProvider.get(); - try { - storageConnector.deleteRecursively(""); - } - catch (IOException e) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.RUNTIME_FAILURE) - .build(e, "Exception occurred while deleting existing files from export destination."); - } - } else { + try { + // Check that the export destination is empty as a sanity check. We want to avoid modifying any other files with export. + Iterator filesIterator = storageConnectorProvider.get().listDir(""); + if (filesIterator.hasNext()) { throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.UNSUPPORTED) - .build("Currently export only works with OVERWRITE ALL clause."); + .ofCategory(DruidException.Category.RUNTIME_FAILURE) + .build("Found files at provided export destination. Export is only allowed to " + + "an empty path. 
Please provide an empty path or move the existing files."); } } + catch (IOException e) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.RUNTIME_FAILURE) + .build(e, "Exception occurred while connecting to export destination."); + } + + final ResultFormat resultFormat = exportMSQDestination.getResultFormat(); final QueryDefinitionBuilder builder = QueryDefinition.builder(); builder.addAll(queryDef); builder.add(StageDefinition.builder(queryDef.getNextStageNumber()) @@ -1904,6 +1901,7 @@ private static QueryDefinition makeQueryDefinition( .signature(queryDef.getFinalStageDefinition().getSignature()) .shuffleSpec(null) .processorFactory(new ExportResultsFrameProcessorFactory( + queryId, storageConnectorProvider, resultFormat )) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java index 84cb93ab49c9..de65d3e9d7ad 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java @@ -31,7 +31,6 @@ import org.apache.druid.frame.read.FrameReader; import org.apache.druid.frame.segment.FrameStorageAdapter; import org.apache.druid.java.util.common.Intervals; -import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.Unit; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; @@ -68,9 +67,8 @@ public ExportResultsFrameProcessor( final FrameReader frameReader, final StorageConnector storageConnector, final ObjectMapper jsonMapper, - final int partitionNumber, - final int workerNumber, - final ChannelCounters channelCounter + final 
ChannelCounters channelCounter, + final String exportFilePath ) { this.inputChannel = inputChannel; @@ -79,7 +77,7 @@ public ExportResultsFrameProcessor( this.storageConnector = storageConnector; this.jsonMapper = jsonMapper; this.channelCounter = channelCounter; - this.exportFilePath = getExportFilePath(workerNumber, partitionNumber, exportFormat); + this.exportFilePath = exportFilePath; } @Override @@ -111,16 +109,23 @@ public ReturnOrAwait runIncrementally(IntSet readableInputs) throws IOEx private void exportFrame(final Frame frame) throws IOException { - final RowSignature signature = frameReader.signature(); + final RowSignature exportRowSignature = createRowSignatureForExport(frameReader.signature()); final Sequence cursorSequence = new FrameStorageAdapter(frame, frameReader, Intervals.ETERNITY) .makeCursors(null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null); + // Add headers if we are writing to a new file. + final boolean writeHeader = !storageConnector.pathExists(exportFilePath); + try (OutputStream stream = storageConnector.write(exportFilePath)) { ResultFormat.Writer formatter = exportFormat.createFormatter(stream, jsonMapper); formatter.writeResponseStart(); + if (writeHeader) { + formatter.writeHeaderFromRowSignature(exportRowSignature, false); + } + SequenceUtils.forEach( cursorSequence, cursor -> { @@ -130,7 +135,7 @@ private void exportFrame(final Frame frame) throws IOException //noinspection rawtypes @SuppressWarnings("rawtypes") final List selectors = - frameReader.signature() + exportRowSignature .getColumnNames() .stream() .map(columnSelectorFactory::makeColumnValueSelector) @@ -138,11 +143,8 @@ private void exportFrame(final Frame frame) throws IOException while (!cursor.isDone()) { formatter.writeRowStart(); - for (int j = 0; j < signature.size(); j++) { - if (QueryKitUtils.PARTITION_BOOST_COLUMN.equals(signature.getColumnName(j))) { - continue; - } - formatter.writeRowField(signature.getColumnName(j), 
selectors.get(j).getObject()); + for (int j = 0; j < exportRowSignature.size(); j++) { + formatter.writeRowField(exportRowSignature.getColumnName(j), selectors.get(j).getObject()); } channelCounter.incrementRowCount(); formatter.writeRowEnd(); @@ -160,9 +162,14 @@ private void exportFrame(final Frame frame) throws IOException } } - private static String getExportFilePath(int workerNumber, int partitionNumber, ResultFormat exportFormat) + private static RowSignature createRowSignatureForExport(RowSignature inputRowSignature) { - return StringUtils.format("worker%s/partition%s.%s", workerNumber, partitionNumber, exportFormat.toString()); + RowSignature.Builder exportRowSignatureBuilder = RowSignature.builder(); + inputRowSignature.getColumnNames() + .stream() + .filter(name -> !QueryKitUtils.PARTITION_BOOST_COLUMN.equals(name)) + .forEach(name -> exportRowSignatureBuilder.add(name, inputRowSignature.getColumnType(name).orElse(null))); + return exportRowSignatureBuilder.build(); } @Override diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java index 030f020f25b6..f592d5ebbe14 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java @@ -27,6 +27,7 @@ import org.apache.druid.frame.processor.OutputChannelFactory; import org.apache.druid.frame.processor.OutputChannels; import org.apache.druid.frame.processor.manager.ProcessorManagers; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; import 
org.apache.druid.msq.counters.ChannelCounters; @@ -51,19 +52,28 @@ @JsonTypeName("exportResults") public class ExportResultsFrameProcessorFactory extends BaseFrameProcessorFactory { + private final String queryId; private final StorageConnectorProvider storageConnectorProvider; private final ResultFormat exportFormat; @JsonCreator public ExportResultsFrameProcessorFactory( + @JsonProperty("queryId") String queryId, @JsonProperty("storageConnectorProvider") StorageConnectorProvider storageConnectorProvider, @JsonProperty("exportFormat") ResultFormat exportFormat ) { + this.queryId = queryId; this.storageConnectorProvider = storageConnectorProvider; this.exportFormat = exportFormat; } + @JsonProperty("queryId") + public String getQueryId() + { + return queryId; + } + @JsonProperty("exportFormat") public ResultFormat getExportFormat() { @@ -110,9 +120,8 @@ public ProcessorsAndChannels makeProcessors( readableInput.getChannelFrameReader(), storageConnectorProvider.get(), frameContext.jsonMapper(), - readableInput.getStagePartition().getPartitionNumber(), - workerNumber, - channelCounter + channelCounter, + getExportFilePath(queryId, workerNumber, readableInput.getStagePartition().getPartitionNumber(), exportFormat) ) ); @@ -121,4 +130,9 @@ public ProcessorsAndChannels makeProcessors( OutputChannels.none() ); } + + private static String getExportFilePath(String queryId, int workerNumber, int partitionNumber, ResultFormat exportFormat) + { + return StringUtils.format("%s-worker%s-partition%s.%s", queryId, workerNumber, partitionNumber, exportFormat.toString()); + } } diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java index 54fb723ee672..c41f8c489c05 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java +++ 
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java @@ -96,7 +96,7 @@ public void testNumberOfRowsPerFile() throws IOException Assert.assertEquals( expectedFooFileContents().size(), - Objects.requireNonNull(new File(exportDir.getAbsolutePath(), "worker0").listFiles()).length + Objects.requireNonNull(new File(exportDir.getAbsolutePath()).listFiles()).length ); } diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java index df6665ce3d65..5c9d2826557b 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java @@ -194,9 +194,8 @@ public void testExport() throws Exception { String exportQuery = StringUtils.format( - "REPLACE INTO extern(%s(basePath => '%s'))\n" + "INSERT INTO extern(%s(basePath => '%s'))\n" + "AS CSV\n" - + "OVERWRITE ALL\n" + "SELECT page, added, delta\n" + "FROM TABLE(\n" + " EXTERN(\n" @@ -205,7 +204,7 @@ public void testExport() throws Exception + " 
'[{\"type\":\"string\",\"name\":\"timestamp\"},{\"type\":\"string\",\"name\":\"isRobot\"},{\"type\":\"string\",\"name\":\"diffUrl\"},{\"type\":\"long\",\"name\":\"added\"},{\"type\":\"string\",\"name\":\"countryIsoCode\"},{\"type\":\"string\",\"name\":\"regionName\"},{\"type\":\"string\",\"name\":\"channel\"},{\"type\":\"string\",\"name\":\"flags\"},{\"type\":\"long\",\"name\":\"delta\"},{\"type\":\"string\",\"name\":\"isUnpatrolled\"},{\"type\":\"string\",\"name\":\"isNew\"},{\"type\":\"double\",\"name\":\"deltaBucket\"},{\"type\":\"string\",\"name\":\"isMinor\"},{\"type\":\"string\",\"name\":\"isAnonymous\"},{\"type\":\"long\",\"name\":\"deleted\"},{\"type\":\"string\",\"name\":\"cityName\"},{\"type\":\"long\",\"name\":\"metroCode\"},{\"type\":\"string\",\"name\":\"namespace\"},{\"type\":\"string\",\"name\":\"comment\"},{\"type\":\"string\",\"name\":\"page\"},{\"type\":\"long\",\"name\":\"commentLength\"},{\"type\":\"string\",\"name\":\"countryName\"},{\"type\":\"string\",\"name\":\"user\"},{\"type\":\"string\",\"name\":\"regionIsoCode\"}]'\n" + " )\n" + ")\n", - "localStorage", "/shared/export" + "local", "/shared/export/" ); SqlTaskStatus exportTask = msqHelper.submitMsqTask(exportQuery); @@ -219,15 +218,16 @@ public void testExport() throws Exception )); } - String resultQuery = "SELECT page, added, delta\n" - + " FROM TABLE(\n" - + " EXTERN(\n" - + " '{\"type\":\"local\",\"baseDir\":\"/shared/export/worker0/\",\"filter\":\"*.csv\"}',\n" - + " '{\"type\":\"csv\",\"findColumnsFromHeader\":false,\"columns\":[\"delta\",\"added\",\"page\"]}'\n" - + " )\n" - + " ) EXTEND (\"delta\" BIGINT, \"added\" BIGINT, \"page\" VARCHAR)\n" - + " WHERE delta != 0\n" - + " ORDER BY page"; + String resultQuery = StringUtils.format( + "SELECT page, delta, added\n" + + " FROM TABLE(\n" + + " EXTERN(\n" + + " '{\"type\":\"local\",\"baseDir\":\"/shared/export/\",\"filter\":\"*.csv\"}',\n" + + " '{\"type\":\"csv\",\"findColumnsFromHeader\":true}'\n" + + " )\n" + + " ) EXTEND 
(\"added\" BIGINT, \"delta\" BIGINT, \"page\" VARCHAR)\n" + + " WHERE delta != 0\n" + + " ORDER BY page"); SqlTaskStatus resultTaskStatus = msqHelper.submitMsqTask(resultQuery); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java index f25ab92ecdfe..fe0349c77b6b 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java @@ -371,6 +371,11 @@ protected DruidSqlIngest ingestNode() @Override public void validate() { + if (ingestNode().getTargetTable() instanceof ExternalDestinationSqlIdentifier) { + throw InvalidSqlInput.exception( + "REPLACE operations do no support EXTERN destinations. Use INSERT statements to write to an external destination." + ); + } if (!handlerContext.plannerContext().featureAvailable(EngineFeature.CAN_REPLACE)) { throw InvalidSqlInput.exception( "REPLACE operations are not supported by the requested SQL engine [%s]. 
Consider using MSQ.", diff --git a/sql/src/main/java/org/apache/druid/sql/http/ArrayLinesWriter.java b/sql/src/main/java/org/apache/druid/sql/http/ArrayLinesWriter.java index beda6deceaba..55b04a193338 100644 --- a/sql/src/main/java/org/apache/druid/sql/http/ArrayLinesWriter.java +++ b/sql/src/main/java/org/apache/druid/sql/http/ArrayLinesWriter.java @@ -25,6 +25,7 @@ import com.fasterxml.jackson.databind.SerializerProvider; import org.apache.calcite.rel.type.RelDataType; import org.apache.druid.java.util.common.jackson.JacksonUtils; +import org.apache.druid.segment.column.RowSignature; import javax.annotation.Nullable; import java.io.IOException; @@ -70,6 +71,15 @@ public void writeHeader( ArrayWriter.writeHeader(jsonGenerator, rowType, includeTypes, includeSqlTypes); } + @Override + public void writeHeaderFromRowSignature( + final RowSignature rowSignature, + final boolean includeTypes + ) throws IOException + { + ArrayWriter.writeHeader(jsonGenerator, rowSignature, includeTypes); + } + @Override public void writeRowStart() throws IOException { diff --git a/sql/src/main/java/org/apache/druid/sql/http/ArrayWriter.java b/sql/src/main/java/org/apache/druid/sql/http/ArrayWriter.java index cd863d5bf175..e70f7ecfdf65 100644 --- a/sql/src/main/java/org/apache/druid/sql/http/ArrayWriter.java +++ b/sql/src/main/java/org/apache/druid/sql/http/ArrayWriter.java @@ -74,6 +74,12 @@ public void writeHeader( writeHeader(jsonGenerator, rowType, includeTypes, includeSqlTypes); } + @Override + public void writeHeaderFromRowSignature(final RowSignature rowSignature, final boolean includeTypes) throws IOException + { + writeHeader(jsonGenerator, rowSignature, includeTypes); + } + @Override public void writeRowStart() throws IOException { @@ -129,4 +135,25 @@ static void writeHeader( jsonGenerator.writeEndArray(); } } + + static void writeHeader( + final JsonGenerator jsonGenerator, + final RowSignature signature, + final boolean includeTypes + ) throws IOException + { + 
jsonGenerator.writeStartArray(); + for (String columnName : signature.getColumnNames()) { + jsonGenerator.writeString(columnName); + } + jsonGenerator.writeEndArray(); + + if (includeTypes) { + jsonGenerator.writeStartArray(); + for (int i = 0; i < signature.size(); i++) { + jsonGenerator.writeString(signature.getColumnType(i).map(TypeSignature::asTypeString).orElse(null)); + } + jsonGenerator.writeEndArray(); + } + } } diff --git a/sql/src/main/java/org/apache/druid/sql/http/CsvWriter.java b/sql/src/main/java/org/apache/druid/sql/http/CsvWriter.java index e5e997306845..060cf40da900 100644 --- a/sql/src/main/java/org/apache/druid/sql/http/CsvWriter.java +++ b/sql/src/main/java/org/apache/druid/sql/http/CsvWriter.java @@ -94,6 +94,22 @@ public void writeHeader( } } + @Override + public void writeHeaderFromRowSignature(final RowSignature signature, final boolean includeTypes) + { + writer.writeNext(signature.getColumnNames().toArray(new String[0]), false); + + if (includeTypes) { + final String[] types = new String[signature.size()]; + + for (int i = 0; i < signature.size(); i++) { + types[i] = signature.getColumnType(i).map(TypeSignature::asTypeString).orElse(null); + } + + writer.writeNext(types, false); + } + } + @Override public void writeRowStart() { diff --git a/sql/src/main/java/org/apache/druid/sql/http/ObjectLinesWriter.java b/sql/src/main/java/org/apache/druid/sql/http/ObjectLinesWriter.java index a593b9b21b2a..93fc01e0ed3b 100644 --- a/sql/src/main/java/org/apache/druid/sql/http/ObjectLinesWriter.java +++ b/sql/src/main/java/org/apache/druid/sql/http/ObjectLinesWriter.java @@ -25,6 +25,7 @@ import com.fasterxml.jackson.databind.SerializerProvider; import org.apache.calcite.rel.type.RelDataType; import org.apache.druid.java.util.common.jackson.JacksonUtils; +import org.apache.druid.segment.column.RowSignature; import javax.annotation.Nullable; import java.io.IOException; @@ -70,6 +71,15 @@ public void writeHeader( ObjectWriter.writeHeader(jsonGenerator, 
rowType, includeTypes, includeSqlTypes); } + @Override + public void writeHeaderFromRowSignature( + final RowSignature rowSignature, + final boolean includeTypes + ) throws IOException + { + ObjectWriter.writeHeader(jsonGenerator, rowSignature, includeTypes); + } + @Override public void writeRowStart() throws IOException { diff --git a/sql/src/main/java/org/apache/druid/sql/http/ObjectWriter.java b/sql/src/main/java/org/apache/druid/sql/http/ObjectWriter.java index bdab65a1f7e6..6545ce80eacb 100644 --- a/sql/src/main/java/org/apache/druid/sql/http/ObjectWriter.java +++ b/sql/src/main/java/org/apache/druid/sql/http/ObjectWriter.java @@ -77,6 +77,12 @@ public void writeHeader( writeHeader(jsonGenerator, rowType, includeTypes, includeSqlTypes); } + @Override + public void writeHeaderFromRowSignature(final RowSignature signature, final boolean includeTypes) throws IOException + { + writeHeader(jsonGenerator, signature, includeTypes); + } + @Override public void writeRowStart() throws IOException { @@ -141,4 +147,32 @@ static void writeHeader( jsonGenerator.writeEndObject(); } + + static void writeHeader( + final JsonGenerator jsonGenerator, + final RowSignature signature, + final boolean includeTypes + ) throws IOException + { + jsonGenerator.writeStartObject(); + + for (int i = 0; i < signature.size(); i++) { + jsonGenerator.writeFieldName(signature.getColumnName(i)); + + if (!includeTypes) { + jsonGenerator.writeNull(); + } else { + jsonGenerator.writeStartObject(); + + jsonGenerator.writeStringField( + ObjectWriter.TYPE_HEADER_NAME, + signature.getColumnType(i).map(TypeSignature::asTypeString).orElse(null) + ); + + jsonGenerator.writeEndObject(); + } + } + + jsonGenerator.writeEndObject(); + } } diff --git a/sql/src/main/java/org/apache/druid/sql/http/ResultFormat.java b/sql/src/main/java/org/apache/druid/sql/http/ResultFormat.java index 7a16fb7b885a..9f2f37dd7eb7 100644 --- a/sql/src/main/java/org/apache/druid/sql/http/ResultFormat.java +++ 
b/sql/src/main/java/org/apache/druid/sql/http/ResultFormat.java @@ -24,6 +24,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.calcite.rel.type.RelDataType; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.column.RowSignature; import javax.annotation.Nullable; import javax.ws.rs.core.MediaType; @@ -132,6 +133,8 @@ public interface Writer extends Closeable void writeHeader(RelDataType rowType, boolean includeTypes, boolean includeSqlTypes) throws IOException; + void writeHeaderFromRowSignature(RowSignature rowSignature, boolean includeTypes) throws IOException; + /** * Start of each result row. */ diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index 6796a07b3a79..26e2342a5efb 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -35,6 +35,7 @@ import org.apache.druid.storage.StorageConnectorModule; import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; import org.hamcrest.CoreMatchers; +import org.junit.Ignore; import org.junit.Test; import org.junit.internal.matchers.ThrowableMessageMatcher; @@ -48,7 +49,9 @@ public void configureGuice(DruidInjectorBuilder builder) builder.addModule(new TestExportModule()); } + // Disabled until replace supports external destinations. To be enabled after that point. 
@Test + @Ignore public void testReplaceIntoExtern() { testIngestionQuery() @@ -73,12 +76,32 @@ public void testReplaceIntoExtern() } @Test - public void testReplaceWithoutRequiredParameter() + public void testReplaceIntoExternShouldThrowUnsupportedException() { testIngestionQuery() - .sql(StringUtils.format("REPLACE INTO EXTERN(%s()) " + .sql(StringUtils.format("REPLACE INTO EXTERN(%s(basePath => 'export')) " + "AS CSV " + "OVERWRITE ALL " + + "SELECT dim2 FROM foo", TestExportStorageConnector.TYPE_NAME)) + .expectValidationError( + CoreMatchers.allOf( + CoreMatchers.instanceOf(DruidException.class), + ThrowableMessageMatcher.hasMessage( + CoreMatchers.containsString( + "REPLACE operations do no support EXTERN destinations. Use INSERT statements to write to an external destination." + ) + ) + ) + ) + .verify(); + } + + @Test + public void testExportWithoutRequiredParameter() + { + testIngestionQuery() + .sql(StringUtils.format("INSERT INTO EXTERN(%s()) " + + "AS CSV " + "SELECT dim2 FROM foo", LocalFileStorageConnectorProvider.TYPE_NAME)) .expectValidationError( CoreMatchers.allOf( @@ -93,9 +116,8 @@ public void testReplaceWithoutRequiredParameter() public void testExportWithPartitionedBy() { testIngestionQuery() - .sql(StringUtils.format("REPLACE INTO EXTERN(%s()) " + .sql(StringUtils.format("INSERT INTO EXTERN(%s()) " + "AS CSV " - + "OVERWRITE ALL " + "SELECT dim2 FROM foo " + "PARTITIONED BY ALL", TestExportStorageConnector.TYPE_NAME)) .expectValidationError( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnector.java b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnector.java index e0048a4dda86..b81b22ceb878 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnector.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnector.java @@ -19,6 +19,7 @@ package org.apache.druid.sql.calcite.export; +import 
com.google.common.collect.ImmutableList; import org.apache.druid.storage.StorageConnector; import java.io.ByteArrayOutputStream; @@ -44,7 +45,7 @@ public ByteArrayOutputStream getByteArrayOutputStream() @Override public boolean pathExists(String path) { - throw new UnsupportedOperationException(); + return true; } @Override @@ -86,6 +87,6 @@ public void deleteRecursively(String path) @Override public Iterator listDir(String dirName) { - throw new UnsupportedOperationException(); + return ImmutableList.of().stream().iterator(); } } From c79c496e9ef2ce9fe4c0451d839ecf0249bb3051 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Mon, 5 Feb 2024 15:33:38 +0530 Subject: [PATCH 41/50] Address review comments --- docs/multi-stage-query/reference.md | 23 ++-- .../output/AzureStorageConnectorProvider.java | 6 - .../GoogleStorageConnectorProvider.java | 6 - .../apache/druid/msq/exec/ControllerImpl.java | 8 +- .../destination/ExportMSQDestination.java | 25 ++-- .../ExportResultsFrameProcessorFactory.java | 17 +-- .../apache/druid/msq/exec/MSQExportTest.java | 2 +- .../destination/ExportMSQDestinationTest.java | 5 +- .../apache/druid/msq/test/MSQTestBase.java | 2 + .../s3/output/S3ExportStorageProvider.java | 67 ++++++++++ .../s3/output/S3StorageConnectorModule.java | 4 +- .../s3/output/S3StorageConnectorProvider.java | 6 - .../cluster/MultiStageQuery/docker-compose.py | 1 + .../druid/testsEx/msq/ITMultiStageQuery.java | 5 +- .../druid/storage/ExportStorageProvider.java | 29 +++++ .../apache/druid/storage/StorageConfig.java | 44 +++++++ .../druid/storage/StorageConnectorModule.java | 4 + .../storage/StorageConnectorProvider.java | 1 - .../local/LocalFileExportStorageProvider.java | 117 ++++++++++++++++++ .../LocalFileStorageConnectorProvider.java | 9 +- .../ExternalDestinationSqlIdentifier.java | 6 +- .../sql/calcite/planner/IngestHandler.java | 6 +- .../sql/destination/ExportDestination.java | 8 +- .../druid/sql/calcite/CalciteExportTest.java | 34 ++++- 
.../TestExportStorageConnectorProvider.java | 8 +- .../destination/ExportDestinationTest.java | 5 +- 26 files changed, 359 insertions(+), 89 deletions(-) create mode 100644 extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java create mode 100644 processing/src/main/java/org/apache/druid/storage/ExportStorageProvider.java create mode 100644 processing/src/main/java/org/apache/druid/storage/StorageConfig.java create mode 100644 processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 03191cc13072..2aac29b90eb0 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -150,12 +150,12 @@ FROM
Supported arguments to the function: -| Parameter | Required | Description | Default | -|-------------|---------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --| -| `bucket` | Yes | The S3 bucket to which the files are exported to. | n/a | -| `prefix` | Yes | Path where the exported files would be created. If the location includes other files or directories, then they might get cleaned up as well. | n/a | -| `tempDir` | Yes | Directory path on the local disk to store temporary files required while uploading the data | n/a | -| `maxRetry` | No | Defines the max number times to attempt S3 API calls to avoid failures due to transient errors. | 10 | +| Parameter | Required | Description | Default | +|-------------|---------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --| +| `bucket` | Yes | The S3 bucket to which the files are exported to. | n/a | +| `prefix` | Yes | Path where the exported files would be created. The export query would expect the destination to be empty. If the location includes other files, then the query will fail. | n/a | +| `tempDir` | Yes | Directory path on the local disk to store temporary files required while uploading the data | n/a | +| `maxRetry` | No | Defines the max number times to attempt S3 API calls to avoid failures due to transient errors. | 10 | | `chunkSize` | No | Defines the size of each chunk to temporarily store in `tempDir`. The chunk size must be between 5 MiB and 5 GiB. A large chunk size reduces the API calls to S3, however it requires more disk space to store the temporary chunks. 
| 100MiB | ##### LOCAL @@ -166,10 +166,13 @@ This is useful in a single node setup or for testing, and is not suitable for pr This can be done by passing the function `LOCAL()` as an argument to the `EXTERN FUNCTION`. Arguments to `LOCAL()` should be passed as named parameters with the value in single quotes like the example below. +To use local as an export destination, the runtime property `druid.export.storage.baseDir` must be configured on the indexer/middle manager. +The parameter provided to the `LOCAL()` function will be prefixed with this value when exporting to a local destination. + ```sql INSERT INTO EXTERN( - local(basePath => '/tmp/exportLocation') + local(exportPath => 'exportLocation/query1') ) AS CSV SELECT @@ -179,9 +182,9 @@ FROM
Supported arguments to the function: -| Parameter | Required | Description | Default | -|-------------|--------------------------------|-----------------------------------------------------------------| --| -| `basePath` | Yes | The file system path where the exported files would be created. If the location includes other files or directories, then they might get cleaned up as well.| n/a | +| Parameter | Required | Description | Default | +|-------------|--------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --| +| `exportPath` | Yes | The file system path where the exported files would be created. This argument will be prefixed with the runtime prop `druid.export.storage.baseDir`. The export query would expect the destination to be empty. If the location includes other files or directories, then the query will fail. | n/a | For more information, see [Read external data with EXTERN](concepts.md#write-to-an-external-destination-with-extern). 
diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/output/AzureStorageConnectorProvider.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/output/AzureStorageConnectorProvider.java index be39b49b2486..79be724c17f7 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/output/AzureStorageConnectorProvider.java +++ b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/output/AzureStorageConnectorProvider.java @@ -58,10 +58,4 @@ public StorageConnector get() { return new AzureStorageConnector(this, azureStorage); } - - @Override - public String getType() - { - return AzureStorageDruidModule.SCHEME; - } } diff --git a/extensions-core/google-extensions/src/main/java/org/apache/druid/storage/google/output/GoogleStorageConnectorProvider.java b/extensions-core/google-extensions/src/main/java/org/apache/druid/storage/google/output/GoogleStorageConnectorProvider.java index c5f5abe28377..49856a9c1eff 100644 --- a/extensions-core/google-extensions/src/main/java/org/apache/druid/storage/google/output/GoogleStorageConnectorProvider.java +++ b/extensions-core/google-extensions/src/main/java/org/apache/druid/storage/google/output/GoogleStorageConnectorProvider.java @@ -60,10 +60,4 @@ public StorageConnector get() { return new GoogleStorageConnector(this, googleStorage, googleInputDataConfig); } - - @Override - public String getType() - { - return GoogleStorageDruidModule.SCHEME; - } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 81c4a3ad02f9..9f52c5302c01 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -203,7 +203,7 @@ import 
org.apache.druid.sql.calcite.planner.ColumnMappings; import org.apache.druid.sql.calcite.rel.DruidQuery; import org.apache.druid.sql.http.ResultFormat; -import org.apache.druid.storage.StorageConnectorProvider; +import org.apache.druid.storage.ExportStorageProvider; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentTimeline; import org.apache.druid.timeline.partition.DimensionRangeShardSpec; @@ -1873,11 +1873,11 @@ private static QueryDefinition makeQueryDefinition( } } else if (querySpec.getDestination() instanceof ExportMSQDestination) { final ExportMSQDestination exportMSQDestination = (ExportMSQDestination) querySpec.getDestination(); - final StorageConnectorProvider storageConnectorProvider = exportMSQDestination.getStorageConnectorProvider(); + final ExportStorageProvider exportStorageProvider = exportMSQDestination.getExportStorageProvider(); try { // Check that the export destination is empty as a sanity check. We want to avoid modifying any other files with export. 
- Iterator filesIterator = storageConnectorProvider.get().listDir(""); + Iterator filesIterator = exportStorageProvider.get().listDir(""); if (filesIterator.hasNext()) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.RUNTIME_FAILURE) @@ -1902,7 +1902,7 @@ private static QueryDefinition makeQueryDefinition( .shuffleSpec(null) .processorFactory(new ExportResultsFrameProcessorFactory( queryId, - storageConnectorProvider, + exportStorageProvider, resultFormat )) ); diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java index 6a08ad14dd7b..cdf4d425a92b 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java @@ -27,7 +27,7 @@ import org.apache.druid.server.security.Resource; import org.apache.druid.server.security.ResourceType; import org.apache.druid.sql.http.ResultFormat; -import org.apache.druid.storage.StorageConnectorProvider; +import org.apache.druid.storage.ExportStorageProvider; import org.joda.time.Interval; import javax.annotation.Nullable; @@ -37,7 +37,7 @@ /** * Destination used by tasks that write the results as files to an external destination. {@link #resultFormat} denotes - * the format of the file created and {@link #storageConnectorProvider} denotes the type of external + * the format of the file created and {@link #exportStorageProvider} denotes the type of external * destination. *
* {@link #replaceTimeChunks} denotes how existing files should be handled. @@ -47,28 +47,29 @@ public class ExportMSQDestination implements MSQDestination { public static final String TYPE = "export"; - private final StorageConnectorProvider storageConnectorProvider; + private final ExportStorageProvider exportStorageProvider; private final ResultFormat resultFormat; @Nullable private final List replaceTimeChunks; @JsonCreator public ExportMSQDestination( - @JsonProperty("storageConnectorProvider") StorageConnectorProvider storageConnectorProvider, + @JsonProperty("exportStorageProvider") ExportStorageProvider exportStorageProvider, @JsonProperty("resultFormat") ResultFormat resultFormat, @JsonProperty("replaceTimeChunks") @Nullable List replaceTimeChunks ) { - this.storageConnectorProvider = storageConnectorProvider; + this.exportStorageProvider = exportStorageProvider; this.resultFormat = resultFormat; this.replaceTimeChunks = replaceTimeChunks; } - @JsonProperty("storageConnectorProvider") - public StorageConnectorProvider getStorageConnectorProvider() + + @JsonProperty("exportStorageProvider") + public ExportStorageProvider getExportStorageProvider() { - return storageConnectorProvider; + return exportStorageProvider; } @JsonProperty("resultFormat") @@ -95,7 +96,7 @@ public boolean equals(Object o) return false; } ExportMSQDestination that = (ExportMSQDestination) o; - return Objects.equals(storageConnectorProvider, that.storageConnectorProvider) + return Objects.equals(exportStorageProvider, that.exportStorageProvider) && resultFormat == that.resultFormat && Objects.equals(replaceTimeChunks, that.replaceTimeChunks); } @@ -103,14 +104,14 @@ public boolean equals(Object o) @Override public int hashCode() { - return Objects.hash(storageConnectorProvider, resultFormat, replaceTimeChunks); + return Objects.hash(exportStorageProvider, resultFormat, replaceTimeChunks); } @Override public String toString() { return "ExportMSQDestination{" + - 
"storageConnectorProvider=" + storageConnectorProvider + + "exportStorageProvider=" + exportStorageProvider + ", resultFormat=" + resultFormat + ", replaceTimeChunks=" + replaceTimeChunks + '}'; @@ -125,6 +126,6 @@ public ShuffleSpecFactory getShuffleSpecFactory(int targetSize) @Override public Optional getDestinationResource() { - return Optional.of(new Resource(getStorageConnectorProvider().getType(), ResourceType.EXTERNAL)); + return Optional.of(new Resource(getExportStorageProvider().getResourceType(), ResourceType.EXTERNAL)); } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java index f592d5ebbe14..c9f9b6a40a81 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java @@ -42,7 +42,7 @@ import org.apache.druid.msq.kernel.StageDefinition; import org.apache.druid.msq.querykit.BaseFrameProcessorFactory; import org.apache.druid.sql.http.ResultFormat; -import org.apache.druid.storage.StorageConnectorProvider; +import org.apache.druid.storage.ExportStorageProvider; import org.apache.druid.utils.CollectionUtils; import javax.annotation.Nullable; @@ -53,18 +53,18 @@ public class ExportResultsFrameProcessorFactory extends BaseFrameProcessorFactory { private final String queryId; - private final StorageConnectorProvider storageConnectorProvider; + private final ExportStorageProvider exportStorageProvider; private final ResultFormat exportFormat; @JsonCreator public ExportResultsFrameProcessorFactory( @JsonProperty("queryId") String queryId, - @JsonProperty("storageConnectorProvider") StorageConnectorProvider storageConnectorProvider, + 
@JsonProperty("exportStorageProvider") ExportStorageProvider exportStorageProvider, @JsonProperty("exportFormat") ResultFormat exportFormat ) { this.queryId = queryId; - this.storageConnectorProvider = storageConnectorProvider; + this.exportStorageProvider = exportStorageProvider; this.exportFormat = exportFormat; } @@ -80,10 +80,11 @@ public ResultFormat getExportFormat() return exportFormat; } - @JsonProperty("storageConnectorProvider") - public StorageConnectorProvider getStorageConnectorProvider() + + @JsonProperty("exportStorageProvider") + public ExportStorageProvider getExportStorageProvider() { - return storageConnectorProvider; + return exportStorageProvider; } @Override @@ -118,7 +119,7 @@ public ProcessorsAndChannels makeProcessors( readableInput.getChannel(), exportFormat, readableInput.getChannelFrameReader(), - storageConnectorProvider.get(), + exportStorageProvider.get(), frameContext.jsonMapper(), channelCounter, getExportFilePath(queryId, workerNumber, readableInput.getStagePartition().getPartitionNumber(), exportFormat) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java index c41f8c489c05..e6c3b5e2931a 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQExportTest.java @@ -84,7 +84,7 @@ public void testNumberOfRowsPerFile() throws IOException Map queryContext = new HashMap<>(DEFAULT_MSQ_CONTEXT); queryContext.put(MultiStageQueryContext.CTX_ROWS_PER_PAGE, 1); - final String sql = StringUtils.format("insert into extern(local(basePath=>'%s')) as csv select cnt, dim1 from foo", exportDir.getAbsolutePath()); + final String sql = StringUtils.format("insert into extern(local(exportPath=>'%s')) as csv select cnt, dim1 from foo", exportDir.getAbsolutePath()); 
testIngestQuery().setSql(sql) .setExpectedDataSource("foo1") diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java index 211986f712c6..f8978126d140 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java @@ -24,11 +24,10 @@ import org.apache.druid.java.util.common.Intervals; import org.apache.druid.sql.http.ResultFormat; import org.apache.druid.storage.StorageConnectorModule; -import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; +import org.apache.druid.storage.local.LocalFileExportStorageProvider; import org.junit.Assert; import org.junit.Test; -import java.io.File; import java.io.IOException; public class ExportMSQDestinationTest @@ -37,7 +36,7 @@ public class ExportMSQDestinationTest public void testSerde() throws IOException { ExportMSQDestination exportDestination = new ExportMSQDestination( - new LocalFileStorageConnectorProvider(new File("/path")), + new LocalFileExportStorageProvider("/path"), ResultFormat.CSV, Intervals.ONLY_ETERNITY ); diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index ce89d2b7846c..f8fc01b9369f 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java @@ -176,6 +176,7 @@ import org.apache.druid.sql.calcite.util.SqlTestFramework; import org.apache.druid.sql.calcite.view.InProcessViewManager; import 
org.apache.druid.sql.guice.SqlBindings; +import org.apache.druid.storage.StorageConfig; import org.apache.druid.storage.StorageConnector; import org.apache.druid.storage.StorageConnectorModule; import org.apache.druid.storage.StorageConnectorProvider; @@ -473,6 +474,7 @@ public String getFormatString() ); binder.bind(Key.get(StorageConnector.class, MultiStageQuery.class)) .toProvider(() -> localFileStorageConnector); + binder.bind(StorageConfig.class).toInstance(new StorageConfig("/")); } catch (IOException e) { throw new ISE(e, "Unable to create setup storage connector"); diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java new file mode 100644 index 000000000000..97517cef8dec --- /dev/null +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.storage.s3.output; + +import com.fasterxml.jackson.annotation.JacksonInject; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; +import org.apache.druid.data.input.s3.S3InputSource; +import org.apache.druid.java.util.common.HumanReadableBytes; +import org.apache.druid.storage.ExportStorageProvider; +import org.apache.druid.storage.StorageConnector; +import org.apache.druid.storage.s3.ServerSideEncryptingAmazonS3; + +import java.io.File; + +@JsonTypeName(S3ExportStorageProvider.TYPE_NAME) +public class S3ExportStorageProvider extends S3OutputConfig implements ExportStorageProvider +{ + public static final String TYPE_NAME = S3InputSource.TYPE_KEY; + + @JacksonInject + ServerSideEncryptingAmazonS3 s3; + + @JsonCreator + public S3ExportStorageProvider( + @JsonProperty(value = "bucket", required = true) String bucket, + @JsonProperty(value = "prefix", required = true) String prefix, + @JsonProperty(value = "tempDir", required = true) File tempDir, + @JsonProperty("chunkSize") HumanReadableBytes chunkSize, + @JsonProperty("maxRetry") Integer maxRetry + ) + { + super(bucket, prefix, tempDir, chunkSize, maxRetry); + } + + @Override + public StorageConnector get() + { + return new S3StorageConnector(this, s3); + } + + @Override + @JsonIgnore + public String getResourceType() + { + return TYPE_NAME; + } +} diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorModule.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorModule.java index c51f63adfc5b..6d168875c950 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorModule.java +++ 
b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorModule.java @@ -33,7 +33,9 @@ public class S3StorageConnectorModule implements DruidModule public List getJacksonModules() { return Collections.singletonList( - new SimpleModule(this.getClass().getSimpleName()).registerSubtypes(S3StorageConnectorProvider.class) + new SimpleModule(this.getClass().getSimpleName()) + .registerSubtypes(S3StorageConnectorProvider.class) + .registerSubtypes(S3ExportStorageProvider.class) ); } diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java index da40bb5b3871..7f4b43a0ede8 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorProvider.java @@ -55,10 +55,4 @@ public StorageConnector get() { return new S3StorageConnector(this, s3); } - - @Override - public String getType() - { - return S3StorageDruidModule.SCHEME; - } } diff --git a/integration-tests-ex/cases/cluster/MultiStageQuery/docker-compose.py b/integration-tests-ex/cases/cluster/MultiStageQuery/docker-compose.py index bb88aa6de2d8..159a0638dd1e 100644 --- a/integration-tests-ex/cases/cluster/MultiStageQuery/docker-compose.py +++ b/integration-tests-ex/cases/cluster/MultiStageQuery/docker-compose.py @@ -22,5 +22,6 @@ def define_indexer(self): self.add_property(service, 'druid.msq.intermediate.storage.enable', 'true') self.add_property(service, 'druid.msq.intermediate.storage.type', 'local') self.add_property(service, 'druid.msq.intermediate.storage.basePath', '/shared/durablestorage/') + self.add_property(service, 'druid.export.storage.baseDir', '/') generate(__file__, Template()) diff --git 
a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java index 5c9d2826557b..a8677a53e734 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQuery.java @@ -29,6 +29,7 @@ import org.apache.druid.msq.indexing.report.MSQTaskReport; import org.apache.druid.msq.indexing.report.MSQTaskReportPayload; import org.apache.druid.msq.sql.SqlTaskStatus; +import org.apache.druid.storage.local.LocalFileExportStorageProvider; import org.apache.druid.testing.clients.CoordinatorResourceTestClient; import org.apache.druid.testing.utils.DataLoaderHelper; import org.apache.druid.testing.utils.MsqTestQueryHelper; @@ -194,7 +195,7 @@ public void testExport() throws Exception { String exportQuery = StringUtils.format( - "INSERT INTO extern(%s(basePath => '%s'))\n" + "INSERT INTO extern(%s(exportPath => '%s'))\n" + "AS CSV\n" + "SELECT page, added, delta\n" + "FROM TABLE(\n" @@ -204,7 +205,7 @@ public void testExport() throws Exception + " 
'[{\"type\":\"string\",\"name\":\"timestamp\"},{\"type\":\"string\",\"name\":\"isRobot\"},{\"type\":\"string\",\"name\":\"diffUrl\"},{\"type\":\"long\",\"name\":\"added\"},{\"type\":\"string\",\"name\":\"countryIsoCode\"},{\"type\":\"string\",\"name\":\"regionName\"},{\"type\":\"string\",\"name\":\"channel\"},{\"type\":\"string\",\"name\":\"flags\"},{\"type\":\"long\",\"name\":\"delta\"},{\"type\":\"string\",\"name\":\"isUnpatrolled\"},{\"type\":\"string\",\"name\":\"isNew\"},{\"type\":\"double\",\"name\":\"deltaBucket\"},{\"type\":\"string\",\"name\":\"isMinor\"},{\"type\":\"string\",\"name\":\"isAnonymous\"},{\"type\":\"long\",\"name\":\"deleted\"},{\"type\":\"string\",\"name\":\"cityName\"},{\"type\":\"long\",\"name\":\"metroCode\"},{\"type\":\"string\",\"name\":\"namespace\"},{\"type\":\"string\",\"name\":\"comment\"},{\"type\":\"string\",\"name\":\"page\"},{\"type\":\"long\",\"name\":\"commentLength\"},{\"type\":\"string\",\"name\":\"countryName\"},{\"type\":\"string\",\"name\":\"user\"},{\"type\":\"string\",\"name\":\"regionIsoCode\"}]'\n" + " )\n" + ")\n", - "local", "/shared/export/" + LocalFileExportStorageProvider.TYPE_NAME, "/shared/export/" ); SqlTaskStatus exportTask = msqHelper.submitMsqTask(exportQuery); diff --git a/processing/src/main/java/org/apache/druid/storage/ExportStorageProvider.java b/processing/src/main/java/org/apache/druid/storage/ExportStorageProvider.java new file mode 100644 index 000000000000..0dd8984d70e0 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/storage/ExportStorageProvider.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.storage; + +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.google.inject.Provider; + +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") +public interface ExportStorageProvider extends Provider +{ + String getResourceType(); +} diff --git a/processing/src/main/java/org/apache/druid/storage/StorageConfig.java b/processing/src/main/java/org/apache/druid/storage/StorageConfig.java new file mode 100644 index 000000000000..0f49afd5f757 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/storage/StorageConfig.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.storage; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import javax.annotation.Nullable; + +public class StorageConfig +{ + @JsonProperty("baseDir") + @Nullable + private final String baseDir; + + @JsonCreator + public StorageConfig(@JsonProperty("baseDir") @Nullable String baseDir) + { + this.baseDir = baseDir; + } + + @Nullable + public String getBaseDir() + { + return baseDir; + } +} diff --git a/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java b/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java index 4792a5139c6b..a919742dc226 100644 --- a/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java +++ b/processing/src/main/java/org/apache/druid/storage/StorageConnectorModule.java @@ -23,7 +23,9 @@ import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.common.collect.ImmutableList; import com.google.inject.Binder; +import org.apache.druid.guice.JsonConfigProvider; import org.apache.druid.initialization.DruidModule; +import org.apache.druid.storage.local.LocalFileExportStorageProvider; import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; import java.util.List; @@ -36,11 +38,13 @@ public List getJacksonModules() return ImmutableList.of( new SimpleModule(StorageConnector.class.getSimpleName()) .registerSubtypes(LocalFileStorageConnectorProvider.class) + .registerSubtypes(LocalFileExportStorageProvider.class) ); } @Override public void configure(Binder binder) { + JsonConfigProvider.bind(binder, "druid.export.storage", StorageConfig.class); } } diff --git a/processing/src/main/java/org/apache/druid/storage/StorageConnectorProvider.java b/processing/src/main/java/org/apache/druid/storage/StorageConnectorProvider.java index 28fee208e5b7..9fece71eab8e 100644 --- a/processing/src/main/java/org/apache/druid/storage/StorageConnectorProvider.java +++ 
b/processing/src/main/java/org/apache/druid/storage/StorageConnectorProvider.java @@ -25,5 +25,4 @@ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") public interface StorageConnectorProvider extends Provider { - String getType(); } diff --git a/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java b/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java new file mode 100644 index 000000000000..ea4e175b3142 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.storage.local; + +import com.fasterxml.jackson.annotation.JacksonInject; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; +import org.apache.druid.data.input.impl.LocalInputSource; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.storage.ExportStorageProvider; +import org.apache.druid.storage.StorageConfig; +import org.apache.druid.storage.StorageConnector; + +import java.io.File; +import java.io.IOException; +import java.util.Objects; + +@JsonTypeName(LocalFileExportStorageProvider.TYPE_NAME) +public class LocalFileExportStorageProvider implements ExportStorageProvider +{ + public static final String TYPE_NAME = LocalInputSource.TYPE_KEY; + + @JacksonInject + StorageConfig storageConfig; + + @JsonProperty + private final String exportPath; + + @JsonCreator + public LocalFileExportStorageProvider(@JsonProperty(value = "exportPath", required = true) String exportPath) + { + this.exportPath = exportPath; + } + + @Override + public StorageConnector get() + { + final String baseDir = storageConfig.getBaseDir(); + if (baseDir == null) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build("The runtime property `druid.export.storage.baseDir` must be configured."); + } + try { + final File exportDestination = new File(baseDir, exportPath); + final String finalOutputPath = exportDestination.getCanonicalPath(); + if (!finalOutputPath.startsWith(baseDir)) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build("The provided destination must be within the path configured by runtime property `druid.export.storage.baseDir`"); + } + return new 
LocalFileStorageConnector(exportDestination); + } + catch (IOException e) { + throw new IAE( + e, + "Unable to create storage connector [%s] for base path [%s]", + LocalFileStorageConnector.class.getSimpleName(), + exportPath + ); + } + } + + @Override + @JsonIgnore + public String getResourceType() + { + return TYPE_NAME; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LocalFileExportStorageProvider that = (LocalFileExportStorageProvider) o; + return Objects.equals(exportPath, that.exportPath); + } + + @Override + public int hashCode() + { + return Objects.hash(exportPath); + } + + @Override + public String toString() + { + return "LocalFileExportStorageProvider{" + + "exportPath=" + exportPath + + '}'; + } +} diff --git a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java index cccc3c383c1a..82d1623f8404 100644 --- a/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java +++ b/processing/src/main/java/org/apache/druid/storage/local/LocalFileStorageConnectorProvider.java @@ -34,13 +34,12 @@ public class LocalFileStorageConnectorProvider implements StorageConnectorProvider { public static final String TYPE_NAME = "local"; - public static final String BASE_PATH_FIELD_NAME = "basePath"; @JsonProperty final File basePath; @JsonCreator - public LocalFileStorageConnectorProvider(@JsonProperty(value = BASE_PATH_FIELD_NAME, required = true) File basePath) + public LocalFileStorageConnectorProvider(@JsonProperty(value = "basePath", required = true) File basePath) { this.basePath = basePath; } @@ -79,10 +78,4 @@ public int hashCode() { return Objects.hash(basePath); } - - @Override - public String getType() - { - return LocalFileStorageConnectorProvider.TYPE_NAME; - } } diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java index fef328de588e..60d88f3c2df5 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/parser/ExternalDestinationSqlIdentifier.java @@ -26,7 +26,7 @@ import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.storage.StorageConnectorProvider; +import org.apache.druid.storage.ExportStorageProvider; import org.apache.druid.utils.CollectionUtils; import java.util.HashMap; @@ -92,14 +92,14 @@ public Object clone() throw DruidException.defensive("Function is deprecated, please use clone(SqlNode) instead."); } - public StorageConnectorProvider toStorageConnectorProvider(ObjectMapper objectMapper) + public ExportStorageProvider toExportStorageProvider(ObjectMapper objectMapper) { final HashMap storageConnectorProperties = new HashMap<>(properties); storageConnectorProperties.put("type", getDestinationType()); return objectMapper.convertValue( storageConnectorProperties, - StorageConnectorProvider.class + ExportStorageProvider.class ); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java index fe0349c77b6b..67ad85f24fbc 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java @@ -52,7 +52,7 @@ import org.apache.druid.sql.destination.ExportDestination; import org.apache.druid.sql.destination.IngestDestination; import org.apache.druid.sql.destination.TableDestination; -import org.apache.druid.storage.StorageConnectorProvider; +import 
org.apache.druid.storage.ExportStorageProvider; import java.util.List; import java.util.regex.Pattern; @@ -224,8 +224,8 @@ private IngestDestination validateAndGetDataSourceForIngest() .build("Operation [%s] requires a target table", operationName()); } else if (tableIdentifier instanceof ExternalDestinationSqlIdentifier) { ExternalDestinationSqlIdentifier externalDestination = ((ExternalDestinationSqlIdentifier) tableIdentifier); - StorageConnectorProvider storageConnectorProvider = externalDestination.toStorageConnectorProvider(handlerContext.jsonMapper()); - dataSource = new ExportDestination(storageConnectorProvider); + ExportStorageProvider storageProvider = externalDestination.toExportStorageProvider(handlerContext.jsonMapper()); + dataSource = new ExportDestination(storageProvider); resourceActions.add(new ResourceAction(new Resource(externalDestination.getDestinationType(), ResourceType.EXTERNAL), Action.WRITE)); } else if (tableIdentifier.names.size() == 1) { // Unqualified name. 
diff --git a/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java b/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java index dd08535c7835..300998e64458 100644 --- a/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java +++ b/sql/src/main/java/org/apache/druid/sql/destination/ExportDestination.java @@ -21,7 +21,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; -import org.apache.druid.storage.StorageConnectorProvider; +import org.apache.druid.storage.ExportStorageProvider; import java.util.Objects; @@ -32,15 +32,15 @@ public class ExportDestination implements IngestDestination { public static final String TYPE_KEY = "external"; - private final StorageConnectorProvider storageConnectorProvider; + private final ExportStorageProvider storageConnectorProvider; - public ExportDestination(@JsonProperty("storageConnectorProvider") StorageConnectorProvider storageConnectorProvider) + public ExportDestination(@JsonProperty("storageConnectorProvider") ExportStorageProvider storageConnectorProvider) { this.storageConnectorProvider = storageConnectorProvider; } @JsonProperty("storageConnectorProvider") - public StorageConnectorProvider getStorageConnectorProvider() + public ExportStorageProvider getStorageConnectorProvider() { return storageConnectorProvider; } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java index 26e2342a5efb..cc4b2a0fec49 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteExportTest.java @@ -19,8 +19,13 @@ package org.apache.druid.sql.calcite; +import com.fasterxml.jackson.databind.Module; +import com.fasterxml.jackson.databind.module.SimpleModule; +import com.google.common.collect.ImmutableList; +import com.google.inject.Binder; import 
org.apache.druid.error.DruidException; import org.apache.druid.guice.DruidInjectorBuilder; +import org.apache.druid.initialization.DruidModule; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.Druids; import org.apache.druid.query.scan.ScanQuery; @@ -32,21 +37,42 @@ import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.util.CalciteTests; import org.apache.druid.sql.destination.ExportDestination; -import org.apache.druid.storage.StorageConnectorModule; +import org.apache.druid.storage.StorageConfig; +import org.apache.druid.storage.StorageConnector; +import org.apache.druid.storage.local.LocalFileExportStorageProvider; import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; import org.hamcrest.CoreMatchers; import org.junit.Ignore; import org.junit.Test; import org.junit.internal.matchers.ThrowableMessageMatcher; +import java.util.List; + public class CalciteExportTest extends CalciteIngestionDmlTest { @Override public void configureGuice(DruidInjectorBuilder builder) { super.configureGuice(builder); - builder.addModule(new StorageConnectorModule()); builder.addModule(new TestExportModule()); + builder.addModule(new DruidModule() + { + @Override + public List getJacksonModules() + { + return ImmutableList.of( + new SimpleModule(StorageConnector.class.getSimpleName()) + .registerSubtypes(LocalFileStorageConnectorProvider.class) + .registerSubtypes(LocalFileExportStorageProvider.class) + ); + } + + @Override + public void configure(Binder binder) + { + binder.bind(StorageConfig.class).toInstance(new StorageConfig("/tmp/export")); + } + }); } // Disabled until replace supports external destinations. To be enabled after that point. 
@@ -102,11 +128,11 @@ public void testExportWithoutRequiredParameter() testIngestionQuery() .sql(StringUtils.format("INSERT INTO EXTERN(%s()) " + "AS CSV " - + "SELECT dim2 FROM foo", LocalFileStorageConnectorProvider.TYPE_NAME)) + + "SELECT dim2 FROM foo", LocalFileExportStorageProvider.TYPE_NAME)) .expectValidationError( CoreMatchers.allOf( CoreMatchers.instanceOf(IllegalArgumentException.class), - ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Missing required creator property 'basePath'")) + ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Missing required creator property 'exportPath'")) ) ) .verify(); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java index 1e0c9fbfe483..4142f25ac493 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java @@ -19,10 +19,10 @@ package org.apache.druid.sql.calcite.export; +import org.apache.druid.storage.ExportStorageProvider; import org.apache.druid.storage.StorageConnector; -import org.apache.druid.storage.StorageConnectorProvider; -public class TestExportStorageConnectorProvider implements StorageConnectorProvider +public class TestExportStorageConnectorProvider implements ExportStorageProvider { private static final StorageConnector STORAGE_CONNECTOR = new TestExportStorageConnector(); @@ -33,8 +33,8 @@ public StorageConnector get() } @Override - public String getType() + public String getResourceType() { - return TestExportStorageConnector.TYPE_NAME; + return "testExport"; } } diff --git a/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java b/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java index ec56654c8785..eadc21e389ef 100644 --- 
a/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java +++ b/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java @@ -22,11 +22,10 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.storage.StorageConnectorModule; -import org.apache.druid.storage.local.LocalFileStorageConnectorProvider; +import org.apache.druid.storage.local.LocalFileExportStorageProvider; import org.junit.Assert; import org.junit.Test; -import java.io.File; import java.io.IOException; public class ExportDestinationTest @@ -34,7 +33,7 @@ public class ExportDestinationTest @Test public void testSerde() throws IOException { - ExportDestination exportDestination = new ExportDestination(new LocalFileStorageConnectorProvider(new File("/basepath/export"))); + ExportDestination exportDestination = new ExportDestination(new LocalFileExportStorageProvider("/basepath/export")); ObjectMapper objectMapper = new DefaultObjectMapper(); objectMapper.registerModules(new StorageConnectorModule().getJacksonModules()); From 81ee2a3fe37d74f58ba1aeca1fda5761cf42a9ae Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Mon, 5 Feb 2024 18:21:20 +0530 Subject: [PATCH 42/50] Address review comments --- docs/multi-stage-query/reference.md | 22 +++--- .../s3/output/S3ExportStorageProvider.java | 67 +++++++++++++++++-- .../druid/storage/StorageConnectorUtils.java | 50 ++++++++++++++ .../local/LocalFileExportStorageProvider.java | 18 +---- 4 files changed, 125 insertions(+), 32 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/storage/StorageConnectorUtils.java diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 2aac29b90eb0..5d88e7ba657f 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -140,7 +140,7 @@ The `S3()` function is a druid function which configures the connection. 
Argumen ```sql INSERT INTO EXTERN( - S3(bucket => 's3://your_bucket', prefix => 'prefix/to/files', tempDir => '/tmp/export') + S3(bucket => 's3://your_bucket', prefix => 'prefix/to/files') ) AS CSV SELECT @@ -150,13 +150,13 @@ FROM
Supported arguments to the function: -| Parameter | Required | Description | Default | -|-------------|---------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --| -| `bucket` | Yes | The S3 bucket to which the files are exported to. | n/a | -| `prefix` | Yes | Path where the exported files would be created. The export query would expect the destination to be empty. If the location includes other files, then the query will fail. | n/a | -| `tempDir` | Yes | Directory path on the local disk to store temporary files required while uploading the data | n/a | -| `maxRetry` | No | Defines the max number times to attempt S3 API calls to avoid failures due to transient errors. | 10 | -| `chunkSize` | No | Defines the size of each chunk to temporarily store in `tempDir`. The chunk size must be between 5 MiB and 5 GiB. A large chunk size reduces the API calls to S3, however it requires more disk space to store the temporary chunks. | 100MiB | +| Parameter | Required | Description | Default | +|-------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------| +| `bucket` | Yes | The S3 bucket to which the files are exported to. | n/a | +| `prefix` | Yes | Path where the exported files would be created. The export query would expect the destination to be empty. If the location includes other files, then the query will fail. | n/a | +| `tempSubDir` | No | Subdirectory of `druid.export.storage.baseDir` used to store temporary files required while uploading the data. If this argument is not present, the runtime property will be used as the temporary directory. | . 
| +| `maxRetry` | No | Defines the max number of times to attempt S3 API calls to avoid failures due to transient errors. | 10 | +| `chunkSize` | No | Defines the size of each chunk to temporarily store in `tempSubDir`. The chunk size must be between 5 MiB and 5 GiB. A large chunk size reduces the API calls to S3, however it requires more disk space to store the temporary chunks. | 100MiB | ##### LOCAL @@ -182,9 +182,9 @@ FROM
Supported arguments to the function: -| Parameter | Required | Description | Default | -|-------------|--------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --| -| `exportPath` | Yes | The file system path where the exported files would be created. This argument will be prefixed with the runtime prop `druid.export.storage.baseDir`. The export query would expect the destination to be empty. If the location includes other files or directories, then the query will fail. | n/a | +| Parameter | Required | Description | Default | +|-------------|--------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --| +| `exportPath` | Yes | Subdirectory of `druid.export.storage.baseDir` used as the destination to export the results to. The export query expects the destination to be empty. If the location includes other files or directories, then the query will fail. | n/a | For more information, see [Read external data with EXTERN](concepts.md#write-to-an-external-destination-with-extern). 
diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java index 97517cef8dec..f30b5c08cad7 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java @@ -27,16 +27,33 @@ import org.apache.druid.data.input.s3.S3InputSource; import org.apache.druid.java.util.common.HumanReadableBytes; import org.apache.druid.storage.ExportStorageProvider; +import org.apache.druid.storage.StorageConfig; import org.apache.druid.storage.StorageConnector; +import org.apache.druid.storage.StorageConnectorUtils; import org.apache.druid.storage.s3.ServerSideEncryptingAmazonS3; +import javax.annotation.Nullable; import java.io.File; @JsonTypeName(S3ExportStorageProvider.TYPE_NAME) -public class S3ExportStorageProvider extends S3OutputConfig implements ExportStorageProvider +public class S3ExportStorageProvider implements ExportStorageProvider { public static final String TYPE_NAME = S3InputSource.TYPE_KEY; + @JsonProperty + private final String bucket; + @JsonProperty + private final String prefix; + @JsonProperty + private final String tempSubDir; + @JsonProperty + @Nullable + private final HumanReadableBytes chunkSize; + @JsonProperty + @Nullable + private final Integer maxRetry; + @JacksonInject + StorageConfig storageConfig; @JacksonInject ServerSideEncryptingAmazonS3 s3; @@ -44,18 +61,56 @@ public class S3ExportStorageProvider extends S3OutputConfig implements ExportSto public S3ExportStorageProvider( @JsonProperty(value = "bucket", required = true) String bucket, @JsonProperty(value = "prefix", required = true) String prefix, - @JsonProperty(value = "tempDir", required = true) File tempDir, - @JsonProperty("chunkSize") HumanReadableBytes 
chunkSize, - @JsonProperty("maxRetry") Integer maxRetry + @JsonProperty(value = "tempSubDir") @Nullable String tempSubDir, + @JsonProperty("chunkSize") @Nullable HumanReadableBytes chunkSize, + @JsonProperty("maxRetry") @Nullable Integer maxRetry ) { - super(bucket, prefix, tempDir, chunkSize, maxRetry); + this.bucket = bucket; + this.prefix = prefix; + this.tempSubDir = tempSubDir == null ? "" : tempSubDir; + this.chunkSize = chunkSize; + this.maxRetry = maxRetry; } @Override public StorageConnector get() { - return new S3StorageConnector(this, s3); + final File temporaryDirectory = StorageConnectorUtils.validateAndGetPath(storageConfig.getBaseDir(), tempSubDir); + final S3OutputConfig s3OutputConfig = new S3OutputConfig(bucket, prefix, temporaryDirectory, chunkSize, maxRetry); + return new S3StorageConnector(s3OutputConfig, s3); + } + + @JsonProperty("bucket") + public String getBucket() + { + return bucket; + } + + @JsonProperty("prefix") + public String getPrefix() + { + return prefix; + } + + @JsonProperty("tempSubDir") + public String getTempSubDir() + { + return tempSubDir; + } + + @JsonProperty("chunkSize") + @Nullable + public HumanReadableBytes getChunkSize() + { + return chunkSize; + } + + @JsonProperty("maxRetry") + @Nullable + public Integer getMaxRetry() + { + return maxRetry; } @Override diff --git a/processing/src/main/java/org/apache/druid/storage/StorageConnectorUtils.java b/processing/src/main/java/org/apache/druid/storage/StorageConnectorUtils.java new file mode 100644 index 000000000000..f46f2a5ca211 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/storage/StorageConnectorUtils.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.storage; + +import org.apache.druid.error.DruidException; + +import java.io.File; + +public class StorageConnectorUtils +{ + public static File validateAndGetPath(String basePath, String customPath) + { + if (basePath == null) { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.NOT_FOUND) + .build( + "The runtime property `druid.export.storage.baseDir` must be configured for export functionality."); + } + final File baseDir = new File(basePath); + final File exportFile = new File(baseDir, customPath); + if (!exportFile.toPath().normalize().startsWith(baseDir.toPath())) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build( + "The provided destination must be within the path configured by runtime property `druid.export.storage.baseDir`"); + } + return exportFile; + } + + private StorageConnectorUtils() + { + } +} diff --git a/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java b/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java index ea4e175b3142..53a65d0dfef5 100644 --- a/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java +++ 
b/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java @@ -25,11 +25,11 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; import org.apache.druid.data.input.impl.LocalInputSource; -import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.IAE; import org.apache.druid.storage.ExportStorageProvider; import org.apache.druid.storage.StorageConfig; import org.apache.druid.storage.StorageConnector; +import org.apache.druid.storage.StorageConnectorUtils; import java.io.File; import java.io.IOException; @@ -55,20 +55,8 @@ public LocalFileExportStorageProvider(@JsonProperty(value = "exportPath", requir @Override public StorageConnector get() { - final String baseDir = storageConfig.getBaseDir(); - if (baseDir == null) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build("The runtime property `druid.export.storage.baseDir` must be configured."); - } + final File exportDestination = StorageConnectorUtils.validateAndGetPath(storageConfig.getBaseDir(), exportPath); try { - final File exportDestination = new File(baseDir, exportPath); - final String finalOutputPath = exportDestination.getCanonicalPath(); - if (!finalOutputPath.startsWith(baseDir)) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build("The provided destination must be within the path configured by runtime property `druid.export.storage.baseDir`"); - } return new LocalFileStorageConnector(exportDestination); } catch (IOException e) { @@ -76,7 +64,7 @@ public StorageConnector get() e, "Unable to create storage connector [%s] for base path [%s]", LocalFileStorageConnector.class.getSimpleName(), - exportPath + exportDestination.toPath() ); } } From f9873a64e1d350a079953e297341f299babc4869 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Mon, 5 
Feb 2024 18:52:13 +0530 Subject: [PATCH 43/50] Add tests --- .../storage/StorageConnectorUtilsTest.java | 58 ++++++++++++++ .../LocalFileExportStorageProviderTest.java | 77 +++++++++++++++++++ .../destination/ExportDestinationTest.java | 21 +++++ 3 files changed, 156 insertions(+) create mode 100644 processing/src/test/java/org/apache/druid/storage/StorageConnectorUtilsTest.java create mode 100644 processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java diff --git a/processing/src/test/java/org/apache/druid/storage/StorageConnectorUtilsTest.java b/processing/src/test/java/org/apache/druid/storage/StorageConnectorUtilsTest.java new file mode 100644 index 000000000000..73c02da294ff --- /dev/null +++ b/processing/src/test/java/org/apache/druid/storage/StorageConnectorUtilsTest.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.storage; + +import org.apache.druid.error.DruidException; +import org.junit.Assert; +import org.junit.Test; + +import java.io.File; + +public class StorageConnectorUtilsTest +{ + @Test + public void testEmptyPath() + { + Assert.assertThrows( + DruidException.class, + () -> StorageConnectorUtils.validateAndGetPath(null, "path") + ); + } + + @Test + public void testValidate() + { + File file = StorageConnectorUtils.validateAndGetPath("/base", "path"); + Assert.assertEquals("/base/path", file.toPath().toString()); + } + + @Test + public void testWithNonSubdir() + { + Assert.assertThrows( + DruidException.class, + () -> StorageConnectorUtils.validateAndGetPath("/base", "../path") + ); + Assert.assertThrows( + DruidException.class, + () -> StorageConnectorUtils.validateAndGetPath("/base", "../base1") + ); + } +} diff --git a/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java b/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java new file mode 100644 index 000000000000..f568d8f97589 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.storage.local; + +import com.fasterxml.jackson.databind.BeanProperty; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.InjectableValues; +import com.fasterxml.jackson.databind.ObjectMapper; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.storage.ExportStorageProvider; +import org.apache.druid.storage.StorageConfig; +import org.apache.druid.storage.StorageConnectorModule; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; + +public class LocalFileExportStorageProviderTest +{ + @Test + public void testSerde() throws IOException + { + ExportStorageProvider exportDestination = new LocalFileExportStorageProvider("/basepath/export"); + + ObjectMapper objectMapper = new DefaultObjectMapper(); + objectMapper.registerModules(new StorageConnectorModule().getJacksonModules()); + objectMapper.setInjectableValues(new InjectableValues() + { + @Override + public Object findInjectableValue( + Object valueId, + DeserializationContext ctxt, + BeanProperty forProperty, + Object beanInstance + ) + { + if (((String) valueId).contains("StorageConfig")) { + return new StorageConfig("/"); + } else { + throw new RuntimeException(); + } + } + }); + byte[] bytes = objectMapper.writeValueAsBytes(exportDestination); + + ExportStorageProvider deserialized = objectMapper.readValue(bytes, LocalFileExportStorageProvider.class); + Assert.assertEquals(exportDestination, deserialized); + } + + @Test + public void testEqualsAndHashCode() + { + EqualsVerifier.forClass(LocalFileExportStorageProvider.class) + .withNonnullFields("exportPath") + .withIgnoredFields("storageConfig") + .usingGetClass() + .verify(); + } +} diff --git a/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java 
b/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java index eadc21e389ef..a06de536a2fa 100644 --- a/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java +++ b/sql/src/test/java/org/apache/druid/sql/destination/ExportDestinationTest.java @@ -19,8 +19,12 @@ package org.apache.druid.sql.destination; +import com.fasterxml.jackson.databind.BeanProperty; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.InjectableValues; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.storage.StorageConfig; import org.apache.druid.storage.StorageConnectorModule; import org.apache.druid.storage.local.LocalFileExportStorageProvider; import org.junit.Assert; @@ -37,6 +41,23 @@ public void testSerde() throws IOException ObjectMapper objectMapper = new DefaultObjectMapper(); objectMapper.registerModules(new StorageConnectorModule().getJacksonModules()); + objectMapper.setInjectableValues(new InjectableValues() + { + @Override + public Object findInjectableValue( + Object valueId, + DeserializationContext ctxt, + BeanProperty forProperty, + Object beanInstance + ) + { + if (((String) valueId).contains("StorageConfig")) { + return new StorageConfig("/"); + } else { + throw new RuntimeException(); + } + } + }); byte[] bytes = objectMapper.writeValueAsBytes(exportDestination); ExportDestination deserialized = objectMapper.readValue(bytes, ExportDestination.class); From c7f8234ee38f7d6f07f049b62dbec84959adb778 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 6 Feb 2024 17:03:24 +0530 Subject: [PATCH 44/50] Add check for runtime parameter for bucket and path --- docs/multi-stage-query/reference.md | 28 +++--- .../storage/s3/output/S3ExportConfig.java | 71 ++++++++++++++++ .../s3/output/S3ExportStorageProvider.java | 85 ++++++++++--------- .../s3/output/S3StorageConnectorModule.java | 2 + 
.../output/S3ExportStorageProviderTest.java | 36 ++++---- .../druid/storage/StorageConnectorUtils.java | 50 ----------- .../local/LocalFileExportStorageProvider.java | 23 ++++- .../LocalFileExportStorageProviderTest.java | 31 +++++++ 8 files changed, 194 insertions(+), 132 deletions(-) create mode 100644 extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportConfig.java rename processing/src/test/java/org/apache/druid/storage/StorageConnectorUtilsTest.java => extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/output/S3ExportStorageProviderTest.java (52%) delete mode 100644 processing/src/main/java/org/apache/druid/storage/StorageConnectorUtils.java diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 74c30938561a..d75b94c9fb7d 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -99,7 +99,7 @@ For more information, see [Read external data with EXTERN](concepts.md#read-exte This variation of EXTERN requires one argument, the details of the destination as specified below. This variation additionally requires an `AS` clause to specify the format of the exported rows. -INSERT statements and REPLACE statements are both supported with an `EXTERN` destination. +Only INSERT statements are supported with an `EXTERN` destination. Only `CSV` format is supported at the moment. Please note that partitioning (`PARTITIONED BY`) and clustering (`CLUSTERED BY`) is not currently supported with export statements. @@ -116,20 +116,6 @@ SELECT FROM
``` -REPLACE statements have an additional OVERWRITE clause. As partitioning is not yet supported, only `OVERWRITE ALL` -is allowed. REPLACE deletes any currently existing files at the specified directory, and creates new files with the results of the query. - - -```sql -REPLACE INTO - EXTERN() -AS CSV -OVERWRITE ALL -SELECT - -FROM
-``` - Exporting is currently supported for Amazon S3 storage and local storage. ##### S3 @@ -154,9 +140,15 @@ Supported arguments to the function: |-------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------| | `bucket` | Yes | The S3 bucket to which the files are exported to. | n/a | | `prefix` | Yes | Path where the exported files would be created. The export query would expect the destination to be empty. If the location includes other files, then the query will fail. | n/a | -| `tempSubDir` | No | Subdirectory of `druid.export.storage.baseDir` used to store temporary files required while uploading the data. If this argument is not present, the runtime property will be used as the temporary directory. | . | -| `maxRetry` | No | Defines the max number times to attempt S3 API calls to avoid failures due to transient errors. | 10 | -| `chunkSize` | No | Defines the size of each chunk to temporarily store in `tempDir`. The chunk size must be between 5 MiB and 5 GiB. A large chunk size reduces the API calls to S3, however it requires more disk space to store the temporary chunks. | 100MiB | + +The following runtime parameters must be configured to export into an S3 destination: + +| Runtime Parameter | Required | Description | Default | +|----------------------------------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----| +| `druid.export.storage.s3.tempSubDir` | Yes | Directory used to store temporary files required while uploading the data. 
| n/a | +| `druid.export.storage.s3.allowedExportPaths` | Yes | An array of S3 prefixes which are whitelisted as export destinations. | n/a | +| `druid.export.storage.s3.maxRetry` | No | Defines the max number times to attempt S3 API calls to avoid failures due to transient errors. | 10 | +| `druid.export.storage.s3.chunkSize` | No | Defines the size of each chunk to temporarily store in `tempDir`. The chunk size must be between 5 MiB and 5 GiB. A large chunk size reduces the API calls to S3, however it requires more disk space to store the temporary chunks. | 100MiB | ##### LOCAL diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportConfig.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportConfig.java new file mode 100644 index 000000000000..cb67d314e7f2 --- /dev/null +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportConfig.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.storage.s3.output; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.java.util.common.HumanReadableBytes; + +import java.util.List; + +public class S3ExportConfig +{ + @JsonProperty("tempDir") + private final String tempDir; + @JsonProperty("chunkSize") + private final HumanReadableBytes chunkSize; + @JsonProperty("maxRetry") + private final int maxRetry; + @JsonProperty("allowedExportPaths") + private final List allowedExportPaths; + + @JsonCreator + public S3ExportConfig( + @JsonProperty("tempDir") final String tempDir, + @JsonProperty("chunkSize") final HumanReadableBytes chunkSize, + @JsonProperty("maxRetry") final int maxRetry, + @JsonProperty("allowedExportPaths") final List allowedExportPaths) + { + this.tempDir = tempDir; + this.chunkSize = chunkSize; + this.maxRetry = maxRetry; + this.allowedExportPaths = allowedExportPaths; + } + + public String getTempDir() + { + return tempDir; + } + + public HumanReadableBytes getChunkSize() + { + return chunkSize; + } + + public int getMaxRetry() + { + return maxRetry; + } + + public List getAllowedExportPaths() + { + return allowedExportPaths; + } +} diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java index f30b5c08cad7..d4c9b93e322d 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java @@ -24,16 +24,18 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; +import com.google.common.annotations.VisibleForTesting; +import 
org.apache.druid.data.input.impl.CloudObjectLocation; import org.apache.druid.data.input.s3.S3InputSource; -import org.apache.druid.java.util.common.HumanReadableBytes; +import org.apache.druid.error.DruidException; import org.apache.druid.storage.ExportStorageProvider; -import org.apache.druid.storage.StorageConfig; import org.apache.druid.storage.StorageConnector; -import org.apache.druid.storage.StorageConnectorUtils; +import org.apache.druid.storage.s3.S3StorageDruidModule; import org.apache.druid.storage.s3.ServerSideEncryptingAmazonS3; -import javax.annotation.Nullable; import java.io.File; +import java.net.URI; +import java.util.List; @JsonTypeName(S3ExportStorageProvider.TYPE_NAME) public class S3ExportStorageProvider implements ExportStorageProvider @@ -43,44 +45,63 @@ public class S3ExportStorageProvider implements ExportStorageProvider private final String bucket; @JsonProperty private final String prefix; - @JsonProperty - private final String tempSubDir; - @JsonProperty - @Nullable - private final HumanReadableBytes chunkSize; - @JsonProperty - @Nullable - private final Integer maxRetry; @JacksonInject - StorageConfig storageConfig; + S3ExportConfig s3ExportConfig; @JacksonInject ServerSideEncryptingAmazonS3 s3; @JsonCreator public S3ExportStorageProvider( @JsonProperty(value = "bucket", required = true) String bucket, - @JsonProperty(value = "prefix", required = true) String prefix, - @JsonProperty(value = "tempSubDir") @Nullable String tempSubDir, - @JsonProperty("chunkSize") @Nullable HumanReadableBytes chunkSize, - @JsonProperty("maxRetry") @Nullable Integer maxRetry + @JsonProperty(value = "prefix", required = true) String prefix ) { this.bucket = bucket; this.prefix = prefix; - this.tempSubDir = tempSubDir == null ? 
"" : tempSubDir; - this.chunkSize = chunkSize; - this.maxRetry = maxRetry; } @Override public StorageConnector get() { - final File temporaryDirectory = StorageConnectorUtils.validateAndGetPath(storageConfig.getBaseDir(), tempSubDir); - final S3OutputConfig s3OutputConfig = new S3OutputConfig(bucket, prefix, temporaryDirectory, chunkSize, maxRetry); + final String tempDir = s3ExportConfig.getTempDir(); + if (tempDir == null) { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.NOT_FOUND) + .build("The runtime property `druid.export.storage.s3.tempDir` must be configured for S3 export."); + } + validateS3Prefix(s3ExportConfig.getAllowedExportPaths(), bucket, prefix); + final S3OutputConfig s3OutputConfig = new S3OutputConfig( + bucket, + prefix, + new File(tempDir), + s3ExportConfig.getChunkSize(), + s3ExportConfig.getMaxRetry() + ); return new S3StorageConnector(s3OutputConfig, s3); } + @VisibleForTesting + static void validateS3Prefix(List allowedExportPaths, String bucket, String prefix) + { + if (allowedExportPaths == null) { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.NOT_FOUND) + .build( + "The runtime property `druid.export.storage.s3.allowedExportPaths` must be configured for S3 export."); + } + final URI providedUri = new CloudObjectLocation(bucket, prefix).toUri(S3StorageDruidModule.SCHEME); + for (final String path : allowedExportPaths) { + final URI allowedUri = URI.create(path.endsWith("/") ? 
path : path + "/"); + if (allowedUri.getHost().equals(providedUri.getHost()) && providedUri.getPath().startsWith(allowedUri.getPath())) { + return; + } + } + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build("None of the allowed prefixes matched the input path [%s]", providedUri); + } + @JsonProperty("bucket") public String getBucket() { @@ -93,26 +114,6 @@ public String getPrefix() return prefix; } - @JsonProperty("tempSubDir") - public String getTempSubDir() - { - return tempSubDir; - } - - @JsonProperty("chunkSize") - @Nullable - public HumanReadableBytes getChunkSize() - { - return chunkSize; - } - - @JsonProperty("maxRetry") - @Nullable - public Integer getMaxRetry() - { - return maxRetry; - } - @Override @JsonIgnore public String getResourceType() diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorModule.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorModule.java index 6d168875c950..a57a93a525fb 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorModule.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3StorageConnectorModule.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.Module; import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.inject.Binder; +import org.apache.druid.guice.JsonConfigProvider; import org.apache.druid.initialization.DruidModule; import java.util.Collections; @@ -42,5 +43,6 @@ public List getJacksonModules() @Override public void configure(Binder binder) { + JsonConfigProvider.bind(binder, "druid.export.storage.s3", S3ExportConfig.class); } } diff --git a/processing/src/test/java/org/apache/druid/storage/StorageConnectorUtilsTest.java 
b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/output/S3ExportStorageProviderTest.java similarity index 52% rename from processing/src/test/java/org/apache/druid/storage/StorageConnectorUtilsTest.java rename to extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/output/S3ExportStorageProviderTest.java index 73c02da294ff..75a1ec81858a 100644 --- a/processing/src/test/java/org/apache/druid/storage/StorageConnectorUtilsTest.java +++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/output/S3ExportStorageProviderTest.java @@ -17,42 +17,38 @@ * under the License. */ -package org.apache.druid.storage; +package org.apache.druid.storage.s3.output; +import com.google.common.collect.ImmutableList; import org.apache.druid.error.DruidException; import org.junit.Assert; import org.junit.Test; -import java.io.File; +import java.util.List; -public class StorageConnectorUtilsTest +public class S3ExportStorageProviderTest { - @Test - public void testEmptyPath() - { - Assert.assertThrows( - DruidException.class, - () -> StorageConnectorUtils.validateAndGetPath(null, "path") - ); - } + private final List validPrefixes = ImmutableList.of( + "s3://bucket-name/validPath1", + "s3://bucket-name/validPath2" + ); @Test - public void testValidate() + public void testValidatePaths() { - File file = StorageConnectorUtils.validateAndGetPath("/base", "path"); - Assert.assertEquals("/base/path", file.toPath().toString()); - } + S3ExportStorageProvider.validateS3Prefix(validPrefixes, "bucket-name", "validPath1/"); + S3ExportStorageProvider.validateS3Prefix(validPrefixes, "bucket-name", "validPath1/validSubPath/"); + + S3ExportStorageProvider.validateS3Prefix(ImmutableList.of("s3://bucket-name"), "bucket-name", ""); + S3ExportStorageProvider.validateS3Prefix(ImmutableList.of("s3://bucket-name"), "bucket-name", "validPath"); - @Test - public void testWithNonSubdir() - { Assert.assertThrows( DruidException.class, - () -> 
StorageConnectorUtils.validateAndGetPath("/base", "../path") + () -> S3ExportStorageProvider.validateS3Prefix(validPrefixes, "bucket-name", "invalidPath1") ); Assert.assertThrows( DruidException.class, - () -> StorageConnectorUtils.validateAndGetPath("/base", "../base1") + () -> S3ExportStorageProvider.validateS3Prefix(validPrefixes, "bucket-name", "validPath123") ); } } diff --git a/processing/src/main/java/org/apache/druid/storage/StorageConnectorUtils.java b/processing/src/main/java/org/apache/druid/storage/StorageConnectorUtils.java deleted file mode 100644 index f46f2a5ca211..000000000000 --- a/processing/src/main/java/org/apache/druid/storage/StorageConnectorUtils.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.storage; - -import org.apache.druid.error.DruidException; - -import java.io.File; - -public class StorageConnectorUtils -{ - public static File validateAndGetPath(String basePath, String customPath) - { - if (basePath == null) { - throw DruidException.forPersona(DruidException.Persona.OPERATOR) - .ofCategory(DruidException.Category.NOT_FOUND) - .build( - "The runtime property `druid.export.storage.baseDir` must be configured for export functionality."); - } - final File baseDir = new File(basePath); - final File exportFile = new File(baseDir, customPath); - if (!exportFile.toPath().normalize().startsWith(baseDir.toPath())) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build( - "The provided destination must be within the path configured by runtime property `druid.export.storage.baseDir`"); - } - return exportFile; - } - - private StorageConnectorUtils() - { - } -} diff --git a/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java b/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java index 53a65d0dfef5..1ea0ef48586a 100644 --- a/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java +++ b/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java @@ -25,11 +25,11 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; import org.apache.druid.data.input.impl.LocalInputSource; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.IAE; import org.apache.druid.storage.ExportStorageProvider; import org.apache.druid.storage.StorageConfig; import org.apache.druid.storage.StorageConnector; -import org.apache.druid.storage.StorageConnectorUtils; import java.io.File; import java.io.IOException; @@ -55,7 +55,7 @@ public 
LocalFileExportStorageProvider(@JsonProperty(value = "exportPath", requir @Override public StorageConnector get() { - final File exportDestination = StorageConnectorUtils.validateAndGetPath(storageConfig.getBaseDir(), exportPath); + final File exportDestination = validateAndGetPath(storageConfig.getBaseDir(), exportPath); try { return new LocalFileStorageConnector(exportDestination); } @@ -102,4 +102,23 @@ public String toString() "exportPath=" + exportPath + '}'; } + + public static File validateAndGetPath(String basePath, String customPath) + { + if (basePath == null) { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.NOT_FOUND) + .build( + "The runtime property `druid.export.storage.baseDir` must be configured for local export."); + } + final File baseDir = new File(basePath); + final File exportFile = new File(baseDir, customPath); + if (!exportFile.toPath().normalize().startsWith(baseDir.toPath())) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build( + "The provided destination must be within the path configured by runtime property `druid.export.storage.baseDir`"); + } + return exportFile; + } } diff --git a/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java b/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java index f568d8f97589..439f542c4656 100644 --- a/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java +++ b/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java @@ -24,6 +24,7 @@ import com.fasterxml.jackson.databind.InjectableValues; import com.fasterxml.jackson.databind.ObjectMapper; import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.error.DruidException; import org.apache.druid.jackson.DefaultObjectMapper; import 
org.apache.druid.storage.ExportStorageProvider; import org.apache.druid.storage.StorageConfig; @@ -31,6 +32,7 @@ import org.junit.Assert; import org.junit.Test; +import java.io.File; import java.io.IOException; public class LocalFileExportStorageProviderTest @@ -74,4 +76,33 @@ public void testEqualsAndHashCode() .usingGetClass() .verify(); } + + @Test + public void testEmptyPath() + { + Assert.assertThrows( + DruidException.class, + () -> LocalFileExportStorageProvider.validateAndGetPath(null, "path") + ); + } + + @Test + public void testValidate() + { + File file = LocalFileExportStorageProvider.validateAndGetPath("/base", "path"); + Assert.assertEquals("/base/path", file.toPath().toString()); + } + + @Test + public void testWithNonSubdir() + { + Assert.assertThrows( + DruidException.class, + () -> LocalFileExportStorageProvider.validateAndGetPath("/base", "../path") + ); + Assert.assertThrows( + DruidException.class, + () -> LocalFileExportStorageProvider.validateAndGetPath("/base", "../base1") + ); + } } From cf15323be6eee61fc717d155c7b33a61312bbee9 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 6 Feb 2024 17:49:01 +0530 Subject: [PATCH 45/50] Add check for runtime parameter for bucket and path --- docs/multi-stage-query/reference.md | 10 +++--- .../s3/output/S3ExportStorageProvider.java | 33 +++++++++++++------ .../output/S3ExportStorageProviderTest.java | 6 ++++ .../druid/java/util/common/StringUtils.java | 5 +++ 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index d75b94c9fb7d..ad00ce9f4abb 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -143,11 +143,11 @@ Supported arguments to the function: The following runtime parameters must be configured to export into an S3 destination: -| Runtime Parameter | Required | Description | Default | 
-|----------------------------------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----| -| `druid.export.storage.s3.tempSubDir` | Yes | Directory used to store temporary files required while uploading the data. | n/a | -| `druid.export.storage.s3.allowedExportPaths` | Yes | An array of S3 prefixes which are whitelisted as export destinations. | n/a | -| `druid.export.storage.s3.maxRetry` | No | Defines the max number times to attempt S3 API calls to avoid failures due to transient errors. | 10 | +| Runtime Parameter | Required | Description | Default | +|----------------------------------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----| +| `druid.export.storage.s3.tempSubDir` | Yes | Directory used to store temporary files required while uploading the data. | n/a | +| `druid.export.storage.s3.allowedExportPaths` | Yes | An array of S3 prefixes which are whitelisted as export destinations. Export query fail if the export destination does not match any of the configured prefixes. Example: `[\"s3://bucket1/export/\", \"s3://bucket2/export/\"]` | n/a | +| `druid.export.storage.s3.maxRetry` | No | Defines the max number times to attempt S3 API calls to avoid failures due to transient errors. | 10 | | `druid.export.storage.s3.chunkSize` | No | Defines the size of each chunk to temporarily store in `tempDir`. The chunk size must be between 5 MiB and 5 GiB. A large chunk size reduces the API calls to S3, however it requires more disk space to store the temporary chunks. 
| 100MiB | ##### LOCAL diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java index d4c9b93e322d..a7d47f1fc16a 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java @@ -28,11 +28,13 @@ import org.apache.druid.data.input.impl.CloudObjectLocation; import org.apache.druid.data.input.s3.S3InputSource; import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.storage.ExportStorageProvider; import org.apache.druid.storage.StorageConnector; import org.apache.druid.storage.s3.S3StorageDruidModule; import org.apache.druid.storage.s3.ServerSideEncryptingAmazonS3; +import javax.validation.constraints.NotNull; import java.io.File; import java.net.URI; import java.util.List; @@ -70,7 +72,14 @@ public StorageConnector get() .ofCategory(DruidException.Category.NOT_FOUND) .build("The runtime property `druid.export.storage.s3.tempDir` must be configured for S3 export."); } - validateS3Prefix(s3ExportConfig.getAllowedExportPaths(), bucket, prefix); + final List allowedExportPaths = s3ExportConfig.getAllowedExportPaths(); + if (allowedExportPaths == null) { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.NOT_FOUND) + .build( + "The runtime property `druid.export.storage.s3.allowedExportPaths` must be configured for S3 export."); + } + validateS3Prefix(allowedExportPaths, bucket, prefix); final S3OutputConfig s3OutputConfig = new S3OutputConfig( bucket, prefix, @@ -82,18 +91,12 @@ public StorageConnector get() } @VisibleForTesting - static void validateS3Prefix(List allowedExportPaths, String bucket, String 
prefix) + static void validateS3Prefix(@NotNull final List allowedExportPaths, final String bucket, final String prefix) { - if (allowedExportPaths == null) { - throw DruidException.forPersona(DruidException.Persona.OPERATOR) - .ofCategory(DruidException.Category.NOT_FOUND) - .build( - "The runtime property `druid.export.storage.s3.allowedExportPaths` must be configured for S3 export."); - } final URI providedUri = new CloudObjectLocation(bucket, prefix).toUri(S3StorageDruidModule.SCHEME); for (final String path : allowedExportPaths) { - final URI allowedUri = URI.create(path.endsWith("/") ? path : path + "/"); - if (allowedUri.getHost().equals(providedUri.getHost()) && providedUri.getPath().startsWith(allowedUri.getPath())) { + final URI allowedUri = URI.create(path); + if (validateUri(allowedUri, providedUri)) { return; } } @@ -102,6 +105,16 @@ static void validateS3Prefix(List allowedExportPaths, String bucket, Str .build("None of the allowed prefixes matched the input path [%s]", providedUri); } + private static boolean validateUri(final URI allowedUri, final URI providedUri) + { + if (!allowedUri.getHost().equals(providedUri.getHost())) { + return false; + } + final String allowedPath = StringUtils.maybeAppendTrailingSlash(allowedUri.getPath()); + final String providedPath = StringUtils.maybeAppendTrailingSlash(providedUri.getPath()); + return providedPath.startsWith(allowedPath); + } + @JsonProperty("bucket") public String getBucket() { diff --git a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/output/S3ExportStorageProviderTest.java b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/output/S3ExportStorageProviderTest.java index 75a1ec81858a..362f8583fd13 100644 --- a/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/output/S3ExportStorageProviderTest.java +++ b/extensions-core/s3-extensions/src/test/java/org/apache/druid/storage/s3/output/S3ExportStorageProviderTest.java @@ -37,11 +37,17 
@@ public class S3ExportStorageProviderTest public void testValidatePaths() { S3ExportStorageProvider.validateS3Prefix(validPrefixes, "bucket-name", "validPath1/"); + S3ExportStorageProvider.validateS3Prefix(validPrefixes, "bucket-name", "validPath1"); S3ExportStorageProvider.validateS3Prefix(validPrefixes, "bucket-name", "validPath1/validSubPath/"); S3ExportStorageProvider.validateS3Prefix(ImmutableList.of("s3://bucket-name"), "bucket-name", ""); S3ExportStorageProvider.validateS3Prefix(ImmutableList.of("s3://bucket-name"), "bucket-name", "validPath"); + S3ExportStorageProvider.validateS3Prefix(validPrefixes, "bucket-name", "validPath1/../validPath2/"); + Assert.assertThrows( + DruidException.class, + () -> S3ExportStorageProvider.validateS3Prefix(validPrefixes, "incorrect-bucket", "validPath1/") + ); Assert.assertThrows( DruidException.class, () -> S3ExportStorageProvider.validateS3Prefix(validPrefixes, "bucket-name", "invalidPath1") diff --git a/processing/src/main/java/org/apache/druid/java/util/common/StringUtils.java b/processing/src/main/java/org/apache/druid/java/util/common/StringUtils.java index 9759f1639151..e5d9b2c8e4ec 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/StringUtils.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/StringUtils.java @@ -454,6 +454,11 @@ public static String maybeRemoveTrailingSlash(String s) return s != null && s.endsWith("/") ? s.substring(0, s.length() - 1) : s; } + public static String maybeAppendTrailingSlash(String s) + { + return s != null && !s.endsWith("/") ? s + "/" : s; + } + /** * Removes all occurrences of the given char from the given string. This method is an optimal version of * {@link String#replace(CharSequence, CharSequence) s.replace("c", "")}. 
From 2ff3410dbec0875e94b22a71aec85fcb5c18b642 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Tue, 6 Feb 2024 20:44:21 +0530 Subject: [PATCH 46/50] Add tests --- .../destination/ExportMSQDestinationTest.java | 6 +++++ .../LocalFileExportStorageProviderTest.java | 23 ++++--------------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java index f8978126d140..5010536030ad 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java @@ -19,10 +19,12 @@ package org.apache.druid.msq.indexing.destination; +import com.fasterxml.jackson.databind.InjectableValues; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.sql.http.ResultFormat; +import org.apache.druid.storage.StorageConfig; import org.apache.druid.storage.StorageConnectorModule; import org.apache.druid.storage.local.LocalFileExportStorageProvider; import org.junit.Assert; @@ -43,6 +45,10 @@ public void testSerde() throws IOException ObjectMapper objectMapper = new DefaultObjectMapper(); new StorageConnectorModule().getJacksonModules().forEach(objectMapper::registerModule); String string = objectMapper.writeValueAsString(exportDestination); + objectMapper.setInjectableValues( + new InjectableValues.Std() + .addValue(StorageConfig.class, new StorageConfig("/")) + ); ExportMSQDestination newDest = objectMapper.readValue(string, ExportMSQDestination.class); Assert.assertEquals(exportDestination, newDest); diff --git 
a/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java b/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java index 439f542c4656..1d49701f9640 100644 --- a/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java +++ b/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java @@ -19,8 +19,6 @@ package org.apache.druid.storage.local; -import com.fasterxml.jackson.databind.BeanProperty; -import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.InjectableValues; import com.fasterxml.jackson.databind.ObjectMapper; import nl.jqno.equalsverifier.EqualsVerifier; @@ -44,23 +42,10 @@ public void testSerde() throws IOException ObjectMapper objectMapper = new DefaultObjectMapper(); objectMapper.registerModules(new StorageConnectorModule().getJacksonModules()); - objectMapper.setInjectableValues(new InjectableValues() - { - @Override - public Object findInjectableValue( - Object valueId, - DeserializationContext ctxt, - BeanProperty forProperty, - Object beanInstance - ) - { - if (((String) valueId).contains("StorageConfig")) { - return new StorageConfig("/"); - } else { - throw new RuntimeException(); - } - } - }); + objectMapper.setInjectableValues( + new InjectableValues.Std() + .addValue(StorageConfig.class, new StorageConfig("/")) + ); byte[] bytes = objectMapper.writeValueAsBytes(exportDestination); ExportStorageProvider deserialized = objectMapper.readValue(bytes, LocalFileExportStorageProvider.class); From c71cc5a8561333d80c8491d8320f8d59411b60a9 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 7 Feb 2024 08:49:39 +0530 Subject: [PATCH 47/50] Update docs --- docs/multi-stage-query/concepts.md | 2 +- docs/multi-stage-query/reference.md | 56 ++++++++++++++--------------- 2 files changed, 27 insertions(+), 31 deletions(-) diff --git 
a/docs/multi-stage-query/concepts.md b/docs/multi-stage-query/concepts.md index 2993f609487a..e27c8f8cf8a8 100644 --- a/docs/multi-stage-query/concepts.md +++ b/docs/multi-stage-query/concepts.md @@ -118,7 +118,7 @@ for dimension-based pruning, see [Clustering](#clustering). ### Write to an external destination with `EXTERN` Query tasks can write data to an external destination through the `EXTERN` function, when it is used with the `INTO` -clause, such as `REPLACE INTO EXTERN(...)` The EXTERN function takes arguments which specifies where to the files should be created. +clause, such as `REPLACE INTO EXTERN(...)`. The EXTERN function takes arguments that specify where to write the files. The format can be specified using an `AS` clause. For more information about the syntax, see [`EXTERN`](./reference.md#extern-function). diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index ad00ce9f4abb..91201781b96e 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -95,18 +95,19 @@ For more information, see [Read external data with EXTERN](concepts.md#read-exte #### `EXTERN` to export to a destination -`EXTERN` can be used to specify a destination, where the data needs to be exported. +`EXTERN` can be used to specify a destination where you want to export data to. This variation of EXTERN requires one argument, the details of the destination as specified below. This variation additionally requires an `AS` clause to specify the format of the exported rows. -Only INSERT statements are supported with an `EXTERN` destination. -Only `CSV` format is supported at the moment. -Please note that partitioning (`PARTITIONED BY`) and clustering (`CLUSTERED BY`) is not currently supported with export statements. +Keep the following in mind when using EXTERN to export rows: +- Only INSERT statements are supported. +- Only `CSV` format is supported as an export format. 
+- Partitioning (`PARTITIONED BY`) and clustering (`CLUSTERED BY`) aren't supported with export statements. +- You can export to Amazon S3 or local storage. +- The destination provided should contain no other files or directories. -Export statements support the context parameter `rowsPerPage` for the number of rows in each exported file. The default value -is 100,000. +When you export data, use the `rowsPerPage` context parameter to control how many rows get exported. The default is 100,000. -INSERT statements append the results to the existing files at the destination. ```sql INSERT INTO EXTERN() @@ -116,12 +117,10 @@ SELECT FROM
``` -Exporting is currently supported for Amazon S3 storage and local storage. - ##### S3 -Exporting results to S3 can be done by passing the function `S3()` as an argument to the `EXTERN` function. The `druid-s3-extensions` should be loaded. -The `S3()` function is a druid function which configures the connection. Arguments to `S3()` should be passed as named parameters with the value in single quotes like the example below. +Export results to S3 by passing the function `S3()` as an argument to the `EXTERN` function. Note that this requires the `druid-s3-extensions`. +The `S3()` function is a Druid function that configures the connection. Arguments for `S3()` should be passed as named parameters with the value in single quotes like the following example: ```sql INSERT INTO @@ -134,32 +133,29 @@ SELECT FROM
``` -Supported arguments to the function: +Supported arguments for the function: -| Parameter | Required | Description | Default | -|-------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------| -| `bucket` | Yes | The S3 bucket to which the files are exported to. | n/a | -| `prefix` | Yes | Path where the exported files would be created. The export query would expect the destination to be empty. If the location includes other files, then the query will fail. | n/a | +| Parameter | Required | Description | Default | +|-------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------| +| `bucket` | Yes | The S3 bucket to which the files are exported to. | n/a | +| `prefix` | Yes | Path where the exported files would be created. The export query expects the destination to be empty. If the location includes other files, then the query will fail. | n/a | The following runtime parameters must be configured to export into an S3 destination: -| Runtime Parameter | Required | Description | Default | -|----------------------------------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----| -| `druid.export.storage.s3.tempSubDir` | Yes | Directory used to store temporary files required while uploading the data. | n/a | -| `druid.export.storage.s3.allowedExportPaths` | Yes | An array of S3 prefixes which are whitelisted as export destinations. Export query fail if the export destination does not match any of the configured prefixes. 
Example: `[\"s3://bucket1/export/\", \"s3://bucket2/export/\"]` | n/a | -| `druid.export.storage.s3.maxRetry` | No | Defines the max number times to attempt S3 API calls to avoid failures due to transient errors. | 10 | +| Runtime Parameter | Required | Description | Default | +|----------------------------------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----| +| `druid.export.storage.s3.tempSubDir` | Yes | Directory used to store temporary files required while uploading the data. | n/a | +| `druid.export.storage.s3.allowedExportPaths` | Yes | An array of S3 prefixes that are whitelisted as export destinations. Export queries fail if the export destination does not match any of the configured prefixes. Example: `[\"s3://bucket1/export/\", \"s3://bucket2/export/\"]` | n/a | +| `druid.export.storage.s3.maxRetry` | No | Defines the max number times to attempt S3 API calls to avoid failures due to transient errors. | 10 | | `druid.export.storage.s3.chunkSize` | No | Defines the size of each chunk to temporarily store in `tempDir`. The chunk size must be between 5 MiB and 5 GiB. A large chunk size reduces the API calls to S3, however it requires more disk space to store the temporary chunks. | 100MiB | ##### LOCAL -Exporting is also supported to the local storage, which exports the results to the filesystem of the MSQ worker. -This is useful in a single node setup or for testing, and is not suitable for production use cases. - -This can be done by passing the function `LOCAL()` as an argument to the `EXTERN FUNCTION`. -Arguments to `LOCAL()` should be passed as named parameters with the value in single quotes like the example below. +You can export to the local storage, which exports the results to the filesystem of the MSQ worker. 
+This is useful in a single node setup or for testing but is not suitable for production use cases. -To use local as an export destination, the runtime property `druid.export.storage.baseDir` must be configured on the indexer/middle manager. -The parameter provided to the `LOCAL()` function will be prefixed with this value when exporting to a local destination. +Export results to local storage by passing the function `LOCAL()` as an argument for the `EXTERN FUNCTION`. To use local storage as an export destination, the runtime property `druid.export.storage.baseDir` must be configured on the Indexer/Middle Manager. +Arguments to `LOCAL()` should be passed as named parameters with the value in single quotes in the following example: ```sql INSERT INTO @@ -174,9 +170,9 @@ FROM
Supported arguments to the function: -| Parameter | Required | Description | Default | +| Parameter | Required | Description | Default | |-------------|--------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --| -| `exportPath` | Yes | Subdirectory of `druid.export.storage.baseDir` used to as the destination to export the results to. The export query expects the destination to be empty. If the location includes other files or directories, then the query will fail. | n/a | +| `exportPath` | Yes | Subdirectory of `druid.export.storage.baseDir` used as the destination to export the results to. The export query expects the destination to be empty. If the location includes other files or directories, then the query will fail. | n/a | For more information, see [Read external data with EXTERN](concepts.md#write-to-an-external-destination-with-extern). 
From 180f13292dc2035a6220dc142ca7493dc8daec46 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 7 Feb 2024 14:37:07 +0530 Subject: [PATCH 48/50] Fix NPE --- .../apache/druid/storage/s3/output/S3ExportConfig.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportConfig.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportConfig.java index cb67d314e7f2..1cbb2b47e9bc 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportConfig.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportConfig.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.java.util.common.HumanReadableBytes; +import javax.annotation.Nullable; import java.util.List; public class S3ExportConfig @@ -32,15 +33,15 @@ public class S3ExportConfig @JsonProperty("chunkSize") private final HumanReadableBytes chunkSize; @JsonProperty("maxRetry") - private final int maxRetry; + private final Integer maxRetry; @JsonProperty("allowedExportPaths") private final List allowedExportPaths; @JsonCreator public S3ExportConfig( @JsonProperty("tempDir") final String tempDir, - @JsonProperty("chunkSize") final HumanReadableBytes chunkSize, - @JsonProperty("maxRetry") final int maxRetry, + @JsonProperty("chunkSize") @Nullable final HumanReadableBytes chunkSize, + @JsonProperty("maxRetry") @Nullable final Integer maxRetry, @JsonProperty("allowedExportPaths") final List allowedExportPaths) { this.tempDir = tempDir; @@ -59,7 +60,7 @@ public HumanReadableBytes getChunkSize() return chunkSize; } - public int getMaxRetry() + public Integer getMaxRetry() { return maxRetry; } From 5206e903dd39c3082104f117b039fbc437959227 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 7 Feb 2024 18:27:49 +0530 Subject: [PATCH 49/50] Update docs, 
remove deadcode --- docs/multi-stage-query/concepts.md | 2 +- docs/multi-stage-query/reference.md | 22 ++++++++------- .../apache/druid/msq/exec/ControllerImpl.java | 5 ++-- .../destination/ExportMSQDestination.java | 28 ++----------------- .../druid/msq/sql/MSQTaskQueryMaker.java | 9 +----- .../destination/ExportMSQDestinationTest.java | 4 +-- .../storage/s3/output/S3ExportConfig.java | 12 ++++---- .../s3/output/S3ExportStorageProvider.java | 16 +++++++++-- .../druid/storage/ExportStorageProvider.java | 5 ++++ .../local/LocalFileExportStorageProvider.java | 19 +++++++++++-- .../LocalFileExportStorageProviderTest.java | 6 ++-- .../TestExportStorageConnectorProvider.java | 6 ++++ 12 files changed, 70 insertions(+), 64 deletions(-) diff --git a/docs/multi-stage-query/concepts.md b/docs/multi-stage-query/concepts.md index e27c8f8cf8a8..27b7d12c91c9 100644 --- a/docs/multi-stage-query/concepts.md +++ b/docs/multi-stage-query/concepts.md @@ -118,7 +118,7 @@ for dimension-based pruning, see [Clustering](#clustering). ### Write to an external destination with `EXTERN` Query tasks can write data to an external destination through the `EXTERN` function, when it is used with the `INTO` -clause, such as `REPLACE INTO EXTERN(...)`. The EXTERN function takes arguments that specify where to write the files. +clause, such as `INSERT INTO EXTERN(...)`. The EXTERN function takes arguments that specify where to write the files. The format can be specified using an `AS` clause. For more information about the syntax, see [`EXTERN`](./reference.md#extern-function). diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 91201781b96e..25f55b31f74f 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -125,7 +125,7 @@ The `S3()` function is a Druid function that configures the connection. 
Argument ```sql INSERT INTO EXTERN( - S3(bucket => 's3://your_bucket', prefix => 'prefix/to/files') + S3(bucket => 'your_bucket', prefix => 'prefix/to/files') ) AS CSV SELECT @@ -135,16 +135,16 @@ FROM
Supported arguments for the function: -| Parameter | Required | Description | Default | -|-------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------| -| `bucket` | Yes | The S3 bucket to which the files are exported to. | n/a | -| `prefix` | Yes | Path where the exported files would be created. The export query expects the destination to be empty. If the location includes other files, then the query will fail. | n/a | +| Parameter | Required | Description | Default | +|-------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------| +| `bucket` | Yes | The S3 bucket to which the files are exported to. The bucket and prefix combination should be whitelisted in `druid.export.storage.s3.allowedExportPaths`. | n/a | +| `prefix` | Yes | Path where the exported files would be created. The export query expects the destination to be empty. If the location includes other files, then the query will fail. The bucket and prefix combination should be whitelisted in `druid.export.storage.s3.allowedExportPaths`. | n/a | The following runtime parameters must be configured to export into an S3 destination: | Runtime Parameter | Required | Description | Default | |----------------------------------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----| -| `druid.export.storage.s3.tempSubDir` | Yes | Directory used to store temporary files required while uploading the data. 
| n/a | +| `druid.export.storage.s3.tempLocalDir` | Yes | Directory used on the local storage of the worker to store temporary files required while uploading the data. | n/a | | `druid.export.storage.s3.allowedExportPaths` | Yes | An array of S3 prefixes that are whitelisted as export destinations. Export queries fail if the export destination does not match any of the configured prefixes. Example: `[\"s3://bucket1/export/\", \"s3://bucket2/export/\"]` | n/a | | `druid.export.storage.s3.maxRetry` | No | Defines the max number times to attempt S3 API calls to avoid failures due to transient errors. | 10 | | `druid.export.storage.s3.chunkSize` | No | Defines the size of each chunk to temporarily store in `tempDir`. The chunk size must be between 5 MiB and 5 GiB. A large chunk size reduces the API calls to S3, however it requires more disk space to store the temporary chunks. | 100MiB | @@ -154,7 +154,9 @@ The following runtime parameters must be configured to export into an S3 destina You can export to the local storage, which exports the results to the filesystem of the MSQ worker. This is useful in a single node setup or for testing but is not suitable for production use cases. -Export results to local storage by passing the function `LOCAL()` as an argument for the `EXTERN FUNCTION`. To use local storage as an export destination, the runtime property `druid.export.storage.baseDir` must be configured on the Indexer/Middle Manager. +Export results to local storage by passing the function `LOCAL()` as an argument for the `EXTERN FUNCTION`. +To use local storage as an export destination, the runtime property `druid.export.storage.baseDir` must be configured on the Indexer/Middle Manager. +This value must be set to an absolute path on the local machine. Exporting data will be allowed to paths which match the prefix set by this value. 
Arguments to `LOCAL()` should be passed as named parameters with the value in single quotes in the following example: ```sql @@ -170,9 +172,9 @@ FROM
Supported arguments to the function: -| Parameter | Required | Description | Default | -|-------------|--------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --| -| `exportPath` | Yes | Subdirectory of `druid.export.storage.baseDir` used as the destination to export the results to. The export query expects the destination to be empty. If the location includes other files or directories, then the query will fail. | n/a | +| Parameter | Required | Description | Default | +|-------------|--------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --| +| `exportPath` | Yes | Absolute path to a subdirectory of `druid.export.storage.baseDir` used as the destination to export the results to. The export query expects the destination to be empty. If the location includes other files or directories, then the query will fail. | n/a | For more information, see [Read external data with EXTERN](concepts.md#write-to-an-external-destination-with-extern). 
diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index a779be4e5c86..d62bcce04ddc 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1886,8 +1886,9 @@ private static QueryDefinition makeQueryDefinition( if (filesIterator.hasNext()) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.RUNTIME_FAILURE) - .build("Found files at provided export destination. Export is only allowed to " - + "an empty path. Please provide an empty path or move the existing files."); + .build("Found files at provided export destination[%s]. Export is only allowed to " + + "an empty path. Please provide an empty path/subdirectory or move the existing files.", + exportStorageProvider.getBasePath()); } } catch (IOException e) { diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java index cdf4d425a92b..3187ace349b1 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/destination/ExportMSQDestination.java @@ -20,7 +20,6 @@ package org.apache.druid.msq.indexing.destination; import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.msq.querykit.ShuffleSpecFactories; import org.apache.druid.msq.querykit.ShuffleSpecFactory; @@ -28,10 +27,7 @@ import 
org.apache.druid.server.security.ResourceType; import org.apache.druid.sql.http.ResultFormat; import org.apache.druid.storage.ExportStorageProvider; -import org.joda.time.Interval; -import javax.annotation.Nullable; -import java.util.List; import java.util.Objects; import java.util.Optional; @@ -39,29 +35,21 @@ * Destination used by tasks that write the results as files to an external destination. {@link #resultFormat} denotes * the format of the file created and {@link #exportStorageProvider} denotes the type of external * destination. - *
- * {@link #replaceTimeChunks} denotes how existing files should be handled. - * - If the value is null, the results are appended to the existing files. - * - If the value is present, existing files will be deleted according to time intervals. */ public class ExportMSQDestination implements MSQDestination { public static final String TYPE = "export"; private final ExportStorageProvider exportStorageProvider; private final ResultFormat resultFormat; - @Nullable - private final List replaceTimeChunks; @JsonCreator public ExportMSQDestination( @JsonProperty("exportStorageProvider") ExportStorageProvider exportStorageProvider, - @JsonProperty("resultFormat") ResultFormat resultFormat, - @JsonProperty("replaceTimeChunks") @Nullable List replaceTimeChunks + @JsonProperty("resultFormat") ResultFormat resultFormat ) { this.exportStorageProvider = exportStorageProvider; this.resultFormat = resultFormat; - this.replaceTimeChunks = replaceTimeChunks; } @@ -78,14 +66,6 @@ public ResultFormat getResultFormat() return resultFormat; } - @Nullable - @JsonProperty("replaceTimeChunks") - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getReplaceTimeChunks() - { - return replaceTimeChunks; - } - @Override public boolean equals(Object o) { @@ -97,14 +77,13 @@ public boolean equals(Object o) } ExportMSQDestination that = (ExportMSQDestination) o; return Objects.equals(exportStorageProvider, that.exportStorageProvider) - && resultFormat == that.resultFormat - && Objects.equals(replaceTimeChunks, that.replaceTimeChunks); + && resultFormat == that.resultFormat; } @Override public int hashCode() { - return Objects.hash(exportStorageProvider, resultFormat, replaceTimeChunks); + return Objects.hash(exportStorageProvider, resultFormat); } @Override @@ -113,7 +92,6 @@ public String toString() return "ExportMSQDestination{" + "exportStorageProvider=" + exportStorageProvider + ", resultFormat=" + resultFormat + - ", replaceTimeChunks=" + replaceTimeChunks + '}'; } diff --git 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java index d40cea44841b..f5a1fd8c90d9 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskQueryMaker.java @@ -213,16 +213,9 @@ public QueryResponse runQuery(final DruidQuery druidQuery) ExportDestination exportDestination = ((ExportDestination) targetDataSource); ResultFormat format = ResultFormat.fromString(sqlQueryContext.getString(DruidSqlIngest.SQL_EXPORT_FILE_FORMAT)); - if (replaceTimeChunks != null && !Intervals.ONLY_ETERNITY.equals(replaceTimeChunks)) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.UNSUPPORTED) - .build("Currently export only works with OVERWRITE ALL clause"); - } - destination = new ExportMSQDestination( exportDestination.getStorageConnectorProvider(), - format, - replaceTimeChunks + format ); } else if (targetDataSource instanceof TableDestination) { Granularity segmentGranularityObject; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java index 5010536030ad..697866bc9122 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/destination/ExportMSQDestinationTest.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.databind.InjectableValues; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.druid.jackson.DefaultObjectMapper; -import org.apache.druid.java.util.common.Intervals; 
import org.apache.druid.sql.http.ResultFormat; import org.apache.druid.storage.StorageConfig; import org.apache.druid.storage.StorageConnectorModule; @@ -39,8 +38,7 @@ public void testSerde() throws IOException { ExportMSQDestination exportDestination = new ExportMSQDestination( new LocalFileExportStorageProvider("/path"), - ResultFormat.CSV, - Intervals.ONLY_ETERNITY + ResultFormat.CSV ); ObjectMapper objectMapper = new DefaultObjectMapper(); new StorageConnectorModule().getJacksonModules().forEach(objectMapper::registerModule); diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportConfig.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportConfig.java index 1cbb2b47e9bc..d5477c2998e0 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportConfig.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportConfig.java @@ -28,8 +28,8 @@ public class S3ExportConfig { - @JsonProperty("tempDir") - private final String tempDir; + @JsonProperty("tempLocalDir") + private final String tempLocalDir; @JsonProperty("chunkSize") private final HumanReadableBytes chunkSize; @JsonProperty("maxRetry") @@ -39,20 +39,20 @@ public class S3ExportConfig @JsonCreator public S3ExportConfig( - @JsonProperty("tempDir") final String tempDir, + @JsonProperty("tempLocalDir") final String tempLocalDir, @JsonProperty("chunkSize") @Nullable final HumanReadableBytes chunkSize, @JsonProperty("maxRetry") @Nullable final Integer maxRetry, @JsonProperty("allowedExportPaths") final List allowedExportPaths) { - this.tempDir = tempDir; + this.tempLocalDir = tempLocalDir; this.chunkSize = chunkSize; this.maxRetry = maxRetry; this.allowedExportPaths = allowedExportPaths; } - public String getTempDir() + public String getTempLocalDir() { - return tempDir; + return tempLocalDir; } public HumanReadableBytes getChunkSize() diff --git 
a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java index a7d47f1fc16a..7577f56f76f2 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/output/S3ExportStorageProvider.java @@ -66,11 +66,11 @@ public S3ExportStorageProvider( @Override public StorageConnector get() { - final String tempDir = s3ExportConfig.getTempDir(); + final String tempDir = s3ExportConfig.getTempLocalDir(); if (tempDir == null) { throw DruidException.forPersona(DruidException.Persona.OPERATOR) .ofCategory(DruidException.Category.NOT_FOUND) - .build("The runtime property `druid.export.storage.s3.tempDir` must be configured for S3 export."); + .build("The runtime property `druid.export.storage.s3.tempLocalDir` must be configured for S3 export."); } final List allowedExportPaths = s3ExportConfig.getAllowedExportPaths(); if (allowedExportPaths == null) { @@ -102,7 +102,10 @@ static void validateS3Prefix(@NotNull final List allowedExportPaths, fin } throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.INVALID_INPUT) - .build("None of the allowed prefixes matched the input path [%s]", providedUri); + .build("None of the allowed prefixes matched the input path [%s]. " + + "Please reach out to the cluster admin for the whitelisted paths for export. 
" + + "The paths are controlled via the property `druid.export.storage.s3.allowedExportPaths`.", + providedUri); } private static boolean validateUri(final URI allowedUri, final URI providedUri) @@ -133,4 +136,11 @@ public String getResourceType() { return TYPE_NAME; } + + @Override + @JsonIgnore + public String getBasePath() + { + return new CloudObjectLocation(bucket, prefix).toUri(S3StorageDruidModule.SCHEME).toString(); + } } diff --git a/processing/src/main/java/org/apache/druid/storage/ExportStorageProvider.java b/processing/src/main/java/org/apache/druid/storage/ExportStorageProvider.java index 0dd8984d70e0..890ac577b1a5 100644 --- a/processing/src/main/java/org/apache/druid/storage/ExportStorageProvider.java +++ b/processing/src/main/java/org/apache/druid/storage/ExportStorageProvider.java @@ -26,4 +26,9 @@ public interface ExportStorageProvider extends Provider { String getResourceType(); + + /** + * Return a URI representation of the base path. This is used to be used for logging and error messages. 
+ */ + String getBasePath(); } diff --git a/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java b/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java index 1ea0ef48586a..678e6f12ca80 100644 --- a/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java +++ b/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java @@ -76,6 +76,13 @@ public String getResourceType() return TYPE_NAME; } + @Override + @JsonIgnore + public String getBasePath() + { + return exportPath; + } + @Override public boolean equals(Object o) { @@ -112,12 +119,18 @@ public static File validateAndGetPath(String basePath, String customPath) "The runtime property `druid.export.storage.baseDir` must be configured for local export."); } final File baseDir = new File(basePath); - final File exportFile = new File(baseDir, customPath); + if (!baseDir.isAbsolute()) { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build( + "The runtime property `druid.export.storage.baseDir` must be an absolute path."); + } + final File exportFile = new File(customPath); if (!exportFile.toPath().normalize().startsWith(baseDir.toPath())) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.INVALID_INPUT) - .build( - "The provided destination must be within the path configured by runtime property `druid.export.storage.baseDir`"); + .build("The provided destination must be within the path configured by runtime property `druid.export.storage.baseDir` " + + "Please reach out to the cluster admin for the allowed path. 
", customPath); } return exportFile; } diff --git a/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java b/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java index 1d49701f9640..4daef2f9cd9e 100644 --- a/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java +++ b/processing/src/test/java/org/apache/druid/storage/local/LocalFileExportStorageProviderTest.java @@ -74,7 +74,7 @@ public void testEmptyPath() @Test public void testValidate() { - File file = LocalFileExportStorageProvider.validateAndGetPath("/base", "path"); + File file = LocalFileExportStorageProvider.validateAndGetPath("/base", "/base/path"); Assert.assertEquals("/base/path", file.toPath().toString()); } @@ -83,11 +83,11 @@ public void testWithNonSubdir() { Assert.assertThrows( DruidException.class, - () -> LocalFileExportStorageProvider.validateAndGetPath("/base", "../path") + () -> LocalFileExportStorageProvider.validateAndGetPath("/base", "/base/../path") ); Assert.assertThrows( DruidException.class, - () -> LocalFileExportStorageProvider.validateAndGetPath("/base", "../base1") + () -> LocalFileExportStorageProvider.validateAndGetPath("/base", "/base1") ); } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java index 4142f25ac493..b1ca59e2ccc3 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/export/TestExportStorageConnectorProvider.java @@ -37,4 +37,10 @@ public String getResourceType() { return "testExport"; } + + @Override + public String getBasePath() + { + return "testExport"; + } } From 10217a5a37ade6c96a33cb0bc2a6a1f3efb2e221 Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 7 Feb 2024 19:24:05 +0530 
Subject: [PATCH 50/50] Fix formatting --- .../druid/storage/local/LocalFileExportStorageProvider.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java b/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java index 678e6f12ca80..f0d4c87b41f3 100644 --- a/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java +++ b/processing/src/main/java/org/apache/druid/storage/local/LocalFileExportStorageProvider.java @@ -129,7 +129,7 @@ public static File validateAndGetPath(String basePath, String customPath) if (!exportFile.toPath().normalize().startsWith(baseDir.toPath())) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.INVALID_INPUT) - .build("The provided destination must be within the path configured by runtime property `druid.export.storage.baseDir` " + .build("The provided destination [%s] must be within the path configured by runtime property `druid.export.storage.baseDir` " + "Please reach out to the cluster admin for the allowed path. ", customPath); } return exportFile;