From 6fbbcc4974769ece10aad45d6b6de6a45b2e93a2 Mon Sep 17 00:00:00 2001 From: Laksh Singla Date: Fri, 23 Jun 2023 07:20:42 +0530 Subject: [PATCH 1/5] initial commit --- .../apache/druid/msq/exec/ControllerImpl.java | 7 +- .../apache/druid/msq/util/IntervalUtils.java | 19 ++++ .../apache/druid/msq/exec/MSQFaultsTest.java | 34 ++++++- .../druid/msq/util/IntervalUtilsTest.java | 99 +++++++++++++++++++ 4 files changed, 157 insertions(+), 2 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index dd163d406d3a..a02bd61eaf97 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -936,7 +936,12 @@ private List generateSegmentIdsWithShardSpecsForAppend( } } - if (allocation == null) { + // Even if allocation isn't null, the overlord makes the best effort job of allocating a segment with the given + // segmentGranularity. This is commonly seen in case when there is already a coarser segment in the interval where + // the requested segment is present and that segment completely overlaps the request. Throw an error if the interval + // doesn't match the granularity requested + if (allocation == null + || !IntervalUtils.doesIntervalMatchesGranularity(allocation.getInterval(), segmentGranularity)) { throw new MSQException( new InsertCannotAllocateSegmentFault( task.getDataSource(), diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/IntervalUtils.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/IntervalUtils.java index 43a844b5a6f1..11381be6370c 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/IntervalUtils.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/IntervalUtils.java @@ -19,6 +19,9 @@ package org.apache.druid.msq.util; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.granularity.AllGranularity; +import org.apache.druid.java.util.common.granularity.Granularity; import org.joda.time.Interval; import java.util.ArrayList; @@ -61,4 +64,20 @@ public static List difference(final List list1, final List= TIMESTAMP '2000-01-02 00:00:00' and __time < TIMESTAMP '2000-01-03 00:00:00' group by 1, 2 PARTITIONED by day clustered by dim1") + .setExpectedDataSource("foo1") + .setExpectedRowSignature(rowSignature) + .setExpectedMSQFault( + new InsertCannotAllocateSegmentFault( + "foo1", + Intervals.of("2000-01-02T00:00:00.000Z/2000-01-03T00:00:00.000Z") + ) + ) + .verifyResults(); + } + + @Test public void testInsertCannotBeEmptyFault() { diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/util/IntervalUtilsTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/util/IntervalUtilsTest.java index 6dd47313e7b1..5383e31b8c88 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/util/IntervalUtilsTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/util/IntervalUtilsTest.java @@ -19,8 +19,12 @@ package org.apache.druid.msq.util; +import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.PeriodGranularity; import org.joda.time.Interval; +import org.joda.time.Period; import org.junit.Assert; import org.junit.Test; @@ -84,6 +88,101 @@ public void test_difference() ); } + @Test + public void test_doesIntervalMatchesGranularity_withStandardGranularities() + { + + Assert.assertTrue( + IntervalUtils.doesIntervalMatchesGranularity(Intervals.ETERNITY, Granularities.ALL) + ); + + Assert.assertTrue( + IntervalUtils.doesIntervalMatchesGranularity( + Intervals.of("2000-01-01/2001-01-01"), Granularities.YEAR + ) + ); + + Assert.assertTrue( + IntervalUtils.doesIntervalMatchesGranularity( + Intervals.of("2000-01-01/2000-04-01"), Granularities.QUARTER + ) + ); + + + Assert.assertTrue( + IntervalUtils.doesIntervalMatchesGranularity( + Intervals.of("2000-01-01/2000-02-01"), Granularities.MONTH + ) + ); + + // With the way WEEK granularities work, this needs to be aligned to an actual week + Assert.assertTrue( + IntervalUtils.doesIntervalMatchesGranularity( + Intervals.of("1999-12-27/2000-01-03"), Granularities.WEEK + ) + ); + + Assert.assertTrue( + IntervalUtils.doesIntervalMatchesGranularity( + Intervals.of("2000-01-01/2000-01-02"), Granularities.DAY + ) + ); + + Assert.assertTrue( + IntervalUtils.doesIntervalMatchesGranularity( + Intervals.of("2000-01-01T00:00:00.000/2000-01-01T08:00:00.000"), Granularities.EIGHT_HOUR + ) + ); + + Assert.assertTrue( + IntervalUtils.doesIntervalMatchesGranularity( + Intervals.of("2000-01-01T00:00:00.000/2000-01-01T01:00:00.000"), Granularities.HOUR + ) + ); + + Assert.assertTrue( + IntervalUtils.doesIntervalMatchesGranularity( + Intervals.of("2000-01-01T00:00:00.000/2000-01-01T00:01:00.000"), Granularities.MINUTE + ) + ); + + Assert.assertTrue( + IntervalUtils.doesIntervalMatchesGranularity( + Intervals.of("2000-01-01T00:00:00.000/2000-01-01T00:00:01.000"), Granularities.SECOND + ) + ); + + Assert.assertFalse( + IntervalUtils.doesIntervalMatchesGranularity( + Intervals.of("2000-01-01/2002-01-01"), Granularities.YEAR + ) + ); + + Assert.assertFalse( + IntervalUtils.doesIntervalMatchesGranularity( + Intervals.of("2000-01-01/2002-01-08"), Granularities.YEAR + ) + ); + } + + @Test + public void test_doesIntervalMatchesGranularity_withPeriodGranularity() + { + Assert.assertTrue( + IntervalUtils.doesIntervalMatchesGranularity( + Intervals.of("2000-01-01/2000-01-04"), + new PeriodGranularity(new Period("P3D"), DateTimes.of("2000-01-01"), null) + ) + ); + + Assert.assertFalse( + IntervalUtils.doesIntervalMatchesGranularity( + Intervals.of("2000-01-01/2000-01-04"), + new PeriodGranularity(new Period("P3D"), DateTimes.of("2000-01-02"), null) + ) + ); + } + public static List intervals(final String... intervals) { return Arrays.stream(intervals).map(Intervals::of).collect(Collectors.toList()); From 881e8e5da180ac4f56faee0fc6fa7512cddedc7e Mon Sep 17 00:00:00 2001 From: Laksh Singla Date: Mon, 26 Jun 2023 10:09:13 +0530 Subject: [PATCH 2/5] review comments --- docs/multi-stage-query/reference.md | 1 + .../apache/druid/msq/exec/ControllerImpl.java | 18 ++- .../druid/msq/guice/MSQIndexingModule.java | 2 + .../InsertAllocatedIncorrectSegmentFault.java | 115 ++++++++++++++++++ .../apache/druid/msq/util/IntervalUtils.java | 21 ++-- .../apache/druid/msq/exec/MSQFaultsTest.java | 24 +++- .../msq/indexing/error/MSQFaultSerdeTest.java | 5 + .../druid/msq/util/IntervalUtilsTest.java | 28 ++--- .../util/common/granularity/Granularity.java | 3 +- website/.spelling | 1 + 10 files changed, 186 insertions(+), 32 deletions(-) create mode 100644 extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertAllocatedIncorrectSegmentFault.java diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 3e54abfce8b4..d0d5a8be74a8 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -424,6 +424,7 @@ The following table describes error codes you may encounter in the `multiStageQu | `ColumnNameRestricted` | The query uses a restricted column name. | `columnName`: The restricted column name. | | `ColumnTypeNotSupported` | The column type is not supported. This can be because:

  • Support for writing or reading from a particular column type is not supported.
  • The query attempted to use a column type that is not supported by the frame format. This occurs with ARRAY types, which are not yet implemented for frames.
| `columnName`: The column name with an unsupported type.

`columnType`: The unknown column type. | | `InsertCannotAllocateSegment` | The controller task could not allocate a new segment ID due to conflict with existing segments or pending segments. Common reasons for such conflicts:

  • Attempting to mix different granularities in the same intervals of the same datasource.
  • Prior ingestions that used non-extendable shard specs.
| `dataSource`

`interval`: The interval for the attempted new segment allocation. | +| `InsertAllocatedIncorrectSegment` | The controller task could not allocate a new segment ID of the specified granularity due to conflict with existing segments or pending segments.

This happens when a coarser segment is already overlapping the interval for which the allocation was requestd. Either use a REPLACE to overwrite over the existing overlapping segment or re-run INSERT with the pre-existing segment granularity in order to append to the interval| `dataSource`

`requestedInterval`: The interval for the attempted new segment allocation.

`allocatedInterval` The interval allocated for the requested segment | | `InsertCannotBeEmpty` | An INSERT or REPLACE query did not generate any output rows in a situation where output rows are required for success. This can happen for INSERT or REPLACE queries with `PARTITIONED BY` set to something other than `ALL` or `ALL TIME`. | `dataSource` | | `InsertLockPreempted` | An INSERT or REPLACE query was canceled by a higher-priority ingestion job, such as a real-time ingestion task. | | | `InsertTimeNull` | An INSERT or REPLACE query encountered a null timestamp in the `__time` field.

This can happen due to using an expression like `TIME_PARSE(timestamp) AS __time` with a timestamp that cannot be parsed. ([`TIME_PARSE`](../querying/sql-scalar.md#date-and-time-functions) returns null when it cannot parse a timestamp.) In this case, try parsing your timestamps using a different function or pattern. Or, if your timestamps may genuinely be null, consider using [`COALESCE`](../querying/sql-scalar.md#other-scalar-functions) to provide a default value. One option is [`CURRENT_TIMESTAMP`](../querying/sql-scalar.md#date-and-time-functions), which represents the start time of the job.| diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index a02bd61eaf97..0e1555cdffdc 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -99,6 +99,7 @@ import org.apache.druid.msq.indexing.error.CanceledFault; import org.apache.druid.msq.indexing.error.CannotParseExternalDataFault; import org.apache.druid.msq.indexing.error.FaultsExceededChecker; +import org.apache.druid.msq.indexing.error.InsertAllocatedIncorrectSegmentFault; import org.apache.druid.msq.indexing.error.InsertCannotAllocateSegmentFault; import org.apache.druid.msq.indexing.error.InsertCannotBeEmptyFault; import org.apache.druid.msq.indexing.error.InsertLockPreemptedFault; @@ -936,16 +937,25 @@ private List generateSegmentIdsWithShardSpecsForAppend( } } + if (allocation == null) { + throw new MSQException( + new InsertCannotAllocateSegmentFault( + task.getDataSource(), + segmentGranularity.bucket(timestamp) + ) + ); + } + // Even if allocation isn't null, the overlord makes the best effort job of allocating a segment with the given // segmentGranularity. This is commonly seen in case when there is already a coarser segment in the interval where // the requested segment is present and that segment completely overlaps the request. Throw an error if the interval // doesn't match the granularity requested - if (allocation == null - || !IntervalUtils.doesIntervalMatchesGranularity(allocation.getInterval(), segmentGranularity)) { + if (!IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity(allocation.getInterval(), segmentGranularity)) { throw new MSQException( - new InsertCannotAllocateSegmentFault( + new InsertAllocatedIncorrectSegmentFault( task.getDataSource(), - segmentGranularity.bucket(timestamp) + segmentGranularity.bucket(timestamp), + allocation.getInterval() ) ); } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java index 59300316d587..dd539ac810c0 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java @@ -43,6 +43,7 @@ import org.apache.druid.msq.indexing.error.ColumnNameRestrictedFault; import org.apache.druid.msq.indexing.error.ColumnTypeNotSupportedFault; import org.apache.druid.msq.indexing.error.DurableStorageConfigurationFault; +import org.apache.druid.msq.indexing.error.InsertAllocatedIncorrectSegmentFault; import org.apache.druid.msq.indexing.error.InsertCannotAllocateSegmentFault; import org.apache.druid.msq.indexing.error.InsertCannotBeEmptyFault; import org.apache.druid.msq.indexing.error.InsertLockPreemptedFault; @@ -111,6 +112,7 @@ public class MSQIndexingModule implements DruidModule ColumnNameRestrictedFault.class, DurableStorageConfigurationFault.class, InsertCannotAllocateSegmentFault.class, + InsertAllocatedIncorrectSegmentFault.class, InsertCannotBeEmptyFault.class, InsertLockPreemptedFault.class, InsertTimeNullFault.class, diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertAllocatedIncorrectSegmentFault.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertAllocatedIncorrectSegmentFault.java new file mode 100644 index 000000000000..530bbbc5289e --- /dev/null +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertAllocatedIncorrectSegmentFault.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.msq.indexing.error; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; +import com.google.common.base.Preconditions; +import org.apache.druid.java.util.common.granularity.DurationGranularity; +import org.apache.druid.java.util.common.granularity.GranularityType; +import org.joda.time.Interval; + +import java.util.Objects; + +@JsonTypeName(InsertAllocatedIncorrectSegmentFault.CODE) +public class InsertAllocatedIncorrectSegmentFault extends BaseMSQFault +{ + static final String CODE = "InsertAllocatedIncorrectSegment"; + + private final String dataSource; + private final Interval requestedInterval; + private final Interval allocatedInterval; + + @JsonCreator + public InsertAllocatedIncorrectSegmentFault( + @JsonProperty("dataSource") final String dataSource, + @JsonProperty("requestedInterval") final Interval requestedInterval, + @JsonProperty("allocatedInterval") final Interval allocatedInterval + ) + { + super( + CODE, + "Requested segment for dataSource [%s], interval [%s], but got [%s] interval instead. " + + "This happens when an overlapping segment is already present with a coarser granularity for the requested interval. " + + "Either set the partition granularity for the INSERT to [%s] to append to existing data or use REPLACE to " + + "overwrite over the pre-existing segment", + dataSource, + requestedInterval, + allocatedInterval, + convertIntervalToGranularityString(allocatedInterval) + ); + this.dataSource = Preconditions.checkNotNull(dataSource, "dataSource"); + this.requestedInterval = Preconditions.checkNotNull(requestedInterval, "requestedInterval"); + this.allocatedInterval = Preconditions.checkNotNull(allocatedInterval, "allocatedInterval"); + } + + @JsonProperty + public String getDataSource() + { + return dataSource; + } + + @JsonProperty + public Interval getRequestedInterval() + { + return requestedInterval; + } + + @JsonProperty + public Interval getAllocatedInterval() + { + return allocatedInterval; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + InsertAllocatedIncorrectSegmentFault that = (InsertAllocatedIncorrectSegmentFault) o; + return Objects.equals(dataSource, that.dataSource) + && Objects.equals(requestedInterval, that.requestedInterval) + && Objects.equals(allocatedInterval, that.allocatedInterval); + } + + /** + * Converts the given interval to a string representing the granularity which is more user-friendly. + */ + private static String convertIntervalToGranularityString(final Interval interval) + { + try { + return GranularityType.fromPeriod(interval.toPeriod()).name(); + } + catch (Exception e) { + return new DurationGranularity(interval.toDurationMillis(), null).toString(); + } + } + + @Override + public int hashCode() + { + return Objects.hash(dataSource, requestedInterval, allocatedInterval); + } +} diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/IntervalUtils.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/IntervalUtils.java index 11381be6370c..b7d9528722cd 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/IntervalUtils.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/IntervalUtils.java @@ -19,7 +19,7 @@ package org.apache.druid.msq.util; -import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.AllGranularity; import org.apache.druid.java.util.common.granularity.Granularity; import org.joda.time.Interval; @@ -28,7 +28,7 @@ import java.util.List; /** - * Things that would make sense in {@link org.apache.druid.java.util.common.Intervals} if this were not an extension. + * Things that would make sense in {@link Intervals} if this were not an extension. */ public class IntervalUtils { @@ -66,18 +66,21 @@ public static List difference(final List list1, final List= TIMESTAMP '2000-01-02 00:00:00' and __time < TIMESTAMP '2000-01-03 00:00:00' group by 1, 2 PARTITIONED by day clustered by dim1") + "insert into foo1" + + " select __time, dim1 , count(*) as cnt" + + " from foo" + + " where dim1 is not null and __time >= TIMESTAMP '2000-01-02 00:00:00' and __time < TIMESTAMP '2000-01-03 00:00:00'" + + " group by 1, 2" + + " PARTITIONED by day" + + " clustered by dim1" + ) .setExpectedDataSource("foo1") .setExpectedRowSignature(rowSignature) .setExpectedMSQFault( @@ -93,13 +101,21 @@ public void testInsertCannotAllocateSegmentFaultWhenInvalidAllocation() )).when(testTaskActionClient).submit(isA(SegmentAllocateAction.class)); testIngestQuery().setSql( - "insert into foo1 select __time, dim1 , count(*) as cnt from foo where dim1 is not null and __time >= TIMESTAMP '2000-01-02 00:00:00' and __time < TIMESTAMP '2000-01-03 00:00:00' group by 1, 2 PARTITIONED by day clustered by dim1") + "insert into foo1" + + " select __time, dim1 , count(*) as cnt" + + " from foo" + + " where dim1 is not null and __time >= TIMESTAMP '2000-01-02 00:00:00' and __time < TIMESTAMP '2000-01-03 00:00:00'" + + " group by 1, 2" + + " PARTITIONED by day" + + " clustered by dim1" + ) .setExpectedDataSource("foo1") .setExpectedRowSignature(rowSignature) .setExpectedMSQFault( - new InsertCannotAllocateSegmentFault( + new InsertAllocatedIncorrectSegmentFault( "foo1", - Intervals.of("2000-01-02T00:00:00.000Z/2000-01-03T00:00:00.000Z") + Intervals.of("2000-01-02T00:00:00.000Z/2000-01-03T00:00:00.000Z"), + Intervals.of("2000-01-01T00:00:00.000Z/2000-02-01T00:00:00.000Z") ) ) .verifyResults(); diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java index 484989e7badf..c499d9da1649 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java @@ -55,6 +55,11 @@ public void testFaultSerde() throws IOException assertFaultSerde(new ColumnTypeNotSupportedFault("the column", ColumnType.STRING_ARRAY)); assertFaultSerde(new ColumnNameRestrictedFault("the column")); assertFaultSerde(new InsertCannotAllocateSegmentFault("the datasource", Intervals.ETERNITY)); + assertFaultSerde(new InsertAllocatedIncorrectSegmentFault( + "the datasource", + Intervals.of("2000-01-01/2002-01-01"), + Intervals.ETERNITY + )); assertFaultSerde(new InsertCannotBeEmptyFault("the datasource")); assertFaultSerde(InsertLockPreemptedFault.INSTANCE); assertFaultSerde(InsertTimeNullFault.INSTANCE); diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/util/IntervalUtilsTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/util/IntervalUtilsTest.java index 5383e31b8c88..bd9e336ead4a 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/util/IntervalUtilsTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/util/IntervalUtilsTest.java @@ -93,73 +93,73 @@ public void test_doesIntervalMatchesGranularity_withStandardGranularities() { Assert.assertTrue( - IntervalUtils.doesIntervalMatchesGranularity(Intervals.ETERNITY, Granularities.ALL) + IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity(Intervals.ETERNITY, Granularities.ALL) ); Assert.assertTrue( - IntervalUtils.doesIntervalMatchesGranularity( + IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity( Intervals.of("2000-01-01/2001-01-01"), Granularities.YEAR ) ); Assert.assertTrue( - IntervalUtils.doesIntervalMatchesGranularity( + IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity( Intervals.of("2000-01-01/2000-04-01"), Granularities.QUARTER ) ); Assert.assertTrue( - IntervalUtils.doesIntervalMatchesGranularity( + IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity( Intervals.of("2000-01-01/2000-02-01"), Granularities.MONTH ) ); // With the way WEEK granularities work, this needs to be aligned to an actual week Assert.assertTrue( - IntervalUtils.doesIntervalMatchesGranularity( + IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity( Intervals.of("1999-12-27/2000-01-03"), Granularities.WEEK ) ); Assert.assertTrue( - IntervalUtils.doesIntervalMatchesGranularity( + IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity( Intervals.of("2000-01-01/2000-01-02"), Granularities.DAY ) ); Assert.assertTrue( - IntervalUtils.doesIntervalMatchesGranularity( + IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity( Intervals.of("2000-01-01T00:00:00.000/2000-01-01T08:00:00.000"), Granularities.EIGHT_HOUR ) ); Assert.assertTrue( - IntervalUtils.doesIntervalMatchesGranularity( + IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity( Intervals.of("2000-01-01T00:00:00.000/2000-01-01T01:00:00.000"), Granularities.HOUR ) ); Assert.assertTrue( - IntervalUtils.doesIntervalMatchesGranularity( + IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity( Intervals.of("2000-01-01T00:00:00.000/2000-01-01T00:01:00.000"), Granularities.MINUTE ) ); Assert.assertTrue( - IntervalUtils.doesIntervalMatchesGranularity( + IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity( Intervals.of("2000-01-01T00:00:00.000/2000-01-01T00:00:01.000"), Granularities.SECOND ) ); Assert.assertFalse( - IntervalUtils.doesIntervalMatchesGranularity( + IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity( Intervals.of("2000-01-01/2002-01-01"), Granularities.YEAR ) ); Assert.assertFalse( - IntervalUtils.doesIntervalMatchesGranularity( + IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity( Intervals.of("2000-01-01/2002-01-08"), Granularities.YEAR ) ); @@ -169,14 +169,14 @@ public void test_doesIntervalMatchesGranularity_withStandardGranularities() public void test_doesIntervalMatchesGranularity_withPeriodGranularity() { Assert.assertTrue( - IntervalUtils.doesIntervalMatchesGranularity( + IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity( Intervals.of("2000-01-01/2000-01-04"), new PeriodGranularity(new Period("P3D"), DateTimes.of("2000-01-01"), null) ) ); Assert.assertFalse( - IntervalUtils.doesIntervalMatchesGranularity( + IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity( Intervals.of("2000-01-01/2000-01-04"), new PeriodGranularity(new Period("P3D"), DateTimes.of("2000-01-02"), null) ) diff --git a/processing/src/main/java/org/apache/druid/java/util/common/granularity/Granularity.java b/processing/src/main/java/org/apache/druid/java/util/common/granularity/Granularity.java index 812538ae6e05..6f5d9fb61e1b 100644 --- a/processing/src/main/java/org/apache/druid/java/util/common/granularity/Granularity.java +++ b/processing/src/main/java/org/apache/druid/java/util/common/granularity/Granularity.java @@ -145,7 +145,8 @@ public static List granularitiesFinerThan(final Granularity gran0) public abstract DateTime toDate(String filePath, Formatter formatter); /** - * Return true if time chunks populated by this granularity includes the given interval time chunk. + * Return true only if the time chunks populated by this granularity includes the given interval time chunk. The + * interval must fit exactly into the scheme of the granularity for this to return true */ public abstract boolean isAligned(Interval interval); diff --git a/website/.spelling b/website/.spelling index 997d387c7626..15507fb86705 100644 --- a/website/.spelling +++ b/website/.spelling @@ -712,6 +712,7 @@ ColumnTypeNotSupported DurableStorageConfiguration ColumnTypeNotSupported InsertCannotAllocateSegment +InsertAllocatedIncorrectSegment InsertCannotBeEmpty InsertCannotOrderByDescending InsertCannotReplaceExistingSegment From 1a61c14d51d79fc7e1671652b6ac1bb585b78edf Mon Sep 17 00:00:00 2001 From: Laksh Singla Date: Mon, 26 Jun 2023 10:15:15 +0530 Subject: [PATCH 3/5] spelling --- docs/multi-stage-query/reference.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index d0d5a8be74a8..f247574d65d2 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -424,7 +424,7 @@ The following table describes error codes you may encounter in the `multiStageQu | `ColumnNameRestricted` | The query uses a restricted column name. | `columnName`: The restricted column name. | | `ColumnTypeNotSupported` | The column type is not supported. This can be because:

  • Support for writing or reading from a particular column type is not supported.
  • The query attempted to use a column type that is not supported by the frame format. This occurs with ARRAY types, which are not yet implemented for frames.
| `columnName`: The column name with an unsupported type.

`columnType`: The unknown column type. | | `InsertCannotAllocateSegment` | The controller task could not allocate a new segment ID due to conflict with existing segments or pending segments. Common reasons for such conflicts:

  • Attempting to mix different granularities in the same intervals of the same datasource.
  • Prior ingestions that used non-extendable shard specs.
| `dataSource`

`interval`: The interval for the attempted new segment allocation. | -| `InsertAllocatedIncorrectSegment` | The controller task could not allocate a new segment ID of the specified granularity due to conflict with existing segments or pending segments.

This happens when a coarser segment is already overlapping the interval for which the allocation was requestd. Either use a REPLACE to overwrite over the existing overlapping segment or re-run INSERT with the pre-existing segment granularity in order to append to the interval| `dataSource`

`requestedInterval`: The interval for the attempted new segment allocation.

`allocatedInterval` The interval allocated for the requested segment | +| `InsertAllocatedIncorrectSegment` | The controller task could not allocate a new segment ID of the specified granularity due to conflict with existing segments or pending segments.

This happens when a coarser segment is already overlapping the interval for which the allocation was requested. Either use a REPLACE to overwrite over the existing overlapping segment or re-run INSERT with the pre-existing segment granularity in order to append to the interval| `dataSource`

`requestedInterval`: The interval for the attempted new segment allocation.

`allocatedInterval` The interval allocated for the requested segment | | `InsertCannotBeEmpty` | An INSERT or REPLACE query did not generate any output rows in a situation where output rows are required for success. This can happen for INSERT or REPLACE queries with `PARTITIONED BY` set to something other than `ALL` or `ALL TIME`. | `dataSource` | | `InsertLockPreempted` | An INSERT or REPLACE query was canceled by a higher-priority ingestion job, such as a real-time ingestion task. | | | `InsertTimeNull` | An INSERT or REPLACE query encountered a null timestamp in the `__time` field.

This can happen due to using an expression like `TIME_PARSE(timestamp) AS __time` with a timestamp that cannot be parsed. ([`TIME_PARSE`](../querying/sql-scalar.md#date-and-time-functions) returns null when it cannot parse a timestamp.) In this case, try parsing your timestamps using a different function or pattern. Or, if your timestamps may genuinely be null, consider using [`COALESCE`](../querying/sql-scalar.md#other-scalar-functions) to provide a default value. One option is [`CURRENT_TIMESTAMP`](../querying/sql-scalar.md#date-and-time-functions), which represents the start time of the job.| From 06f3565e9a0bd91d877b2e966be7dadc5b322d53 Mon Sep 17 00:00:00 2001 From: Laksh Singla Date: Mon, 26 Jun 2023 13:47:57 +0530 Subject: [PATCH 4/5] equals fix --- .../indexing/error/InsertAllocatedIncorrectSegmentFault.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertAllocatedIncorrectSegmentFault.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertAllocatedIncorrectSegmentFault.java index 530bbbc5289e..3d2aadbba928 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertAllocatedIncorrectSegmentFault.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertAllocatedIncorrectSegmentFault.java @@ -88,6 +88,9 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) { return false; } + if (!super.equals(o)) { + return false; + } InsertAllocatedIncorrectSegmentFault that = (InsertAllocatedIncorrectSegmentFault) o; return Objects.equals(dataSource, that.dataSource) && Objects.equals(requestedInterval, that.requestedInterval) @@ -110,6 +113,6 @@ private static String convertIntervalToGranularityString(final Interval interval @Override public int hashCode() { - return Objects.hash(dataSource, requestedInterval, allocatedInterval); + return Objects.hash(super.hashCode(), dataSource, requestedInterval, allocatedInterval); } } From 8bf00cf00fe63f2a357618cbb9d50ce9366e3fe7 Mon Sep 17 00:00:00 2001 From: Laksh Singla Date: Mon, 26 Jun 2023 17:34:15 +0530 Subject: [PATCH 5/5] delete existing error class --- docs/multi-stage-query/reference.md | 3 +- .../apache/druid/msq/exec/ControllerImpl.java | 8 +- .../druid/msq/guice/MSQIndexingModule.java | 2 - .../InsertAllocatedIncorrectSegmentFault.java | 118 ------------------ .../InsertCannotAllocateSegmentFault.java | 70 ++++++++++- .../apache/druid/msq/util/IntervalUtils.java | 4 +- .../apache/druid/msq/exec/MSQFaultsTest.java | 6 +- .../msq/indexing/error/MSQFaultSerdeTest.java | 4 +- .../druid/msq/util/IntervalUtilsTest.java | 28 ++--- website/.spelling | 1 - 10 files changed, 91 insertions(+), 153 deletions(-) delete mode 100644 extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertAllocatedIncorrectSegmentFault.java diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index f247574d65d2..f33fc78838cb 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -423,8 +423,7 @@ The following table describes error codes you may encounter in the `multiStageQu | `CannotParseExternalData` | A worker task could not parse data from an external datasource. | `errorMessage`: More details on why parsing failed. | | `ColumnNameRestricted` | The query uses a restricted column name. | `columnName`: The restricted column name. | | `ColumnTypeNotSupported` | The column type is not supported. This can be because:

  • Support for writing or reading from a particular column type is not supported.
  • The query attempted to use a column type that is not supported by the frame format. This occurs with ARRAY types, which are not yet implemented for frames.
| `columnName`: The column name with an unsupported type.

`columnType`: The unknown column type. | -| `InsertCannotAllocateSegment` | The controller task could not allocate a new segment ID due to conflict with existing segments or pending segments. Common reasons for such conflicts:

  • Attempting to mix different granularities in the same intervals of the same datasource.
  • Prior ingestions that used non-extendable shard specs.
| `dataSource`

`interval`: The interval for the attempted new segment allocation. | -| `InsertAllocatedIncorrectSegment` | The controller task could not allocate a new segment ID of the specified granularity due to conflict with existing segments or pending segments.

This happens when a coarser segment is already overlapping the interval for which the allocation was requested. Either use a REPLACE to overwrite over the existing overlapping segment or re-run INSERT with the pre-existing segment granularity in order to append to the interval| `dataSource`

`requestedInterval`: The interval for the attempted new segment allocation.

`allocatedInterval` The interval allocated for the requested segment | +| `InsertCannotAllocateSegment` | The controller task could not allocate a new segment ID due to conflict with existing segments or pending segments. Common reasons for such conflicts:

  • Attempting to mix different granularities in the same intervals of the same datasource.
  • Prior ingestions that used non-extendable shard specs.


Use REPLACE to overwrite the existing data or if the error contains the `allocatedInterval` then alternatively rerun the INSERT job with the mentioned granularity to append to existing data. Note that it might not always be possible to append to the existing data using INSERT and can only be done if `allocatedInterval` is present. | `dataSource`

`interval`: The interval for the attempted new segment allocation.

`allocatedInterval`: The incorrect interval allocated by the overlord. It can be null | | `InsertCannotBeEmpty` | An INSERT or REPLACE query did not generate any output rows in a situation where output rows are required for success. This can happen for INSERT or REPLACE queries with `PARTITIONED BY` set to something other than `ALL` or `ALL TIME`. | `dataSource` | | `InsertLockPreempted` | An INSERT or REPLACE query was canceled by a higher-priority ingestion job, such as a real-time ingestion task. | | | `InsertTimeNull` | An INSERT or REPLACE query encountered a null timestamp in the `__time` field.

This can happen due to using an expression like `TIME_PARSE(timestamp) AS __time` with a timestamp that cannot be parsed. ([`TIME_PARSE`](../querying/sql-scalar.md#date-and-time-functions) returns null when it cannot parse a timestamp.) In this case, try parsing your timestamps using a different function or pattern. Or, if your timestamps may genuinely be null, consider using [`COALESCE`](../querying/sql-scalar.md#other-scalar-functions) to provide a default value. One option is [`CURRENT_TIMESTAMP`](../querying/sql-scalar.md#date-and-time-functions), which represents the start time of the job.| diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 0e1555cdffdc..db5e69971791 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -99,7 +99,6 @@ import org.apache.druid.msq.indexing.error.CanceledFault; import org.apache.druid.msq.indexing.error.CannotParseExternalDataFault; import org.apache.druid.msq.indexing.error.FaultsExceededChecker; -import org.apache.druid.msq.indexing.error.InsertAllocatedIncorrectSegmentFault; import org.apache.druid.msq.indexing.error.InsertCannotAllocateSegmentFault; import org.apache.druid.msq.indexing.error.InsertCannotBeEmptyFault; import org.apache.druid.msq.indexing.error.InsertLockPreemptedFault; @@ -941,7 +940,8 @@ private List generateSegmentIdsWithShardSpecsForAppend( throw new MSQException( new InsertCannotAllocateSegmentFault( task.getDataSource(), - segmentGranularity.bucket(timestamp) + segmentGranularity.bucket(timestamp), + null ) ); } @@ -950,9 +950,9 @@ private List generateSegmentIdsWithShardSpecsForAppend( // segmentGranularity. This is commonly seen in case when there is already a coarser segment in the interval where // the requested segment is present and that segment completely overlaps the request. Throw an error if the interval // doesn't match the granularity requested - if (!IntervalUtils.isEternityOrDoesIntervalAlignWithGranularity(allocation.getInterval(), segmentGranularity)) { + if (!IntervalUtils.isAligned(allocation.getInterval(), segmentGranularity)) { throw new MSQException( - new InsertAllocatedIncorrectSegmentFault( + new InsertCannotAllocateSegmentFault( task.getDataSource(), segmentGranularity.bucket(timestamp), allocation.getInterval() diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java index dd539ac810c0..59300316d587 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/guice/MSQIndexingModule.java @@ -43,7 +43,6 @@ import org.apache.druid.msq.indexing.error.ColumnNameRestrictedFault; import org.apache.druid.msq.indexing.error.ColumnTypeNotSupportedFault; import org.apache.druid.msq.indexing.error.DurableStorageConfigurationFault; -import org.apache.druid.msq.indexing.error.InsertAllocatedIncorrectSegmentFault; import org.apache.druid.msq.indexing.error.InsertCannotAllocateSegmentFault; import org.apache.druid.msq.indexing.error.InsertCannotBeEmptyFault; import org.apache.druid.msq.indexing.error.InsertLockPreemptedFault; @@ -112,7 +111,6 @@ public class MSQIndexingModule implements DruidModule ColumnNameRestrictedFault.class, DurableStorageConfigurationFault.class, InsertCannotAllocateSegmentFault.class, - InsertAllocatedIncorrectSegmentFault.class, InsertCannotBeEmptyFault.class, InsertLockPreemptedFault.class, InsertTimeNullFault.class, diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertAllocatedIncorrectSegmentFault.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertAllocatedIncorrectSegmentFault.java deleted file mode 100644 index 3d2aadbba928..000000000000 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertAllocatedIncorrectSegmentFault.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.msq.indexing.error; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.annotation.JsonTypeName; -import com.google.common.base.Preconditions; -import org.apache.druid.java.util.common.granularity.DurationGranularity; -import org.apache.druid.java.util.common.granularity.GranularityType; -import org.joda.time.Interval; - -import java.util.Objects; - -@JsonTypeName(InsertAllocatedIncorrectSegmentFault.CODE) -public class InsertAllocatedIncorrectSegmentFault extends BaseMSQFault -{ - static final String CODE = "InsertAllocatedIncorrectSegment"; - - private final String dataSource; - private final Interval requestedInterval; - private final Interval allocatedInterval; - - @JsonCreator - public InsertAllocatedIncorrectSegmentFault( - @JsonProperty("dataSource") final String dataSource, - @JsonProperty("requestedInterval") final Interval requestedInterval, - @JsonProperty("allocatedInterval") final Interval allocatedInterval - ) - { - super( - CODE, - "Requested segment for dataSource [%s], interval [%s], but got [%s] interval instead. " - + "This happens when an overlapping segment is already present with a coarser granularity for the requested interval. " - + "Either set the partition granularity for the INSERT to [%s] to append to existing data or use REPLACE to " - + "overwrite over the pre-existing segment", - dataSource, - requestedInterval, - allocatedInterval, - convertIntervalToGranularityString(allocatedInterval) - ); - this.dataSource = Preconditions.checkNotNull(dataSource, "dataSource"); - this.requestedInterval = Preconditions.checkNotNull(requestedInterval, "requestedInterval"); - this.allocatedInterval = Preconditions.checkNotNull(allocatedInterval, "allocatedInterval"); - } - - @JsonProperty - public String getDataSource() - { - return dataSource; - } - - @JsonProperty - public Interval getRequestedInterval() - { - return requestedInterval; - } - - @JsonProperty - public Interval getAllocatedInterval() - { - return allocatedInterval; - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - if (!super.equals(o)) { - return false; - } - InsertAllocatedIncorrectSegmentFault that = (InsertAllocatedIncorrectSegmentFault) o; - return Objects.equals(dataSource, that.dataSource) - && Objects.equals(requestedInterval, that.requestedInterval) - && Objects.equals(allocatedInterval, that.allocatedInterval); - } - - /** - * Converts the given interval to a string representing the granularity which is more user-friendly. - */ - private static String convertIntervalToGranularityString(final Interval interval) - { - try { - return GranularityType.fromPeriod(interval.toPeriod()).name(); - } - catch (Exception e) { - return new DurationGranularity(interval.toDurationMillis(), null).toString(); - } - } - - @Override - public int hashCode() - { - return Objects.hash(super.hashCode(), dataSource, requestedInterval, allocatedInterval); - } -} diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertCannotAllocateSegmentFault.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertCannotAllocateSegmentFault.java index 403af37d9bf0..f632ae677362 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertCannotAllocateSegmentFault.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/InsertCannotAllocateSegmentFault.java @@ -23,8 +23,12 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; import com.google.common.base.Preconditions; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.granularity.DurationGranularity; +import org.apache.druid.java.util.common.granularity.GranularityType; import org.joda.time.Interval; +import javax.annotation.Nullable; import java.util.Objects; @JsonTypeName(InsertCannotAllocateSegmentFault.CODE) @@ -35,15 +39,20 @@ public class InsertCannotAllocateSegmentFault extends BaseMSQFault private final String dataSource; private final Interval interval; + @Nullable + private final Interval allocatedInterval; + @JsonCreator public InsertCannotAllocateSegmentFault( @JsonProperty("dataSource") final String dataSource, - @JsonProperty("interval") final Interval interval + @JsonProperty("interval") final Interval interval, + @Nullable @JsonProperty("allocatedInterval") final Interval allocatedInterval ) { - super(CODE, "Cannot allocate segment for dataSource [%s], interval [%s]", dataSource, interval); + super(CODE, getErrorMessage(dataSource, interval, allocatedInterval)); this.dataSource = Preconditions.checkNotNull(dataSource, "dataSource"); this.interval = Preconditions.checkNotNull(interval, "interval"); + this.allocatedInterval = allocatedInterval; } @JsonProperty @@ -58,6 +67,57 @@ public Interval getInterval() return interval; } + @Nullable + @JsonProperty + public Interval getAllocatedInterval() + { + return allocatedInterval; + } + + private static String getErrorMessage( + final String dataSource, + final Interval interval, + @Nullable final Interval allocatedInterval + ) + { + String errorMessage; + if (allocatedInterval == null) { + errorMessage = StringUtils.format( + "Cannot allocate segment for dataSource [%s], interval [%s]. This can happen if the prior ingestion " + + "uses non-extendable shard specs or if the partitioned by granularity is different from the granularity of the " + + "pre-existing segments. Check the granularities of the pre-existing segments or re-run the ingestion with REPLACE " + + "to overwrite over the existing data", + dataSource, + interval + ); + } else { + errorMessage = StringUtils.format( + "Requested segment for dataSource [%s], interval [%s], but got [%s] interval instead. " + + "This happens when an overlapping segment is already present with a coarser granularity for the requested interval. " + + "Either set the partition granularity for the INSERT to [%s] to append to existing data or use REPLACE to " + + "overwrite over the pre-existing segment", + dataSource, + interval, + allocatedInterval, + convertIntervalToGranularityString(allocatedInterval) + ); + } + return errorMessage; + } + + /** + * Converts the given interval to a string representing the granularity which is more user-friendly. + */ + private static String convertIntervalToGranularityString(final Interval interval) + { + try { + return GranularityType.fromPeriod(interval.toPeriod()).name(); + } + catch (Exception e) { + return new DurationGranularity(interval.toDurationMillis(), null).toString(); + } + } + @Override public boolean equals(Object o) { @@ -71,12 +131,14 @@ public boolean equals(Object o) return false; } InsertCannotAllocateSegmentFault that = (InsertCannotAllocateSegmentFault) o; - return Objects.equals(dataSource, that.dataSource) && Objects.equals(interval, that.interval); + return Objects.equals(dataSource, that.dataSource) + && Objects.equals(interval, that.interval) + && Objects.equals(allocatedInterval, that.allocatedInterval); } @Override public int hashCode() { - return Objects.hash(super.hashCode(), dataSource, interval); + return Objects.hash(super.hashCode(), dataSource, interval, allocatedInterval); } } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/IntervalUtils.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/IntervalUtils.java index b7d9528722cd..328a0c0b74d7 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/IntervalUtils.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/IntervalUtils.java @@ -67,12 +67,10 @@ public static List difference(final List list1, final List