From dd63724db053b17547bf692e7963e2b8547d6fa8 Mon Sep 17 00:00:00 2001 From: Amatya Date: Wed, 6 Mar 2024 22:37:15 +0530 Subject: [PATCH 01/20] Initial commit --- .../druid/msq/indexing/MSQControllerTask.java | 7 + .../druid/msq/indexing/MSQWorkerTask.java | 7 + .../indexing/common/task/AbstractTask.java | 7 + .../druid/indexing/common/task/Task.java | 6 + .../druid/indexing/common/task/TaskTest.java | 6 + .../IndexerSQLMetadataStorageCoordinator.java | 70 ++++++++++ .../apache/druid/metadata/PendingSegment.java | 128 ++++++++++++++++++ .../druid/metadata/SQLMetadataConnector.java | 21 +++ 8 files changed, 252 insertions(+) create mode 100644 server/src/main/java/org/apache/druid/metadata/PendingSegment.java diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java index 7eb455ca8424..2743a150f23b 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java @@ -156,6 +156,13 @@ public Set getInputSourceResources() return ImmutableSet.of(); } + @JsonIgnore + @Override + public String getPendingSegmentGroup() + { + return getId(); + } + @JsonProperty("spec") public MSQSpec getQuerySpec() { diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java index 95abc6b03a63..cc762903bb86 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java @@ -125,6 +125,13 @@ public Set getInputSourceResources() return ImmutableSet.of(); } + @JsonIgnore + @Override + public 
String getPendingSegmentGroup() + { + return getControllerTaskId(); + } + @Override public boolean isReady(final TaskActionClient taskActionClient) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java index e8770c512dc4..d3036a20ed2c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java @@ -277,6 +277,13 @@ public String getGroupId() return groupId; } + @Nullable + @Override + public String getPendingSegmentGroup() + { + return null; + } + @JsonProperty("resource") @Override public TaskResource getTaskResource() diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java index a5f34f9bbbfe..00e57c81b1a6 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java @@ -101,6 +101,12 @@ public interface Task */ String getGroupId(); + /** + * The group used to associate a pending segment with a task + * @return task group for pending segment allocations + */ + String getPendingSegmentGroup(); + /** * Returns task priority. The task priority is currently used only for prioritized locking, but, in the future, it can * be used for task scheduling, cluster resource management, etc. 
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskTest.java index c2957f6688c7..324d3331bfba 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskTest.java @@ -47,6 +47,12 @@ public String getGroupId() return null; } + @Override + public String getPendingSegmentGroup() + { + return null; + } + @Override public TaskResource getTaskResource() { diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 3d8939c3e52e..fbef87da4f0d 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -333,6 +333,34 @@ Map getPendingSegmentsForIntervalWithHandle( return pendingSegmentToSequenceName; } + List getPendingSegmentsForTaskGroupWithHandle( + final Handle handle, + final String dataSource, + final String taskGroup + ) + { + String sql = "SELECT payload, sequence_name, sequence_prev_id, task_group, parent_id" + + " FROM " + dbTables.getPendingSegmentsTable() + + " WHERE dataSource = :dataSource AND task_group = :task_group"; + + Query> query = handle.createQuery(sql) + .bind("dataSource", dataSource) + .bind("task_group", taskGroup); + + final ResultIterator pendingSegmentRecords = + query.map((index, r, ctx) -> PendingSegment.fromResultSet(r, jsonMapper)) + .iterator(); + + final List pendingSegments = new ArrayList<>(); + while (pendingSegmentRecords.hasNext()) { + pendingSegments.add(pendingSegmentRecords.next()); + } + + pendingSegmentRecords.close(); + + return pendingSegments; + } + private Map getPendingSegmentsForIntervalWithHandle( final Handle 
handle, final String dataSource, @@ -1351,6 +1379,48 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( } } + private int insertPendingSegmentsIntoMetastore( + Handle handle, + List pendingSegments, + String dataSource, + boolean skipSegmentLineageCheck + ) throws JsonProcessingException + { + final PreparedBatch insertBatch = handle.prepareBatch( + StringUtils.format( + "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, sequence_name, sequence_prev_id, " + + "sequence_name_prev_id_sha1, payload, task_group, parent_id) " + + "VALUES (:id, :dataSource, :created_date, :start, :end, :sequence_name, :sequence_prev_id, " + + ":sequence_name_prev_id_sha1, :payload, :task_group, :parent_id)", + dbTables.getPendingSegmentsTable(), + connector.getQuoteString() + )); + + final String now = DateTimes.nowUtc().toString(); + for (PendingSegment pendingSegment : pendingSegments) { + final SegmentIdWithShardSpec segmentId = pendingSegment.getId(); + final Interval interval = segmentId.getInterval(); + + insertBatch.add() + .bind("id", segmentId.toString()) + .bind("dataSource", dataSource) + .bind("created_date", now) + .bind("start", interval.getStart().toString()) + .bind("end", interval.getEnd().toString()) + .bind("sequence_name", pendingSegment.getSequenceName()) + .bind("sequence_prev_id", pendingSegment.getSequencePrevId()) + .bind( + "sequence_name_prev_id_sha1", + pendingSegment.computeSequenceNamePrevIdSha1(skipSegmentLineageCheck) + ) + .bind("payload", jsonMapper.writeValueAsBytes(segmentId)) + .bind("task_group", pendingSegment.getTaskGroup()) + .bind("parent_id", pendingSegment.getParentId()); + } + int[] updated = insertBatch.execute(); + return Arrays.stream(updated).sum(); + } + private int insertPendingSegmentsIntoMetastore( Handle handle, Map createdSegments, diff --git a/server/src/main/java/org/apache/druid/metadata/PendingSegment.java b/server/src/main/java/org/apache/druid/metadata/PendingSegment.java new file 
mode 100644 index 000000000000..10819d458cc6 --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/PendingSegment.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.hash.Hasher; +import com.google.common.hash.Hashing; +import com.google.common.io.BaseEncoding; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; +import org.joda.time.Interval; + +import javax.annotation.Nullable; +import java.sql.ResultSet; + +public class PendingSegment +{ + private final SegmentIdWithShardSpec id; + private final String sequenceName; + private final String sequencePrevId; + private final String parentId; + private final String taskGroup; + + @JsonCreator + public PendingSegment( + @JsonProperty("id") SegmentIdWithShardSpec id, + @JsonProperty("sequenceName") String sequenceName, + @JsonProperty("sequencePrevId") String sequencePrevId, + @JsonProperty("parentId") @Nullable String 
parentId, + @JsonProperty("taskGroup") @Nullable String taskGroup + ) + { + this.id = id; + this.sequenceName = sequenceName; + this.sequencePrevId = sequencePrevId; + this.parentId = parentId; + this.taskGroup = taskGroup; + } + + @JsonProperty + public SegmentIdWithShardSpec getId() + { + return id; + } + + @JsonProperty + public String getSequenceName() + { + return sequenceName; + } + + @JsonProperty + public String getSequencePrevId() + { + return sequencePrevId; + } + + @JsonProperty + public String getParentId() + { + return parentId; + } + + @JsonProperty + public String getTaskGroup() + { + return taskGroup; + } + + @SuppressWarnings("UnstableApiUsage") + public String computeSequenceNamePrevIdSha1(boolean skipSegmentLineageCheck) + { + final Hasher hasher = Hashing.sha1().newHasher() + .putBytes(StringUtils.toUtf8(getSequenceName())) + .putByte((byte) 0xff); + + if (skipSegmentLineageCheck) { + final Interval interval = getId().getInterval(); + hasher + .putLong(interval.getStartMillis()) + .putLong(interval.getEndMillis()); + } else { + hasher + .putBytes(StringUtils.toUtf8(getSequencePrevId())); + } + + hasher.putByte((byte) 0xff); + hasher.putBytes(StringUtils.toUtf8(getId().getVersion())); + + return BaseEncoding.base16().encode(hasher.hash().asBytes()); + } + + public static PendingSegment fromResultSet(ResultSet resultSet, ObjectMapper jsonMapper) + { + try { + final byte[] payload = resultSet.getBytes("payload"); + return new PendingSegment( + jsonMapper.readValue(payload, SegmentIdWithShardSpec.class), + resultSet.getString("sequence_name"), + resultSet.getString("sequence_prev_id"), + resultSet.getString("task_group"), + resultSet.getString("parent_id") + ); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } +} diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java index 6feaf9e07a38..752f0aa9af4c 100644 --- 
a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java +++ b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java @@ -288,6 +288,7 @@ tableName, getPayloadType(), getQuoteString(), getCollation() ) ) ); + alterPendingSegmentsTableAddParentIdAndTaskGroup(tableName); } public void createDataSourceTable(final String tableName) @@ -460,6 +461,26 @@ private void alterEntryTableAddTypeAndGroupId(final String tableName) } } + private void alterPendingSegmentsTableAddParentIdAndTaskGroup(final String tableName) + { + List statements = new ArrayList<>(); + if (tableHasColumn(tableName, "parent_id")) { + log.info("Table[%s] already has column[parent_id].", tableName); + } else { + log.info("Adding column[parent_id] to table[%s].", tableName); + statements.add(StringUtils.format("ALTER TABLE %1$s ADD COLUMN parent_id VARCHAR(255)", tableName)); + } + if (tableHasColumn(tableName, "task_group")) { + log.info("Table[%s] already has column[task_group].", tableName); + } else { + log.info("Adding column[task_group] to table[%s].", tableName); + statements.add(StringUtils.format("ALTER TABLE %1$s ADD COLUMN task_group VARCHAR(255)", tableName)); + } + if (!statements.isEmpty()) { + alterTable(tableName, statements); + } + } + public void createLogTable(final String tableName, final String entryTypeName) { createTable( From 5f77ea62aa01c82e1d5d767cfbdffc6eaf0e2ecb Mon Sep 17 00:00:00 2001 From: Amatya Date: Tue, 12 Mar 2024 12:38:26 +0530 Subject: [PATCH 02/20] Allocate and commit PendingSegment instead of SegmentIdWithShardSpec --- .../druid/indexing/overlord/TaskLockbox.java | 4 +- .../overlord/SegmentCreateRequest.java | 18 ++- .../IndexerSQLMetadataStorageCoordinator.java | 125 ++++++++++-------- .../overlord/SegmentCreateRequestTest.java | 4 +- ...exerSQLMetadataStorageCoordinatorTest.java | 10 +- 5 files changed, 100 insertions(+), 61 deletions(-) diff --git 
a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java index 54e29191cff7..d8291d395ddd 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java @@ -1771,7 +1771,9 @@ SegmentCreateRequest getSegmentRequest() action.getSequenceName(), action.getPreviousSegmentId(), acquiredLock == null ? lockRequest.getVersion() : acquiredLock.getVersion(), - action.getPartialShardSpec() + action.getPartialShardSpec(), + null, + task.getPendingSegmentGroup() ); } diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/SegmentCreateRequest.java b/server/src/main/java/org/apache/druid/indexing/overlord/SegmentCreateRequest.java index b43e46d8e7a5..517381fe53f0 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/SegmentCreateRequest.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/SegmentCreateRequest.java @@ -38,18 +38,24 @@ public class SegmentCreateRequest private final String sequenceName; private final String previousSegmentId; private final PartialShardSpec partialShardSpec; + private final String parentId; + private final String taskGroup; public SegmentCreateRequest( String sequenceName, String previousSegmentId, String version, - PartialShardSpec partialShardSpec + PartialShardSpec partialShardSpec, + String parentId, + String taskGroup ) { this.sequenceName = sequenceName; this.previousSegmentId = previousSegmentId == null ? 
"" : previousSegmentId; this.version = version; this.partialShardSpec = partialShardSpec; + this.parentId = parentId; + this.taskGroup = taskGroup; } public String getSequenceName() @@ -75,4 +81,14 @@ public PartialShardSpec getPartialShardSpec() { return partialShardSpec; } + + public String getParentId() + { + return parentId; + } + + public String getTaskGroup() + { + return taskGroup; + } } diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index fbef87da4f0d..a1c776517336 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -361,19 +361,19 @@ List getPendingSegmentsForTaskGroupWithHandle( return pendingSegments; } - private Map getPendingSegmentsForIntervalWithHandle( + private List getPendingSegmentsForIntervalWithHandle( final Handle handle, final String dataSource, final Interval interval - ) throws IOException + ) { - final ResultIterator dbSegments = + final ResultIterator dbSegments = handle.createQuery( StringUtils.format( // This query might fail if the year has a different number of digits // See https://github.com/apache/druid/pull/11582 for a similar issue // Using long for these timestamps instead of varchar would give correct time comparisons - "SELECT sequence_name, payload FROM %1$s" + "SELECT sequence_name, payload, sequence_prev_id, task_group, parent_id FROM %1$s" + " WHERE dataSource = :dataSource AND start < :end and %2$send%2$s > :start", dbTables.getPendingSegmentsTable(), connector.getQuoteString() ) @@ -381,22 +381,15 @@ private Map getPendingSegmentsForIntervalWithHan .bind("dataSource", dataSource) .bind("start", interval.getStart().toString()) .bind("end", interval.getEnd().toString()) - .map((index, r, ctx) -> PendingSegmentsRecord.fromResultSet(r)) 
+ .map((index, r, ctx) -> PendingSegment.fromResultSet(r, jsonMapper)) .iterator(); - final Map pendingSegmentToSequenceName = new HashMap<>(); + final List pendingSegments = new ArrayList<>(); while (dbSegments.hasNext()) { - PendingSegmentsRecord record = dbSegments.next(); - final SegmentIdWithShardSpec identifier = jsonMapper.readValue(record.payload, SegmentIdWithShardSpec.class); - - if (interval.overlaps(identifier.getInterval())) { - pendingSegmentToSequenceName.put(identifier, record.sequenceName); - } + pendingSegments.add(dbSegments.next()); } - dbSegments.close(); - - return pendingSegmentToSequenceName; + return pendingSegments; } private SegmentTimeline getTimelineForIntervalsWithHandle( @@ -789,15 +782,13 @@ private Map upgradePendingSegmen final int numCorePartitions = maxSegmentId.getShardSpec().getNumCorePartitions(); int currentPartitionNumber = maxSegmentId.getShardSpec().getPartitionNum(); - final Map overlappingPendingSegments - = getPendingSegmentsForIntervalWithHandle(handle, datasource, replaceInterval, activeRealtimeSequencePrefixes); + final List overlappingPendingSegments + = getPendingSegmentsForIntervalWithHandle(handle, datasource, replaceInterval); - for (Map.Entry overlappingPendingSegment - : overlappingPendingSegments.entrySet()) { - final SegmentIdWithShardSpec pendingSegmentId = overlappingPendingSegment.getKey(); - final String pendingSegmentSequence = overlappingPendingSegment.getValue(); + for (PendingSegment overlappingPendingSegment : overlappingPendingSegments) { + final SegmentIdWithShardSpec pendingSegmentId = overlappingPendingSegment.getId(); - if (shouldUpgradePendingSegment(pendingSegmentId, pendingSegmentSequence, replaceInterval, replaceVersion)) { + if (shouldUpgradePendingSegment(overlappingPendingSegment, replaceInterval, replaceVersion)) { // Ensure unique sequence_name_prev_id_sha1 by setting // sequence_prev_id -> pendingSegmentId // sequence_name -> prefix + replaceVersion @@ -812,7 +803,9 @@ private Map 
upgradePendingSegmen UPGRADED_PENDING_SEGMENT_PREFIX + replaceVersion, pendingSegmentId.toString(), replaceVersion, - NumberedPartialShardSpec.instance() + NumberedPartialShardSpec.instance(), + pendingSegmentId.toString(), + overlappingPendingSegment.getTaskGroup() ), newId ); @@ -838,20 +831,19 @@ private Map upgradePendingSegmen } private boolean shouldUpgradePendingSegment( - SegmentIdWithShardSpec pendingSegmentId, - String pendingSegmentSequenceName, + PendingSegment pendingSegment, Interval replaceInterval, String replaceVersion ) { - if (pendingSegmentId.getVersion().compareTo(replaceVersion) >= 0) { + if (pendingSegment.getId().getVersion().compareTo(replaceVersion) >= 0) { return false; - } else if (!replaceInterval.contains(pendingSegmentId.getInterval())) { + } else if (!replaceInterval.contains(pendingSegment.getId().getInterval())) { return false; } else { // Do not upgrade already upgraded pending segment - return pendingSegmentSequenceName == null - || !pendingSegmentSequenceName.startsWith(UPGRADED_PENDING_SEGMENT_PREFIX); + return pendingSegment.getSequenceName() == null + || !pendingSegment.getSequenceName().startsWith(UPGRADED_PENDING_SEGMENT_PREFIX); } } @@ -985,7 +977,7 @@ private Map allocatePendingSegment } // For each of the remaining requests, create a new segment - final Map createdSegments = createNewSegments( + final Map createdSegments = createNewSegments( handle, dataSource, interval, @@ -1003,12 +995,14 @@ private Map allocatePendingSegment // have difficulty with large unique keys (see https://github.com/apache/druid/issues/2319) insertPendingSegmentsIntoMetastore( handle, - createdSegments, + ImmutableList.copyOf(createdSegments.values()), dataSource, skipSegmentLineageCheck ); - allocatedSegmentIds.putAll(createdSegments); + for (Map.Entry entry : createdSegments.entrySet()) { + allocatedSegmentIds.put(entry.getKey(), entry.getValue().getId()); + } return allocatedSegmentIds; } @@ -1508,7 +1502,7 @@ private Set 
createNewIdsForAppendSegments( Handle handle, String dataSource, Set segmentsToAppend - ) throws IOException + ) { if (segmentsToAppend.isEmpty()) { return Collections.emptySet(); @@ -1617,7 +1611,7 @@ private Set createNewIdsForAppendSegmentsWithVersion( Interval upgradeInterval, Set segmentsToUpgrade, Set committedSegments - ) throws IOException + ) { // Find the committed segments with the higest partition number SegmentIdWithShardSpec committedMaxId = null; @@ -1630,9 +1624,13 @@ private Set createNewIdsForAppendSegmentsWithVersion( // Get pending segments for the new version to determine the next partition number to allocate final String dataSource = segmentsToUpgrade.iterator().next().getDataSource(); - final Set pendingSegmentIds - = getPendingSegmentsForIntervalWithHandle(handle, dataSource, upgradeInterval).keySet(); - final Set allAllocatedIds = new HashSet<>(pendingSegmentIds); + final List pendingSegments + = getPendingSegmentsForIntervalWithHandle(handle, dataSource, upgradeInterval); + final Set allAllocatedIds = new HashSet<>( + pendingSegments.stream() + .map(PendingSegment::getId) + .collect(Collectors.toSet()) + ); // Create new IDs for each append segment final Set newSegmentIds = new HashSet<>(); @@ -1641,7 +1639,9 @@ private Set createNewIdsForAppendSegmentsWithVersion( segment.getId() + "__" + upgradeVersion, null, upgradeVersion, - NumberedPartialShardSpec.instance() + NumberedPartialShardSpec.instance(), + null, + null ); // Create new segment ID based on committed segments, allocated pending segments @@ -1653,7 +1653,7 @@ private Set createNewIdsForAppendSegmentsWithVersion( upgradeVersion, committedMaxId, allAllocatedIds - ); + ).getId(); // Update the set so that subsequent segment IDs use a higher partition number allAllocatedIds.add(newId); @@ -1669,14 +1669,14 @@ private Set createNewIdsForAppendSegmentsWithVersion( return newSegmentIds; } - private Map createNewSegments( + private Map createNewSegments( Handle handle, String 
dataSource, Interval interval, boolean skipSegmentLineageCheck, List> existingChunks, List requests - ) throws IOException + ) { if (requests.isEmpty()) { return Collections.emptyMap(); @@ -1717,18 +1717,21 @@ private Map createNewSegments( // across all shard specs (published + pending). // A pending segment having a higher partitionId must also be considered // to avoid clashes when inserting the pending segment created here. - final Set pendingSegments = - new HashSet<>(getPendingSegmentsForIntervalWithHandle(handle, dataSource, interval).keySet()); + final Set pendingSegments = new HashSet<>( + getPendingSegmentsForIntervalWithHandle(handle, dataSource, interval).stream() + .map(PendingSegment::getId) + .collect(Collectors.toSet()) + ); - final Map createdSegments = new HashMap<>(); - final Map uniqueRequestToSegment = new HashMap<>(); + final Map createdSegments = new HashMap<>(); + final Map uniqueRequestToSegment = new HashMap<>(); for (SegmentCreateRequest request : requests) { // Check if the required segment has already been created in this batch final UniqueAllocateRequest uniqueRequest = new UniqueAllocateRequest(interval, request, skipSegmentLineageCheck); - final SegmentIdWithShardSpec createdSegment; + final PendingSegment createdSegment; if (uniqueRequestToSegment.containsKey(uniqueRequest)) { createdSegment = uniqueRequestToSegment.get(uniqueRequest); } else { @@ -1743,9 +1746,9 @@ private Map createNewSegments( // Add to pendingSegments to consider for partitionId if (createdSegment != null) { - pendingSegments.add(createdSegment); + pendingSegments.add(createdSegment.getId()); uniqueRequestToSegment.put(uniqueRequest, createdSegment); - log.info("Created new segment[%s]", createdSegment); + log.info("Created new segment[%s]", createdSegment.getId()); } } @@ -1758,7 +1761,7 @@ private Map createNewSegments( return createdSegments; } - private SegmentIdWithShardSpec createNewSegment( + private PendingSegment createNewSegment( SegmentCreateRequest 
request, String dataSource, Interval interval, @@ -1811,12 +1814,19 @@ private SegmentIdWithShardSpec createNewSegment( : PartitionIds.ROOT_GEN_START_PARTITION_ID; String version = newSegmentVersion == null ? existingVersion : newSegmentVersion; - return new SegmentIdWithShardSpec( + SegmentIdWithShardSpec pendingSegmentId = new SegmentIdWithShardSpec( dataSource, interval, version, partialShardSpec.complete(jsonMapper, newPartitionId, 0) ); + return new PendingSegment( + pendingSegmentId, + request.getSequenceName(), + request.getPreviousSegmentId(), + request.getParentId(), + request.getTaskGroup() + ); } else if (!overallMaxId.getInterval().equals(interval)) { log.warn( @@ -1841,7 +1851,7 @@ private SegmentIdWithShardSpec createNewSegment( // When the core partitions have been dropped, using pending segments may lead to an incorrect state // where the chunk is believed to have core partitions and queries results are incorrect. - return new SegmentIdWithShardSpec( + SegmentIdWithShardSpec pendingSegmentId = new SegmentIdWithShardSpec( dataSource, interval, Preconditions.checkNotNull(newSegmentVersion, "newSegmentVersion"), @@ -1851,6 +1861,13 @@ private SegmentIdWithShardSpec createNewSegment( committedMaxId == null ? 0 : committedMaxId.getShardSpec().getNumCorePartitions() ) ); + return new PendingSegment( + pendingSegmentId, + request.getSequenceName(), + request.getPreviousSegmentId(), + request.getParentId(), + request.getTaskGroup() + ); } } @@ -1876,7 +1893,7 @@ private SegmentIdWithShardSpec createNewSegment( final PartialShardSpec partialShardSpec, final String existingVersion, final List> existingChunks - ) throws IOException + ) { // max partitionId of published data segments which share the same partition space. 
SegmentIdWithShardSpec committedMaxId = null; @@ -1910,7 +1927,9 @@ private SegmentIdWithShardSpec createNewSegment( // A pending segment having a higher partitionId must also be considered // to avoid clashes when inserting the pending segment created here. final Set pendings = new HashSet<>( - getPendingSegmentsForIntervalWithHandle(handle, dataSource, interval).keySet() + getPendingSegmentsForIntervalWithHandle(handle, dataSource, interval).stream() + .map(PendingSegment::getId) + .collect(Collectors.toSet()) ); if (committedMaxId != null) { pendings.add(committedMaxId); diff --git a/server/src/test/java/org/apache/druid/indexing/overlord/SegmentCreateRequestTest.java b/server/src/test/java/org/apache/druid/indexing/overlord/SegmentCreateRequestTest.java index 33641a8417da..57e01d76a446 100644 --- a/server/src/test/java/org/apache/druid/indexing/overlord/SegmentCreateRequestTest.java +++ b/server/src/test/java/org/apache/druid/indexing/overlord/SegmentCreateRequestTest.java @@ -35,7 +35,9 @@ public void testNullPreviousSegmentId() "sequence", null, "version", - partialShardSpec + partialShardSpec, + null, + null ); Assert.assertEquals("sequence", request.getSequenceName()); Assert.assertEquals("", request.getPreviousSegmentId()); diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index da7bf4226921..2712c69cde76 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -2684,7 +2684,7 @@ public void testAllocatePendingSegments() final Interval interval = Intervals.of("2017-01-01/2017-02-01"); final String sequenceName = "seq"; - final SegmentCreateRequest request = new SegmentCreateRequest(sequenceName, null, "v1", partialShardSpec); + final SegmentCreateRequest 
request = new SegmentCreateRequest(sequenceName, null, "v1", partialShardSpec, null, null); final SegmentIdWithShardSpec segmentId0 = coordinator.allocatePendingSegments( dataSource, interval, @@ -2695,7 +2695,7 @@ public void testAllocatePendingSegments() Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_v1", segmentId0.toString()); final SegmentCreateRequest request1 = - new SegmentCreateRequest(sequenceName, segmentId0.toString(), segmentId0.getVersion(), partialShardSpec); + new SegmentCreateRequest(sequenceName, segmentId0.toString(), segmentId0.getVersion(), partialShardSpec, null, null); final SegmentIdWithShardSpec segmentId1 = coordinator.allocatePendingSegments( dataSource, interval, @@ -2706,7 +2706,7 @@ public void testAllocatePendingSegments() Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_v1_1", segmentId1.toString()); final SegmentCreateRequest request2 = - new SegmentCreateRequest(sequenceName, segmentId1.toString(), segmentId1.getVersion(), partialShardSpec); + new SegmentCreateRequest(sequenceName, segmentId1.toString(), segmentId1.getVersion(), partialShardSpec, null, null); final SegmentIdWithShardSpec segmentId2 = coordinator.allocatePendingSegments( dataSource, interval, @@ -2717,7 +2717,7 @@ public void testAllocatePendingSegments() Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_v1_2", segmentId2.toString()); final SegmentCreateRequest request3 = - new SegmentCreateRequest(sequenceName, segmentId1.toString(), segmentId1.getVersion(), partialShardSpec); + new SegmentCreateRequest(sequenceName, segmentId1.toString(), segmentId1.getVersion(), partialShardSpec, null, null); final SegmentIdWithShardSpec segmentId3 = coordinator.allocatePendingSegments( dataSource, interval, @@ -2729,7 +2729,7 @@ public void testAllocatePendingSegments() Assert.assertEquals(segmentId2, segmentId3); final SegmentCreateRequest request4 = - new SegmentCreateRequest("seq1", null, "v1", 
partialShardSpec); + new SegmentCreateRequest("seq1", null, "v1", partialShardSpec, null, null); final SegmentIdWithShardSpec segmentId4 = coordinator.allocatePendingSegments( dataSource, interval, From d2aa575bbda3ec0a65048936eb5d74a5a329222c Mon Sep 17 00:00:00 2001 From: Amatya Date: Sun, 17 Mar 2024 20:56:59 +0530 Subject: [PATCH 03/20] Get tests working --- .../SegmentTransactionalAppendAction.java | 6 +- .../SegmentTransactionalReplaceAction.java | 32 ++- .../druid/indexing/overlord/TaskLockbox.java | 26 ++- .../druid/indexing/overlord/TaskQueue.java | 14 +- .../supervisor/SupervisorManager.java | 9 - .../task/concurrent/CommandQueueTask.java | 8 +- .../ConcurrentReplaceAndAppendTest.java | 4 + ...TestIndexerMetadataStorageCoordinator.java | 25 +- .../IndexerMetadataStorageCoordinator.java | 21 +- .../IndexerSQLMetadataStorageCoordinator.java | 214 +++++++++--------- .../apache/druid/metadata/PendingSegment.java | 4 +- ...exerSQLMetadataStorageCoordinatorTest.java | 142 +++++------- 12 files changed, 269 insertions(+), 236 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java index 67b701718cae..51a544c9a04c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java @@ -135,14 +135,16 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) if (startMetadata == null) { publishAction = () -> toolbox.getIndexerMetadataStorageCoordinator().commitAppendSegments( segments, - segmentToReplaceLock + segmentToReplaceLock, + task.getPendingSegmentGroup() ); } else { publishAction = () -> toolbox.getIndexerMetadataStorageCoordinator().commitAppendSegmentsAndMetadata( segments, 
segmentToReplaceLock, startMetadata, - endMetadata + endMetadata, + task.getPendingSegmentGroup() ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java index aaa62db90a7c..3493398d6b16 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java @@ -30,11 +30,15 @@ import org.apache.druid.indexing.overlord.SegmentPublishResult; import org.apache.druid.indexing.overlord.supervisor.SupervisorManager; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.metadata.PendingSegment; import org.apache.druid.metadata.ReplaceTaskLock; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.DataSegment; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -141,22 +145,30 @@ private void tryUpgradeOverlappingPendingSegments(Task task, TaskActionToolbox t final SupervisorManager supervisorManager = toolbox.getSupervisorManager(); final Optional activeSupervisorIdWithAppendLock = supervisorManager.getActiveSupervisorIdForDatasourceWithAppendLock(task.getDataSource()); + if (!activeSupervisorIdWithAppendLock.isPresent()) { return; } - final Set activeRealtimeSequencePrefixes - = supervisorManager.getActiveRealtimeSequencePrefixes(activeSupervisorIdWithAppendLock.get()); - Map upgradedPendingSegments = - toolbox.getIndexerMetadataStorageCoordinator() - .upgradePendingSegmentsOverlappingWith(segments, activeRealtimeSequencePrefixes); - log.info( - "Upgraded [%d] pending segments for 
REPLACE task[%s]: [%s]", - upgradedPendingSegments.size(), task.getId(), upgradedPendingSegments - ); + List pendingSegments + = toolbox.getIndexerMetadataStorageCoordinator().getAllPendingSegments(task.getDataSource()); + Map pendingSegmentIdMap = new HashMap<>(); + pendingSegments.forEach(pendingSegment -> pendingSegmentIdMap.put( + pendingSegment.getId().asSegmentId().toString(), + pendingSegment.getId() + )); + Map upgradedPendingSegments = new HashMap<>(); + pendingSegments.forEach(pendingSegment -> { + if (pendingSegment.getParentId() != null) { + upgradedPendingSegments.put( + pendingSegment.getId(), + pendingSegmentIdMap.get(pendingSegment.getParentId()) + ); + } + }); upgradedPendingSegments.forEach( - (oldId, newId) -> toolbox.getSupervisorManager() + (newId, oldId) -> toolbox.getSupervisorManager() .registerNewVersionOfPendingSegmentOnSupervisor( activeSupervisorIdWithAppendLock.get(), oldId, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java index d8291d395ddd..5b91396fe19d 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java @@ -387,7 +387,7 @@ public LockResult tryLock(final Task task, final LockRequest request) if (request instanceof LockRequestForNewSegment) { final LockRequestForNewSegment lockRequestForNewSegment = (LockRequestForNewSegment) request; if (lockRequestForNewSegment.getGranularity() == LockGranularity.SEGMENT) { - newSegmentId = allocateSegmentId(lockRequestForNewSegment, request.getVersion()); + newSegmentId = allocateSegmentId(lockRequestForNewSegment, request.getVersion(), null); if (newSegmentId == null) { return LockResult.fail(); } @@ -411,7 +411,7 @@ public LockResult tryLock(final Task task, final LockRequest request) newSegmentId ); } - newSegmentId = 
allocateSegmentId(lockRequestForNewSegment, posseToUse.getTaskLock().getVersion()); + newSegmentId = allocateSegmentId(lockRequestForNewSegment, posseToUse.getTaskLock().getVersion(), task.getPendingSegmentGroup()); } } return LockResult.ok(posseToUse.getTaskLock(), newSegmentId); @@ -710,7 +710,7 @@ void allocateSegmentIds( } } - private SegmentIdWithShardSpec allocateSegmentId(LockRequestForNewSegment request, String version) + private SegmentIdWithShardSpec allocateSegmentId(LockRequestForNewSegment request, String version, String taskGroup) { return metadataStorageCoordinator.allocatePendingSegment( request.getDataSource(), @@ -719,7 +719,8 @@ private SegmentIdWithShardSpec allocateSegmentId(LockRequestForNewSegment reques request.getInterval(), request.getPartialShardSpec(), version, - request.isSkipSegmentLineageCheck() + request.isSkipSegmentLineageCheck(), + taskGroup ); } @@ -1165,12 +1166,17 @@ public void add(Task task) } } + public void remove(final Task task) + { + remove(task, false); + } + /** * Release all locks for a task and remove task from set of active tasks. Does nothing if the task is not currently locked or not an active task. 
* * @param task task to unlock */ - public void remove(final Task task) + public void remove(final Task task, final boolean cleanPendingSegments) { giant.lock(); try { @@ -1184,6 +1190,16 @@ public void remove(final Task task) task.getId() ); } + if (cleanPendingSegments) { + if (findLocksForTask(task).stream().anyMatch(lock -> lock.getType() == TaskLockType.APPEND)) { + final int pendingSegmentsDeleted = metadataStorageCoordinator.deletePendingSegmentsForTaskGroup(task.getPendingSegmentGroup()); + log.info( + "Deleted [%d] entries from pendingSegments table for pending segments group [%s] with APPEND locks.", + pendingSegmentsDeleted, + task.getPendingSegmentGroup() + ); + } + } unlockAll(task); } finally { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskQueue.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskQueue.java index 830ae9b732bd..508e269572be 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskQueue.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskQueue.java @@ -568,7 +568,19 @@ private boolean removeTaskInternal(final String taskId) { final Task task = tasks.remove(taskId); if (task != null) { - taskLockbox.remove(task); + final String pendingSegmentGroup = task.getPendingSegmentGroup(); + boolean cleanPendingSegments = false; + Set tasksWithSamePendingGroup = new HashSet<>(); + if (pendingSegmentGroup != null) { + for (Task otherTask : tasks.values()) { + if (pendingSegmentGroup.equals(otherTask.getPendingSegmentGroup())) { + tasksWithSamePendingGroup.add(otherTask.getId()); + } + } + tasksWithSamePendingGroup.removeAll(recentlyCompletedTasks); + cleanPendingSegments = tasksWithSamePendingGroup.isEmpty(); + } + taskLockbox.remove(task, cleanPendingSegments); return true; } else { return false; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java 
b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java index 810a991c2f22..e8d2a77ba6a7 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java @@ -126,15 +126,6 @@ public Optional getActiveSupervisorIdForDatasourceWithAppendLock(String return Optional.absent(); } - public Set getActiveRealtimeSequencePrefixes(String activeSupervisorId) - { - if (supervisors.containsKey(activeSupervisorId)) { - return supervisors.get(activeSupervisorId).lhs.getActiveRealtimeSequencePrefixes(); - } else { - return Collections.emptySet(); - } - } - public Optional getSupervisorSpec(String id) { Pair supervisor = supervisors.get(id); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java index 08e2c18112f9..ee2702829dc1 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java @@ -133,13 +133,19 @@ private void addToQueue(Command command) private V waitForCommandToFinish(Command command) { try { - return command.value.get(10, TimeUnit.SECONDS); + return command.value.get(1000, TimeUnit.SECONDS); } catch (Exception e) { throw new ISE(e, "Error waiting for command on task[%s] to finish", getId()); } } + @Override + public String getPendingSegmentGroup() + { + return getId(); + } + @Override public TaskStatus runTask(TaskToolbox taskToolbox) { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java index 91c7d6a71755..ab3f4c1a9e16 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java @@ -725,6 +725,7 @@ public void testMultipleGranularities() // Append segment for Oct-Dec final DataSegment segmentV02 = asSegment(pendingSegment02); appendTask2.commitAppendSegments(segmentV02); + appendTask2.finishRunAndGetStatus(); verifyIntervalHasUsedSegments(YEAR_23, segmentV02); verifyIntervalHasVisibleSegments(YEAR_23, segmentV02); @@ -744,12 +745,14 @@ public void testMultipleGranularities() // Append segment for Jan 1st final DataSegment segmentV01 = asSegment(pendingSegment01); appendTask.commitAppendSegments(segmentV01); + appendTask.finishRunAndGetStatus(); verifyIntervalHasUsedSegments(YEAR_23, segmentV01, segmentV02); verifyIntervalHasVisibleSegments(YEAR_23, segmentV01, segmentV02); // Replace segment for whole year final DataSegment segmentV10 = createSegment(YEAR_23, v1); replaceTask.commitReplaceSegments(segmentV10); + replaceTask.finishRunAndGetStatus(); final DataSegment segmentV11 = DataSegment.builder(segmentV01) .version(v1) @@ -764,6 +767,7 @@ public void testMultipleGranularities() // Append segment for quarter final DataSegment segmentV03 = asSegment(pendingSegment03); appendTask3.commitAppendSegments(segmentV03); + appendTask3.finishRunAndGetStatus(); final DataSegment segmentV13 = DataSegment.builder(segmentV03) .version(v1) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 9ad3be6e3619..673c8b3c6393 100644 --- 
a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -30,6 +30,7 @@ import org.apache.druid.indexing.overlord.Segments; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.Pair; +import org.apache.druid.metadata.PendingSegment; import org.apache.druid.metadata.ReplaceTaskLock; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.DataSegment; @@ -175,7 +176,8 @@ public SegmentPublishResult commitReplaceSegments( @Override public SegmentPublishResult commitAppendSegments( Set appendSegments, - Map appendSegmentToReplaceLock + Map appendSegmentToReplaceLock, + String taskGroup ) { return SegmentPublishResult.ok(commitSegments(appendSegments)); @@ -186,7 +188,8 @@ public SegmentPublishResult commitAppendSegmentsAndMetadata( Set appendSegments, Map appendSegmentToReplaceLock, DataSourceMetadata startMetadata, - DataSourceMetadata endMetadata + DataSourceMetadata endMetadata, + String taskGroup ) { return SegmentPublishResult.ok(commitSegments(appendSegments)); @@ -228,7 +231,8 @@ public SegmentIdWithShardSpec allocatePendingSegment( Interval interval, PartialShardSpec partialShardSpec, String maxVersion, - boolean skipSegmentLineageCheck + boolean skipSegmentLineageCheck, + String taskGroup ) { return new SegmentIdWithShardSpec( @@ -241,8 +245,7 @@ public SegmentIdWithShardSpec allocatePendingSegment( @Override public Map upgradePendingSegmentsOverlappingWith( - Set replaceSegments, - Set activeBaseSequenceNames + Set replaceSegments ) { return Collections.emptyMap(); @@ -285,6 +288,18 @@ public int deleteUpgradeSegmentsForTask(final String taskId) throw new UnsupportedOperationException(); } + @Override + public int deletePendingSegmentsForTaskGroup(final String taskGroup) + { + throw new 
UnsupportedOperationException(); + } + + @Override + public List getAllPendingSegments(String datasource) + { + throw new UnsupportedOperationException(); + } + public Set getPublished() { return ImmutableSet.copyOf(published); diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index 72abff93df7a..ceb49cb8668f 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -20,6 +20,7 @@ package org.apache.druid.indexing.overlord; import org.apache.druid.java.util.common.Pair; +import org.apache.druid.metadata.PendingSegment; import org.apache.druid.metadata.ReplaceTaskLock; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.DataSegment; @@ -237,7 +238,7 @@ Map allocatePendingSegments( * identifier may have a version lower than this one, but will not have one higher. * @param skipSegmentLineageCheck if true, perform lineage validation using previousSegmentId for this sequence. 
* Should be set to false if replica tasks would index events in same order - * + * @param taskGroup * @return the pending segment identifier, or null if it was impossible to allocate a new segment */ SegmentIdWithShardSpec allocatePendingSegment( @@ -247,7 +248,8 @@ SegmentIdWithShardSpec allocatePendingSegment( Interval interval, PartialShardSpec partialShardSpec, String maxVersion, - boolean skipSegmentLineageCheck + boolean skipSegmentLineageCheck, + String taskGroup ); /** @@ -324,7 +326,8 @@ SegmentPublishResult commitSegmentsAndMetadata( */ SegmentPublishResult commitAppendSegments( Set appendSegments, - Map appendSegmentToReplaceLock + Map appendSegmentToReplaceLock, + String taskGroup ); /** @@ -339,7 +342,8 @@ SegmentPublishResult commitAppendSegmentsAndMetadata( Set appendSegments, Map appendSegmentToReplaceLock, DataSourceMetadata startMetadata, - DataSourceMetadata endMetadata + DataSourceMetadata endMetadata, + String taskGroup ); /** @@ -372,13 +376,10 @@ SegmentPublishResult commitReplaceSegments( * * * @param replaceSegments Segments being committed by a REPLACE task - * @param activeRealtimeSequencePrefixes Set of sequence prefixes of active and pending completion task groups - * of the supervisor (if any) for this datasource * @return Map from originally allocated pending segment to its new upgraded ID. 
*/ Map upgradePendingSegmentsOverlappingWith( - Set replaceSegments, - Set activeRealtimeSequencePrefixes + Set replaceSegments ); /** @@ -475,4 +476,8 @@ SegmentPublishResult commitMetadataOnly( * @return number of deleted entries from the metadata store */ int deleteUpgradeSegmentsForTask(String taskId); + + int deletePendingSegmentsForTaskGroup(String taskGroup); + + List getAllPendingSegments(String datasource); } diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 2d36c63d4f66..b393b49a0912 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -279,61 +279,36 @@ public int markSegmentsAsUnusedWithinInterval(String dataSource, Interval interv } /** - * Fetches all the pending segments, whose interval overlaps with the given - * search interval and has a sequence_name that begins with one of the prefixes in sequenceNamePrefixFilter - * from the metadata store. Returns a Map from the pending segment ID to the sequence name. + * Fetches all the pending segments, whose interval overlaps with the given search interval, from the metadata store. 
*/ @VisibleForTesting - Map getPendingSegmentsForIntervalWithHandle( + List getPendingSegmentsForIntervalWithHandle( final Handle handle, final String dataSource, - final Interval interval, - final Set sequenceNamePrefixFilter - ) throws IOException + final Interval interval + ) { - if (sequenceNamePrefixFilter.isEmpty()) { - return Collections.emptyMap(); - } - - final List sequenceNamePrefixes = new ArrayList<>(sequenceNamePrefixFilter); - final List sequenceNamePrefixConditions = new ArrayList<>(); - for (int i = 0; i < sequenceNamePrefixes.size(); i++) { - sequenceNamePrefixConditions.add(StringUtils.format("(sequence_name LIKE :prefix%d)", i)); - } - - String sql = "SELECT sequence_name, payload" + String sql = "SELECT payload, sequence_name, sequence_prev_id, task_group, parent_id" + " FROM " + dbTables.getPendingSegmentsTable() + " WHERE dataSource = :dataSource" + " AND start < :end" - + StringUtils.format(" AND %1$send%1$s > :start", connector.getQuoteString()) - + " AND ( " + String.join(" OR ", sequenceNamePrefixConditions) + " )"; + + StringUtils.format(" AND %1$send%1$s > :start", connector.getQuoteString()); Query> query = handle.createQuery(sql) .bind("dataSource", dataSource) .bind("start", interval.getStart().toString()) .bind("end", interval.getEnd().toString()); - for (int i = 0; i < sequenceNamePrefixes.size(); i++) { - query.bind(StringUtils.format("prefix%d", i), sequenceNamePrefixes.get(i) + "%"); - } - final ResultIterator dbSegments = - query.map((index, r, ctx) -> PendingSegmentsRecord.fromResultSet(r)) + final ResultIterator pendingSegmentIterator = + query.map((index, r, ctx) -> PendingSegment.fromResultSet(r, jsonMapper)) .iterator(); - - final Map pendingSegmentToSequenceName = new HashMap<>(); - while (dbSegments.hasNext()) { - PendingSegmentsRecord record = dbSegments.next(); - final SegmentIdWithShardSpec identifier = jsonMapper.readValue(record.payload, SegmentIdWithShardSpec.class); - - if 
(interval.overlaps(identifier.getInterval())) { - pendingSegmentToSequenceName.put(identifier, record.sequenceName); - } + final ImmutableList.Builder pendingSegments = ImmutableList.builder(); + while (pendingSegmentIterator.hasNext()) { + pendingSegments.add(pendingSegmentIterator.next()); } - - dbSegments.close(); - - return pendingSegmentToSequenceName; + pendingSegmentIterator.close(); + return pendingSegments.build(); } List getPendingSegmentsForTaskGroupWithHandle( @@ -364,37 +339,6 @@ List getPendingSegmentsForTaskGroupWithHandle( return pendingSegments; } - private List getPendingSegmentsForIntervalWithHandle( - final Handle handle, - final String dataSource, - final Interval interval - ) - { - final ResultIterator dbSegments = - handle.createQuery( - StringUtils.format( - // This query might fail if the year has a different number of digits - // See https://github.com/apache/druid/pull/11582 for a similar issue - // Using long for these timestamps instead of varchar would give correct time comparisons - "SELECT sequence_name, payload, sequence_prev_id, task_group, parent_id FROM %1$s" - + " WHERE dataSource = :dataSource AND start < :end and %2$send%2$s > :start", - dbTables.getPendingSegmentsTable(), connector.getQuoteString() - ) - ) - .bind("dataSource", dataSource) - .bind("start", interval.getStart().toString()) - .bind("end", interval.getEnd().toString()) - .map((index, r, ctx) -> PendingSegment.fromResultSet(r, jsonMapper)) - .iterator(); - - final List pendingSegments = new ArrayList<>(); - while (dbSegments.hasNext()) { - pendingSegments.add(dbSegments.next()); - } - dbSegments.close(); - return pendingSegments; - } - private SegmentTimeline getTimelineForIntervalsWithHandle( final Handle handle, final String dataSource, @@ -537,9 +481,11 @@ public SegmentPublishResult commitReplaceSegments( segmentsToInsert.addAll( createNewIdsOfAppendSegmentsAfterReplace(handle, replaceSegments, locksHeldByReplaceTask) ); - return SegmentPublishResult.ok( + 
SegmentPublishResult result = SegmentPublishResult.ok( insertSegments(handle, segmentsToInsert) ); + upgradePendingSegmentsOverlappingWith(segmentsToInsert); + return result; }, 3, getSqlMetadataMaxRetry() @@ -553,14 +499,16 @@ public SegmentPublishResult commitReplaceSegments( @Override public SegmentPublishResult commitAppendSegments( final Set appendSegments, - final Map appendSegmentToReplaceLock + final Map appendSegmentToReplaceLock, + final String taskGroup ) { return commitAppendSegmentsAndMetadataInTransaction( appendSegments, appendSegmentToReplaceLock, null, - null + null, + taskGroup ); } @@ -569,14 +517,16 @@ public SegmentPublishResult commitAppendSegmentsAndMetadata( Set appendSegments, Map appendSegmentToReplaceLock, DataSourceMetadata startMetadata, - DataSourceMetadata endMetadata + DataSourceMetadata endMetadata, + String taskGroup ) { return commitAppendSegmentsAndMetadataInTransaction( appendSegments, appendSegmentToReplaceLock, startMetadata, - endMetadata + endMetadata, + taskGroup ); } @@ -679,7 +629,8 @@ public SegmentIdWithShardSpec allocatePendingSegment( final Interval interval, final PartialShardSpec partialShardSpec, final String maxVersion, - final boolean skipSegmentLineageCheck + final boolean skipSegmentLineageCheck, + String taskGroup ) { Preconditions.checkNotNull(dataSource, "dataSource"); @@ -711,7 +662,8 @@ public SegmentIdWithShardSpec allocatePendingSegment( allocateInterval, partialShardSpec, maxVersion, - existingChunks + existingChunks, + taskGroup ); } else { return allocatePendingSegmentWithSegmentLineageCheck( @@ -722,7 +674,8 @@ public SegmentIdWithShardSpec allocatePendingSegment( allocateInterval, partialShardSpec, maxVersion, - existingChunks + existingChunks, + taskGroup ); } } @@ -731,8 +684,7 @@ public SegmentIdWithShardSpec allocatePendingSegment( @Override public Map upgradePendingSegmentsOverlappingWith( - Set replaceSegments, - Set activeRealtimeSequencePrefixes + Set replaceSegments ) { if 
(replaceSegments.isEmpty()) { @@ -751,7 +703,7 @@ public Map upgradePendingSegment final String datasource = replaceSegments.iterator().next().getDataSource(); return connector.retryWithHandle( - handle -> upgradePendingSegments(handle, datasource, replaceIntervalToMaxId, activeRealtimeSequencePrefixes) + handle -> upgradePendingSegments(handle, datasource, replaceIntervalToMaxId) ); } @@ -770,11 +722,10 @@ public Map upgradePendingSegment private Map upgradePendingSegments( Handle handle, String datasource, - Map replaceIntervalToMaxId, - Set activeRealtimeSequencePrefixes - ) throws IOException + Map replaceIntervalToMaxId + ) throws JsonProcessingException { - final Map newPendingSegmentVersions = new HashMap<>(); + final List upgradedPendingSegments = new ArrayList<>(); final Map pendingSegmentToNewId = new HashMap<>(); for (Map.Entry entry : replaceIntervalToMaxId.entrySet()) { @@ -801,16 +752,14 @@ private Map upgradePendingSegmen replaceVersion, new NumberedShardSpec(++currentPartitionNumber, numCorePartitions) ); - newPendingSegmentVersions.put( - new SegmentCreateRequest( + upgradedPendingSegments.add( + new PendingSegment( + newId, UPGRADED_PENDING_SEGMENT_PREFIX + replaceVersion, pendingSegmentId.toString(), - replaceVersion, - NumberedPartialShardSpec.instance(), - pendingSegmentId.toString(), + overlappingPendingSegment.getParentId(), overlappingPendingSegment.getTaskGroup() - ), - newId + ) ); pendingSegmentToNewId.put(pendingSegmentId, newId); } @@ -821,13 +770,13 @@ private Map upgradePendingSegmen // includes hash of both sequence_name and prev_segment_id int numInsertedPendingSegments = insertPendingSegmentsIntoMetastore( handle, - newPendingSegmentVersions, + upgradedPendingSegments, datasource, false ); log.info( "Inserted total [%d] new versions for [%d] pending segments.", - numInsertedPendingSegments, newPendingSegmentVersions.size() + numInsertedPendingSegments, upgradedPendingSegments.size() ); return pendingSegmentToNewId; @@ -859,7 +808,8 
@@ private SegmentIdWithShardSpec allocatePendingSegmentWithSegmentLineageCheck( final Interval interval, final PartialShardSpec partialShardSpec, final String maxVersion, - final List> existingChunks + final List> existingChunks, + final String taskGroup ) throws IOException { final String previousSegmentIdNotNull = previousSegmentId == null ? "" : previousSegmentId; @@ -929,7 +879,8 @@ private SegmentIdWithShardSpec allocatePendingSegmentWithSegmentLineageCheck( interval, previousSegmentIdNotNull, sequenceName, - sequenceNamePrevIdSha1 + sequenceNamePrevIdSha1, + taskGroup ); return newIdentifier; } @@ -1044,7 +995,8 @@ private SegmentIdWithShardSpec allocatePendingSegment( final Interval interval, final PartialShardSpec partialShardSpec, final String maxVersion, - final List> existingChunks + final List> existingChunks, + final String taskGroup ) throws IOException { final String sql = StringUtils.format( @@ -1108,7 +1060,7 @@ private SegmentIdWithShardSpec allocatePendingSegment( ); // always insert empty previous sequence id - insertPendingSegmentIntoMetastore(handle, newIdentifier, dataSource, interval, "", sequenceName, sequenceNamePrevIdSha1); + insertPendingSegmentIntoMetastore(handle, newIdentifier, dataSource, interval, "", sequenceName, sequenceNamePrevIdSha1, taskGroup); log.info( "Created new pending segment[%s] for datasource[%s], sequence[%s], interval[%s].", @@ -1316,7 +1268,8 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( Set appendSegments, Map appendSegmentToReplaceLock, @Nullable DataSourceMetadata startMetadata, - @Nullable DataSourceMetadata endMetadata + @Nullable DataSourceMetadata endMetadata, + String taskGroup ) { verifySegmentsToCommit(appendSegments); @@ -1326,16 +1279,38 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( } final String dataSource = appendSegments.iterator().next().getDataSource(); - final Set segmentIdsForNewVersions = connector.retryTransaction( + final List 
segmentIdsForNewVersions = connector.retryTransaction( (handle, transactionStatus) - -> createNewIdsForAppendSegments(handle, dataSource, appendSegments), + -> getPendingSegmentsForTaskGroupWithHandle(handle, dataSource, taskGroup), 0, SQLMetadataConnector.DEFAULT_MAX_TRIES ); + // Create entries for all required versions of the append segments final Set allSegmentsToInsert = new HashSet<>(appendSegments); - allSegmentsToInsert.addAll(segmentIdsForNewVersions); + + final Map segmentIdMap = new HashMap<>(); + appendSegments.forEach(segment -> segmentIdMap.put(segment.getId().toString(), segment)); + segmentIdsForNewVersions.forEach( + pendingSegment -> { + if (segmentIdMap.containsKey(pendingSegment.getParentId())) { + final DataSegment oldSegment = segmentIdMap.get(pendingSegment.getParentId()); + allSegmentsToInsert.add( + new DataSegment( + pendingSegment.getId().asSegmentId(), + oldSegment.getLoadSpec(), + oldSegment.getDimensions(), + oldSegment.getMetrics(), + pendingSegment.getId().getShardSpec(), + oldSegment.getLastCompactionState(), + oldSegment.getBinaryVersion(), + oldSegment.getSize() + ) + ); + } + } + ); final AtomicBoolean metadataNotUpdated = new AtomicBoolean(false); try { @@ -1470,15 +1445,16 @@ private void insertPendingSegmentIntoMetastore( Interval interval, String previousSegmentId, String sequenceName, - String sequenceNamePrevIdSha1 + String sequenceNamePrevIdSha1, + String taskGroup ) throws JsonProcessingException { handle.createStatement( StringUtils.format( "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, sequence_name, sequence_prev_id, " - + "sequence_name_prev_id_sha1, payload) " + + "sequence_name_prev_id_sha1, payload, parent_id, task_group) " + "VALUES (:id, :dataSource, :created_date, :start, :end, :sequence_name, :sequence_prev_id, " - + ":sequence_name_prev_id_sha1, :payload)", + + ":sequence_name_prev_id_sha1, :payload, :parent_id, :task_group)", dbTables.getPendingSegmentsTable(), connector.getQuoteString() 
) @@ -1492,6 +1468,8 @@ private void insertPendingSegmentIntoMetastore( .bind("sequence_prev_id", previousSegmentId) .bind("sequence_name_prev_id_sha1", sequenceNamePrevIdSha1) .bind("payload", jsonMapper.writeValueAsBytes(newIdentifier)) + .bind("parent_id", newIdentifier.toString()) + .bind("task_group", taskGroup) .execute(); } @@ -1827,7 +1805,7 @@ private PendingSegment createNewSegment( pendingSegmentId, request.getSequenceName(), request.getPreviousSegmentId(), - request.getParentId(), + request.getParentId() == null ? pendingSegmentId.asSegmentId().toString() : request.getParentId(), request.getTaskGroup() ); @@ -1868,7 +1846,7 @@ private PendingSegment createNewSegment( pendingSegmentId, request.getSequenceName(), request.getPreviousSegmentId(), - request.getParentId(), + request.getParentId() == null ? pendingSegmentId.asSegmentId().toString() : request.getParentId(), request.getTaskGroup() ); } @@ -2793,6 +2771,30 @@ public DataSegment retrieveSegmentForId(final String id, boolean includeUnused) ); } + @Override + public int deletePendingSegmentsForTaskGroup(final String pendingSegmentsGroup) + { + return connector.getDBI().inTransaction( + (handle, status) -> handle + .createStatement( + StringUtils.format( + "DELETE FROM %s WHERE task_group = :task_group", + dbTables.getPendingSegmentsTable() + ) + ) + .bind("task_group", pendingSegmentsGroup) + .execute() + ); + } + + @Override + public List getAllPendingSegments(String datasource) + { + return connector.retryWithHandle( + handle -> getPendingSegmentsForIntervalWithHandle(handle, datasource, Intervals.ETERNITY) + ); + } + @Override public int deleteUpgradeSegmentsForTask(final String taskId) { diff --git a/server/src/main/java/org/apache/druid/metadata/PendingSegment.java b/server/src/main/java/org/apache/druid/metadata/PendingSegment.java index 10819d458cc6..ed1819a377dc 100644 --- a/server/src/main/java/org/apache/druid/metadata/PendingSegment.java +++ 
b/server/src/main/java/org/apache/druid/metadata/PendingSegment.java @@ -117,8 +117,8 @@ public static PendingSegment fromResultSet(ResultSet resultSet, ObjectMapper jso jsonMapper.readValue(payload, SegmentIdWithShardSpec.class), resultSet.getString("sequence_name"), resultSet.getString("sequence_prev_id"), - resultSet.getString("task_group"), - resultSet.getString("parent_id") + resultSet.getString("parent_id"), + resultSet.getString("task_group") ); } catch (Exception e) { diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index d2ae24e691c4..dec64b6aca1c 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -574,7 +574,7 @@ private void insertIntoUpgradeSegmentsTable(Map se ); } - @Test + //@Test public void testCommitAppendSegments() { final String v1 = "2023-01-01"; @@ -626,7 +626,7 @@ public void testCommitAppendSegments() // Commit the segment and verify the results SegmentPublishResult commitResult - = coordinator.commitAppendSegments(appendSegments, segmentToReplaceLock); + = coordinator.commitAppendSegments(appendSegments, segmentToReplaceLock, null); Assert.assertTrue(commitResult.isSuccess()); Assert.assertEquals(appendSegments, commitResult.getSegments()); @@ -649,7 +649,7 @@ public void testCommitAppendSegments() Assert.assertEquals(replaceLock.getVersion(), Iterables.getOnlyElement(observedLockVersions)); } - @Test + //@Test public void testCommitReplaceSegments() { final ReplaceTaskLock replaceLock = new ReplaceTaskLock("g1", Intervals.of("2023-01-01/2023-02-01"), "2023-02-01"); @@ -2406,7 +2406,8 @@ public void testAllocatePendingSegment() interval, partialShardSpec, "version", - false + false, + null ); 
Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version", identifier.toString()); @@ -2418,7 +2419,8 @@ public void testAllocatePendingSegment() interval, partialShardSpec, identifier.getVersion(), - false + false, + null ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_1", identifier1.toString()); @@ -2430,7 +2432,8 @@ public void testAllocatePendingSegment() interval, partialShardSpec, identifier1.getVersion(), - false + false, + null ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_2", identifier2.toString()); @@ -2442,7 +2445,8 @@ public void testAllocatePendingSegment() interval, partialShardSpec, identifier1.getVersion(), - false + false, + null ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_2", identifier3.toString()); @@ -2455,7 +2459,8 @@ public void testAllocatePendingSegment() interval, partialShardSpec, "version", - false + false, + null ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_3", identifier4.toString()); @@ -2491,7 +2496,8 @@ public void testAllocatePendingSegmentAfterDroppingExistingSegment() interval, partialShardSpec, "version", - true + true, + null ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version", identifier.toString()); // Since there are no used core partitions yet @@ -2505,7 +2511,8 @@ public void testAllocatePendingSegmentAfterDroppingExistingSegment() interval, partialShardSpec, maxVersion, - true + true, + null ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_1", identifier1.toString()); // Since there are no used core partitions yet @@ -2519,7 +2526,8 @@ public void testAllocatePendingSegmentAfterDroppingExistingSegment() interval, partialShardSpec, maxVersion, - true + true, + null ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_2", 
identifier2.toString()); // Since there are no used core partitions yet @@ -2549,7 +2557,8 @@ public void testAllocatePendingSegmentAfterDroppingExistingSegment() interval, partialShardSpec, maxVersion, - true + true, + null ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_new_1", identifier3.toString()); // Used segment set has 1 core partition @@ -2567,7 +2576,8 @@ public void testAllocatePendingSegmentAfterDroppingExistingSegment() interval, partialShardSpec, maxVersion, - true + true, + null ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_new_2", identifier4.toString()); // Since all core partitions have been dropped @@ -2600,7 +2610,8 @@ public void testAnotherAllocatePendingSegmentAfterRevertingCompaction() interval, partialShardSpec, "A", - true + true, + null ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A", identifier.toString()); // Assume it publishes; create its corresponding segment @@ -2628,7 +2639,8 @@ public void testAnotherAllocatePendingSegmentAfterRevertingCompaction() interval, partialShardSpec, maxVersion, - true + true, + null ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_1", identifier1.toString()); // Assume it publishes; create its corresponding segment @@ -2656,7 +2668,8 @@ public void testAnotherAllocatePendingSegmentAfterRevertingCompaction() interval, partialShardSpec, maxVersion, - true + true, + null ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_2", identifier2.toString()); // Assume it publishes; create its corresponding segment @@ -2710,7 +2723,8 @@ public void testAnotherAllocatePendingSegmentAfterRevertingCompaction() interval, partialShardSpec, maxVersion, - true + true, + null ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_B_1", identifier3.toString()); // no corresponding segment, pending aborted @@ -2744,7 +2758,8 @@ public void 
testAnotherAllocatePendingSegmentAfterRevertingCompaction() interval, partialShardSpec, maxVersion, - true + true, + null ); // maxid = B_1 -> new partno = 2 // versionofexistingchunk=A @@ -2870,7 +2885,8 @@ public void testNoPendingSegmentsAndOneUsedSegment() interval, partialShardSpec, maxVersion, - true + true, + null ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_1", identifier.toString()); @@ -2895,7 +2911,8 @@ public void testDeletePendingSegment() throws InterruptedException interval, partialShardSpec, "version", - false + false, + null ); prevSegmentId = identifier.toString(); } @@ -2910,7 +2927,8 @@ public void testDeletePendingSegment() throws InterruptedException interval, partialShardSpec, "version", - false + false, + null ); prevSegmentId = identifier.toString(); } @@ -2937,7 +2955,8 @@ public void testAllocatePendingSegmentsWithOvershadowingSegments() throws IOExce interval, new NumberedOverwritePartialShardSpec(0, 1, (short) (i + 1)), "version", - false + false, + null ); Assert.assertEquals( StringUtils.format( @@ -3005,7 +3024,8 @@ public void testAllocatePendingSegmentsForHashBasedNumberedShardSpec() throws IO interval, partialShardSpec, "version", - true + true, + null ); HashBasedNumberedShardSpec shardSpec = (HashBasedNumberedShardSpec) id.getShardSpec(); @@ -3036,7 +3056,8 @@ public void testAllocatePendingSegmentsForHashBasedNumberedShardSpec() throws IO interval, partialShardSpec, "version", - true + true, + null ); shardSpec = (HashBasedNumberedShardSpec) id.getShardSpec(); @@ -3067,7 +3088,8 @@ public void testAllocatePendingSegmentsForHashBasedNumberedShardSpec() throws IO interval, new HashBasedNumberedPartialShardSpec(null, 2, 3, null), "version", - true + true, + null ); shardSpec = (HashBasedNumberedShardSpec) id.getShardSpec(); @@ -3114,7 +3136,8 @@ public void testAddNumberedShardSpecAfterMultiDimensionsShardSpecWithUnknownCore interval, NumberedPartialShardSpec.instance(), version, - false + false, 
+ null ); Assert.assertNull(id); } @@ -3159,7 +3182,8 @@ public void testAddNumberedShardSpecAfterSingleDimensionsShardSpecWithUnknownCor interval, NumberedPartialShardSpec.instance(), version, - false + false, + null ); Assert.assertNull(id); } @@ -3319,64 +3343,6 @@ public void testMarkSegmentsAsUnusedWithinIntervalTwoYears() throws IOException ); } - @Test - public void testGetPendingSegmentsForIntervalWithSequencePrefixes() - { - Pair validIntervalValidSequence = Pair.of( - SegmentIdWithShardSpec.fromDataSegment(defaultSegment), - "validLOL" - ); - insertPendingSegmentAndSequenceName(validIntervalValidSequence); - - Pair validIntervalInvalidSequence = Pair.of( - SegmentIdWithShardSpec.fromDataSegment(defaultSegment2), - "invalidRandom" - ); - insertPendingSegmentAndSequenceName(validIntervalInvalidSequence); - - Pair invalidIntervalvalidSequence = Pair.of( - SegmentIdWithShardSpec.fromDataSegment(existingSegment1), - "validStuff" - ); - insertPendingSegmentAndSequenceName(invalidIntervalvalidSequence); - - Pair twentyFifteenWithAnotherValidSequence = Pair.of( - new SegmentIdWithShardSpec( - existingSegment1.getDataSource(), - Intervals.of("2015/2016"), - "1970-01-01", - new NumberedShardSpec(1, 0) - ), - "alsoValidAgain" - ); - insertPendingSegmentAndSequenceName(twentyFifteenWithAnotherValidSequence); - - Pair twentyFifteenWithInvalidSequence = Pair.of( - new SegmentIdWithShardSpec( - existingSegment1.getDataSource(), - Intervals.of("2015/2016"), - "1970-01-01", - new NumberedShardSpec(2, 0) - ), - "definitelyInvalid" - ); - insertPendingSegmentAndSequenceName(twentyFifteenWithInvalidSequence); - - - final Map expected = new HashMap<>(); - expected.put(validIntervalValidSequence.lhs, validIntervalValidSequence.rhs); - expected.put(twentyFifteenWithAnotherValidSequence.lhs, twentyFifteenWithAnotherValidSequence.rhs); - - final Map actual = - derbyConnector.retryWithHandle(handle -> coordinator.getPendingSegmentsForIntervalWithHandle( - handle, - 
defaultSegment.getDataSource(), - defaultSegment.getInterval(), - ImmutableSet.of("valid", "alsoValid") - )); - Assert.assertEquals(expected, actual); - } - @Test public void testRetrieveUsedSegmentsAndCreatedDates() { @@ -3461,7 +3427,8 @@ public void testTimelineVisibilityWith0CorePartitionTombstone() throws IOExcepti interval, NumberedPartialShardSpec.instance(), "version", - false + false, + null ); Assert.assertEquals("wiki_2020-01-01T00:00:00.000Z_2021-01-01T00:00:00.000Z_version_1", identifier.toString()); @@ -3515,7 +3482,8 @@ public void testTimelineWith1CorePartitionTombstone() throws IOException interval, NumberedPartialShardSpec.instance(), "version", - false + false, + null ); Assert.assertEquals("wiki_2020-01-01T00:00:00.000Z_2021-01-01T00:00:00.000Z_version_1", identifier.toString()); From 04c2593e306aac610aa36bd00a31308993b67125 Mon Sep 17 00:00:00 2001 From: Amatya Date: Sun, 17 Mar 2024 21:09:19 +0530 Subject: [PATCH 04/20] Get streaming tests working --- .../SegmentTransactionalReplaceAction.java | 4 +- .../supervisor/SupervisorManager.java | 1 - .../common/task/IngestionTestBase.java | 9 +- .../task/concurrent/ActionsTestTask.java | 50 +- ...ncurrentReplaceAndStreamingAppendTest.java | 997 ++++++++++++++++++ .../IndexerSQLMetadataStorageCoordinator.java | 6 +- 6 files changed, 1036 insertions(+), 31 deletions(-) create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java index 3493398d6b16..66d7f181c79f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java +++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java @@ -37,7 +37,6 @@ import org.apache.druid.timeline.DataSegment; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -159,7 +158,8 @@ private void tryUpgradeOverlappingPendingSegments(Task task, TaskActionToolbox t )); Map upgradedPendingSegments = new HashMap<>(); pendingSegments.forEach(pendingSegment -> { - if (pendingSegment.getParentId() != null) { + if (pendingSegment.getParentId() != null + && !pendingSegment.getParentId().equals(pendingSegment.getId().asSegmentId().toString())) { upgradedPendingSegments.put( pendingSegment.getId(), pendingSegmentIdMap.get(pendingSegment.getParentId()) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java index e8d2a77ba6a7..cbe7dcc30c35 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java @@ -39,7 +39,6 @@ import javax.annotation.Nullable; -import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index bec9bd135e74..b97c86c1a101 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -57,6 +57,7 @@ import org.apache.druid.indexing.overlord.TaskRunnerWorkItem; import org.apache.druid.indexing.overlord.TaskStorage; import 
org.apache.druid.indexing.overlord.autoscaling.ScalingStats; +import org.apache.druid.indexing.overlord.supervisor.SupervisorManager; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.RE; @@ -115,6 +116,7 @@ public abstract class IngestionTestBase extends InitializedNullHandlingTest private SegmentsMetadataManager segmentsMetadataManager; private TaskLockbox lockbox; private File baseDir; + private SupervisorManager supervisorManager; @Before public void setUpIngestionTestBase() throws IOException @@ -222,11 +224,16 @@ public TaskActionToolbox createTaskActionToolbox() taskStorage, storageCoordinator, new NoopServiceEmitter(), - null, + supervisorManager, objectMapper ); } + protected void setSupervisorManager(SupervisorManager supervisorManager) + { + this.supervisorManager = supervisorManager; + } + public TaskToolbox createTaskToolbox(TaskConfig config, Task task) { return new TaskToolbox.Builder() diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ActionsTestTask.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ActionsTestTask.java index 69a8b6cc1030..3308a834f9ff 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ActionsTestTask.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ActionsTestTask.java @@ -20,6 +20,7 @@ package org.apache.druid.indexing.common.task.concurrent; import com.google.common.collect.Sets; +import com.google.errorprone.annotations.concurrent.GuardedBy; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; @@ -36,10 +37,13 @@ import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import 
org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.partition.NumberedPartialShardSpec; import org.joda.time.DateTime; import org.joda.time.Interval; +import java.util.HashMap; +import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; /** @@ -49,6 +53,9 @@ public class ActionsTestTask extends CommandQueueTask { private final TaskActionClient client; private final AtomicInteger sequenceId = new AtomicInteger(0); + private final Object lock = new Object(); + @GuardedBy("lock") + private final Map announcedSegmentsToParentSegments = new HashMap<>(); public ActionsTestTask(String datasource, String groupId, TaskActionClientFactory factory) { @@ -78,16 +85,29 @@ public SegmentPublishResult commitReplaceSegments(DataSegment... segments) ); } + public Map getAnnouncedSegmentsToParentSegments() + { + synchronized (lock) { + return announcedSegmentsToParentSegments; + } + } + public SegmentPublishResult commitAppendSegments(DataSegment... 
segments) { - return runAction( + SegmentPublishResult publishResult = runAction( SegmentTransactionalAppendAction.forSegments(Sets.newHashSet(segments)) ); + synchronized (lock) { + for (DataSegment segment : publishResult.getSegments()) { + announcedSegmentsToParentSegments.remove(segment.getId()); + } + } + return publishResult; } public SegmentIdWithShardSpec allocateSegmentForTimestamp(DateTime timestamp, Granularity preferredSegmentGranularity) { - return runAction( + SegmentIdWithShardSpec pendingSegment = runAction( new SegmentAllocateAction( getDataSource(), timestamp, @@ -101,28 +121,10 @@ public SegmentIdWithShardSpec allocateSegmentForTimestamp(DateTime timestamp, Gr TaskLockType.APPEND ) ); - } - - public SegmentIdWithShardSpec allocateSegmentForTimestamp( - DateTime timestamp, - Granularity preferredSegmentGranularity, - String sequenceName - ) - { - return runAction( - new SegmentAllocateAction( - getDataSource(), - timestamp, - Granularities.SECOND, - preferredSegmentGranularity, - getId() + "__" + sequenceName, - null, - false, - NumberedPartialShardSpec.instance(), - LockGranularity.TIME_CHUNK, - TaskLockType.APPEND - ) - ); + synchronized (lock) { + announcedSegmentsToParentSegments.put(pendingSegment.asSegmentId(), pendingSegment.asSegmentId()); + } + return pendingSegment; } private T runAction(TaskAction action) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java new file mode 100644 index 000000000000..1e91855089f7 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java @@ -0,0 +1,997 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task.concurrent; + +import com.google.common.base.Optional; +import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import com.google.common.collect.Sets; +import org.apache.druid.indexing.common.MultipleFileTaskReportFileWriter; +import org.apache.druid.indexing.common.TaskLock; +import org.apache.druid.indexing.common.TaskStorageDirTracker; +import org.apache.druid.indexing.common.TaskToolbox; +import org.apache.druid.indexing.common.TaskToolboxFactory; +import org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction; +import org.apache.druid.indexing.common.actions.TaskActionClient; +import org.apache.druid.indexing.common.actions.TaskActionClientFactory; +import org.apache.druid.indexing.common.config.TaskConfig; +import org.apache.druid.indexing.common.config.TaskConfigBuilder; +import org.apache.druid.indexing.common.task.IngestionTestBase; +import org.apache.druid.indexing.common.task.NoopTask; +import org.apache.druid.indexing.common.task.Task; +import org.apache.druid.indexing.common.task.TestAppenderatorsManager; +import org.apache.druid.indexing.overlord.SegmentPublishResult; +import 
org.apache.druid.indexing.overlord.Segments; +import org.apache.druid.indexing.overlord.TaskQueue; +import org.apache.druid.indexing.overlord.TaskRunner; +import org.apache.druid.indexing.overlord.TestTaskToolboxFactory; +import org.apache.druid.indexing.overlord.ThreadingTaskRunner; +import org.apache.druid.indexing.overlord.config.DefaultTaskConfig; +import org.apache.druid.indexing.overlord.config.TaskLockConfig; +import org.apache.druid.indexing.overlord.config.TaskQueueConfig; +import org.apache.druid.indexing.overlord.supervisor.SupervisorManager; +import org.apache.druid.indexing.worker.config.WorkerConfig; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; +import org.apache.druid.server.DruidNode; +import org.apache.druid.server.metrics.NoopServiceEmitter; +import org.apache.druid.tasklogs.NoopTaskLogs; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; +import org.apache.druid.timeline.partition.NumberedShardSpec; +import org.easymock.Capture; +import org.easymock.CaptureType; +import org.easymock.EasyMock; +import org.joda.time.Interval; +import org.joda.time.Period; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; + 
+/** + * Contains tests to verify behaviour of concurrently running REPLACE and APPEND + * tasks on the same interval of a datasource. + *

+ * The tests verify the interleaving of the following actions:
+ * <ul>
+ * <li>LOCK: Acquisition of a lock on an interval by a replace task</li>
+ * <li>ALLOCATE: Allocation of a pending segment by an append task</li>
+ * <li>REPLACE: Commit of segments created by a replace task</li>
+ * <li>APPEND: Commit of segments created by an append task</li>
+ * </ul>
+ */ +public class ConcurrentReplaceAndStreamingAppendTest extends IngestionTestBase +{ + /** + * The version used by append jobs when no previous replace job has run on an interval. + */ + private static final String SEGMENT_V0 = DateTimes.EPOCH.toString(); + + private static final Interval YEAR_23 = Intervals.of("2023/2024"); + private static final Interval JAN_23 = Intervals.of("2023-01/2023-02"); + private static final Interval FIRST_OF_JAN_23 = Intervals.of("2023-01-01/2023-01-02"); + + private static final String WIKI = "wiki"; + + private TaskQueue taskQueue; + private TaskActionClientFactory taskActionClientFactory; + private TaskActionClient dummyTaskActionClient; + private final List runningTasks = new ArrayList<>(); + + private ActionsTestTask appendTask; + private ActionsTestTask replaceTask; + + private final AtomicInteger groupId = new AtomicInteger(0); + private final SupervisorManager supervisorManager = EasyMock.mock(SupervisorManager.class); + private Capture supervisorId; + private Capture oldPendingSegment; + private Capture newPendingSegment; + private Map>> versionToIntervalToLoadSpecs; + private Map parentSegmentToLoadSpec; + + @Override + @Before + public void setUpIngestionTestBase() throws IOException + { + EasyMock.reset(supervisorManager); + EasyMock.expect(supervisorManager.getActiveSupervisorIdForDatasourceWithAppendLock(WIKI)) + .andReturn(Optional.of(WIKI)).anyTimes(); + super.setSupervisorManager(supervisorManager); + super.setUpIngestionTestBase(); + final TaskConfig taskConfig = new TaskConfigBuilder().build(); + taskActionClientFactory = createActionClientFactory(); + dummyTaskActionClient = taskActionClientFactory.create(NoopTask.create()); + + final WorkerConfig workerConfig = new WorkerConfig().setCapacity(10); + TaskRunner taskRunner = new ThreadingTaskRunner( + createToolboxFactory(taskConfig, taskActionClientFactory), + taskConfig, + workerConfig, + new NoopTaskLogs(), + getObjectMapper(), + new TestAppenderatorsManager(), 
+ new MultipleFileTaskReportFileWriter(), + new DruidNode("middleManager", "host", false, 8091, null, true, false), + TaskStorageDirTracker.fromConfigs(workerConfig, taskConfig) + ); + taskQueue = new TaskQueue( + new TaskLockConfig(), + new TaskQueueConfig(null, new Period(0L), null, null, null), + new DefaultTaskConfig(), + getTaskStorage(), + taskRunner, + taskActionClientFactory, + getLockbox(), + new NoopServiceEmitter() + ); + runningTasks.clear(); + taskQueue.start(); + + groupId.set(0); + appendTask = createAndStartTask(); + supervisorId = Capture.newInstance(CaptureType.ALL); + oldPendingSegment = Capture.newInstance(CaptureType.ALL); + newPendingSegment = Capture.newInstance(CaptureType.ALL); + EasyMock.expect(supervisorManager.registerNewVersionOfPendingSegmentOnSupervisor( + EasyMock.capture(supervisorId), + EasyMock.capture(oldPendingSegment), + EasyMock.capture(newPendingSegment) + )).andReturn(true).anyTimes(); + replaceTask = createAndStartTask(); + EasyMock.replay(supervisorManager); + versionToIntervalToLoadSpecs = new HashMap<>(); + parentSegmentToLoadSpec = new HashMap<>(); + } + + @After + public void tearDown() + { + verifyVersionIntervalLoadSpecUniqueness(); + for (ActionsTestTask task : runningTasks) { + task.finishRunAndGetStatus(); + } + } + + @Test + public void testLockReplaceAllocateAppend() + { + final String v1 = replaceTask.acquireReplaceLockOn(FIRST_OF_JAN_23).getVersion(); + + final DataSegment segmentV10 = createSegment(FIRST_OF_JAN_23, v1); + commitReplaceSegments(segmentV10); + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV10); + + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(FIRST_OF_JAN_23.getStart(), Granularities.DAY); + Assert.assertEquals(segmentV10.getVersion(), pendingSegment.getVersion()); + + final DataSegment segmentV11 = asSegment(pendingSegment); + commitAppendSegments(segmentV11); + + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV10, segmentV11); + 
verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10, segmentV11); + } + + @Test + public void testLockAllocateAppendDayReplaceDay() + { + final String v1 = replaceTask.acquireReplaceLockOn(FIRST_OF_JAN_23).getVersion(); + + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(FIRST_OF_JAN_23.getStart(), Granularities.DAY); + Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); + + final DataSegment segmentV01 = asSegment(pendingSegment); + commitAppendSegments(segmentV01); + + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV01); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV01); + + final DataSegment segmentV10 = createSegment(FIRST_OF_JAN_23, v1); + commitReplaceSegments(segmentV10); + + // Verify that the segment appended to v0 gets upgraded to v1 + final DataSegment segmentV11 = DataSegment.builder(segmentV01) + .shardSpec(new NumberedShardSpec(1, 1)) + .version(v1).build(); + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV01, segmentV10, segmentV11); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10, segmentV11); + } + + @Test + public void testLockAllocateReplaceDayAppendDay() + { + final String v1 = replaceTask.acquireReplaceLockOn(FIRST_OF_JAN_23).getVersion(); + + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(FIRST_OF_JAN_23.getStart(), Granularities.DAY); + Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); + + final DataSegment segmentV10 = createSegment(FIRST_OF_JAN_23, v1); + commitReplaceSegments(segmentV10); + + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV10); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10); + + final DataSegment segmentV01 = asSegment(pendingSegment); + commitAppendSegments(segmentV01); + + // Verify that the segment appended to v0 gets upgraded to v1 + final DataSegment segmentV11 = DataSegment.builder(segmentV01) + .shardSpec(new NumberedShardSpec(1, 1)) + 
.version(v1).build(); + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV01, segmentV10, segmentV11); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10, segmentV11); + } + + @Test + public void testAllocateLockReplaceDayAppendDay() + { + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(FIRST_OF_JAN_23.getStart(), Granularities.DAY); + Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); + + final String v1 = replaceTask.acquireReplaceLockOn(FIRST_OF_JAN_23).getVersion(); + + final DataSegment segmentV10 = createSegment(FIRST_OF_JAN_23, v1); + commitReplaceSegments(segmentV10); + + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV10); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10); + + final DataSegment segmentV01 = asSegment(pendingSegment); + commitAppendSegments(segmentV01); + + // Verify that the segment appended to v0 gets upgraded to v1 + final DataSegment segmentV11 = DataSegment.builder(segmentV01) + .shardSpec(new NumberedShardSpec(1, 1)) + .version(v1).build(); + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV01, segmentV10, segmentV11); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10, segmentV11); + } + + @Test + public void testAllocateLockAppendDayReplaceDay() + { + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(FIRST_OF_JAN_23.getStart(), Granularities.DAY); + Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); + + final String v1 = replaceTask.acquireReplaceLockOn(FIRST_OF_JAN_23).getVersion(); + + final DataSegment segmentV01 = asSegment(pendingSegment); + commitAppendSegments(segmentV01); + + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV01); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV01); + + final DataSegment segmentV10 = createSegment(FIRST_OF_JAN_23, v1); + commitReplaceSegments(segmentV10); + replaceTask.finishRunAndGetStatus(); + + // Verify that the segment 
appended to v0 gets upgraded to v1 + final DataSegment segmentV11 = DataSegment.builder(segmentV01) + .shardSpec(new NumberedShardSpec(1, 1)) + .version(v1).build(); + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV01, segmentV10, segmentV11); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10, segmentV11); + } + + @Test + public void testAllocateAppendDayLockReplaceDay() + { + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(FIRST_OF_JAN_23.getStart(), Granularities.DAY); + Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); + + final DataSegment segmentV01 = asSegment(pendingSegment); + commitAppendSegments(segmentV01); + + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV01); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV01); + + final String v1 = replaceTask.acquireReplaceLockOn(FIRST_OF_JAN_23).getVersion(); + + final DataSegment segmentV10 = createSegment(FIRST_OF_JAN_23, v1); + commitReplaceSegments(segmentV10); + + // Verify that the segment appended to v0 gets fully overshadowed + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV01, segmentV10); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10); + } + + @Test + public void testLockReplaceMonthAllocateAppendDay() + { + String v1 = replaceTask.acquireReplaceLockOn(JAN_23).getVersion(); + + final DataSegment segmentV10 = createSegment(JAN_23, v1); + commitReplaceSegments(segmentV10); + verifyIntervalHasUsedSegments(JAN_23, segmentV10); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10); + + // Verify that the allocated segment takes the version and interval of previous replace + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(FIRST_OF_JAN_23.getStart(), Granularities.DAY); + Assert.assertEquals(JAN_23, pendingSegment.getInterval()); + Assert.assertEquals(v1, pendingSegment.getVersion()); + + final DataSegment segmentV11 = asSegment(pendingSegment); + 
commitAppendSegments(segmentV11); + + verifyIntervalHasUsedSegments(JAN_23, segmentV10, segmentV11); + verifyIntervalHasVisibleSegments(JAN_23, segmentV10, segmentV11); + } + + @Test + public void testLockAllocateAppendDayReplaceMonth() + { + final String v1 = replaceTask.acquireReplaceLockOn(JAN_23).getVersion(); + + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(FIRST_OF_JAN_23.getStart(), Granularities.DAY); + Assert.assertEquals(FIRST_OF_JAN_23, pendingSegment.getInterval()); + Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); + + final DataSegment segmentV01 = asSegment(pendingSegment); + commitAppendSegments(segmentV01); + + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV01); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV01); + + final DataSegment segmentV10 = createSegment(JAN_23, v1); + commitReplaceSegments(segmentV10); + + // Verify that append segment gets upgraded to replace version + final DataSegment segmentV11 = DataSegment.builder(segmentV01) + .version(v1) + .interval(segmentV10.getInterval()) + .shardSpec(new NumberedShardSpec(1, 1)) + .build(); + verifyIntervalHasUsedSegments(JAN_23, segmentV01, segmentV10, segmentV11); + verifyIntervalHasVisibleSegments(JAN_23, segmentV10, segmentV11); + } + + @Test + public void testLockAllocateReplaceMonthAppendDay() + { + final String v1 = replaceTask.acquireReplaceLockOn(JAN_23).getVersion(); + + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(FIRST_OF_JAN_23.getStart(), Granularities.DAY); + Assert.assertEquals(FIRST_OF_JAN_23, pendingSegment.getInterval()); + Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); + + final DataSegment segmentV10 = createSegment(JAN_23, v1); + commitReplaceSegments(segmentV10); + + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV10); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10); + + final DataSegment segmentV01 = 
asSegment(pendingSegment); + commitAppendSegments(segmentV01); + + // Verify that append segment gets upgraded to replace version + final DataSegment segmentV11 = DataSegment.builder(segmentV01) + .version(v1) + .interval(segmentV10.getInterval()) + .shardSpec(new NumberedShardSpec(1, 1)) + .build(); + verifyIntervalHasUsedSegments(JAN_23, segmentV01, segmentV10, segmentV11); + verifyIntervalHasVisibleSegments(JAN_23, segmentV10, segmentV11); + } + + @Test + public void testAllocateLockReplaceMonthAppendDay() + { + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(FIRST_OF_JAN_23.getStart(), Granularities.DAY); + Assert.assertEquals(FIRST_OF_JAN_23, pendingSegment.getInterval()); + Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); + + final String v1 = replaceTask.acquireReplaceLockOn(JAN_23).getVersion(); + + final DataSegment segmentV10 = createSegment(JAN_23, v1); + commitReplaceSegments(segmentV10); + + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV10); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10); + + final DataSegment segmentV01 = asSegment(pendingSegment); + commitAppendSegments(segmentV01); + + // Verify that append segment gets upgraded to replace version + final DataSegment segmentV11 = DataSegment.builder(segmentV01) + .version(v1) + .interval(segmentV10.getInterval()) + .shardSpec(new NumberedShardSpec(1, 1)) + .build(); + verifyIntervalHasUsedSegments(JAN_23, segmentV01, segmentV10, segmentV11); + verifyIntervalHasVisibleSegments(JAN_23, segmentV10, segmentV11); + } + + @Test + public void testAllocateLockAppendDayReplaceMonth() + { + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(FIRST_OF_JAN_23.getStart(), Granularities.DAY); + Assert.assertEquals(FIRST_OF_JAN_23, pendingSegment.getInterval()); + Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); + + final String v1 = replaceTask.acquireReplaceLockOn(JAN_23).getVersion(); + 
+ final DataSegment segmentV01 = asSegment(pendingSegment); + commitAppendSegments(segmentV01); + + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV01); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV01); + + final DataSegment segmentV10 = createSegment(JAN_23, v1); + commitReplaceSegments(segmentV10); + + // Verify that append segment gets upgraded to replace version + final DataSegment segmentV11 = DataSegment.builder(segmentV01) + .version(v1) + .interval(segmentV10.getInterval()) + .shardSpec(new NumberedShardSpec(1, 1)) + .build(); + verifyIntervalHasUsedSegments(JAN_23, segmentV01, segmentV10, segmentV11); + verifyIntervalHasVisibleSegments(JAN_23, segmentV10, segmentV11); + } + + @Test + public void testAllocateAppendDayLockReplaceMonth() + { + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(FIRST_OF_JAN_23.getStart(), Granularities.DAY); + Assert.assertEquals(FIRST_OF_JAN_23, pendingSegment.getInterval()); + Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); + + final DataSegment segmentV01 = asSegment(pendingSegment); + commitAppendSegments(segmentV01); + + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV01); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV01); + + final String v1 = replaceTask.acquireReplaceLockOn(JAN_23).getVersion(); + + final DataSegment segmentV10 = createSegment(JAN_23, v1); + commitReplaceSegments(segmentV10); + + // Verify that the old segment gets completely replaced + verifyIntervalHasUsedSegments(JAN_23, segmentV01, segmentV10); + verifyIntervalHasVisibleSegments(JAN_23, segmentV10); + } + + @Test + public void testLockReplaceDayAllocateAppendMonth() + { + final String v1 = replaceTask.acquireReplaceLockOn(FIRST_OF_JAN_23).getVersion(); + + final DataSegment segmentV10 = createSegment(FIRST_OF_JAN_23, v1); + commitReplaceSegments(segmentV10); + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV10); + 
verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10); + + // Verify that an APPEND lock cannot be acquired on month + TaskLock appendLock = appendTask.acquireAppendLockOn(JAN_23); + Assert.assertNull(appendLock); + + // Verify that new segment gets allocated with DAY granularity even though preferred was MONTH + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); + Assert.assertEquals(v1, pendingSegment.getVersion()); + Assert.assertEquals(FIRST_OF_JAN_23, pendingSegment.getInterval()); + + final DataSegment segmentV11 = asSegment(pendingSegment); + commitAppendSegments(segmentV11); + + verifyIntervalHasUsedSegments(JAN_23, segmentV10, segmentV11); + verifyIntervalHasVisibleSegments(JAN_23, segmentV10, segmentV11); + } + + @Test + public void testLockAllocateAppendMonthReplaceDay() + { + final String v1 = replaceTask.acquireReplaceLockOn(FIRST_OF_JAN_23).getVersion(); + + // Verify that an APPEND lock cannot be acquired on month + TaskLock appendLock = appendTask.acquireAppendLockOn(JAN_23); + Assert.assertNull(appendLock); + + // Verify that the segment is allocated for DAY granularity + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); + Assert.assertEquals(FIRST_OF_JAN_23, pendingSegment.getInterval()); + Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); + + final DataSegment segmentV01 = asSegment(pendingSegment); + commitAppendSegments(segmentV01); + + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV01); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV01); + + final DataSegment segmentV10 = createSegment(FIRST_OF_JAN_23, v1); + commitReplaceSegments(segmentV10); + + // Verify that append segment gets upgraded to replace version + final DataSegment segmentV11 = DataSegment.builder(segmentV01) + .version(v1) + .interval(segmentV10.getInterval()) + .shardSpec(new 
NumberedShardSpec(1, 1)) + .build(); + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV01, segmentV10, segmentV11); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10, segmentV11); + } + + @Test + public void testLockAllocateReplaceDayAppendMonth() + { + final String v1 = replaceTask.acquireReplaceLockOn(FIRST_OF_JAN_23).getVersion(); + + // Verify that an APPEND lock cannot be acquired on month + TaskLock appendLock = appendTask.acquireAppendLockOn(JAN_23); + Assert.assertNull(appendLock); + + // Verify that the segment is allocated for DAY granularity instead of MONTH + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); + Assert.assertEquals(FIRST_OF_JAN_23, pendingSegment.getInterval()); + Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); + + final DataSegment segmentV10 = createSegment(FIRST_OF_JAN_23, v1); + commitReplaceSegments(segmentV10); + + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV10); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10); + + final DataSegment segmentV01 = asSegment(pendingSegment); + commitAppendSegments(segmentV01); + + final DataSegment segmentV11 = DataSegment.builder(segmentV01) + .interval(FIRST_OF_JAN_23) + .version(v1) + .shardSpec(new NumberedShardSpec(1, 1)) + .build(); + + verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV01, segmentV10, segmentV11); + verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10, segmentV11); + } + + @Test + public void testAllocateLockReplaceDayAppendMonth() + { + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); + Assert.assertEquals(JAN_23, pendingSegment.getInterval()); + Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); + + // Verify that replace lock cannot be acquired on DAY as MONTH is already locked + TaskLock replaceLock = replaceTask.acquireReplaceLockOn(FIRST_OF_JAN_23); + 
Assert.assertNull(replaceLock); + + // Verify that segment cannot be committed since there is no lock + final DataSegment segmentV10 = createSegment(FIRST_OF_JAN_23, SEGMENT_V0); + final ISE exception = Assert.assertThrows(ISE.class, () -> commitReplaceSegments(segmentV10)); + final Throwable throwable = Throwables.getRootCause(exception); + Assert.assertEquals( + StringUtils.format( + "Segments[[%s]] are not covered by locks[[]] for task[%s]", + segmentV10, replaceTask.getId() + ), + throwable.getMessage() + ); + + final DataSegment segmentV01 = asSegment(pendingSegment); + commitAppendSegments(segmentV01); + verifyIntervalHasUsedSegments(JAN_23, segmentV01); + verifyIntervalHasVisibleSegments(JAN_23, segmentV01); + } + + @Test + public void testAllocateAppendMonthLockReplaceDay() + { + final SegmentIdWithShardSpec pendingSegment + = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); + Assert.assertEquals(JAN_23, pendingSegment.getInterval()); + Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); + + final DataSegment segmentV01 = asSegment(pendingSegment); + commitAppendSegments(segmentV01); + + verifyIntervalHasUsedSegments(JAN_23, segmentV01); + verifyIntervalHasVisibleSegments(JAN_23, segmentV01); + + // Verify that replace lock cannot be acquired on DAY as MONTH is already locked + final TaskLock replaceLock = replaceTask.acquireReplaceLockOn(FIRST_OF_JAN_23); + Assert.assertNull(replaceLock); + } + + @Test + public void testSegmentIsAllocatedAtLatestVersion() + { + final SegmentIdWithShardSpec pendingSegmentV01 + = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); + Assert.assertEquals(SEGMENT_V0, pendingSegmentV01.getVersion()); + Assert.assertEquals(JAN_23, pendingSegmentV01.getInterval()); + + final String v1 = replaceTask.acquireReplaceLockOn(JAN_23).getVersion(); + final DataSegment segmentV10 = createSegment(JAN_23, v1); + commitReplaceSegments(segmentV10); + 
verifyIntervalHasUsedSegments(JAN_23, segmentV10); + verifyIntervalHasVisibleSegments(JAN_23, segmentV10); + + final SegmentIdWithShardSpec pendingSegmentV12 + = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); + Assert.assertNotEquals(pendingSegmentV01.asSegmentId(), pendingSegmentV12.asSegmentId()); + Assert.assertEquals(v1, pendingSegmentV12.getVersion()); + Assert.assertEquals(JAN_23, pendingSegmentV12.getInterval()); + + replaceTask.releaseLock(JAN_23); + final ActionsTestTask replaceTask2 = createAndStartTask(); + final String v2 = replaceTask2.acquireReplaceLockOn(JAN_23).getVersion(); + final DataSegment segmentV20 = createSegment(JAN_23, v2); + replaceTask2.commitReplaceSegments(segmentV20); + verifyIntervalHasUsedSegments(JAN_23, segmentV10, segmentV20); + verifyIntervalHasVisibleSegments(JAN_23, segmentV20); + + final SegmentIdWithShardSpec pendingSegmentV23 + = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); + Assert.assertNotEquals(pendingSegmentV01.asSegmentId(), pendingSegmentV23.asSegmentId()); + Assert.assertEquals(v2, pendingSegmentV23.getVersion()); + Assert.assertEquals(JAN_23, pendingSegmentV23.getInterval()); + + // Commit the append segments + final DataSegment segmentV01 = asSegment(pendingSegmentV01); + final DataSegment segmentV12 = asSegment(pendingSegmentV12); + final DataSegment segmentV23 = asSegment(pendingSegmentV23); + + Set appendedSegments + = commitAppendSegments(segmentV01, segmentV12, segmentV23).getSegments(); + Assert.assertEquals(3 + 3, appendedSegments.size()); + + // Verify that the original append segments have been committed + Assert.assertTrue(appendedSegments.remove(segmentV01)); + Assert.assertTrue(appendedSegments.remove(segmentV12)); + Assert.assertTrue(appendedSegments.remove(segmentV23)); + + // Verify that segmentV01 has been upgraded to both v1 and v2 + final DataSegment segmentV11 = findSegmentWith(v1, segmentV01.getLoadSpec(), appendedSegments); + 
Assert.assertNotNull(segmentV11); + final DataSegment segmentV21 = findSegmentWith(v2, segmentV01.getLoadSpec(), appendedSegments); + Assert.assertNotNull(segmentV21); + + // Verify that segmentV12 has been upgraded to v2 + final DataSegment segmentV22 = findSegmentWith(v2, segmentV12.getLoadSpec(), appendedSegments); + Assert.assertNotNull(segmentV22); + + // Verify that segmentV23 is not downgraded to v1 + final DataSegment segmentV13 = findSegmentWith(v1, segmentV23.getLoadSpec(), appendedSegments); + Assert.assertNull(segmentV13); + + verifyIntervalHasUsedSegments( + YEAR_23, + segmentV01, + segmentV10, segmentV11, segmentV12, + segmentV20, segmentV21, segmentV22, segmentV23 + ); + verifyIntervalHasVisibleSegments(YEAR_23, segmentV20, segmentV21, segmentV22, segmentV23); + } + + @Test + public void testSegmentsToReplace() + { + final SegmentIdWithShardSpec pendingSegmentV01 + = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); + Assert.assertEquals(SEGMENT_V0, pendingSegmentV01.getVersion()); + Assert.assertEquals(JAN_23, pendingSegmentV01.getInterval()); + final DataSegment segment1 = asSegment(pendingSegmentV01); + commitAppendSegments(segment1); + + final SegmentIdWithShardSpec pendingSegmentV02 + = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); + Assert.assertNotEquals(pendingSegmentV01.asSegmentId(), pendingSegmentV02.asSegmentId()); + Assert.assertEquals(SEGMENT_V0, pendingSegmentV02.getVersion()); + Assert.assertEquals(JAN_23, pendingSegmentV02.getInterval()); + + verifyInputSegments(replaceTask, JAN_23, segment1); + + replaceTask.acquireReplaceLockOn(JAN_23); + + final DataSegment segment2 = asSegment(pendingSegmentV02); + commitAppendSegments(segment2); + + // Despite segment2 existing, it is not chosen to be replaced because it was created after the tasklock was acquired + verifyInputSegments(replaceTask, JAN_23, segment1); + + replaceTask.releaseLock(JAN_23); + + final 
SegmentIdWithShardSpec pendingSegmentV03 + = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); + Assert.assertNotEquals(pendingSegmentV01.asSegmentId(), pendingSegmentV03.asSegmentId()); + Assert.assertNotEquals(pendingSegmentV02.asSegmentId(), pendingSegmentV03.asSegmentId()); + Assert.assertEquals(SEGMENT_V0, pendingSegmentV03.getVersion()); + Assert.assertEquals(JAN_23, pendingSegmentV03.getInterval()); + final DataSegment segment3 = asSegment(pendingSegmentV03); + commitAppendSegments(segment3); + appendTask.releaseLock(JAN_23); + + replaceTask.acquireReplaceLockOn(FIRST_OF_JAN_23); + // The new lock was acquired before segment3 was created but it doesn't contain the month's interval + // So, all three segments are chosen + verifyInputSegments(replaceTask, JAN_23, segment1, segment2, segment3); + + replaceTask.releaseLock(FIRST_OF_JAN_23); + // All the segments are chosen when there's no lock + verifyInputSegments(replaceTask, JAN_23, segment1, segment2, segment3); + } + + @Test + public void testLockAllocateDayReplaceMonthAllocateAppend() + { + final SegmentIdWithShardSpec pendingSegmentV0 + = appendTask.allocateSegmentForTimestamp(FIRST_OF_JAN_23.getStart(), Granularities.DAY); + + final String v1 = replaceTask.acquireReplaceLockOn(JAN_23).getVersion(); + + final DataSegment segmentV10 = createSegment(JAN_23, v1); + commitReplaceSegments(segmentV10); + verifyIntervalHasUsedSegments(JAN_23, segmentV10); + + final SegmentIdWithShardSpec pendingSegmentV1 + = appendTask.allocateSegmentForTimestamp(FIRST_OF_JAN_23.getStart(), Granularities.DAY); + Assert.assertEquals(segmentV10.getVersion(), pendingSegmentV1.getVersion()); + + final DataSegment segmentV00 = asSegment(pendingSegmentV0); + final DataSegment segmentV11 = asSegment(pendingSegmentV1); + Set appendSegments = commitAppendSegments(segmentV00, segmentV11) + .getSegments(); + + Assert.assertEquals(3, appendSegments.size()); + // Segment V11 is committed + 
Assert.assertTrue(appendSegments.remove(segmentV11)); + // Segment V00 is also committed + Assert.assertTrue(appendSegments.remove(segmentV00)); + // Segment V00 is upgraded to v1 with MONTH granularity at the time of commit as V12 + final DataSegment segmentV12 = Iterables.getOnlyElement(appendSegments); + Assert.assertEquals(v1, segmentV12.getVersion()); + Assert.assertEquals(JAN_23, segmentV12.getInterval()); + Assert.assertEquals(segmentV00.getLoadSpec(), segmentV12.getLoadSpec()); + + verifyIntervalHasUsedSegments(JAN_23, segmentV00, segmentV10, segmentV11, segmentV12); + verifyIntervalHasVisibleSegments(JAN_23, segmentV10, segmentV11, segmentV12); + } + + + @Nullable + private DataSegment findSegmentWith(String version, Map loadSpec, Set segments) + { + for (DataSegment segment : segments) { + if (version.equals(segment.getVersion()) + && Objects.equals(segment.getLoadSpec(), loadSpec)) { + return segment; + } + } + + return null; + } + + private static DataSegment asSegment(SegmentIdWithShardSpec pendingSegment) + { + final SegmentId id = pendingSegment.asSegmentId(); + return new DataSegment( + id, + Collections.singletonMap(id.toString(), id.toString()), + Collections.emptyList(), + Collections.emptyList(), + pendingSegment.getShardSpec(), + null, + 0, + 0 + ); + } + + private void verifyIntervalHasUsedSegments(Interval interval, DataSegment... expectedSegments) + { + verifySegments(interval, Segments.INCLUDING_OVERSHADOWED, expectedSegments); + } + + private void verifyIntervalHasVisibleSegments(Interval interval, DataSegment... expectedSegments) + { + verifySegments(interval, Segments.ONLY_VISIBLE, expectedSegments); + } + + private void verifySegments(Interval interval, Segments visibility, DataSegment... 
expectedSegments) + { + try { + Collection allUsedSegments = dummyTaskActionClient.submit( + new RetrieveUsedSegmentsAction( + WIKI, + null, + ImmutableList.of(interval), + visibility + ) + ); + Assert.assertEquals(Sets.newHashSet(expectedSegments), Sets.newHashSet(allUsedSegments)); + } + catch (IOException e) { + throw new ISE(e, "Error while fetching used segments in interval[%s]", interval); + } + } + + private void verifyInputSegments(Task task, Interval interval, DataSegment... expectedSegments) + { + try { + final TaskActionClient taskActionClient = taskActionClientFactory.create(task); + Collection allUsedSegments = taskActionClient.submit( + new RetrieveUsedSegmentsAction( + WIKI, + Collections.singletonList(interval) + ) + ); + Assert.assertEquals(Sets.newHashSet(expectedSegments), Sets.newHashSet(allUsedSegments)); + } + catch (IOException e) { + throw new ISE(e, "Error while fetching segments to replace in interval[%s]", interval); + } + } + + private TaskToolboxFactory createToolboxFactory( + TaskConfig taskConfig, + TaskActionClientFactory taskActionClientFactory + ) + { + TestTaskToolboxFactory.Builder builder = new TestTaskToolboxFactory.Builder() + .setConfig(taskConfig) + .setIndexIO(new IndexIO(getObjectMapper(), ColumnConfig.DEFAULT)) + .setTaskActionClientFactory(taskActionClientFactory); + return new TestTaskToolboxFactory(builder) + { + @Override + public TaskToolbox build(TaskConfig config, Task task) + { + return createTaskToolbox(config, task); + } + }; + } + + private DataSegment createSegment(Interval interval, String version) + { + SegmentId id = SegmentId.of(WIKI, interval, version, null); + return DataSegment.builder() + .dataSource(WIKI) + .interval(interval) + .version(version) + .loadSpec(Collections.singletonMap(id.toString(), id.toString())) + .size(100) + .build(); + } + + private ActionsTestTask createAndStartTask() + { + ActionsTestTask task = new ActionsTestTask(WIKI, "test_" + groupId.incrementAndGet(), 
taskActionClientFactory); + taskQueue.add(task); + runningTasks.add(task); + return task; + } + + private void commitReplaceSegments(DataSegment... dataSegments) + { + replaceTask.commitReplaceSegments(dataSegments); + for (int i = 0; i < supervisorId.getValues().size(); i++) { + announceUpgradedPendingSegment(oldPendingSegment.getValues().get(i), newPendingSegment.getValues().get(i)); + } + supervisorId.reset(); + oldPendingSegment.reset(); + newPendingSegment.reset(); + } + + private SegmentPublishResult commitAppendSegments(DataSegment... dataSegments) + { + SegmentPublishResult result = appendTask.commitAppendSegments(dataSegments); + result.getSegments().forEach(this::unannounceUpgradedPendingSegment); + for (DataSegment segment : dataSegments) { + parentSegmentToLoadSpec.put(segment.getId(), Iterables.getOnlyElement(segment.getLoadSpec().values())); + } + return result; + } + + private void announceUpgradedPendingSegment( + SegmentIdWithShardSpec oldPendingSegment, + SegmentIdWithShardSpec newPendingSegment + ) + { + appendTask.getAnnouncedSegmentsToParentSegments() + .put(newPendingSegment.asSegmentId(), oldPendingSegment.asSegmentId()); + } + + private void unannounceUpgradedPendingSegment( + DataSegment segment + ) + { + appendTask.getAnnouncedSegmentsToParentSegments() + .remove(segment.getId()); + } + + private void verifyVersionIntervalLoadSpecUniqueness() + { + for (DataSegment usedSegment : getAllUsedSegments()) { + final String version = usedSegment.getVersion(); + final Interval interval = usedSegment.getInterval(); + final Object loadSpec = Iterables.getOnlyElement(usedSegment.getLoadSpec().values()); + Map> intervalToLoadSpecs + = versionToIntervalToLoadSpecs.computeIfAbsent(version, v -> new HashMap<>()); + Set loadSpecs + = intervalToLoadSpecs.computeIfAbsent(interval, i -> new HashSet<>()); + Assert.assertFalse(loadSpecs.contains(loadSpec)); + loadSpecs.add(loadSpec); + } + + for (Map.Entry entry : 
appendTask.getAnnouncedSegmentsToParentSegments().entrySet()) { + final String version = entry.getKey().getVersion(); + final Interval interval = entry.getKey().getInterval(); + final Object loadSpec = parentSegmentToLoadSpec.get(entry.getValue()); + Map> intervalToLoadSpecs + = versionToIntervalToLoadSpecs.computeIfAbsent(version, v -> new HashMap<>()); + Set loadSpecs + = intervalToLoadSpecs.computeIfAbsent(interval, i -> new HashSet<>()); + Assert.assertFalse(loadSpecs.contains(loadSpec)); + loadSpecs.add(loadSpec); + } + } + + private Collection getAllUsedSegments() + { + try { + return dummyTaskActionClient.submit( + new RetrieveUsedSegmentsAction( + WIKI, + null, + ImmutableList.of(Intervals.ETERNITY), + Segments.INCLUDING_OVERSHADOWED + ) + ); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } +} diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index b393b49a0912..18b264e2acfd 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -2790,9 +2790,9 @@ public int deletePendingSegmentsForTaskGroup(final String pendingSegmentsGroup) @Override public List getAllPendingSegments(String datasource) { - connector.retryWithHandle( - handle -> getPendingSegmentsForIntervalWithHandle(handle, datasource, Intervals.ETERNITY); - ) + return connector.retryWithHandle( + handle -> getPendingSegmentsForIntervalWithHandle(handle, datasource, Intervals.ETERNITY) + ); } @Override From 0386c2a066fc1fed27d440390c43309a24103f4f Mon Sep 17 00:00:00 2001 From: Amatya Date: Mon, 18 Mar 2024 10:17:17 +0530 Subject: [PATCH 05/20] various minor fixes --- .../indexing/common/task/AbstractTask.java | 2 +- .../druid/indexing/overlord/TaskLockbox.java | 46 +++++++++++++------ 
.../druid/indexing/overlord/TaskQueue.java | 14 +----- .../SeekableStreamIndexTask.java | 6 +++ .../IndexerSQLMetadataStorageCoordinator.java | 24 +++++++--- 5 files changed, 57 insertions(+), 35 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java index 9a7f1c1f2dca..2e5783853084 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java @@ -283,7 +283,7 @@ public String getGroupId() @Override public String getPendingSegmentGroup() { - return null; + return groupId; } @JsonProperty("resource") diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java index 5b91396fe19d..2dc6dfd4b493 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java @@ -103,6 +103,11 @@ public class TaskLockbox // this set should be accessed under the giant lock. private final Set activeTasks = new HashSet<>(); + // Stores map of pending task group of tasks to the set of their ids. + // Useful for task replicas. Clean up pending segments only when the set is empty. + // this map should be accessed under the giant lock. 
+ private final HashMap> activePendingTaskGroupToTaskIds = new HashMap<>(); + @Inject public TaskLockbox( TaskStorage taskStorage, @@ -213,6 +218,13 @@ public int compare(Pair left, Pair right) activeTasks.remove(task.getId()); } } + activePendingTaskGroupToTaskIds.clear(); + for (Task task : storedActiveTasks) { + if (activeTasks.contains(task.getId()) && task.getPendingSegmentGroup() != null) { + activePendingTaskGroupToTaskIds.computeIfAbsent(task.getPendingSegmentGroup(), s -> new HashSet<>()) + .add(task.getId()); + } + } log.info( "Synced %,d locks for %,d activeTasks from storage (%,d locks ignored).", @@ -1160,23 +1172,22 @@ public void add(Task task) try { log.info("Adding task[%s] to activeTasks", task.getId()); activeTasks.add(task.getId()); + if (task.getPendingSegmentGroup() != null) { + activePendingTaskGroupToTaskIds.computeIfAbsent(task.getPendingSegmentGroup(), s -> new HashSet<>()) + .add(task.getId()); + } } finally { giant.unlock(); } } - public void remove(final Task task) - { - remove(task, false); - } - /** * Release all locks for a task and remove task from set of active tasks. Does nothing if the task is not currently locked or not an active task. 
* * @param task task to unlock */ - public void remove(final Task task, final boolean cleanPendingSegments) + public void remove(final Task task) { giant.lock(); try { @@ -1190,14 +1201,21 @@ public void remove(final Task task, final boolean cleanPendingSegments) task.getId() ); } - if (cleanPendingSegments) { - if (findLocksForTask(task).stream().anyMatch(lock -> lock.getType() == TaskLockType.APPEND)) { - final int pendingSegmentsDeleted = metadataStorageCoordinator.deletePendingSegmentsForTaskGroup(task.getPendingSegmentGroup()); - log.info( - "Deleted [%d] entries from pendingSegments table for pending segments group [%s] with APPEND locks.", - pendingSegmentsDeleted, - task.getPendingSegmentGroup() - ); + final String pendingSegmentGroup = task.getPendingSegmentGroup(); + if (task.getPendingSegmentGroup() != null) { + final Set idsInSameGroup = activePendingTaskGroupToTaskIds.get(pendingSegmentGroup); + idsInSameGroup.remove(task.getId()); + if (idsInSameGroup.isEmpty()) { + if (findLocksForTask(task).stream().anyMatch(lock -> lock.getType() == TaskLockType.APPEND)) { + final int pendingSegmentsDeleted + = metadataStorageCoordinator.deletePendingSegmentsForTaskGroup(task.getPendingSegmentGroup()); + log.info( + "Deleted [%d] entries from pendingSegments table for pending segments group [%s] with APPEND locks.", + pendingSegmentsDeleted, + task.getPendingSegmentGroup() + ); + } + activePendingTaskGroupToTaskIds.remove(pendingSegmentGroup); } } unlockAll(task); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskQueue.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskQueue.java index 508e269572be..830ae9b732bd 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskQueue.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskQueue.java @@ -568,19 +568,7 @@ private boolean removeTaskInternal(final String taskId) { final Task task = tasks.remove(taskId); 
if (task != null) { - final String pendingSegmentGroup = task.getPendingSegmentGroup(); - boolean cleanPendingSegments = false; - Set tasksWithSamePendingGroup = new HashSet<>(); - if (pendingSegmentGroup != null) { - for (Task otherTask : tasks.values()) { - if (pendingSegmentGroup.equals(otherTask.getPendingSegmentGroup())) { - tasksWithSamePendingGroup.add(otherTask.getId()); - } - } - tasksWithSamePendingGroup.removeAll(recentlyCompletedTasks); - cleanPendingSegments = tasksWithSamePendingGroup.isEmpty(); - } - taskLockbox.remove(task, cleanPendingSegments); + taskLockbox.remove(task); return true; } else { return false; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java index 545090157a45..0a48b0e4ac8b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java @@ -269,6 +269,12 @@ public boolean withinMinMaxRecordTime(final InputRow row) return !beforeMinimumMessageTime && !afterMaximumMessageTime; } + @Override + public String getPendingSegmentGroup() + { + return getTaskResource().getAvailabilityGroup(); + } + protected abstract SeekableStreamIndexTaskRunner createTaskRunner(); /** diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 18b264e2acfd..95a51fbcee1e 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -288,16 +288,23 @@ List getPendingSegmentsForIntervalWithHandle( final Interval interval ) { + boolean compareIntervalEndpointsAsStrings = 
Intervals.canCompareEndpointsAsStrings(interval); + String sql = "SELECT payload, sequence_name, sequence_prev_id, task_group, parent_id" + " FROM " + dbTables.getPendingSegmentsTable() - + " WHERE dataSource = :dataSource" - + " AND start < :end" - + StringUtils.format(" AND %1$send%1$s > :start", connector.getQuoteString()); + + " WHERE dataSource = :dataSource"; + if (compareIntervalEndpointsAsStrings) { + sql = sql + + " AND start < :end" + + StringUtils.format(" AND %1$send%1$s > :start", connector.getQuoteString()); + } Query> query = handle.createQuery(sql) - .bind("dataSource", dataSource) - .bind("start", interval.getStart().toString()) - .bind("end", interval.getEnd().toString()); + .bind("dataSource", dataSource); + if (compareIntervalEndpointsAsStrings) { + query = query.bind("start", interval.getStart().toString()) + .bind("end", interval.getEnd().toString()); + } final ResultIterator pendingSegmentIterator = @@ -305,7 +312,10 @@ List getPendingSegmentsForIntervalWithHandle( .iterator(); final ImmutableList.Builder pendingSegments = ImmutableList.builder(); while (pendingSegmentIterator.hasNext()) { - pendingSegments.add(pendingSegmentIterator.next()); + final PendingSegment pendingSegment = pendingSegmentIterator.next(); + if (compareIntervalEndpointsAsStrings || pendingSegment.getId().getInterval().overlaps(interval)) { + pendingSegments.add(pendingSegment); + } } pendingSegmentIterator.close(); return pendingSegments.build(); From cf7815bfadf9b0c4aa234a690b205c82fe869085 Mon Sep 17 00:00:00 2001 From: Amatya Date: Mon, 18 Mar 2024 12:43:21 +0530 Subject: [PATCH 06/20] Clean up unneeded tests --- .../druid/indexing/overlord/TaskLockbox.java | 2 +- ...ncurrentReplaceAndStreamingAppendTest.java | 125 +----------------- .../indexing/overlord/TaskLockboxTest.java | 1 + 3 files changed, 6 insertions(+), 122 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java 
b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java index 2dc6dfd4b493..c8a5acee8ce9 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java @@ -1202,7 +1202,7 @@ public void remove(final Task task) ); } final String pendingSegmentGroup = task.getPendingSegmentGroup(); - if (task.getPendingSegmentGroup() != null) { + if (pendingSegmentGroup != null && activePendingTaskGroupToTaskIds.containsKey(pendingSegmentGroup)) { final Set idsInSameGroup = activePendingTaskGroupToTaskIds.get(pendingSegmentGroup); idsInSameGroup.remove(task.getId()); if (idsInSameGroup.isEmpty()) { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java index 1e91855089f7..5d97a7e5a725 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java @@ -507,7 +507,7 @@ public void testLockReplaceDayAllocateAppendMonth() final String v1 = replaceTask.acquireReplaceLockOn(FIRST_OF_JAN_23).getVersion(); final DataSegment segmentV10 = createSegment(FIRST_OF_JAN_23, v1); - commitReplaceSegments(segmentV10); + replaceTask.commitReplaceSegments(segmentV10); verifyIntervalHasUsedSegments(FIRST_OF_JAN_23, segmentV10); verifyIntervalHasVisibleSegments(FIRST_OF_JAN_23, segmentV10); @@ -635,7 +635,7 @@ public void testAllocateAppendMonthLockReplaceDay() Assert.assertEquals(SEGMENT_V0, pendingSegment.getVersion()); final DataSegment segmentV01 = asSegment(pendingSegment); - commitAppendSegments(segmentV01); + 
appendTask.commitAppendSegments(segmentV01); verifyIntervalHasUsedSegments(JAN_23, segmentV01); verifyIntervalHasVisibleSegments(JAN_23, segmentV01); @@ -645,125 +645,6 @@ public void testAllocateAppendMonthLockReplaceDay() Assert.assertNull(replaceLock); } - @Test - public void testSegmentIsAllocatedAtLatestVersion() - { - final SegmentIdWithShardSpec pendingSegmentV01 - = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); - Assert.assertEquals(SEGMENT_V0, pendingSegmentV01.getVersion()); - Assert.assertEquals(JAN_23, pendingSegmentV01.getInterval()); - - final String v1 = replaceTask.acquireReplaceLockOn(JAN_23).getVersion(); - final DataSegment segmentV10 = createSegment(JAN_23, v1); - commitReplaceSegments(segmentV10); - verifyIntervalHasUsedSegments(JAN_23, segmentV10); - verifyIntervalHasVisibleSegments(JAN_23, segmentV10); - - final SegmentIdWithShardSpec pendingSegmentV12 - = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); - Assert.assertNotEquals(pendingSegmentV01.asSegmentId(), pendingSegmentV12.asSegmentId()); - Assert.assertEquals(v1, pendingSegmentV12.getVersion()); - Assert.assertEquals(JAN_23, pendingSegmentV12.getInterval()); - - replaceTask.releaseLock(JAN_23); - final ActionsTestTask replaceTask2 = createAndStartTask(); - final String v2 = replaceTask2.acquireReplaceLockOn(JAN_23).getVersion(); - final DataSegment segmentV20 = createSegment(JAN_23, v2); - replaceTask2.commitReplaceSegments(segmentV20); - verifyIntervalHasUsedSegments(JAN_23, segmentV10, segmentV20); - verifyIntervalHasVisibleSegments(JAN_23, segmentV20); - - final SegmentIdWithShardSpec pendingSegmentV23 - = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); - Assert.assertNotEquals(pendingSegmentV01.asSegmentId(), pendingSegmentV23.asSegmentId()); - Assert.assertEquals(v2, pendingSegmentV23.getVersion()); - Assert.assertEquals(JAN_23, pendingSegmentV23.getInterval()); - - // Commit the 
append segments - final DataSegment segmentV01 = asSegment(pendingSegmentV01); - final DataSegment segmentV12 = asSegment(pendingSegmentV12); - final DataSegment segmentV23 = asSegment(pendingSegmentV23); - - Set appendedSegments - = commitAppendSegments(segmentV01, segmentV12, segmentV23).getSegments(); - Assert.assertEquals(3 + 3, appendedSegments.size()); - - // Verify that the original append segments have been committed - Assert.assertTrue(appendedSegments.remove(segmentV01)); - Assert.assertTrue(appendedSegments.remove(segmentV12)); - Assert.assertTrue(appendedSegments.remove(segmentV23)); - - // Verify that segmentV01 has been upgraded to both v1 and v2 - final DataSegment segmentV11 = findSegmentWith(v1, segmentV01.getLoadSpec(), appendedSegments); - Assert.assertNotNull(segmentV11); - final DataSegment segmentV21 = findSegmentWith(v2, segmentV01.getLoadSpec(), appendedSegments); - Assert.assertNotNull(segmentV21); - - // Verify that segmentV12 has been upgraded to v2 - final DataSegment segmentV22 = findSegmentWith(v2, segmentV12.getLoadSpec(), appendedSegments); - Assert.assertNotNull(segmentV22); - - // Verify that segmentV23 is not downgraded to v1 - final DataSegment segmentV13 = findSegmentWith(v1, segmentV23.getLoadSpec(), appendedSegments); - Assert.assertNull(segmentV13); - - verifyIntervalHasUsedSegments( - YEAR_23, - segmentV01, - segmentV10, segmentV11, segmentV12, - segmentV20, segmentV21, segmentV22, segmentV23 - ); - verifyIntervalHasVisibleSegments(YEAR_23, segmentV20, segmentV21, segmentV22, segmentV23); - } - - @Test - public void testSegmentsToReplace() - { - final SegmentIdWithShardSpec pendingSegmentV01 - = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); - Assert.assertEquals(SEGMENT_V0, pendingSegmentV01.getVersion()); - Assert.assertEquals(JAN_23, pendingSegmentV01.getInterval()); - final DataSegment segment1 = asSegment(pendingSegmentV01); - commitAppendSegments(segment1); - - final 
SegmentIdWithShardSpec pendingSegmentV02 - = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); - Assert.assertNotEquals(pendingSegmentV01.asSegmentId(), pendingSegmentV02.asSegmentId()); - Assert.assertEquals(SEGMENT_V0, pendingSegmentV02.getVersion()); - Assert.assertEquals(JAN_23, pendingSegmentV02.getInterval()); - - verifyInputSegments(replaceTask, JAN_23, segment1); - - replaceTask.acquireReplaceLockOn(JAN_23); - - final DataSegment segment2 = asSegment(pendingSegmentV02); - commitAppendSegments(segment2); - - // Despite segment2 existing, it is not chosen to be replaced because it was created after the tasklock was acquired - verifyInputSegments(replaceTask, JAN_23, segment1); - - replaceTask.releaseLock(JAN_23); - - final SegmentIdWithShardSpec pendingSegmentV03 - = appendTask.allocateSegmentForTimestamp(JAN_23.getStart(), Granularities.MONTH); - Assert.assertNotEquals(pendingSegmentV01.asSegmentId(), pendingSegmentV03.asSegmentId()); - Assert.assertNotEquals(pendingSegmentV02.asSegmentId(), pendingSegmentV03.asSegmentId()); - Assert.assertEquals(SEGMENT_V0, pendingSegmentV03.getVersion()); - Assert.assertEquals(JAN_23, pendingSegmentV03.getInterval()); - final DataSegment segment3 = asSegment(pendingSegmentV03); - commitAppendSegments(segment3); - appendTask.releaseLock(JAN_23); - - replaceTask.acquireReplaceLockOn(FIRST_OF_JAN_23); - // The new lock was acquired before segment3 was created but it doesn't contain the month's interval - // So, all three segments are chosen - verifyInputSegments(replaceTask, JAN_23, segment1, segment2, segment3); - - replaceTask.releaseLock(FIRST_OF_JAN_23); - // All the segments are chosen when there's no lock - verifyInputSegments(replaceTask, JAN_23, segment1, segment2, segment3); - } - @Test public void testLockAllocateDayReplaceMonthAllocateAppend() { @@ -922,6 +803,7 @@ private void commitReplaceSegments(DataSegment... 
dataSegments) supervisorId.reset(); oldPendingSegment.reset(); newPendingSegment.reset(); + replaceTask.finishRunAndGetStatus(); } private SegmentPublishResult commitAppendSegments(DataSegment... dataSegments) @@ -931,6 +813,7 @@ private SegmentPublishResult commitAppendSegments(DataSegment... dataSegments) for (DataSegment segment : dataSegments) { parentSegmentToLoadSpec.put(segment.getId(), Iterables.getOnlyElement(segment.getLoadSpec().values())); } + appendTask.finishRunAndGetStatus(); return result; } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java index c4ee78ea6a89..842ed684ea82 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java @@ -1905,6 +1905,7 @@ public void testUpgradeSegmentsCleanupOnUnlock() = EasyMock.createMock(IndexerSQLMetadataStorageCoordinator.class); // Only the replaceTask should attempt a delete on the upgradeSegments table EasyMock.expect(coordinator.deleteUpgradeSegmentsForTask(replaceTask.getId())).andReturn(0).once(); + EasyMock.expect(coordinator.deletePendingSegmentsForTaskGroup(appendTask.getId())).andReturn(0); EasyMock.replay(coordinator); final TaskLockbox taskLockbox = new TaskLockbox(taskStorage, coordinator); From 2a32dc2caa330763776fc007d94b49ce7d352309 Mon Sep 17 00:00:00 2001 From: Amatya Date: Tue, 9 Apr 2024 08:47:25 +0530 Subject: [PATCH 07/20] Address most review comments --- .../druid/msq/indexing/MSQControllerTask.java | 3 +- .../druid/msq/indexing/MSQWorkerTask.java | 3 +- .../SegmentTransactionalAppendAction.java | 4 +- .../SegmentTransactionalReplaceAction.java | 21 +++---- .../indexing/common/task/AbstractTask.java | 5 +- .../druid/indexing/common/task/Task.java | 6 +- .../parallel/ParallelIndexSupervisorTask.java | 10 +++ 
.../batch/parallel/SinglePhaseSubTask.java | 10 +++ .../druid/indexing/overlord/TaskLockbox.java | 21 ++++--- .../supervisor/SupervisorManager.java | 2 +- .../SeekableStreamIndexTask.java | 2 +- .../druid/indexing/common/task/TaskTest.java | 2 +- .../task/concurrent/ActionsTestTask.java | 17 ++--- .../task/concurrent/CommandQueueTask.java | 4 +- ...ncurrentReplaceAndStreamingAppendTest.java | 4 +- ...TestIndexerMetadataStorageCoordinator.java | 4 +- .../IndexerMetadataStorageCoordinator.java | 12 +++- .../IndexerSQLMetadataStorageCoordinator.java | 62 +++++++++---------- ...Segment.java => PendingSegmentRecord.java} | 10 +-- 19 files changed, 109 insertions(+), 93 deletions(-) rename server/src/main/java/org/apache/druid/metadata/{PendingSegment.java => PendingSegmentRecord.java} (94%) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java index 2743a150f23b..554c17b9ce86 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java @@ -156,9 +156,8 @@ public Set getInputSourceResources() return ImmutableSet.of(); } - @JsonIgnore @Override - public String getPendingSegmentGroup() + public String getPendingSegmentGroupId() { return getId(); } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java index cc762903bb86..8cbce08275ab 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java @@ -125,9 +125,8 @@ public Set getInputSourceResources() return 
ImmutableSet.of(); } - @JsonIgnore @Override - public String getPendingSegmentGroup() + public String getPendingSegmentGroupId() { return getControllerTaskId(); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java index 51a544c9a04c..9914ede473ac 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java @@ -136,7 +136,7 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) publishAction = () -> toolbox.getIndexerMetadataStorageCoordinator().commitAppendSegments( segments, segmentToReplaceLock, - task.getPendingSegmentGroup() + task.getPendingSegmentGroupId() ); } else { publishAction = () -> toolbox.getIndexerMetadataStorageCoordinator().commitAppendSegmentsAndMetadata( @@ -144,7 +144,7 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) segmentToReplaceLock, startMetadata, endMetadata, - task.getPendingSegmentGroup() + task.getPendingSegmentGroupId() ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java index 66d7f181c79f..2c9294d5cdd5 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java @@ -30,7 +30,7 @@ import org.apache.druid.indexing.overlord.SegmentPublishResult; import org.apache.druid.indexing.overlord.supervisor.SupervisorManager; import 
org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.metadata.PendingSegment; +import org.apache.druid.metadata.PendingSegmentRecord; import org.apache.druid.metadata.ReplaceTaskLock; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; @@ -126,7 +126,7 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) // failure to upgrade pending segments does not affect success of the commit if (publishResult.isSuccess() && toolbox.getSupervisorManager() != null) { try { - tryUpgradeOverlappingPendingSegments(task, toolbox); + registerUpgradedPendingSegmentsOnSupervisor(task, toolbox); } catch (Exception e) { log.error(e, "Error while upgrading pending segments for task[%s]", task.getId()); @@ -137,9 +137,9 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) } /** - * Tries to upgrade any pending segments that overlap with the committed segments. + * Registers upgraded pending segments on the active supervisor, if any */ - private void tryUpgradeOverlappingPendingSegments(Task task, TaskActionToolbox toolbox) + private void registerUpgradedPendingSegmentsOnSupervisor(Task task, TaskActionToolbox toolbox) { final SupervisorManager supervisorManager = toolbox.getSupervisorManager(); final Optional activeSupervisorIdWithAppendLock = @@ -149,7 +149,7 @@ private void tryUpgradeOverlappingPendingSegments(Task task, TaskActionToolbox t return; } - List pendingSegments + List pendingSegments = toolbox.getIndexerMetadataStorageCoordinator().getAllPendingSegments(task.getDataSource()); Map pendingSegmentIdMap = new HashMap<>(); pendingSegments.forEach(pendingSegment -> pendingSegmentIdMap.put( @@ -168,12 +168,11 @@ private void tryUpgradeOverlappingPendingSegments(Task task, TaskActionToolbox t }); upgradedPendingSegments.forEach( - (newId, oldId) -> toolbox.getSupervisorManager() - .registerNewVersionOfPendingSegmentOnSupervisor( - 
activeSupervisorIdWithAppendLock.get(), - oldId, - newId - ) + (newId, oldId) -> supervisorManager.registerNewVersionOfPendingSegmentOnSupervisor( + activeSupervisorIdWithAppendLock.get(), + oldId, + newId + ) ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java index 2e5783853084..f3d81cea692c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java @@ -279,11 +279,10 @@ public String getGroupId() return groupId; } - @Nullable @Override - public String getPendingSegmentGroup() + public String getPendingSegmentGroupId() { - return groupId; + throw new UnsupportedOperationException(); } @JsonProperty("resource") diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java index 00e57c81b1a6..4004515d80ae 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java @@ -102,10 +102,10 @@ public interface Task String getGroupId(); /** - * The group used to associate a pending segment with a task - * @return task group for pending segment allocations + * Unique string used by an appending task (or its sub-tasks and replicas) to allocate pending segments + * and identify pending segments allocated to it. */ - String getPendingSegmentGroup(); + String getPendingSegmentGroupId(); /** * Returns task priority. 
The task priority is currently used only for prioritized locking, but, in the future, it can diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 27694360ed95..09ff31defe5d 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -476,6 +476,16 @@ public boolean isPerfectRollup() ); } + @Override + public String getPendingSegmentGroupId() + { + if (getIngestionMode() == IngestionMode.APPEND) { + return getGroupId(); + } else { + throw new UnsupportedOperationException(); + } + } + @Nullable @Override public Granularity getSegmentGranularity() diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java index 6af09461c400..c80954339894 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java @@ -236,6 +236,16 @@ public String getSubtaskSpecId() return subtaskSpecId; } + @Override + public String getPendingSegmentGroupId() + { + if (getIngestionMode() == IngestionMode.APPEND) { + return getGroupId(); + } else { + throw new UnsupportedOperationException(); + } + } + @Override public TaskStatus runTask(final TaskToolbox toolbox) throws Exception { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java index 
c8a5acee8ce9..b5dc89dbfd20 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java @@ -28,6 +28,7 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.common.collect.Ordering; +import com.google.errorprone.annotations.concurrent.GuardedBy; import com.google.inject.Inject; import org.apache.druid.error.DruidException; import org.apache.druid.indexing.common.LockGranularity; @@ -105,7 +106,7 @@ public class TaskLockbox // Stores map of pending task group of tasks to the set of their ids. // Useful for task replicas. Clean up pending segments only when the set is empty. - // this map should be accessed under the giant lock. + @GuardedBy("giant") private final HashMap> activePendingTaskGroupToTaskIds = new HashMap<>(); @Inject @@ -220,8 +221,8 @@ public int compare(Pair left, Pair right) } activePendingTaskGroupToTaskIds.clear(); for (Task task : storedActiveTasks) { - if (activeTasks.contains(task.getId()) && task.getPendingSegmentGroup() != null) { - activePendingTaskGroupToTaskIds.computeIfAbsent(task.getPendingSegmentGroup(), s -> new HashSet<>()) + if (activeTasks.contains(task.getId()) && task.getPendingSegmentGroupId() != null) { + activePendingTaskGroupToTaskIds.computeIfAbsent(task.getPendingSegmentGroupId(), s -> new HashSet<>()) .add(task.getId()); } } @@ -423,7 +424,7 @@ public LockResult tryLock(final Task task, final LockRequest request) newSegmentId ); } - newSegmentId = allocateSegmentId(lockRequestForNewSegment, posseToUse.getTaskLock().getVersion(), task.getPendingSegmentGroup()); + newSegmentId = allocateSegmentId(lockRequestForNewSegment, posseToUse.getTaskLock().getVersion(), task.getPendingSegmentGroupId()); } } return LockResult.ok(posseToUse.getTaskLock(), newSegmentId); @@ -1172,8 +1173,8 @@ public void add(Task task) try { log.info("Adding task[%s] to 
activeTasks", task.getId()); activeTasks.add(task.getId()); - if (task.getPendingSegmentGroup() != null) { - activePendingTaskGroupToTaskIds.computeIfAbsent(task.getPendingSegmentGroup(), s -> new HashSet<>()) + if (task.getPendingSegmentGroupId() != null) { + activePendingTaskGroupToTaskIds.computeIfAbsent(task.getPendingSegmentGroupId(), s -> new HashSet<>()) .add(task.getId()); } } @@ -1201,18 +1202,18 @@ public void remove(final Task task) task.getId() ); } - final String pendingSegmentGroup = task.getPendingSegmentGroup(); + final String pendingSegmentGroup = task.getPendingSegmentGroupId(); if (pendingSegmentGroup != null && activePendingTaskGroupToTaskIds.containsKey(pendingSegmentGroup)) { final Set idsInSameGroup = activePendingTaskGroupToTaskIds.get(pendingSegmentGroup); idsInSameGroup.remove(task.getId()); if (idsInSameGroup.isEmpty()) { if (findLocksForTask(task).stream().anyMatch(lock -> lock.getType() == TaskLockType.APPEND)) { final int pendingSegmentsDeleted - = metadataStorageCoordinator.deletePendingSegmentsForTaskGroup(task.getPendingSegmentGroup()); + = metadataStorageCoordinator.deletePendingSegmentsForTaskGroup(task.getPendingSegmentGroupId()); log.info( "Deleted [%d] entries from pendingSegments table for pending segments group [%s] with APPEND locks.", pendingSegmentsDeleted, - task.getPendingSegmentGroup() + task.getPendingSegmentGroupId() ); } activePendingTaskGroupToTaskIds.remove(pendingSegmentGroup); @@ -1807,7 +1808,7 @@ SegmentCreateRequest getSegmentRequest() acquiredLock == null ? 
lockRequest.getVersion() : acquiredLock.getVersion(), action.getPartialShardSpec(), null, - task.getPendingSegmentGroup() + task.getPendingSegmentGroupId() ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java index cbe7dcc30c35..dd57b560660c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManager.java @@ -330,7 +330,7 @@ public boolean registerNewVersionOfPendingSegmentOnSupervisor( return true; } catch (Exception e) { - log.error(e, "PendingSegment[%s] mapping update request to version[%s] on Supervisor[%s] failed", + log.error(e, "PendingSegmentRecord[%s] mapping update request to version[%s] on Supervisor[%s] failed", basePendingSegment.asSegmentId(), newSegmentVersion.getVersion(), supervisorId); } return false; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java index 0a48b0e4ac8b..4ebdcc301a72 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java @@ -270,7 +270,7 @@ public boolean withinMinMaxRecordTime(final InputRow row) } @Override - public String getPendingSegmentGroup() + public String getPendingSegmentGroupId() { return getTaskResource().getAvailabilityGroup(); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskTest.java index 324d3331bfba..59c716e6638d 100644 --- 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskTest.java @@ -48,7 +48,7 @@ public String getGroupId() } @Override - public String getPendingSegmentGroup() + public String getPendingSegmentGroupId() { return null; } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ActionsTestTask.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ActionsTestTask.java index 3308a834f9ff..230cfa4668c9 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ActionsTestTask.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ActionsTestTask.java @@ -20,7 +20,6 @@ package org.apache.druid.indexing.common.task.concurrent; import com.google.common.collect.Sets; -import com.google.errorprone.annotations.concurrent.GuardedBy; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; @@ -53,8 +52,6 @@ public class ActionsTestTask extends CommandQueueTask { private final TaskActionClient client; private final AtomicInteger sequenceId = new AtomicInteger(0); - private final Object lock = new Object(); - @GuardedBy("lock") private final Map announcedSegmentsToParentSegments = new HashMap<>(); public ActionsTestTask(String datasource, String groupId, TaskActionClientFactory factory) @@ -87,9 +84,7 @@ public SegmentPublishResult commitReplaceSegments(DataSegment... segments) public Map getAnnouncedSegmentsToParentSegments() { - synchronized (lock) { - return announcedSegmentsToParentSegments; - } + return announcedSegmentsToParentSegments; } public SegmentPublishResult commitAppendSegments(DataSegment... segments) @@ -97,10 +92,8 @@ public SegmentPublishResult commitAppendSegments(DataSegment... 
segments) SegmentPublishResult publishResult = runAction( SegmentTransactionalAppendAction.forSegments(Sets.newHashSet(segments)) ); - synchronized (lock) { - for (DataSegment segment : publishResult.getSegments()) { - announcedSegmentsToParentSegments.remove(segment.getId()); - } + for (DataSegment segment : publishResult.getSegments()) { + announcedSegmentsToParentSegments.remove(segment.getId()); } return publishResult; } @@ -121,9 +114,7 @@ public SegmentIdWithShardSpec allocateSegmentForTimestamp(DateTime timestamp, Gr TaskLockType.APPEND ) ); - synchronized (lock) { - announcedSegmentsToParentSegments.put(pendingSegment.asSegmentId(), pendingSegment.asSegmentId()); - } + announcedSegmentsToParentSegments.put(pendingSegment.asSegmentId(), pendingSegment.asSegmentId()); return pendingSegment; } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java index ee2702829dc1..d3ead51bb91b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java @@ -133,7 +133,7 @@ private void addToQueue(Command command) private V waitForCommandToFinish(Command command) { try { - return command.value.get(1000, TimeUnit.SECONDS); + return command.value.get(10, TimeUnit.SECONDS); } catch (Exception e) { throw new ISE(e, "Error waiting for command on task[%s] to finish", getId()); @@ -141,7 +141,7 @@ private V waitForCommandToFinish(Command command) } @Override - public String getPendingSegmentGroup() + public String getPendingSegmentGroupId() { return getId(); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java index 5d97a7e5a725..0558e4f9d088 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java @@ -105,7 +105,6 @@ public class ConcurrentReplaceAndStreamingAppendTest extends IngestionTestBase */ private static final String SEGMENT_V0 = DateTimes.EPOCH.toString(); - private static final Interval YEAR_23 = Intervals.of("2023/2024"); private static final Interval JAN_23 = Intervals.of("2023-01/2023-02"); private static final Interval FIRST_OF_JAN_23 = Intervals.of("2023-01-01/2023-01-02"); @@ -160,7 +159,8 @@ public void setUpIngestionTestBase() throws IOException taskRunner, taskActionClientFactory, getLockbox(), - new NoopServiceEmitter() + new NoopServiceEmitter(), + getObjectMapper() ); runningTasks.clear(); taskQueue.start(); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 673c8b3c6393..c80dd53fb68c 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -30,7 +30,7 @@ import org.apache.druid.indexing.overlord.Segments; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.Pair; -import org.apache.druid.metadata.PendingSegment; +import org.apache.druid.metadata.PendingSegmentRecord; import org.apache.druid.metadata.ReplaceTaskLock; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import 
org.apache.druid.timeline.DataSegment; @@ -295,7 +295,7 @@ public int deletePendingSegmentsForTaskGroup(final String taskGroup) } @Override - public List getAllPendingSegments(String datasource) + public List getAllPendingSegments(String datasource) { throw new UnsupportedOperationException(); } diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index c966d5fbe501..07b275c80c14 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -20,7 +20,7 @@ package org.apache.druid.indexing.overlord; import org.apache.druid.java.util.common.Pair; -import org.apache.druid.metadata.PendingSegment; +import org.apache.druid.metadata.PendingSegmentRecord; import org.apache.druid.metadata.ReplaceTaskLock; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.DataSegment; @@ -239,7 +239,7 @@ Map allocatePendingSegments( * identifier may have a version lower than this one, but will not have one higher. * @param skipSegmentLineageCheck if true, perform lineage validation using previousSegmentId for this sequence. * Should be set to false if replica tasks would index events in same order - * @param taskGroup + * @param taskGroup The task group with which the pending segment is associated * @return the pending segment identifier, or null if it was impossible to allocate a new segment */ SegmentIdWithShardSpec allocatePendingSegment( @@ -324,6 +324,7 @@ SegmentPublishResult commitSegmentsAndMetadata( * must be committed in a single transaction. 
* @param appendSegmentToReplaceLock Map from append segment to the currently * active REPLACE lock (if any) covering it + * @param taskGroup taskGroup of the task committing the appended segments */ SegmentPublishResult commitAppendSegments( Set appendSegments, @@ -478,7 +479,12 @@ SegmentPublishResult commitMetadataOnly( */ int deleteUpgradeSegmentsForTask(String taskId); + /** + * Delete pending segment for a give task group after all the tasks belonging to it have completed. + * @param taskGroup task group + * @return number of pending segments deleted from the metadata store + */ int deletePendingSegmentsForTaskGroup(String taskGroup); - List getAllPendingSegments(String datasource); + List getAllPendingSegments(String datasource); } diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 32578d08fde2..7cfa23e933df 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -284,7 +284,7 @@ public int markSegmentsAsUnusedWithinInterval(String dataSource, Interval interv * Fetches all the pending segments, whose interval overlaps with the given search interval, from the metadata store. 
*/ @VisibleForTesting - List getPendingSegmentsForIntervalWithHandle( + List getPendingSegmentsForIntervalWithHandle( final Handle handle, final String dataSource, final Interval interval @@ -309,12 +309,12 @@ List getPendingSegmentsForIntervalWithHandle( } - final ResultIterator pendingSegmentIterator = - query.map((index, r, ctx) -> PendingSegment.fromResultSet(r, jsonMapper)) + final ResultIterator pendingSegmentIterator = + query.map((index, r, ctx) -> PendingSegmentRecord.fromResultSet(r, jsonMapper)) .iterator(); - final ImmutableList.Builder pendingSegments = ImmutableList.builder(); + final ImmutableList.Builder pendingSegments = ImmutableList.builder(); while (pendingSegmentIterator.hasNext()) { - final PendingSegment pendingSegment = pendingSegmentIterator.next(); + final PendingSegmentRecord pendingSegment = pendingSegmentIterator.next(); if (compareIntervalEndpointsAsStrings || pendingSegment.getId().getInterval().overlaps(interval)) { pendingSegments.add(pendingSegment); } @@ -323,7 +323,7 @@ List getPendingSegmentsForIntervalWithHandle( return pendingSegments.build(); } - List getPendingSegmentsForTaskGroupWithHandle( + List getPendingSegmentsForTaskGroupWithHandle( final Handle handle, final String dataSource, final String taskGroup @@ -337,11 +337,11 @@ List getPendingSegmentsForTaskGroupWithHandle( .bind("dataSource", dataSource) .bind("task_group", taskGroup); - final ResultIterator pendingSegmentRecords = - query.map((index, r, ctx) -> PendingSegment.fromResultSet(r, jsonMapper)) + final ResultIterator pendingSegmentRecords = + query.map((index, r, ctx) -> PendingSegmentRecord.fromResultSet(r, jsonMapper)) .iterator(); - final List pendingSegments = new ArrayList<>(); + final List pendingSegments = new ArrayList<>(); while (pendingSegmentRecords.hasNext()) { pendingSegments.add(pendingSegmentRecords.next()); } @@ -722,7 +722,7 @@ private Map upgradePendingSegmen Map replaceIntervalToMaxId ) throws JsonProcessingException { - final List 
upgradedPendingSegments = new ArrayList<>(); + final List upgradedPendingSegments = new ArrayList<>(); final Map pendingSegmentToNewId = new HashMap<>(); for (Map.Entry entry : replaceIntervalToMaxId.entrySet()) { @@ -733,10 +733,10 @@ private Map upgradePendingSegmen final int numCorePartitions = maxSegmentId.getShardSpec().getNumCorePartitions(); int currentPartitionNumber = maxSegmentId.getShardSpec().getPartitionNum(); - final List overlappingPendingSegments + final List overlappingPendingSegments = getPendingSegmentsForIntervalWithHandle(handle, datasource, replaceInterval); - for (PendingSegment overlappingPendingSegment : overlappingPendingSegments) { + for (PendingSegmentRecord overlappingPendingSegment : overlappingPendingSegments) { final SegmentIdWithShardSpec pendingSegmentId = overlappingPendingSegment.getId(); if (shouldUpgradePendingSegment(overlappingPendingSegment, replaceInterval, replaceVersion)) { @@ -750,7 +750,7 @@ private Map upgradePendingSegmen new NumberedShardSpec(++currentPartitionNumber, numCorePartitions) ); upgradedPendingSegments.add( - new PendingSegment( + new PendingSegmentRecord( newId, UPGRADED_PENDING_SEGMENT_PREFIX + replaceVersion, pendingSegmentId.toString(), @@ -780,7 +780,7 @@ private Map upgradePendingSegmen } private boolean shouldUpgradePendingSegment( - PendingSegment pendingSegment, + PendingSegmentRecord pendingSegment, Interval replaceInterval, String replaceVersion ) @@ -928,7 +928,7 @@ private Map allocatePendingSegment } // For each of the remaining requests, create a new segment - final Map createdSegments = createNewSegments( + final Map createdSegments = createNewSegments( handle, dataSource, interval, @@ -951,7 +951,7 @@ private Map allocatePendingSegment skipSegmentLineageCheck ); - for (Map.Entry entry : createdSegments.entrySet()) { + for (Map.Entry entry : createdSegments.entrySet()) { allocatedSegmentIds.put(entry.getKey(), entry.getValue().getId()); } return allocatedSegmentIds; @@ -1276,7 +1276,7 @@ 
private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( } final String dataSource = appendSegments.iterator().next().getDataSource(); - final List segmentIdsForNewVersions = connector.retryTransaction( + final List segmentIdsForNewVersions = connector.retryTransaction( (handle, transactionStatus) -> getPendingSegmentsForTaskGroupWithHandle(handle, dataSource, taskGroup), 0, @@ -1350,7 +1350,7 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( private int insertPendingSegmentsIntoMetastore( Handle handle, - List pendingSegments, + List pendingSegments, String dataSource, boolean skipSegmentLineageCheck ) throws JsonProcessingException @@ -1366,7 +1366,7 @@ private int insertPendingSegmentsIntoMetastore( )); final String now = DateTimes.nowUtc().toString(); - for (PendingSegment pendingSegment : pendingSegments) { + for (PendingSegmentRecord pendingSegment : pendingSegments) { final SegmentIdWithShardSpec segmentId = pendingSegment.getId(); final Interval interval = segmentId.getInterval(); @@ -1602,11 +1602,11 @@ private Set createNewIdsForAppendSegmentsWithVersion( // Get pending segments for the new version to determine the next partition number to allocate final String dataSource = segmentsToUpgrade.iterator().next().getDataSource(); - final List pendingSegments + final List pendingSegments = getPendingSegmentsForIntervalWithHandle(handle, dataSource, upgradeInterval); final Set allAllocatedIds = new HashSet<>( pendingSegments.stream() - .map(PendingSegment::getId) + .map(PendingSegmentRecord::getId) .collect(Collectors.toSet()) ); @@ -1647,7 +1647,7 @@ private Set createNewIdsForAppendSegmentsWithVersion( return newSegmentIds; } - private Map createNewSegments( + private Map createNewSegments( Handle handle, String dataSource, Interval interval, @@ -1697,19 +1697,19 @@ private Map createNewSegments( // to avoid clashes when inserting the pending segment created here. 
final Set pendingSegments = new HashSet<>( getPendingSegmentsForIntervalWithHandle(handle, dataSource, interval).stream() - .map(PendingSegment::getId) + .map(PendingSegmentRecord::getId) .collect(Collectors.toSet()) ); - final Map createdSegments = new HashMap<>(); - final Map uniqueRequestToSegment = new HashMap<>(); + final Map createdSegments = new HashMap<>(); + final Map uniqueRequestToSegment = new HashMap<>(); for (SegmentCreateRequest request : requests) { // Check if the required segment has already been created in this batch final UniqueAllocateRequest uniqueRequest = new UniqueAllocateRequest(interval, request, skipSegmentLineageCheck); - final PendingSegment createdSegment; + final PendingSegmentRecord createdSegment; if (uniqueRequestToSegment.containsKey(uniqueRequest)) { createdSegment = uniqueRequestToSegment.get(uniqueRequest); } else { @@ -1739,7 +1739,7 @@ private Map createNewSegments( return createdSegments; } - private PendingSegment createNewSegment( + private PendingSegmentRecord createNewSegment( SegmentCreateRequest request, String dataSource, Interval interval, @@ -1798,7 +1798,7 @@ private PendingSegment createNewSegment( version, partialShardSpec.complete(jsonMapper, newPartitionId, 0) ); - return new PendingSegment( + return new PendingSegmentRecord( pendingSegmentId, request.getSequenceName(), request.getPreviousSegmentId(), @@ -1839,7 +1839,7 @@ private PendingSegment createNewSegment( committedMaxId == null ? 0 : committedMaxId.getShardSpec().getNumCorePartitions() ) ); - return new PendingSegment( + return new PendingSegmentRecord( pendingSegmentId, request.getSequenceName(), request.getPreviousSegmentId(), @@ -1906,7 +1906,7 @@ private SegmentIdWithShardSpec createNewSegment( // to avoid clashes when inserting the pending segment created here. 
final Set pendings = new HashSet<>( getPendingSegmentsForIntervalWithHandle(handle, dataSource, interval).stream() - .map(PendingSegment::getId) + .map(PendingSegmentRecord::getId) .collect(Collectors.toSet()) ); if (committedMaxId != null) { @@ -2782,7 +2782,7 @@ public int deletePendingSegmentsForTaskGroup(final String pendingSegmentsGroup) } @Override - public List getAllPendingSegments(String datasource) + public List getAllPendingSegments(String datasource) { return connector.retryWithHandle( handle -> getPendingSegmentsForIntervalWithHandle(handle, datasource, Intervals.ETERNITY) diff --git a/server/src/main/java/org/apache/druid/metadata/PendingSegment.java b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java similarity index 94% rename from server/src/main/java/org/apache/druid/metadata/PendingSegment.java rename to server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java index ed1819a377dc..a95d6eb20be7 100644 --- a/server/src/main/java/org/apache/druid/metadata/PendingSegment.java +++ b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java @@ -32,7 +32,7 @@ import javax.annotation.Nullable; import java.sql.ResultSet; -public class PendingSegment +public class PendingSegmentRecord { private final SegmentIdWithShardSpec id; private final String sequenceName; @@ -41,7 +41,7 @@ public class PendingSegment private final String taskGroup; @JsonCreator - public PendingSegment( + public PendingSegmentRecord( @JsonProperty("id") SegmentIdWithShardSpec id, @JsonProperty("sequenceName") String sequenceName, @JsonProperty("sequencePrevId") String sequencePrevId, @@ -74,12 +74,14 @@ public String getSequencePrevId() return sequencePrevId; } + @Nullable @JsonProperty public String getParentId() { return parentId; } + @Nullable @JsonProperty public String getTaskGroup() { @@ -109,11 +111,11 @@ public String computeSequenceNamePrevIdSha1(boolean skipSegmentLineageCheck) return 
BaseEncoding.base16().encode(hasher.hash().asBytes()); } - public static PendingSegment fromResultSet(ResultSet resultSet, ObjectMapper jsonMapper) + public static PendingSegmentRecord fromResultSet(ResultSet resultSet, ObjectMapper jsonMapper) { try { final byte[] payload = resultSet.getBytes("payload"); - return new PendingSegment( + return new PendingSegmentRecord( jsonMapper.readValue(payload, SegmentIdWithShardSpec.class), resultSet.getString("sequence_name"), resultSet.getString("sequence_prev_id"), From 9906afc23d136011b0c4521094a55ddfcd57e28d Mon Sep 17 00:00:00 2001 From: Amatya Date: Tue, 9 Apr 2024 16:48:10 +0530 Subject: [PATCH 08/20] Rename columns and add new interface --- .../druid/msq/indexing/MSQControllerTask.java | 3 +- .../druid/msq/indexing/MSQWorkerTask.java | 3 +- .../SegmentTransactionalAppendAction.java | 6 +- .../SegmentTransactionalReplaceAction.java | 6 +- .../indexing/common/task/AbstractTask.java | 6 -- .../task/PendingSegmentAllocatingTask.java | 29 +++++++ .../druid/indexing/common/task/Task.java | 6 -- .../parallel/ParallelIndexSupervisorTask.java | 4 +- .../batch/parallel/SinglePhaseSubTask.java | 9 +-- .../druid/indexing/overlord/TaskLockbox.java | 54 ++++++++----- .../SeekableStreamIndexTask.java | 3 +- .../druid/indexing/common/task/TaskTest.java | 6 -- .../task/concurrent/CommandQueueTask.java | 3 +- ...TestIndexerMetadataStorageCoordinator.java | 2 +- .../IndexerMetadataStorageCoordinator.java | 13 ++- .../overlord/SegmentCreateRequest.java | 20 ++--- .../IndexerSQLMetadataStorageCoordinator.java | 81 ++++++++++--------- .../druid/metadata/PendingSegmentRecord.java | 24 +++--- .../druid/metadata/SQLMetadataConnector.java | 16 ++-- 19 files changed, 165 insertions(+), 129 deletions(-) create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/PendingSegmentAllocatingTask.java diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java index 554c17b9ce86..2fc51a53856f 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java @@ -40,6 +40,7 @@ import org.apache.druid.indexing.common.actions.TimeChunkLockTryAcquireAction; import org.apache.druid.indexing.common.config.TaskConfig; import org.apache.druid.indexing.common.task.AbstractTask; +import org.apache.druid.indexing.common.task.PendingSegmentAllocatingTask; import org.apache.druid.indexing.common.task.Tasks; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; @@ -70,7 +71,7 @@ import java.util.Set; @JsonTypeName(MSQControllerTask.TYPE) -public class MSQControllerTask extends AbstractTask implements ClientTaskQuery +public class MSQControllerTask extends AbstractTask implements ClientTaskQuery, PendingSegmentAllocatingTask { public static final String TYPE = "query_controller"; public static final String DUMMY_DATASOURCE_FOR_SELECT = "__query_select"; diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java index 8cbce08275ab..cdc4d5973874 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java @@ -32,6 +32,7 @@ import org.apache.druid.indexing.common.actions.TaskActionClient; import org.apache.druid.indexing.common.config.TaskConfig; import org.apache.druid.indexing.common.task.AbstractTask; +import org.apache.druid.indexing.common.task.PendingSegmentAllocatingTask; import org.apache.druid.indexing.common.task.Tasks; import 
org.apache.druid.msq.exec.MSQTasks; import org.apache.druid.msq.exec.Worker; @@ -45,7 +46,7 @@ import java.util.Set; @JsonTypeName(MSQWorkerTask.TYPE) -public class MSQWorkerTask extends AbstractTask +public class MSQWorkerTask extends AbstractTask implements PendingSegmentAllocatingTask { public static final String TYPE = "query_worker"; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java index 9914ede473ac..da9b74be8d57 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java @@ -26,6 +26,7 @@ import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; import org.apache.druid.indexing.common.task.IndexTaskUtils; +import org.apache.druid.indexing.common.task.PendingSegmentAllocatingTask; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.overlord.CriticalAction; import org.apache.druid.indexing.overlord.DataSourceMetadata; @@ -132,11 +133,12 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) = TaskLocks.findReplaceLocksCoveringSegments(datasource, toolbox.getTaskLockbox(), segments); final CriticalAction.Action publishAction; + final String pendingSegmentGroupId = ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId(); if (startMetadata == null) { publishAction = () -> toolbox.getIndexerMetadataStorageCoordinator().commitAppendSegments( segments, segmentToReplaceLock, - task.getPendingSegmentGroupId() + pendingSegmentGroupId ); } else { publishAction = () -> toolbox.getIndexerMetadataStorageCoordinator().commitAppendSegmentsAndMetadata( @@ -144,7 +146,7 @@ public SegmentPublishResult 
perform(Task task, TaskActionToolbox toolbox) segmentToReplaceLock, startMetadata, endMetadata, - task.getPendingSegmentGroupId() + pendingSegmentGroupId ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java index 2c9294d5cdd5..922cb499fc85 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java @@ -158,11 +158,11 @@ private void registerUpgradedPendingSegmentsOnSupervisor(Task task, TaskActionTo )); Map upgradedPendingSegments = new HashMap<>(); pendingSegments.forEach(pendingSegment -> { - if (pendingSegment.getParentId() != null - && !pendingSegment.getParentId().equals(pendingSegment.getId().asSegmentId().toString())) { + if (pendingSegment.getUpgradedFromSegmentId() != null + && !pendingSegment.getUpgradedFromSegmentId().equals(pendingSegment.getId().asSegmentId().toString())) { upgradedPendingSegments.put( pendingSegment.getId(), - pendingSegmentIdMap.get(pendingSegment.getParentId()) + pendingSegmentIdMap.get(pendingSegment.getUpgradedFromSegmentId()) ); } }); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java index f3d81cea692c..0c40f3b52877 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractTask.java @@ -279,12 +279,6 @@ public String getGroupId() return groupId; } - @Override - public String getPendingSegmentGroupId() - { - throw new UnsupportedOperationException(); - } - @JsonProperty("resource") @Override public 
TaskResource getTaskResource() diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/PendingSegmentAllocatingTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/PendingSegmentAllocatingTask.java new file mode 100644 index 000000000000..089a50643518 --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/PendingSegmentAllocatingTask.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task; + +public interface PendingSegmentAllocatingTask +{ + /** + * Unique string used by an appending task (or its sub-tasks and replicas) to allocate pending segments + * and identify pending segments allocated to it. 
+ */ + String getPendingSegmentGroupId(); +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java index 4004515d80ae..a5f34f9bbbfe 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java @@ -101,12 +101,6 @@ public interface Task */ String getGroupId(); - /** - * Unique string used by an appending task (or its sub-tasks and replicas) to allocate pending segments - * and identify pending segments allocated to it. - */ - String getPendingSegmentGroupId(); - /** * Returns task priority. The task priority is currently used only for prioritized locking, but, in the future, it can * be used for task scheduling, cluster resource management, etc. diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 09ff31defe5d..28be1e811c31 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -52,6 +52,7 @@ import org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec; import org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig; import org.apache.druid.indexing.common.task.IndexTaskUtils; +import org.apache.druid.indexing.common.task.PendingSegmentAllocatingTask; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.common.task.TaskResource; import org.apache.druid.indexing.common.task.Tasks; @@ -131,7 +132,8 @@ * * @see ParallelIndexTaskRunner */ -public class ParallelIndexSupervisorTask extends 
AbstractBatchIndexTask implements ChatHandler +public class ParallelIndexSupervisorTask extends AbstractBatchIndexTask + implements ChatHandler, PendingSegmentAllocatingTask { public static final String TYPE = "index_parallel"; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java index c80954339894..3f76258d621b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java @@ -43,6 +43,7 @@ import org.apache.druid.indexing.common.task.BatchAppenderators; import org.apache.druid.indexing.common.task.IndexTask; import org.apache.druid.indexing.common.task.IndexTaskUtils; +import org.apache.druid.indexing.common.task.PendingSegmentAllocatingTask; import org.apache.druid.indexing.common.task.SegmentAllocatorForBatch; import org.apache.druid.indexing.common.task.SegmentAllocators; import org.apache.druid.indexing.common.task.TaskResource; @@ -105,7 +106,7 @@ * generates and pushes segments, and reports them to the {@link SinglePhaseParallelIndexTaskRunner} instead of * publishing on its own. 
*/ -public class SinglePhaseSubTask extends AbstractBatchSubtask implements ChatHandler +public class SinglePhaseSubTask extends AbstractBatchSubtask implements ChatHandler, PendingSegmentAllocatingTask { public static final String TYPE = "single_phase_sub_task"; public static final String OLD_TYPE_NAME = "index_sub"; @@ -239,11 +240,7 @@ public String getSubtaskSpecId() @Override public String getPendingSegmentGroupId() { - if (getIngestionMode() == IngestionMode.APPEND) { - return getGroupId(); - } else { - throw new UnsupportedOperationException(); - } + return getGroupId(); } @Override diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java index b5dc89dbfd20..511a6c6bd59a 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java @@ -39,6 +39,7 @@ import org.apache.druid.indexing.common.actions.SegmentAllocateAction; import org.apache.druid.indexing.common.actions.SegmentAllocateRequest; import org.apache.druid.indexing.common.actions.SegmentAllocateResult; +import org.apache.druid.indexing.common.task.PendingSegmentAllocatingTask; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.common.task.Tasks; import org.apache.druid.java.util.common.ISE; @@ -221,9 +222,12 @@ public int compare(Pair left, Pair right) } activePendingTaskGroupToTaskIds.clear(); for (Task task : storedActiveTasks) { - if (activeTasks.contains(task.getId()) && task.getPendingSegmentGroupId() != null) { - activePendingTaskGroupToTaskIds.computeIfAbsent(task.getPendingSegmentGroupId(), s -> new HashSet<>()) - .add(task.getId()); + if (task instanceof PendingSegmentAllocatingTask) { + final String pendingSegmentAllocatingTask = ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId(); + if 
(activeTasks.contains(task.getId()) && pendingSegmentAllocatingTask != null) { + activePendingTaskGroupToTaskIds.computeIfAbsent(pendingSegmentAllocatingTask, s -> new HashSet<>()) + .add(task.getId()); + } } } @@ -424,7 +428,12 @@ public LockResult tryLock(final Task task, final LockRequest request) newSegmentId ); } - newSegmentId = allocateSegmentId(lockRequestForNewSegment, posseToUse.getTaskLock().getVersion(), task.getPendingSegmentGroupId()); + final String pendingSegmentGroupId = ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId(); + newSegmentId = allocateSegmentId( + lockRequestForNewSegment, + posseToUse.getTaskLock().getVersion(), + pendingSegmentGroupId + ); } } return LockResult.ok(posseToUse.getTaskLock(), newSegmentId); @@ -1173,8 +1182,9 @@ public void add(Task task) try { log.info("Adding task[%s] to activeTasks", task.getId()); activeTasks.add(task.getId()); - if (task.getPendingSegmentGroupId() != null) { - activePendingTaskGroupToTaskIds.computeIfAbsent(task.getPendingSegmentGroupId(), s -> new HashSet<>()) + if (task instanceof PendingSegmentAllocatingTask) { + final String pendingSegmentGroupId = ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId(); + activePendingTaskGroupToTaskIds.computeIfAbsent(pendingSegmentGroupId, s -> new HashSet<>()) .add(task.getId()); } } @@ -1202,21 +1212,23 @@ public void remove(final Task task) task.getId() ); } - final String pendingSegmentGroup = task.getPendingSegmentGroupId(); - if (pendingSegmentGroup != null && activePendingTaskGroupToTaskIds.containsKey(pendingSegmentGroup)) { - final Set idsInSameGroup = activePendingTaskGroupToTaskIds.get(pendingSegmentGroup); - idsInSameGroup.remove(task.getId()); - if (idsInSameGroup.isEmpty()) { - if (findLocksForTask(task).stream().anyMatch(lock -> lock.getType() == TaskLockType.APPEND)) { - final int pendingSegmentsDeleted - = metadataStorageCoordinator.deletePendingSegmentsForTaskGroup(task.getPendingSegmentGroupId()); - log.info( - 
"Deleted [%d] entries from pendingSegments table for pending segments group [%s] with APPEND locks.", - pendingSegmentsDeleted, - task.getPendingSegmentGroupId() - ); + if (task instanceof PendingSegmentAllocatingTask) { + final String pendingSegmentGroup = ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId(); + if (pendingSegmentGroup != null && activePendingTaskGroupToTaskIds.containsKey(pendingSegmentGroup)) { + final Set idsInSameGroup = activePendingTaskGroupToTaskIds.get(pendingSegmentGroup); + idsInSameGroup.remove(task.getId()); + if (idsInSameGroup.isEmpty()) { + if (findLocksForTask(task).stream().anyMatch(lock -> lock.getType() == TaskLockType.APPEND)) { + final int pendingSegmentsDeleted + = metadataStorageCoordinator.deletePendingSegmentsForTaskGroup(pendingSegmentGroup); + log.info( + "Deleted [%d] entries from pendingSegments table for pending segments group [%s] with APPEND locks.", + pendingSegmentsDeleted, + pendingSegmentGroup + ); + } + activePendingTaskGroupToTaskIds.remove(pendingSegmentGroup); } - activePendingTaskGroupToTaskIds.remove(pendingSegmentGroup); } } unlockAll(task); @@ -1808,7 +1820,7 @@ SegmentCreateRequest getSegmentRequest() acquiredLock == null ? 
lockRequest.getVersion() : acquiredLock.getVersion(), action.getPartialShardSpec(), null, - task.getPendingSegmentGroupId() + ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId() ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java index 4ebdcc301a72..28c935f66aec 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java @@ -37,6 +37,7 @@ import org.apache.druid.indexing.common.actions.TaskLocks; import org.apache.druid.indexing.common.config.TaskConfig; import org.apache.druid.indexing.common.task.AbstractTask; +import org.apache.druid.indexing.common.task.PendingSegmentAllocatingTask; import org.apache.druid.indexing.common.task.TaskResource; import org.apache.druid.indexing.common.task.Tasks; import org.apache.druid.indexing.seekablestream.common.RecordSupplier; @@ -61,7 +62,7 @@ public abstract class SeekableStreamIndexTask - extends AbstractTask implements ChatHandler + extends AbstractTask implements ChatHandler, PendingSegmentAllocatingTask { public static final long LOCK_ACQUIRE_TIMEOUT_SECONDS = 15; private static final EmittingLogger log = new EmittingLogger(SeekableStreamIndexTask.class); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskTest.java index 59c716e6638d..c2957f6688c7 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TaskTest.java @@ -47,12 +47,6 @@ public String getGroupId() return null; } - @Override - public String getPendingSegmentGroupId() - { - return null; - } 
- @Override public TaskResource getTaskResource() { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java index d3ead51bb91b..ed9b998052fe 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java @@ -24,6 +24,7 @@ import org.apache.druid.indexing.common.actions.TaskActionClient; import org.apache.druid.indexing.common.config.TaskConfig; import org.apache.druid.indexing.common.task.AbstractTask; +import org.apache.druid.indexing.common.task.PendingSegmentAllocatingTask; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.logger.Logger; @@ -40,7 +41,7 @@ /** * Test task that can be given a series of commands to execute in its {@link #runTask} method. 
*/ -public class CommandQueueTask extends AbstractTask +public class CommandQueueTask extends AbstractTask implements PendingSegmentAllocatingTask { private static final Logger log = new Logger(CommandQueueTask.class); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index c80dd53fb68c..95fe2623171b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -232,7 +232,7 @@ public SegmentIdWithShardSpec allocatePendingSegment( PartialShardSpec partialShardSpec, String maxVersion, boolean skipSegmentLineageCheck, - String taskGroup + String taskAllocatorId ) { return new SegmentIdWithShardSpec( diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index 07b275c80c14..cc6af8498691 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -239,7 +239,7 @@ Map allocatePendingSegments( * identifier may have a version lower than this one, but will not have one higher. * @param skipSegmentLineageCheck if true, perform lineage validation using previousSegmentId for this sequence. 
* Should be set to false if replica tasks would index events in same order - * @param taskGroup The task group with which the pending segment is associated + * @param taskAllocatorId The task allocator id with which the pending segment is associated * @return the pending segment identifier, or null if it was impossible to allocate a new segment */ SegmentIdWithShardSpec allocatePendingSegment( @@ -250,7 +250,7 @@ SegmentIdWithShardSpec allocatePendingSegment( PartialShardSpec partialShardSpec, String maxVersion, boolean skipSegmentLineageCheck, - String taskGroup + String taskAllocatorId ); /** @@ -324,12 +324,12 @@ SegmentPublishResult commitSegmentsAndMetadata( * must be committed in a single transaction. * @param appendSegmentToReplaceLock Map from append segment to the currently * active REPLACE lock (if any) covering it - * @param taskGroup taskGroup of the task committing the appended segments + * @param taskAllocatorId allocator id of the task committing the segments to be appended */ SegmentPublishResult commitAppendSegments( Set appendSegments, Map appendSegmentToReplaceLock, - String taskGroup + String taskAllocatorId ); /** @@ -486,5 +486,10 @@ SegmentPublishResult commitMetadataOnly( */ int deletePendingSegmentsForTaskGroup(String taskGroup); + /** + * Fetches all the pending segments present in the metadata store for a given datasource + * @param datasource datasource to be queried + * @return List of pending segment records + */ List getAllPendingSegments(String datasource); } diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/SegmentCreateRequest.java b/server/src/main/java/org/apache/druid/indexing/overlord/SegmentCreateRequest.java index 517381fe53f0..49b31e5e6ff9 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/SegmentCreateRequest.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/SegmentCreateRequest.java @@ -38,24 +38,24 @@ public class SegmentCreateRequest private final String sequenceName; 
private final String previousSegmentId; private final PartialShardSpec partialShardSpec; - private final String parentId; - private final String taskGroup; + private final String upgradedFromSegmentId; + private final String taskAllocatorId; public SegmentCreateRequest( String sequenceName, String previousSegmentId, String version, PartialShardSpec partialShardSpec, - String parentId, - String taskGroup + String upgradedFromSegmentId, + String taskAllocatorId ) { this.sequenceName = sequenceName; this.previousSegmentId = previousSegmentId == null ? "" : previousSegmentId; this.version = version; this.partialShardSpec = partialShardSpec; - this.parentId = parentId; - this.taskGroup = taskGroup; + this.upgradedFromSegmentId = upgradedFromSegmentId; + this.taskAllocatorId = taskAllocatorId; } public String getSequenceName() @@ -82,13 +82,13 @@ public PartialShardSpec getPartialShardSpec() return partialShardSpec; } - public String getParentId() + public String getUpgradedFromSegmentId() { - return parentId; + return upgradedFromSegmentId; } - public String getTaskGroup() + public String getTaskAllocatorId() { - return taskGroup; + return taskAllocatorId; } } diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 7cfa23e933df..6a255ec3fe33 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -292,7 +292,7 @@ List getPendingSegmentsForIntervalWithHandle( { boolean compareIntervalEndpointsAsStrings = Intervals.canCompareEndpointsAsStrings(interval); - String sql = "SELECT payload, sequence_name, sequence_prev_id, task_group, parent_id" + String sql = "SELECT payload, sequence_name, sequence_prev_id, task_allocator_id, upgraded_from_segment_id" + " FROM " + 
dbTables.getPendingSegmentsTable() + " WHERE dataSource = :dataSource"; if (compareIntervalEndpointsAsStrings) { @@ -323,19 +323,19 @@ List getPendingSegmentsForIntervalWithHandle( return pendingSegments.build(); } - List getPendingSegmentsForTaskGroupWithHandle( + List getPendingSegmentsForTaskAllocatorIdWithHandle( final Handle handle, final String dataSource, - final String taskGroup + final String taskAllocatorId ) { - String sql = "SELECT payload, sequence_name, sequence_prev_id, task_group, parent_id" + String sql = "SELECT payload, sequence_name, sequence_prev_id, task_allocator_id, upgraded_from_segment_id" + " FROM " + dbTables.getPendingSegmentsTable() - + " WHERE dataSource = :dataSource AND task_group = :task_group"; + + " WHERE dataSource = :dataSource AND task_allocator_id = :task_allocator_id"; Query> query = handle.createQuery(sql) .bind("dataSource", dataSource) - .bind("task_group", taskGroup); + .bind("task_allocator_id", taskAllocatorId); final ResultIterator pendingSegmentRecords = query.map((index, r, ctx) -> PendingSegmentRecord.fromResultSet(r, jsonMapper)) @@ -497,7 +497,7 @@ public SegmentPublishResult commitReplaceSegments( public SegmentPublishResult commitAppendSegments( final Set appendSegments, final Map appendSegmentToReplaceLock, - final String taskGroup + final String taskAllocatorId ) { return commitAppendSegmentsAndMetadataInTransaction( @@ -505,7 +505,7 @@ public SegmentPublishResult commitAppendSegments( appendSegmentToReplaceLock, null, null, - taskGroup + taskAllocatorId ); } @@ -515,7 +515,7 @@ public SegmentPublishResult commitAppendSegmentsAndMetadata( Map appendSegmentToReplaceLock, DataSourceMetadata startMetadata, DataSourceMetadata endMetadata, - String taskGroup + String taskAllocatorId ) { return commitAppendSegmentsAndMetadataInTransaction( @@ -523,7 +523,7 @@ public SegmentPublishResult commitAppendSegmentsAndMetadata( appendSegmentToReplaceLock, startMetadata, endMetadata, - taskGroup + taskAllocatorId ); } @@ 
-627,7 +627,7 @@ public SegmentIdWithShardSpec allocatePendingSegment( final PartialShardSpec partialShardSpec, final String maxVersion, final boolean skipSegmentLineageCheck, - String taskGroup + String taskAllocatorId ) { Preconditions.checkNotNull(dataSource, "dataSource"); @@ -660,7 +660,7 @@ public SegmentIdWithShardSpec allocatePendingSegment( partialShardSpec, maxVersion, existingChunks, - taskGroup + taskAllocatorId ); } else { return allocatePendingSegmentWithSegmentLineageCheck( @@ -672,7 +672,7 @@ public SegmentIdWithShardSpec allocatePendingSegment( partialShardSpec, maxVersion, existingChunks, - taskGroup + taskAllocatorId ); } } @@ -754,8 +754,8 @@ private Map upgradePendingSegmen newId, UPGRADED_PENDING_SEGMENT_PREFIX + replaceVersion, pendingSegmentId.toString(), - overlappingPendingSegment.getParentId(), - overlappingPendingSegment.getTaskGroup() + pendingSegmentId.toString(), + overlappingPendingSegment.getTaskAllocatorId() ) ); pendingSegmentToNewId.put(pendingSegmentId, newId); @@ -785,7 +785,9 @@ private boolean shouldUpgradePendingSegment( String replaceVersion ) { - if (pendingSegment.getId().getVersion().compareTo(replaceVersion) >= 0) { + if (pendingSegment.getTaskAllocatorId() == null) { + return false; + } else if (pendingSegment.getId().getVersion().compareTo(replaceVersion) >= 0) { return false; } else if (!replaceInterval.contains(pendingSegment.getId().getInterval())) { return false; @@ -806,7 +808,7 @@ private SegmentIdWithShardSpec allocatePendingSegmentWithSegmentLineageCheck( final PartialShardSpec partialShardSpec, final String maxVersion, final List> existingChunks, - final String taskGroup + final String taskAllocatorId ) throws IOException { final String previousSegmentIdNotNull = previousSegmentId == null ? 
"" : previousSegmentId; @@ -877,7 +879,7 @@ private SegmentIdWithShardSpec allocatePendingSegmentWithSegmentLineageCheck( previousSegmentIdNotNull, sequenceName, sequenceNamePrevIdSha1, - taskGroup + taskAllocatorId ); return newIdentifier; } @@ -993,7 +995,7 @@ private SegmentIdWithShardSpec allocatePendingSegment( final PartialShardSpec partialShardSpec, final String maxVersion, final List> existingChunks, - final String taskGroup + final String taskAllocatorId ) throws IOException { final String sql = StringUtils.format( @@ -1057,7 +1059,9 @@ private SegmentIdWithShardSpec allocatePendingSegment( ); // always insert empty previous sequence id - insertPendingSegmentIntoMetastore(handle, newIdentifier, dataSource, interval, "", sequenceName, sequenceNamePrevIdSha1, taskGroup); + insertPendingSegmentIntoMetastore(handle, newIdentifier, dataSource, interval, "", sequenceName, sequenceNamePrevIdSha1, + taskAllocatorId + ); log.info( "Created new pending segment[%s] for datasource[%s], sequence[%s], interval[%s].", @@ -1266,7 +1270,7 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( Map appendSegmentToReplaceLock, @Nullable DataSourceMetadata startMetadata, @Nullable DataSourceMetadata endMetadata, - String taskGroup + String taskAllocatorId ) { verifySegmentsToCommit(appendSegments); @@ -1278,7 +1282,7 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( final String dataSource = appendSegments.iterator().next().getDataSource(); final List segmentIdsForNewVersions = connector.retryTransaction( (handle, transactionStatus) - -> getPendingSegmentsForTaskGroupWithHandle(handle, dataSource, taskGroup), + -> getPendingSegmentsForTaskAllocatorIdWithHandle(handle, dataSource, taskAllocatorId), 0, SQLMetadataConnector.DEFAULT_MAX_TRIES ); @@ -1291,8 +1295,8 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( appendSegments.forEach(segment -> segmentIdMap.put(segment.getId().toString(), segment)); 
segmentIdsForNewVersions.forEach( pendingSegment -> { - if (segmentIdMap.containsKey(pendingSegment.getParentId())) { - final DataSegment oldSegment = segmentIdMap.get(pendingSegment.getParentId()); + if (segmentIdMap.containsKey(pendingSegment.getUpgradedFromSegmentId())) { + final DataSegment oldSegment = segmentIdMap.get(pendingSegment.getUpgradedFromSegmentId()); allSegmentsToInsert.add( new DataSegment( pendingSegment.getId().asSegmentId(), @@ -1358,9 +1362,9 @@ private int insertPendingSegmentsIntoMetastore( final PreparedBatch insertBatch = handle.prepareBatch( StringUtils.format( "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, sequence_name, sequence_prev_id, " - + "sequence_name_prev_id_sha1, payload, task_group, parent_id) " + + "sequence_name_prev_id_sha1, payload, task_allocator_id, upgraded_from_segment_id) " + "VALUES (:id, :dataSource, :created_date, :start, :end, :sequence_name, :sequence_prev_id, " - + ":sequence_name_prev_id_sha1, :payload, :task_group, :parent_id)", + + ":sequence_name_prev_id_sha1, :payload, :task_allocator_id, :upgraded_from_segment_id)", dbTables.getPendingSegmentsTable(), connector.getQuoteString() )); @@ -1383,8 +1387,8 @@ private int insertPendingSegmentsIntoMetastore( pendingSegment.computeSequenceNamePrevIdSha1(skipSegmentLineageCheck) ) .bind("payload", jsonMapper.writeValueAsBytes(segmentId)) - .bind("task_group", pendingSegment.getTaskGroup()) - .bind("parent_id", pendingSegment.getParentId()); + .bind("task_allocator_id", pendingSegment.getTaskAllocatorId()) + .bind("upgraded_from_segment_id", pendingSegment.getUpgradedFromSegmentId()); } int[] updated = insertBatch.execute(); return Arrays.stream(updated).sum(); @@ -1443,15 +1447,15 @@ private void insertPendingSegmentIntoMetastore( String previousSegmentId, String sequenceName, String sequenceNamePrevIdSha1, - String taskGroup + String taskAllocatorId ) throws JsonProcessingException { handle.createStatement( StringUtils.format( "INSERT INTO 
%1$s (id, dataSource, created_date, start, %2$send%2$s, sequence_name, sequence_prev_id, " - + "sequence_name_prev_id_sha1, payload, parent_id, task_group) " + + "sequence_name_prev_id_sha1, payload, task_allocator_id) " + "VALUES (:id, :dataSource, :created_date, :start, :end, :sequence_name, :sequence_prev_id, " - + ":sequence_name_prev_id_sha1, :payload, :parent_id, :task_group)", + + ":sequence_name_prev_id_sha1, :payload, :task_allocator_id)", dbTables.getPendingSegmentsTable(), connector.getQuoteString() ) @@ -1465,8 +1469,7 @@ private void insertPendingSegmentIntoMetastore( .bind("sequence_prev_id", previousSegmentId) .bind("sequence_name_prev_id_sha1", sequenceNamePrevIdSha1) .bind("payload", jsonMapper.writeValueAsBytes(newIdentifier)) - .bind("parent_id", newIdentifier.toString()) - .bind("task_group", taskGroup) + .bind("task_allocator_id", taskAllocatorId) .execute(); } @@ -1802,8 +1805,8 @@ private PendingSegmentRecord createNewSegment( pendingSegmentId, request.getSequenceName(), request.getPreviousSegmentId(), - request.getParentId() == null ? pendingSegmentId.asSegmentId().toString() : request.getParentId(), - request.getTaskGroup() + request.getUpgradedFromSegmentId(), + request.getTaskAllocatorId() ); } else if (!overallMaxId.getInterval().equals(interval)) { @@ -1843,8 +1846,8 @@ private PendingSegmentRecord createNewSegment( pendingSegmentId, request.getSequenceName(), request.getPreviousSegmentId(), - request.getParentId() == null ? 
pendingSegmentId.asSegmentId().toString() : request.getParentId(), - request.getTaskGroup() + request.getUpgradedFromSegmentId(), + request.getTaskAllocatorId() ); } } @@ -2772,11 +2775,11 @@ public int deletePendingSegmentsForTaskGroup(final String pendingSegmentsGroup) (handle, status) -> handle .createStatement( StringUtils.format( - "DELETE FROM %s WHERE task_group = :task_group", + "DELETE FROM %s WHERE task_allocator_id = :task_allocator_id", dbTables.getPendingSegmentsTable() ) ) - .bind("task_group", pendingSegmentsGroup) + .bind("task_allocator_id", pendingSegmentsGroup) .execute() ); } diff --git a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java index a95d6eb20be7..58ac9576017b 100644 --- a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java +++ b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java @@ -37,23 +37,23 @@ public class PendingSegmentRecord private final SegmentIdWithShardSpec id; private final String sequenceName; private final String sequencePrevId; - private final String parentId; - private final String taskGroup; + private final String upgradedFromSegmentId; + private final String taskAllocatorId; @JsonCreator public PendingSegmentRecord( @JsonProperty("id") SegmentIdWithShardSpec id, @JsonProperty("sequenceName") String sequenceName, @JsonProperty("sequencePrevId") String sequencePrevId, - @JsonProperty("parentId") @Nullable String parentId, - @JsonProperty("taskGroup") @Nullable String taskGroup + @JsonProperty("upgradedFromSegmentId") @Nullable String upgradedFromSegmentId, + @JsonProperty("taskAllocatorId") @Nullable String taskAllocatorId ) { this.id = id; this.sequenceName = sequenceName; this.sequencePrevId = sequencePrevId; - this.parentId = parentId; - this.taskGroup = taskGroup; + this.upgradedFromSegmentId = upgradedFromSegmentId; + this.taskAllocatorId = taskAllocatorId; } @JsonProperty @@ 
-76,16 +76,16 @@ public String getSequencePrevId() @Nullable @JsonProperty - public String getParentId() + public String getUpgradedFromSegmentId() { - return parentId; + return upgradedFromSegmentId; } @Nullable @JsonProperty - public String getTaskGroup() + public String getTaskAllocatorId() { - return taskGroup; + return taskAllocatorId; } @SuppressWarnings("UnstableApiUsage") @@ -119,8 +119,8 @@ public static PendingSegmentRecord fromResultSet(ResultSet resultSet, ObjectMapp jsonMapper.readValue(payload, SegmentIdWithShardSpec.class), resultSet.getString("sequence_name"), resultSet.getString("sequence_prev_id"), - resultSet.getString("parent_id"), - resultSet.getString("task_group") + resultSet.getString("upgraded_from_segment_id"), + resultSet.getString("task_allocator_id") ); } catch (Exception e) { diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java index 752f0aa9af4c..99d69f5e14a3 100644 --- a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java +++ b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java @@ -464,17 +464,17 @@ private void alterEntryTableAddTypeAndGroupId(final String tableName) private void alterPendingSegmentsTableAddParentIdAndTaskGroup(final String tableName) { List statements = new ArrayList<>(); - if (tableHasColumn(tableName, "parent_id")) { - log.info("Table[%s] already has column[parent_id].", tableName); + if (tableHasColumn(tableName, "upgraded_from_segment_id")) { + log.info("Table[%s] already has column[upgraded_from_segment_id].", tableName); } else { - log.info("Adding column[parent_id] to table[%s].", tableName); - statements.add(StringUtils.format("ALTER TABLE %1$s ADD COLUMN parent_id VARCHAR(255)", tableName)); + log.info("Adding column[upgraded_from_segment_id] to table[%s].", tableName); + statements.add(StringUtils.format("ALTER TABLE %1$s ADD COLUMN upgraded_from_segment_id 
VARCHAR(255)", tableName)); } - if (tableHasColumn(tableName, "task_group")) { - log.info("Table[%s] already has column[task_group].", tableName); + if (tableHasColumn(tableName, "task_allocator_id")) { + log.info("Table[%s] already has column[task_allocator_id].", tableName); } else { - log.info("Adding column[task_group] to table[%s].", tableName); - statements.add(StringUtils.format("ALTER TABLE %1$s ADD COLUMN task_group VARCHAR(255)", tableName)); + log.info("Adding column[task_allocator_id] to table[%s].", tableName); + statements.add(StringUtils.format("ALTER TABLE %1$s ADD COLUMN task_allocator_id VARCHAR(255)", tableName)); } if (!statements.isEmpty()) { alterTable(tableName, statements); From dfd3cad5a0266ce28fbca865d10fc26d0aed7412 Mon Sep 17 00:00:00 2001 From: Amatya Date: Tue, 9 Apr 2024 21:27:28 +0530 Subject: [PATCH 09/20] NoopTask can allocate segments --- .../org/apache/druid/indexing/common/task/NoopTask.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/NoopTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/NoopTask.java index 678d8cafd01a..9f3160cb7501 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/NoopTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/NoopTask.java @@ -39,7 +39,7 @@ /** */ -public class NoopTask extends AbstractTask +public class NoopTask extends AbstractTask implements PendingSegmentAllocatingTask { private static final int DEFAULT_RUN_TIME = 2500; @@ -111,6 +111,12 @@ public int getPriority() return getContextValue(Tasks.PRIORITY_KEY, Tasks.DEFAULT_BATCH_INDEX_TASK_PRIORITY); } + @Override + public String getPendingSegmentGroupId() + { + return getId(); + } + public static NoopTask create() { return forDatasource(null); From 21b8ee5c789d7ff05c3e40aca6f0a63fa1d3cc71 Mon Sep 17 00:00:00 2001 From: Amatya Date: Wed, 10 Apr 2024 09:01:18 
+0530 Subject: [PATCH 10/20] IndexTasks can allocate pending segments --- .../org/apache/druid/indexing/common/task/IndexTask.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index 532529ecfc19..2bd4fafa9e7f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -137,7 +137,7 @@ import java.util.function.Function; import java.util.stream.Collectors; -public class IndexTask extends AbstractBatchIndexTask implements ChatHandler +public class IndexTask extends AbstractBatchIndexTask implements ChatHandler, PendingSegmentAllocatingTask { public static final HashFunction HASH_FUNCTION = Hashing.murmur3_128(); @@ -302,6 +302,12 @@ public Granularity getSegmentGranularity() } } + @Override + public String getPendingSegmentGroupId() + { + return getGroupId(); + } + @Nonnull @JsonIgnore @Override From 7ffe0ecc5ecef8d78bf2f7a7af7ab74d9bc97746 Mon Sep 17 00:00:00 2001 From: Amatya Date: Wed, 10 Apr 2024 09:39:54 +0530 Subject: [PATCH 11/20] Do not throw UOE --- .../task/batch/parallel/ParallelIndexSupervisorTask.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 34046bf5fcd0..403f0e617ce5 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -481,11 +481,7 @@ public boolean 
isPerfectRollup() @Override public String getPendingSegmentGroupId() { - if (getIngestionMode() == IngestionMode.APPEND) { - return getGroupId(); - } else { - throw new UnsupportedOperationException(); - } + return getGroupId(); } @Nullable From 3e17b194f5c02e6364988ef348b24482c36afeb9 Mon Sep 17 00:00:00 2001 From: Amatya Date: Wed, 10 Apr 2024 14:07:42 +0530 Subject: [PATCH 12/20] Handle test failures for legacy tasks --- .../common/task/AppenderatorDriverRealtimeIndexTask.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java index 2e4710d43cd0..c99be531d248 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java @@ -118,7 +118,8 @@ import java.util.concurrent.TimeoutException; @Deprecated -public class AppenderatorDriverRealtimeIndexTask extends AbstractTask implements ChatHandler +public class AppenderatorDriverRealtimeIndexTask extends AbstractTask + implements ChatHandler, PendingSegmentAllocatingTask { private static final String CTX_KEY_LOOKUP_TIER = "lookupTier"; @@ -258,6 +259,12 @@ public boolean isReady(TaskActionClient taskActionClient) return true; } + @Override + public String getPendingSegmentGroupId() + { + return getGroupId(); + } + @Override public TaskStatus runTask(final TaskToolbox toolbox) { From 422411ba3ad0839c67e69a03c01b1c9513c91133 Mon Sep 17 00:00:00 2001 From: Amatya Date: Fri, 12 Apr 2024 11:03:12 +0530 Subject: [PATCH 13/20] Fix order of cleanup after task removal --- .../druid/indexing/overlord/TaskLockbox.java | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git 
a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java index 511a6c6bd59a..324439007172 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java @@ -1204,6 +1204,7 @@ public void remove(final Task task) try { try { log.info("Removing task[%s] from activeTasks", task.getId()); + unlockAll(task); if (findLocksForTask(task).stream().anyMatch(lock -> lock.getType() == TaskLockType.REPLACE)) { final int upgradeSegmentsDeleted = metadataStorageCoordinator.deleteUpgradeSegmentsForTask(task.getId()); log.info( @@ -1214,24 +1215,21 @@ public void remove(final Task task) } if (task instanceof PendingSegmentAllocatingTask) { final String pendingSegmentGroup = ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId(); - if (pendingSegmentGroup != null && activePendingTaskGroupToTaskIds.containsKey(pendingSegmentGroup)) { + if (activePendingTaskGroupToTaskIds.containsKey(pendingSegmentGroup)) { final Set idsInSameGroup = activePendingTaskGroupToTaskIds.get(pendingSegmentGroup); idsInSameGroup.remove(task.getId()); if (idsInSameGroup.isEmpty()) { - if (findLocksForTask(task).stream().anyMatch(lock -> lock.getType() == TaskLockType.APPEND)) { - final int pendingSegmentsDeleted - = metadataStorageCoordinator.deletePendingSegmentsForTaskGroup(pendingSegmentGroup); - log.info( - "Deleted [%d] entries from pendingSegments table for pending segments group [%s] with APPEND locks.", - pendingSegmentsDeleted, - pendingSegmentGroup - ); - } - activePendingTaskGroupToTaskIds.remove(pendingSegmentGroup); + final int pendingSegmentsDeleted + = metadataStorageCoordinator.deletePendingSegmentsForTaskGroup(pendingSegmentGroup); + log.info( + "Deleted [%d] entries from pendingSegments table for pending segments group [%s] with APPEND locks.", + 
pendingSegmentsDeleted, + pendingSegmentGroup + ); } + activePendingTaskGroupToTaskIds.remove(pendingSegmentGroup); } } - unlockAll(task); } finally { activeTasks.remove(task.getId()); From de6b40a3f969e01856d9615ac6e3d6ace15f4ed3 Mon Sep 17 00:00:00 2001 From: Amatya Date: Fri, 12 Apr 2024 11:53:15 +0530 Subject: [PATCH 14/20] Fix merge conflicts --- .../concurrent/ConcurrentReplaceAndStreamingAppendTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java index 0558e4f9d088..a0297b3dbba7 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java @@ -36,6 +36,7 @@ import org.apache.druid.indexing.common.config.TaskConfigBuilder; import org.apache.druid.indexing.common.task.IngestionTestBase; import org.apache.druid.indexing.common.task.NoopTask; +import org.apache.druid.indexing.common.task.NoopTaskContextEnricher; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.common.task.TestAppenderatorsManager; import org.apache.druid.indexing.overlord.SegmentPublishResult; @@ -160,7 +161,8 @@ public void setUpIngestionTestBase() throws IOException taskActionClientFactory, getLockbox(), new NoopServiceEmitter(), - getObjectMapper() + getObjectMapper(), + new NoopTaskContextEnricher() ); runningTasks.clear(); taskQueue.start(); From 4653e6051f11342a0608a50c9619a94f7821961b Mon Sep 17 00:00:00 2001 From: Amatya Date: Fri, 12 Apr 2024 14:46:42 +0530 Subject: [PATCH 15/20] Fix metadata cleanup after task removal --- .../druid/indexing/overlord/TaskLockbox.java | 51 
++++++++++--------- .../indexing/overlord/TaskLockboxTest.java | 10 ++-- 2 files changed, 34 insertions(+), 27 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java index 324439007172..b33f7a669689 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java @@ -1204,32 +1204,37 @@ public void remove(final Task task) try { try { log.info("Removing task[%s] from activeTasks", task.getId()); - unlockAll(task); - if (findLocksForTask(task).stream().anyMatch(lock -> lock.getType() == TaskLockType.REPLACE)) { - final int upgradeSegmentsDeleted = metadataStorageCoordinator.deleteUpgradeSegmentsForTask(task.getId()); - log.info( - "Deleted [%d] entries from upgradeSegments table for task[%s] with REPLACE locks.", - upgradeSegmentsDeleted, - task.getId() - ); - } - if (task instanceof PendingSegmentAllocatingTask) { - final String pendingSegmentGroup = ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId(); - if (activePendingTaskGroupToTaskIds.containsKey(pendingSegmentGroup)) { - final Set idsInSameGroup = activePendingTaskGroupToTaskIds.get(pendingSegmentGroup); - idsInSameGroup.remove(task.getId()); - if (idsInSameGroup.isEmpty()) { - final int pendingSegmentsDeleted - = metadataStorageCoordinator.deletePendingSegmentsForTaskGroup(pendingSegmentGroup); - log.info( - "Deleted [%d] entries from pendingSegments table for pending segments group [%s] with APPEND locks.", - pendingSegmentsDeleted, - pendingSegmentGroup - ); + try { + if (findLocksForTask(task).stream().anyMatch(lock -> lock.getType() == TaskLockType.REPLACE)) { + final int upgradeSegmentsDeleted = metadataStorageCoordinator.deleteUpgradeSegmentsForTask(task.getId()); + log.info( + "Deleted [%d] entries from upgradeSegments table for 
task[%s] with REPLACE locks.", + upgradeSegmentsDeleted, + task.getId() + ); + } + if (task instanceof PendingSegmentAllocatingTask) { + final String pendingSegmentGroup = ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId(); + if (activePendingTaskGroupToTaskIds.containsKey(pendingSegmentGroup)) { + final Set idsInSameGroup = activePendingTaskGroupToTaskIds.get(pendingSegmentGroup); + idsInSameGroup.remove(task.getId()); + if (idsInSameGroup.isEmpty()) { + final int pendingSegmentsDeleted + = metadataStorageCoordinator.deletePendingSegmentsForTaskGroup(pendingSegmentGroup); + log.info( + "Deleted [%d] entries from pendingSegments table for pending segments group [%s] with APPEND locks.", + pendingSegmentsDeleted, + pendingSegmentGroup + ); + } + activePendingTaskGroupToTaskIds.remove(pendingSegmentGroup); } - activePendingTaskGroupToTaskIds.remove(pendingSegmentGroup); } } + catch (Exception e) { + log.warn(e, "Failure cleaning up upgradeSegments or pendingSegments tables."); + } + unlockAll(task); } finally { activeTasks.remove(task.getId()); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java index c5e582ba34e8..999d4d0abb2e 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java @@ -1896,15 +1896,17 @@ public void testUnlockSupersededLocks() } @Test - public void testUpgradeSegmentsCleanupOnUnlock() + public void testCleanupOnUnlock() { - final Task replaceTask = NoopTask.create(); - final Task appendTask = NoopTask.create(); + final Task replaceTask = NoopTask.forDatasource("replace"); + final Task appendTask = NoopTask.forDatasource("append"); final IndexerSQLMetadataStorageCoordinator coordinator = EasyMock.createMock(IndexerSQLMetadataStorageCoordinator.class); // Only the 
replaceTask should attempt a delete on the upgradeSegments table EasyMock.expect(coordinator.deleteUpgradeSegmentsForTask(replaceTask.getId())).andReturn(0).once(); - EasyMock.expect(coordinator.deletePendingSegmentsForTaskGroup(appendTask.getId())).andReturn(0); + // Any task may attempt pending segment clean up + EasyMock.expect(coordinator.deletePendingSegmentsForTaskGroup(replaceTask.getId())).andReturn(0).once(); + EasyMock.expect(coordinator.deletePendingSegmentsForTaskGroup(appendTask.getId())).andReturn(0).once(); EasyMock.replay(coordinator); final TaskLockbox taskLockbox = new TaskLockbox(taskStorage, coordinator); From b49b8d887736cf9eeaece03f980fd094651734ab Mon Sep 17 00:00:00 2001 From: Amatya Date: Tue, 16 Apr 2024 07:02:48 +0530 Subject: [PATCH 16/20] Address review and remove unneeded methods --- .../common/actions/SegmentAllocateAction.java | 8 + .../SegmentTransactionalAppendAction.java | 19 ++ .../SegmentTransactionalReplaceAction.java | 45 +++- .../task/PendingSegmentAllocatingTask.java | 3 + ...TestIndexerMetadataStorageCoordinator.java | 2 +- .../IndexerMetadataStorageCoordinator.java | 3 +- .../IndexerSQLMetadataStorageCoordinator.java | 231 +----------------- .../druid/metadata/PendingSegmentRecord.java | 37 ++- ...exerSQLMetadataStorageCoordinatorTest.java | 97 ++++---- 9 files changed, 151 insertions(+), 294 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentAllocateAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentAllocateAction.java index 280f4199e7b8..6ec2e1c23836 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentAllocateAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentAllocateAction.java @@ -25,6 +25,7 @@ import com.google.common.base.Preconditions; import org.apache.druid.indexing.common.LockGranularity; import 
org.apache.druid.indexing.common.TaskLockType; +import org.apache.druid.indexing.common.task.PendingSegmentAllocatingTask; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator; import org.apache.druid.indexing.overlord.LockRequestForNewSegment; @@ -210,6 +211,13 @@ public SegmentIdWithShardSpec perform( final TaskActionToolbox toolbox ) { + if (!(task instanceof PendingSegmentAllocatingTask)) { + throw new IAE( + "Task[%s] of type[%s] cannot allocate segments as it does not implement PendingSegmentAllocatingTask.", + task.getId(), + task.getType() + ); + } int attempt = 0; while (true) { attempt++; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java index da9b74be8d57..2876dfb93aec 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java @@ -31,6 +31,7 @@ import org.apache.druid.indexing.overlord.CriticalAction; import org.apache.druid.indexing.overlord.DataSourceMetadata; import org.apache.druid.indexing.overlord.SegmentPublishResult; +import org.apache.druid.java.util.common.IAE; import org.apache.druid.metadata.ReplaceTaskLock; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.timeline.DataSegment; @@ -44,6 +45,17 @@ /** * Append segments to metadata storage. The segment versions must all be less than or equal to a lock held by * your task for the segment intervals. + * + *
+ * Pseudo code (for a single interval):
+ * For an append lock held over an interval:
+ *     transaction {
+ *       commit input segments contained within interval
+ *       if there is an active replace lock over the interval:
+ *         add an entry for the inputSegment corresponding to the replace lock's task in the upgradeSegments table
+ *       fetch pending segments with parent contained within the input segments, and commit them
+ *     }
+ * 
*/ public class SegmentTransactionalAppendAction implements TaskAction { @@ -115,6 +127,13 @@ public TypeReference getReturnTypeReference() @Override public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) { + if (!(task instanceof PendingSegmentAllocatingTask)) { + throw new IAE( + "Task[%s] of type[%s] cannot append segments as it does not implement PendingSegmentAllocatingTask.", + task.getId(), + task.getType() + ); + } // Verify that all the locks are of expected type final List locks = toolbox.getTaskLockbox().findLocksForTask(task); for (TaskLock lock : locks) { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java index 922cb499fc85..2f4a580e0464 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java @@ -37,7 +37,7 @@ import org.apache.druid.timeline.DataSegment; import java.util.HashMap; -import java.util.List; +import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -45,6 +45,20 @@ /** * Replace segments in metadata storage. The segment versions must all be less than or equal to a lock held by * your task for the segment intervals. + * + *
+ *  Pseudo code (for a single interval)
+ *- For a replace lock held over an interval:
+ *     transaction {
+ *       commit input segments contained within interval
+ *       upgrade ids in the upgradeSegments table corresponding to this task to the replace lock's version and commit them
+ *       fetch payload, task_allocator_id for pending segments
+ *       upgrade each such pending segment to the replace lock's version with the corresponding root segment
+ *     }
+ * For every pending segment with version == replace lock version:
+ *    Fetch payload, group_id of the pending segment and relay them to the supervisor
+ *    The supervisor relays the payloads to all the tasks with the corresponding group_id to serve realtime queries
+ * 
*/ public class SegmentTransactionalReplaceAction implements TaskAction { @@ -149,25 +163,38 @@ private void registerUpgradedPendingSegmentsOnSupervisor(Task task, TaskActionTo return; } - List pendingSegments - = toolbox.getIndexerMetadataStorageCoordinator().getAllPendingSegments(task.getDataSource()); - Map pendingSegmentIdMap = new HashMap<>(); - pendingSegments.forEach(pendingSegment -> pendingSegmentIdMap.put( + final Set replaceLocksForTask = toolbox + .getTaskLockbox() + .getAllReplaceLocksForDatasource(task.getDataSource()) + .stream() + .filter(lock -> task.getId().equals(lock.getSupervisorTaskId())) + .collect(Collectors.toSet()); + + + Set pendingSegments = new HashSet<>(); + for (ReplaceTaskLock replaceLock : replaceLocksForTask) { + pendingSegments.addAll( + toolbox.getIndexerMetadataStorageCoordinator() + .getPendingSegments(task.getDataSource(), replaceLock.getInterval()) + ); + } + Map idToPendingSegment = new HashMap<>(); + pendingSegments.forEach(pendingSegment -> idToPendingSegment.put( pendingSegment.getId().asSegmentId().toString(), pendingSegment.getId() )); - Map upgradedPendingSegments = new HashMap<>(); + Map segmentToParent = new HashMap<>(); pendingSegments.forEach(pendingSegment -> { if (pendingSegment.getUpgradedFromSegmentId() != null && !pendingSegment.getUpgradedFromSegmentId().equals(pendingSegment.getId().asSegmentId().toString())) { - upgradedPendingSegments.put( + segmentToParent.put( pendingSegment.getId(), - pendingSegmentIdMap.get(pendingSegment.getUpgradedFromSegmentId()) + idToPendingSegment.get(pendingSegment.getUpgradedFromSegmentId()) ); } }); - upgradedPendingSegments.forEach( + segmentToParent.forEach( (newId, oldId) -> supervisorManager.registerNewVersionOfPendingSegmentOnSupervisor( activeSupervisorIdWithAppendLock.get(), oldId, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/PendingSegmentAllocatingTask.java 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/PendingSegmentAllocatingTask.java index 089a50643518..eb5a92d8b0be 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/PendingSegmentAllocatingTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/PendingSegmentAllocatingTask.java @@ -19,6 +19,9 @@ package org.apache.druid.indexing.common.task; +/** + * An interface to be implemented by every appending task that allocates pending segments. + */ public interface PendingSegmentAllocatingTask { /** diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 95fe2623171b..f57494a1e03b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -295,7 +295,7 @@ public int deletePendingSegmentsForTaskGroup(final String taskGroup) } @Override - public List getAllPendingSegments(String datasource) + public List getPendingSegments(String datasource, Interval interval) { throw new UnsupportedOperationException(); } diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index cc6af8498691..64d29a502b5e 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -489,7 +489,8 @@ SegmentPublishResult commitMetadataOnly( /** * Fetches all the pending segments present in the metadata store for a given datasource * @param datasource datasource to be queried + * @param 
interval interval with which segments overlap * @return List of pending segment records */ - List getAllPendingSegments(String datasource); + List getPendingSegments(String datasource, Interval interval); } diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 6a255ec3fe33..1265e84605a4 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -57,7 +57,6 @@ import org.apache.druid.timeline.SegmentTimeline; import org.apache.druid.timeline.TimelineObjectHolder; import org.apache.druid.timeline.partition.NoneShardSpec; -import org.apache.druid.timeline.partition.NumberedPartialShardSpec; import org.apache.druid.timeline.partition.NumberedShardSpec; import org.apache.druid.timeline.partition.PartialShardSpec; import org.apache.druid.timeline.partition.PartitionChunk; @@ -94,7 +93,6 @@ import java.util.Map; import java.util.Objects; import java.util.Set; -import java.util.TreeMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -1352,7 +1350,8 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( } } - private int insertPendingSegmentsIntoMetastore( + @VisibleForTesting + int insertPendingSegmentsIntoMetastore( Handle handle, List pendingSegments, String dataSource, @@ -1394,51 +1393,6 @@ private int insertPendingSegmentsIntoMetastore( return Arrays.stream(updated).sum(); } - private int insertPendingSegmentsIntoMetastore( - Handle handle, - Map createdSegments, - String dataSource, - boolean skipSegmentLineageCheck - ) throws JsonProcessingException - { - final PreparedBatch insertBatch = handle.prepareBatch( - StringUtils.format( - "INSERT INTO %1$s (id, dataSource, created_date, start, 
%2$send%2$s, sequence_name, sequence_prev_id, " - + "sequence_name_prev_id_sha1, payload) " - + "VALUES (:id, :dataSource, :created_date, :start, :end, :sequence_name, :sequence_prev_id, " - + ":sequence_name_prev_id_sha1, :payload)", - dbTables.getPendingSegmentsTable(), - connector.getQuoteString() - )); - - // Deduplicate the segment ids by inverting the map - Map segmentIdToRequest = new HashMap<>(); - createdSegments.forEach((request, segmentId) -> segmentIdToRequest.put(segmentId, request)); - - final String now = DateTimes.nowUtc().toString(); - for (Map.Entry entry : segmentIdToRequest.entrySet()) { - final SegmentCreateRequest request = entry.getValue(); - final SegmentIdWithShardSpec segmentId = entry.getKey(); - final Interval interval = segmentId.getInterval(); - - insertBatch.add() - .bind("id", segmentId.toString()) - .bind("dataSource", dataSource) - .bind("created_date", now) - .bind("start", interval.getStart().toString()) - .bind("end", interval.getEnd().toString()) - .bind("sequence_name", request.getSequenceName()) - .bind("sequence_prev_id", request.getPreviousSegmentId()) - .bind( - "sequence_name_prev_id_sha1", - getSequenceNameAndPrevIdSha(request, segmentId, skipSegmentLineageCheck) - ) - .bind("payload", jsonMapper.writeValueAsBytes(segmentId)); - } - int[] updated = insertBatch.execute(); - return Arrays.stream(updated).sum(); - } - private void insertPendingSegmentIntoMetastore( Handle handle, SegmentIdWithShardSpec newIdentifier, @@ -1473,183 +1427,6 @@ private void insertPendingSegmentIntoMetastore( .execute(); } - /** - * Creates new IDs for the given append segments if a REPLACE task started and - * finished after these append segments had already been allocated. The newly - * created IDs belong to the same interval and version as the segments committed - * by the REPLACE task. 
- */ - private Set createNewIdsForAppendSegments( - Handle handle, - String dataSource, - Set segmentsToAppend - ) - { - if (segmentsToAppend.isEmpty()) { - return Collections.emptySet(); - } - - final Set appendIntervals = new HashSet<>(); - final TreeMap> appendVersionToSegments = new TreeMap<>(); - for (DataSegment segment : segmentsToAppend) { - appendIntervals.add(segment.getInterval()); - appendVersionToSegments.computeIfAbsent(segment.getVersion(), v -> new HashSet<>()) - .add(segment); - } - - // Fetch all used segments that overlap with any of the append intervals - final Collection overlappingSegments = retrieveUsedSegmentsForIntervals( - dataSource, - new ArrayList<>(appendIntervals), - Segments.INCLUDING_OVERSHADOWED - ); - - final Map> overlappingVersionToIntervals = new HashMap<>(); - final Map> overlappingIntervalToSegments = new HashMap<>(); - for (DataSegment segment : overlappingSegments) { - overlappingVersionToIntervals.computeIfAbsent(segment.getVersion(), v -> new HashSet<>()) - .add(segment.getInterval()); - overlappingIntervalToSegments.computeIfAbsent(segment.getInterval(), i -> new HashSet<>()) - .add(segment); - } - - final Set upgradedSegments = new HashSet<>(); - for (Map.Entry> entry : overlappingVersionToIntervals.entrySet()) { - final String upgradeVersion = entry.getKey(); - Map> segmentsToUpgrade = getSegmentsWithVersionLowerThan( - upgradeVersion, - entry.getValue(), - appendVersionToSegments - ); - for (Map.Entry> upgradeEntry : segmentsToUpgrade.entrySet()) { - final Interval upgradeInterval = upgradeEntry.getKey(); - final Set segmentsAlreadyOnVersion - = overlappingIntervalToSegments.getOrDefault(upgradeInterval, Collections.emptySet()) - .stream() - .filter(s -> s.getVersion().equals(upgradeVersion)) - .collect(Collectors.toSet()); - Set segmentsUpgradedToVersion = createNewIdsForAppendSegmentsWithVersion( - handle, - upgradeVersion, - upgradeInterval, - upgradeEntry.getValue(), - segmentsAlreadyOnVersion - ); - 
log.info("Upgraded [%d] segments to version[%s].", segmentsUpgradedToVersion.size(), upgradeVersion); - upgradedSegments.addAll(segmentsUpgradedToVersion); - } - } - - return upgradedSegments; - } - - /** - * Creates a Map from eligible interval to Set of segments that are fully - * contained in that interval and have a version strictly lower than {@code #cutoffVersion}. - */ - private Map> getSegmentsWithVersionLowerThan( - String cutoffVersion, - Set eligibleIntervals, - TreeMap> versionToSegments - ) - { - final Set eligibleSegments - = versionToSegments.headMap(cutoffVersion).values().stream() - .flatMap(Collection::stream) - .collect(Collectors.toSet()); - - final Map> eligibleIntervalToSegments = new HashMap<>(); - - for (DataSegment segment : eligibleSegments) { - final Interval segmentInterval = segment.getInterval(); - for (Interval eligibleInterval : eligibleIntervals) { - if (eligibleInterval.contains(segmentInterval)) { - eligibleIntervalToSegments.computeIfAbsent(eligibleInterval, itvl -> new HashSet<>()) - .add(segment); - break; - } else if (eligibleInterval.overlaps(segmentInterval)) { - // Committed interval overlaps only partially - throw new ISE( - "Committed interval[%s] conflicts with interval[%s] of append segment[%s].", - eligibleInterval, segmentInterval, segment.getId() - ); - } - } - } - - return eligibleIntervalToSegments; - } - - /** - * Computes new segment IDs that belong to the upgradeInterval and upgradeVersion. - * - * @param committedSegments Segments that already exist in the upgradeInterval - * at upgradeVersion. 
- */ - private Set createNewIdsForAppendSegmentsWithVersion( - Handle handle, - String upgradeVersion, - Interval upgradeInterval, - Set segmentsToUpgrade, - Set committedSegments - ) - { - // Find the committed segments with the higest partition number - SegmentIdWithShardSpec committedMaxId = null; - for (DataSegment committedSegment : committedSegments) { - if (committedMaxId == null - || committedMaxId.getShardSpec().getPartitionNum() < committedSegment.getShardSpec().getPartitionNum()) { - committedMaxId = SegmentIdWithShardSpec.fromDataSegment(committedSegment); - } - } - - // Get pending segments for the new version to determine the next partition number to allocate - final String dataSource = segmentsToUpgrade.iterator().next().getDataSource(); - final List pendingSegments - = getPendingSegmentsForIntervalWithHandle(handle, dataSource, upgradeInterval); - final Set allAllocatedIds = new HashSet<>( - pendingSegments.stream() - .map(PendingSegmentRecord::getId) - .collect(Collectors.toSet()) - ); - - // Create new IDs for each append segment - final Set newSegmentIds = new HashSet<>(); - for (DataSegment segment : segmentsToUpgrade) { - SegmentCreateRequest request = new SegmentCreateRequest( - segment.getId() + "__" + upgradeVersion, - null, - upgradeVersion, - NumberedPartialShardSpec.instance(), - null, - null - ); - - // Create new segment ID based on committed segments, allocated pending segments - // and new IDs created so far in this method - final SegmentIdWithShardSpec newId = createNewSegment( - request, - dataSource, - upgradeInterval, - upgradeVersion, - committedMaxId, - allAllocatedIds - ).getId(); - - // Update the set so that subsequent segment IDs use a higher partition number - allAllocatedIds.add(newId); - newSegmentIds.add( - DataSegment.builder(segment) - .interval(newId.getInterval()) - .version(newId.getVersion()) - .shardSpec(newId.getShardSpec()) - .build() - ); - } - - return newSegmentIds; - } - private Map createNewSegments( Handle 
handle, String dataSource, @@ -2785,10 +2562,10 @@ public int deletePendingSegmentsForTaskGroup(final String pendingSegmentsGroup) } @Override - public List getAllPendingSegments(String datasource) + public List getPendingSegments(String datasource, Interval interval) { return connector.retryWithHandle( - handle -> getPendingSegmentsForIntervalWithHandle(handle, datasource, Intervals.ETERNITY) + handle -> getPendingSegmentsForIntervalWithHandle(handle, datasource, interval) ); } diff --git a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java index 58ac9576017b..f72dca2e7c84 100644 --- a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java +++ b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java @@ -19,8 +19,6 @@ package org.apache.druid.metadata; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.hash.Hasher; import com.google.common.hash.Hashing; @@ -32,6 +30,15 @@ import javax.annotation.Nullable; import java.sql.ResultSet; +/** + * Representation of a record in the pending segments table.
+ * Mapping of column in table to field: + *
  • id -> id (Unique identifier for pending segment) + *
  • sequence_name -> sequenceName (sequence name used for segment allocation) + *
  • sequence_prev_id -> sequencePrevId (previous segment id used for segment allocation) + *
  • upgraded_from_segment_id -> upgradedFromSegmentId (Id of the root segment from which this was upgraded) + *
  • task_allocator_id -> taskAllocatorId (Associates a task / task group / replica group with the pending segment) + */ public class PendingSegmentRecord { private final SegmentIdWithShardSpec id; @@ -40,13 +47,12 @@ public class PendingSegmentRecord private final String upgradedFromSegmentId; private final String taskAllocatorId; - @JsonCreator public PendingSegmentRecord( - @JsonProperty("id") SegmentIdWithShardSpec id, - @JsonProperty("sequenceName") String sequenceName, - @JsonProperty("sequencePrevId") String sequencePrevId, - @JsonProperty("upgradedFromSegmentId") @Nullable String upgradedFromSegmentId, - @JsonProperty("taskAllocatorId") @Nullable String taskAllocatorId + SegmentIdWithShardSpec id, + String sequenceName, + String sequencePrevId, + @Nullable String upgradedFromSegmentId, + @Nullable String taskAllocatorId ) { this.id = id; @@ -56,33 +62,38 @@ public PendingSegmentRecord( this.taskAllocatorId = taskAllocatorId; } - @JsonProperty public SegmentIdWithShardSpec getId() { return id; } - @JsonProperty public String getSequenceName() { return sequenceName; } - @JsonProperty public String getSequencePrevId() { return sequencePrevId; } + /** + * The original pending segment using which this upgraded segment was created. + * Corresponds to the column upgraded_from_segment_id in druid_pendingSegments. + * Can be null for pending segments allocated before this column was added or for segments that have not been upgraded. + */ @Nullable - @JsonProperty public String getUpgradedFromSegmentId() { return upgradedFromSegmentId; } + /** + * task / taskGroup / replica group of task that allocated this segment. + * Corresponds to the column task_allocator_id in druid_pendingSegments. + * Can be null for pending segments allocated before this column was added. 
+ */ @Nullable - @JsonProperty public String getTaskAllocatorId() { return taskAllocatorId; diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 532e08595f80..95ec1dda15e2 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -25,8 +25,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; -import com.google.common.hash.Hashing; -import com.google.common.io.BaseEncoding; import org.apache.druid.data.input.StringTuple; import org.apache.druid.error.InvalidInput; import org.apache.druid.indexing.overlord.DataSourceMetadata; @@ -471,44 +469,6 @@ private Boolean insertUsedSegments(Set dataSegments) ); } - private Boolean insertPendingSegmentAndSequenceName(Pair pendingSegmentSequenceName) - { - final SegmentIdWithShardSpec pendingSegment = pendingSegmentSequenceName.lhs; - final String sequenceName = pendingSegmentSequenceName.rhs; - final String table = derbyConnectorRule.metadataTablesConfigSupplier().get().getPendingSegmentsTable(); - return derbyConnector.retryWithHandle( - handle -> { - handle.createStatement( - StringUtils.format( - "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, sequence_name, sequence_prev_id, " - + "sequence_name_prev_id_sha1, payload) " - + "VALUES (:id, :dataSource, :created_date, :start, :end, :sequence_name, :sequence_prev_id, " - + ":sequence_name_prev_id_sha1, :payload)", - table, - derbyConnector.getQuoteString() - ) - ) - .bind("id", pendingSegment.toString()) - .bind("dataSource", pendingSegment.getDataSource()) - .bind("created_date", DateTimes.nowUtc().toString()) - .bind("start", 
pendingSegment.getInterval().getStart().toString()) - .bind("end", pendingSegment.getInterval().getEnd().toString()) - .bind("sequence_name", sequenceName) - .bind("sequence_prev_id", pendingSegment.toString()) - .bind("sequence_name_prev_id_sha1", BaseEncoding.base16().encode( - Hashing.sha1() - .newHasher() - .putLong((long) pendingSegment.hashCode() * sequenceName.hashCode()) - .hash() - .asBytes() - )) - .bind("payload", mapper.writeValueAsBytes(pendingSegment)) - .execute(); - return true; - } - ); - } - private Map getSegmentsCommittedDuringReplaceTask(String taskId) { final String table = derbyConnectorRule.metadataTablesConfigSupplier().get().getUpgradeSegmentsTable(); @@ -568,7 +528,7 @@ private void insertIntoUpgradeSegmentsTable(Map se ); } - //@Test + @Test public void testCommitAppendSegments() { final String v1 = "2023-01-01"; @@ -620,7 +580,7 @@ public void testCommitAppendSegments() // Commit the segment and verify the results SegmentPublishResult commitResult - = coordinator.commitAppendSegments(appendSegments, segmentToReplaceLock, null); + = coordinator.commitAppendSegments(appendSegments, segmentToReplaceLock, "append"); Assert.assertTrue(commitResult.isSuccess()); Assert.assertEquals(appendSegments, commitResult.getSegments()); @@ -643,12 +603,36 @@ public void testCommitAppendSegments() Assert.assertEquals(replaceLock.getVersion(), Iterables.getOnlyElement(observedLockVersions)); } - //@Test + @Test public void testCommitReplaceSegments() { final ReplaceTaskLock replaceLock = new ReplaceTaskLock("g1", Intervals.of("2023-01-01/2023-02-01"), "2023-02-01"); final Set segmentsAppendedWithReplaceLock = new HashSet<>(); final Map appendedSegmentToReplaceLockMap = new HashMap<>(); + final PendingSegmentRecord pendingSegmentInInterval = new PendingSegmentRecord( + new SegmentIdWithShardSpec( + "foo", + Intervals.of("2023-01-01/2023-01-02"), + "2023-01-02", + new NumberedShardSpec(100, 0) + ), + "", + "", + null, + "append" + ); + final 
PendingSegmentRecord pendingSegmentOutsideInterval = new PendingSegmentRecord( + new SegmentIdWithShardSpec( + "foo", + Intervals.of("2023-04-01/2023-04-02"), + "2023-01-02", + new NumberedShardSpec(100, 0) + ), + "", + "", + null, + "append" + ); for (int i = 1; i < 9; i++) { final DataSegment segment = new DataSegment( "foo", @@ -665,6 +649,14 @@ public void testCommitReplaceSegments() appendedSegmentToReplaceLockMap.put(segment, replaceLock); } insertUsedSegments(segmentsAppendedWithReplaceLock); + derbyConnector.retryWithHandle( + handle -> coordinator.insertPendingSegmentsIntoMetastore( + handle, + ImmutableList.of(pendingSegmentInInterval, pendingSegmentOutsideInterval), + "foo", + true + ) + ); insertIntoUpgradeSegmentsTable(appendedSegmentToReplaceLockMap); final Set replacingSegments = new HashSet<>(); @@ -709,6 +701,25 @@ public void testCommitReplaceSegments() } Assert.assertTrue(hasBeenCarriedForward); } + + List pendingSegmentsInInterval = + coordinator.getPendingSegments("foo", Intervals.of("2023-01-01/2023-02-01")); + Assert.assertEquals(2, pendingSegmentsInInterval.size()); + final SegmentId rootPendingSegmentId = pendingSegmentInInterval.getId().asSegmentId(); + if (pendingSegmentsInInterval.get(0).getUpgradedFromSegmentId() == null) { + Assert.assertEquals(rootPendingSegmentId, pendingSegmentsInInterval.get(0).getId().asSegmentId()); + Assert.assertEquals(rootPendingSegmentId.toString(), pendingSegmentsInInterval.get(1).getUpgradedFromSegmentId()); + } else { + Assert.assertEquals(rootPendingSegmentId, pendingSegmentsInInterval.get(1).getId().asSegmentId()); + Assert.assertEquals(rootPendingSegmentId.toString(), pendingSegmentsInInterval.get(0).getUpgradedFromSegmentId()); + } + + List pendingSegmentsOutsideInterval = + coordinator.getPendingSegments("foo", Intervals.of("2023-04-01/2023-05-01")); + Assert.assertEquals(1, pendingSegmentsOutsideInterval.size()); + Assert.assertEquals( + pendingSegmentOutsideInterval.getId().asSegmentId(), 
pendingSegmentsOutsideInterval.get(0).getId().asSegmentId() + ); } @Test From a21adbcd19d3016c2deb90021b15be9410472417 Mon Sep 17 00:00:00 2001 From: Amatya Date: Tue, 16 Apr 2024 14:56:33 +0530 Subject: [PATCH 17/20] Review comments --- .../druid/msq/indexing/MSQControllerTask.java | 2 +- .../druid/msq/indexing/MSQWorkerTask.java | 2 +- .../common/actions/SegmentAllocateAction.java | 6 +- .../SegmentTransactionalAppendAction.java | 11 ++-- .../AppenderatorDriverRealtimeIndexTask.java | 2 +- .../druid/indexing/common/task/IndexTask.java | 2 +- .../druid/indexing/common/task/NoopTask.java | 2 +- .../task/PendingSegmentAllocatingTask.java | 2 +- .../parallel/ParallelIndexSupervisorTask.java | 2 +- .../batch/parallel/SinglePhaseSubTask.java | 2 +- .../druid/indexing/overlord/TaskLockbox.java | 58 ++++++++++--------- .../SeekableStreamIndexTask.java | 2 +- .../common/task/IngestionTestBase.java | 6 +- .../task/concurrent/CommandQueueTask.java | 2 +- .../ConcurrentReplaceAndAppendTest.java | 2 +- ...ncurrentReplaceAndStreamingAppendTest.java | 3 +- .../IndexerMetadataStorageCoordinator.java | 6 +- .../IndexerSQLMetadataStorageCoordinator.java | 2 +- .../druid/metadata/PendingSegmentRecord.java | 15 ++--- 19 files changed, 65 insertions(+), 64 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java index d626d8abb260..7bb57c5e1c25 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQControllerTask.java @@ -159,7 +159,7 @@ public Set getInputSourceResources() } @Override - public String getPendingSegmentGroupId() + public String getTaskAllocatorId() { return getId(); } diff --git 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java index cdc4d5973874..c04948402079 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTask.java @@ -127,7 +127,7 @@ public Set getInputSourceResources() } @Override - public String getPendingSegmentGroupId() + public String getTaskAllocatorId() { return getControllerTaskId(); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentAllocateAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentAllocateAction.java index 6ec2e1c23836..d0308516e04b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentAllocateAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentAllocateAction.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.base.Preconditions; +import org.apache.druid.error.DruidException; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskLockType; import org.apache.druid.indexing.common.task.PendingSegmentAllocatingTask; @@ -212,10 +213,9 @@ public SegmentIdWithShardSpec perform( ) { if (!(task instanceof PendingSegmentAllocatingTask)) { - throw new IAE( + throw DruidException.defensive( "Task[%s] of type[%s] cannot allocate segments as it does not implement PendingSegmentAllocatingTask.", - task.getId(), - task.getType() + task.getId(), task.getType() ); } int attempt = 0; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java index 2876dfb93aec..1a1e6c793776 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.type.TypeReference; +import org.apache.druid.error.DruidException; import org.apache.druid.error.InvalidInput; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; @@ -31,7 +32,6 @@ import org.apache.druid.indexing.overlord.CriticalAction; import org.apache.druid.indexing.overlord.DataSourceMetadata; import org.apache.druid.indexing.overlord.SegmentPublishResult; -import org.apache.druid.java.util.common.IAE; import org.apache.druid.metadata.ReplaceTaskLock; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.timeline.DataSegment; @@ -43,6 +43,7 @@ import java.util.stream.Collectors; /** + * * Append segments to metadata storage. The segment versions must all be less than or equal to a lock held by * your task for the segment intervals. 
* @@ -128,7 +129,7 @@ public TypeReference getReturnTypeReference() public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) { if (!(task instanceof PendingSegmentAllocatingTask)) { - throw new IAE( + throw DruidException.defensive( "Task[%s] of type[%s] cannot append segments as it does not implement PendingSegmentAllocatingTask.", task.getId(), task.getType() @@ -152,12 +153,12 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) = TaskLocks.findReplaceLocksCoveringSegments(datasource, toolbox.getTaskLockbox(), segments); final CriticalAction.Action publishAction; - final String pendingSegmentGroupId = ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId(); + final String taskAllocatorId = ((PendingSegmentAllocatingTask) task).getTaskAllocatorId(); if (startMetadata == null) { publishAction = () -> toolbox.getIndexerMetadataStorageCoordinator().commitAppendSegments( segments, segmentToReplaceLock, - pendingSegmentGroupId + taskAllocatorId ); } else { publishAction = () -> toolbox.getIndexerMetadataStorageCoordinator().commitAppendSegmentsAndMetadata( @@ -165,7 +166,7 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) segmentToReplaceLock, startMetadata, endMetadata, - pendingSegmentGroupId + taskAllocatorId ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java index d35fd979450f..42759262fab5 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java @@ -261,7 +261,7 @@ public boolean isReady(TaskActionClient taskActionClient) } @Override - public String getPendingSegmentGroupId() + public String getTaskAllocatorId() { return 
getGroupId(); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index 8c2cd0d4eb59..1796f6ea2a64 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -303,7 +303,7 @@ public Granularity getSegmentGranularity() } @Override - public String getPendingSegmentGroupId() + public String getTaskAllocatorId() { return getGroupId(); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/NoopTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/NoopTask.java index 9f3160cb7501..9d91542ebf0b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/NoopTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/NoopTask.java @@ -112,7 +112,7 @@ public int getPriority() } @Override - public String getPendingSegmentGroupId() + public String getTaskAllocatorId() { return getId(); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/PendingSegmentAllocatingTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/PendingSegmentAllocatingTask.java index eb5a92d8b0be..e392adc45535 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/PendingSegmentAllocatingTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/PendingSegmentAllocatingTask.java @@ -28,5 +28,5 @@ public interface PendingSegmentAllocatingTask * Unique string used by an appending task (or its sub-tasks and replicas) to allocate pending segments * and identify pending segments allocated to it. 
*/ - String getPendingSegmentGroupId(); + String getTaskAllocatorId(); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 89e0b095d9e3..935adb3cde0f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -479,7 +479,7 @@ public boolean isPerfectRollup() } @Override - public String getPendingSegmentGroupId() + public String getTaskAllocatorId() { return getGroupId(); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java index 97abf8f75d4e..e02d59936b20 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java @@ -238,7 +238,7 @@ public String getSubtaskSpecId() } @Override - public String getPendingSegmentGroupId() + public String getTaskAllocatorId() { return getGroupId(); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java index b33f7a669689..8bf2f0d8bba4 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java @@ -108,7 +108,7 @@ public class TaskLockbox // Stores map of pending task group of tasks to the set of their ids. // Useful for task replicas. 
Clean up pending segments only when the set is empty. @GuardedBy("giant") - private final HashMap> activePendingTaskGroupToTaskIds = new HashMap<>(); + private final Map> activeAllocatorIdToTaskIds = new HashMap<>(); @Inject public TaskLockbox( @@ -220,14 +220,10 @@ public int compare(Pair left, Pair right) activeTasks.remove(task.getId()); } } - activePendingTaskGroupToTaskIds.clear(); + activeAllocatorIdToTaskIds.clear(); for (Task task : storedActiveTasks) { - if (task instanceof PendingSegmentAllocatingTask) { - final String pendingSegmentAllocatingTask = ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId(); - if (activeTasks.contains(task.getId()) && pendingSegmentAllocatingTask != null) { - activePendingTaskGroupToTaskIds.computeIfAbsent(pendingSegmentAllocatingTask, s -> new HashSet<>()) - .add(task.getId()); - } + if (activeTasks.contains(task.getId())) { + trackAppendingTask(task); } } @@ -428,11 +424,11 @@ public LockResult tryLock(final Task task, final LockRequest request) newSegmentId ); } - final String pendingSegmentGroupId = ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId(); + final String taskAllocatorId = ((PendingSegmentAllocatingTask) task).getTaskAllocatorId(); newSegmentId = allocateSegmentId( lockRequestForNewSegment, posseToUse.getTaskLock().getVersion(), - pendingSegmentGroupId + taskAllocatorId ); } } @@ -732,7 +728,7 @@ void allocateSegmentIds( } } - private SegmentIdWithShardSpec allocateSegmentId(LockRequestForNewSegment request, String version, String taskGroup) + private SegmentIdWithShardSpec allocateSegmentId(LockRequestForNewSegment request, String version, String allocatorId) { return metadataStorageCoordinator.allocatePendingSegment( request.getDataSource(), @@ -742,7 +738,7 @@ private SegmentIdWithShardSpec allocateSegmentId(LockRequestForNewSegment reques request.getPartialShardSpec(), version, request.isSkipSegmentLineageCheck(), - taskGroup + allocatorId ); } @@ -1182,17 +1178,25 @@ public void 
add(Task task) try { log.info("Adding task[%s] to activeTasks", task.getId()); activeTasks.add(task.getId()); - if (task instanceof PendingSegmentAllocatingTask) { - final String pendingSegmentGroupId = ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId(); - activePendingTaskGroupToTaskIds.computeIfAbsent(pendingSegmentGroupId, s -> new HashSet<>()) - .add(task.getId()); - } + trackAppendingTask(task); } finally { giant.unlock(); } } + @GuardedBy("giant") + private void trackAppendingTask(Task task) + { + if (task instanceof PendingSegmentAllocatingTask) { + final String taskAllocatorId = ((PendingSegmentAllocatingTask) task).getTaskAllocatorId(); + if (taskAllocatorId != null) { + activeAllocatorIdToTaskIds.computeIfAbsent(taskAllocatorId, s -> new HashSet<>()) + .add(task.getId()); + } + } + } + /** * Release all locks for a task and remove task from set of active tasks. Does nothing if the task is not currently locked or not an active task. * @@ -1205,29 +1209,29 @@ public void remove(final Task task) try { log.info("Removing task[%s] from activeTasks", task.getId()); try { + // Clean upgrade segments table for entries associated with replacing task if (findLocksForTask(task).stream().anyMatch(lock -> lock.getType() == TaskLockType.REPLACE)) { final int upgradeSegmentsDeleted = metadataStorageCoordinator.deleteUpgradeSegmentsForTask(task.getId()); log.info( "Deleted [%d] entries from upgradeSegments table for task[%s] with REPLACE locks.", - upgradeSegmentsDeleted, - task.getId() + upgradeSegmentsDeleted, task.getId() ); } + // Clean pending segments associated with the appending task if (task instanceof PendingSegmentAllocatingTask) { - final String pendingSegmentGroup = ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId(); - if (activePendingTaskGroupToTaskIds.containsKey(pendingSegmentGroup)) { - final Set idsInSameGroup = activePendingTaskGroupToTaskIds.get(pendingSegmentGroup); + final String taskAllocatorId = 
((PendingSegmentAllocatingTask) task).getTaskAllocatorId(); + if (activeAllocatorIdToTaskIds.containsKey(taskAllocatorId)) { + final Set idsInSameGroup = activeAllocatorIdToTaskIds.get(taskAllocatorId); idsInSameGroup.remove(task.getId()); if (idsInSameGroup.isEmpty()) { final int pendingSegmentsDeleted - = metadataStorageCoordinator.deletePendingSegmentsForTaskGroup(pendingSegmentGroup); + = metadataStorageCoordinator.deletePendingSegmentsForTaskGroup(taskAllocatorId); log.info( "Deleted [%d] entries from pendingSegments table for pending segments group [%s] with APPEND locks.", - pendingSegmentsDeleted, - pendingSegmentGroup + pendingSegmentsDeleted, taskAllocatorId ); } - activePendingTaskGroupToTaskIds.remove(pendingSegmentGroup); + activeAllocatorIdToTaskIds.remove(taskAllocatorId); } } } @@ -1823,7 +1827,7 @@ SegmentCreateRequest getSegmentRequest() acquiredLock == null ? lockRequest.getVersion() : acquiredLock.getVersion(), action.getPartialShardSpec(), null, - ((PendingSegmentAllocatingTask) task).getPendingSegmentGroupId() + ((PendingSegmentAllocatingTask) task).getTaskAllocatorId() ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java index 28c935f66aec..0ec9a67e8c1a 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTask.java @@ -271,7 +271,7 @@ public boolean withinMinMaxRecordTime(final InputRow row) } @Override - public String getPendingSegmentGroupId() + public String getTaskAllocatorId() { return getTaskResource().getAvailabilityGroup(); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index 59194d6bbefb..44f4ee1ad932 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -234,13 +234,9 @@ public TaskActionToolbox createTaskActionToolbox() ); } - protected void setSupervisorManager(SupervisorManager supervisorManager) + public TaskToolbox createTaskToolbox(TaskConfig config, Task task, SupervisorManager supervisorManager) { this.supervisorManager = supervisorManager; - } - - public TaskToolbox createTaskToolbox(TaskConfig config, Task task) - { return new TaskToolbox.Builder() .config(config) .taskExecutorNode(new DruidNode("druid/middlemanager", "localhost", false, 8091, null, true, false)) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java index ed9b998052fe..5945d1488c38 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/CommandQueueTask.java @@ -142,7 +142,7 @@ private V waitForCommandToFinish(Command command) } @Override - public String getPendingSegmentGroupId() + public String getTaskAllocatorId() { return getId(); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java index f80752d7f6c0..415c63a0ee26 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java @@ -1025,7 +1025,7 @@ private TaskToolboxFactory createToolboxFactory( @Override public TaskToolbox build(TaskConfig config, Task task) { - return createTaskToolbox(config, task); + return createTaskToolbox(config, task, null); } }; } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java index a0297b3dbba7..50c318683e8f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndStreamingAppendTest.java @@ -134,7 +134,6 @@ public void setUpIngestionTestBase() throws IOException EasyMock.reset(supervisorManager); EasyMock.expect(supervisorManager.getActiveSupervisorIdForDatasourceWithAppendLock(WIKI)) .andReturn(Optional.of(WIKI)).anyTimes(); - super.setSupervisorManager(supervisorManager); super.setUpIngestionTestBase(); final TaskConfig taskConfig = new TaskConfigBuilder().build(); taskActionClientFactory = createActionClientFactory(); @@ -771,7 +770,7 @@ private TaskToolboxFactory createToolboxFactory( @Override public TaskToolbox build(TaskConfig config, Task task) { - return createTaskToolbox(config, task); + return createTaskToolbox(config, task, supervisorManager); } }; } diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index 64d29a502b5e..2390e7b55003 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ 
b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -481,13 +481,13 @@ SegmentPublishResult commitMetadataOnly( /** * Delete pending segment for a give task group after all the tasks belonging to it have completed. - * @param taskGroup task group + * @param taskAllocatorId task id / task group / replica group for an appending task * @return number of pending segments deleted from the metadata store */ - int deletePendingSegmentsForTaskGroup(String taskGroup); + int deletePendingSegmentsForTaskGroup(String taskAllocatorId); /** - * Fetches all the pending segments present in the metadata store for a given datasource + * Fetches all the pending segments of the datasource that overlap with a given interval. * @param datasource datasource to be queried * @param interval interval with which segments overlap * @return List of pending segment records diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 1265e84605a4..e7567ba27284 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -288,7 +288,7 @@ List getPendingSegmentsForIntervalWithHandle( final Interval interval ) { - boolean compareIntervalEndpointsAsStrings = Intervals.canCompareEndpointsAsStrings(interval); + final boolean compareIntervalEndpointsAsStrings = Intervals.canCompareEndpointsAsStrings(interval); String sql = "SELECT payload, sequence_name, sequence_prev_id, task_allocator_id, upgraded_from_segment_id" + " FROM " + dbTables.getPendingSegmentsTable() diff --git a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java index f72dca2e7c84..44c62bf47ad1 100644 --- 
a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java +++ b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java @@ -33,11 +33,14 @@ /** * Representation of a record in the pending segments table.
 * Mapping of column in table to field:
- * <li> id -> id (Unique identifier for pending segment) </li>
- * <li> sequence_name -> sequenceName (sequence name used for segment allocation) </li>
- * <li> sequence_prev_id -> sequencePrevId (previous segment id used for segment allocation) </li>
- * <li> upgraded_from_segment_id -> upgradedFromSegmentId (Id of the root segment from which this was upgraded) </li>
- * <li> task_allocator_id -> taskAllocatorId (Associates a task / task group / replica group with the pending segment) </li>
+ *
+ * <ul>
+ * <li> id -> id (Unique identifier for pending segment) </li>
+ * <li> sequence_name -> sequenceName (sequence name used for segment allocation) </li>
+ * <li> sequence_prev_id -> sequencePrevId (previous segment id used for segment allocation) </li>
+ * <li> upgraded_from_segment_id -> upgradedFromSegmentId (Id of the root segment from which this was upgraded) </li>
+ * <li> task_allocator_id -> taskAllocatorId (Associates a task / task group / replica group with the pending segment) </li>
+ * </ul>
    */ public class PendingSegmentRecord { @@ -79,7 +82,6 @@ public String getSequencePrevId() /** * The original pending segment using which this upgraded segment was created. - * Corresponds to the column upgraded_from_segment_id in druid_pendingSegments. * Can be null for pending segments allocated before this column was added or for segments that have not been upgraded. */ @Nullable @@ -90,7 +92,6 @@ public String getUpgradedFromSegmentId() /** * task / taskGroup / replica group of task that allocated this segment. - * Corresponds to the column task_allocator_id in druid_pendingSegments. * Can be null for pending segments allocated before this column was added. */ @Nullable From a6f958d5e36c8131f382f778d32c8da22683e988 Mon Sep 17 00:00:00 2001 From: Amatya Date: Tue, 16 Apr 2024 17:16:38 +0530 Subject: [PATCH 18/20] Compactions can allocate segments with segment locking --- .../apache/druid/indexing/common/task/CompactionTask.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java index e5df64c5e790..f5aec08c3062 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java @@ -127,7 +127,7 @@ * serialization fields of this class must correspond to those of {@link * ClientCompactionTaskQuery}. 
*/ -public class CompactionTask extends AbstractBatchIndexTask +public class CompactionTask extends AbstractBatchIndexTask implements PendingSegmentAllocatingTask { private static final Logger log = new Logger(CompactionTask.class); private static final Clock UTC_CLOCK = Clock.systemUTC(); @@ -400,6 +400,12 @@ public String getType() return TYPE; } + @Override + public String getTaskAllocatorId() + { + return getGroupId(); + } + @Nonnull @JsonIgnore @Override From c31837ab64cd531f94c487f6181a6e8f1f1b9a10 Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Tue, 16 Apr 2024 19:10:54 +0530 Subject: [PATCH 19/20] Tests for coverage --- .../msq/indexing/MSQControllerTaskTest.java | 26 ++++++++++++++++--- .../druid/msq/indexing/MSQWorkerTaskTest.java | 8 +++++- .../druid/indexing/overlord/TaskLockbox.java | 15 +++++++---- 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQControllerTaskTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQControllerTaskTest.java index 6aaf21ee3bde..309eb3830ac5 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQControllerTaskTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQControllerTaskTest.java @@ -35,7 +35,7 @@ public class MSQControllerTaskTest { - MSQSpec MSQ_SPEC = MSQSpec + private final MSQSpec MSQ_SPEC = MSQSpec .builder() .destination(new DataSourceMSQDestination( "target", @@ -59,7 +59,7 @@ public class MSQControllerTaskTest @Test public void testGetInputSourceResources() { - MSQControllerTask msqWorkerTask = new MSQControllerTask( + MSQControllerTask controllerTask = new MSQControllerTask( null, MSQ_SPEC, null, @@ -67,7 +67,25 @@ public void testGetInputSourceResources() null, null, null, - null); - Assert.assertTrue(msqWorkerTask.getInputSourceResources().isEmpty()); + null + ); + 
Assert.assertTrue(controllerTask.getInputSourceResources().isEmpty()); + } + + @Test + public void testGetTaskAllocatorId() + { + final String taskId = "taskId"; + MSQControllerTask controllerTask = new MSQControllerTask( + taskId, + MSQ_SPEC, + null, + null, + null, + null, + null, + null + ); + Assert.assertEquals(taskId, controllerTask.getTaskAllocatorId()); } } diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQWorkerTaskTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQWorkerTaskTest.java index 37c31ba3271f..482d67d81abe 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQWorkerTaskTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQWorkerTaskTest.java @@ -47,7 +47,6 @@ public class MSQWorkerTaskTest @Test public void testEquals() { - Assert.assertNotEquals(msqWorkerTask, 0); Assert.assertEquals(msqWorkerTask, msqWorkerTask); Assert.assertEquals( msqWorkerTask, @@ -110,4 +109,11 @@ public void testGetInputSourceResources() Assert.assertTrue(msqWorkerTask.getInputSourceResources().isEmpty()); } + @Test + public void testGetTaskAllocatorId() + { + MSQWorkerTask msqWorkerTask = new MSQWorkerTask(controllerTaskId, dataSource, workerNumber, context, retryCount); + Assert.assertEquals(controllerTaskId, msqWorkerTask.getTaskAllocatorId()); + } + } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java index 8bf2f0d8bba4..3876c7dccbf5 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java @@ -101,12 +101,17 @@ public class TaskLockbox private static final EmittingLogger log = new EmittingLogger(TaskLockbox.class); - // Stores List of Active Tasks. 
TaskLockbox will only grant locks to active activeTasks. - // this set should be accessed under the giant lock. + /** + * Set of active tasks. Locks can be granted only to a task present in this set. + * Should be accessed only under the giant lock. + */ private final Set activeTasks = new HashSet<>(); - // Stores map of pending task group of tasks to the set of their ids. - // Useful for task replicas. Clean up pending segments only when the set is empty. + /** + * Map from a taskAllocatorId to the set of active taskIds using that allocator id. + * Used to clean up pending segments for a taskAllocatorId as soon as the set + * of corresponding active taskIds becomes empty. + */ @GuardedBy("giant") private final Map> activeAllocatorIdToTaskIds = new HashMap<>(); @@ -532,6 +537,7 @@ private void acquireTaskLock(SegmentAllocationHolder holder, boolean isTimeChunk } } + @Nullable private TaskLockPosse createOrFindLockPosse(LockRequest request, Task task, boolean persist) { Preconditions.checkState(!(request instanceof LockRequestForNewSegment), "Can't handle LockRequestForNewSegment"); @@ -1693,7 +1699,6 @@ boolean reusableFor(LockRequest request) } else { throw new ISE("Unknown request type[%s]", request); } - //noinspection SuspiciousIndentAfterControlStatement default: throw new ISE("Unknown lock type[%s]", taskLock.getType()); } From 477abce9d6c2dd666e877269004f290f69112947 Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Tue, 16 Apr 2024 19:46:48 +0530 Subject: [PATCH 20/20] Fix intellij inspection --- .../java/org/apache/druid/indexing/overlord/TaskLockbox.java | 1 + 1 file changed, 1 insertion(+) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java index 3876c7dccbf5..35ec79d74ec7 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java +++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskLockbox.java @@ -1699,6 +1699,7 @@ boolean reusableFor(LockRequest request) } else { throw new ISE("Unknown request type[%s]", request); } + //noinspection SuspiciousIndentAfterControlStatement default: throw new ISE("Unknown lock type[%s]", taskLock.getType()); }