From b93c574e4d36dd95c6985ad83b7872d70415e079 Mon Sep 17 00:00:00 2001 From: Amatya Date: Thu, 27 Jun 2024 17:25:00 +0530 Subject: [PATCH 01/16] Add root_segment_id to segment schema --- .../IndexerSQLMetadataStorageCoordinator.java | 52 +++++-- .../druid/metadata/SQLMetadataConnector.java | 13 ++ .../metadata/SqlSegmentsMetadataQuery.java | 15 +- .../druid/server/http/DataSegmentPlus.java | 23 +++- ...exerSQLMetadataStorageCoordinatorTest.java | 128 +++++++++++++++++- ...SqlMetadataStorageCoordinatorTestBase.java | 2 + .../server/http/DataSegmentPlusTest.java | 1 + .../server/http/MetadataResourceTest.java | 2 +- 8 files changed, 211 insertions(+), 25 deletions(-) diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index fd6377289081..405dee9bf785 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -564,6 +564,7 @@ public SegmentPublishResult commitReplaceSegments( createNewIdsOfAppendSegmentsAfterReplace(handle, replaceSegments, locksHeldByReplaceTask); Map upgradeSegmentMetadata = new HashMap<>(); + Map rootSegmentIdMap = new HashMap<>(); for (DataSegmentPlus dataSegmentPlus : upgradedSegments) { segmentsToInsert.add(dataSegmentPlus.getDataSegment()); if (dataSegmentPlus.getSchemaFingerprint() != null && dataSegmentPlus.getNumRows() != null) { @@ -572,6 +573,9 @@ public SegmentPublishResult commitReplaceSegments( new SegmentMetadata(dataSegmentPlus.getNumRows(), dataSegmentPlus.getSchemaFingerprint()) ); } + if (dataSegmentPlus.getRootSegmentId() != null) { + rootSegmentIdMap.put(dataSegmentPlus.getDataSegment().getId(), dataSegmentPlus.getRootSegmentId()); + } } SegmentPublishResult result = SegmentPublishResult.ok( insertSegments( @@ -579,7 +583,8 @@ public 
SegmentPublishResult commitReplaceSegments( segmentsToInsert, segmentSchemaMapping, upgradeSegmentMetadata, - Collections.emptyMap() + Collections.emptyMap(), + rootSegmentIdMap ), upgradePendingSegmentsOverlappingWith(segmentsToInsert) ); @@ -1408,6 +1413,7 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( final Set allSegmentsToInsert = new HashSet<>(appendSegments); final Map newVersionSegmentToParent = new HashMap<>(); final Map segmentIdMap = new HashMap<>(); + final Map rootSegmentIdMap = new HashMap<>(); appendSegments.forEach(segment -> segmentIdMap.put(segment.getId().toString(), segment)); segmentIdsForNewVersions.forEach( pendingSegment -> { @@ -1415,6 +1421,7 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( final DataSegment oldSegment = segmentIdMap.get(pendingSegment.getUpgradedFromSegmentId()); final SegmentId newVersionSegmentId = pendingSegment.getId().asSegmentId(); newVersionSegmentToParent.put(newVersionSegmentId, oldSegment.getId()); + rootSegmentIdMap.put(newVersionSegmentId, oldSegment.getId().toString()); allSegmentsToInsert.add( new DataSegment( pendingSegment.getId().asSegmentId(), @@ -1473,7 +1480,8 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( allSegmentsToInsert, segmentSchemaMapping, Collections.emptyMap(), - newVersionSegmentToParent + newVersionSegmentToParent, + rootSegmentIdMap ) ); }, @@ -2081,6 +2089,7 @@ private Set announceHistoricalSegmentBatch( for (List partition : partitionedSegments) { for (DataSegment segment : partition) { String segmentId = segment.getId().toString(); + final String rootSegmentId = null; PreparedBatchPart preparedBatchPart = preparedBatch.add() .bind("id", segmentId) @@ -2092,7 +2101,8 @@ private Set announceHistoricalSegmentBatch( .bind("version", segment.getVersion()) .bind("used", usedSegments.contains(segment)) .bind("payload", jsonMapper.writeValueAsBytes(segment)) - .bind("used_status_last_updated", now); + 
.bind("used_status_last_updated", now) + .bind("root_segment_id", rootSegmentId); if (schemaPersistEnabled) { Long numRows = null; @@ -2217,6 +2227,10 @@ private Set createNewIdsOfAppendSegmentsAfterReplace( .shardSpec(shardSpec) .build(); + final String rootSegmentId = oldSegmentMetadata.getRootSegmentId() == null + ? oldSegmentMetadata.getDataSegment().getId().toString() + : oldSegmentMetadata.getRootSegmentId(); + upgradedSegments.add( new DataSegmentPlus( dataSegment, @@ -2224,7 +2238,9 @@ private Set createNewIdsOfAppendSegmentsAfterReplace( null, null, oldSegmentMetadata.getSchemaFingerprint(), - oldSegmentMetadata.getNumRows()) + oldSegmentMetadata.getNumRows(), + rootSegmentId + ) ); } @@ -2266,7 +2282,8 @@ private Set insertSegments( Set segments, @Nullable SegmentSchemaMapping segmentSchemaMapping, Map upgradeSegmentMetadata, - Map newVersionForAppendToParent + Map newVersionForAppendToParent, + Map rootSegmentIdMap ) throws IOException { boolean shouldPersistSchema = shouldPersistSchema(segmentSchemaMapping); @@ -2302,7 +2319,8 @@ private Set insertSegments( .bind("version", segment.getVersion()) .bind("used", true) .bind("payload", jsonMapper.writeValueAsBytes(segment)) - .bind("used_status_last_updated", now); + .bind("used_status_last_updated", now) + .bind("root_segment_id", rootSegmentIdMap.get(segment.getId())); if (schemaPersistEnabled) { SegmentMetadata segmentMetadata = @@ -2449,9 +2467,9 @@ private String buildSqlToInsertSegments() { String insertStatement = "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s," - + " partitioned, version, used, payload, used_status_last_updated %3$s) " + + " partitioned, version, used, payload, used_status_last_updated, root_segment_id %3$s) " + "VALUES (:id, :dataSource, :created_date, :start, :end," - + " :partitioned, :version, :used, :payload, :used_status_last_updated %4$s)"; + + " :partitioned, :version, :used, :payload, :used_status_last_updated, :root_segment_id %4$s)"; if 
(schemaPersistEnabled) { return StringUtils.format( @@ -2923,6 +2941,24 @@ public int deleteUpgradeSegmentsForTask(final String taskId) ); } + @VisibleForTesting + String getRootSegmentIdForSegment(final String segmentId) + { + final String sql = StringUtils.format( + "SELECT root_segment_id from %s where id = :id", + dbTables.getSegmentsTable() + ); + return connector.retryWithHandle( + handle -> { + final List rootIdList = handle.createQuery(sql) + .bind("id", segmentId) + .mapTo(String.class) + .list(); + return rootIdList.isEmpty() ? null : rootIdList.get(0); + } + ); + } + private static class PendingSegmentsRecord { private final String sequenceName; diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java index 2d315d19fc8b..de12f6a5ae7e 100644 --- a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java +++ b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java @@ -587,6 +587,11 @@ protected void alterSegmentTable() Map columnNameTypes = new HashMap<>(); columnNameTypes.put("used_status_last_updated", "VARCHAR(255)"); + // root_segment_id is the segment id of the first / root segment to which the same load spec originally belonged + // Load specs can be shared as a result of segment version upgrade + // This column is null for segments that haven't been upgraded. 
+ columnNameTypes.put("root_segment_id", "VARCHAR(255)"); + if (centralizedDatasourceSchemaConfig.isEnabled()) { columnNameTypes.put("schema_fingerprint", "VARCHAR(255)"); columnNameTypes.put("num_rows", "BIGINT"); @@ -619,6 +624,14 @@ protected void alterSegmentTable() } alterTable(tableName, alterCommands); + + final Set createdIndexSet = getIndexOnTable(tableName); + createIndex( + tableName, + StringUtils.format("idx_%1$s_datasource_root_segment_id", tableName), + ImmutableList.of("dataSource", "root_segment_id"), + createdIndexSet + ); } @Override diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index f14cc9950505..4cede8b8f15f 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -286,7 +286,7 @@ private List retrieveSegmentBatchById( if (includeSchemaInfo) { final Query> query = handle.createQuery( StringUtils.format( - "SELECT payload, used, schema_fingerprint, num_rows FROM %s WHERE dataSource = :dataSource %s", + "SELECT payload, used, root_segment_id, schema_fingerprint, num_rows FROM %s WHERE dataSource = :dataSource %s", dbTables.getSegmentsTable(), getParameterizedInConditionForColumn("id", segmentIds) ) ); @@ -298,15 +298,16 @@ private List retrieveSegmentBatchById( .setFetchSize(connector.getStreamingFetchSize()) .map( (index, r, ctx) -> { - String schemaFingerprint = (String) r.getObject(3); - Long numRows = (Long) r.getObject(4); + String schemaFingerprint = (String) r.getObject(4); + Long numRows = (Long) r.getObject(5); return new DataSegmentPlus( JacksonUtils.readValue(jsonMapper, r.getBytes(1), DataSegment.class), null, null, r.getBoolean(2), schemaFingerprint, - numRows + numRows, + r.getString(3) ); } ) @@ -314,7 +315,7 @@ private List retrieveSegmentBatchById( } else { final Query> query = 
handle.createQuery( StringUtils.format( - "SELECT payload, used FROM %s WHERE dataSource = :dataSource %s", + "SELECT payload, used, root_segment_id FROM %s WHERE dataSource = :dataSource %s", dbTables.getSegmentsTable(), getParameterizedInConditionForColumn("id", segmentIds) ) ); @@ -331,7 +332,8 @@ private List retrieveSegmentBatchById( null, r.getBoolean(2), null, - null + null, + r.getString(3) ) ) .iterator(); @@ -864,6 +866,7 @@ private ResultIterator getDataSegmentPlusResultIterator(Query{@link DataSegmentPlus#createdDate} - The time when the segment was created. *
  • {@link DataSegmentPlus#usedStatusLastUpdatedDate} - The time when the segments * used status was last updated.
  • + *
  • {@link DataSegmentPlus#rootSegmentId} - The root segment id to which the same load spec originally belonged. + * Load specs can be shared as a result of segment version upgrades.
  • * *

    * This class closely resembles the row structure of the {@link MetadataStorageTablesConfig#getSegmentsTable()}. @@ -53,6 +55,9 @@ public class DataSegmentPlus private final String schemaFingerprint; private final Long numRows; + @Nullable + private final String rootSegmentId; + @JsonCreator public DataSegmentPlus( @JsonProperty("dataSegment") final DataSegment dataSegment, @@ -60,7 +65,8 @@ public DataSegmentPlus( @JsonProperty("usedStatusLastUpdatedDate") @Nullable final DateTime usedStatusLastUpdatedDate, @JsonProperty("used") @Nullable final Boolean used, @JsonProperty("schemaFingerprint") @Nullable final String schemaFingerprint, - @JsonProperty("numRows") @Nullable final Long numRows + @JsonProperty("numRows") @Nullable final Long numRows, + @JsonProperty("rootSegmentId") @Nullable final String rootSegmentId ) { this.dataSegment = dataSegment; @@ -69,6 +75,7 @@ public DataSegmentPlus( this.used = used; this.schemaFingerprint = schemaFingerprint; this.numRows = numRows; + this.rootSegmentId = rootSegmentId; } @Nullable @@ -112,6 +119,13 @@ public Long getNumRows() return numRows; } + @Nullable + @JsonProperty + public String getRootSegmentId() + { + return rootSegmentId; + } + @Override public boolean equals(Object o) { @@ -127,7 +141,8 @@ public boolean equals(Object o) && Objects.equals(usedStatusLastUpdatedDate, that.getUsedStatusLastUpdatedDate()) && Objects.equals(used, that.getUsed()) && Objects.equals(schemaFingerprint, that.getSchemaFingerprint()) - && Objects.equals(numRows, that.getNumRows()); + && Objects.equals(numRows, that.getNumRows()) + && Objects.equals(rootSegmentId, that.getRootSegmentId()); } @Override @@ -139,7 +154,8 @@ public int hashCode() usedStatusLastUpdatedDate, used, schemaFingerprint, - numRows + numRows, + rootSegmentId ); } @@ -153,6 +169,7 @@ public String toString() ", used=" + getUsed() + ", schemaFingerprint=" + getSchemaFingerprint() + ", numRows=" + getNumRows() + + ", rootSegmentId=" + getRootSegmentId() + '}'; } } 
diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 222c1ece89fb..550d9fcb0b34 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -138,9 +138,11 @@ public void testCommitAppendSegments() final String v1 = "2023-01-01"; final String v2 = "2023-01-02"; final String v3 = "2023-01-03"; + final String alreadyUpgradedVersion = "2023-02-01"; final String lockVersion = "2024-01-01"; - final String replaceTaskId = "replaceTask1"; + final String taskAllocatorId = "appendTask"; + final String replaceTaskId = "replaceTask"; final ReplaceTaskLock replaceLock = new ReplaceTaskLock( replaceTaskId, Intervals.of("2023-01-01/2023-01-03"), @@ -148,6 +150,7 @@ public void testCommitAppendSegments() ); final Set appendSegments = new HashSet<>(); + final List pendingSegmentsForTask = new ArrayList<>(); final Set expectedSegmentsToUpgrade = new HashSet<>(); for (int i = 0; i < 10; i++) { final DataSegment segment = createSegment( @@ -157,6 +160,31 @@ public void testCommitAppendSegments() ); appendSegments.add(segment); expectedSegmentsToUpgrade.add(segment); + // Add the same segment + pendingSegmentsForTask.add( + new PendingSegmentRecord( + SegmentIdWithShardSpec.fromDataSegment(segment), + v1, + segment.getId().toString(), + null, + taskAllocatorId + ) + ); + // Add upgraded pending segment + pendingSegmentsForTask.add( + new PendingSegmentRecord( + new SegmentIdWithShardSpec( + DS.WIKI, + Intervals.of("2023-01-01/2023-02-01"), + alreadyUpgradedVersion, + new NumberedShardSpec(i, 0) + ), + alreadyUpgradedVersion, + segment.getId().toString(), + segment.getId().toString(), + taskAllocatorId + ) + ); } for (int i = 0; i < 10; i++) { @@ -167,6 +195,31 @@ public void 
testCommitAppendSegments() ); appendSegments.add(segment); expectedSegmentsToUpgrade.add(segment); + // Add the same segment + pendingSegmentsForTask.add( + new PendingSegmentRecord( + SegmentIdWithShardSpec.fromDataSegment(segment), + v2, + segment.getId().toString(), + null, + taskAllocatorId + ) + ); + // Add upgraded pending segment + pendingSegmentsForTask.add( + new PendingSegmentRecord( + new SegmentIdWithShardSpec( + DS.WIKI, + Intervals.of("2023-01-01/2023-02-01"), + alreadyUpgradedVersion, + new NumberedShardSpec(10 + i, 0) + ), + alreadyUpgradedVersion, + segment.getId().toString(), + segment.getId().toString(), + taskAllocatorId + ) + ); } for (int i = 0; i < 10; i++) { @@ -176,23 +229,74 @@ public void testCommitAppendSegments() new LinearShardSpec(i) ); appendSegments.add(segment); + // Add the same segment + pendingSegmentsForTask.add( + new PendingSegmentRecord( + SegmentIdWithShardSpec.fromDataSegment(segment), + v3, + segment.getId().toString(), + null, + taskAllocatorId + ) + ); + // Add upgraded pending segment + pendingSegmentsForTask.add( + new PendingSegmentRecord( + new SegmentIdWithShardSpec( + DS.WIKI, + Intervals.of("2023-01-01/2023-02-01"), + alreadyUpgradedVersion, + new NumberedShardSpec(20 + i, 0) + ), + alreadyUpgradedVersion, + segment.getId().toString(), + segment.getId().toString(), + taskAllocatorId + ) + ); } + derbyConnector.retryWithHandle( + handle -> coordinator.insertPendingSegmentsIntoMetastore(handle, pendingSegmentsForTask, DS.WIKI, false) + ); + final Map segmentToReplaceLock = expectedSegmentsToUpgrade.stream() .collect(Collectors.toMap(s -> s, s -> replaceLock)); // Commit the segment and verify the results SegmentPublishResult commitResult - = coordinator.commitAppendSegments(appendSegments, segmentToReplaceLock, "append", null); + = coordinator.commitAppendSegments(appendSegments, segmentToReplaceLock, taskAllocatorId, null); Assert.assertTrue(commitResult.isSuccess()); - Assert.assertEquals(appendSegments, 
commitResult.getSegments()); + Set allCommittedSegments + = new HashSet<>(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())); // Verify the segments present in the metadata store - Assert.assertEquals( - appendSegments, - ImmutableSet.copyOf(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())) - ); + Assert.assertTrue(allCommittedSegments.containsAll(appendSegments)); + for (DataSegment segment : appendSegments) { + Assert.assertNull(coordinator.getRootSegmentIdForSegment(segment.getId().toString())); + } + allCommittedSegments.removeAll(appendSegments); + + // Verify the commit of upgraded pending segments + Assert.assertEquals(appendSegments.size(), allCommittedSegments.size()); + Map segmentMap = new HashMap<>(); + for (DataSegment segment : appendSegments) { + segmentMap.put(segment.getId().toString(), segment); + } + for (DataSegment segment : allCommittedSegments) { + for (PendingSegmentRecord pendingSegmentRecord : pendingSegmentsForTask) { + if (pendingSegmentRecord.getId().asSegmentId().toString().equals(segment.getId().toString())) { + DataSegment rootSegment = segmentMap.get(pendingSegmentRecord.getUpgradedFromSegmentId()); + Assert.assertNotNull(rootSegment); + Assert.assertEquals(segment.getLoadSpec(), rootSegment.getLoadSpec()); + Assert.assertNotNull( + pendingSegmentRecord.getUpgradedFromSegmentId(), + coordinator.getRootSegmentIdForSegment(segment.getId().toString()) + ); + } + } + } // Verify entries in the segment task lock table final Set expectedUpgradeSegmentIds @@ -293,9 +397,15 @@ public void testCommitReplaceSegments() final Set usedSegments = new HashSet<>(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())); Assert.assertTrue(usedSegments.containsAll(segmentsAppendedWithReplaceLock)); + for (DataSegment appendSegment : segmentsAppendedWithReplaceLock) { + Assert.assertNull(coordinator.getRootSegmentIdForSegment(appendSegment.getId().toString())); + } 
usedSegments.removeAll(segmentsAppendedWithReplaceLock); Assert.assertTrue(usedSegments.containsAll(replacingSegments)); + for (DataSegment replaceSegment : replacingSegments) { + Assert.assertNull(coordinator.getRootSegmentIdForSegment(replaceSegment.getId().toString())); + } usedSegments.removeAll(replacingSegments); Assert.assertEquals(segmentsAppendedWithReplaceLock.size(), usedSegments.size()); @@ -303,6 +413,10 @@ public void testCommitReplaceSegments() boolean hasBeenCarriedForward = false; for (DataSegment appendedSegment : segmentsAppendedWithReplaceLock) { if (appendedSegment.getLoadSpec().equals(segmentReplicaWithNewVersion.getLoadSpec())) { + Assert.assertEquals( + appendedSegment.getId().toString(), + coordinator.getRootSegmentIdForSegment(segmentReplicaWithNewVersion.getId().toString()) + ); hasBeenCarriedForward = true; break; } diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorTestBase.java b/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorTestBase.java index ce0e06860583..26d1e8518d97 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorTestBase.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorTestBase.java @@ -58,6 +58,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; @@ -322,6 +323,7 @@ protected DataSegment createSegment(Interval interval, String version, ShardSpec .version(version) .shardSpec(shardSpec) .size(100) + .loadSpec(ImmutableMap.of("hash", Objects.hash(interval, version, shardSpec))) .build(); } diff --git a/server/src/test/java/org/apache/druid/server/http/DataSegmentPlusTest.java b/server/src/test/java/org/apache/druid/server/http/DataSegmentPlusTest.java index 0f20fc96bdcc..ab5602303eb9 100644 --- 
a/server/src/test/java/org/apache/druid/server/http/DataSegmentPlusTest.java +++ b/server/src/test/java/org/apache/druid/server/http/DataSegmentPlusTest.java @@ -100,6 +100,7 @@ public void testSerde() throws JsonProcessingException usedStatusLastUpdatedDate, null, null, + null, null ); diff --git a/server/src/test/java/org/apache/druid/server/http/MetadataResourceTest.java b/server/src/test/java/org/apache/druid/server/http/MetadataResourceTest.java index 4d6bbf5929be..9c52d639300c 100644 --- a/server/src/test/java/org/apache/druid/server/http/MetadataResourceTest.java +++ b/server/src/test/java/org/apache/druid/server/http/MetadataResourceTest.java @@ -77,7 +77,7 @@ public class MetadataResourceTest .toArray(new DataSegment[0]); private final List segmentsPlus = Arrays.stream(segments) - .map(s -> new DataSegmentPlus(s, DateTimes.nowUtc(), DateTimes.nowUtc(), null, null, null)) + .map(s -> new DataSegmentPlus(s, DateTimes.nowUtc(), DateTimes.nowUtc(), null, null, null, null)) .collect(Collectors.toList()); private HttpServletRequest request; private SegmentsMetadataManager segmentsMetadataManager; From 6a685654b056470de3d7446e6d1e5a27dfd28324 Mon Sep 17 00:00:00 2001 From: Amatya Date: Fri, 28 Jun 2024 08:29:53 +0530 Subject: [PATCH 02/16] Fix DataSegmentPlus serde test --- .../java/org/apache/druid/server/http/DataSegmentPlusTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/apache/druid/server/http/DataSegmentPlusTest.java b/server/src/test/java/org/apache/druid/server/http/DataSegmentPlusTest.java index ab5602303eb9..b963f4337081 100644 --- a/server/src/test/java/org/apache/druid/server/http/DataSegmentPlusTest.java +++ b/server/src/test/java/org/apache/druid/server/http/DataSegmentPlusTest.java @@ -109,7 +109,7 @@ public void testSerde() throws JsonProcessingException JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT ); - Assert.assertEquals(6, objectMap.size()); + Assert.assertEquals(7, objectMap.size()); final 
Map segmentObjectMap = MAPPER.readValue( MAPPER.writeValueAsString(segmentPlus.getDataSegment()), JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT From 5905c35154e50f8eccdad3918254453642b20d8d Mon Sep 17 00:00:00 2001 From: Amatya Date: Fri, 28 Jun 2024 10:59:34 +0530 Subject: [PATCH 03/16] Add kill task changes --- .../common/actions/SegmentNukeAction.java | 31 +++-- .../common/task/KillUnusedSegmentsTask.java | 28 +++-- ...TestIndexerMetadataStorageCoordinator.java | 12 ++ .../IndexerMetadataStorageCoordinator.java | 17 +++ .../IndexerSQLMetadataStorageCoordinator.java | 49 +++++++- ...exerSQLMetadataStorageCoordinatorTest.java | 114 +++++++++++++++++- 6 files changed, 227 insertions(+), 24 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentNukeAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentNukeAction.java index 2856f161e0e6..99ae4848d774 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentNukeAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentNukeAction.java @@ -26,16 +26,20 @@ import org.apache.druid.indexing.common.task.IndexTaskUtils; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.overlord.CriticalAction; +import org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; import org.apache.druid.query.DruidMetrics; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.timeline.DataSegment; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; -public class SegmentNukeAction implements TaskAction +public class SegmentNukeAction implements TaskAction> { private final Set segments; @@ -54,18 +58,18 @@ public Set getSegments() } 
@Override - public TypeReference getReturnTypeReference() + public TypeReference> getReturnTypeReference() { - return new TypeReference() + return new TypeReference>() { }; } @Override - public Void perform(Task task, TaskActionToolbox toolbox) + public Set perform(Task task, TaskActionToolbox toolbox) { TaskLocks.checkLockCoversSegments(task, toolbox.getTaskLockbox(), segments); - + final Set segmentsToKill = new HashSet<>(); try { toolbox.getTaskLockbox().doInCriticalSection( task, @@ -73,7 +77,20 @@ public Void perform(Task task, TaskActionToolbox toolbox) CriticalAction.builder() .onValidLocks( () -> { - toolbox.getIndexerMetadataStorageCoordinator().deleteSegments(segments); + final IndexerMetadataStorageCoordinator coordinator + = toolbox.getIndexerMetadataStorageCoordinator(); + // Need to store the root segment ids before the segments are deleted from the db + final Map rootSegmentIdMap = new HashMap<>(); + for (DataSegment segment : segments) { + rootSegmentIdMap.put(segment, coordinator.getRootSegmentId(segment)); + } + coordinator.deleteSegments(segments); + // Check for references after all segments in the batch have been killed + for (DataSegment segment : segments) { + if (coordinator.isLoadSpecUnreferenced(segment, rootSegmentIdMap.get(segment))) { + segmentsToKill.add(segment); + } + } return null; } ) @@ -98,7 +115,7 @@ public Void perform(Task task, TaskActionToolbox toolbox) toolbox.getEmitter().emit(metricBuilder.setMetric("segment/nuked/bytes", segment.getSize())); } - return null; + return segmentsToKill; } @Override diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index fe49569a3bbf..7823d7cb9a7c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -237,14 +237,26 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception // the metadata segment is present. If the segment nuke throws an exception, then the segment cleanup is // abandoned. - toolbox.getTaskActionClient().submit(new SegmentNukeAction(new HashSet<>(unusedSegments))); - - // Kill segments from the deep storage only if their load specs are not being used by any used segments - final List segmentsToBeKilled = unusedSegments - .stream() - .filter(unusedSegment -> unusedSegment.getLoadSpec() == null - || !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec())) - .collect(Collectors.toList()); + final List segmentsToBeKilled; + + Set segmentsWithUnreferencedLoadSpecs + = toolbox.getTaskActionClient().submit(new SegmentNukeAction(new HashSet<>(unusedSegments))); + if (segmentsWithUnreferencedLoadSpecs != null) { + // New task action that returns the exact set of segments to be killed from deep storage + // We still need to check used segment load specs as segment upgrades pre-date this change + segmentsToBeKilled = segmentsWithUnreferencedLoadSpecs + .stream() + .filter(unusedSegment -> unusedSegment.getLoadSpec() == null + || !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec())) + .collect(Collectors.toList()); + } else { + // Kill segments from the deep storage only if their load specs are not being used by any used segments + segmentsToBeKilled = unusedSegments + .stream() + .filter(unusedSegment -> unusedSegment.getLoadSpec() == null + || !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec())) + .collect(Collectors.toList()); + } toolbox.getDataSegmentKiller().kill(segmentsToBeKilled); numBatchesProcessed++; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 61a57e948423..5138af1cc0ac 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -314,6 +314,18 @@ public List getPendingSegments(String datasource, Interval throw new UnsupportedOperationException(); } + @Override + public String getRootSegmentId(final DataSegment segment) + { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isLoadSpecUnreferenced(final DataSegment segment, final String rootSegmentId) + { + throw new UnsupportedOperationException(); + } + public Set getPublished() { return ImmutableSet.copyOf(published); diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index c055a8d9e9f4..6cc898220b63 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -473,4 +473,21 @@ SegmentPublishResult commitMetadataOnly( * @return List of pending segment records */ List getPendingSegments(String datasource, Interval interval); + + /** + * Fetches the id of the segment to which the load spec originally belonged for an upgraded segment. 
+ * returns null for non-upgraded segments + * @param segment DataSegment + * @return root segment id + */ + String getRootSegmentId(DataSegment segment); + + /** + * Verifies if a load spec is being used by any other used or unused segment by relying on the lineage of upgrades + * The root segment id must be fetched and provided as an argument for already deleted segments + * @param segment segment whose load spec needs to be deleted from deep storage + * @param rootSegmentId the id of the segment which was originally pushed to deep storage + * @return true iff the load spec is not being used by any other segment + */ + boolean isLoadSpecUnreferenced(DataSegment segment, String rootSegmentId); } diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 405dee9bf785..ddcf3ac03562 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -2941,17 +2941,40 @@ public int deleteUpgradeSegmentsForTask(final String taskId) ); } - @VisibleForTesting - String getRootSegmentIdForSegment(final String segmentId) + @Override + public boolean isLoadSpecUnreferenced(final DataSegment segment, final String rootSegmentId) + { + // Check if the segment itself exists + if (retrieveSegmentForId(segment.getId().toString(), true) != null) { + return false; + } + + // Check if the segment is the parent of any other segment + if (!getSegmentIdsWithRootSegmentId(segment.getDataSource(), segment.getId().toString()).isEmpty()) { + return false; + } + + // Check if the segment is the child or sibling of any other segment + if (rootSegmentId == null) { + return true; + } + if (retrieveSegmentForId(rootSegmentId, true) != null) { + return false; + } + return getSegmentIdsWithRootSegmentId(segment.getDataSource(),
rootSegmentId).isEmpty(); + } + + @Override + public String getRootSegmentId(final DataSegment segment) { final String sql = StringUtils.format( - "SELECT root_segment_id from %s where id = :id", + "SELECT root_segment_id FROM %s WHERE id = :id", dbTables.getSegmentsTable() ); return connector.retryWithHandle( handle -> { final List rootIdList = handle.createQuery(sql) - .bind("id", segmentId) + .bind("id", segment.getId().toString()) .mapTo(String.class) .list(); return rootIdList.isEmpty() ? null : rootIdList.get(0); @@ -2959,6 +2982,24 @@ String getRootSegmentIdForSegment(final String segmentId) ); } + private List getSegmentIdsWithRootSegmentId(final String dataSource, final String rootSegmentId) + { + final String sql = StringUtils.format( + "SELECT id FROM %s WHERE dataSource = :dataSource AND root_segment_id = :root_segment_id", + dbTables.getSegmentsTable() + ); + return connector.retryWithHandle( + handle -> { + final List segmentIds = handle.createQuery(sql) + .bind("dataSource", dataSource) + .bind("root_segment_id", rootSegmentId) + .mapTo(String.class) + .list(); + return segmentIds; + } + ); + } + private static class PendingSegmentsRecord { private final String sequenceName; diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 550d9fcb0b34..7bf4fc270aef 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -31,6 +31,7 @@ import org.apache.druid.indexing.overlord.SegmentPublishResult; import org.apache.druid.indexing.overlord.Segments; import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Pair; import 
org.apache.druid.java.util.common.StringUtils; @@ -48,6 +49,7 @@ import org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec; import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec; import org.apache.druid.timeline.partition.LinearShardSpec; +import org.apache.druid.timeline.partition.NoneShardSpec; import org.apache.druid.timeline.partition.NumberedOverwritePartialShardSpec; import org.apache.druid.timeline.partition.NumberedOverwriteShardSpec; import org.apache.druid.timeline.partition.NumberedPartialShardSpec; @@ -65,10 +67,12 @@ import org.junit.Rule; import org.junit.Test; import org.skife.jdbi.v2.Handle; +import org.skife.jdbi.v2.PreparedBatch; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; @@ -274,7 +278,7 @@ public void testCommitAppendSegments() // Verify the segments present in the metadata store Assert.assertTrue(allCommittedSegments.containsAll(appendSegments)); for (DataSegment segment : appendSegments) { - Assert.assertNull(coordinator.getRootSegmentIdForSegment(segment.getId().toString())); + Assert.assertNull(coordinator.getRootSegmentId(segment)); } allCommittedSegments.removeAll(appendSegments); @@ -292,7 +296,7 @@ public void testCommitAppendSegments() Assert.assertEquals(segment.getLoadSpec(), rootSegment.getLoadSpec()); Assert.assertNotNull( pendingSegmentRecord.getUpgradedFromSegmentId(), - coordinator.getRootSegmentIdForSegment(segment.getId().toString()) + coordinator.getRootSegmentId(segment) ); } } @@ -398,13 +402,13 @@ public void testCommitReplaceSegments() Assert.assertTrue(usedSegments.containsAll(segmentsAppendedWithReplaceLock)); for (DataSegment appendSegment : segmentsAppendedWithReplaceLock) { - Assert.assertNull(coordinator.getRootSegmentIdForSegment(appendSegment.getId().toString())); + 
Assert.assertNull(coordinator.getRootSegmentId(appendSegment)); } usedSegments.removeAll(segmentsAppendedWithReplaceLock); Assert.assertTrue(usedSegments.containsAll(replacingSegments)); for (DataSegment replaceSegment : replacingSegments) { - Assert.assertNull(coordinator.getRootSegmentIdForSegment(replaceSegment.getId().toString())); + Assert.assertNull(coordinator.getRootSegmentId(replaceSegment)); } usedSegments.removeAll(replacingSegments); @@ -415,7 +419,7 @@ public void testCommitReplaceSegments() if (appendedSegment.getLoadSpec().equals(segmentReplicaWithNewVersion.getLoadSpec())) { Assert.assertEquals( appendedSegment.getId().toString(), - coordinator.getRootSegmentIdForSegment(segmentReplicaWithNewVersion.getId().toString()) + coordinator.getRootSegmentId(segmentReplicaWithNewVersion) ); hasBeenCarriedForward = true; break; @@ -3414,4 +3418,104 @@ public void testRetrieveUnusedSegmentsForExactIntervalAndVersion() unusedSegmentIdsForIntervalAndVersion.get(0) ); } + + @Test + public void testGetRootSegmentId() + { + DataSegment root = createSegment(Intervals.of("2024/2025"), "2024-01-01", new NumberedShardSpec(0, 0)); + DataSegment childV1 = createSegment(Intervals.of("2024/2025"), "2024-02-01", new NumberedShardSpec(0, 0)); + DataSegment childV2 = createSegment(Intervals.ETERNITY, "2025-01-01", new NumberedShardSpec(0, 0)); + Map rootSegmentIdMap = ImmutableMap.of( + childV1.getId(), + root.getId().toString(), + childV2.getId(), + root.getId().toString() + ); + insertUsedSegments(ImmutableSet.of(root, childV1, childV2), rootSegmentIdMap); + coordinator.markSegmentsAsUnusedWithinInterval(DS.WIKI, Intervals.of("2024/2025")); + Assert.assertNull(coordinator.getRootSegmentId(root)); + Assert.assertEquals(root.getId().toString(), coordinator.getRootSegmentId(childV1)); + Assert.assertEquals(root.getId().toString(), coordinator.getRootSegmentId(childV2)); + } + + @Test + public void testIsLoadSpecUnreferenced() + { + DataSegment root = 
createSegment(Intervals.of("2024/2025"), "2024-01-01", new NumberedShardSpec(0, 0)); + DataSegment childV1 = createSegment(Intervals.of("2024/2025"), "2024-02-01", new NumberedShardSpec(0, 0)); + DataSegment childV2 = createSegment(Intervals.ETERNITY, "2025-01-01", new NumberedShardSpec(0, 0)); + Map rootSegmentIdMap = ImmutableMap.of( + childV1.getId(), + root.getId().toString(), + childV2.getId(), + root.getId().toString() + ); + insertUsedSegments(ImmutableSet.of(root, childV1, childV2), rootSegmentIdMap); + coordinator.markSegmentsAsUnusedWithinInterval(DS.WIKI, Intervals.of("2024/2025")); + + final String rootSegmentId = root.getId().toString(); + Assert.assertFalse(coordinator.isLoadSpecUnreferenced(root, null)); + Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV1, rootSegmentId)); + Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV2, rootSegmentId)); + + Assert.assertFalse(coordinator.isLoadSpecUnreferenced(root, null)); + Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV1, rootSegmentId)); + Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV2, rootSegmentId)); + + coordinator.deleteSegments(ImmutableSet.of(childV2)); + Assert.assertFalse(coordinator.isLoadSpecUnreferenced(root, null)); + Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV1, rootSegmentId)); + Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV2, rootSegmentId)); + + + coordinator.deleteSegments(ImmutableSet.of(root)); + Assert.assertFalse(coordinator.isLoadSpecUnreferenced(root, null)); + Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV1, rootSegmentId)); + Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV2, rootSegmentId)); + + coordinator.deleteSegments(ImmutableSet.of(childV1)); + Assert.assertTrue(coordinator.isLoadSpecUnreferenced(root, null)); + Assert.assertTrue(coordinator.isLoadSpecUnreferenced(childV1, rootSegmentId)); + Assert.assertTrue(coordinator.isLoadSpecUnreferenced(childV2, 
rootSegmentId)); + } + + + private boolean insertUsedSegments(Set dataSegments, Map rootSegmentIdMap) + { + final String table = derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable(); + return derbyConnector.retryWithHandle( + handle -> { + PreparedBatch preparedBatch = handle.prepareBatch( + StringUtils.format( + "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, partitioned, version, used, payload, used_status_last_updated, root_segment_id) " + + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload, :used_status_last_updated, :root_segment_id)", + table, + derbyConnector.getQuoteString() + ) + ); + for (DataSegment segment : dataSegments) { + String id = segment.getId().toString(); + preparedBatch.add() + .bind("id", id) + .bind("dataSource", segment.getDataSource()) + .bind("created_date", DateTimes.nowUtc().toString()) + .bind("start", segment.getInterval().getStart().toString()) + .bind("end", segment.getInterval().getEnd().toString()) + .bind("partitioned", !(segment.getShardSpec() instanceof NoneShardSpec)) + .bind("version", segment.getVersion()) + .bind("used", true) + .bind("payload", mapper.writeValueAsBytes(segment)) + .bind("used_status_last_updated", DateTimes.nowUtc().toString()) + .bind("root_segment_id", rootSegmentIdMap.get(segment.getId())); + } + + final int[] affectedRows = preparedBatch.execute(); + final boolean succeeded = Arrays.stream(affectedRows).allMatch(eachAffectedRows -> eachAffectedRows == 1); + if (!succeeded) { + throw new ISE("Failed to publish segments to DB"); + } + return true; + } + ); + } } From d3e487dbd237db86e8254e03c1fe3cf303c92057 Mon Sep 17 00:00:00 2001 From: Amatya Date: Fri, 28 Jun 2024 12:21:29 +0530 Subject: [PATCH 04/16] Fix test --- .../indexing/test/TestIndexerMetadataStorageCoordinator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 5138af1cc0ac..45d66195d0de 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -317,13 +317,13 @@ public List getPendingSegments(String datasource, Interval @Override public String getRootSegmentId(final DataSegment segment) { - throw new UnsupportedOperationException(); + return null; } @Override public boolean isLoadSpecUnreferenced(final DataSegment segment, final String rootSegmentId) { - throw new UnsupportedOperationException(); + return true; } public Set getPublished() From 5975563afa0d252274f66d4edfe41273516a0b68 Mon Sep 17 00:00:00 2001 From: Amatya Date: Fri, 28 Jun 2024 22:26:32 +0530 Subject: [PATCH 05/16] Batch the metadata calls --- .../common/actions/SegmentNukeAction.java | 18 +- ...TestIndexerMetadataStorageCoordinator.java | 10 +- .../IndexerMetadataStorageCoordinator.java | 18 +- .../IndexerSQLMetadataStorageCoordinator.java | 156 +++++++++++++----- ...exerSQLMetadataStorageCoordinatorTest.java | 102 ++++++------ 5 files changed, 182 insertions(+), 122 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentNukeAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentNukeAction.java index 99ae4848d774..767e7d2f74e4 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentNukeAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentNukeAction.java @@ -33,9 +33,7 @@ import org.apache.druid.segment.SegmentUtils; import org.apache.druid.timeline.DataSegment; -import java.util.HashMap; import 
java.util.HashSet; -import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -79,18 +77,12 @@ public Set perform(Task task, TaskActionToolbox toolbox) () -> { final IndexerMetadataStorageCoordinator coordinator = toolbox.getIndexerMetadataStorageCoordinator(); - // Need to store the root segment ids before the segments are deleted from the db - final Map rootSegmentIdMap = new HashMap<>(); - for (DataSegment segment : segments) { - rootSegmentIdMap.put(segment, coordinator.getRootSegmentId(segment)); - } + + // Find subset of segments with unreferenced load specs assuming successful nuking + segmentsToKill.addAll(coordinator.findSegmentsWithUnreferencedLoadSpecs(segments)); + + // Nuke segments from metadata store coordinator.deleteSegments(segments); - // Check for references after all segments in the batch have been killed - for (DataSegment segment : segments) { - if (coordinator.isLoadSpecUnreferenced(segment, rootSegmentIdMap.get(segment))) { - segmentsToKill.add(segment); - } - } return null; } ) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 45d66195d0de..f59311b06c20 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -315,15 +315,9 @@ public List getPendingSegments(String datasource, Interval } @Override - public String getRootSegmentId(final DataSegment segment) + public Set findSegmentsWithUnreferencedLoadSpecs(final Set segments) { - return null; - } - - @Override - public boolean isLoadSpecUnreferenced(final DataSegment segment, final String rootSegmentId) - { - return true; + return segments; } public Set getPublished() diff --git 
a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index 6cc898220b63..f9e0899f3507 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -475,19 +475,9 @@ SegmentPublishResult commitMetadataOnly( List getPendingSegments(String datasource, Interval interval); /** - * Fetches the id of the segment to which the load spec originally belonged for an upgraded segment. - * returns null for non-upgraded segments - * @param segment DataSegment - * @return root segment id + * Return subset of segments that do not have load specs referenced by segments outside this set + * @param segments set of DataSegments + * @return subset of segments that can safely be killed from deep storage */ - String getRootSegmentId(DataSegment segment); - - /** - * Verifies if a load spec is being used by any other used or unused segment by relying on the lineage of upgrades - * The root segment id must be fetched and provided as an argument for already deleted segments - * @param segment segment whose load spec needs to be deleted from deep storage - * @param rootSegmentId the id of the segment which was originally pushed to deep storage - * @return true iff the load spec is not being used by any other segment - */ - boolean isLoadSpecUnreferenced(DataSegment segment, String rootSegmentId); + Set findSegmentsWithUnreferencedLoadSpecs(Set segments); } diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index ddcf3ac03562..69205a6c17d4 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++
b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -1353,6 +1353,21 @@ public int hashCode() } } + private static void bindColumnValuesToQueryWithInCondition( + final String columnName, + final List values, + final Query> query + ) + { + if (values == null) { + return; + } + + for (int i = 0; i < values.size(); i++) { + query.bind(StringUtils.format("%s%d", columnName, i), values.get(i)); + } + } + private static void bindColumnValuesToQueryWithInCondition( final String columnName, final List values, @@ -2942,62 +2957,127 @@ public int deleteUpgradeSegmentsForTask(final String taskId) } @Override - public boolean isLoadSpecUnreferenced(final DataSegment segment, final String rootSegmentId) + public Set findSegmentsWithUnreferencedLoadSpecs(final Set segments) { - // Check if the segment itself exists - if (retrieveSegmentForId(segment.getId().toString(), true) != null) { - return false; - } - - // Check if the segment is the parent of any other segment - if (!getSegmentIdsWithRootSegmentId(segment.getDataSource(), segment.getId().toString()).isEmpty()) { - return false; - } - - // Check if the segment is the child or sibling of any other segment - if (rootSegmentId == null) { - return true; - } - if (retrieveSegmentForId(rootSegmentId, true) != null) { - return false; + if (segments.isEmpty()) { + return segments; + } + final String dataSource = segments.stream() + .findFirst() + .get() + .getDataSource(); + final List segmentIds = segments.stream() + .map(DataSegment::getId) + .map(SegmentId::toString) + .collect(Collectors.toList()); + Map idToRootMap = new HashMap<>(getRootSegmentIds(dataSource, segmentIds)); + final Set rootSegmentIds = new HashSet<>(segmentIds); + rootSegmentIds.addAll(idToRootMap.values()); + idToRootMap.putAll(getSegmentsWithRootSegmentIds(dataSource, new ArrayList<>(rootSegmentIds))); + final Map> rootToIdsMap = new HashMap<>(); + for (Map.Entry idAndRoot : idToRootMap.entrySet()) { + 
rootToIdsMap.computeIfAbsent(idAndRoot.getValue(), root -> new HashSet<>()) + .add(idAndRoot.getKey()); + } + final Set segmentsWithUnreferencedLoadSpecs = new HashSet<>(); + final Set existingSegmentIds = retrieveSegmentsById(dataSource, rootSegmentIds).stream() + .map(DataSegment::getId) + .map(SegmentId::toString) + .collect(Collectors.toSet()); + for (DataSegment segment : segments) { + final String id = segment.getId().toString(); + final Set conflicts = new HashSet<>(); + if (rootToIdsMap.containsKey(id)) { + // Add children + conflicts.addAll(rootToIdsMap.get(id)); + } + final String parent = idToRootMap.get(id); + if (parent != null) { + // add parent if it exists + if (existingSegmentIds.contains(parent)) { + conflicts.add(parent); + } + // add siblings + if (rootToIdsMap.containsKey(parent)) { + conflicts.addAll(rootToIdsMap.get(parent)); + } + } + // Remove segments being deleted + segmentIds.forEach(conflicts::remove); + if (conflicts.isEmpty()) { + segmentsWithUnreferencedLoadSpecs.add(segment); + } } - return getSegmentIdsWithRootSegmentId(segment.getDataSource(), rootSegmentId).isEmpty(); + return segmentsWithUnreferencedLoadSpecs; } - @Override - public String getRootSegmentId(final DataSegment segment) + @VisibleForTesting + Map getRootSegmentIds(final String dataSource, final List segmentIds) { + if (segmentIds.isEmpty()) { + return Collections.emptyMap(); + } + final String sql = StringUtils.format( - "SELECT root_segment_id FROM %s WHERE id = :id", - dbTables.getSegmentsTable() + "SELECT id, root_segment_id FROM %s WHERE dataSource = :dataSource %s", + dbTables.getSegmentsTable(), + SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("id", segmentIds) ); - return connector.retryWithHandle( + Map rootSegmentIdMap = new HashMap<>(); + connector.retryWithHandle( handle -> { - final List rootIdList = handle.createQuery(sql) - .bind("id", segment.getId().toString()) - .mapTo(String.class) - .list(); - return rootIdList.isEmpty() ? 
null : rootIdList.get(0); + Query> query = handle.createQuery(sql) + .bind("dataSource", dataSource); + bindColumnValuesToQueryWithInCondition("id", segmentIds, query); + query.map( + (index, r, ctx) -> { + final String id = r.getString(1); + final String rootSegmentId = r.getString(2); + if (rootSegmentId != null) { + rootSegmentIdMap.put(id, rootSegmentId); + } + return null; + } + ) + .list(); + return null; } ); + return rootSegmentIdMap; } - private List getSegmentIdsWithRootSegmentId(final String dataSource, final String rootSegmentId) + private Map getSegmentsWithRootSegmentIds(final String dataSource, final List rootSegmentIds) { + if (rootSegmentIds.isEmpty()) { + return Collections.emptyMap(); + } + final String sql = StringUtils.format( - "SELECT id FROM %s WHERE dataSource = :dataSource AND root_segment_id = :root_segment_id", - dbTables.getSegmentsTable() + "SELECT id, root_segment_id FROM %s WHERE dataSource = :dataSource %s", + dbTables.getSegmentsTable(), + SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("root_segment_id", rootSegmentIds) ); - return connector.retryWithHandle( + Map rootSegmentIdMap = new HashMap<>(); + connector.retryWithHandle( handle -> { - final List segmentIds = handle.createQuery(sql) - .bind("dataSource", dataSource) - .bind("root_segment_id", rootSegmentId) - .mapTo(String.class) - .list(); - return segmentIds; + Query> query = handle.createQuery(sql) + .bind("dataSource", dataSource); + bindColumnValuesToQueryWithInCondition("root_segment_id", rootSegmentIds, query); + query.map( + (index, r, ctx) -> { + final String id = r.getString(1); + final String rootSegmentId = r.getString(2); + if (rootSegmentId != null) { + rootSegmentIdMap.put(id, rootSegmentId); + } + return null; + } + ) + .list(); + return null; } ); + return rootSegmentIdMap; } private static class PendingSegmentsRecord diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java 
b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 7bf4fc270aef..946a00bdc464 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -275,10 +275,14 @@ public void testCommitAppendSegments() Set allCommittedSegments = new HashSet<>(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())); + Map rootSegmentIdMap = coordinator.getRootSegmentIds( + DS.WIKI, + allCommittedSegments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toList()) + ); // Verify the segments present in the metadata store Assert.assertTrue(allCommittedSegments.containsAll(appendSegments)); for (DataSegment segment : appendSegments) { - Assert.assertNull(coordinator.getRootSegmentId(segment)); + Assert.assertNull(rootSegmentIdMap.get(segment.getId().toString())); } allCommittedSegments.removeAll(appendSegments); @@ -294,9 +298,9 @@ public void testCommitAppendSegments() DataSegment rootSegment = segmentMap.get(pendingSegmentRecord.getUpgradedFromSegmentId()); Assert.assertNotNull(rootSegment); Assert.assertEquals(segment.getLoadSpec(), rootSegment.getLoadSpec()); - Assert.assertNotNull( + Assert.assertEquals( pendingSegmentRecord.getUpgradedFromSegmentId(), - coordinator.getRootSegmentId(segment) + rootSegmentIdMap.get(segment.getId().toString()) ); } } @@ -398,17 +402,23 @@ public void testCommitReplaceSegments() retrieveUsedSegmentIds(derbyConnectorRule.metadataTablesConfigSupplier().get()).size() ); - final Set usedSegments = new HashSet<>(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())); + final Set usedSegments + = new HashSet<>(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())); + + final Map rootSegmentIdMap = coordinator.getRootSegmentIds( + "foo", + 
usedSegments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toList()) + ); Assert.assertTrue(usedSegments.containsAll(segmentsAppendedWithReplaceLock)); for (DataSegment appendSegment : segmentsAppendedWithReplaceLock) { - Assert.assertNull(coordinator.getRootSegmentId(appendSegment)); + Assert.assertNull(rootSegmentIdMap.get(appendSegment.getId().toString())); } usedSegments.removeAll(segmentsAppendedWithReplaceLock); Assert.assertTrue(usedSegments.containsAll(replacingSegments)); for (DataSegment replaceSegment : replacingSegments) { - Assert.assertNull(coordinator.getRootSegmentId(replaceSegment)); + Assert.assertNull(rootSegmentIdMap.get(replaceSegment.getId().toString())); } usedSegments.removeAll(replacingSegments); @@ -419,7 +429,7 @@ public void testCommitReplaceSegments() if (appendedSegment.getLoadSpec().equals(segmentReplicaWithNewVersion.getLoadSpec())) { Assert.assertEquals( appendedSegment.getId().toString(), - coordinator.getRootSegmentId(segmentReplicaWithNewVersion) + rootSegmentIdMap.get(segmentReplicaWithNewVersion.getId().toString()) ); hasBeenCarriedForward = true; break; @@ -3420,31 +3430,39 @@ public void testRetrieveUnusedSegmentsForExactIntervalAndVersion() } @Test - public void testGetRootSegmentId() + public void testFindDataSegmentsWithUnreferencedLoadSpecs() { - DataSegment root = createSegment(Intervals.of("2024/2025"), "2024-01-01", new NumberedShardSpec(0, 0)); - DataSegment childV1 = createSegment(Intervals.of("2024/2025"), "2024-02-01", new NumberedShardSpec(0, 0)); - DataSegment childV2 = createSegment(Intervals.ETERNITY, "2025-01-01", new NumberedShardSpec(0, 0)); + // Empty set + Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(Collections.emptySet()).isEmpty()); + + // Lone segment is unreferenced + insertUsedSegments(Collections.singleton(eternitySegment), Collections.emptyMap()); + coordinator.markSegmentsAsUnusedWithinInterval(eternitySegment.getDataSource(), 
Intervals.ETERNITY); + Assert.assertEquals( + Collections.singleton(eternitySegment), + coordinator.findSegmentsWithUnreferencedLoadSpecs(Collections.singleton(eternitySegment)) + ); + + // Siblings are always referenced unless they're passed together Map rootSegmentIdMap = ImmutableMap.of( - childV1.getId(), - root.getId().toString(), - childV2.getId(), - root.getId().toString() + defaultSegment.getId(), + "nonExistentRoot", + defaultSegment2.getId(), + "nonExistentRoot" + ); + insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), rootSegmentIdMap); + Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(defaultSegment)).isEmpty()); + Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(defaultSegment2)).isEmpty()); + Assert.assertEquals( + ImmutableSet.of(defaultSegment, defaultSegment2), + coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(defaultSegment, defaultSegment2)) ); - insertUsedSegments(ImmutableSet.of(root, childV1, childV2), rootSegmentIdMap); - coordinator.markSegmentsAsUnusedWithinInterval(DS.WIKI, Intervals.of("2024/2025")); - Assert.assertNull(coordinator.getRootSegmentId(root)); - Assert.assertEquals(root.getId().toString(), coordinator.getRootSegmentId(childV1)); - Assert.assertEquals(root.getId().toString(), coordinator.getRootSegmentId(childV2)); - } - @Test - public void testIsLoadSpecUnreferenced() - { + // The parent and all its children must all be present in the batch for them to be unreferenced DataSegment root = createSegment(Intervals.of("2024/2025"), "2024-01-01", new NumberedShardSpec(0, 0)); DataSegment childV1 = createSegment(Intervals.of("2024/2025"), "2024-02-01", new NumberedShardSpec(0, 0)); DataSegment childV2 = createSegment(Intervals.ETERNITY, "2025-01-01", new NumberedShardSpec(0, 0)); - Map rootSegmentIdMap = ImmutableMap.of( + rootSegmentIdMap = ImmutableMap.of( childV1.getId(), root.getId().toString(), childV2.getId(), @@ -3453,30 
+3471,16 @@ public void testIsLoadSpecUnreferenced() insertUsedSegments(ImmutableSet.of(root, childV1, childV2), rootSegmentIdMap); coordinator.markSegmentsAsUnusedWithinInterval(DS.WIKI, Intervals.of("2024/2025")); - final String rootSegmentId = root.getId().toString(); - Assert.assertFalse(coordinator.isLoadSpecUnreferenced(root, null)); - Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV1, rootSegmentId)); - Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV2, rootSegmentId)); - - Assert.assertFalse(coordinator.isLoadSpecUnreferenced(root, null)); - Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV1, rootSegmentId)); - Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV2, rootSegmentId)); - - coordinator.deleteSegments(ImmutableSet.of(childV2)); - Assert.assertFalse(coordinator.isLoadSpecUnreferenced(root, null)); - Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV1, rootSegmentId)); - Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV2, rootSegmentId)); - - - coordinator.deleteSegments(ImmutableSet.of(root)); - Assert.assertFalse(coordinator.isLoadSpecUnreferenced(root, null)); - Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV1, rootSegmentId)); - Assert.assertFalse(coordinator.isLoadSpecUnreferenced(childV2, rootSegmentId)); - - coordinator.deleteSegments(ImmutableSet.of(childV1)); - Assert.assertTrue(coordinator.isLoadSpecUnreferenced(root, null)); - Assert.assertTrue(coordinator.isLoadSpecUnreferenced(childV1, rootSegmentId)); - Assert.assertTrue(coordinator.isLoadSpecUnreferenced(childV2, rootSegmentId)); + Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root)).isEmpty()); + Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(childV1)).isEmpty()); + Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(childV2)).isEmpty()); + 
Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root, childV1)).isEmpty()); + Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root, childV2)).isEmpty()); + Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(childV1, childV2)).isEmpty()); + Assert.assertEquals( + ImmutableSet.of(root, childV1, childV2), + coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root, childV1, childV2)) + ); } From 80eefc708e7e60206aca0d3b37c60f2f35c755aa Mon Sep 17 00:00:00 2001 From: Amatya Date: Sun, 30 Jun 2024 20:02:52 +0530 Subject: [PATCH 06/16] Add new task action and other feedback --- .../ActionBasedPublishedSegmentRetriever.java | 2 +- .../DetermineSegmentsToKillAction.java | 98 +++++++++++++++++++ .../common/actions/SegmentNukeAction.java | 23 ++--- .../indexing/common/actions/TaskAction.java | 1 + .../common/task/KillUnusedSegmentsTask.java | 63 ++++++------ ...TestIndexerMetadataStorageCoordinator.java | 4 +- .../IndexerMetadataStorageCoordinator.java | 4 +- .../IndexerSQLMetadataStorageCoordinator.java | 36 +++---- .../druid/metadata/SQLMetadataConnector.java | 2 +- .../metadata/SqlSegmentsMetadataQuery.java | 2 +- ...exerSQLMetadataStorageCoordinatorTest.java | 24 ++--- 11 files changed, 177 insertions(+), 82 deletions(-) create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/actions/DetermineSegmentsToKillAction.java diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/appenderator/ActionBasedPublishedSegmentRetriever.java b/indexing-service/src/main/java/org/apache/druid/indexing/appenderator/ActionBasedPublishedSegmentRetriever.java index ba5cf923b12e..bb349cc97902 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/appenderator/ActionBasedPublishedSegmentRetriever.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/appenderator/ActionBasedPublishedSegmentRetriever.java @@ 
-79,7 +79,7 @@ public Set findPublishedSegments(Set segmentIds) throws catch (Exception e) { log.warn( e, - "Could not retrieve published segment IDs[%s] using task action[segmentListById]." + "Could not retrieve published segment IDs[%s] using task action[retrieveSegmentsById]." + " Overlord maybe on an older version, retrying with action[segmentListUsed]." + " This task may fail to publish segments if there is a concurrent replace happening.", serializedSegmentIds diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/DetermineSegmentsToKillAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/DetermineSegmentsToKillAction.java new file mode 100644 index 000000000000..925b0f4f247d --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/DetermineSegmentsToKillAction.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.actions; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.type.TypeReference; +import org.apache.druid.indexing.common.task.Task; +import org.apache.druid.timeline.DataSegment; + +import java.util.Objects; +import java.util.Set; + +/** + * Given a set of segments, this task action determines the subset that can be killed from deep storage. + * This task action assumes that all the segments provided are unused and belong to the same datasource. + */ +public class DetermineSegmentsToKillAction implements TaskAction> +{ + private final Set segments; + + @JsonCreator + public DetermineSegmentsToKillAction(@JsonProperty("segments") Set segments) + { + this.segments = segments; + } + + @JsonProperty + public Set getSegments() + { + return segments; + } + + @Override + public TypeReference> getReturnTypeReference() + { + return new TypeReference>() + { + }; + } + + @Override + public Set perform(Task task, TaskActionToolbox toolbox) + { + return toolbox.getIndexerMetadataStorageCoordinator() + .determineSegmentsWithUnreferencedLoadSpecs(segments); + } + + @Override + public boolean isAudited() + { + return false; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + DetermineSegmentsToKillAction that = (DetermineSegmentsToKillAction) o; + return Objects.equals(segments, that.segments); + } + + @Override + public int hashCode() + { + return Objects.hash(segments); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "segments=" + segments + + '}'; + } +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentNukeAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentNukeAction.java index
767e7d2f74e4..2856f161e0e6 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentNukeAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentNukeAction.java @@ -26,18 +26,16 @@ import org.apache.druid.indexing.common.task.IndexTaskUtils; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.overlord.CriticalAction; -import org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; import org.apache.druid.query.DruidMetrics; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.timeline.DataSegment; -import java.util.HashSet; import java.util.Set; import java.util.stream.Collectors; -public class SegmentNukeAction implements TaskAction> +public class SegmentNukeAction implements TaskAction { private final Set segments; @@ -56,18 +54,18 @@ public Set getSegments() } @Override - public TypeReference> getReturnTypeReference() + public TypeReference getReturnTypeReference() { - return new TypeReference>() + return new TypeReference() { }; } @Override - public Set perform(Task task, TaskActionToolbox toolbox) + public Void perform(Task task, TaskActionToolbox toolbox) { TaskLocks.checkLockCoversSegments(task, toolbox.getTaskLockbox(), segments); - final Set segmentsToKill = new HashSet<>(); + try { toolbox.getTaskLockbox().doInCriticalSection( task, @@ -75,14 +73,7 @@ public Set perform(Task task, TaskActionToolbox toolbox) CriticalAction.builder() .onValidLocks( () -> { - final IndexerMetadataStorageCoordinator coordinator - = toolbox.getIndexerMetadataStorageCoordinator(); - - // Find subset of segments with unreferenced load specs assuming successful nuking - segmentsToKill.addAll(coordinator.findSegmentsWithUnreferencedLoadSpecs(segments)); - - // Nuke segments from metadata store - coordinator.deleteSegments(segments); 
+ toolbox.getIndexerMetadataStorageCoordinator().deleteSegments(segments); return null; } ) @@ -107,7 +98,7 @@ public Set perform(Task task, TaskActionToolbox toolbox) toolbox.getEmitter().emit(metricBuilder.setMetric("segment/nuked/bytes", segment.getSize())); } - return segmentsToKill; + return null; } @Override diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/TaskAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/TaskAction.java index 4606bd597a8d..510503852855 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/TaskAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/TaskAction.java @@ -42,6 +42,7 @@ @JsonSubTypes.Type(name = "segmentListUsed", value = RetrieveUsedSegmentsAction.class), @JsonSubTypes.Type(name = "segmentListUnused", value = RetrieveUnusedSegmentsAction.class), @JsonSubTypes.Type(name = "markSegmentsAsUnused", value = MarkSegmentsAsUnusedAction.class), + @JsonSubTypes.Type(name = "determineSegmentsToKill", value = DetermineSegmentsToKillAction.class), @JsonSubTypes.Type(name = "segmentNuke", value = SegmentNukeAction.class), @JsonSubTypes.Type(name = "segmentMetadataUpdate", value = SegmentMetadataUpdateAction.class), @JsonSubTypes.Type(name = SegmentAllocateAction.TYPE, value = SegmentAllocateAction.class), diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index 7823d7cb9a7c..24060806950f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -34,6 +34,7 @@ import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; import 
org.apache.druid.indexing.common.TaskToolbox; +import org.apache.druid.indexing.common.actions.DetermineSegmentsToKillAction; import org.apache.druid.indexing.common.actions.RetrieveUnusedSegmentsAction; import org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction; import org.apache.druid.indexing.common.actions.SegmentNukeAction; @@ -203,11 +204,9 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception ); // Fetch the load specs of all segments overlapping with the unused segment intervals final Set> usedSegmentLoadSpecs = - new HashSet<>(toolbox.getTaskActionClient().submit(retrieveUsedSegmentsAction) - .stream() - .map(DataSegment::getLoadSpec) - .collect(Collectors.toSet()) - ); + toolbox.getTaskActionClient().submit(retrieveUsedSegmentsAction) + .stream() + .map(DataSegment::getLoadSpec).collect(Collectors.toSet()); do { if (nextBatchSize <= 0) { @@ -231,32 +230,34 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception ); } - // Kill segments - // Order is important here: we want the nuke action to clean up the metadata records _before_ the - // segments are removed from storage, this helps maintain that we will always have a storage segment if - // the metadata segment is present. If the segment nuke throws an exception, then the segment cleanup is - // abandoned. 
- - final List segmentsToBeKilled; - - Set segmentsWithUnreferencedLoadSpecs - = toolbox.getTaskActionClient().submit(new SegmentNukeAction(new HashSet<>(unusedSegments))); - if (segmentsWithUnreferencedLoadSpecs != null) { - // New task action that returns the exact set of segments to be killed from deep storage - // We still need to check used segment load specs as segment upgrades pre-date this change - segmentsToBeKilled = segmentsWithUnreferencedLoadSpecs - .stream() - .filter(unusedSegment -> unusedSegment.getLoadSpec() == null - || !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec())) - .collect(Collectors.toList()); - } else { - // Kill segments from the deep storage only if their load specs are not being used by any used segments - segmentsToBeKilled = unusedSegments - .stream() - .filter(unusedSegment -> unusedSegment.getLoadSpec() == null - || !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec())) - .collect(Collectors.toList()); + // Kill segments. Order is important here: + // Determine the set of segments to be killed from deep storage _before_ nuking segments from the metadata store + // We then want the nuke action to clean up the metadata records _before_ the segments are removed from storage. + // This helps maintain that we will always have a storage segment if the metadata segment is present. + // If the segment nuke throws an exception, then the segment cleanup is abandoned. + + Set segmentsWithUnreferencedLoadSpecs = new HashSet<>(unusedSegments); + try { + segmentsWithUnreferencedLoadSpecs + = toolbox.getTaskActionClient() + .submit(new DetermineSegmentsToKillAction(segmentsWithUnreferencedLoadSpecs)); } + catch (Exception e) { + LOG.warn( + e, + "Could not determine segments with unreferenced load specs using task action[determineSegmentsToKill]." + + " Overlord may be on an older version."
+ ); + } + + toolbox.getTaskActionClient().submit(new SegmentNukeAction(new HashSet<>(unusedSegments))); + + // Kill segments from the deep storage only if their load specs are not being used by any other segments + final List segmentsToBeKilled = segmentsWithUnreferencedLoadSpecs + .stream() + .filter(unusedSegment -> unusedSegment.getLoadSpec() == null + || !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec())) + .collect(Collectors.toList()); toolbox.getDataSegmentKiller().kill(segmentsToBeKilled); numBatchesProcessed++; @@ -265,7 +266,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception LOG.info("Processed [%d] batches for kill task[%s].", numBatchesProcessed, getId()); nextBatchSize = computeNextBatchSize(numSegmentsKilled); - } while (unusedSegments.size() != 0 && (null == numTotalBatches || numBatchesProcessed < numTotalBatches)); + } while (!unusedSegments.isEmpty() && (null == numTotalBatches || numBatchesProcessed < numTotalBatches)); final String taskId = getId(); LOG.info( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index f59311b06c20..44d4602c68a3 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -315,7 +315,9 @@ public List getPendingSegments(String datasource, Interval } @Override - public Set findSegmentsWithUnreferencedLoadSpecs(final Set segments) + public Set determineSegmentsWithUnreferencedLoadSpecs( + final Set segments + ) { return segments; } diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index 
f9e0899f3507..1a6bf0b61b0f 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -476,8 +476,10 @@ SegmentPublishResult commitMetadataOnly( /** * Return subset of segments that do not have load specs referenced by segments outside this set + * Assumes that all the segments belong to a single datasource + * * @param segments set of DataSegments * @return subset of segments that can safely be killed from deep storage */ - Set findSegmentsWithUnreferencedLoadSpecs(Set segments); + Set determineSegmentsWithUnreferencedLoadSpecs(Set segments); } diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 69205a6c17d4..4865fd1c49b7 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -1353,21 +1353,6 @@ public int hashCode() } } - private static void bindColumnValuesToQueryWithInCondition( - final String columnName, - final List values, - final Query> query - ) - { - if (values == null) { - return; - } - - for (int i = 0; i < values.size(); i++) { - query.bind(StringUtils.format("%s%d", columnName, i), values.get(i)); - } - } - private static void bindColumnValuesToQueryWithInCondition( final String columnName, final List values, @@ -2957,7 +2942,7 @@ public int deleteUpgradeSegmentsForTask(final String taskId) } @Override - public Set findSegmentsWithUnreferencedLoadSpecs(final Set segments) + public Set determineSegmentsWithUnreferencedLoadSpecs(final Set segments) { if (segments.isEmpty()) { return segments; @@ -3011,6 +2996,15 @@ public Set findSegmentsWithUnreferencedLoadSpecs(final Set getRootSegmentIds(final String dataSource, 
final List segmentIds) { @@ -3028,7 +3022,7 @@ Map getRootSegmentIds(final String dataSource, final List { Query> query = handle.createQuery(sql) .bind("dataSource", dataSource); - bindColumnValuesToQueryWithInCondition("id", segmentIds, query); + SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition("id", segmentIds, query); query.map( (index, r, ctx) -> { final String id = r.getString(1); @@ -3046,6 +3040,12 @@ Map getRootSegmentIds(final String dataSource, final List getSegmentsWithRootSegmentIds(final String dataSource, final List rootSegmentIds) { if (rootSegmentIds.isEmpty()) { @@ -3062,7 +3062,7 @@ private Map getSegmentsWithRootSegmentIds(final String dataSourc handle -> { Query> query = handle.createQuery(sql) .bind("dataSource", dataSource); - bindColumnValuesToQueryWithInCondition("root_segment_id", rootSegmentIds, query); + SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition("root_segment_id", rootSegmentIds, query); query.map( (index, r, ctx) -> { final String id = r.getString(1); diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java index de12f6a5ae7e..16dd68e4cc8c 100644 --- a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java +++ b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java @@ -587,7 +587,7 @@ protected void alterSegmentTable() Map columnNameTypes = new HashMap<>(); columnNameTypes.put("used_status_last_updated", "VARCHAR(255)"); - // root_segment_id is the segment id is the first / root segment to which the same load spec originally belonged + // root_segment_id is the first segment to which the same load spec originally belonged // Load specs can be shared as a result of segment version upgrade // This column is null for segments that haven't been upgraded. 
columnNameTypes.put("root_segment_id", "VARCHAR(255)"); diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index 4cede8b8f15f..718c00d8daf8 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -983,7 +983,7 @@ static String getParameterizedInConditionForColumn(final String columnName, fina * * @see #getParameterizedInConditionForColumn(String, List) */ - private static void bindColumnValuesToQueryWithInCondition( + static void bindColumnValuesToQueryWithInCondition( final String columnName, final List values, final SQLStatement query diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 946a00bdc464..62ab0277e103 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -3433,14 +3433,14 @@ public void testRetrieveUnusedSegmentsForExactIntervalAndVersion() public void testFindDataSegmentsWithUnreferencedLoadSpecs() { // Empty set - Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(Collections.emptySet()).isEmpty()); + Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(Collections.emptySet()).isEmpty()); // Lone segment is unreferenced insertUsedSegments(Collections.singleton(eternitySegment), Collections.emptyMap()); coordinator.markSegmentsAsUnusedWithinInterval(eternitySegment.getDataSource(), Intervals.ETERNITY); Assert.assertEquals( Collections.singleton(eternitySegment), - coordinator.findSegmentsWithUnreferencedLoadSpecs(Collections.singleton(eternitySegment)) + 
coordinator.determineSegmentsWithUnreferencedLoadSpecs(Collections.singleton(eternitySegment)) ); // Siblings are always referenced unless they're passed together @@ -3451,11 +3451,11 @@ public void testFindDataSegmentsWithUnreferencedLoadSpecs() "nonExistentRoot" ); insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), rootSegmentIdMap); - Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(defaultSegment)).isEmpty()); - Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(defaultSegment2)).isEmpty()); + Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(defaultSegment)).isEmpty()); + Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(defaultSegment2)).isEmpty()); Assert.assertEquals( ImmutableSet.of(defaultSegment, defaultSegment2), - coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(defaultSegment, defaultSegment2)) + coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(defaultSegment, defaultSegment2)) ); // The parent and all its children must all be present in the batch for them to be unreferenced @@ -3471,15 +3471,15 @@ public void testFindDataSegmentsWithUnreferencedLoadSpecs() insertUsedSegments(ImmutableSet.of(root, childV1, childV2), rootSegmentIdMap); coordinator.markSegmentsAsUnusedWithinInterval(DS.WIKI, Intervals.of("2024/2025")); - Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root)).isEmpty()); - Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(childV1)).isEmpty()); - Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(childV2)).isEmpty()); - Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root, childV1)).isEmpty()); - Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root, 
childV2)).isEmpty()); - Assert.assertTrue(coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(childV1, childV2)).isEmpty()); + Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root)).isEmpty()); + Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(childV1)).isEmpty()); + Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(childV2)).isEmpty()); + Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root, childV1)).isEmpty()); + Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root, childV2)).isEmpty()); + Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(childV1, childV2)).isEmpty()); Assert.assertEquals( ImmutableSet.of(root, childV1, childV2), - coordinator.findSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root, childV1, childV2)) + coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root, childV1, childV2)) ); } From 65ab152a0867cc75eabd2a8fe8219bacab0ff197 Mon Sep 17 00:00:00 2001 From: Amatya Date: Sun, 30 Jun 2024 20:24:31 +0530 Subject: [PATCH 07/16] Rename root_segment_id to upgraded_from_segment_id --- .../SegmentTransactionalReplaceAction.java | 2 +- .../SeekableStreamIndexTaskClient.java | 2 +- .../IndexerSQLMetadataStorageCoordinator.java | 126 ++++++++++-------- .../druid/metadata/PendingSegmentRecord.java | 2 +- .../druid/metadata/SQLMetadataConnector.java | 8 +- .../metadata/SqlSegmentsMetadataQuery.java | 4 +- .../druid/server/http/DataSegmentPlus.java | 18 +-- ...exerSQLMetadataStorageCoordinatorTest.java | 36 ++--- 8 files changed, 106 insertions(+), 92 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java index df188ac81533..9846fca58155 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java @@ -52,7 +52,7 @@ * commit input segments contained within interval * upgrade ids in the upgradeSegments table corresponding to this task to the replace lock's version and commit them * fetch payload, task_allocator_id for pending segments - * upgrade each such pending segment to the replace lock's version with the corresponding root segment + * upgrade each such pending segment to the replace lock's version with the corresponding upgraded from segment * } * For every pending segment with version == replace lock version: * Fetch payload, group_id or the pending segment and relay them to the supervisor diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskClient.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskClient.java index 7fd282e44ce2..15d03fb38fec 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskClient.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskClient.java @@ -160,7 +160,7 @@ ListenableFuture setEndOffsetsAsync( * The update is processed only if the task is serving the original pending segment. 
* * @param taskId - task id - * @param pendingSegmentRecord - the ids belonging to the versions to which the root segment needs to be updated + * @param pendingSegmentRecord - the ids belonging to the versions to which the first segment needs to be updated * @return true if the update succeeds */ ListenableFuture registerNewVersionOfPendingSegmentAsync( diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 4865fd1c49b7..c73c0c42c375 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -564,7 +564,7 @@ public SegmentPublishResult commitReplaceSegments( createNewIdsOfAppendSegmentsAfterReplace(handle, replaceSegments, locksHeldByReplaceTask); Map upgradeSegmentMetadata = new HashMap<>(); - Map rootSegmentIdMap = new HashMap<>(); + Map upgradedFromSegmentIdMap = new HashMap<>(); for (DataSegmentPlus dataSegmentPlus : upgradedSegments) { segmentsToInsert.add(dataSegmentPlus.getDataSegment()); if (dataSegmentPlus.getSchemaFingerprint() != null && dataSegmentPlus.getNumRows() != null) { @@ -573,8 +573,11 @@ public SegmentPublishResult commitReplaceSegments( new SegmentMetadata(dataSegmentPlus.getNumRows(), dataSegmentPlus.getSchemaFingerprint()) ); } - if (dataSegmentPlus.getRootSegmentId() != null) { - rootSegmentIdMap.put(dataSegmentPlus.getDataSegment().getId(), dataSegmentPlus.getRootSegmentId()); + if (dataSegmentPlus.getUpgradedFromSegmentId() != null) { + upgradedFromSegmentIdMap.put( + dataSegmentPlus.getDataSegment().getId(), + dataSegmentPlus.getUpgradedFromSegmentId() + ); } } SegmentPublishResult result = SegmentPublishResult.ok( @@ -584,7 +587,7 @@ public SegmentPublishResult commitReplaceSegments( segmentSchemaMapping, upgradeSegmentMetadata, Collections.emptyMap(), - 
rootSegmentIdMap + upgradedFromSegmentIdMap ), upgradePendingSegmentsOverlappingWith(segmentsToInsert) ); @@ -1413,7 +1416,7 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( final Set allSegmentsToInsert = new HashSet<>(appendSegments); final Map newVersionSegmentToParent = new HashMap<>(); final Map segmentIdMap = new HashMap<>(); - final Map rootSegmentIdMap = new HashMap<>(); + final Map upgradedFromSegmentIdMap = new HashMap<>(); appendSegments.forEach(segment -> segmentIdMap.put(segment.getId().toString(), segment)); segmentIdsForNewVersions.forEach( pendingSegment -> { @@ -1421,7 +1424,7 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( final DataSegment oldSegment = segmentIdMap.get(pendingSegment.getUpgradedFromSegmentId()); final SegmentId newVersionSegmentId = pendingSegment.getId().asSegmentId(); newVersionSegmentToParent.put(newVersionSegmentId, oldSegment.getId()); - rootSegmentIdMap.put(newVersionSegmentId, oldSegment.getId().toString()); + upgradedFromSegmentIdMap.put(newVersionSegmentId, oldSegment.getId().toString()); allSegmentsToInsert.add( new DataSegment( pendingSegment.getId().asSegmentId(), @@ -1481,7 +1484,7 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( segmentSchemaMapping, Collections.emptyMap(), newVersionSegmentToParent, - rootSegmentIdMap + upgradedFromSegmentIdMap ) ); }, @@ -2089,7 +2092,7 @@ private Set announceHistoricalSegmentBatch( for (List partition : partitionedSegments) { for (DataSegment segment : partition) { String segmentId = segment.getId().toString(); - final String rootSegmentId = null; + final String upgradedFromSegmentId = null; PreparedBatchPart preparedBatchPart = preparedBatch.add() .bind("id", segmentId) @@ -2102,7 +2105,7 @@ private Set announceHistoricalSegmentBatch( .bind("used", usedSegments.contains(segment)) .bind("payload", jsonMapper.writeValueAsBytes(segment)) .bind("used_status_last_updated", now) - 
.bind("root_segment_id", rootSegmentId); + .bind("upgraded_from_segment_id", upgradedFromSegmentId); if (schemaPersistEnabled) { Long numRows = null; @@ -2227,9 +2230,9 @@ private Set createNewIdsOfAppendSegmentsAfterReplace( .shardSpec(shardSpec) .build(); - final String rootSegmentId = oldSegmentMetadata.getRootSegmentId() == null - ? oldSegmentMetadata.getDataSegment().getId().toString() - : oldSegmentMetadata.getRootSegmentId(); + final String upgradedFromSegmentId = oldSegmentMetadata.getUpgradedFromSegmentId() == null + ? oldSegmentMetadata.getDataSegment().getId().toString() + : oldSegmentMetadata.getUpgradedFromSegmentId(); upgradedSegments.add( new DataSegmentPlus( @@ -2239,7 +2242,7 @@ private Set createNewIdsOfAppendSegmentsAfterReplace( null, oldSegmentMetadata.getSchemaFingerprint(), oldSegmentMetadata.getNumRows(), - rootSegmentId + upgradedFromSegmentId ) ); } @@ -2283,7 +2286,7 @@ private Set insertSegments( @Nullable SegmentSchemaMapping segmentSchemaMapping, Map upgradeSegmentMetadata, Map newVersionForAppendToParent, - Map rootSegmentIdMap + Map upgradedFromSegmentIdMap ) throws IOException { boolean shouldPersistSchema = shouldPersistSchema(segmentSchemaMapping); @@ -2320,7 +2323,7 @@ private Set insertSegments( .bind("used", true) .bind("payload", jsonMapper.writeValueAsBytes(segment)) .bind("used_status_last_updated", now) - .bind("root_segment_id", rootSegmentIdMap.get(segment.getId())); + .bind("upgraded_from_segment_id", upgradedFromSegmentIdMap.get(segment.getId())); if (schemaPersistEnabled) { SegmentMetadata segmentMetadata = @@ -2467,9 +2470,9 @@ private String buildSqlToInsertSegments() { String insertStatement = "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s," - + " partitioned, version, used, payload, used_status_last_updated, root_segment_id %3$s) " + + " partitioned, version, used, payload, used_status_last_updated, upgraded_from_segment_id %3$s) " + "VALUES (:id, :dataSource, :created_date, :start, :end," - + 
" :partitioned, :version, :used, :payload, :used_status_last_updated, :root_segment_id %4$s)"; + + " :partitioned, :version, :used, :payload, :used_status_last_updated, :upgraded_from_segment_id %4$s)"; if (schemaPersistEnabled) { return StringUtils.format( @@ -2955,36 +2958,37 @@ public Set determineSegmentsWithUnreferencedLoadSpecs(final Set idToRootMap = new HashMap<>(getRootSegmentIds(dataSource, segmentIds)); - final Set rootSegmentIds = new HashSet<>(segmentIds); - rootSegmentIds.addAll(idToRootMap.values()); - idToRootMap.putAll(getSegmentsWithRootSegmentIds(dataSource, new ArrayList<>(rootSegmentIds))); - final Map> rootToIdsMap = new HashMap<>(); - for (Map.Entry idAndRoot : idToRootMap.entrySet()) { - rootToIdsMap.computeIfAbsent(idAndRoot.getValue(), root -> new HashSet<>()) - .add(idAndRoot.getKey()); + Map idToUpgradedMap = new HashMap<>(getUpgradedFromSegmentIds(dataSource, segmentIds)); + final Set upgradedFromSegmentIds = new HashSet<>(segmentIds); + upgradedFromSegmentIds.addAll(idToUpgradedMap.values()); + idToUpgradedMap.putAll(getSegmentsWithUpgradedFromSegmentIds(dataSource, new ArrayList<>(upgradedFromSegmentIds))); + final Map> upgradedToIdsMap = new HashMap<>(); + for (Map.Entry idAndAncestor : idToUpgradedMap.entrySet()) { + upgradedToIdsMap.computeIfAbsent(idAndAncestor.getValue(), ancestor -> new HashSet<>()) + .add(idAndAncestor.getKey()); } final Set segmentsWithUnreferencedLoadSpecs = new HashSet<>(); - final Set existingSegmentIds = retrieveSegmentsById(dataSource, rootSegmentIds).stream() - .map(DataSegment::getId) - .map(SegmentId::toString) - .collect(Collectors.toSet()); + final Set existingSegmentIds + = retrieveSegmentsById(dataSource, upgradedFromSegmentIds).stream() + .map(DataSegment::getId) + .map(SegmentId::toString) + .collect(Collectors.toSet()); for (DataSegment segment : segments) { final String id = segment.getId().toString(); final Set conflicts = new HashSet<>(); - if (rootToIdsMap.containsKey(id)) { + if 
(upgradedToIdsMap.containsKey(id)) { // Add children - conflicts.addAll(rootToIdsMap.get(id)); + conflicts.addAll(upgradedToIdsMap.get(id)); } - final String parent = idToRootMap.get(id); + final String parent = idToUpgradedMap.get(id); if (parent != null) { // add parent if it exists if (existingSegmentIds.contains(parent)) { conflicts.add(parent); } // add siblings - if (rootToIdsMap.containsKey(parent)) { - conflicts.addAll(rootToIdsMap.get(parent)); + if (upgradedToIdsMap.containsKey(parent)) { + conflicts.addAll(upgradedToIdsMap.get(parent)); } } // Remove segments being deleted @@ -2997,27 +3001,27 @@ public Set determineSegmentsWithUnreferencedLoadSpecs(final Set getRootSegmentIds(final String dataSource, final List segmentIds) + Map getUpgradedFromSegmentIds(final String dataSource, final List segmentIds) { if (segmentIds.isEmpty()) { return Collections.emptyMap(); } final String sql = StringUtils.format( - "SELECT id, root_segment_id FROM %s WHERE dataSource = :dataSource %s", + "SELECT id, upgraded_from_segment_id FROM %s WHERE dataSource = :dataSource %s", dbTables.getSegmentsTable(), SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("id", segmentIds) ); - Map rootSegmentIdMap = new HashMap<>(); + Map upgradedFromSegmentIdMap = new HashMap<>(); connector.retryWithHandle( handle -> { Query> query = handle.createQuery(sql) @@ -3026,9 +3030,9 @@ Map getRootSegmentIds(final String dataSource, final List { final String id = r.getString(1); - final String rootSegmentId = r.getString(2); - if (rootSegmentId != null) { - rootSegmentIdMap.put(id, rootSegmentId); + final String upgradedFromSegmentId = r.getString(2); + if (upgradedFromSegmentId != null) { + upgradedFromSegmentIdMap.put(id, upgradedFromSegmentId); } return null; } @@ -3037,38 +3041,48 @@ Map getRootSegmentIds(final String dataSource, final List getSegmentsWithRootSegmentIds(final String dataSource, final List rootSegmentIds) + private Map getSegmentsWithUpgradedFromSegmentIds( + final 
String dataSource, + final List upgradedFromSegmentIds + ) { - if (rootSegmentIds.isEmpty()) { + if (upgradedFromSegmentIds.isEmpty()) { return Collections.emptyMap(); } final String sql = StringUtils.format( - "SELECT id, root_segment_id FROM %s WHERE dataSource = :dataSource %s", + "SELECT id, upgraded_from_segment_id FROM %s WHERE dataSource = :dataSource %s", dbTables.getSegmentsTable(), - SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("root_segment_id", rootSegmentIds) + SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn( + "upgraded_from_segment_id", + upgradedFromSegmentIds + ) ); - Map rootSegmentIdMap = new HashMap<>(); + Map upgradedFromSegmentIdMap = new HashMap<>(); connector.retryWithHandle( handle -> { Query> query = handle.createQuery(sql) .bind("dataSource", dataSource); - SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition("root_segment_id", rootSegmentIds, query); + SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition( + "upgraded_from_segment_id", + upgradedFromSegmentIds, + query + ); query.map( (index, r, ctx) -> { final String id = r.getString(1); - final String rootSegmentId = r.getString(2); - if (rootSegmentId != null) { - rootSegmentIdMap.put(id, rootSegmentId); + final String upgradedFromSegmentId = r.getString(2); + if (upgradedFromSegmentId != null) { + upgradedFromSegmentIdMap.put(id, upgradedFromSegmentId); } return null; } @@ -3077,7 +3091,7 @@ private Map getSegmentsWithRootSegmentIds(final String dataSourc return null; } ); - return rootSegmentIdMap; + return upgradedFromSegmentIdMap; } private static class PendingSegmentsRecord diff --git a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java index bfbaad18ef1a..0fa492e28f91 100644 --- a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java +++ b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java @@ 
-40,7 +40,7 @@ *

  • id -> id (Unique identifier for pending segment)
  • *
  • sequence_name -> sequenceName (sequence name used for segment allocation)
  • *
  • sequence_prev_id -> sequencePrevId (previous segment id used for segment allocation)
  • - *
  • upgraded_from_segment_id -> upgradedFromSegmentId (Id of the root segment from which this was upgraded)
  • + *
  • upgraded_from_segment_id -> upgradedFromSegmentId (Id of the first segment from which this segment was upgraded)
  • *
  • task_allocator_id -> taskAllocatorId (Associates a task / task group / replica group with the pending segment)
  • * */ diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java index 16dd68e4cc8c..7f17edd012ee 100644 --- a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java +++ b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java @@ -587,10 +587,10 @@ protected void alterSegmentTable() Map columnNameTypes = new HashMap<>(); columnNameTypes.put("used_status_last_updated", "VARCHAR(255)"); - // root_segment_id is the first segment to which the same load spec originally belonged + // upgraded_from_segment_id is the first segment to which the same load spec originally belonged // Load specs can be shared as a result of segment version upgrade // This column is null for segments that haven't been upgraded. - columnNameTypes.put("root_segment_id", "VARCHAR(255)"); + columnNameTypes.put("upgraded_from_segment_id", "VARCHAR(255)"); if (centralizedDatasourceSchemaConfig.isEnabled()) { columnNameTypes.put("schema_fingerprint", "VARCHAR(255)"); @@ -628,8 +628,8 @@ protected void alterSegmentTable() final Set createdIndexSet = getIndexOnTable(tableName); createIndex( tableName, - StringUtils.format("idx_%1$s_datasource_root_segment_id", tableName), - ImmutableList.of("dataSource", "root_segment_id"), + StringUtils.format("idx_%1$s_datasource_upgraded_from_segment_id", tableName), + ImmutableList.of("dataSource", "upgraded_from_segment_id"), createdIndexSet ); } diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index 718c00d8daf8..11508d21a428 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -286,7 +286,7 @@ private List retrieveSegmentBatchById( if (includeSchemaInfo) { final Query> query = 
handle.createQuery( StringUtils.format( - "SELECT payload, used, root_segment_id, schema_fingerprint, num_rows FROM %s WHERE dataSource = :dataSource %s", + "SELECT payload, used, upgraded_from_segment_id, schema_fingerprint, num_rows FROM %s WHERE dataSource = :dataSource %s", dbTables.getSegmentsTable(), getParameterizedInConditionForColumn("id", segmentIds) ) ); @@ -315,7 +315,7 @@ private List retrieveSegmentBatchById( } else { final Query> query = handle.createQuery( StringUtils.format( - "SELECT payload, used, root_segment_id FROM %s WHERE dataSource = :dataSource %s", + "SELECT payload, used, upgraded_from_segment_id FROM %s WHERE dataSource = :dataSource %s", dbTables.getSegmentsTable(), getParameterizedInConditionForColumn("id", segmentIds) ) ); diff --git a/server/src/main/java/org/apache/druid/server/http/DataSegmentPlus.java b/server/src/main/java/org/apache/druid/server/http/DataSegmentPlus.java index 7d961d8c8c2b..bfda5cbf3ad4 100644 --- a/server/src/main/java/org/apache/druid/server/http/DataSegmentPlus.java +++ b/server/src/main/java/org/apache/druid/server/http/DataSegmentPlus.java @@ -36,7 +36,7 @@ *
  • {@link DataSegmentPlus#createdDate} - The time when the segment was created.
  • *
  • {@link DataSegmentPlus#usedStatusLastUpdatedDate} - The time when the segments * used status was last updated.
  • - *
  • {@link DataSegmentPlus#rootSegmentId} - The root segment id to which the same load spec originally belonged. + *
  • {@link DataSegmentPlus#upgradedFromSegmentId} - The segment id to which the same load spec originally belonged. * Load specs can be shared as a result of segment version upgrades.
  • * *

    @@ -56,7 +56,7 @@ public class DataSegmentPlus private final Long numRows; @Nullable - private final String rootSegmentId; + private final String upgradedFromSegmentId; @JsonCreator public DataSegmentPlus( @@ -66,7 +66,7 @@ public DataSegmentPlus( @JsonProperty("used") @Nullable final Boolean used, @JsonProperty("schemaFingerprint") @Nullable final String schemaFingerprint, @JsonProperty("numRows") @Nullable final Long numRows, - @JsonProperty("rootSegmentId") @Nullable final String rootSegmentId + @JsonProperty("upgradedFromSegmentId") @Nullable final String upgradedFromSegmentId ) { this.dataSegment = dataSegment; @@ -75,7 +75,7 @@ public DataSegmentPlus( this.used = used; this.schemaFingerprint = schemaFingerprint; this.numRows = numRows; - this.rootSegmentId = rootSegmentId; + this.upgradedFromSegmentId = upgradedFromSegmentId; } @Nullable @@ -121,9 +121,9 @@ public Long getNumRows() @Nullable @JsonProperty - public String getRootSegmentId() + public String getUpgradedFromSegmentId() { - return rootSegmentId; + return upgradedFromSegmentId; } @Override @@ -142,7 +142,7 @@ public boolean equals(Object o) && Objects.equals(used, that.getUsed()) && Objects.equals(schemaFingerprint, that.getSchemaFingerprint()) && Objects.equals(numRows, that.getNumRows()) - && Objects.equals(rootSegmentId, that.getRootSegmentId()); + && Objects.equals(upgradedFromSegmentId, that.getUpgradedFromSegmentId()); } @Override @@ -155,7 +155,7 @@ public int hashCode() used, schemaFingerprint, numRows, - rootSegmentId + upgradedFromSegmentId ); } @@ -169,7 +169,7 @@ public String toString() ", used=" + getUsed() + ", schemaFingerprint=" + getSchemaFingerprint() + ", numRows=" + getNumRows() + - ", rootSegmentId=" + getRootSegmentId() + + ", upgradedFromSegmentId=" + getUpgradedFromSegmentId() + '}'; } } diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java 
b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 62ab0277e103..1de1481a8416 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -275,14 +275,14 @@ public void testCommitAppendSegments() Set allCommittedSegments = new HashSet<>(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())); - Map rootSegmentIdMap = coordinator.getRootSegmentIds( + Map upgradedFromSegmentIdMap = coordinator.getUpgradedFromSegmentIds( DS.WIKI, allCommittedSegments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toList()) ); // Verify the segments present in the metadata store Assert.assertTrue(allCommittedSegments.containsAll(appendSegments)); for (DataSegment segment : appendSegments) { - Assert.assertNull(rootSegmentIdMap.get(segment.getId().toString())); + Assert.assertNull(upgradedFromSegmentIdMap.get(segment.getId().toString())); } allCommittedSegments.removeAll(appendSegments); @@ -295,12 +295,12 @@ public void testCommitAppendSegments() for (DataSegment segment : allCommittedSegments) { for (PendingSegmentRecord pendingSegmentRecord : pendingSegmentsForTask) { if (pendingSegmentRecord.getId().asSegmentId().toString().equals(segment.getId().toString())) { - DataSegment rootSegment = segmentMap.get(pendingSegmentRecord.getUpgradedFromSegmentId()); - Assert.assertNotNull(rootSegment); - Assert.assertEquals(segment.getLoadSpec(), rootSegment.getLoadSpec()); + DataSegment upgradedFromSegment = segmentMap.get(pendingSegmentRecord.getUpgradedFromSegmentId()); + Assert.assertNotNull(upgradedFromSegment); + Assert.assertEquals(segment.getLoadSpec(), upgradedFromSegment.getLoadSpec()); Assert.assertEquals( pendingSegmentRecord.getUpgradedFromSegmentId(), - rootSegmentIdMap.get(segment.getId().toString()) + 
upgradedFromSegmentIdMap.get(segment.getId().toString()) ); } } @@ -405,20 +405,20 @@ public void testCommitReplaceSegments() final Set usedSegments = new HashSet<>(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())); - final Map rootSegmentIdMap = coordinator.getRootSegmentIds( + final Map upgradedFromSegmentIdMap = coordinator.getUpgradedFromSegmentIds( "foo", usedSegments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toList()) ); Assert.assertTrue(usedSegments.containsAll(segmentsAppendedWithReplaceLock)); for (DataSegment appendSegment : segmentsAppendedWithReplaceLock) { - Assert.assertNull(rootSegmentIdMap.get(appendSegment.getId().toString())); + Assert.assertNull(upgradedFromSegmentIdMap.get(appendSegment.getId().toString())); } usedSegments.removeAll(segmentsAppendedWithReplaceLock); Assert.assertTrue(usedSegments.containsAll(replacingSegments)); for (DataSegment replaceSegment : replacingSegments) { - Assert.assertNull(rootSegmentIdMap.get(replaceSegment.getId().toString())); + Assert.assertNull(upgradedFromSegmentIdMap.get(replaceSegment.getId().toString())); } usedSegments.removeAll(replacingSegments); @@ -429,7 +429,7 @@ public void testCommitReplaceSegments() if (appendedSegment.getLoadSpec().equals(segmentReplicaWithNewVersion.getLoadSpec())) { Assert.assertEquals( appendedSegment.getId().toString(), - rootSegmentIdMap.get(segmentReplicaWithNewVersion.getId().toString()) + upgradedFromSegmentIdMap.get(segmentReplicaWithNewVersion.getId().toString()) ); hasBeenCarriedForward = true; break; @@ -3444,13 +3444,13 @@ public void testFindDataSegmentsWithUnreferencedLoadSpecs() ); // Siblings are always referenced unless they're passed together - Map rootSegmentIdMap = ImmutableMap.of( + Map upgradedFromSegmentIdMap = ImmutableMap.of( defaultSegment.getId(), "nonExistentRoot", defaultSegment2.getId(), "nonExistentRoot" ); - insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), 
rootSegmentIdMap); + insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), upgradedFromSegmentIdMap); Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(defaultSegment)).isEmpty()); Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(defaultSegment2)).isEmpty()); Assert.assertEquals( @@ -3462,13 +3462,13 @@ public void testFindDataSegmentsWithUnreferencedLoadSpecs() DataSegment root = createSegment(Intervals.of("2024/2025"), "2024-01-01", new NumberedShardSpec(0, 0)); DataSegment childV1 = createSegment(Intervals.of("2024/2025"), "2024-02-01", new NumberedShardSpec(0, 0)); DataSegment childV2 = createSegment(Intervals.ETERNITY, "2025-01-01", new NumberedShardSpec(0, 0)); - rootSegmentIdMap = ImmutableMap.of( + upgradedFromSegmentIdMap = ImmutableMap.of( childV1.getId(), root.getId().toString(), childV2.getId(), root.getId().toString() ); - insertUsedSegments(ImmutableSet.of(root, childV1, childV2), rootSegmentIdMap); + insertUsedSegments(ImmutableSet.of(root, childV1, childV2), upgradedFromSegmentIdMap); coordinator.markSegmentsAsUnusedWithinInterval(DS.WIKI, Intervals.of("2024/2025")); Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root)).isEmpty()); @@ -3484,15 +3484,15 @@ public void testFindDataSegmentsWithUnreferencedLoadSpecs() } - private boolean insertUsedSegments(Set dataSegments, Map rootSegmentIdMap) + private boolean insertUsedSegments(Set dataSegments, Map upgradedFromSegmentIdMap) { final String table = derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable(); return derbyConnector.retryWithHandle( handle -> { PreparedBatch preparedBatch = handle.prepareBatch( StringUtils.format( - "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, partitioned, version, used, payload, used_status_last_updated, root_segment_id) " - + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, 
:version, :used, :payload, :used_status_last_updated, :root_segment_id)", + "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, partitioned, version, used, payload, used_status_last_updated, upgraded_from_segment_id) " + + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload, :used_status_last_updated, :upgraded_from_segment_id)", table, derbyConnector.getQuoteString() ) @@ -3510,7 +3510,7 @@ private boolean insertUsedSegments(Set dataSegments, Map Date: Mon, 1 Jul 2024 13:38:19 +0530 Subject: [PATCH 08/16] Break up task action --- ...etrieveUpgradedFromSegmentsIdsAction.java} | 47 +++++--- .../RetrieveUpgradedToSegmentsIdsAction.java | 111 ++++++++++++++++++ .../indexing/common/actions/TaskAction.java | 3 +- .../common/task/KillUnusedSegmentsTask.java | 53 +++++++-- .../SeekableStreamIndexTaskClient.java | 2 +- ...TestIndexerMetadataStorageCoordinator.java | 19 ++- .../IndexerMetadataStorageCoordinator.java | 21 +++- .../IndexerSQLMetadataStorageCoordinator.java | 107 +++-------------- .../druid/metadata/SegmentUpgradeInfo.java | 79 +++++++++++++ ...exerSQLMetadataStorageCoordinatorTest.java | 109 +++++++++-------- 10 files changed, 370 insertions(+), 181 deletions(-) rename indexing-service/src/main/java/org/apache/druid/indexing/common/actions/{DetermineSegmentsToKillAction.java => RetrieveUpgradedFromSegmentsIdsAction.java} (54%) create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentsIdsAction.java create mode 100644 server/src/main/java/org/apache/druid/metadata/SegmentUpgradeInfo.java diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/DetermineSegmentsToKillAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentsIdsAction.java similarity index 54% rename from 
indexing-service/src/main/java/org/apache/druid/indexing/common/actions/DetermineSegmentsToKillAction.java rename to indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentsIdsAction.java index 925b0f4f247d..1f91cc181d07 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/DetermineSegmentsToKillAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentsIdsAction.java @@ -22,45 +22,57 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.type.TypeReference; +import com.google.common.collect.ImmutableList; import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.timeline.DataSegment; +import org.apache.druid.metadata.SegmentUpgradeInfo; +import java.util.List; import java.util.Objects; import java.util.Set; /** - * Given a set of segments, this task action determines the subset that can be killed from deep storage - * This task action assumes that the all the segments provided are unused and belong to the same datasource. + * Task action to retrieve the segment upgrade infos of a given set of used or unused segment ids. 
*/ -public class DetermineSegmentsToKillAction implements TaskAction> +public class RetrieveUpgradedFromSegmentsIdsAction implements TaskAction> { - private final Set segments; + private final String dataSource; + private final Set segmentIds; @JsonCreator - public DetermineSegmentsToKillAction(@JsonProperty("segments") Set segments) + public RetrieveUpgradedFromSegmentsIdsAction( + @JsonProperty("dataSource") String dataSource, + @JsonProperty("segmentIds") Set segmentIds + ) { - this.segments = segments; + this.dataSource = dataSource; + this.segmentIds = segmentIds; } @JsonProperty - public Set getSegments() + public String getDataSource() { - return segments; + return dataSource; + } + + @JsonProperty + public Set getSegmentIds() + { + return segmentIds; } @Override - public TypeReference> getReturnTypeReference() + public TypeReference> getReturnTypeReference() { - return new TypeReference>() + return new TypeReference>() { }; } @Override - public Set perform(Task task, TaskActionToolbox toolbox) + public List perform(Task task, TaskActionToolbox toolbox) { return toolbox.getIndexerMetadataStorageCoordinator() - .determineSegmentsWithUnreferencedLoadSpecs(segments); + .retrieveUpgradedFromSegmentIds(dataSource, ImmutableList.copyOf(segmentIds)); } @Override @@ -78,21 +90,22 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) { return false; } - DetermineSegmentsToKillAction that = (DetermineSegmentsToKillAction) o; - return Objects.equals(segments, that.segments); + RetrieveUpgradedFromSegmentsIdsAction that = (RetrieveUpgradedFromSegmentsIdsAction) o; + return Objects.equals(dataSource, that.dataSource) && Objects.equals(segmentIds, that.segmentIds); } @Override public int hashCode() { - return Objects.hash(segments); + return Objects.hash(dataSource, segmentIds); } @Override public String toString() { return getClass().getSimpleName() + "{" + - "segments=" + segments + + "dataSource='" + dataSource + '\'' + + ", segmentIds=" + 
segmentIds + '}'; } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentsIdsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentsIdsAction.java new file mode 100644 index 000000000000..f3f1a0405028 --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentsIdsAction.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.actions; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.type.TypeReference; +import com.google.common.collect.ImmutableList; +import org.apache.druid.indexing.common.task.Task; +import org.apache.druid.metadata.SegmentUpgradeInfo; + +import java.util.List; +import java.util.Objects; +import java.util.Set; + +/** + * Task action to retrieve the segment upgrade infos of a given set of used or unused upgraded segment ids. 
+ */ +public class RetrieveUpgradedToSegmentsIdsAction implements TaskAction> +{ + private final String dataSource; + private final Set upgradedSegmentIds; + + @JsonCreator + public RetrieveUpgradedToSegmentsIdsAction( + @JsonProperty("dataSource") String dataSource, + @JsonProperty("upgradedSegmentIds") Set upgradedSegmentIds + ) + { + this.dataSource = dataSource; + this.upgradedSegmentIds = upgradedSegmentIds; + } + + @JsonProperty + public String getDataSource() + { + return dataSource; + } + + @JsonProperty + public Set getUpgradedSegmentIds() + { + return upgradedSegmentIds; + } + + @Override + public TypeReference> getReturnTypeReference() + { + return new TypeReference>() + { + }; + } + + @Override + public List perform(Task task, TaskActionToolbox toolbox) + { + return toolbox.getIndexerMetadataStorageCoordinator() + .retrieveUpgradedToSegmentIds(dataSource, ImmutableList.copyOf(getUpgradedSegmentIds())); + } + + @Override + public boolean isAudited() + { + return false; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + RetrieveUpgradedToSegmentsIdsAction that = (RetrieveUpgradedToSegmentsIdsAction) o; + return Objects.equals(dataSource, that.dataSource) && Objects.equals(upgradedSegmentIds, that.upgradedSegmentIds); + } + + @Override + public int hashCode() + { + return Objects.hash(dataSource, upgradedSegmentIds); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "dataSource='" + dataSource + '\'' + + ", upgradedSegmentIds=" + upgradedSegmentIds + + '}'; + } +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/TaskAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/TaskAction.java index 510503852855..38d3a02cb1c1 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/TaskAction.java +++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/TaskAction.java @@ -39,10 +39,11 @@ @JsonSubTypes.Type(name = "segmentTransactionalAppend", value = SegmentTransactionalAppendAction.class), @JsonSubTypes.Type(name = "segmentTransactionalReplace", value = SegmentTransactionalReplaceAction.class), @JsonSubTypes.Type(name = "retrieveSegmentsById", value = RetrieveSegmentsByIdAction.class), + @JsonSubTypes.Type(name = "retrieveUpgradedFromSegmentIds", value = RetrieveUpgradedFromSegmentsIdsAction.class), + @JsonSubTypes.Type(name = "retrieveUpgradedToSegmentIds", value = RetrieveUpgradedToSegmentsIdsAction.class), @JsonSubTypes.Type(name = "segmentListUsed", value = RetrieveUsedSegmentsAction.class), @JsonSubTypes.Type(name = "segmentListUnused", value = RetrieveUnusedSegmentsAction.class), @JsonSubTypes.Type(name = "markSegmentsAsUnused", value = MarkSegmentsAsUnusedAction.class), - @JsonSubTypes.Type(name = "determineSegmentsToKill", value = DetermineSegmentsToKillAction.class), @JsonSubTypes.Type(name = "segmentNuke", value = SegmentNukeAction.class), @JsonSubTypes.Type(name = "segmentMetadataUpdate", value = SegmentMetadataUpdateAction.class), @JsonSubTypes.Type(name = SegmentAllocateAction.TYPE, value = SegmentAllocateAction.class), diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index 24060806950f..947793a48058 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -34,8 +34,9 @@ import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; import org.apache.druid.indexing.common.TaskToolbox; -import org.apache.druid.indexing.common.actions.DetermineSegmentsToKillAction; import 
org.apache.druid.indexing.common.actions.RetrieveUnusedSegmentsAction; +import org.apache.druid.indexing.common.actions.RetrieveUpgradedFromSegmentsIdsAction; +import org.apache.druid.indexing.common.actions.RetrieveUpgradedToSegmentsIdsAction; import org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction; import org.apache.druid.indexing.common.actions.SegmentNukeAction; import org.apache.druid.indexing.common.actions.TaskActionClient; @@ -48,6 +49,7 @@ import org.apache.druid.server.lookup.cache.LookupLoadingSpec; import org.apache.druid.server.security.ResourceAction; import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -98,13 +100,15 @@ public class KillUnusedSegmentsTask extends AbstractFixedIntervalTask /** * Maximum number of segments that can be killed. */ - @Nullable private final Integer limit; + @Nullable + private final Integer limit; /** * The maximum used status last updated time. Any segments with * {@code used_status_last_updated} no later than this time will be included in the kill task. */ - @Nullable private final DateTime maxUsedStatusLastUpdatedTime; + @Nullable + private final DateTime maxUsedStatusLastUpdatedTime; @JsonCreator public KillUnusedSegmentsTask( @@ -231,29 +235,56 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception } // Kill segments. Order is important here: - // Determine the set of segments to be killed from deep storage _before_ nuking segments from deep storage + // Retrieve the segment upgrade infos for the batch _before_ the segments are nuked // We then want the nuke action to clean up the metadata records _before_ the segments are removed from storage. // This helps maintain that we will always have a storage segment if the metadata segment is present. + // Determine the subset of segments to be killed from deep storage based on loadspecs. 
// If the segment nuke throws an exception, then the segment cleanup is abandoned. - Set segmentsWithUnreferencedLoadSpecs = new HashSet<>(unusedSegments); + // Determine upgraded_from_segment_id before nuking + final TaskActionClient taskActionClient = toolbox.getTaskActionClient(); + final Set upgradedSegmentIds = unusedSegments.stream() + .map(DataSegment::getId) + .map(SegmentId::toString).collect(Collectors.toSet()); + try { + taskActionClient.submit(new RetrieveUpgradedFromSegmentsIdsAction(getDataSource(), upgradedSegmentIds)) + .stream() + .filter(upgradeInfo -> upgradeInfo.getUpgradedFromSegmentId() != null) + .forEach(upgradeInfo -> upgradedSegmentIds.add(upgradeInfo.getUpgradedFromSegmentId())); + } + catch (Exception e) { + LOG.warn( + e, + "Could not retrieve segment upgrade infos using task action[retrieveUpgradedFromSegmentIds]." + + " Overlord maybe on an older version." + ); + } + + // Nuke Segments + taskActionClient.submit(new SegmentNukeAction(new HashSet<>(unusedSegments))); + + // Determine unreferenced segments + final Set referencedSegmentIds = new HashSet<>(); try { - segmentsWithUnreferencedLoadSpecs - = toolbox.getTaskActionClient() - .submit(new DetermineSegmentsToKillAction(segmentsWithUnreferencedLoadSpecs)); + taskActionClient.submit(new RetrieveUpgradedToSegmentsIdsAction(getDataSource(), upgradedSegmentIds)) + .forEach(upgradeInfo -> referencedSegmentIds.add(upgradeInfo.getId())); } catch (Exception e) { LOG.warn( e, - "Could not determine segments with unreferenced load specs using task action[determineSegmentsToKill]." + "Could not retrieve segment upgrade infos using task action[retrieveUpgradedFromSegmentIds]." + " Overlord maybe on an older version." 
); } + final Set unreferencedDataSegments + = unusedSegments.stream() + .filter(unusedSegment -> !referencedSegmentIds.contains(unusedSegment.getId().toString())) + .collect(Collectors.toSet()); - toolbox.getTaskActionClient().submit(new SegmentNukeAction(new HashSet<>(unusedSegments))); // Kill segments from the deep storage only if their load specs are not being used by any other segments - final List segmentsToBeKilled = segmentsWithUnreferencedLoadSpecs + // We still need to check with used load specs as segment upgrades were introduced before this change + final List segmentsToBeKilled = unreferencedDataSegments .stream() .filter(unusedSegment -> unusedSegment.getLoadSpec() == null || !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec())) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskClient.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskClient.java index 15d03fb38fec..7fd282e44ce2 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskClient.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskClient.java @@ -160,7 +160,7 @@ ListenableFuture setEndOffsetsAsync( * The update is processed only if the task is serving the original pending segment. 
* * @param taskId - task id - * @param pendingSegmentRecord - the ids belonging to the versions to which the first segment needs to be updated + * @param pendingSegmentRecord - the ids belonging to the versions to which the root segment needs to be updated * @return true if the update succeeds */ ListenableFuture registerNewVersionOfPendingSegmentAsync( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 44d4602c68a3..dcb985e70ab4 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -32,6 +32,7 @@ import org.apache.druid.java.util.common.Pair; import org.apache.druid.metadata.PendingSegmentRecord; import org.apache.druid.metadata.ReplaceTaskLock; +import org.apache.druid.metadata.SegmentUpgradeInfo; import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.DataSegment; @@ -315,11 +316,23 @@ public List getPendingSegments(String datasource, Interval } @Override - public Set determineSegmentsWithUnreferencedLoadSpecs( - final Set segments + public List retrieveUpgradedFromSegmentIds( + final String dataSource, + final List segmentIds ) { - return segments; + List segmentUpgradeInfos = new ArrayList<>(); + segmentIds.forEach(id -> segmentUpgradeInfos.add(new SegmentUpgradeInfo(id, null))); + return segmentUpgradeInfos; + } + + @Override + public List retrieveUpgradedToSegmentIds( + final String dataSource, + final List upgradedFromSegmentIds + ) + { + return Collections.emptyList(); } public Set getPublished() diff --git 
a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index 1a6bf0b61b0f..89fdfb23dbf9 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -22,6 +22,7 @@ import org.apache.druid.java.util.common.Pair; import org.apache.druid.metadata.PendingSegmentRecord; import org.apache.druid.metadata.ReplaceTaskLock; +import org.apache.druid.metadata.SegmentUpgradeInfo; import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.DataSegment; @@ -475,11 +476,21 @@ SegmentPublishResult commitMetadataOnly( List getPendingSegments(String datasource, Interval interval); /** - * Return subset of segments that do not have load specs referenced by segments outside this set - * Assumes that all the segments belong to a single datasource + * List of SegmentUpgradeInfo for a given datasource and a list of segment ids + * This method assumes that all the segments belong to the same datasource + * @param dataSource data source + * @param segmentIds ids of segments + * @return segment upgrade infos with the given list as the ids + */ + List retrieveUpgradedFromSegmentIds(String dataSource, List segmentIds); + + /** + * List of SegmentUpgradeInfo for a given datasource and list of upgradedFromSegmentIds + * Assumes that all the upgradedFromSegmentIds are non-null and belong to the given datasource * - * @param segments set of DataSegments - * @return subset of segments that can safely be killed from deep storage + * @param dataSource data source + * @param upgradedFromSegmentIds ids of the first segments which had the corresponding load spec + * @return segment upgrade infos with the given list 
as upgradedFromSegmentIds */ - Set determineSegmentsWithUnreferencedLoadSpecs(Set segments); + List retrieveUpgradedToSegmentIds(String dataSource, List upgradedFromSegmentIds); } diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index c73c0c42c375..61077bcaa434 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -2945,75 +2945,13 @@ public int deleteUpgradeSegmentsForTask(final String taskId) } @Override - public Set determineSegmentsWithUnreferencedLoadSpecs(final Set segments) - { - if (segments.isEmpty()) { - return segments; - } - final String dataSource = segments.stream() - .findFirst() - .get() - .getDataSource(); - final List segmentIds = segments.stream() - .map(DataSegment::getId) - .map(SegmentId::toString) - .collect(Collectors.toList()); - Map idToUpgradedMap = new HashMap<>(getUpgradedFromSegmentIds(dataSource, segmentIds)); - final Set upgradedFromSegmentIds = new HashSet<>(segmentIds); - upgradedFromSegmentIds.addAll(idToUpgradedMap.values()); - idToUpgradedMap.putAll(getSegmentsWithUpgradedFromSegmentIds(dataSource, new ArrayList<>(upgradedFromSegmentIds))); - final Map> upgradedToIdsMap = new HashMap<>(); - for (Map.Entry idAndAncestor : idToUpgradedMap.entrySet()) { - upgradedToIdsMap.computeIfAbsent(idAndAncestor.getValue(), ancestor -> new HashSet<>()) - .add(idAndAncestor.getKey()); - } - final Set segmentsWithUnreferencedLoadSpecs = new HashSet<>(); - final Set existingSegmentIds - = retrieveSegmentsById(dataSource, upgradedFromSegmentIds).stream() - .map(DataSegment::getId) - .map(SegmentId::toString) - .collect(Collectors.toSet()); - for (DataSegment segment : segments) { - final String id = segment.getId().toString(); - final Set conflicts = new 
HashSet<>(); - if (upgradedToIdsMap.containsKey(id)) { - // Add children - conflicts.addAll(upgradedToIdsMap.get(id)); - } - final String parent = idToUpgradedMap.get(id); - if (parent != null) { - // add parent if it exists - if (existingSegmentIds.contains(parent)) { - conflicts.add(parent); - } - // add siblings - if (upgradedToIdsMap.containsKey(parent)) { - conflicts.addAll(upgradedToIdsMap.get(parent)); - } - } - // Remove segments being deleted - segmentIds.forEach(conflicts::remove); - if (conflicts.isEmpty()) { - segmentsWithUnreferencedLoadSpecs.add(segment); - } - } - return segmentsWithUnreferencedLoadSpecs; - } - - /** - * Gets a map from the input segment id to its upgraded from segment id for a given list of segment ids - * The map contains the id as a key if it has been upgraded. - * The value is the id of the first segment with the same load spec - * This method assumes that all the segments belong to the same datasource - * @param dataSource data source - * @param segmentIds ids of segments - * @return Map from input segment ids to their upgraded_from_segment ids - */ - @VisibleForTesting - Map getUpgradedFromSegmentIds(final String dataSource, final List segmentIds) + public List retrieveUpgradedFromSegmentIds( + final String dataSource, + final List segmentIds + ) { if (segmentIds.isEmpty()) { - return Collections.emptyMap(); + return Collections.emptyList(); } final String sql = StringUtils.format( @@ -3021,42 +2959,31 @@ Map getUpgradedFromSegmentIds(final String dataSource, final Lis dbTables.getSegmentsTable(), SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("id", segmentIds) ); - Map upgradedFromSegmentIdMap = new HashMap<>(); - connector.retryWithHandle( + return connector.retryWithHandle( handle -> { Query> query = handle.createQuery(sql) .bind("dataSource", dataSource); SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition("id", segmentIds, query); - query.map( + return query.map( (index, r, ctx) -> { final String 
id = r.getString(1); final String upgradedFromSegmentId = r.getString(2); - if (upgradedFromSegmentId != null) { - upgradedFromSegmentIdMap.put(id, upgradedFromSegmentId); - } - return null; + return new SegmentUpgradeInfo(id, upgradedFromSegmentId); } ) .list(); - return null; } ); - return upgradedFromSegmentIdMap; } - /** - * Given a set of datasources upgraded_from_segment ids, return a map from their successors to the corresponding root - * @param dataSource data source - * @param upgradedFromSegmentIds ids of the first segments which had the corresponding load spec - * @return Map from successor to (upgraded from) segment id for every id in the input - */ - private Map getSegmentsWithUpgradedFromSegmentIds( + @Override + public List retrieveUpgradedToSegmentIds( final String dataSource, final List upgradedFromSegmentIds ) { if (upgradedFromSegmentIds.isEmpty()) { - return Collections.emptyMap(); + return Collections.emptyList(); } final String sql = StringUtils.format( @@ -3067,8 +2994,7 @@ private Map getSegmentsWithUpgradedFromSegmentIds( upgradedFromSegmentIds ) ); - Map upgradedFromSegmentIdMap = new HashMap<>(); - connector.retryWithHandle( + return connector.retryWithHandle( handle -> { Query> query = handle.createQuery(sql) .bind("dataSource", dataSource); @@ -3077,21 +3003,16 @@ private Map getSegmentsWithUpgradedFromSegmentIds( upgradedFromSegmentIds, query ); - query.map( + return query.map( (index, r, ctx) -> { final String id = r.getString(1); final String upgradedFromSegmentId = r.getString(2); - if (upgradedFromSegmentId != null) { - upgradedFromSegmentIdMap.put(id, upgradedFromSegmentId); - } - return null; + return new SegmentUpgradeInfo(id, upgradedFromSegmentId); } ) .list(); - return null; } ); - return upgradedFromSegmentIdMap; } private static class PendingSegmentsRecord diff --git a/server/src/main/java/org/apache/druid/metadata/SegmentUpgradeInfo.java b/server/src/main/java/org/apache/druid/metadata/SegmentUpgradeInfo.java new file mode 
100644 index 000000000000..921b976c5886 --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/SegmentUpgradeInfo.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import javax.annotation.Nullable; +import java.util.Objects; + +/** + * Information about the id and upgraded from segment id created as a result of segment upgrades + * upgradedFromSegmentId is null for segments that haven't been upgraded + */ +public class SegmentUpgradeInfo +{ + private final String id; + private final String upgradedFromSegmentId; + + @JsonCreator + public SegmentUpgradeInfo( + @JsonProperty("id") String id, + @JsonProperty("upgradedFromSegmentId") @Nullable String upgradedFromSegmentId + ) + { + this.id = id; + this.upgradedFromSegmentId = upgradedFromSegmentId; + } + + @JsonProperty + public String getId() + { + return id; + } + + @Nullable + @JsonProperty + public String getUpgradedFromSegmentId() + { + return upgradedFromSegmentId; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || 
getClass() != o.getClass()) { + return false; + } + SegmentUpgradeInfo that = (SegmentUpgradeInfo) o; + return Objects.equals(id, that.id) + && Objects.equals(upgradedFromSegmentId, that.upgradedFromSegmentId); + } + + @Override + public int hashCode() + { + return Objects.hash(id, upgradedFromSegmentId); + } +} diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 1de1481a8416..f8187ae026a3 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -275,9 +275,16 @@ public void testCommitAppendSegments() Set allCommittedSegments = new HashSet<>(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())); - Map upgradedFromSegmentIdMap = coordinator.getUpgradedFromSegmentIds( + Map upgradedFromSegmentIdMap = new HashMap<>(); + coordinator.retrieveUpgradedFromSegmentIds( DS.WIKI, allCommittedSegments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toList()) + ).forEach( + segmentUpgradeInfo -> { + if (segmentUpgradeInfo.getUpgradedFromSegmentId() != null) { + upgradedFromSegmentIdMap.put(segmentUpgradeInfo.getId(), segmentUpgradeInfo.getUpgradedFromSegmentId()); + } + } ); // Verify the segments present in the metadata store Assert.assertTrue(allCommittedSegments.containsAll(appendSegments)); @@ -405,9 +412,16 @@ public void testCommitReplaceSegments() final Set usedSegments = new HashSet<>(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())); - final Map upgradedFromSegmentIdMap = coordinator.getUpgradedFromSegmentIds( + final Map upgradedFromSegmentIdMap = new HashMap<>(); + coordinator.retrieveUpgradedFromSegmentIds( "foo", 
usedSegments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toList()) + ).forEach( + segmentUpgradeInfo -> { + if (segmentUpgradeInfo.getUpgradedFromSegmentId() != null) { + upgradedFromSegmentIdMap.put(segmentUpgradeInfo.getId(), segmentUpgradeInfo.getUpgradedFromSegmentId()); + } + } ); Assert.assertTrue(usedSegments.containsAll(segmentsAppendedWithReplaceLock)); @@ -3430,59 +3444,54 @@ public void testRetrieveUnusedSegmentsForExactIntervalAndVersion() } @Test - public void testFindDataSegmentsWithUnreferencedLoadSpecs() + public void testRetrieveUpgradedFromSegmentIds() { - // Empty set - Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(Collections.emptySet()).isEmpty()); + final String datasource = defaultSegment.getDataSource(); + final Map upgradedFromSegmentIdMap = new HashMap<>(); + upgradedFromSegmentIdMap.put(defaultSegment2.getId(), defaultSegment.getId().toString()); + insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), upgradedFromSegmentIdMap); + coordinator.markSegmentsAsUnusedWithinInterval(datasource, Intervals.ETERNITY); + upgradedFromSegmentIdMap.clear(); + upgradedFromSegmentIdMap.put(defaultSegment3.getId(), defaultSegment.getId().toString()); + insertUsedSegments(ImmutableSet.of(defaultSegment3, defaultSegment4), upgradedFromSegmentIdMap); - // Lone segment is unreferenced - insertUsedSegments(Collections.singleton(eternitySegment), Collections.emptyMap()); - coordinator.markSegmentsAsUnusedWithinInterval(eternitySegment.getDataSource(), Intervals.ETERNITY); - Assert.assertEquals( - Collections.singleton(eternitySegment), - coordinator.determineSegmentsWithUnreferencedLoadSpecs(Collections.singleton(eternitySegment)) - ); + Set expected = new HashSet<>(); + expected.add(new SegmentUpgradeInfo(defaultSegment.getId().toString(), null)); + expected.add(new SegmentUpgradeInfo(defaultSegment2.getId().toString(), defaultSegment.getId().toString())); + expected.add(new 
SegmentUpgradeInfo(defaultSegment3.getId().toString(), defaultSegment.getId().toString())); + expected.add(new SegmentUpgradeInfo(defaultSegment4.getId().toString(), null)); - // Siblings are always referenced unless they're passed together - Map upgradedFromSegmentIdMap = ImmutableMap.of( - defaultSegment.getId(), - "nonExistentRoot", - defaultSegment2.getId(), - "nonExistentRoot" - ); - insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), upgradedFromSegmentIdMap); - Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(defaultSegment)).isEmpty()); - Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(defaultSegment2)).isEmpty()); - Assert.assertEquals( - ImmutableSet.of(defaultSegment, defaultSegment2), - coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(defaultSegment, defaultSegment2)) - ); - - // The parent and all its children must all be present in the batch for them to be unreferenced - DataSegment root = createSegment(Intervals.of("2024/2025"), "2024-01-01", new NumberedShardSpec(0, 0)); - DataSegment childV1 = createSegment(Intervals.of("2024/2025"), "2024-02-01", new NumberedShardSpec(0, 0)); - DataSegment childV2 = createSegment(Intervals.ETERNITY, "2025-01-01", new NumberedShardSpec(0, 0)); - upgradedFromSegmentIdMap = ImmutableMap.of( - childV1.getId(), - root.getId().toString(), - childV2.getId(), - root.getId().toString() - ); - insertUsedSegments(ImmutableSet.of(root, childV1, childV2), upgradedFromSegmentIdMap); - coordinator.markSegmentsAsUnusedWithinInterval(DS.WIKI, Intervals.of("2024/2025")); - - Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root)).isEmpty()); - Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(childV1)).isEmpty()); - Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(childV2)).isEmpty()); - 
Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root, childV1)).isEmpty()); - Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root, childV2)).isEmpty()); - Assert.assertTrue(coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(childV1, childV2)).isEmpty()); - Assert.assertEquals( - ImmutableSet.of(root, childV1, childV2), - coordinator.determineSegmentsWithUnreferencedLoadSpecs(ImmutableSet.of(root, childV1, childV2)) - ); + List segmentIds = new ArrayList<>(); + segmentIds.add(defaultSegment.getId().toString()); + segmentIds.add(defaultSegment2.getId().toString()); + segmentIds.add(defaultSegment3.getId().toString()); + segmentIds.add(defaultSegment4.getId().toString()); + Assert.assertEquals(expected, new HashSet<>(coordinator.retrieveUpgradedFromSegmentIds(datasource, segmentIds))); } + @Test + public void testRetrieveUpgradedToSegmentIds() + { + final String datasource = defaultSegment.getDataSource(); + final Map upgradedFromSegmentIdMap = new HashMap<>(); + upgradedFromSegmentIdMap.put(defaultSegment2.getId(), defaultSegment.getId().toString()); + insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), upgradedFromSegmentIdMap); + coordinator.markSegmentsAsUnusedWithinInterval(datasource, Intervals.ETERNITY); + upgradedFromSegmentIdMap.clear(); + upgradedFromSegmentIdMap.put(defaultSegment3.getId(), defaultSegment.getId().toString()); + insertUsedSegments(ImmutableSet.of(defaultSegment3, defaultSegment4), upgradedFromSegmentIdMap); + + Set expected = new HashSet<>(); + expected.add(new SegmentUpgradeInfo(defaultSegment2.getId().toString(), defaultSegment.getId().toString())); + expected.add(new SegmentUpgradeInfo(defaultSegment3.getId().toString(), defaultSegment.getId().toString())); + + List upgradedIds = new ArrayList<>(); + upgradedIds.add(defaultSegment.getId().toString()); + upgradedIds.add(defaultSegment2.getId().toString()); + 
upgradedIds.add(defaultSegment3.getId().toString()); + upgradedIds.add(defaultSegment4.getId().toString()); + Assert.assertEquals(expected, new HashSet<>(coordinator.retrieveUpgradedToSegmentIds(datasource, upgradedIds))); + } private boolean insertUsedSegments(Set dataSegments, Map upgradedFromSegmentIdMap) { From 4fa1265103db3ce329dde86823f84b0863160236 Mon Sep 17 00:00:00 2001 From: Amatya Date: Wed, 10 Jul 2024 11:31:29 +0530 Subject: [PATCH 09/16] Address most review comments --- ...RetrieveUpgradedFromSegmentIdsAction.java} | 20 +- ...> RetrieveUpgradedToSegmentIdsAction.java} | 36 ++- .../SegmentTransactionalReplaceAction.java | 2 +- .../indexing/common/actions/TaskAction.java | 4 +- .../common/task/KillUnusedSegmentsTask.java | 123 ++++++++--- .../common/task/IngestionTestBase.java | 13 +- .../task/KillUnusedSegmentsTaskTest.java | 207 ++++++++++++++++++ .../indexing/test/TestDataSegmentKiller.java | 13 +- ...TestIndexerMetadataStorageCoordinator.java | 17 +- .../IndexerMetadataStorageCoordinator.java | 18 +- .../IndexerSQLMetadataStorageCoordinator.java | 107 ++++++--- .../druid/metadata/SegmentUpgradeInfo.java | 79 ------- .../UpgradedFromSegmentsResponse.java | 49 +++++ .../metadata/UpgradedToSegmentsResponse.java | 51 +++++ ...exerSQLMetadataStorageCoordinatorTest.java | 104 +++------ 15 files changed, 574 insertions(+), 269 deletions(-) rename indexing-service/src/main/java/org/apache/druid/indexing/common/actions/{RetrieveUpgradedFromSegmentsIdsAction.java => RetrieveUpgradedFromSegmentIdsAction.java} (75%) rename indexing-service/src/main/java/org/apache/druid/indexing/common/actions/{RetrieveUpgradedToSegmentsIdsAction.java => RetrieveUpgradedToSegmentIdsAction.java} (62%) delete mode 100644 server/src/main/java/org/apache/druid/metadata/SegmentUpgradeInfo.java create mode 100644 server/src/main/java/org/apache/druid/metadata/UpgradedFromSegmentsResponse.java create mode 100644 
server/src/main/java/org/apache/druid/metadata/UpgradedToSegmentsResponse.java diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentsIdsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java similarity index 75% rename from indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentsIdsAction.java rename to indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java index 1f91cc181d07..a71559a4e0bd 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentsIdsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java @@ -22,24 +22,22 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.type.TypeReference; -import com.google.common.collect.ImmutableList; import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.metadata.SegmentUpgradeInfo; +import org.apache.druid.metadata.UpgradedFromSegmentsResponse; -import java.util.List; import java.util.Objects; import java.util.Set; /** - * Task action to retrieve the segment upgrade infos of a given set of used or unused segment ids. + * Task action to retrieve the segment IDs from which a given set of segments were upgraded. 
*/ -public class RetrieveUpgradedFromSegmentsIdsAction implements TaskAction> +public class RetrieveUpgradedFromSegmentIdsAction implements TaskAction { private final String dataSource; private final Set segmentIds; @JsonCreator - public RetrieveUpgradedFromSegmentsIdsAction( + public RetrieveUpgradedFromSegmentIdsAction( @JsonProperty("dataSource") String dataSource, @JsonProperty("segmentIds") Set segmentIds ) @@ -61,18 +59,18 @@ public Set getSegmentIds() } @Override - public TypeReference> getReturnTypeReference() + public TypeReference getReturnTypeReference() { - return new TypeReference>() + return new TypeReference() { }; } @Override - public List perform(Task task, TaskActionToolbox toolbox) + public UpgradedFromSegmentsResponse perform(Task task, TaskActionToolbox toolbox) { return toolbox.getIndexerMetadataStorageCoordinator() - .retrieveUpgradedFromSegmentIds(dataSource, ImmutableList.copyOf(segmentIds)); + .retrieveUpgradedFromSegmentIds(dataSource, segmentIds); } @Override @@ -90,7 +88,7 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) { return false; } - RetrieveUpgradedFromSegmentsIdsAction that = (RetrieveUpgradedFromSegmentsIdsAction) o; + RetrieveUpgradedFromSegmentIdsAction that = (RetrieveUpgradedFromSegmentIdsAction) o; return Objects.equals(dataSource, that.dataSource) && Objects.equals(segmentIds, that.segmentIds); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentsIdsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java similarity index 62% rename from indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentsIdsAction.java rename to indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java index f3f1a0405028..88ba08821342 100644 --- 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentsIdsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java @@ -22,30 +22,28 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.type.TypeReference; -import com.google.common.collect.ImmutableList; import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.metadata.SegmentUpgradeInfo; +import org.apache.druid.metadata.UpgradedToSegmentsResponse; -import java.util.List; import java.util.Objects; import java.util.Set; /** - * Task action to retrieve the segment upgrade infos of a given set of used or unused upgraded segment ids. + * Task action to retrieve all the segment IDs to which a given set of segments were upgraded. */ -public class RetrieveUpgradedToSegmentsIdsAction implements TaskAction> +public class RetrieveUpgradedToSegmentIdsAction implements TaskAction { private final String dataSource; - private final Set upgradedSegmentIds; + private final Set segmentIds; @JsonCreator - public RetrieveUpgradedToSegmentsIdsAction( + public RetrieveUpgradedToSegmentIdsAction( @JsonProperty("dataSource") String dataSource, - @JsonProperty("upgradedSegmentIds") Set upgradedSegmentIds + @JsonProperty("segmentIds") Set segmentIds ) { this.dataSource = dataSource; - this.upgradedSegmentIds = upgradedSegmentIds; + this.segmentIds = segmentIds; } @JsonProperty @@ -55,24 +53,24 @@ public String getDataSource() } @JsonProperty - public Set getUpgradedSegmentIds() + public Set getSegmentIds() { - return upgradedSegmentIds; + return segmentIds; } @Override - public TypeReference> getReturnTypeReference() + public TypeReference getReturnTypeReference() { - return new TypeReference>() + return new TypeReference() { }; } @Override - public List perform(Task task, TaskActionToolbox toolbox) + public 
UpgradedToSegmentsResponse perform(Task task, TaskActionToolbox toolbox) { return toolbox.getIndexerMetadataStorageCoordinator() - .retrieveUpgradedToSegmentIds(dataSource, ImmutableList.copyOf(getUpgradedSegmentIds())); + .retrieveUpgradedToSegmentIds(dataSource, segmentIds); } @Override @@ -90,14 +88,14 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) { return false; } - RetrieveUpgradedToSegmentsIdsAction that = (RetrieveUpgradedToSegmentsIdsAction) o; - return Objects.equals(dataSource, that.dataSource) && Objects.equals(upgradedSegmentIds, that.upgradedSegmentIds); + RetrieveUpgradedToSegmentIdsAction that = (RetrieveUpgradedToSegmentIdsAction) o; + return Objects.equals(dataSource, that.dataSource) && Objects.equals(segmentIds, that.segmentIds); } @Override public int hashCode() { - return Objects.hash(dataSource, upgradedSegmentIds); + return Objects.hash(dataSource, segmentIds); } @Override @@ -105,7 +103,7 @@ public String toString() { return getClass().getSimpleName() + "{" + "dataSource='" + dataSource + '\'' + - ", upgradedSegmentIds=" + upgradedSegmentIds + + ", segmentIds=" + segmentIds + '}'; } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java index 9846fca58155..df188ac81533 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java @@ -52,7 +52,7 @@ * commit input segments contained within interval * upgrade ids in the upgradeSegments table corresponding to this task to the replace lock's version and commit them * fetch payload, task_allocator_id for pending segments - * upgrade each such pending segment to the replace lock's version with the 
corresponding upgraded from segment + * upgrade each such pending segment to the replace lock's version with the corresponding root segment * } * For every pending segment with version == replace lock version: * Fetch payload, group_id or the pending segment and relay them to the supervisor diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/TaskAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/TaskAction.java index 38d3a02cb1c1..973a83ecee43 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/TaskAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/TaskAction.java @@ -39,8 +39,8 @@ @JsonSubTypes.Type(name = "segmentTransactionalAppend", value = SegmentTransactionalAppendAction.class), @JsonSubTypes.Type(name = "segmentTransactionalReplace", value = SegmentTransactionalReplaceAction.class), @JsonSubTypes.Type(name = "retrieveSegmentsById", value = RetrieveSegmentsByIdAction.class), - @JsonSubTypes.Type(name = "retrieveUpgradedFromSegmentIds", value = RetrieveUpgradedFromSegmentsIdsAction.class), - @JsonSubTypes.Type(name = "retrieveUpgradedToSegmentIds", value = RetrieveUpgradedToSegmentsIdsAction.class), + @JsonSubTypes.Type(name = "retrieveUpgradedFromSegmentIds", value = RetrieveUpgradedFromSegmentIdsAction.class), + @JsonSubTypes.Type(name = "retrieveUpgradedToSegmentIds", value = RetrieveUpgradedToSegmentIdsAction.class), @JsonSubTypes.Type(name = "segmentListUsed", value = RetrieveUsedSegmentsAction.class), @JsonSubTypes.Type(name = "segmentListUnused", value = RetrieveUnusedSegmentsAction.class), @JsonSubTypes.Type(name = "markSegmentsAsUnused", value = MarkSegmentsAsUnusedAction.class), diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index 
947793a48058..d8a187221975 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -34,9 +34,10 @@ import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; import org.apache.druid.indexing.common.TaskToolbox; +import org.apache.druid.indexing.common.actions.RetrieveSegmentsByIdAction; import org.apache.druid.indexing.common.actions.RetrieveUnusedSegmentsAction; -import org.apache.druid.indexing.common.actions.RetrieveUpgradedFromSegmentsIdsAction; -import org.apache.druid.indexing.common.actions.RetrieveUpgradedToSegmentsIdsAction; +import org.apache.druid.indexing.common.actions.RetrieveUpgradedFromSegmentIdsAction; +import org.apache.druid.indexing.common.actions.RetrieveUpgradedToSegmentIdsAction; import org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction; import org.apache.druid.indexing.common.actions.SegmentNukeAction; import org.apache.druid.indexing.common.actions.TaskActionClient; @@ -50,6 +51,7 @@ import org.apache.druid.server.security.ResourceAction; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; +import org.apache.druid.utils.CollectionUtils; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -57,6 +59,7 @@ import javax.annotation.Nullable; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -79,7 +82,7 @@ public class KillUnusedSegmentsTask extends AbstractFixedIntervalTask * Default nuke batch size. This is a small enough size that we still get value from batching, while * yielding as quickly as possible. 
In one real cluster environment backed with mysql, ~2000rows/sec, * with batch size of 100, means a batch should only less than a second for the task lock, and depending - * on the segment store latency, unoptimised S3 cleanups typically take 5-10 seconds per 100. Over time + * on the segment store latency, unoptimised S3 cleanups typically take 5-10 seconds per 100. Over time, * we expect the S3 cleanup to get quicker, so this should be < 1 second, which means we'll be yielding * the task lockbox every 1-2 seconds. */ @@ -201,6 +204,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception numTotalBatches != null ? StringUtils.format(" in [%d] batches.", numTotalBatches) : "." ); + final TaskActionClient taskActionClient = toolbox.getTaskActionClient(); RetrieveUsedSegmentsAction retrieveUsedSegmentsAction = new RetrieveUsedSegmentsAction( getDataSource(), ImmutableList.of(getInterval()), @@ -208,7 +212,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception ); // Fetch the load specs of all segments overlapping with the unused segment intervals final Set> usedSegmentLoadSpecs = - toolbox.getTaskActionClient().submit(retrieveUsedSegmentsAction) + taskActionClient.submit(retrieveUsedSegmentsAction) .stream() .map(DataSegment::getLoadSpec).collect(Collectors.toSet()); @@ -242,21 +246,22 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception // If the segment nuke throws an exception, then the segment cleanup is abandoned. 
// Determine upgraded_from_segment_id before nuking - final TaskActionClient taskActionClient = toolbox.getTaskActionClient(); final Set upgradedSegmentIds = unusedSegments.stream() .map(DataSegment::getId) .map(SegmentId::toString).collect(Collectors.toSet()); + final Map upgradedFromSegmentIds = new HashMap<>(); try { - taskActionClient.submit(new RetrieveUpgradedFromSegmentsIdsAction(getDataSource(), upgradedSegmentIds)) - .stream() - .filter(upgradeInfo -> upgradeInfo.getUpgradedFromSegmentId() != null) - .forEach(upgradeInfo -> upgradedSegmentIds.add(upgradeInfo.getUpgradedFromSegmentId())); + upgradedFromSegmentIds.putAll( + taskActionClient.submit( + new RetrieveUpgradedFromSegmentIdsAction(getDataSource(), upgradedSegmentIds) + ).getUpgradedFromSegmentId() + ); } catch (Exception e) { LOG.warn( e, - "Could not retrieve segment upgrade infos using task action[retrieveUpgradedFromSegmentIds]." - + " Overlord maybe on an older version." + "Could not retrieve UpgradedFromSegmentsResponse using task action[retrieveUpgradedFromSegmentIds]." + + " Overlord may be on an older version." ); } @@ -264,27 +269,12 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception taskActionClient.submit(new SegmentNukeAction(new HashSet<>(unusedSegments))); // Determine unreferenced segments - final Set referencedSegmentIds = new HashSet<>(); - try { - taskActionClient.submit(new RetrieveUpgradedToSegmentsIdsAction(getDataSource(), upgradedSegmentIds)) - .forEach(upgradeInfo -> referencedSegmentIds.add(upgradeInfo.getId())); - } - catch (Exception e) { - LOG.warn( - e, - "Could not retrieve segment upgrade infos using task action[retrieveUpgradedFromSegmentIds]." - + " Overlord maybe on an older version." 
- ); - } - final Set unreferencedDataSegments - = unusedSegments.stream() - .filter(unusedSegment -> !referencedSegmentIds.contains(unusedSegment.getId().toString())) - .collect(Collectors.toSet()); - + final List unreferencedSegments + = findUnreferencedSegments(unusedSegments, upgradedFromSegmentIds, taskActionClient); // Kill segments from the deep storage only if their load specs are not being used by any other segments // We still need to check with used load specs as segment upgrades were introduced before this change - final List segmentsToBeKilled = unreferencedDataSegments + final List segmentsToBeKilled = unreferencedSegments .stream() .filter(unusedSegment -> unusedSegment.getLoadSpec() == null || !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec())) @@ -344,6 +334,81 @@ private NavigableMap> getNonRevokedTaskLockMap(TaskActi return taskLockMap; } + private List findUnreferencedSegments( + List unusedSegments, + Map upgradedFromSegmentIds, + TaskActionClient taskActionClient + ) throws IOException + { + if (unusedSegments.isEmpty() || upgradedFromSegmentIds.isEmpty()) { + return unusedSegments; + } + + final Set upgradedSegmentIds = new HashSet<>(); + for (DataSegment segment : unusedSegments) { + final String id = segment.getId().toString(); + upgradedSegmentIds.add(id); + if (upgradedFromSegmentIds.containsKey(id)) { + upgradedSegmentIds.add(upgradedFromSegmentIds.get(id)); + } + } + + final Map> upgradedToSegmentIds; + try { + upgradedToSegmentIds = taskActionClient.submit( + new RetrieveUpgradedToSegmentIdsAction(getDataSource(), upgradedSegmentIds) + ).getUpgradedToSegmentIds(); + } + catch (Exception e) { + LOG.warn( + e, + "Could not retrieve UpgradedToSegmentsResponse using task action[retrieveUpgradedToSegmentIds]." + + " Overlord may be on an older version." 
+ ); + return unusedSegments; + } + + final Set existingSegmentIds + = taskActionClient.submit(new RetrieveSegmentsByIdAction(getDataSource(), upgradedSegmentIds)) + .stream() + .map(DataSegment::getId) + .map(SegmentId::toString) + .collect(Collectors.toSet()); + + List unreferencedSegments = new ArrayList<>(); + for (DataSegment segment : unusedSegments) { + final String id = segment.getId().toString(); + + // If the segment still exists for some reason, do not kill it + if (existingSegmentIds.contains(id)) { + break; + } + + // If the segment is the parent of existing segments, do not kill it + if (upgradedToSegmentIds.containsKey(id)) { + break; + } + + // If the segment has no parent, it can be killed from deep storage + if (!upgradedFromSegmentIds.containsKey(id)) { + unreferencedSegments.add(segment); + break; + } + + // If the parent segment exists, do not kill it + final String parentId = upgradedFromSegmentIds.get(id); + if (existingSegmentIds.contains(parentId)) { + break; + } + + // If the parent segment has no existing children, it can be killed + if (CollectionUtils.isNullOrEmpty(upgradedToSegmentIds.get(parentId))) { + unreferencedSegments.add(segment); + } + } + return unreferencedSegments; + } + @Override public LookupLoadingSpec getLookupLoadingSpec() { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index 133ced3907dc..d6687efbf34e 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -61,6 +61,7 @@ import org.apache.druid.indexing.overlord.TaskStorage; import org.apache.druid.indexing.overlord.autoscaling.ScalingStats; import org.apache.druid.indexing.overlord.supervisor.SupervisorManager; +import 
org.apache.druid.indexing.test.TestDataSegmentKiller; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.RE; @@ -81,7 +82,6 @@ import org.apache.druid.segment.join.NoopJoinableFactory; import org.apache.druid.segment.loading.LocalDataSegmentPusher; import org.apache.druid.segment.loading.LocalDataSegmentPusherConfig; -import org.apache.druid.segment.loading.NoopDataSegmentKiller; import org.apache.druid.segment.loading.SegmentCacheManager; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaCache; @@ -130,6 +130,7 @@ public abstract class IngestionTestBase extends InitializedNullHandlingTest private SegmentSchemaManager segmentSchemaManager; private SegmentSchemaCache segmentSchemaCache; private SupervisorManager supervisorManager; + private TestDataSegmentKiller dataSegmentKiller; protected File reportsFile; @Before @@ -169,6 +170,7 @@ public void setUpIngestionTestBase() throws IOException lockbox = new TaskLockbox(taskStorage, storageCoordinator); segmentCacheManagerFactory = new SegmentCacheManagerFactory(TestIndex.INDEX_IO, getObjectMapper()); reportsFile = temporaryFolder.newFile(); + dataSegmentKiller = new TestDataSegmentKiller(); } @After @@ -243,6 +245,11 @@ public RowIngestionMetersFactory getRowIngestionMetersFactory() return testUtils.getRowIngestionMetersFactory(); } + public TestDataSegmentKiller getDataSegmentKiller() + { + return dataSegmentKiller; + } + public TaskActionToolbox createTaskActionToolbox() { storageCoordinator.start(); @@ -265,7 +272,7 @@ public TaskToolbox createTaskToolbox(TaskConfig config, Task task, SupervisorMan .taskExecutorNode(new DruidNode("druid/middlemanager", "localhost", false, 8091, null, true, false)) .taskActionClient(createActionClient(task)) .segmentPusher(new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig())) - .dataSegmentKiller(new 
NoopDataSegmentKiller()) + .dataSegmentKiller(dataSegmentKiller) .joinableFactory(NoopJoinableFactory.INSTANCE) .jsonMapper(objectMapper) .taskWorkDir(baseDir) @@ -450,7 +457,7 @@ public ListenableFuture run(Task task) .taskExecutorNode(new DruidNode("druid/middlemanager", "localhost", false, 8091, null, true, false)) .taskActionClient(taskActionClient) .segmentPusher(new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig())) - .dataSegmentKiller(new NoopDataSegmentKiller()) + .dataSegmentKiller(dataSegmentKiller) .joinableFactory(NoopJoinableFactory.INSTANCE) .jsonMapper(objectMapper) .taskWorkDir(baseDir) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java index f888ace5a54d..0650c8014e13 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java @@ -38,6 +38,7 @@ import org.apache.druid.java.util.common.JodaUtils; import org.apache.druid.server.lookup.cache.LookupLoadingSpec; import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; import org.assertj.core.api.Assertions; import org.easymock.Capture; import org.easymock.EasyMock; @@ -125,6 +126,212 @@ public void testKill() throws Exception new KillTaskReport.Stats(1, 2), getReportedStats() ); + Assert.assertEquals(ImmutableSet.of(segment3), getDataSegmentKiller().getKilledSegments()); + } + + @Test + public void testKillSegmentsDeleteUnreferencedSiblings() throws Exception + { + final Map upgradeSegmentMapping = ImmutableMap.of( + segment1.getId(), + "nonExistentParent", + segment2.getId(), + "nonExistentParent" + ); + getStorageCoordinator().insertUsedSegments(ImmutableSet.of(segment1, segment2), upgradeSegmentMapping); + 
getStorageCoordinator().markSegmentsAsUnusedWithinInterval(DATA_SOURCE, Intervals.ETERNITY); + + + final KillUnusedSegmentsTask task = new KillUnusedSegmentsTaskBuilder() + .dataSource(DATA_SOURCE) + .interval(Intervals.ETERNITY) + .build(); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); + + final List observedUnusedSegments = + getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + Intervals.ETERNITY, + null, + null, + null + ); + + Assert.assertEquals(Collections.emptyList(), observedUnusedSegments); + + Assert.assertEquals( + new KillTaskReport.Stats(2, 2), + getReportedStats() + ); + Assert.assertEquals(ImmutableSet.of(segment1, segment2), getDataSegmentKiller().getKilledSegments()); + } + + @Test + public void testKillSegmentsDoNotDeleteReferencedSibling() throws Exception + { + final Map upgradeSegmentMapping = ImmutableMap.of( + segment1.getId(), + "nonExistentParent", + segment2.getId(), + "nonExistentParent" + ); + getStorageCoordinator().insertUsedSegments(ImmutableSet.of(segment1, segment2), upgradeSegmentMapping); + getStorageCoordinator().markSegmentsAsUnusedWithinInterval(DATA_SOURCE, Intervals.ETERNITY); + + + final KillUnusedSegmentsTask task = new KillUnusedSegmentsTaskBuilder() + .dataSource(DATA_SOURCE) + .interval(segment1.getInterval()) + .build(); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); + + final List observedUnusedSegments = + getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + Intervals.ETERNITY, + null, + null, + null + ); + + Assert.assertEquals(Collections.singletonList(segment2), observedUnusedSegments); + + Assert.assertEquals( + new KillTaskReport.Stats(0, 2), + getReportedStats() + ); + Assert.assertEquals(Collections.emptySet(), getDataSegmentKiller().getKilledSegments()); + } + + @Test + public void testKillSegmentsDoNotDeleteParentWithReferencedChildren() throws Exception + { + final 
Map upgradeSegmentMapping = ImmutableMap.of( + segment1.getId(), + segment3.getId().toString(), + segment2.getId(), + segment3.getId().toString() + ); + getStorageCoordinator().insertUsedSegments(ImmutableSet.of(segment1, segment2, segment3), upgradeSegmentMapping); + getSegmentsMetadataManager().markSegmentAsUnused(segment2.getId()); + getSegmentsMetadataManager().markSegmentAsUnused(segment3.getId()); + + + final KillUnusedSegmentsTask task = new KillUnusedSegmentsTaskBuilder() + .dataSource(DATA_SOURCE) + .interval(Intervals.ETERNITY) + .build(); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); + + final List observedUnusedSegments = + getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + Intervals.ETERNITY, + null, + null, + null + ); + Assert.assertEquals(ImmutableList.of(), observedUnusedSegments); + Assertions.assertThat( + getMetadataStorageCoordinator().retrieveUsedSegmentsForInterval( + DATA_SOURCE, + Intervals.ETERNITY, + Segments.ONLY_VISIBLE + ) + ).containsExactlyInAnyOrder(segment1); + + Assert.assertEquals( + new KillTaskReport.Stats(0, 2), + getReportedStats() + ); + Assert.assertEquals(Collections.emptySet(), getDataSegmentKiller().getKilledSegments()); + } + + @Test + public void testKillSegmentsDoNotDeleteChildrenWithReferencedParent() throws Exception + { + final Map upgradeSegmentMapping = ImmutableMap.of( + segment1.getId(), + segment3.getId().toString(), + segment2.getId(), + segment3.getId().toString() + ); + getStorageCoordinator().insertUsedSegments(ImmutableSet.of(segment1, segment2, segment3), upgradeSegmentMapping); + getSegmentsMetadataManager().markSegmentAsUnused(segment1.getId()); + getSegmentsMetadataManager().markSegmentAsUnused(segment2.getId()); + + + final KillUnusedSegmentsTask task = new KillUnusedSegmentsTaskBuilder() + .dataSource(DATA_SOURCE) + .interval(Intervals.ETERNITY) + .build(); + + Assert.assertEquals(TaskState.SUCCESS, 
taskRunner.run(task).get().getStatusCode()); + + final List observedUnusedSegments = + getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + Intervals.ETERNITY, + null, + null, + null + ); + Assert.assertEquals(ImmutableList.of(), observedUnusedSegments); + Assertions.assertThat( + getMetadataStorageCoordinator().retrieveUsedSegmentsForInterval( + DATA_SOURCE, + Intervals.ETERNITY, + Segments.ONLY_VISIBLE + ) + ).containsExactlyInAnyOrder(segment3); + + Assert.assertEquals( + new KillTaskReport.Stats(0, 2), + getReportedStats() + ); + Assert.assertEquals(Collections.emptySet(), getDataSegmentKiller().getKilledSegments()); + } + + @Test + public void testKillSegmentsDeleteChildrenAndParent() throws Exception + { + final Map upgradeSegmentMapping = ImmutableMap.of( + segment1.getId(), + segment3.getId().toString(), + segment2.getId(), + segment3.getId().toString() + ); + getStorageCoordinator().insertUsedSegments(ImmutableSet.of(segment1, segment2, segment3), upgradeSegmentMapping); + getSegmentsMetadataManager().markSegmentAsUnused(segment1.getId()); + getSegmentsMetadataManager().markSegmentAsUnused(segment2.getId()); + getSegmentsMetadataManager().markSegmentAsUnused(segment3.getId()); + + + final KillUnusedSegmentsTask task = new KillUnusedSegmentsTaskBuilder() + .dataSource(DATA_SOURCE) + .interval(Intervals.ETERNITY) + .build(); + + Assert.assertEquals(TaskState.SUCCESS, taskRunner.run(task).get().getStatusCode()); + + final List observedUnusedSegments = + getMetadataStorageCoordinator().retrieveUnusedSegmentsForInterval( + DATA_SOURCE, + Intervals.ETERNITY, + null, + null, + null + ); + Assert.assertEquals(ImmutableList.of(), observedUnusedSegments); + + Assert.assertEquals( + new KillTaskReport.Stats(3, 2), + getReportedStats() + ); + Assert.assertEquals(ImmutableSet.of(segment1, segment2, segment3), getDataSegmentKiller().getKilledSegments()); } @Test diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestDataSegmentKiller.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestDataSegmentKiller.java index 33421eb1a5cb..92581f6dd1ed 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestDataSegmentKiller.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestDataSegmentKiller.java @@ -22,12 +22,18 @@ import org.apache.druid.segment.loading.DataSegmentKiller; import org.apache.druid.timeline.DataSegment; +import java.util.HashSet; +import java.util.Set; + public class TestDataSegmentKiller implements DataSegmentKiller { + + private final Set killedSegments = new HashSet<>(); + @Override public void kill(DataSegment segment) { - // do nothing + killedSegments.add(segment); } @Override @@ -35,4 +41,9 @@ public void killAll() { throw new UnsupportedOperationException("not implemented"); } + + public Set getKilledSegments() + { + return killedSegments; + } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index dcb985e70ab4..4f499632b88b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -32,7 +32,8 @@ import org.apache.druid.java.util.common.Pair; import org.apache.druid.metadata.PendingSegmentRecord; import org.apache.druid.metadata.ReplaceTaskLock; -import org.apache.druid.metadata.SegmentUpgradeInfo; +import org.apache.druid.metadata.UpgradedFromSegmentsResponse; +import org.apache.druid.metadata.UpgradedToSegmentsResponse; import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import 
org.apache.druid.timeline.DataSegment; @@ -316,23 +317,21 @@ public List getPendingSegments(String datasource, Interval } @Override - public List retrieveUpgradedFromSegmentIds( + public UpgradedFromSegmentsResponse retrieveUpgradedFromSegmentIds( final String dataSource, - final List segmentIds + final Set segmentIds ) { - List segmentUpgradeInfos = new ArrayList<>(); - segmentIds.forEach(id -> segmentUpgradeInfos.add(new SegmentUpgradeInfo(id, null))); - return segmentUpgradeInfos; + return new UpgradedFromSegmentsResponse(Collections.emptyMap()); } @Override - public List retrieveUpgradedToSegmentIds( + public UpgradedToSegmentsResponse retrieveUpgradedToSegmentIds( final String dataSource, - final List upgradedFromSegmentIds + final Set upgradedFromSegmentIds ) { - return Collections.emptyList(); + return new UpgradedToSegmentsResponse(Collections.emptyMap()); } public Set getPublished() diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index 89fdfb23dbf9..b0cc3a28dd65 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -22,7 +22,8 @@ import org.apache.druid.java.util.common.Pair; import org.apache.druid.metadata.PendingSegmentRecord; import org.apache.druid.metadata.ReplaceTaskLock; -import org.apache.druid.metadata.SegmentUpgradeInfo; +import org.apache.druid.metadata.UpgradedFromSegmentsResponse; +import org.apache.druid.metadata.UpgradedToSegmentsResponse; import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.DataSegment; @@ -476,21 +477,20 @@ SegmentPublishResult commitMetadataOnly( List getPendingSegments(String datasource, Interval 
interval); /** - * List of SegmentUpgradeInfo for a given datasource and a list of segment ids - * This method assumes that all the segments belong to the same datasource + * Returns a mapping from the segment ids to their parent segment ids + * * @param dataSource data source * @param segmentIds ids of segments - * @return segment upgrade infos with the given list as the ids + * @return UpgradedFromSegmentsResponse */ - List retrieveUpgradedFromSegmentIds(String dataSource, List segmentIds); + UpgradedFromSegmentsResponse retrieveUpgradedFromSegmentIds(String dataSource, Set segmentIds); /** - * List of SegmentUpgradeInfo for a given datasource and list of upgradedFromSegmentIds - * Assumes that all hte upgradedFromSegmentIds are non-null and belong to the given datasource + * Returns a mapping from segment ids to their child segment ids * * @param dataSource data source * @param upgradedFromSegmentIds ids of the first segments which had the corresponding load spec - * @return segment upgrade infos with the given list as upgradedFromSegmentIds + * @return UpgradedToSegmentsResponse */ - List retrieveUpgradedToSegmentIds(String dataSource, List upgradedFromSegmentIds); + UpgradedToSegmentsResponse retrieveUpgradedToSegmentIds(String dataSource, Set upgradedFromSegmentIds); } diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 61077bcaa434..09d79ef2293a 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -1551,6 +1551,48 @@ int insertPendingSegmentsIntoMetastore( return Arrays.stream(updated).sum(); } + @VisibleForTesting + public void insertUsedSegments(Set dataSegments, Map upgradedFromSegmentIdMap) + { + final String table = dbTables.getSegmentsTable(); 
connector.retryWithHandle( + handle -> { + PreparedBatch preparedBatch = handle.prepareBatch( + StringUtils.format( + "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, partitioned, version," + + " used, payload, used_status_last_updated, upgraded_from_segment_id) " + + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version," + + " :used, :payload, :used_status_last_updated, :upgraded_from_segment_id)", + table, + connector.getQuoteString() + ) + ); + for (DataSegment segment : dataSegments) { + String id = segment.getId().toString(); + preparedBatch.add() + .bind("id", id) + .bind("dataSource", segment.getDataSource()) + .bind("created_date", DateTimes.nowUtc().toString()) + .bind("start", segment.getInterval().getStart().toString()) + .bind("end", segment.getInterval().getEnd().toString()) + .bind("partitioned", !(segment.getShardSpec() instanceof NoneShardSpec)) + .bind("version", segment.getVersion()) + .bind("used", true) + .bind("payload", jsonMapper.writeValueAsBytes(segment)) + .bind("used_status_last_updated", DateTimes.nowUtc().toString()) + .bind("upgraded_from_segment_id", upgradedFromSegmentIdMap.get(segment.getId())); + } + + final int[] affectedRows = preparedBatch.execute(); + final boolean succeeded = Arrays.stream(affectedRows).allMatch(eachAffectedRows -> eachAffectedRows == 1); + if (!succeeded) { + throw new ISE("Failed to publish segments to DB"); + } + return true; + } + ); + } + private void insertPendingSegmentIntoMetastore( Handle handle, SegmentIdWithShardSpec newIdentifier, @@ -2945,74 +2987,83 @@ public int deleteUpgradeSegmentsForTask(final String taskId) } @Override - public List retrieveUpgradedFromSegmentIds( + public UpgradedFromSegmentsResponse retrieveUpgradedFromSegmentIds( final String dataSource, - final List segmentIds + final Set segmentIds ) { + final Map upgradedFromSegmentIds = new HashMap<>(); if (segmentIds.isEmpty()) { - return Collections.emptyList(); + return new 
UpgradedFromSegmentsResponse(upgradedFromSegmentIds); } + final List segmentIdList = ImmutableList.copyOf(segmentIds); final String sql = StringUtils.format( "SELECT id, upgraded_from_segment_id FROM %s WHERE dataSource = :dataSource %s", dbTables.getSegmentsTable(), - SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("id", segmentIds) + SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("id", segmentIdList) ); - return connector.retryWithHandle( + connector.retryWithHandle( handle -> { Query> query = handle.createQuery(sql) .bind("dataSource", dataSource); - SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition("id", segmentIds, query); - return query.map( - (index, r, ctx) -> { - final String id = r.getString(1); - final String upgradedFromSegmentId = r.getString(2); - return new SegmentUpgradeInfo(id, upgradedFromSegmentId); - } - ) - .list(); + SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition("id", segmentIdList, query); + return query.map((index, r, ctx) -> { + final String id = r.getString(1); + final String upgradedFromSegmentId = r.getString(2); + if (upgradedFromSegmentId != null) { + upgradedFromSegmentIds.put(id, upgradedFromSegmentId); + } + return null; + } + ) + .list(); } ); + return new UpgradedFromSegmentsResponse(upgradedFromSegmentIds); } @Override - public List retrieveUpgradedToSegmentIds( + public UpgradedToSegmentsResponse retrieveUpgradedToSegmentIds( final String dataSource, - final List upgradedFromSegmentIds + final Set upgradedFromSegmentIds ) { + final Map> upgradedToSegmentIds = new HashMap<>(); if (upgradedFromSegmentIds.isEmpty()) { - return Collections.emptyList(); + return new UpgradedToSegmentsResponse(upgradedToSegmentIds); } + final List upgradedFromSegmentIdList = ImmutableList.copyOf(upgradedFromSegmentIds); final String sql = StringUtils.format( "SELECT id, upgraded_from_segment_id FROM %s WHERE dataSource = :dataSource %s", dbTables.getSegmentsTable(), 
SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn( "upgraded_from_segment_id", - upgradedFromSegmentIds + upgradedFromSegmentIdList ) ); - return connector.retryWithHandle( + connector.retryWithHandle( handle -> { Query> query = handle.createQuery(sql) .bind("dataSource", dataSource); SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition( "upgraded_from_segment_id", - upgradedFromSegmentIds, + upgradedFromSegmentIdList, query ); - return query.map( - (index, r, ctx) -> { - final String id = r.getString(1); - final String upgradedFromSegmentId = r.getString(2); - return new SegmentUpgradeInfo(id, upgradedFromSegmentId); - } - ) - .list(); + return query.map((index, r, ctx) -> { + final String upgradedToId = r.getString(1); + final String id = r.getString(2); + upgradedToSegmentIds.computeIfAbsent(id, k -> new HashSet<>()) + .add(upgradedToId); + return null; + } + ) + .list(); } ); + return new UpgradedToSegmentsResponse(upgradedToSegmentIds); } private static class PendingSegmentsRecord diff --git a/server/src/main/java/org/apache/druid/metadata/SegmentUpgradeInfo.java b/server/src/main/java/org/apache/druid/metadata/SegmentUpgradeInfo.java deleted file mode 100644 index 921b976c5886..000000000000 --- a/server/src/main/java/org/apache/druid/metadata/SegmentUpgradeInfo.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.metadata; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; - -import javax.annotation.Nullable; -import java.util.Objects; - -/** - * Information about the id and upgraded from segment id created as a result of segment upgrades - * upgradedFromSegmentId is null for segments that haven't been upgraded - */ -public class SegmentUpgradeInfo -{ - private final String id; - private final String upgradedFromSegmentId; - - @JsonCreator - public SegmentUpgradeInfo( - @JsonProperty("id") String id, - @JsonProperty("upgradedFromSegmentId") @Nullable String upgradedFromSegmentId - ) - { - this.id = id; - this.upgradedFromSegmentId = upgradedFromSegmentId; - } - - @JsonProperty - public String getId() - { - return id; - } - - @Nullable - @JsonProperty - public String getUpgradedFromSegmentId() - { - return upgradedFromSegmentId; - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - SegmentUpgradeInfo that = (SegmentUpgradeInfo) o; - return Objects.equals(id, that.id) - && Objects.equals(upgradedFromSegmentId, that.upgradedFromSegmentId); - } - - @Override - public int hashCode() - { - return Objects.hash(id, upgradedFromSegmentId); - } -} diff --git a/server/src/main/java/org/apache/druid/metadata/UpgradedFromSegmentsResponse.java b/server/src/main/java/org/apache/druid/metadata/UpgradedFromSegmentsResponse.java new file mode 100644 index 
000000000000..b40a12609a41 --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/UpgradedFromSegmentsResponse.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Map; + +/** + * Response for the RetrieveUpgradedFromSegmentIds task action + */ +public class UpgradedFromSegmentsResponse +{ + // Map from a segment ID to the segment ID from which it was upgraded + // There should be no entry in the map for an original non-upgraded segment + private final Map upgradedFromSegmentIds; + + @JsonCreator + public UpgradedFromSegmentsResponse( + @JsonProperty("upgradedFromSegmentIds") Map upgradedFromSegmentIds + ) + { + this.upgradedFromSegmentIds = upgradedFromSegmentIds; + } + + @JsonProperty + public Map getUpgradedFromSegmentId() + { + return upgradedFromSegmentIds; + } +} diff --git a/server/src/main/java/org/apache/druid/metadata/UpgradedToSegmentsResponse.java b/server/src/main/java/org/apache/druid/metadata/UpgradedToSegmentsResponse.java new file mode 100644 index 000000000000..80e42f040da2 --- /dev/null +++ 
b/server/src/main/java/org/apache/druid/metadata/UpgradedToSegmentsResponse.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Map; +import java.util.Set; + +/** + * Response for the RetrieveUpgradedToSegmentIds task action + */ +public class UpgradedToSegmentsResponse +{ + + // Map from a segment ID to a set containing + // all segment IDs that were upgraded from it AND are still present in the metadata store + private final Map> upgradedToSegmentIds; + + @JsonCreator + public UpgradedToSegmentsResponse( + @JsonProperty("upgradedToSegmentIds") Map> upgradedToSegmentIds + ) + { + this.upgradedToSegmentIds = upgradedToSegmentIds; + } + + @JsonProperty + public Map> getUpgradedToSegmentIds() + { + return upgradedToSegmentIds; + } +} diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index f8187ae026a3..89ebd3cc885d 100644 --- 
a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -31,7 +31,6 @@ import org.apache.druid.indexing.overlord.SegmentPublishResult; import org.apache.druid.indexing.overlord.Segments; import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; @@ -49,7 +48,6 @@ import org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec; import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec; import org.apache.druid.timeline.partition.LinearShardSpec; -import org.apache.druid.timeline.partition.NoneShardSpec; import org.apache.druid.timeline.partition.NumberedOverwritePartialShardSpec; import org.apache.druid.timeline.partition.NumberedOverwriteShardSpec; import org.apache.druid.timeline.partition.NumberedPartialShardSpec; @@ -67,12 +65,10 @@ import org.junit.Rule; import org.junit.Test; import org.skife.jdbi.v2.Handle; -import org.skife.jdbi.v2.PreparedBatch; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; @@ -275,17 +271,10 @@ public void testCommitAppendSegments() Set allCommittedSegments = new HashSet<>(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())); - Map upgradedFromSegmentIdMap = new HashMap<>(); - coordinator.retrieveUpgradedFromSegmentIds( + Map upgradedFromSegmentIdMap = coordinator.retrieveUpgradedFromSegmentIds( DS.WIKI, - allCommittedSegments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toList()) - ).forEach( - segmentUpgradeInfo -> { - if 
(segmentUpgradeInfo.getUpgradedFromSegmentId() != null) { - upgradedFromSegmentIdMap.put(segmentUpgradeInfo.getId(), segmentUpgradeInfo.getUpgradedFromSegmentId()); - } - } - ); + allCommittedSegments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toSet()) + ).getUpgradedFromSegmentId(); // Verify the segments present in the metadata store Assert.assertTrue(allCommittedSegments.containsAll(appendSegments)); for (DataSegment segment : appendSegments) { @@ -412,17 +401,10 @@ public void testCommitReplaceSegments() final Set usedSegments = new HashSet<>(retrieveUsedSegments(derbyConnectorRule.metadataTablesConfigSupplier().get())); - final Map upgradedFromSegmentIdMap = new HashMap<>(); - coordinator.retrieveUpgradedFromSegmentIds( + final Map upgradedFromSegmentIdMap = coordinator.retrieveUpgradedFromSegmentIds( "foo", - usedSegments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toList()) - ).forEach( - segmentUpgradeInfo -> { - if (segmentUpgradeInfo.getUpgradedFromSegmentId() != null) { - upgradedFromSegmentIdMap.put(segmentUpgradeInfo.getId(), segmentUpgradeInfo.getUpgradedFromSegmentId()); - } - } - ); + usedSegments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toSet()) + ).getUpgradedFromSegmentId(); Assert.assertTrue(usedSegments.containsAll(segmentsAppendedWithReplaceLock)); for (DataSegment appendSegment : segmentsAppendedWithReplaceLock) { @@ -3449,24 +3431,25 @@ public void testRetrieveUpgradedFromSegmentIds() final String datasource = defaultSegment.getDataSource(); final Map upgradedFromSegmentIdMap = new HashMap<>(); upgradedFromSegmentIdMap.put(defaultSegment2.getId(), defaultSegment.getId().toString()); - insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), upgradedFromSegmentIdMap); + coordinator.insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), upgradedFromSegmentIdMap); 
coordinator.markSegmentsAsUnusedWithinInterval(datasource, Intervals.ETERNITY); upgradedFromSegmentIdMap.clear(); upgradedFromSegmentIdMap.put(defaultSegment3.getId(), defaultSegment.getId().toString()); - insertUsedSegments(ImmutableSet.of(defaultSegment3, defaultSegment4), upgradedFromSegmentIdMap); + coordinator.insertUsedSegments(ImmutableSet.of(defaultSegment3, defaultSegment4), upgradedFromSegmentIdMap); - Set expected = new HashSet<>(); - expected.add(new SegmentUpgradeInfo(defaultSegment.getId().toString(), null)); - expected.add(new SegmentUpgradeInfo(defaultSegment2.getId().toString(), defaultSegment.getId().toString())); - expected.add(new SegmentUpgradeInfo(defaultSegment3.getId().toString(), defaultSegment.getId().toString())); - expected.add(new SegmentUpgradeInfo(defaultSegment4.getId().toString(), null)); + Map expected = new HashMap<>(); + expected.put(defaultSegment2.getId().toString(), defaultSegment.getId().toString()); + expected.put(defaultSegment3.getId().toString(), defaultSegment.getId().toString()); - List segmentIds = new ArrayList<>(); + Set segmentIds = new HashSet<>(); segmentIds.add(defaultSegment.getId().toString()); segmentIds.add(defaultSegment2.getId().toString()); segmentIds.add(defaultSegment3.getId().toString()); segmentIds.add(defaultSegment4.getId().toString()); - Assert.assertEquals(expected, new HashSet<>(coordinator.retrieveUpgradedFromSegmentIds(datasource, segmentIds))); + Assert.assertEquals( + expected, + coordinator.retrieveUpgradedFromSegmentIds(datasource, segmentIds).getUpgradedFromSegmentId() + ); } @Test @@ -3475,60 +3458,25 @@ public void testRetrieveUpgradedToSegmentIds() final String datasource = defaultSegment.getDataSource(); final Map upgradedFromSegmentIdMap = new HashMap<>(); upgradedFromSegmentIdMap.put(defaultSegment2.getId(), defaultSegment.getId().toString()); - insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), upgradedFromSegmentIdMap); + 
coordinator.insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), upgradedFromSegmentIdMap); coordinator.markSegmentsAsUnusedWithinInterval(datasource, Intervals.ETERNITY); upgradedFromSegmentIdMap.clear(); upgradedFromSegmentIdMap.put(defaultSegment3.getId(), defaultSegment.getId().toString()); - insertUsedSegments(ImmutableSet.of(defaultSegment3, defaultSegment4), upgradedFromSegmentIdMap); + coordinator.insertUsedSegments(ImmutableSet.of(defaultSegment3, defaultSegment4), upgradedFromSegmentIdMap); - Set expected = new HashSet<>(); - expected.add(new SegmentUpgradeInfo(defaultSegment2.getId().toString(), defaultSegment.getId().toString())); - expected.add(new SegmentUpgradeInfo(defaultSegment3.getId().toString(), defaultSegment.getId().toString())); + Map> expected = new HashMap<>(); + expected.put(defaultSegment.getId().toString(), new HashSet<>()); + expected.get(defaultSegment.getId().toString()).add(defaultSegment2.getId().toString()); + expected.get(defaultSegment.getId().toString()).add(defaultSegment3.getId().toString()); - List upgradedIds = new ArrayList<>(); + Set upgradedIds = new HashSet<>(); upgradedIds.add(defaultSegment.getId().toString()); upgradedIds.add(defaultSegment2.getId().toString()); upgradedIds.add(defaultSegment3.getId().toString()); upgradedIds.add(defaultSegment4.getId().toString()); - Assert.assertEquals(expected, new HashSet<>(coordinator.retrieveUpgradedToSegmentIds(datasource, upgradedIds))); - } - - private boolean insertUsedSegments(Set dataSegments, Map upgradedFromSegmentIdMap) - { - final String table = derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable(); - return derbyConnector.retryWithHandle( - handle -> { - PreparedBatch preparedBatch = handle.prepareBatch( - StringUtils.format( - "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, partitioned, version, used, payload, used_status_last_updated, upgraded_from_segment_id) " - + "VALUES (:id, :dataSource, :created_date, 
:start, :end, :partitioned, :version, :used, :payload, :used_status_last_updated, :upgraded_from_segment_id)", - table, - derbyConnector.getQuoteString() - ) - ); - for (DataSegment segment : dataSegments) { - String id = segment.getId().toString(); - preparedBatch.add() - .bind("id", id) - .bind("dataSource", segment.getDataSource()) - .bind("created_date", DateTimes.nowUtc().toString()) - .bind("start", segment.getInterval().getStart().toString()) - .bind("end", segment.getInterval().getEnd().toString()) - .bind("partitioned", !(segment.getShardSpec() instanceof NoneShardSpec)) - .bind("version", segment.getVersion()) - .bind("used", true) - .bind("payload", mapper.writeValueAsBytes(segment)) - .bind("used_status_last_updated", DateTimes.nowUtc().toString()) - .bind("upgraded_from_segment_id", upgradedFromSegmentIdMap.get(segment.getId())); - } - - final int[] affectedRows = preparedBatch.execute(); - final boolean succeeded = Arrays.stream(affectedRows).allMatch(eachAffectedRows -> eachAffectedRows == 1); - if (!succeeded) { - throw new ISE("Failed to publish segments to DB"); - } - return true; - } + Assert.assertEquals( + expected, + coordinator.retrieveUpgradedToSegmentIds(datasource, upgradedIds).getUpgradedToSegmentIds() ); } } From 570b33a2023d26385cf6e3b043ddd82adf807c6b Mon Sep 17 00:00:00 2001 From: Amatya Date: Wed, 10 Jul 2024 12:16:36 +0530 Subject: [PATCH 10/16] Address feedback --- .../common/task/KillUnusedSegmentsTask.java | 13 ++--- .../IndexerSQLMetadataStorageCoordinator.java | 47 +++++++++---------- .../druid/metadata/PendingSegmentRecord.java | 5 +- .../druid/metadata/SQLMetadataConnector.java | 3 +- .../metadata/SqlSegmentsMetadataQuery.java | 8 ++-- 5 files changed, 39 insertions(+), 37 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index d8a187221975..6b7ddd5eb1ec 
100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -211,10 +211,10 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception Segments.INCLUDING_OVERSHADOWED ); // Fetch the load specs of all segments overlapping with the unused segment intervals - final Set> usedSegmentLoadSpecs = - taskActionClient.submit(retrieveUsedSegmentsAction) - .stream() - .map(DataSegment::getLoadSpec).collect(Collectors.toSet()); + final Set> usedSegmentLoadSpecs = taskActionClient.submit(retrieveUsedSegmentsAction) + .stream() + .map(DataSegment::getLoadSpec) + .collect(Collectors.toSet()); do { if (nextBatchSize <= 0) { @@ -245,10 +245,11 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception // Determine the subset of segments to be killed from deep storage based on loadspecs. // If the segment nuke throws an exception, then the segment cleanup is abandoned. 
- // Determine upgraded_from_segment_id before nuking + // Determine upgraded segment ids before nuking final Set upgradedSegmentIds = unusedSegments.stream() .map(DataSegment::getId) - .map(SegmentId::toString).collect(Collectors.toSet()); + .map(SegmentId::toString) + .collect(Collectors.toSet()); final Map upgradedFromSegmentIds = new HashMap<>(); try { upgradedFromSegmentIds.putAll( diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 09d79ef2293a..951bc65d3b11 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -564,7 +564,7 @@ public SegmentPublishResult commitReplaceSegments( createNewIdsOfAppendSegmentsAfterReplace(handle, replaceSegments, locksHeldByReplaceTask); Map upgradeSegmentMetadata = new HashMap<>(); - Map upgradedFromSegmentIdMap = new HashMap<>(); + final Map upgradedFromSegmentIdMap = new HashMap<>(); for (DataSegmentPlus dataSegmentPlus : upgradedSegments) { segmentsToInsert.add(dataSegmentPlus.getDataSegment()); if (dataSegmentPlus.getSchemaFingerprint() != null && dataSegmentPlus.getNumRows() != null) { @@ -575,7 +575,7 @@ public SegmentPublishResult commitReplaceSegments( } if (dataSegmentPlus.getUpgradedFromSegmentId() != null) { upgradedFromSegmentIdMap.put( - dataSegmentPlus.getDataSegment().getId(), + dataSegmentPlus.getDataSegment().getId().toString(), dataSegmentPlus.getUpgradedFromSegmentId() ); } @@ -1416,7 +1416,7 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( final Set allSegmentsToInsert = new HashSet<>(appendSegments); final Map newVersionSegmentToParent = new HashMap<>(); final Map segmentIdMap = new HashMap<>(); - final Map upgradedFromSegmentIdMap = new HashMap<>(); + final Map upgradedFromSegmentIdMap = new 
HashMap<>(); appendSegments.forEach(segment -> segmentIdMap.put(segment.getId().toString(), segment)); segmentIdsForNewVersions.forEach( pendingSegment -> { @@ -1424,7 +1424,7 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( final DataSegment oldSegment = segmentIdMap.get(pendingSegment.getUpgradedFromSegmentId()); final SegmentId newVersionSegmentId = pendingSegment.getId().asSegmentId(); newVersionSegmentToParent.put(newVersionSegmentId, oldSegment.getId()); - upgradedFromSegmentIdMap.put(newVersionSegmentId, oldSegment.getId().toString()); + upgradedFromSegmentIdMap.put(newVersionSegmentId.toString(), oldSegment.getId().toString()); allSegmentsToInsert.add( new DataSegment( pendingSegment.getId().asSegmentId(), @@ -2134,7 +2134,6 @@ private Set announceHistoricalSegmentBatch( for (List partition : partitionedSegments) { for (DataSegment segment : partition) { String segmentId = segment.getId().toString(); - final String upgradedFromSegmentId = null; PreparedBatchPart preparedBatchPart = preparedBatch.add() .bind("id", segmentId) @@ -2147,7 +2146,7 @@ private Set announceHistoricalSegmentBatch( .bind("used", usedSegments.contains(segment)) .bind("payload", jsonMapper.writeValueAsBytes(segment)) .bind("used_status_last_updated", now) - .bind("upgraded_from_segment_id", upgradedFromSegmentId); + .bind("upgraded_from_segment_id", (String) null); if (schemaPersistEnabled) { Long numRows = null; @@ -2272,6 +2271,8 @@ private Set createNewIdsOfAppendSegmentsAfterReplace( .shardSpec(shardSpec) .build(); + // When the segment already has an upgraded_from_segment_id, reuse it for its children + // This ensures that the row has a single level of lineage final String upgradedFromSegmentId = oldSegmentMetadata.getUpgradedFromSegmentId() == null ? 
oldSegmentMetadata.getDataSegment().getId().toString() : oldSegmentMetadata.getUpgradedFromSegmentId(); @@ -2328,7 +2329,7 @@ private Set insertSegments( @Nullable SegmentSchemaMapping segmentSchemaMapping, Map upgradeSegmentMetadata, Map newVersionForAppendToParent, - Map upgradedFromSegmentIdMap + Map upgradedFromSegmentIdMap ) throws IOException { boolean shouldPersistSchema = shouldPersistSchema(segmentSchemaMapping); @@ -2365,7 +2366,7 @@ private Set insertSegments( .bind("used", true) .bind("payload", jsonMapper.writeValueAsBytes(segment)) .bind("used_status_last_updated", now) - .bind("upgraded_from_segment_id", upgradedFromSegmentIdMap.get(segment.getId())); + .bind("upgraded_from_segment_id", upgradedFromSegmentIdMap.get(segment.getId().toString())); if (schemaPersistEnabled) { SegmentMetadata segmentMetadata = @@ -3009,15 +3010,13 @@ public UpgradedFromSegmentsResponse retrieveUpgradedFromSegmentIds( .bind("dataSource", dataSource); SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition("id", segmentIdList, query); return query.map((index, r, ctx) -> { - final String id = r.getString(1); - final String upgradedFromSegmentId = r.getString(2); - if (upgradedFromSegmentId != null) { - upgradedFromSegmentIds.put(id, upgradedFromSegmentId); - } - return null; - } - ) - .list(); + final String id = r.getString(1); + final String upgradedFromSegmentId = r.getString(2); + if (upgradedFromSegmentId != null) { + upgradedFromSegmentIds.put(id, upgradedFromSegmentId); + } + return null; + }).list(); } ); return new UpgradedFromSegmentsResponse(upgradedFromSegmentIds); @@ -3053,14 +3052,12 @@ public UpgradedToSegmentsResponse retrieveUpgradedToSegmentIds( query ); return query.map((index, r, ctx) -> { - final String upgradedToId = r.getString(1); - final String id = r.getString(2); - upgradedToSegmentIds.computeIfAbsent(id, k -> new HashSet<>()) - .add(upgradedToId); - return null; - } - ) - .list(); + final String upgradedToId = r.getString(1); + final 
String id = r.getString(2); + upgradedToSegmentIds.computeIfAbsent(id, k -> new HashSet<>()) + .add(upgradedToId); + return null; + }).list(); } ); return new UpgradedToSegmentsResponse(upgradedToSegmentIds); diff --git a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java index 0fa492e28f91..f117fe7f28bf 100644 --- a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java +++ b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java @@ -40,7 +40,10 @@ *

 * <li>id -> id (Unique identifier for pending segment)</li>
 * <li>sequence_name -> sequenceName (sequence name used for segment allocation)</li>
 * <li>sequence_prev_id -> sequencePrevId (previous segment id used for segment allocation)</li>
- * <li>upgraded_from_segment_id -> upgradedFromSegmentId (Id of the first segment from which this was upgraded)</li>
+ * <li>upgraded_from_segment_id -> upgradedFromSegmentId
+ * (ID of the segment which was upgraded to create the current segment.
+ * If the former was itself created as a result of an upgrade, then this ID
+ * must refer to the original non-upgraded segment in the hierarchy.)</li>
 * <li>task_allocator_id -> taskAllocatorId (Associates a task / task group / replica group with the pending segment)</li>
  • * */ diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java index 7f17edd012ee..41dbac6753af 100644 --- a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java +++ b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java @@ -589,7 +589,8 @@ protected void alterSegmentTable() // upgraded_from_segment_id is the first segment to which the same load spec originally belonged // Load specs can be shared as a result of segment version upgrade - // This column is null for segments that haven't been upgraded. + // This column is null for segments that were not upgraded from any other + // or were upgraded before this column was added columnNameTypes.put("upgraded_from_segment_id", "VARCHAR(255)"); if (centralizedDatasourceSchemaConfig.isEnabled()) { diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index 11508d21a428..fc1c84a70371 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -286,7 +286,7 @@ private List retrieveSegmentBatchById( if (includeSchemaInfo) { final Query> query = handle.createQuery( StringUtils.format( - "SELECT payload, used, upgraded_from_segment_id, schema_fingerprint, num_rows FROM %s WHERE dataSource = :dataSource %s", + "SELECT payload, used, schema_fingerprint, num_rows, upgraded_from_segment_id FROM %s WHERE dataSource = :dataSource %s", dbTables.getSegmentsTable(), getParameterizedInConditionForColumn("id", segmentIds) ) ); @@ -298,8 +298,8 @@ private List retrieveSegmentBatchById( .setFetchSize(connector.getStreamingFetchSize()) .map( (index, r, ctx) -> { - String schemaFingerprint = (String) r.getObject(4); - Long numRows = (Long) r.getObject(5); + String 
schemaFingerprint = (String) r.getObject(3); + Long numRows = (Long) r.getObject(4); return new DataSegmentPlus( JacksonUtils.readValue(jsonMapper, r.getBytes(1), DataSegment.class), null, @@ -307,7 +307,7 @@ private List retrieveSegmentBatchById( r.getBoolean(2), schemaFingerprint, numRows, - r.getString(3) + r.getString(5) ); } ) From d57ca013e94610b41f692497e959bc6665c0c2ec Mon Sep 17 00:00:00 2001 From: Amatya Date: Thu, 11 Jul 2024 13:57:45 +0530 Subject: [PATCH 11/16] Address feedback --- .../RetrieveUpgradedFromSegmentIdsAction.java | 6 ++-- .../RetrieveUpgradedToSegmentIdsAction.java | 6 ++-- .../common/task/KillUnusedSegmentsTask.java | 33 +++++++++++++++---- ...TestIndexerMetadataStorageCoordinator.java | 12 +++---- .../IndexerMetadataStorageCoordinator.java | 22 ++++++------- .../IndexerSQLMetadataStorageCoordinator.java | 23 +++++++------ .../druid/metadata/SQLMetadataConnector.java | 4 --- .../UpgradedFromSegmentsResponse.java | 4 +-- .../metadata/UpgradedToSegmentsResponse.java | 2 -- ...exerSQLMetadataStorageCoordinatorTest.java | 10 +++--- 10 files changed, 67 insertions(+), 55 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java index a71559a4e0bd..e16db7e4e58d 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java @@ -69,8 +69,10 @@ public TypeReference getReturnTypeReference() @Override public UpgradedFromSegmentsResponse perform(Task task, TaskActionToolbox toolbox) { - return toolbox.getIndexerMetadataStorageCoordinator() - .retrieveUpgradedFromSegmentIds(dataSource, segmentIds); + return new UpgradedFromSegmentsResponse( + 
toolbox.getIndexerMetadataStorageCoordinator() + .retrieveUpgradedFromSegmentIds(dataSource, segmentIds) + ); } @Override diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java index 88ba08821342..2dba05ab7bc3 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java @@ -69,8 +69,10 @@ public TypeReference getReturnTypeReference() @Override public UpgradedToSegmentsResponse perform(Task task, TaskActionToolbox toolbox) { - return toolbox.getIndexerMetadataStorageCoordinator() - .retrieveUpgradedToSegmentIds(dataSource, segmentIds); + return new UpgradedToSegmentsResponse( + toolbox.getIndexerMetadataStorageCoordinator() + .retrieveUpgradedToSegmentIds(dataSource, segmentIds) + ); } @Override diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index 6b7ddd5eb1ec..12bfbadc8b63 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -72,6 +72,20 @@ * The client representation of this task is {@link ClientKillUnusedSegmentsTaskQuery}. * JSON serialization fields of this class must correspond to those of {@link * ClientKillUnusedSegmentsTaskQuery}, except for {@link #id} and {@link #context} fields. + *
+ * <p>
+ * The Kill task fetches the set of used segments for the interval and computes the set of their load specs.
+ * Until `limit` segments have been processed in total or all segments for the interval have been nuked:
+ * <ol>
+ * <li> Fetch at most `batchSize` unused segments from the metadata store. </li>
+ * <li> Determine the mapping from these segments to their parents *before* nuking the segments. </li>
+ * <li> Nuke the batch of unused segments from the metadata store. </li>
+ * <li> Determine the mapping of the set of parents to all their children. </li>
+ * <li> Check if unused or parent segments exist. </li>
+ * <li> Find the unreferenced segments. </li>
+ * <li> Filter the set of unreferenced segments using load specs from the set of used segments. </li>
+ * <li> Kill the filtered set of segments from deep storage. </li>
+ * </ol>
    */ public class KillUnusedSegmentsTask extends AbstractFixedIntervalTask { @@ -246,16 +260,16 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception // If the segment nuke throws an exception, then the segment cleanup is abandoned. // Determine upgraded segment ids before nuking - final Set upgradedSegmentIds = unusedSegments.stream() - .map(DataSegment::getId) - .map(SegmentId::toString) - .collect(Collectors.toSet()); + final Set segmentIds = unusedSegments.stream() + .map(DataSegment::getId) + .map(SegmentId::toString) + .collect(Collectors.toSet()); final Map upgradedFromSegmentIds = new HashMap<>(); try { upgradedFromSegmentIds.putAll( taskActionClient.submit( - new RetrieveUpgradedFromSegmentIdsAction(getDataSource(), upgradedSegmentIds) - ).getUpgradedFromSegmentId() + new RetrieveUpgradedFromSegmentIdsAction(getDataSource(), segmentIds) + ).getUpgradedFromSegmentIds() ); } catch (Exception e) { @@ -281,6 +295,13 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception || !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec())) .collect(Collectors.toList()); + final Set segmentsNotKilled = new HashSet<>(unusedSegments); + segmentsToBeKilled.forEach(segmentsNotKilled::remove); + LOG.infoSegments( + segmentsNotKilled, + "Skipping segment kill from deep storage as their load specs are referenced by other segments." 
+ ); + toolbox.getDataSegmentKiller().kill(segmentsToBeKilled); numBatchesProcessed++; numSegmentsKilled += segmentsToBeKilled.size(); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 4f499632b88b..d2055d6e0c99 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -32,8 +32,6 @@ import org.apache.druid.java.util.common.Pair; import org.apache.druid.metadata.PendingSegmentRecord; import org.apache.druid.metadata.ReplaceTaskLock; -import org.apache.druid.metadata.UpgradedFromSegmentsResponse; -import org.apache.druid.metadata.UpgradedToSegmentsResponse; import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.DataSegment; @@ -317,21 +315,21 @@ public List getPendingSegments(String datasource, Interval } @Override - public UpgradedFromSegmentsResponse retrieveUpgradedFromSegmentIds( + public Map retrieveUpgradedFromSegmentIds( final String dataSource, final Set segmentIds ) { - return new UpgradedFromSegmentsResponse(Collections.emptyMap()); + return Collections.emptyMap(); } @Override - public UpgradedToSegmentsResponse retrieveUpgradedToSegmentIds( + public Map> retrieveUpgradedToSegmentIds( final String dataSource, - final Set upgradedFromSegmentIds + final Set segmentIds ) { - return new UpgradedToSegmentsResponse(Collections.emptyMap()); + return Collections.emptyMap(); } public Set getPublished() diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index 
b0cc3a28dd65..29ff97e21a69 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -22,8 +22,6 @@ import org.apache.druid.java.util.common.Pair; import org.apache.druid.metadata.PendingSegmentRecord; import org.apache.druid.metadata.ReplaceTaskLock; -import org.apache.druid.metadata.UpgradedFromSegmentsResponse; -import org.apache.druid.metadata.UpgradedToSegmentsResponse; import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.DataSegment; @@ -477,20 +475,20 @@ SegmentPublishResult commitMetadataOnly( List getPendingSegments(String datasource, Interval interval); /** - * Returns a mapping from the segments ids to their parent segments ids - * + * Map from a segment ID to the segment ID from which it was upgraded + * There should be no entry in the map for an original non-upgraded segment * @param dataSource data source * @param segmentIds ids of segments - * @return UpgradedFromSegmentsResponse + * @return map from child id to parent id */ - UpgradedFromSegmentsResponse retrieveUpgradedFromSegmentIds(String dataSource, Set segmentIds); + Map retrieveUpgradedFromSegmentIds(String dataSource, Set segmentIds); /** - * Returns a mapping from segment ids to their child segment ids - * - * @param dataSource data source - * @param upgradedFromSegmentIds ids of the first segments which had the corresponding load spec - * @return UpgradedToSegmentsResponse + * Map from a segment ID to a set containing + * all segment IDs that were upgraded from it AND are still present in the metadata store + * @param dataSource data source + * @param segmentIds ids of the first segments which had the corresponding load spec + * @return map from parent id to set of all its children */ - UpgradedToSegmentsResponse 
retrieveUpgradedToSegmentIds(String dataSource, Set upgradedFromSegmentIds); + Map> retrieveUpgradedToSegmentIds(String dataSource, Set segmentIds); } diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 951bc65d3b11..39a134fa869f 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -2272,7 +2272,6 @@ private Set createNewIdsOfAppendSegmentsAfterReplace( .build(); // When the segment already has an upgraded_from_segment_id, reuse it for its children - // This ensures that the row has a single level of lineage final String upgradedFromSegmentId = oldSegmentMetadata.getUpgradedFromSegmentId() == null ? oldSegmentMetadata.getDataSegment().getId().toString() : oldSegmentMetadata.getUpgradedFromSegmentId(); @@ -2988,14 +2987,13 @@ public int deleteUpgradeSegmentsForTask(final String taskId) } @Override - public UpgradedFromSegmentsResponse retrieveUpgradedFromSegmentIds( + public Map retrieveUpgradedFromSegmentIds( final String dataSource, final Set segmentIds ) { - final Map upgradedFromSegmentIds = new HashMap<>(); if (segmentIds.isEmpty()) { - return new UpgradedFromSegmentsResponse(upgradedFromSegmentIds); + return Collections.emptyMap(); } final List segmentIdList = ImmutableList.copyOf(segmentIds); @@ -3004,6 +3002,7 @@ public UpgradedFromSegmentsResponse retrieveUpgradedFromSegmentIds( dbTables.getSegmentsTable(), SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("id", segmentIdList) ); + final Map upgradedFromSegmentIds = new HashMap<>(); connector.retryWithHandle( handle -> { Query> query = handle.createQuery(sql) @@ -3019,21 +3018,20 @@ public UpgradedFromSegmentsResponse retrieveUpgradedFromSegmentIds( }).list(); } ); - return new 
UpgradedFromSegmentsResponse(upgradedFromSegmentIds); + return upgradedFromSegmentIds; } @Override - public UpgradedToSegmentsResponse retrieveUpgradedToSegmentIds( + public Map> retrieveUpgradedToSegmentIds( final String dataSource, - final Set upgradedFromSegmentIds + final Set segmentIds ) { - final Map> upgradedToSegmentIds = new HashMap<>(); - if (upgradedFromSegmentIds.isEmpty()) { - return new UpgradedToSegmentsResponse(upgradedToSegmentIds); + if (segmentIds.isEmpty()) { + return Collections.emptyMap(); } - final List upgradedFromSegmentIdList = ImmutableList.copyOf(upgradedFromSegmentIds); + final List upgradedFromSegmentIdList = ImmutableList.copyOf(segmentIds); final String sql = StringUtils.format( "SELECT id, upgraded_from_segment_id FROM %s WHERE dataSource = :dataSource %s", dbTables.getSegmentsTable(), @@ -3042,6 +3040,7 @@ public UpgradedToSegmentsResponse retrieveUpgradedToSegmentIds( upgradedFromSegmentIdList ) ); + final Map> upgradedToSegmentIds = new HashMap<>(); connector.retryWithHandle( handle -> { Query> query = handle.createQuery(sql) @@ -3060,7 +3059,7 @@ public UpgradedToSegmentsResponse retrieveUpgradedToSegmentIds( }).list(); } ); - return new UpgradedToSegmentsResponse(upgradedToSegmentIds); + return upgradedToSegmentIds; } private static class PendingSegmentsRecord diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java index 41dbac6753af..dc87b9fc2fd4 100644 --- a/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java +++ b/server/src/main/java/org/apache/druid/metadata/SQLMetadataConnector.java @@ -587,10 +587,6 @@ protected void alterSegmentTable() Map columnNameTypes = new HashMap<>(); columnNameTypes.put("used_status_last_updated", "VARCHAR(255)"); - // upgraded_from_segment_id is the first segment to which the same load spec originally belonged - // Load specs can be shared as a result of segment version 
upgrade - // This column is null for segments that were not upgraded from any other - // or were upgraded before this column was added columnNameTypes.put("upgraded_from_segment_id", "VARCHAR(255)"); if (centralizedDatasourceSchemaConfig.isEnabled()) { diff --git a/server/src/main/java/org/apache/druid/metadata/UpgradedFromSegmentsResponse.java b/server/src/main/java/org/apache/druid/metadata/UpgradedFromSegmentsResponse.java index b40a12609a41..9830561c215a 100644 --- a/server/src/main/java/org/apache/druid/metadata/UpgradedFromSegmentsResponse.java +++ b/server/src/main/java/org/apache/druid/metadata/UpgradedFromSegmentsResponse.java @@ -29,8 +29,6 @@ */ public class UpgradedFromSegmentsResponse { - // Map from a segment ID to the segment ID from which it was upgraded - // There should be no entry in the map for an original non-upgraded segment private final Map upgradedFromSegmentIds; @JsonCreator @@ -42,7 +40,7 @@ public UpgradedFromSegmentsResponse( } @JsonProperty - public Map getUpgradedFromSegmentId() + public Map getUpgradedFromSegmentIds() { return upgradedFromSegmentIds; } diff --git a/server/src/main/java/org/apache/druid/metadata/UpgradedToSegmentsResponse.java b/server/src/main/java/org/apache/druid/metadata/UpgradedToSegmentsResponse.java index 80e42f040da2..410386522709 100644 --- a/server/src/main/java/org/apache/druid/metadata/UpgradedToSegmentsResponse.java +++ b/server/src/main/java/org/apache/druid/metadata/UpgradedToSegmentsResponse.java @@ -31,8 +31,6 @@ public class UpgradedToSegmentsResponse { - // Map from a segment ID to a set containing - // all segment IDs that were upgraded from it AND are still present in the metadata store private final Map> upgradedToSegmentIds; @JsonCreator diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 89ebd3cc885d..e5f064ac50a7 100644 --- 
a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -142,7 +142,7 @@ public void testCommitAppendSegments() final String lockVersion = "2024-01-01"; final String taskAllocatorId = "appendTask"; - final String replaceTaskId = "replaceTask"; + final String replaceTaskId = "replaceTask1"; final ReplaceTaskLock replaceLock = new ReplaceTaskLock( replaceTaskId, Intervals.of("2023-01-01/2023-01-03"), @@ -274,7 +274,7 @@ public void testCommitAppendSegments() Map upgradedFromSegmentIdMap = coordinator.retrieveUpgradedFromSegmentIds( DS.WIKI, allCommittedSegments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toSet()) - ).getUpgradedFromSegmentId(); + ); // Verify the segments present in the metadata store Assert.assertTrue(allCommittedSegments.containsAll(appendSegments)); for (DataSegment segment : appendSegments) { @@ -404,7 +404,7 @@ public void testCommitReplaceSegments() final Map upgradedFromSegmentIdMap = coordinator.retrieveUpgradedFromSegmentIds( "foo", usedSegments.stream().map(DataSegment::getId).map(SegmentId::toString).collect(Collectors.toSet()) - ).getUpgradedFromSegmentId(); + ); Assert.assertTrue(usedSegments.containsAll(segmentsAppendedWithReplaceLock)); for (DataSegment appendSegment : segmentsAppendedWithReplaceLock) { @@ -3448,7 +3448,7 @@ public void testRetrieveUpgradedFromSegmentIds() segmentIds.add(defaultSegment4.getId().toString()); Assert.assertEquals( expected, - coordinator.retrieveUpgradedFromSegmentIds(datasource, segmentIds).getUpgradedFromSegmentId() + coordinator.retrieveUpgradedFromSegmentIds(datasource, segmentIds) ); } @@ -3476,7 +3476,7 @@ public void testRetrieveUpgradedToSegmentIds() upgradedIds.add(defaultSegment4.getId().toString()); Assert.assertEquals( expected, - coordinator.retrieveUpgradedToSegmentIds(datasource, upgradedIds).getUpgradedToSegmentIds() + 
coordinator.retrieveUpgradedToSegmentIds(datasource, upgradedIds) ); } } From 1224ea7917b7a42e1c685a1bec3316b4ca444caf Mon Sep 17 00:00:00 2001 From: Amatya Date: Thu, 11 Jul 2024 14:03:36 +0530 Subject: [PATCH 12/16] Move classes --- .../common/actions/RetrieveUpgradedFromSegmentIdsAction.java | 1 - .../common/actions/RetrieveUpgradedToSegmentIdsAction.java | 1 - .../indexing/common/actions}/UpgradedFromSegmentsResponse.java | 2 +- .../indexing/common/actions}/UpgradedToSegmentsResponse.java | 2 +- 4 files changed, 2 insertions(+), 4 deletions(-) rename {server/src/main/java/org/apache/druid/metadata => indexing-service/src/main/java/org/apache/druid/indexing/common/actions}/UpgradedFromSegmentsResponse.java (96%) rename {server/src/main/java/org/apache/druid/metadata => indexing-service/src/main/java/org/apache/druid/indexing/common/actions}/UpgradedToSegmentsResponse.java (96%) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java index e16db7e4e58d..ea5e6b605a23 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java @@ -23,7 +23,6 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.type.TypeReference; import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.metadata.UpgradedFromSegmentsResponse; import java.util.Objects; import java.util.Set; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java index 2dba05ab7bc3..ffb7029d6b77 100644 --- 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java @@ -23,7 +23,6 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.type.TypeReference; import org.apache.druid.indexing.common.task.Task; -import org.apache.druid.metadata.UpgradedToSegmentsResponse; import java.util.Objects; import java.util.Set; diff --git a/server/src/main/java/org/apache/druid/metadata/UpgradedFromSegmentsResponse.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/UpgradedFromSegmentsResponse.java similarity index 96% rename from server/src/main/java/org/apache/druid/metadata/UpgradedFromSegmentsResponse.java rename to indexing-service/src/main/java/org/apache/druid/indexing/common/actions/UpgradedFromSegmentsResponse.java index 9830561c215a..18f7625d809a 100644 --- a/server/src/main/java/org/apache/druid/metadata/UpgradedFromSegmentsResponse.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/UpgradedFromSegmentsResponse.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.druid.metadata; +package org.apache.druid.indexing.common.actions; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; diff --git a/server/src/main/java/org/apache/druid/metadata/UpgradedToSegmentsResponse.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/UpgradedToSegmentsResponse.java similarity index 96% rename from server/src/main/java/org/apache/druid/metadata/UpgradedToSegmentsResponse.java rename to indexing-service/src/main/java/org/apache/druid/indexing/common/actions/UpgradedToSegmentsResponse.java index 410386522709..6eb7ced2eb87 100644 --- a/server/src/main/java/org/apache/druid/metadata/UpgradedToSegmentsResponse.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/UpgradedToSegmentsResponse.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.metadata; +package org.apache.druid.indexing.common.actions; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; From 868959554d7af91d25c5714d7a95f14f77ae2bfb Mon Sep 17 00:00:00 2001 From: Amatya Date: Thu, 11 Jul 2024 20:23:31 +0530 Subject: [PATCH 13/16] suggested logic with failing tests --- .../common/task/KillUnusedSegmentsTask.java | 51 +++++++++++++++++-- .../IndexerSQLMetadataStorageCoordinator.java | 5 ++ ...exerSQLMetadataStorageCoordinatorTest.java | 4 +- 3 files changed, 52 insertions(+), 8 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index 12bfbadc8b63..e0dbca3b7440 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -43,6 +43,7 @@ import 
org.apache.druid.indexing.common.actions.TaskActionClient; import org.apache.druid.indexing.common.actions.TaskLocks; import org.apache.druid.indexing.common.actions.TimeChunkLockTryAcquireAction; +import org.apache.druid.indexing.common.actions.UpgradedToSegmentsResponse; import org.apache.druid.indexing.overlord.Segments; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; @@ -285,7 +286,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception // Determine unreferenced segments final List unreferencedSegments - = findUnreferencedSegments(unusedSegments, upgradedFromSegmentIds, taskActionClient); + = findUnreferencedSegmentsV2(unusedSegments, upgradedFromSegmentIds, taskActionClient); // Kill segments from the deep storage only if their load specs are not being used by any other segments // We still need to check with used load specs as segment upgrades were introduced before this change @@ -403,24 +404,24 @@ private List findUnreferencedSegments( // If the segment still exists for some reason, do not kill it if (existingSegmentIds.contains(id)) { - break; + continue; } // If the segment is the parent of existing segments, do not kill it if (upgradedToSegmentIds.containsKey(id)) { - break; + continue; } // If the segment has no parent, it can be killed from deep storage if (!upgradedFromSegmentIds.containsKey(id)) { unreferencedSegments.add(segment); - break; + continue; } // If the parent segment exists, do not kill it final String parentId = upgradedFromSegmentIds.get(id); if (existingSegmentIds.contains(parentId)) { - break; + continue; } // If the parent segment has no existing children, it can be killed @@ -431,6 +432,46 @@ private List findUnreferencedSegments( return unreferencedSegments; } + private List findUnreferencedSegmentsV2( + List unusedSegments, + Map upgradedFromSegmentIds, + TaskActionClient taskActionClient + ) + { + + // Determine parentId for each unused segment + final Map 
parentIdToUnusedSegment = new HashMap<>(); + for (DataSegment segment : unusedSegments) { + final String segmentId = segment.getId().toString(); + parentIdToUnusedSegment.put(upgradedFromSegmentIds.getOrDefault(segmentId, segmentId), segment); + } + + // Check if the parent or any of its children exist in metadata store + try { + UpgradedToSegmentsResponse response = taskActionClient.submit( + new RetrieveUpgradedToSegmentIdsAction(getDataSource(), parentIdToUnusedSegment.keySet()) + ); + if (response != null && response.getUpgradedToSegmentIds() != null) { + response.getUpgradedToSegmentIds().forEach((parent, children) -> { + if (!CollectionUtils.isNullOrEmpty(children)) { + // Do not kill segment if its parent or any of its siblings still exist in metadata store + parentIdToUnusedSegment.remove(parent); + } + }); + } + } + catch (Exception e) { + LOG.warn( + e, + "Could not retrieve UpgradedToSegmentsResponse using task action[retrieveUpgradedToSegmentIds]." + + " Overlord may be on an older version." 
+ ); + + } + return new ArrayList<>(parentIdToUnusedSegment.values()); + } + + @Override public LookupLoadingSpec getLookupLoadingSpec() { diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 39a134fa869f..dc23494c3617 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -3041,6 +3041,11 @@ public Map> retrieveUpgradedToSegmentIds( ) ); final Map> upgradedToSegmentIds = new HashMap<>(); + retrieveSegmentsById(dataSource, segmentIds) + .stream() + .map(DataSegment::getId) + .map(SegmentId::toString) + .forEach(id -> upgradedToSegmentIds.computeIfAbsent(id, k -> new HashSet<>()).add(id)); connector.retryWithHandle( handle -> { Query> query = handle.createQuery(sql) diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index e5f064ac50a7..26cac2682881 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -3466,14 +3466,12 @@ public void testRetrieveUpgradedToSegmentIds() Map> expected = new HashMap<>(); expected.put(defaultSegment.getId().toString(), new HashSet<>()); + expected.get(defaultSegment.getId().toString()).add(defaultSegment.getId().toString()); expected.get(defaultSegment.getId().toString()).add(defaultSegment2.getId().toString()); expected.get(defaultSegment.getId().toString()).add(defaultSegment3.getId().toString()); Set upgradedIds = new HashSet<>(); upgradedIds.add(defaultSegment.getId().toString()); - upgradedIds.add(defaultSegment2.getId().toString()); - 
upgradedIds.add(defaultSegment3.getId().toString()); - upgradedIds.add(defaultSegment4.getId().toString()); Assert.assertEquals( expected, coordinator.retrieveUpgradedToSegmentIds(datasource, upgradedIds) From f54dd1a610bf1bbd4a3921314480c32b4d48a115 Mon Sep 17 00:00:00 2001 From: Amatya Date: Mon, 15 Jul 2024 09:21:29 +0530 Subject: [PATCH 14/16] Address review comments --- .../RetrieveUpgradedToSegmentIdsAction.java | 7 +- .../actions/UpgradedFromSegmentsResponse.java | 3 - .../actions/UpgradedToSegmentsResponse.java | 3 - .../common/task/KillUnusedSegmentsTask.java | 123 ++++-------------- .../task/KillUnusedSegmentsTaskTest.java | 62 +++++---- .../IndexerMetadataStorageCoordinator.java | 5 +- .../IndexerSQLMetadataStorageCoordinator.java | 42 ------ ...exerSQLMetadataStorageCoordinatorTest.java | 26 ++-- ...SqlMetadataStorageCoordinatorTestBase.java | 47 +++++++ 9 files changed, 132 insertions(+), 186 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java index ffb7029d6b77..f74d32f57747 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java @@ -28,7 +28,12 @@ import java.util.Set; /** - * Task action to retrieve all the segment IDs to which a given set of segments were upgraded. + * Task action to determine the set of all segments containing the same load spec given the parent id.
    + * Returns a map from a segment ID to a set containing: + *
      + *
    1. all segment IDs that were upgraded from it AND are still present in the metadata store
    2. + *
    3. the segment ID itself if and only if it is still present in the metadata store
    4. + *
    */ public class RetrieveUpgradedToSegmentIdsAction implements TaskAction { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/UpgradedFromSegmentsResponse.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/UpgradedFromSegmentsResponse.java index 18f7625d809a..5f0f1775f16a 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/UpgradedFromSegmentsResponse.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/UpgradedFromSegmentsResponse.java @@ -24,9 +24,6 @@ import java.util.Map; -/** - * Response for the RetrieveUpgradedFromSegmentIds task action - */ public class UpgradedFromSegmentsResponse { private final Map upgradedFromSegmentIds; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/UpgradedToSegmentsResponse.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/UpgradedToSegmentsResponse.java index 6eb7ced2eb87..e9bf33a97cee 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/UpgradedToSegmentsResponse.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/UpgradedToSegmentsResponse.java @@ -25,9 +25,6 @@ import java.util.Map; import java.util.Set; -/** - * Response for the RetrieveUpgradedToSegmentIds task action - */ public class UpgradedToSegmentsResponse { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index e0dbca3b7440..1de5002fac0b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -34,7 +34,6 @@ import org.apache.druid.indexing.common.TaskLock; import 
org.apache.druid.indexing.common.TaskLockType; import org.apache.druid.indexing.common.TaskToolbox; -import org.apache.druid.indexing.common.actions.RetrieveSegmentsByIdAction; import org.apache.druid.indexing.common.actions.RetrieveUnusedSegmentsAction; import org.apache.druid.indexing.common.actions.RetrieveUpgradedFromSegmentIdsAction; import org.apache.druid.indexing.common.actions.RetrieveUpgradedToSegmentIdsAction; @@ -70,13 +69,13 @@ import java.util.stream.Collectors; /** + *

    * The client representation of this task is {@link ClientKillUnusedSegmentsTaskQuery}. * JSON serialization fields of this class must correspond to those of {@link * ClientKillUnusedSegmentsTaskQuery}, except for {@link #id} and {@link #context} fields. - *
    - *
    + *

    * The Kill task fetches the set of used segments for the interval and computes the set of their load specs.
    - * Until `limit` segments have been processed in total or all segments for the interval have been nuked:
    + * Until `limit` segments have been processed in total or all segments for the interval have been nuked: *

      *
    1. Fetch at most `batchSize` unused segments from the metadata store.
    2. *
    3. Determine the mapping from these segments to their parents *before* nuking the segments.
    4. @@ -276,7 +275,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception catch (Exception e) { LOG.warn( e, - "Could not retrieve UpgradedFromSegmentsResponse using task action[retrieveUpgradedFromSegmentIds]." + "Could not retrieve parent segment ids using task action[retrieveUpgradedFromSegmentIds]." + " Overlord may be on an older version." ); } @@ -284,17 +283,9 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception // Nuke Segments taskActionClient.submit(new SegmentNukeAction(new HashSet<>(unusedSegments))); - // Determine unreferenced segments - final List unreferencedSegments - = findUnreferencedSegmentsV2(unusedSegments, upgradedFromSegmentIds, taskActionClient); - - // Kill segments from the deep storage only if their load specs are not being used by any other segments - // We still need to check with used load specs as segment upgrades were introduced before this change - final List segmentsToBeKilled = unreferencedSegments - .stream() - .filter(unusedSegment -> unusedSegment.getLoadSpec() == null - || !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec())) - .collect(Collectors.toList()); + // Determine segments to be killed + final List segmentsToBeKilled + = getKillableSegments(unusedSegments, upgradedFromSegmentIds, usedSegmentLoadSpecs, taskActionClient); final Set segmentsNotKilled = new HashSet<>(unusedSegments); segmentsToBeKilled.forEach(segmentsNotKilled::remove); @@ -357,105 +348,34 @@ private NavigableMap> getNonRevokedTaskLockMap(TaskActi return taskLockMap; } - private List findUnreferencedSegments( - List unusedSegments, - Map upgradedFromSegmentIds, - TaskActionClient taskActionClient - ) throws IOException - { - if (unusedSegments.isEmpty() || upgradedFromSegmentIds.isEmpty()) { - return unusedSegments; - } - - final Set upgradedSegmentIds = new HashSet<>(); - for (DataSegment segment : unusedSegments) { - final String id = segment.getId().toString(); - upgradedSegmentIds.add(id); - if 
(upgradedFromSegmentIds.containsKey(id)) { - upgradedSegmentIds.add(upgradedFromSegmentIds.get(id)); - } - } - - final Map> upgradedToSegmentIds; - try { - upgradedToSegmentIds = taskActionClient.submit( - new RetrieveUpgradedToSegmentIdsAction(getDataSource(), upgradedSegmentIds) - ).getUpgradedToSegmentIds(); - } - catch (Exception e) { - LOG.warn( - e, - "Could not retrieve UpgradedToSegmentsResponse using task action[retrieveUpgradedToSegmentIds]." - + " Overlord may be on an older version." - ); - return unusedSegments; - } - - final Set existingSegmentIds - = taskActionClient.submit(new RetrieveSegmentsByIdAction(getDataSource(), upgradedSegmentIds)) - .stream() - .map(DataSegment::getId) - .map(SegmentId::toString) - .collect(Collectors.toSet()); - - List unreferencedSegments = new ArrayList<>(); - for (DataSegment segment : unusedSegments) { - final String id = segment.getId().toString(); - - // If the segment still exists for some reason, do not kill it - if (existingSegmentIds.contains(id)) { - continue; - } - - // If the segment is the parent of existing segments, do not kill it - if (upgradedToSegmentIds.containsKey(id)) { - continue; - } - - // If the segment has no parent, it can be killed from deep storage - if (!upgradedFromSegmentIds.containsKey(id)) { - unreferencedSegments.add(segment); - continue; - } - - // If the parent segment exists, do not kill it - final String parentId = upgradedFromSegmentIds.get(id); - if (existingSegmentIds.contains(parentId)) { - continue; - } - - // If the parent segment has no existing children, it can be killed - if (CollectionUtils.isNullOrEmpty(upgradedToSegmentIds.get(parentId))) { - unreferencedSegments.add(segment); - } - } - return unreferencedSegments; - } - - private List findUnreferencedSegmentsV2( + private List getKillableSegments( List unusedSegments, Map upgradedFromSegmentIds, + Set> usedSegmentLoadSpecs, TaskActionClient taskActionClient ) { // Determine parentId for each unused segment - final Map 
parentIdToUnusedSegment = new HashMap<>(); + final Map> parentIdToUnusedSegments = new HashMap<>(); for (DataSegment segment : unusedSegments) { final String segmentId = segment.getId().toString(); - parentIdToUnusedSegment.put(upgradedFromSegmentIds.getOrDefault(segmentId, segmentId), segment); + parentIdToUnusedSegments.computeIfAbsent( + upgradedFromSegmentIds.getOrDefault(segmentId, segmentId), + k -> new HashSet<>() + ).add(segment); } // Check if the parent or any of its children exist in metadata store try { UpgradedToSegmentsResponse response = taskActionClient.submit( - new RetrieveUpgradedToSegmentIdsAction(getDataSource(), parentIdToUnusedSegment.keySet()) + new RetrieveUpgradedToSegmentIdsAction(getDataSource(), parentIdToUnusedSegments.keySet()) ); if (response != null && response.getUpgradedToSegmentIds() != null) { response.getUpgradedToSegmentIds().forEach((parent, children) -> { if (!CollectionUtils.isNullOrEmpty(children)) { // Do not kill segment if its parent or any of its siblings still exist in metadata store - parentIdToUnusedSegment.remove(parent); + parentIdToUnusedSegments.remove(parent); } }); } @@ -463,12 +383,17 @@ private List findUnreferencedSegmentsV2( catch (Exception e) { LOG.warn( e, - "Could not retrieve UpgradedToSegmentsResponse using task action[retrieveUpgradedToSegmentIds]." + "Could not retrieve referenced ids using task action[retrieveUpgradedToSegmentIds]." + " Overlord may be on an older version." 
); - } - return new ArrayList<>(parentIdToUnusedSegment.values()); + + // Filter using the used segment load specs as segment upgrades predate the above task action + return parentIdToUnusedSegments.values() + .stream() + .flatMap(Set::stream) + .filter(segment -> !usedSegmentLoadSpecs.contains(segment.getLoadSpec())) + .collect(Collectors.toList()); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java index 0650c8014e13..fe2b5a51c86a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java @@ -36,9 +36,9 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.JodaUtils; +import org.apache.druid.metadata.IndexerSqlMetadataStorageCoordinatorTestBase; import org.apache.druid.server.lookup.cache.LookupLoadingSpec; import org.apache.druid.timeline.DataSegment; -import org.apache.druid.timeline.SegmentId; import org.assertj.core.api.Assertions; import org.easymock.Capture; import org.easymock.EasyMock; @@ -73,10 +73,10 @@ public void setup() taskRunner = new TestTaskRunner(); final String version = DateTimes.nowUtc().toString(); - segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version); - segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version); - segment3 = newSegment(Intervals.of("2019-03-01/2019-04-01"), version); - segment4 = newSegment(Intervals.of("2019-04-01/2019-05-01"), version); + segment1 = newSegment(Intervals.of("2019-01-01/2019-02-01"), version).withLoadSpec(ImmutableMap.of("k", 1)); + segment2 = newSegment(Intervals.of("2019-02-01/2019-03-01"), version).withLoadSpec(ImmutableMap.of("k", 2)); + segment3 = 
newSegment(Intervals.of("2019-03-01/2019-04-01"), version).withLoadSpec(ImmutableMap.of("k", 3)); + segment4 = newSegment(Intervals.of("2019-04-01/2019-05-01"), version).withLoadSpec(ImmutableMap.of("k", 4)); } @Test @@ -132,13 +132,13 @@ public void testKill() throws Exception @Test public void testKillSegmentsDeleteUnreferencedSiblings() throws Exception { - final Map upgradeSegmentMapping = ImmutableMap.of( - segment1.getId(), + final Map upgradeSegmentMapping = ImmutableMap.of( + segment1.getId().toString(), "nonExistentParent", - segment2.getId(), + segment2.getId().toString(), "nonExistentParent" ); - getStorageCoordinator().insertUsedSegments(ImmutableSet.of(segment1, segment2), upgradeSegmentMapping); + insertUsedSegments(ImmutableSet.of(segment1, segment2), upgradeSegmentMapping); getStorageCoordinator().markSegmentsAsUnusedWithinInterval(DATA_SOURCE, Intervals.ETERNITY); @@ -170,13 +170,13 @@ public void testKillSegmentsDeleteUnreferencedSiblings() throws Exception @Test public void testKillSegmentsDoNotDeleteReferencedSibling() throws Exception { - final Map upgradeSegmentMapping = ImmutableMap.of( - segment1.getId(), + final Map upgradeSegmentMapping = ImmutableMap.of( + segment1.getId().toString(), "nonExistentParent", - segment2.getId(), + segment2.getId().toString(), "nonExistentParent" ); - getStorageCoordinator().insertUsedSegments(ImmutableSet.of(segment1, segment2), upgradeSegmentMapping); + insertUsedSegments(ImmutableSet.of(segment1, segment2), upgradeSegmentMapping); getStorageCoordinator().markSegmentsAsUnusedWithinInterval(DATA_SOURCE, Intervals.ETERNITY); @@ -208,13 +208,13 @@ public void testKillSegmentsDoNotDeleteReferencedSibling() throws Exception @Test public void testKillSegmentsDoNotDeleteParentWithReferencedChildren() throws Exception { - final Map upgradeSegmentMapping = ImmutableMap.of( - segment1.getId(), + final Map upgradeSegmentMapping = ImmutableMap.of( + segment1.getId().toString(), segment3.getId().toString(), - 
segment2.getId(), + segment2.getId().toString(), segment3.getId().toString() ); - getStorageCoordinator().insertUsedSegments(ImmutableSet.of(segment1, segment2, segment3), upgradeSegmentMapping); + insertUsedSegments(ImmutableSet.of(segment1, segment2, segment3), upgradeSegmentMapping); getSegmentsMetadataManager().markSegmentAsUnused(segment2.getId()); getSegmentsMetadataManager().markSegmentAsUnused(segment3.getId()); @@ -253,13 +253,13 @@ public void testKillSegmentsDoNotDeleteParentWithReferencedChildren() throws Exc @Test public void testKillSegmentsDoNotDeleteChildrenWithReferencedParent() throws Exception { - final Map upgradeSegmentMapping = ImmutableMap.of( - segment1.getId(), + final Map upgradeSegmentMapping = ImmutableMap.of( + segment1.getId().toString(), segment3.getId().toString(), - segment2.getId(), + segment2.getId().toString(), segment3.getId().toString() ); - getStorageCoordinator().insertUsedSegments(ImmutableSet.of(segment1, segment2, segment3), upgradeSegmentMapping); + insertUsedSegments(ImmutableSet.of(segment1, segment2, segment3), upgradeSegmentMapping); getSegmentsMetadataManager().markSegmentAsUnused(segment1.getId()); getSegmentsMetadataManager().markSegmentAsUnused(segment2.getId()); @@ -298,13 +298,13 @@ public void testKillSegmentsDoNotDeleteChildrenWithReferencedParent() throws Exc @Test public void testKillSegmentsDeleteChildrenAndParent() throws Exception { - final Map upgradeSegmentMapping = ImmutableMap.of( - segment1.getId(), + final Map upgradeSegmentMapping = ImmutableMap.of( + segment1.getId().toString(), segment3.getId().toString(), - segment2.getId(), + segment2.getId().toString(), segment3.getId().toString() ); - getStorageCoordinator().insertUsedSegments(ImmutableSet.of(segment1, segment2, segment3), upgradeSegmentMapping); + insertUsedSegments(ImmutableSet.of(segment1, segment2, segment3), upgradeSegmentMapping); getSegmentsMetadataManager().markSegmentAsUnused(segment1.getId()); 
getSegmentsMetadataManager().markSegmentAsUnused(segment2.getId()); getSegmentsMetadataManager().markSegmentAsUnused(segment3.getId()); @@ -1454,4 +1454,16 @@ private static DataSegment newSegment(Interval interval, String version, Map segments, Map upgradedFromSegmentIdMap) + { + final String table = derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable(); + IndexerSqlMetadataStorageCoordinatorTestBase.insertUsedSegments( + segments, + upgradedFromSegmentIdMap, + derbyConnectorRule.getConnector(), + table, + getObjectMapper() + ); + } } diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index 29ff97e21a69..83b4ac7e474c 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -479,16 +479,15 @@ SegmentPublishResult commitMetadataOnly( * There should be no entry in the map for an original non-upgraded segment * @param dataSource data source * @param segmentIds ids of segments - * @return map from child id to parent id */ Map retrieveUpgradedFromSegmentIds(String dataSource, Set segmentIds); /** * Map from a segment ID to a set containing - * all segment IDs that were upgraded from it AND are still present in the metadata store + * 1) all segment IDs that were upgraded from it AND are still present in the metadata store + * 2) the segment ID itself if and only if it is still present in the metadata store * @param dataSource data source * @param segmentIds ids of the first segments which had the corresponding load spec - * @return map from parent id to set of all its children */ Map> retrieveUpgradedToSegmentIds(String dataSource, Set segmentIds); } diff --git 
a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index dc23494c3617..54f75ccb9201 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -1551,48 +1551,6 @@ int insertPendingSegmentsIntoMetastore( return Arrays.stream(updated).sum(); } - @VisibleForTesting - public void insertUsedSegments(Set dataSegments, Map upgradedFromSegmentIdMap) - { - final String table = dbTables.getSegmentsTable(); - connector.retryWithHandle( - handle -> { - PreparedBatch preparedBatch = handle.prepareBatch( - StringUtils.format( - "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, partitioned, version," - + " used, payload, used_status_last_updated, upgraded_from_segment_id) " - + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version," - + " :used, :payload, :used_status_last_updated, :upgraded_from_segment_id)", - table, - connector.getQuoteString() - ) - ); - for (DataSegment segment : dataSegments) { - String id = segment.getId().toString(); - preparedBatch.add() - .bind("id", id) - .bind("dataSource", segment.getDataSource()) - .bind("created_date", DateTimes.nowUtc().toString()) - .bind("start", segment.getInterval().getStart().toString()) - .bind("end", segment.getInterval().getEnd().toString()) - .bind("partitioned", !(segment.getShardSpec() instanceof NoneShardSpec)) - .bind("version", segment.getVersion()) - .bind("used", true) - .bind("payload", jsonMapper.writeValueAsBytes(segment)) - .bind("used_status_last_updated", DateTimes.nowUtc().toString()) - .bind("upgraded_from_segment_id", upgradedFromSegmentIdMap.get(segment.getId())); - } - - final int[] affectedRows = preparedBatch.execute(); - final boolean succeeded = 
Arrays.stream(affectedRows).allMatch(eachAffectedRows -> eachAffectedRows == 1); - if (!succeeded) { - throw new ISE("Failed to publish segments to DB"); - } - return true; - } - ); - } - private void insertPendingSegmentIntoMetastore( Handle handle, SegmentIdWithShardSpec newIdentifier, diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 26cac2682881..f352d5e2609d 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -3429,13 +3429,13 @@ public void testRetrieveUnusedSegmentsForExactIntervalAndVersion() public void testRetrieveUpgradedFromSegmentIds() { final String datasource = defaultSegment.getDataSource(); - final Map upgradedFromSegmentIdMap = new HashMap<>(); - upgradedFromSegmentIdMap.put(defaultSegment2.getId(), defaultSegment.getId().toString()); - coordinator.insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), upgradedFromSegmentIdMap); + final Map upgradedFromSegmentIdMap = new HashMap<>(); + upgradedFromSegmentIdMap.put(defaultSegment2.getId().toString(), defaultSegment.getId().toString()); + insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), upgradedFromSegmentIdMap); coordinator.markSegmentsAsUnusedWithinInterval(datasource, Intervals.ETERNITY); upgradedFromSegmentIdMap.clear(); - upgradedFromSegmentIdMap.put(defaultSegment3.getId(), defaultSegment.getId().toString()); - coordinator.insertUsedSegments(ImmutableSet.of(defaultSegment3, defaultSegment4), upgradedFromSegmentIdMap); + upgradedFromSegmentIdMap.put(defaultSegment3.getId().toString(), defaultSegment.getId().toString()); + insertUsedSegments(ImmutableSet.of(defaultSegment3, defaultSegment4), upgradedFromSegmentIdMap); Map expected = new HashMap<>(); 
expected.put(defaultSegment2.getId().toString(), defaultSegment.getId().toString()); @@ -3456,13 +3456,13 @@ public void testRetrieveUpgradedFromSegmentIds() public void testRetrieveUpgradedToSegmentIds() { final String datasource = defaultSegment.getDataSource(); - final Map upgradedFromSegmentIdMap = new HashMap<>(); - upgradedFromSegmentIdMap.put(defaultSegment2.getId(), defaultSegment.getId().toString()); - coordinator.insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), upgradedFromSegmentIdMap); + final Map upgradedFromSegmentIdMap = new HashMap<>(); + upgradedFromSegmentIdMap.put(defaultSegment2.getId().toString(), defaultSegment.getId().toString()); + insertUsedSegments(ImmutableSet.of(defaultSegment, defaultSegment2), upgradedFromSegmentIdMap); coordinator.markSegmentsAsUnusedWithinInterval(datasource, Intervals.ETERNITY); upgradedFromSegmentIdMap.clear(); - upgradedFromSegmentIdMap.put(defaultSegment3.getId(), defaultSegment.getId().toString()); - coordinator.insertUsedSegments(ImmutableSet.of(defaultSegment3, defaultSegment4), upgradedFromSegmentIdMap); + upgradedFromSegmentIdMap.put(defaultSegment3.getId().toString(), defaultSegment.getId().toString()); + insertUsedSegments(ImmutableSet.of(defaultSegment3, defaultSegment4), upgradedFromSegmentIdMap); Map> expected = new HashMap<>(); expected.put(defaultSegment.getId().toString(), new HashSet<>()); @@ -3477,4 +3477,10 @@ public void testRetrieveUpgradedToSegmentIds() coordinator.retrieveUpgradedToSegmentIds(datasource, upgradedIds) ); } + + private void insertUsedSegments(Set segments, Map upgradedFromSegmentIdMap) + { + final String table = derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable(); + insertUsedSegments(segments, upgradedFromSegmentIdMap, derbyConnector, table, mapper); + } } diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorTestBase.java 
b/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorTestBase.java index 26d1e8518d97..2076e5ffa461 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorTestBase.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorTestBase.java @@ -323,6 +323,7 @@ protected DataSegment createSegment(Interval interval, String version, ShardSpec .version(version) .shardSpec(shardSpec) .size(100) + // hash to get a unique load spec as segmentId has not yet been generated .loadSpec(ImmutableMap.of("hash", Objects.hash(interval, version, shardSpec))) .build(); } @@ -561,4 +562,50 @@ protected void insertIntoUpgradeSegmentsTable(Map } ); } + + public static void insertUsedSegments( + Set dataSegments, + Map upgradedFromSegmentIdMap, + SQLMetadataConnector connector, + String table, + ObjectMapper jsonMapper + ) + { + connector.retryWithHandle( + handle -> { + PreparedBatch preparedBatch = handle.prepareBatch( + StringUtils.format( + "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, partitioned, version," + + " used, payload, used_status_last_updated, upgraded_from_segment_id) " + + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version," + + " :used, :payload, :used_status_last_updated, :upgraded_from_segment_id)", + table, + connector.getQuoteString() + ) + ); + for (DataSegment segment : dataSegments) { + String id = segment.getId().toString(); + preparedBatch.add() + .bind("id", id) + .bind("dataSource", segment.getDataSource()) + .bind("created_date", DateTimes.nowUtc().toString()) + .bind("start", segment.getInterval().getStart().toString()) + .bind("end", segment.getInterval().getEnd().toString()) + .bind("partitioned", !(segment.getShardSpec() instanceof NoneShardSpec)) + .bind("version", segment.getVersion()) + .bind("used", true) + .bind("payload", jsonMapper.writeValueAsBytes(segment)) + .bind("used_status_last_updated", 
DateTimes.nowUtc().toString()) + .bind("upgraded_from_segment_id", upgradedFromSegmentIdMap.get(segment.getId().toString())); + } + + final int[] affectedRows = preparedBatch.execute(); + final boolean succeeded = Arrays.stream(affectedRows).allMatch(eachAffectedRows -> eachAffectedRows == 1); + if (!succeeded) { + throw new ISE("Failed to publish segments to DB"); + } + return true; + } + ); + } } From ed553ab994c8c7b88e5ec261ca8c6e41283cd69f Mon Sep 17 00:00:00 2001 From: Amatya Date: Mon, 15 Jul 2024 10:59:50 +0530 Subject: [PATCH 15/16] Remove unneeded methods --- .../RetrieveUpgradedFromSegmentIdsAction.java | 20 ------------------- .../RetrieveUpgradedToSegmentIdsAction.java | 20 ------------------- 2 files changed, 40 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java index ea5e6b605a23..67f7ae6e1317 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedFromSegmentIdsAction.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.core.type.TypeReference; import org.apache.druid.indexing.common.task.Task; -import java.util.Objects; import java.util.Set; /** @@ -80,25 +79,6 @@ public boolean isAudited() return false; } - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - RetrieveUpgradedFromSegmentIdsAction that = (RetrieveUpgradedFromSegmentIdsAction) o; - return Objects.equals(dataSource, that.dataSource) && Objects.equals(segmentIds, that.segmentIds); - } - - @Override - public int hashCode() - { - return Objects.hash(dataSource, segmentIds); - } - @Override public String toString() { diff 
--git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java index f74d32f57747..412c9604d114 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RetrieveUpgradedToSegmentIdsAction.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.core.type.TypeReference; import org.apache.druid.indexing.common.task.Task; -import java.util.Objects; import java.util.Set; /** @@ -85,25 +84,6 @@ public boolean isAudited() return false; } - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - RetrieveUpgradedToSegmentIdsAction that = (RetrieveUpgradedToSegmentIdsAction) o; - return Objects.equals(dataSource, that.dataSource) && Objects.equals(segmentIds, that.segmentIds); - } - - @Override - public int hashCode() - { - return Objects.hash(dataSource, segmentIds); - } - @Override public String toString() { From ebc8362363547db08d93e108c21ca23c248b0a54 Mon Sep 17 00:00:00 2001 From: Amatya Date: Mon, 15 Jul 2024 11:08:12 +0530 Subject: [PATCH 16/16] Add doc --- .../indexing/common/task/KillUnusedSegmentsTask.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java index 1de5002fac0b..e1f6d2915eea 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTask.java @@ -348,6 +348,15 @@ private NavigableMap> 
getNonRevokedTaskLockMap(TaskActi return taskLockMap; } + /** + * Determines subset of segments without referenced load specs that can be safely killed by + * looking at the segment upgrades and used segment load specs + * @param unusedSegments input segments + * @param upgradedFromSegmentIds segment to parent mapping + * @param usedSegmentLoadSpecs load specs of used segments + * @param taskActionClient task action client + * @return list of segments to kill from deep storage + */ private List getKillableSegments( List unusedSegments, Map upgradedFromSegmentIds,