-
Notifications
You must be signed in to change notification settings - Fork 15.1k
KAFKA-15605: Fix topic deletion handling during ZK migration #14545
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
894ac0a
139aa8a
f399c3b
3918116
42600ea
49418aa
b3061eb
f6f44e4
de6898c
5c545e6
07f59f8
3bf0a5a
85809fc
d47c33a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -92,6 +92,10 @@ class LogManager(logDirs: Seq[File], | |
| // Each element in the queue contains the log object to be deleted and the time it is scheduled for deletion. | ||
| private val logsToBeDeleted = new LinkedBlockingQueue[(UnifiedLog, Long)]() | ||
|
|
||
| // Map of stray partition to stray log. This holds all stray logs detected on the broker. | ||
| // Visible for testing | ||
| private val strayLogs = new Pool[TopicPartition, UnifiedLog]() | ||
|
|
||
| private val _liveLogDirs: ConcurrentLinkedQueue[File] = createAndValidateLogDirs(logDirs, initialOfflineDirs) | ||
| @volatile private var _currentDefaultConfig = initialDefaultConfig | ||
| @volatile private var numRecoveryThreadsPerDataDir = recoveryThreadsPerDataDir | ||
|
|
@@ -302,6 +306,10 @@ class LogManager(logDirs: Seq[File], | |
| this.logsToBeDeleted.add((log, time.milliseconds())) | ||
| } | ||
|
|
||
| def addStrayLog(strayPartition: TopicPartition, strayLog: UnifiedLog): Unit = { | ||
| this.strayLogs.put(strayPartition, strayLog) | ||
| } | ||
|
|
||
| // Only for testing | ||
| private[log] def hasLogsToBeDeleted: Boolean = !logsToBeDeleted.isEmpty | ||
|
|
||
|
|
@@ -337,6 +345,9 @@ class LogManager(logDirs: Seq[File], | |
|
|
||
| if (logDir.getName.endsWith(UnifiedLog.DeleteDirSuffix)) { | ||
| addLogToBeDeleted(log) | ||
| } else if (logDir.getName.endsWith(UnifiedLog.StrayDirSuffix)) { | ||
| addStrayLog(topicPartition, log) | ||
| warn(s"Loaded stray log: $logDir") | ||
| } else { | ||
| val previous = { | ||
| if (log.isFuture) | ||
|
|
@@ -1203,7 +1214,8 @@ class LogManager(logDirs: Seq[File], | |
| */ | ||
| def asyncDelete(topicPartition: TopicPartition, | ||
| isFuture: Boolean = false, | ||
| checkpoint: Boolean = true): Option[UnifiedLog] = { | ||
| checkpoint: Boolean = true, | ||
| isStray: Boolean = false): Option[UnifiedLog] = { | ||
| val removedLog: Option[UnifiedLog] = logCreationOrDeletionLock synchronized { | ||
| removeLogAndMetrics(if (isFuture) futureLogs else currentLogs, topicPartition) | ||
| } | ||
|
|
@@ -1216,15 +1228,21 @@ class LogManager(logDirs: Seq[File], | |
| cleaner.updateCheckpoints(removedLog.parentDirFile, partitionToRemove = Option(topicPartition)) | ||
| } | ||
| } | ||
| removedLog.renameDir(UnifiedLog.logDeleteDirName(topicPartition), false) | ||
| if (isStray) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looks like we are only renaming the stray log dir and removing it from LogManager. Do we also want to add the logic on delayed stray log deletion? If we want to delete the stray logs immediately (more risky, and it might create conflicts with AK merge), I think we need to add it to the log deletion queue (by calling `addLogToBeDeleted`). Right now the log dir will be renamed to "-stray" but it will not be deleted by the broker.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For now, the desired behavior is to stop tracking the partition but not delete the files. Since migrations are one-off and inherently risky, I didn't want to take any destructive actions like deleting the logs (immediately or delayed). The strayed partitions are logged at the INFO level when they are detected, and at WARN on subsequent startups. This gives operators the information needed to clean up stray partitions if desired. I filed https://issues.apache.org/jira/browse/KAFKA-15698 to track automatic cleanup of the stray partitions.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Got it. Makes sense. |
||
| // Move aside stray partitions, don't delete them | ||
| removedLog.renameDir(UnifiedLog.logStrayDirName(topicPartition), false) | ||
| warn(s"Log for partition ${removedLog.topicPartition} is marked as stray and renamed to ${removedLog.dir.getAbsolutePath}") | ||
| } else { | ||
| removedLog.renameDir(UnifiedLog.logDeleteDirName(topicPartition), false) | ||
| addLogToBeDeleted(removedLog) | ||
| info(s"Log for partition ${removedLog.topicPartition} is renamed to ${removedLog.dir.getAbsolutePath} and is scheduled for deletion") | ||
| } | ||
| if (checkpoint) { | ||
| val logDir = removedLog.parentDirFile | ||
| val logsToCheckpoint = logsInDir(logDir) | ||
| checkpointRecoveryOffsetsInDir(logDir, logsToCheckpoint) | ||
| checkpointLogStartOffsetsInDir(logDir, logsToCheckpoint) | ||
| } | ||
| addLogToBeDeleted(removedLog) | ||
| info(s"Log for partition ${removedLog.topicPartition} is renamed to ${removedLog.dir.getAbsolutePath} and is scheduled for deletion") | ||
|
|
||
| case None => | ||
| if (offlineLogDirs.nonEmpty) { | ||
|
|
@@ -1244,18 +1262,19 @@ class LogManager(logDirs: Seq[File], | |
| * topic-partition is raised | ||
| */ | ||
| def asyncDelete(topicPartitions: Set[TopicPartition], | ||
| isStray: Boolean, | ||
| errorHandler: (TopicPartition, Throwable) => Unit): Unit = { | ||
| val logDirs = mutable.Set.empty[File] | ||
|
|
||
| topicPartitions.foreach { topicPartition => | ||
| try { | ||
| getLog(topicPartition).foreach { log => | ||
| logDirs += log.parentDirFile | ||
| asyncDelete(topicPartition, checkpoint = false) | ||
| asyncDelete(topicPartition, checkpoint = false, isStray = isStray) | ||
| } | ||
| getLog(topicPartition, isFuture = true).foreach { log => | ||
| logDirs += log.parentDirFile | ||
| asyncDelete(topicPartition, isFuture = true, checkpoint = false) | ||
| asyncDelete(topicPartition, isFuture = true, checkpoint = false, isStray = isStray) | ||
| } | ||
| } catch { | ||
| case e: Throwable => errorHandler(topicPartition, e) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.