apache · guozhangwang · Feb 21, 2020 · Feb 20, 2020 · Feb 20, 2020 · Feb 21, 2020
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java
@@ -94,7 +94,6 @@ public class StreamTask extends AbstractTask implements ProcessorNodePunctuator
     private long idleStartTime;
     private Producer<byte[], byte[]> producer;
     private boolean commitRequested = false;
-    private boolean transactionInFlight = false;
 
     private final String threadId;
 
@@ -294,7 +293,6 @@ public void initializeTopology() {
             } catch (final ProducerFencedException | UnknownProducerIdException e) {
                 throw new TaskMigratedException(this, e);
             }
-            transactionInFlight = true;
         }
 
         processorContext.initialize();
@@ -522,10 +520,8 @@ void commit(final boolean startNewTransaction, final Map<TopicPartition, Long> p
             if (eosEnabled) {
                 producer.sendOffsetsToTransaction(consumedOffsetsAndMetadata, applicationId);
                 producer.commitTransaction();
-                transactionInFlight = false;
                 if (startNewTransaction) {
                     producer.beginTransaction();
-                    transactionInFlight = true;
                 }
             } else {
                 consumer.commitSync(consumedOffsetsAndMetadata);
@@ -602,7 +598,7 @@ private void initTopology() {
      */
     public void suspend() {
         log.debug("Suspending");
-        suspend(true, false);
+        suspend(true);
     }
 
     /**
@@ -618,8 +614,7 @@ public void suspend() {
      *                               or if the task producer got fenced (EOS)
      */
     // visible for testing
-    void suspend(final boolean clean,
-                 final boolean isZombie) {
+    void suspend(final boolean clean) {
         // this is necessary because all partition times are reset to -1 during close
         // we need to preserve the original partitions times before calling commit
         final Map<TopicPartition, Long> partitionTimes = extractPartitionTimes();
@@ -640,14 +635,7 @@ void suspend(final boolean clean,
 
                 if (eosEnabled) {
                     stateMgr.checkpoint(activeTaskCheckpointableOffsets());
-
-                    try {
-                        recordCollector.close();
-                    } catch (final RecoverableClientException e) {
-                        taskMigratedException = new TaskMigratedException(this, e);
-                    } finally {
-                        producer = null;
-                    }
+                    taskMigratedException = closeRecordCollector();
                 }
             }
             if (taskMigratedException != null) {
@@ -662,37 +650,26 @@ void suspend(final boolean clean,
             }
 
             if (eosEnabled) {
-                maybeAbortTransactionAndCloseRecordCollector(isZombie);
+                // Ignore any exceptions while closing the record collector, i.e. the task producer.
+                closeRecordCollector();
             }
         }
     }
 
-    private void maybeAbortTransactionAndCloseRecordCollector(final boolean isZombie) {
-        if (!isZombie) {
-            try {
-                if (transactionInFlight) {
-                    producer.abortTransaction();
-                }
-                transactionInFlight = false;
-            } catch (final ProducerFencedException ignore) {
-                /* TODO
-                 * this should actually never happen atm as we guard the call to #abortTransaction
-                 * -> the reason for the guard is a "bug" in the Producer -- it throws IllegalStateException
-                 * instead of ProducerFencedException atm. We can remove the isZombie flag after KAFKA-5604 got
-                 * fixed and fall-back to this catch-and-swallow code
-                 */
-
-                // can be ignored: transaction got already aborted by brokers/transactional-coordinator if this happens
-            }
-        }
+    private TaskMigratedException closeRecordCollector() {
+        TaskMigratedException taskMigratedException = null;
 
         try {
             recordCollector.close();
+        } catch (final RecoverableClientException e) {
+            taskMigratedException = new TaskMigratedException(this, e);
         } catch (final Throwable e) {
             log.error("Failed to close producer due to the following error:", e);
         } finally {
             producer = null;
         }
+
+        return taskMigratedException;
     }
 
     private void closeTopology() {
@@ -742,7 +719,7 @@ void closeSuspended(final boolean clean, RuntimeException firstException) {
 
     /**
      * <pre>
-     * - {@link #suspend(boolean, boolean) suspend(clean)}
+     * - {@link #suspend(boolean) suspend(clean)}
      *   - close topology
      *   - if (clean) {@link #commit()}
      *     - flush state and producer
@@ -765,7 +742,7 @@ public void close(boolean clean,
 
         RuntimeException firstException = null;
         try {
-            suspend(clean, isZombie);
+            suspend(clean);
         } catch (final RuntimeException e) {
             clean = false;
             firstException = e;

diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamTaskTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamTaskTest.java
@@ -1317,26 +1317,25 @@ public void shouldNotCloseProducerIfFencedOnCloseDuringCleanCloseWithEosEnabled(
     }
 
     @Test
-    public void shouldAbortTransactionAndCloseProducerOnUncleanCloseWithEosEnabled() {
+    public void shouldCloseProducerOnUncleanCloseWithEosEnabled() {
         task = createStatelessTask(createConfig(true), StreamsConfig.METRICS_LATEST);
         task.initializeTopology();
 
         task.close(false, false);
         task = null;
 
-        assertTrue(producer.transactionAborted());
-        assertFalse(producer.transactionInFlight());
+        // Make sure no method (such as abort) is called on the producer during an unclean close.
+        assertTrue(producer.transactionInFlight());
         assertTrue(producer.closed());
     }
 
     @Test
-    public void shouldAbortTransactionAndCloseProducerOnErrorDuringUncleanCloseWithEosEnabled() {
+    public void shouldCloseProducerOnErrorDuringUncleanCloseWithEosEnabled() {
         task = createTaskThatThrowsException(true);
         task.initializeTopology();
 
         task.close(false, false);
 
-        assertTrue(producer.transactionAborted());
         assertTrue(producer.closed());
     }
 
@@ -1553,15 +1552,14 @@ public void shouldOnlyCloseFencedProducerOnUncleanClosedWithEosEnabled() {
     }
 
     @Test
-    public void shouldAbortTransactionButNotCloseProducerIfFencedOnCloseDuringUncleanCloseWithEosEnabled() {
+    public void shouldNotCloseProducerIfFencedOnCloseDuringUncleanCloseWithEosEnabled() {
         task = createStatelessTask(createConfig(true), StreamsConfig.METRICS_LATEST);
         task.initializeTopology();
         producer.fenceProducerOnClose();
 
         task.close(false, false);
         task = null;
 
-        assertTrue(producer.transactionAborted());
         assertFalse(producer.closed());
     }