From f357c1fff55c1cce0debe22b3c025782f4fce088 Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Wed, 8 Apr 2026 16:05:56 -0600 Subject: [PATCH 1/2] Disable OleTxTests.Recovery under JIT stress configurations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Recovery test consistently times out (20+ min) under stress modes (fullpgo, jitstress2_jitstressregs) — confirmed from Helix logs across multiple hits. PR #125813 added a 120s child-process timeout but the main thread still hangs waiting for MSDTC under slow runtimes. This is a libraries-level test exercising MSDTC/OLE transaction recovery; running it under JIT stress provides no additional signal. Skip it on all non-regular CoreCLR test modes. Fixes #126304 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/libraries/System.Transactions.Local/tests/OleTxTests.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libraries/System.Transactions.Local/tests/OleTxTests.cs b/src/libraries/System.Transactions.Local/tests/OleTxTests.cs index d071f03b51c896..7428490853bdb8 100644 --- a/src/libraries/System.Transactions.Local/tests/OleTxTests.cs +++ b/src/libraries/System.Transactions.Local/tests/OleTxTests.cs @@ -302,6 +302,7 @@ public void SinglePhaseCommit(SinglePhaseEnlistment singlePhaseEnlistment) } [ConditionalFact(typeof(OleTxTests), nameof(IsRemoteExecutorSupportedAndNotNano))] + [SkipOnCoreClr("OleTx Recovery times out under JIT stress, not relevant for libraries tests", ~RuntimeTestModes.RegularRun)] public void Recovery() { Test(() => From df4852ca4591438080a7c81dcb2b69f30ee8e866 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 9 Apr 2026 05:54:06 +0000 Subject: [PATCH 2/2] Add watchdog thread to OleTxTests.Recovery to generate crash dump on hang Agent-Logs-Url: https://github.com/dotnet/runtime/sessions/57e057a0-4388-4473-8c26-c58d16566062 Co-authored-by: jkotas <6668460+jkotas@users.noreply.github.com> --- .../tests/OleTxTests.cs | 143 ++++++++++-------- 1 file changed, 81 insertions(+), 62 deletions(-) diff --git a/src/libraries/System.Transactions.Local/tests/OleTxTests.cs b/src/libraries/System.Transactions.Local/tests/OleTxTests.cs index 7428490853bdb8..6c16c96b9b4117 100644 --- a/src/libraries/System.Transactions.Local/tests/OleTxTests.cs +++ b/src/libraries/System.Transactions.Local/tests/OleTxTests.cs @@ -302,75 +302,94 @@ public void SinglePhaseCommit(SinglePhaseEnlistment singlePhaseEnlistment) } [ConditionalFact(typeof(OleTxTests), nameof(IsRemoteExecutorSupportedAndNotNano))] - [SkipOnCoreClr("OleTx Recovery times out under JIT stress, not relevant for libraries tests", ~RuntimeTestModes.RegularRun)] public void Recovery() { - Test(() => + // Start a watchdog thread to generate a crash dump if the test hangs longer than 5 minutes. + // This helps diagnose the hang described in https://github.com/dotnet/runtime/issues/126304. + var testCompleted = new ManualResetEventSlim(false); + var watchdog = new Thread(() => { - // We are going to spin up an external process to also enlist in the transaction, and then to crash when it - // receives the commit notification. We will then initiate the recovery flow. - - using var tx = new CommittableTransaction(); - - var outcomeEvent1 = new AutoResetEvent(false); - var enlistment1 = new TestEnlistment(Phase1Vote.Prepared, EnlistmentOutcome.Committed, outcomeReceived: outcomeEvent1); - var guid1 = Guid.NewGuid(); - tx.EnlistDurable(guid1, enlistment1, EnlistmentOptions.None); - - // The propagation token is used to propagate the transaction to that process so it can enlist to our - // transaction. We also provide the resource manager identifier GUID, and a path where the external process will - // write the recovery information it will receive from the MSDTC when preparing. - // We'll need these two elements later in order to Reenlist and trigger recovery. - byte[] propagationToken = TransactionInterop.GetTransmitterPropagationToken(tx); - string propagationTokenText = Convert.ToBase64String(propagationToken); - var guid2 = Guid.NewGuid(); - string secondEnlistmentRecoveryFilePath = Path.GetTempFileName(); - - using var waitHandle = new EventWaitHandle( - initialState: false, - EventResetMode.ManualReset, - "System.Transactions.Tests.OleTxTests.Recovery"); + if (!testCompleted.Wait(TimeSpan.FromMinutes(5))) + { + Environment.FailFast("OleTxTests.Recovery did not complete within 5 minutes. See https://github.com/dotnet/runtime/issues/126304"); + } + }); + watchdog.IsBackground = true; + watchdog.Start(); - try + try + { + Test(() => { - using (RemoteExecutor.Invoke( - EnlistAndCrash, - propagationTokenText, guid2.ToString(), secondEnlistmentRecoveryFilePath, - // Bound the child process lifetime so that if MSDTC is unresponsive - // and the process hangs, Dispose() will kill it instead of blocking indefinitely. - new RemoteInvokeOptions { ExpectedExitCode = 42, TimeOut = 120_000 })) - { - // Wait for the external process to enlist in the transaction, it will signal this EventWaitHandle. - Assert.True(waitHandle.WaitOne(Timeout)); + // We are going to spin up an external process to also enlist in the transaction, and then to crash when it + // receives the commit notification. We will then initiate the recovery flow. + + using var tx = new CommittableTransaction(); + + var outcomeEvent1 = new AutoResetEvent(false); + var enlistment1 = new TestEnlistment(Phase1Vote.Prepared, EnlistmentOutcome.Committed, outcomeReceived: outcomeEvent1); + var guid1 = Guid.NewGuid(); + tx.EnlistDurable(guid1, enlistment1, EnlistmentOptions.None); + + // The propagation token is used to propagate the transaction to that process so it can enlist to our + // transaction. We also provide the resource manager identifier GUID, and a path where the external process will + // write the recovery information it will receive from the MSDTC when preparing. + // We'll need these two elements later in order to Reenlist and trigger recovery. + byte[] propagationToken = TransactionInterop.GetTransmitterPropagationToken(tx); + string propagationTokenText = Convert.ToBase64String(propagationToken); + var guid2 = Guid.NewGuid(); + string secondEnlistmentRecoveryFilePath = Path.GetTempFileName(); + + using var waitHandle = new EventWaitHandle( + initialState: false, + EventResetMode.ManualReset, + "System.Transactions.Tests.OleTxTests.Recovery"); - tx.Commit(); + try + { + using (RemoteExecutor.Invoke( + EnlistAndCrash, + propagationTokenText, guid2.ToString(), secondEnlistmentRecoveryFilePath, + // Bound the child process lifetime so that if MSDTC is unresponsive + // and the process hangs, Dispose() will kill it instead of blocking indefinitely. + new RemoteInvokeOptions { ExpectedExitCode = 42, TimeOut = 120_000 })) + { + // Wait for the external process to enlist in the transaction, it will signal this EventWaitHandle. + Assert.True(waitHandle.WaitOne(Timeout)); + + tx.Commit(); + } + + // The other has crashed when the MSDTC notified it to commit. + // Load the recovery information the other process has written to disk for us and reenlist with + // the failed RM's Guid to commit. + var outcomeEvent3 = new AutoResetEvent(false); + var enlistment3 = new TestEnlistment(Phase1Vote.Prepared, EnlistmentOutcome.Committed, outcomeReceived: outcomeEvent3); + byte[] secondRecoveryInformation = File.ReadAllBytes(secondEnlistmentRecoveryFilePath); + _ = TransactionManager.Reenlist(guid2, secondRecoveryInformation, enlistment3); + TransactionManager.RecoveryComplete(guid2); + + Assert.True(outcomeEvent1.WaitOne(Timeout)); + Assert.True(outcomeEvent3.WaitOne(Timeout)); + Assert.Equal(EnlistmentOutcome.Committed, enlistment1.Outcome); + Assert.Equal(EnlistmentOutcome.Committed, enlistment3.Outcome); + Assert.Equal(TransactionStatus.Committed, tx.TransactionInformation.Status); + + // Note: verify manually in the MSDTC console that the distributed transaction is gone + // (i.e. successfully committed), + // (Start -> Component Services -> Computers -> My Computer -> Distributed Transaction Coordinator -> + // Local DTC -> Transaction List) } - - // The other has crashed when the MSDTC notified it to commit. - // Load the recovery information the other process has written to disk for us and reenlist with - // the failed RM's Guid to commit. - var outcomeEvent3 = new AutoResetEvent(false); - var enlistment3 = new TestEnlistment(Phase1Vote.Prepared, EnlistmentOutcome.Committed, outcomeReceived: outcomeEvent3); - byte[] secondRecoveryInformation = File.ReadAllBytes(secondEnlistmentRecoveryFilePath); - _ = TransactionManager.Reenlist(guid2, secondRecoveryInformation, enlistment3); - TransactionManager.RecoveryComplete(guid2); - - Assert.True(outcomeEvent1.WaitOne(Timeout)); - Assert.True(outcomeEvent3.WaitOne(Timeout)); - Assert.Equal(EnlistmentOutcome.Committed, enlistment1.Outcome); - Assert.Equal(EnlistmentOutcome.Committed, enlistment3.Outcome); - Assert.Equal(TransactionStatus.Committed, tx.TransactionInformation.Status); - - // Note: verify manually in the MSDTC console that the distributed transaction is gone - // (i.e. successfully committed), - // (Start -> Component Services -> Computers -> My Computer -> Distributed Transaction Coordinator -> - // Local DTC -> Transaction List) - } - finally - { - File.Delete(secondEnlistmentRecoveryFilePath); - } - }); + finally + { + File.Delete(secondEnlistmentRecoveryFilePath); + } + }); + } + finally + { + testCompleted.Set(); + } static void EnlistAndCrash(string propagationTokenText, string resourceManagerIdentifierGuid, string recoveryInformationFilePath) => Test(() =>