From fadc7fede57a74316079649013600b897d01ad25 Mon Sep 17 00:00:00 2001 From: Jaime Herrero Date: Sat, 28 Feb 2026 17:18:42 -0500 Subject: [PATCH] fix: set FailureAction=rollback for swarm services default UpdateConfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Docker Swarm's default FailureAction is "pause". When a task fails or is terminated early during a rolling update, Swarm pauses the update and stops ALL reconciliation — orphan containers persist indefinitely, even when healthy. This is the root cause of orphan container issues reported in production (services showing Replicas: N/1 with multiple healthy containers that never get cleaned up). Setting FailureAction to "rollback" makes Swarm automatically revert to the previous working service spec on failure, preventing orphans while preserving service availability. Also adds a default RollbackConfig with Order: "start-first" to match the update config (Docker defaults rollback to "stop-first" otherwise). Only the defaults change, and each is applied independently: a user-supplied updateConfigSwarm or rollbackConfigSwarm is still passed through as-is, and the new default is used only for whichever of the two is not set. Relates to #1669, #2223, #2911, #2150 --- packages/server/src/utils/docker/utils.ts | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/packages/server/src/utils/docker/utils.ts b/packages/server/src/utils/docker/utils.ts index 144df2c143..427f450929 100644 --- a/packages/server/src/utils/docker/utils.ts +++ b/packages/server/src/utils/docker/utils.ts @@ -550,9 +550,15 @@ export const generateConfigContainer = ( }, }, }), - ...(rollbackConfigSwarm && { - RollbackConfig: rollbackConfigSwarm, - }), + ...(rollbackConfigSwarm + ? { RollbackConfig: rollbackConfigSwarm } + : { + // default rollback config to match update config + RollbackConfig: { + Parallelism: 1, + Order: "start-first", + }, + }), ...(updateConfigSwarm ?
{ UpdateConfig: updateConfigSwarm } : { @@ -560,6 +566,7 @@ export const generateConfigContainer = ( UpdateConfig: { Parallelism: 1, Order: "start-first", + FailureAction: "rollback", }, }), ...(sanitizedStopGracePeriodSwarm !== null &&