bridge: add guest-side reconnect loop for live migration #2698
Changes from all commits:
```diff
@@ -24,6 +24,7 @@ import (
 	"github.com/Microsoft/hcsshim/internal/guest/bridge"
 	"github.com/Microsoft/hcsshim/internal/guest/cgroup"
 	"github.com/Microsoft/hcsshim/internal/guest/kmsg"
+	"github.com/Microsoft/hcsshim/internal/guest/prot"
 	"github.com/Microsoft/hcsshim/internal/guest/runtime/hcsv2"
 	"github.com/Microsoft/hcsshim/internal/guest/runtime/runc"
 	"github.com/Microsoft/hcsshim/internal/guest/transport"
```
```diff
@@ -385,35 +386,17 @@ func main() {
 	if err != nil {
 		logrus.WithError(err).Fatal("failed to initialize new runc runtime")
 	}
-	mux := bridge.NewBridgeMux()
-	b := bridge.Bridge{
-		Handler:  mux,
-		EnableV4: *v4,
-	}
 
 	h := hcsv2.NewHost(rtime, tport, initialEnforcer, logWriter)
 	// Initialize virtual pod support in the host
 	if err := h.InitializeVirtualPodSupport(virtualPodsControl); err != nil {
 		logrus.WithError(err).Warn("Virtual pod support initialization failed")
 	}
-	b.AssignHandlers(mux, h)
 
-	var bridgeIn io.ReadCloser
-	var bridgeOut io.WriteCloser
-	if *useInOutErr {
-		bridgeIn = os.Stdin
-		bridgeOut = os.Stdout
-	} else {
-		const commandPort uint32 = 0x40000000
-		bridgeCon, err := tport.Dial(commandPort)
-		if err != nil {
-			logrus.WithFields(logrus.Fields{
-				"port":          commandPort,
-				logrus.ErrorKey: err,
-			}).Fatal("failed to dial host vsock connection")
-		}
-		bridgeIn = bridgeCon
-		bridgeOut = bridgeCon
-	}
+	// During live migration the VM is frozen and only wakes up when the host
+	// shim is ready, so the vsock port should be immediately available. We
+	// use a tight retry interval instead of exponential backoff.
+	const reconnectInterval = 100 * time.Millisecond
 
 	event := cgroups1.MemoryThresholdEvent(*gcsMemLimitBytes, false)
 	gefd, err := gcsControl.RegisterMemoryEvent(event)
```
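Note that the removed block hardcoded the command port as `0x40000000` and treated a dial failure as fatal; the reconnect loop added below dials `prot.LinuxGcsVsockPort` instead, which should be the same well-known GCS vsock port, now referenced by name from the guest protocol package, and retries rather than exiting.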
```diff
@@ -448,10 +431,42 @@ func main() {
 	go readMemoryEvents(startTime, gefdFile, "/gcs", int64(*gcsMemLimitBytes), gcsControl)
 	go readMemoryEvents(startTime, oomFile, "/containers", containersLimit, containersControl)
 	go readMemoryEvents(startTime, virtualPodsOomFile, "/containers/virtual-pods", containersLimit, virtualPodsControl)
-	err = b.ListenAndServe(bridgeIn, bridgeOut)
-	if err != nil {
-		logrus.WithFields(logrus.Fields{
-			logrus.ErrorKey: err,
-		}).Fatal("failed to serve gcs service")
-	}
+
+	mux := bridge.NewBridgeMux()
+	b := bridge.New(mux, *v4)
+	b.AssignHandlers(mux, h)
+
+	// Reconnect loop: dial the host, serve until the connection drops, then
+	// re-dial. During live migration the VM is frozen and only wakes up when
+	// the destination host shim is ready, so the vsock port should be
+	// immediately available.
+	for {
```
An inline review thread is attached to the `for {` line above:

Contributor: @jterry75 I wonder if we should retry indefinitely, or enforce a limit, say 12000 iterations (20 minutes). In that case, if the shim died but the VM is still running, the VM can self-terminate after 20 minutes of inactivity. While that is fragile, I feel that 20 minutes of inactivity after blackout is a critical failure path anyway. What do you think?

Contributor (author): Good question for Justin. My instinct is to retry forever: if the shim died but the VM is still running, the GCS should keep trying until either the host comes back or the VM is torn down externally. A 20-minute timeout would leave the VM in a zombie state where it is running but cannot be managed. That said, happy to add a configurable limit if Justin thinks there is a scenario where self-termination is better.

Contributor: No, it's not a zombie VM. IIRC, if the GCS crashed, the VM will also exit, since its init process exited.

Contributor: The host controls the lifetime of these. Even for a zombie VM, the host would be responsible for cleaning up. So we should just loop forever. If there is no connection and nobody comes to terminate this VM, that's a different bug to deal with. But that should never happen, since we use terminate-on-last-handle-closed.
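For illustration only, the capped variant floated in this thread might look like the sketch below. `dialWithBudget` and `maxAttempts` are hypothetical names not in the PR, and the thread was resolved in favor of the opposite behavior (retry forever; the host owns VM lifetime). The `transport.Transport`/`transport.Connection` signatures are assumed from the guest transport package.

```go
package sketch

import (
	"fmt"
	"time"

	"github.com/Microsoft/hcsshim/internal/guest/prot"
	"github.com/Microsoft/hcsshim/internal/guest/transport"
	"github.com/sirupsen/logrus"
)

// dialWithBudget is a hypothetical bounded alternative to the PR's infinite
// retry: dial the host at a fixed interval and give up after maxAttempts
// (e.g. 12000 attempts * 100ms ≈ 20 minutes), letting the caller decide to
// self-terminate. The PR as merged deliberately does NOT do this.
func dialWithBudget(tport transport.Transport, maxAttempts int) (transport.Connection, error) {
	const reconnectInterval = 100 * time.Millisecond
	for attempt := 0; attempt < maxAttempts; attempt++ {
		conn, err := tport.Dial(prot.LinuxGcsVsockPort)
		if err == nil {
			return conn, nil
		}
		logrus.WithError(err).Warn("failed to dial host, retrying")
		time.Sleep(reconnectInterval)
	}
	return nil, fmt.Errorf("host did not reconnect within %d attempts", maxAttempts)
}
```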
The hunk continues inside the loop:

```diff
+		var bridgeIn io.ReadCloser
+		var bridgeOut io.WriteCloser
+		if *useInOutErr {
+			bridgeIn = os.Stdin
+			bridgeOut = os.Stdout
+		} else {
+			bridgeCon, dialErr := tport.Dial(prot.LinuxGcsVsockPort)
+			if dialErr != nil {
+				logrus.WithError(dialErr).Warn("failed to dial host, retrying")
+				time.Sleep(reconnectInterval)
+				continue
+			}
+			bridgeIn = bridgeCon
+			bridgeOut = bridgeCon
+		}
+
+		logrus.Info("bridge connected, serving")
+
+		serveErr := b.ListenAndServe(bridgeIn, bridgeOut)
+
+		if b.ShutdownRequested() {
+			logrus.Info("bridge shutdown requested, exiting reconnect loop")
+			break
+		}
+
+		logrus.WithError(serveErr).Warn("bridge connection lost, will reconnect")
+		time.Sleep(reconnectInterval)
+	}
 }
```
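The `bridge.go` side of this change is not included in the excerpt, but the new test file below pins down the contract of the notification helpers it adds. As a reading aid, here is a minimal sketch of that contract, reconstructed purely from how the tests use it (the field names come from the tests; the `connected` flag is inferred from a test comment; the actual implementation in the PR may differ in detail):

```go
// Sketch only: Bridge fields assumed by the tests below.
//
//	notifyMu             sync.Mutex
//	connected            bool
//	pendingNotifications []*prot.ContainerNotification
//	responseChan         chan bridgeResponse

// publishNotification either sends a notification to the live connection or,
// when disconnected, queues it for the next reconnect.
func (b *Bridge) publishNotification(n *prot.ContainerNotification) {
	b.notifyMu.Lock()
	if !b.connected {
		b.pendingNotifications = append(b.pendingNotifications, n)
		b.notifyMu.Unlock()
		return
	}
	b.notifyMu.Unlock()
	b.responseChan <- bridgeResponse{response: n}
}

// drainPendingNotifications runs once a new host connection is serving: it
// marks the bridge connected and flushes anything queued while disconnected.
func (b *Bridge) drainPendingNotifications() {
	b.notifyMu.Lock()
	pending := b.pendingNotifications
	b.pendingNotifications = nil
	b.connected = true
	b.notifyMu.Unlock()
	for _, n := range pending {
		b.responseChan <- bridgeResponse{response: n}
	}
}

// disconnectNotifications flips the bridge back to queueing mode when the
// connection to the host drops.
func (b *Bridge) disconnectNotifications() {
	b.notifyMu.Lock()
	b.connected = false
	b.notifyMu.Unlock()
}
```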
The PR also adds a new Linux-only test file to the bridge package (`@@ -0,0 +1,165 @@`, all lines added):

```go
//go:build linux

package bridge

import (
	"io"
	"testing"
	"time"

	"github.com/Microsoft/hcsshim/internal/guest/prot"
)

func TestBridge_NotificationQueuedWhenDisconnected(t *testing.T) {
	b := New(nil, false)
	// Bridge starts disconnected (connected == false).
	b.publishNotification(&prot.ContainerNotification{
		MessageBase: prot.MessageBase{ContainerID: "c1"},
	})
	b.publishNotification(&prot.ContainerNotification{
		MessageBase: prot.MessageBase{ContainerID: "c2"},
	})

	b.notifyMu.Lock()
	if len(b.pendingNotifications) != 2 {
		t.Fatalf("expected 2 queued, got %d", len(b.pendingNotifications))
	}
	b.notifyMu.Unlock()
}

func TestBridge_DrainOnReconnect(t *testing.T) {
	b := New(nil, false)

	// Queue notifications while disconnected.
	b.publishNotification(&prot.ContainerNotification{
		MessageBase: prot.MessageBase{ContainerID: "c1"},
	})
	b.publishNotification(&prot.ContainerNotification{
		MessageBase: prot.MessageBase{ContainerID: "c2"},
	})

	// Simulate what ListenAndServe does: create channels, start writer,
	// then drain.
	b.responseChan = make(chan bridgeResponse, 4)

	b.drainPendingNotifications()

	// Collect drained notifications.
	var ids []string
	for i := 0; i < 2; i++ {
		select {
		case resp := <-b.responseChan:
			n := resp.response.(*prot.ContainerNotification)
			ids = append(ids, n.ContainerID)
		case <-time.After(time.Second):
			t.Fatalf("timed out waiting for notification %d", i)
		}
	}
	if len(ids) != 2 {
		t.Fatalf("expected 2 drained notifications, got %d", len(ids))
	}

	b.notifyMu.Lock()
	if len(b.pendingNotifications) != 0 {
		t.Fatalf("expected 0 pending after drain, got %d", len(b.pendingNotifications))
	}
	b.notifyMu.Unlock()
}

func TestBridge_DisconnectQueuesAfterDrain(t *testing.T) {
	b := New(nil, false)
	b.responseChan = make(chan bridgeResponse, 4)

	// Drain with nothing pending: just sets connected = true.
	b.drainPendingNotifications()

	// Send while connected: goes directly to responseChan.
	b.publishNotification(&prot.ContainerNotification{
		MessageBase: prot.MessageBase{ContainerID: "live"},
	})

	select {
	case resp := <-b.responseChan:
		n := resp.response.(*prot.ContainerNotification)
		if n.ContainerID != "live" {
			t.Fatalf("expected 'live', got %q", n.ContainerID)
		}
	default:
		t.Fatal("expected notification on responseChan")
	}

	// Disconnect: future notifications should queue.
	b.disconnectNotifications()

	b.publishNotification(&prot.ContainerNotification{
		MessageBase: prot.MessageBase{ContainerID: "queued"},
	})

	b.notifyMu.Lock()
	if len(b.pendingNotifications) != 1 {
		t.Fatalf("expected 1 queued after disconnect, got %d", len(b.pendingNotifications))
	}
	b.notifyMu.Unlock()

	// Nothing should be on responseChan.
	select {
	case <-b.responseChan:
		t.Fatal("should not have received on responseChan after disconnect")
	default:
	}
}

func TestBridge_FullReconnectCycle(t *testing.T) {
	b := New(nil, false)

	// --- Iteration 1: simulate ListenAndServe ---
	r1, w1 := io.Pipe()
	b.responseChan = make(chan bridgeResponse, 4)
	b.quitChan = make(chan bool)

	go func() {
		for range b.responseChan {
		}
	}() // drain writer

	b.drainPendingNotifications()

	// Send a notification while connected.
	b.publishNotification(&prot.ContainerNotification{
		MessageBase: prot.MessageBase{ContainerID: "iter1"},
	})

	// Simulate bridge drop: disconnect, close channels.
	b.disconnectNotifications()
	close(b.quitChan)
	close(b.responseChan)
	r1.Close()
	w1.Close()

	// --- Between iterations: container exits ---
	b.publishNotification(&prot.ContainerNotification{
		MessageBase: prot.MessageBase{ContainerID: "between"},
	})

	b.notifyMu.Lock()
	if len(b.pendingNotifications) != 1 || b.pendingNotifications[0].ContainerID != "between" {
		t.Fatalf("expected 'between' queued, got %v", b.pendingNotifications)
	}
	b.notifyMu.Unlock()

	// --- Iteration 2: reconnect ---
	b.responseChan = make(chan bridgeResponse, 4)
	b.quitChan = make(chan bool)

	b.drainPendingNotifications()

	select {
	case resp := <-b.responseChan:
		n := resp.response.(*prot.ContainerNotification)
		if n.ContainerID != "between" {
			t.Fatalf("expected 'between', got %q", n.ContainerID)
		}
	case <-time.After(time.Second):
		t.Fatal("timed out waiting for drained notification")
	}
}
```
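Since these are white-box tests inside the bridge package itself (they touch unexported fields such as `notifyMu` and `responseChan`), they run with the standard toolchain on a Linux host (note the `//go:build linux` constraint), e.g. `go test -v -run 'TestBridge_' ./internal/guest/bridge/` from the repository root, with the directory inferred from the package's import path.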