From e91e7f2d1b90915e6efba01d3e95611a9e223a98 Mon Sep 17 00:00:00 2001 From: Amit Barve Date: Thu, 11 Jun 2020 00:03:50 -0700 Subject: [PATCH 1/2] Add support for creating network namespaces inside cloned uvms. This is one of the many small PRs that enable the support for late cloning. This commit adds the set of functions required for adding network namespace and network endpoints to cloned UVMs. Signed-off-by: Amit Barve --- cmd/containerd-shim-runhcs-v1/pod.go | 17 +------- internal/hcsoci/create.go | 12 +----- internal/hcsoci/hcsdoc_wcow.go | 9 +++- internal/hcsoci/network.go | 62 ++++++++++++++++++++++++++++ internal/uvm/create_wcow.go | 1 - internal/uvm/network.go | 45 ++++++++++++++++---- 6 files changed, 109 insertions(+), 37 deletions(-) diff --git a/cmd/containerd-shim-runhcs-v1/pod.go b/cmd/containerd-shim-runhcs-v1/pod.go index 2d89b0d8ea..952671a31e 100644 --- a/cmd/containerd-shim-runhcs-v1/pod.go +++ b/cmd/containerd-shim-runhcs-v1/pod.go @@ -152,21 +152,8 @@ func createPod(ctx context.Context, events publisher, req *task.CreateTaskReques // isolated. Process isolated WCOW gets the namespace endpoints // automatically. 
if parent != nil { - nsid := "" - if s.Windows != nil && s.Windows.Network != nil { - nsid = s.Windows.Network.NetworkNamespace - } - - if nsid != "" { - endpoints, err := hcsoci.GetNamespaceEndpoints(ctx, nsid) - if err != nil { - return nil, err - } - err = parent.AddNetNS(ctx, nsid) - if err != nil { - return nil, err - } - err = parent.AddEndpointsToNS(ctx, nsid, endpoints) + if s.Windows != nil && s.Windows.Network != nil && s.Windows.Network.NetworkNamespace != "" { + err = hcsoci.SetupNetworkNamespace(ctx, parent, s.Windows.Network.NetworkNamespace) if err != nil { return nil, err } diff --git a/internal/hcsoci/create.go b/internal/hcsoci/create.go index e425316298..2283d3843b 100644 --- a/internal/hcsoci/create.go +++ b/internal/hcsoci/create.go @@ -149,20 +149,10 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C // container but not a workload container in a sandbox that inherits // the namespace. if ct == oci.KubernetesContainerTypeNone || ct == oci.KubernetesContainerTypeSandbox { - endpoints, err := GetNamespaceEndpoints(ctx, coi.actualNetworkNamespace) + err = SetupNetworkNamespace(ctx, coi.HostingSystem, coi.actualNetworkNamespace) if err != nil { return nil, r, err } - err = coi.HostingSystem.AddNetNS(ctx, coi.actualNetworkNamespace) - if err != nil { - return nil, r, err - } - err = coi.HostingSystem.AddEndpointsToNS(ctx, coi.actualNetworkNamespace, endpoints) - if err != nil { - // Best effort clean up the NS - coi.HostingSystem.RemoveNetNS(ctx, coi.actualNetworkNamespace) - return nil, r, err - } r.SetAddedNetNSToVM(true) } } diff --git a/internal/hcsoci/hcsdoc_wcow.go b/internal/hcsoci/hcsdoc_wcow.go index 4e3a06852f..01d8b5c361 100644 --- a/internal/hcsoci/hcsdoc_wcow.go +++ b/internal/hcsoci/hcsdoc_wcow.go @@ -182,7 +182,14 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter v2Container.Networking = &hcsschema.Networking{} v1.EndpointList = coi.Spec.Windows.Network.EndpointList - 
v2Container.Networking.Namespace = coi.actualNetworkNamespace + + // Use the reserved network namespace for containers created inside + // cloned or template UVMs. + if coi.HostingSystem.IsTemplate || coi.HostingSystem.IsClone { + v2Container.Networking.Namespace = uvm.DEFAULT_CLONE_NETWORK_NAMESPACE_ID + } else { + v2Container.Networking.Namespace = coi.actualNetworkNamespace + } v1.AllowUnqualifiedDNSQuery = coi.Spec.Windows.Network.AllowUnqualifiedDNSQuery v2Container.Networking.AllowUnqualifiedDnsQuery = v1.AllowUnqualifiedDNSQuery diff --git a/internal/hcsoci/network.go b/internal/hcsoci/network.go index d33bd6b838..dd0ddca374 100644 --- a/internal/hcsoci/network.go +++ b/internal/hcsoci/network.go @@ -3,6 +3,7 @@ package hcsoci import ( "context" + "github.com/Microsoft/hcsshim/hcn" "github.com/Microsoft/hcsshim/internal/hns" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" @@ -68,3 +69,64 @@ func GetNamespaceEndpoints(ctx context.Context, netNS string) ([]*hns.HNSEndpoin } return endpoints, nil } + +// Network namespace setup is a bit different for templates and clones. +// For templates and clones we use a special network namespace ID. +// Details about this can be found in the Networking section of the late-clone wiki page. +// +// In this function we take the namespace ID of the namespace that was created for this +// UVM. We hot add the namespace (with the default ID if this is a template). We get the +// endpoints associated with this namespace and then hot add those endpoints (by changing +// their namespace IDs by the default IDs if it is a template).
+func SetupNetworkNamespace(ctx context.Context, hostingSystem *uvm.UtilityVM, nsid string) error { + nsidInsideUVM := nsid + if hostingSystem.IsTemplate || hostingSystem.IsClone { + nsidInsideUVM = uvm.DEFAULT_CLONE_NETWORK_NAMESPACE_ID + } + + // Query endpoints with actual nsid + endpoints, err := GetNamespaceEndpoints(ctx, nsid) + if err != nil { + return err + } + + // Add the network namespace inside the UVM if it is not a clone. (Clones will + // inherit the namespace from template) + if !hostingSystem.IsClone { + // Get the namespace struct from the actual nsid. + hcnNamespace, err := hcn.GetNamespaceByID(nsid) + if err != nil { + return err + } + + // All templates should have a special NSID so that it + // will be easier to debug. Override it here. + if hostingSystem.IsTemplate { + hcnNamespace.Id = nsidInsideUVM + } + + if err = hostingSystem.AddNetNS(ctx, hcnNamespace); err != nil { + return err + } + } + + // If adding a network endpoint to clones or a template override nsid associated + // with it. 
+ if hostingSystem.IsClone || hostingSystem.IsTemplate { + // replace nsid for each endpoint + for _, ep := range endpoints { + ep.Namespace = &hns.Namespace{ + ID: nsidInsideUVM, + } + } + } + + if err = hostingSystem.AddEndpointsToNS(ctx, nsidInsideUVM, endpoints); err != nil { + // Best effort clean up the NS + if removeErr := hostingSystem.RemoveNetNS(ctx, nsidInsideUVM); removeErr != nil { + log.G(ctx).Warn(removeErr) + } + return err + } + return nil +} diff --git a/internal/uvm/create_wcow.go b/internal/uvm/create_wcow.go index 51d609895c..40a53b72fb 100644 --- a/internal/uvm/create_wcow.go +++ b/internal/uvm/create_wcow.go @@ -304,7 +304,6 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error uvm.namespaces[DEFAULT_CLONE_NETWORK_NAMESPACE_ID] = &namespaceInfo{ nics: make(map[string]*nicInfo), } - uvm.IsClone = true } diff --git a/internal/uvm/network.go b/internal/uvm/network.go index 2b0769d7af..4be39676f9 100644 --- a/internal/uvm/network.go +++ b/internal/uvm/network.go @@ -56,13 +56,16 @@ func (endpoints *NetworkEndpoints) Release(ctx context.Context) error { return nil } -// AddNetNS adds network namespace inside the guest. +// AddNetNS adds network namespace inside the guest without actually querying for the +// namespace by its ID. It uses the given namespace struct as it is in the guest request. +// This function is mostly used when we need to override the values inside the namespace +// struct returned by the GetNamespaceByID. For most use cases AddNetNSByID is more appropriate. +// -// If a namespace with `id` already exists returns `ErrNetNSAlreadyAttached`. -func (uvm *UtilityVM) AddNetNS(ctx context.Context, id string) error { +// If a namespace with the same id already exists returns `ErrNetNSAlreadyAttached`.
+func (uvm *UtilityVM) AddNetNS(ctx context.Context, hcnNamespace *hcn.HostComputeNamespace) error { uvm.m.Lock() defer uvm.m.Unlock() - if _, ok := uvm.namespaces[id]; ok { + if _, ok := uvm.namespaces[hcnNamespace.Id]; ok { return ErrNetNSAlreadyAttached } @@ -70,10 +73,6 @@ func (uvm *UtilityVM) AddNetNS(ctx context.Context, id string) error { // Add a Guest Network namespace. On LCOW we add the adapters // dynamically. if uvm.operatingSystem == "windows" { - hcnNamespace, err := hcn.GetNamespaceByID(id) - if err != nil { - return err - } guestNamespace := hcsschema.ModifySettingRequest{ GuestRequest: guestrequest.GuestRequest{ ResourceType: guestrequest.ResourceTypeNetworkNamespace, @@ -90,12 +89,28 @@ func (uvm *UtilityVM) AddNetNS(ctx context.Context, id string) error { if uvm.namespaces == nil { uvm.namespaces = make(map[string]*namespaceInfo) } - uvm.namespaces[id] = &namespaceInfo{ + uvm.namespaces[hcnNamespace.Id] = &namespaceInfo{ nics: make(map[string]*nicInfo), } return nil } +// AddNetNSByID finds the namespace with given `id` and adds that +// network namespace inside the guest. +// +// If a namespace with `id` already exists returns `ErrNetNSAlreadyAttached`. +func (uvm *UtilityVM) AddNetNSByID(ctx context.Context, id string) error { + hcnNamespace, err := hcn.GetNamespaceByID(id) + if err != nil { + return err + } + + if err = uvm.AddNetNS(ctx, hcnNamespace); err != nil { + return err + } + return nil +} + // AddEndpointsToNS adds all unique `endpoints` to the network namespace // matching `id`. On failure does not roll back any previously successfully // added endpoints. @@ -314,3 +329,15 @@ func (uvm *UtilityVM) removeNIC(ctx context.Context, id guid.GUID, endpoint *hns } return nil } + +// Removes all NICs added to this uvm.
+func (uvm *UtilityVM) RemoveAllNICs(ctx context.Context) error { + for _, ns := range uvm.namespaces { + for _, ninfo := range ns.nics { + if err := uvm.removeNIC(ctx, ninfo.ID, ninfo.Endpoint); err != nil { + return err + } + } + } + return nil +} From 452b379551be2265155e0290b297ea0f72d65399 Mon Sep 17 00:00:00 2001 From: Amit Barve Date: Mon, 17 Aug 2020 21:37:02 -0700 Subject: [PATCH 2/2] Add support for creating late clones via hcsshim This is one of the many small PRs that enable the support for late cloning.This commit adds the set of functions that expose the late cloning functionality from hcsshim and adds new annotations for clients to use the late cloning feature. Signed-off-by: Amit Barve --- cmd/containerd-shim-runhcs-v1/clone.go | 50 ++++ cmd/containerd-shim-runhcs-v1/exec_clone.go | 67 +++++ cmd/containerd-shim-runhcs-v1/exec_hcs.go | 13 +- cmd/containerd-shim-runhcs-v1/pod.go | 22 +- cmd/containerd-shim-runhcs-v1/task_hcs.go | 167 ++++++++++- .../task_wcow_podsandbox.go | 5 + internal/clone/registry.go | 168 +++++++++++ internal/cow/cow.go | 2 + internal/gcs/container.go | 17 ++ internal/hcsoci/clone.go | 50 ++++ internal/hcsoci/create.go | 269 +++++++++++++++--- internal/hcsoci/hcsdoc_wcow.go | 115 ++++---- internal/hcsoci/resources_wcow.go | 84 +++--- internal/layers/layers.go | 3 + internal/oci/uvm.go | 66 +++++ internal/uvm/clone.go | 34 ++- internal/uvm/create.go | 42 ++- internal/uvm/create_lcow.go | 1 + internal/uvm/create_wcow.go | 12 +- internal/uvm/scsi.go | 26 +- internal/uvm/types.go | 8 + internal/uvm/vsmb.go | 22 ++ 22 files changed, 1075 insertions(+), 168 deletions(-) create mode 100644 cmd/containerd-shim-runhcs-v1/clone.go create mode 100644 cmd/containerd-shim-runhcs-v1/exec_clone.go create mode 100644 internal/clone/registry.go create mode 100644 internal/hcsoci/clone.go diff --git a/cmd/containerd-shim-runhcs-v1/clone.go b/cmd/containerd-shim-runhcs-v1/clone.go new file mode 100644 index 0000000000..221aabacab --- /dev/null +++ 
b/cmd/containerd-shim-runhcs-v1/clone.go @@ -0,0 +1,50 @@ +package main + +import ( + "context" + + "github.com/Microsoft/hcsshim/internal/clone" + "github.com/Microsoft/hcsshim/internal/uvm" +) + +// saveAsTemplate saves the UVM and container inside it as a template and also stores the +// relevant information in the registry so that clones can be created from this template. +// Every cloned uvm gets its own NIC and we do not want to create clones of a template +// which still has a NIC attached to it. So remove the NICs attached to the template uvm +// before saving it. +// Similar to the NIC scenario we do not want to create clones from a template with an +// active GCS connection so close the GCS connection too. +func saveAsTemplate(ctx context.Context, templateTask *hcsTask) (err error) { + var utc *uvm.UVMTemplateConfig + var templateConfig *clone.TemplateConfig + + if err = templateTask.host.RemoveAllNICs(ctx); err != nil { + return err + } + + if err = templateTask.host.CloseGCSConnection(); err != nil { + return err + } + + utc, err = templateTask.host.GenerateTemplateConfig() + if err != nil { + return err + } + + templateConfig = &clone.TemplateConfig{ + TemplateUVMID: utc.UVMID, + TemplateUVMResources: utc.Resources, + TemplateUVMCreateOpts: utc.CreateOpts, + TemplateContainerID: templateTask.id, + TemplateContainerSpec: *templateTask.taskSpec, + } + + if err = clone.SaveTemplateConfig(ctx, templateConfig); err != nil { + return err + } + + if err = templateTask.host.SaveAsTemplate(ctx); err != nil { + return err + } + return nil +} diff --git a/cmd/containerd-shim-runhcs-v1/exec_clone.go b/cmd/containerd-shim-runhcs-v1/exec_clone.go new file mode 100644 index 0000000000..078149f140 --- /dev/null +++ b/cmd/containerd-shim-runhcs-v1/exec_clone.go @@ -0,0 +1,67 @@ +package main + +import ( + "context" + + "github.com/Microsoft/hcsshim/internal/cmd" + "github.com/Microsoft/hcsshim/internal/cow" + "github.com/Microsoft/hcsshim/internal/log" + 
"github.com/Microsoft/hcsshim/internal/uvm" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" +) + +func newClonedExec( + ctx context.Context, + events publisher, + tid string, + host *uvm.UtilityVM, + c cow.Container, + id, bundle string, + isWCOW bool, + spec *specs.Process, + io cmd.UpstreamIO) *clonedExec { + log.G(ctx).WithFields(logrus.Fields{ + "tid": tid, + "eid": id, // Init exec ID is always same as Task ID + "bundle": bundle, + }).Debug("newClonedExec") + + he := &hcsExec{ + events: events, + tid: tid, + host: host, + c: c, + id: id, + bundle: bundle, + isWCOW: isWCOW, + spec: spec, + io: io, + processDone: make(chan struct{}), + state: shimExecStateCreated, + exitStatus: 255, // By design for non-exited process status. + exited: make(chan struct{}), + } + + ce := &clonedExec{ + he, + } + go he.waitForContainerExit() + return ce +} + +var _ = (shimExec)(&clonedExec{}) + +// clonedExec inherits from hcsExec. The only difference between these two is that +// on starting a clonedExec it doesn't attempt to start the container even if the +// exec is the init process. This is because in case of clonedExec the container is +// already running inside the pod. +type clonedExec struct { + *hcsExec +} + +func (ce *clonedExec) Start(ctx context.Context) (err error) { + // A cloned exec should never initialize the container as it should + // already be running. 
+ return ce.startInternal(ctx, false) +} diff --git a/cmd/containerd-shim-runhcs-v1/exec_hcs.go b/cmd/containerd-shim-runhcs-v1/exec_hcs.go index 728ef51c10..7c2115c2cc 100644 --- a/cmd/containerd-shim-runhcs-v1/exec_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/exec_hcs.go @@ -181,7 +181,7 @@ func (he *hcsExec) Status() *task.StateResponse { } } -func (he *hcsExec) Start(ctx context.Context) (err error) { +func (he *hcsExec) startInternal(ctx context.Context, initializeContainer bool) (err error) { he.sl.Lock() defer he.sl.Unlock() if he.state != shimExecStateCreated { @@ -192,8 +192,7 @@ func (he *hcsExec) Start(ctx context.Context) (err error) { he.exitFromCreatedL(ctx, 1) } }() - if he.id == he.tid { - // This is the init exec. We need to start the container itself + if initializeContainer { err = he.c.Start(ctx) if err != nil { return err @@ -257,6 +256,12 @@ func (he *hcsExec) Start(ctx context.Context) (err error) { return nil } +func (he *hcsExec) Start(ctx context.Context) (err error) { + // If he.id == he.tid then this is the init exec. + // We need to initialize the container itself before starting this exec. + return he.startInternal(ctx, he.id == he.tid) +} + func (he *hcsExec) Kill(ctx context.Context, signal uint32) error { he.sl.Lock() defer he.sl.Unlock() @@ -414,6 +419,8 @@ func (he *hcsExec) exitFromCreatedL(ctx context.Context, status int) { // // 6. Close `he.exited` channel to unblock any waiters who might have called // `Create`/`Wait`/`Start` which is a valid pattern. +// +// 7. Finally, save the UVM and this container as a template if specified. 
func (he *hcsExec) waitForExit() { ctx, span := trace.StartSpan(context.Background(), "hcsExec::waitForExit") defer span.End() diff --git a/cmd/containerd-shim-runhcs-v1/pod.go b/cmd/containerd-shim-runhcs-v1/pod.go index 952671a31e..6e341b83fb 100644 --- a/cmd/containerd-shim-runhcs-v1/pod.go +++ b/cmd/containerd-shim-runhcs-v1/pod.go @@ -228,6 +228,16 @@ func (p *pod) ID() string { return p.id } +func (p *pod) GetCloneAnnotations(ctx context.Context, s *specs.Spec) (bool, string, error) { + isTemplate, templateID, err := oci.ParseCloneAnnotations(ctx, s) + if err != nil { + return false, "", err + } else if (isTemplate || templateID != "") && p.host == nil { + return false, "", fmt.Errorf("save as template and creating clones is only supported for hyper-v isolated containers") + } + return isTemplate, templateID, nil +} + func (p *pod) CreateTask(ctx context.Context, req *task.CreateTaskRequest, s *specs.Spec) (_ shimTask, err error) { if req.ID == p.id { return nil, errors.Wrapf(errdefs.ErrAlreadyExists, "task with id: '%s' already exists", req.ID) @@ -270,7 +280,17 @@ func (p *pod) CreateTask(ctx context.Context, req *task.CreateTaskRequest, s *sp sid) } - st, err := newHcsTask(ctx, p.events, p.host, false, req, s) + _, templateID, err := p.GetCloneAnnotations(ctx, s) + if err != nil { + return nil, err + } + + var st shimTask + if templateID != "" { + st, err = newClonedHcsTask(ctx, p.events, p.host, false, req, s, templateID) + } else { + st, err = newHcsTask(ctx, p.events, p.host, false, req, s) + } if err != nil { return nil, err } diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index aa93019e1f..c16271c79c 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -123,6 +123,7 @@ func newHcsTask( }).Debug("newHcsTask") owner := filepath.Base(os.Args[0]) + isTemplate := oci.ParseAnnotationsSaveAsTemplate(ctx, s) io, err := cmd.NewNpipeIO(ctx, req.Stdin, 
req.Stdout, req.Stderr, req.Terminal) if err != nil { @@ -162,14 +163,16 @@ func newHcsTask( } ht := &hcsTask{ - events: events, - id: req.ID, - isWCOW: oci.IsWCOW(s), - c: system, - cr: resources, - ownsHost: ownsParent, - host: parent, - closed: make(chan struct{}), + events: events, + id: req.ID, + isWCOW: oci.IsWCOW(s), + c: system, + cr: resources, + ownsHost: ownsParent, + host: parent, + closed: make(chan struct{}), + taskSpec: s, + isTemplate: isTemplate, } ht.init = newHcsExec( ctx, @@ -189,9 +192,121 @@ func newHcsTask( // handle this case. go ht.waitForHostExit() } + + // In the normal case the `Signal` call from the caller killed this task's + // init process. Or the init process ran to completion - this will mostly + // happen when we are creating a template and want to wait for init process + // to finish before we save the template. In such cases do not tear down the + // container after init exits - because we need the container in the template + go ht.waitInitExit(!isTemplate) + + // Publish the created event + ht.events.publishEvent( + ctx, + runtime.TaskCreateEventTopic, + &eventstypes.TaskCreate{ + ContainerID: req.ID, + Bundle: req.Bundle, + Rootfs: req.Rootfs, + IO: &eventstypes.TaskIO{ + Stdin: req.Stdin, + Stdout: req.Stdout, + Stderr: req.Stderr, + Terminal: req.Terminal, + }, + Checkpoint: "", + Pid: uint32(ht.init.Pid()), + }) + return ht, nil +} + +// newClonedTask creates a container within `parent`. The parent must be already cloned +// from a template and hence this container must already be present inside that parent. +// This function simply creates the go wrapper around the container that is already +// running inside the cloned parent. +// This task MAY own the UVM that it is running in but as of now the cloning feature is +// only used for WCOW hyper-V isolated containers and for WCOW, the wcowPodSandboxTask +// owns that UVM. 
+func newClonedHcsTask( + ctx context.Context, + events publisher, + parent *uvm.UtilityVM, + ownsParent bool, + req *task.CreateTaskRequest, + s *specs.Spec, + templateID string) (_ shimTask, err error) { + log.G(ctx).WithFields(logrus.Fields{ + "tid": req.ID, + "ownsParent": ownsParent, + "templateid": templateID, + }).Debug("newClonedHcsTask") + + owner := filepath.Base(os.Args[0]) + + if parent.OS() != "windows" { + return nil, fmt.Errorf("cloned task can only be created inside a windows host") + } + + io, err := cmd.NewNpipeIO(ctx, req.Stdin, req.Stdout, req.Stderr, req.Terminal) + if err != nil { + return nil, err + } + + var netNS string + if s.Windows != nil && + s.Windows.Network != nil { + netNS = s.Windows.Network.NetworkNamespace + } + + // This is a cloned task. Use the templateid as the ID of the container here + // because that's the ID of this container inside the UVM. + opts := hcsoci.CreateOptions{ + ID: templateID, + Owner: owner, + Spec: s, + HostingSystem: parent, + NetworkNamespace: netNS, + } + system, resources, err := hcsoci.CloneContainer(ctx, &opts) + if err != nil { + return nil, err + } + + ht := &hcsTask{ + events: events, + id: req.ID, + isWCOW: oci.IsWCOW(s), + c: system, + cr: resources, + ownsHost: ownsParent, + host: parent, + closed: make(chan struct{}), + templateID: templateID, + taskSpec: s, + isTemplate: false, + } + ht.init = newClonedExec( + ctx, + events, + req.ID, + parent, + system, + req.ID, + req.Bundle, + ht.isWCOW, + s.Process, + io) + + if parent != nil { + // We have a parent UVM. Listen for its exit and forcibly close this + // task. This is not expected but in the event of a UVM crash we need to + // handle this case. + go ht.waitForHostExit() + } + // In the normal case the `Signal` call from the caller killed this task's // init process. 
- go ht.waitInitExit() + go ht.waitInitExit(true) // Publish the created event ht.events.publishEvent( @@ -268,6 +383,22 @@ type hcsTask struct { // closeHostOnce is used to close `host`. This will only be used if // `ownsHost==true` and `host != nil`. closeHostOnce sync.Once + + // templateID represents the id of the template container from which this container + // is cloned. The parent UVM (inside which this container is running) identifies this + // container with it's original id (i.e the id that was assigned to this container + // at the time of template creation i.e the templateID). Hence, every request that + // is sent to the GCS must actually use templateID to reference this container. + // A non-empty templateID specifies that this task was cloned. + templateID string + + // if isTemplate is true then this container will be saved as a template as soon + // as its init process exits. Note: templateID and isTemplate are mutually exclusive. + // i.e isTemplate can not be true when templateID is not empty. + isTemplate bool + + // taskSpec represents the spec/configuration for this task. 
+ taskSpec *specs.Spec } func (ht *hcsTask) ID() string { @@ -451,7 +582,7 @@ func (ht *hcsTask) Wait() *task.StateResponse { return ht.init.Wait() } -func (ht *hcsTask) waitInitExit() { +func (ht *hcsTask) waitInitExit(destroyContainer bool) { ctx, span := trace.StartSpan(context.Background(), "hcsTask::waitInitExit") defer span.End() span.AddAttributes(trace.StringAttribute("tid", ht.id)) @@ -459,8 +590,20 @@ func (ht *hcsTask) waitInitExit() { // Wait for it to exit on its own ht.init.Wait() - // Close the host and event the exit - ht.close(ctx) + if destroyContainer { + // Close the host and event the exit + ht.close(ctx) + } else { + // Close the container's host, but do not close or terminate the container itself + ht.closeHost(ctx) + } + + if ht.isTemplate { + // Save the host as a template + if err := saveAsTemplate(ctx, ht); err != nil { + log.G(ctx).WithError(err).Error("failed to save as template") + } + } } // waitForHostExit waits for the host virtual machine to exit. Once exited diff --git a/cmd/containerd-shim-runhcs-v1/task_wcow_podsandbox.go b/cmd/containerd-shim-runhcs-v1/task_wcow_podsandbox.go index 9f60d6398c..88a467b09d 100644 --- a/cmd/containerd-shim-runhcs-v1/task_wcow_podsandbox.go +++ b/cmd/containerd-shim-runhcs-v1/task_wcow_podsandbox.go @@ -7,6 +7,7 @@ import ( "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" + "github.com/Microsoft/hcsshim/internal/clone" "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/shimdiag" @@ -170,6 +171,10 @@ func (wpst *wcowPodSandboxTask) close(ctx context.Context) { if err := wpst.host.Close(); err != nil { log.G(ctx).WithError(err).Error("failed host vm shutdown") } + // cleanup template state if any exists + if err := clone.RemoveSavedTemplateConfig(wpst.host.ID()); err != nil { + log.G(ctx).WithError(err).Error("failed to cleanup template 
config state for vm") + } } // Send the `init` exec exit notification always. exit := wpst.init.Status() diff --git a/internal/clone/registry.go b/internal/clone/registry.go new file mode 100644 index 0000000000..ed5ef3dce4 --- /dev/null +++ b/internal/clone/registry.go @@ -0,0 +1,168 @@ +package clone + +import ( + "bytes" + "context" + "encoding/gob" + "fmt" + + "github.com/Microsoft/hcsshim/internal/regstate" + "github.com/Microsoft/hcsshim/internal/uvm" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +const ( + configRoot = "LateClone" + configKey = "UVMConfig" + templateConfigCurrentSerialVersionID = 1 +) + +// TemplateConfig struct maintains all of the information about a template. This includes +// the information for both the template container and the template UVM. This struct is +// serialized and stored in the registry and hence is version controlled. +// Note: Update the `templateConfigCurrentSerialVersionID` when this structure definition +// is changed. +type TemplateConfig struct { + SerialVersionID uint32 + TemplateUVMID string + TemplateUVMResources []uvm.Cloneable + TemplateUVMCreateOpts uvm.OptionsWCOW + TemplateContainerID string + // Below we store the container spec for the template container so that when + // cloning containers we can verify that a different spec is not provided for the + // cloned container. + TemplateContainerSpec specs.Spec +} + +// When encoding interfaces gob requires us to register the struct types that we will be +// using under those interfaces. This registration needs to happen on both sides i.e the +// side which encodes the data (i.e the shim process of the template) and the side which +// decodes the data (i.e the shim process of the clone). +// Go init function: https://golang.org/doc/effective_go.html#init +func init() { + // Register the pointer to structs because that is what is being stored. 
+ gob.Register(&uvm.VSMBShare{}) + gob.Register(&uvm.SCSIMount{}) +} + +func encodeTemplateConfig(templateConfig *TemplateConfig) ([]byte, error) { + var buf bytes.Buffer + + encoder := gob.NewEncoder(&buf) + if err := encoder.Encode(templateConfig); err != nil { + return nil, fmt.Errorf("error while encoding template config: %s", err) + } + return buf.Bytes(), nil +} + +func decodeTemplateConfig(encodedBytes []byte) (*TemplateConfig, error) { + var templateConfig TemplateConfig + + reader := bytes.NewReader(encodedBytes) + decoder := gob.NewDecoder(reader) + if err := decoder.Decode(&templateConfig); err != nil { + return nil, fmt.Errorf("error while decoding template config: %s", err) + } + return &templateConfig, nil +} + +// loadPersistedUVMConfig loads a persisted config from the registry that matches the given ID +// If not found returns `regstate.NotFoundError` +func loadPersistedUVMConfig(id string) ([]byte, error) { + sk, err := regstate.Open(configRoot, false) + if err != nil { + return nil, err + } + defer sk.Close() + + var encodedConfig []byte + if err := sk.Get(id, configKey, &encodedConfig); err != nil { + return nil, err + } + return encodedConfig, nil +} + +// storePersistedUVMConfig stores the given config to the registry. +// If the store fails returns the store error. +func storePersistedUVMConfig(id string, encodedConfig []byte) error { + sk, err := regstate.Open(configRoot, false) + if err != nil { + return err + } + defer sk.Close() + + if err := sk.Create(id, configKey, encodedConfig); err != nil { + return err + } + return nil +} + +// removePersistedUVMConfig removes any persisted state associated with this config. If the config +// is not found in the registry `Remove` returns no error.
+func removePersistedUVMConfig(id string) error { + sk, err := regstate.Open(configRoot, false) + if err != nil { + if regstate.IsNotFoundError(err) { + return nil + } + return err + } + defer sk.Close() + + if err := sk.Remove(id); err != nil { + if regstate.IsNotFoundError(err) { + return nil + } + return err + } + return nil +} + +// Saves all the information required to create a clone from the template +// of this container into the registry. +func SaveTemplateConfig(ctx context.Context, templateConfig *TemplateConfig) error { + _, err := loadPersistedUVMConfig(templateConfig.TemplateUVMID) + if !regstate.IsNotFoundError(err) { + return fmt.Errorf("parent VM(ID: %s) config shouldn't exist in registry (%s)", templateConfig.TemplateUVMID, err) + } + + // set the serial version before encoding + templateConfig.SerialVersionID = templateConfigCurrentSerialVersionID + + encodedBytes, err := encodeTemplateConfig(templateConfig) + if err != nil { + return fmt.Errorf("failed to encode template config: %s", err) + } + + if err := storePersistedUVMConfig(templateConfig.TemplateUVMID, encodedBytes); err != nil { + return fmt.Errorf("failed to store encoded template config: %s", err) + } + + return nil +} + +// Removes all the state associated with the template with given ID +// If there is no state associated with this ID then the function simply returns without +// doing anything. +func RemoveSavedTemplateConfig(id string) error { + return removePersistedUVMConfig(id) +} + +// Retrieves the UVMTemplateConfig for the template with given ID from the registry.
+func FetchTemplateConfig(ctx context.Context, id string) (*TemplateConfig, error) { + encodedBytes, err := loadPersistedUVMConfig(id) + if err != nil { + return nil, fmt.Errorf("failed to fetch encoded template config: %s", err) + } + + templateConfig, err := decodeTemplateConfig(encodedBytes) + if err != nil { + return nil, fmt.Errorf("failed to decode template config: %s", err) + } + + if templateConfig.SerialVersionID != templateConfigCurrentSerialVersionID { + return nil, fmt.Errorf("serialized version of TemplateConfig: %d doesn't match with the current version: %d", templateConfig.SerialVersionID, templateConfigCurrentSerialVersionID) + } + + return templateConfig, nil +} diff --git a/internal/cow/cow.go b/internal/cow/cow.go index 8193315f06..89c1955131 100644 --- a/internal/cow/cow.go +++ b/internal/cow/cow.go @@ -80,4 +80,6 @@ type Container interface { // container to be terminated by some error condition (including calling // Close). Wait() error + // Modify this container + Modify(ctx context.Context, config interface{}) error } diff --git a/internal/gcs/container.go b/internal/gcs/container.go index 218d138f00..8e4ca07905 100644 --- a/internal/gcs/container.go +++ b/internal/gcs/container.go @@ -61,6 +61,23 @@ func (gc *GuestConnection) CreateContainer(ctx context.Context, cid string, conf return c, nil } +// CloneContainer just creates the wrappers and sets up notification requests for a +// container that is already running inside the UVM (after cloning). +func (gc *GuestConnection) CloneContainer(ctx context.Context, cid string) (_ *Container, err error) { + c := &Container{ + gc: gc, + id: cid, + notifyCh: make(chan struct{}), + closeCh: make(chan struct{}), + } + err = gc.requestNotify(cid, c.notifyCh) + if err != nil { + return nil, err + } + go c.waitBackground() + return c, nil +} + // OS returns the operating system of the container, "linux" or "windows". 
func (c *Container) OS() string { return c.gc.os diff --git a/internal/hcsoci/clone.go b/internal/hcsoci/clone.go new file mode 100644 index 0000000000..4d26f21d0b --- /dev/null +++ b/internal/hcsoci/clone.go @@ -0,0 +1,50 @@ +// +build windows + +package hcsoci + +import ( + "context" + "fmt" + + "github.com/Microsoft/hcsshim/internal/cow" + "github.com/Microsoft/hcsshim/internal/requesttype" + hcsschema "github.com/Microsoft/hcsshim/internal/schema2" +) + +const ( + MappedDirectoryResourcePath = "Container/MappedDirectories" + MappedPipeResourcePath = "Container/MappedPipes" +) + +// Usually mounts specified in the container config are added in the container doc +// that is passed along with the container creation reuqest. However, for cloned containers +// we don't send any create container request so we must add the mounts one by one by +// doing Modify requests to that container. +func addMountsToClone(ctx context.Context, c cow.Container, mounts *mountsConfig) error { + // TODO(ambarve) : Find out if there is a way to send request for all the mounts + // at the same time to save time + for _, md := range mounts.mdsv2 { + requestDocument := &hcsschema.ModifySettingRequest{ + RequestType: requesttype.Add, + ResourcePath: MappedDirectoryResourcePath, + Settings: md, + } + err := c.Modify(ctx, requestDocument) + if err != nil { + return fmt.Errorf("error while adding mapped directory (%s) to the container: %s", md.HostPath, err) + } + } + + for _, mp := range mounts.mpsv2 { + requestDocument := &hcsschema.ModifySettingRequest{ + RequestType: requesttype.Add, + ResourcePath: MappedPipeResourcePath, + Settings: mp, + } + err := c.Modify(ctx, requestDocument) + if err != nil { + return fmt.Errorf("error while adding mapped pipe (%s) to the container: %s", mp.HostPath, err) + } + } + return nil +} diff --git a/internal/hcsoci/create.go b/internal/hcsoci/create.go index 2283d3843b..f87adb1e99 100644 --- a/internal/hcsoci/create.go +++ b/internal/hcsoci/create.go @@ 
// cmpSlices compares two slices of strings and returns true if they contain
// the same elements, false otherwise. The elements don't have to be in the
// same order for the slices to be considered equal.
//
// Unlike a naive membership scan, this counts occurrences, so slices that
// differ only in the multiplicity of an element (e.g. ["a","a"] vs ["a","b"])
// are correctly reported as unequal. Runs in O(n) instead of O(n^2).
func cmpSlices(s1, s2 []string) bool {
	if len(s1) != len(s2) {
		return false
	}
	counts := make(map[string]int, len(s1))
	for _, s := range s1 {
		counts[s]++
	}
	for _, s := range s2 {
		counts[s]--
		if counts[s] < 0 {
			// s appears more times in s2 than in s1.
			return false
		}
	}
	// Lengths are equal and no count went negative, so all counts are zero.
	return true
}
+func compareMounts(m1, m2 specs.Mount) bool { + return cmpSlices(m1.Options, m2.Options) && (m1.Source == m2.Source) && (m1.Destination == m2.Destination) && (m1.Type == m2.Type) +} + +// verifyCloneContainerSpecs compares the container creation spec provided during the template container +// creation and the spec provided during cloned container creation and checks that all the fields match +// (except for the certain fields that are allowed to be different). +func verifyCloneContainerSpecs(templateSpec, cloneSpec *specs.Spec) error { + // Following fields can be different in the template and clone specs. + // 1. Process + // 2. Annotations - Only the template/cloning related annotations can be different. + // 3. Windows.LayerFolders - Only the last i.e scratch layer can be different. + + if templateSpec.Version != cloneSpec.Version { + return fmt.Errorf("OCI Runtime Spec version of template (%s) doesn't match with the Spec version of clone (%s)", templateSpec.Version, cloneSpec.Version) + } + + // for annotations check that the values of memory & cpu annotations are same + if templateSpec.Annotations[oci.AnnotationContainerMemorySizeInMB] != cloneSpec.Annotations[oci.AnnotationContainerMemorySizeInMB] { + return fmt.Errorf("memory size limit for template and clone containers can not be different") + } + if templateSpec.Annotations[oci.AnnotationContainerProcessorCount] != cloneSpec.Annotations[oci.AnnotationContainerProcessorCount] { + return fmt.Errorf("processor count for template and clone containers can not be different") + } + if templateSpec.Annotations[oci.AnnotationContainerProcessorLimit] != cloneSpec.Annotations[oci.AnnotationContainerProcessorLimit] { + return fmt.Errorf("processor limit for template and clone containers can not be different") + } + + // LayerFolders should be identical except for the last element. 
+ if !cmpSlices(templateSpec.Windows.LayerFolders[:len(templateSpec.Windows.LayerFolders)-1], cloneSpec.Windows.LayerFolders[:len(cloneSpec.Windows.LayerFolders)-1]) { + return fmt.Errorf("layers provided for template container and clone container don't match. Check the image specified in container config") + } + + if templateSpec.Windows.HyperV != cloneSpec.Windows.HyperV { + return fmt.Errorf("HyperV spec for template and clone containers can not be different") + } + + if templateSpec.Windows.Network.AllowUnqualifiedDNSQuery != cloneSpec.Windows.Network.AllowUnqualifiedDNSQuery { + return fmt.Errorf("different values for allow unqualified DNS query can not be provided for template and clones") + } + if templateSpec.Windows.Network.NetworkSharedContainerName != cloneSpec.Windows.Network.NetworkSharedContainerName { + return fmt.Errorf("different network shared name can not be provided for template and clones") + } + if !cmpSlices(templateSpec.Windows.Network.DNSSearchList, cloneSpec.Windows.Network.DNSSearchList) { + return fmt.Errorf("different DNS search list can not be provided for template and clones") + } + return nil +} + +func validateContainerConfig(ctx context.Context, coi *createOptionsInternal) error { + + if coi.HostingSystem != nil && coi.HostingSystem.IsTemplate && !coi.isTemplate { + return fmt.Errorf("only a template container can be created inside a template pod. Any other combination is not valid") + } + + if coi.HostingSystem != nil && coi.templateID != "" && !coi.HostingSystem.IsClone { + return fmt.Errorf("A container can not be cloned inside a non cloned POD") + } + + if coi.templateID != "" { + // verify that the configurations provided for the template for + // this clone are same. 
+ tc, err := clone.FetchTemplateConfig(ctx, coi.HostingSystem.TemplateID) + if err != nil { + return fmt.Errorf("config validation failed : %s", err) + } + if err := verifyCloneContainerSpecs(&tc.TemplateContainerSpec, coi.Spec); err != nil { + return err + } + } + + if coi.HostingSystem != nil && coi.HostingSystem.IsTemplate { + if len(coi.Spec.Windows.Devices) != 0 { + return fmt.Errorf("Mapped Devices are not supported for template containers") + } + + if _, ok := coi.Spec.Windows.CredentialSpec.(string); ok { + return fmt.Errorf("gMSA specifications are not supported for template containers") + } + + if coi.Spec.Windows.Servicing { + return fmt.Errorf("template containers can't be started in servicing mode") + } + + // check that no mounts are specified. + if len(coi.Spec.Mounts) > 0 { + return fmt.Errorf("user specified mounts are not permitted for template containers") + } + } + return nil +} + +func initializeCreateOptions(ctx context.Context, createOptions *CreateOptions) (*createOptionsInternal, error) { coi := &createOptionsInternal{ CreateOptions: createOptions, actualID: createOptions.ID, actualOwner: createOptions.Owner, } + if coi.Spec == nil { + return nil, fmt.Errorf("Spec must be supplied") + } + // Defaults if omitted by caller. if coi.actualID == "" { g, err := guid.NewV4() if err != nil { - return nil, nil, err + return nil, err } coi.actualID = g.String() } @@ -92,10 +200,6 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C coi.actualOwner = filepath.Base(os.Args[0]) } - if coi.Spec == nil { - return nil, nil, fmt.Errorf("Spec must be supplied") - } - if coi.HostingSystem != nil { // By definition, a hosting system can only be supplied for a v2 Xenon. 
coi.actualSchemaVersion = schemaversion.SchemaV21() @@ -103,10 +207,64 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C coi.actualSchemaVersion = schemaversion.DetermineSchemaVersion(coi.SchemaVersion) } + coi.isTemplate = oci.ParseAnnotationsSaveAsTemplate(ctx, createOptions.Spec) + coi.templateID = oci.ParseAnnotationsTemplateID(ctx, createOptions.Spec) + log.G(ctx).WithFields(logrus.Fields{ "options": fmt.Sprintf("%+v", createOptions), "schema": coi.actualSchemaVersion, - }).Debug("hcsshim::CreateContainer") + }).Debug("hcsshim::initializeCreateOptions") + + return coi, nil +} + +// configureSandboxNetwork creates a new network namespace for the pod (sandbox) +// if required and then adds that namespace to the pod. +func configureSandboxNetwork(ctx context.Context, coi *createOptionsInternal, r *resources.Resources) error { + if coi.NetworkNamespace != "" { + r.SetNetNS(coi.NetworkNamespace) + } else { + err := createNetworkNamespace(ctx, coi, r) + if err != nil { + return err + } + } + coi.actualNetworkNamespace = r.NetNS() + + if coi.HostingSystem != nil { + ct, _, err := oci.GetSandboxTypeAndID(coi.Spec.Annotations) + if err != nil { + return err + } + // Only add the network namespace to a standalone or sandbox + // container but not a workload container in a sandbox that inherits + // the namespace. + if ct == oci.KubernetesContainerTypeNone || ct == oci.KubernetesContainerTypeSandbox { + if err = SetupNetworkNamespace(ctx, coi.HostingSystem, coi.actualNetworkNamespace); err != nil { + return err + } + r.SetAddedNetNSToVM(true) + } + } + + return nil +} + +// CreateContainer creates a container. It can cope with a wide variety of +// scenarios, including v1 HCS schema calls, as well as more complex v2 HCS schema +// calls. Note we always return the resources that have been allocated, even in the +// case of an error. 
This provides support for the debugging option not to +// release the resources on failure, so that the client can make the necessary +// call to release resources that have been allocated as part of calling this function. +func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.Container, _ *resources.Resources, err error) { + coi, err := initializeCreateOptions(ctx, createOptions) + if err != nil { + return nil, nil, err + } + + if err := validateContainerConfig(ctx, coi); err != nil { + return nil, nil, fmt.Errorf("container config validation failed: %s", err) + } r := resources.NewContainerResources(createOptions.ID) defer func() { @@ -130,31 +288,10 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C if coi.Spec.Windows != nil && coi.Spec.Windows.Network != nil && schemaversion.IsV21(coi.actualSchemaVersion) { + err = configureSandboxNetwork(ctx, coi, r) + if err != nil { + return nil, r, fmt.Errorf("failure while creating namespace for container: %s", err) - if coi.NetworkNamespace != "" { - r.SetNetNS(coi.NetworkNamespace) - } else { - err := createNetworkNamespace(ctx, coi, r) - if err != nil { - return nil, r, err - } - } - coi.actualNetworkNamespace = r.NetNS() - if coi.HostingSystem != nil { - ct, _, err := oci.GetSandboxTypeAndID(coi.Spec.Annotations) - if err != nil { - return nil, r, err - } - // Only add the network namespace to a standalone or sandbox - // container but not a workload container in a sandbox that inherits - // the namespace. 
- if ct == oci.KubernetesContainerTypeNone || ct == oci.KubernetesContainerTypeSandbox { - err = SetupNetworkNamespace(ctx, coi.HostingSystem, coi.actualNetworkNamespace) - if err != nil { - return nil, r, err - } - r.SetAddedNetNSToVM(true) - } } } @@ -224,6 +361,64 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C return system, r, nil } +// CloneContainer is similar to CreateContainer but it does not add layers or namespace like +// CreateContainer does. Also, instead of sending create container request it sends a modify +// request to an existing container. CloneContainer only works for WCOW. +func CloneContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.Container, _ *resources.Resources, err error) { + coi, err := initializeCreateOptions(ctx, createOptions) + if err != nil { + return nil, nil, err + } + + if err := validateContainerConfig(ctx, coi); err != nil { + return nil, nil, err + } + + if coi.Spec.Windows == nil || coi.HostingSystem == nil { + return nil, nil, fmt.Errorf("CloneContainer is only supported for Hyper-v isolated WCOW ") + } + + r := resources.NewContainerResources(createOptions.ID) + defer func() { + if err != nil { + if !coi.DoNotReleaseResourcesOnFailure { + resources.ReleaseResources(ctx, r, coi.HostingSystem, true) + } + } + }() + + if coi.HostingSystem != nil { + n := coi.HostingSystem.ContainerCounter() + if coi.Spec.Linux != nil { + r.SetContainerRootInUVM(fmt.Sprintf(lcowRootInUVM, createOptions.ID)) + } else { + r.SetContainerRootInUVM(fmt.Sprintf(wcowRootInUVM, strconv.FormatUint(n, 16))) + } + } + + if err = setupMounts(ctx, coi, r); err != nil { + return nil, r, err + } + + mounts, err := createMountsConfig(ctx, coi) + if err != nil { + return nil, r, err + } + + c, err := coi.HostingSystem.CloneContainer(ctx, coi.actualID) + if err != nil { + return nil, r, err + } + + // Everything that is usually added to the container during the createContainer + // request (via the 
gcsDocument) must be hot added here. + if err := addMountsToClone(ctx, c, mounts); err != nil { + return nil, r, err + } + + return c, r, nil +} + // isV2Xenon returns true if the create options are for a HCS schema V2 xenon container // with a hosting VM func (coi *createOptionsInternal) isV2Xenon() bool { diff --git a/internal/hcsoci/hcsdoc_wcow.go b/internal/hcsoci/hcsdoc_wcow.go index 01d8b5c361..a89992f256 100644 --- a/internal/hcsoci/hcsdoc_wcow.go +++ b/internal/hcsoci/hcsdoc_wcow.go @@ -23,6 +23,60 @@ import ( "github.com/sirupsen/logrus" ) +// A simple wrapper struct around the container mount configs that should be added to the +// container. +type mountsConfig struct { + mdsv1 []schema1.MappedDir + mpsv1 []schema1.MappedPipe + mdsv2 []hcsschema.MappedDirectory + mpsv2 []hcsschema.MappedPipe +} + +func createMountsConfig(ctx context.Context, coi *createOptionsInternal) (*mountsConfig, error) { + // Add the mounts as mapped directories or mapped pipes + // TODO: Mapped pipes to add in v2 schema. 
+ var config mountsConfig + for _, mount := range coi.Spec.Mounts { + if mount.Type != "" { + return nil, fmt.Errorf("invalid container spec - Mount.Type '%s' must not be set", mount.Type) + } + if uvm.IsPipe(mount.Source) { + src, dst := uvm.GetContainerPipeMapping(coi.HostingSystem, mount) + config.mpsv1 = append(config.mpsv1, schema1.MappedPipe{HostPath: src, ContainerPipeName: dst}) + config.mpsv2 = append(config.mpsv2, hcsschema.MappedPipe{HostPath: src, ContainerPipeName: dst}) + } else { + readOnly := false + for _, o := range mount.Options { + if strings.ToLower(o) == "ro" { + readOnly = true + } + } + mdv1 := schema1.MappedDir{HostPath: mount.Source, ContainerPath: mount.Destination, ReadOnly: readOnly} + mdv2 := hcsschema.MappedDirectory{ContainerPath: mount.Destination, ReadOnly: readOnly} + if coi.HostingSystem == nil { + mdv2.HostPath = mount.Source + } else { + uvmPath, err := coi.HostingSystem.GetVSMBUvmPath(ctx, mount.Source, readOnly) + if err != nil { + if err == uvm.ErrNotAttached { + // It could also be a scsi mount. + uvmPath, err = coi.HostingSystem.GetScsiUvmPath(ctx, mount.Source) + if err != nil { + return nil, err + } + } else { + return nil, err + } + } + mdv2.HostPath = uvmPath + } + config.mdsv1 = append(config.mdsv1, mdv1) + config.mdsv2 = append(config.mdsv2, mdv2) + } + } + return &config, nil +} + // createWindowsContainerDocument creates documents for passing to HCS or GCS to create // a container, both hosted and process isolated. It creates both v1 and v2 // container objects, WCOW only. The containers storage should have been mounted already. @@ -185,7 +239,7 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter // Use the reserved network namespace for containers created inside // cloned or template UVMs. 
- if coi.HostingSystem.IsTemplate || coi.HostingSystem.IsClone { + if coi.HostingSystem != nil && (coi.HostingSystem.IsTemplate || coi.HostingSystem.IsClone) { v2Container.Networking.Namespace = uvm.DEFAULT_CLONE_NETWORK_NAMESPACE_ID } else { v2Container.Networking.Namespace = coi.actualNetworkNamespace @@ -275,60 +329,17 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter } } - // Add the mounts as mapped directories or mapped pipes - // TODO: Mapped pipes to add in v2 schema. - var ( - mdsv1 []schema1.MappedDir - mpsv1 []schema1.MappedPipe - mdsv2 []hcsschema.MappedDirectory - mpsv2 []hcsschema.MappedPipe - ) - for _, mount := range coi.Spec.Mounts { - if mount.Type != "" { - return nil, nil, fmt.Errorf("invalid container spec - Mount.Type '%s' must not be set", mount.Type) - } - if uvm.IsPipe(mount.Source) { - src, dst := uvm.GetContainerPipeMapping(coi.HostingSystem, mount) - mpsv1 = append(mpsv1, schema1.MappedPipe{HostPath: src, ContainerPipeName: dst}) - mpsv2 = append(mpsv2, hcsschema.MappedPipe{HostPath: src, ContainerPipeName: dst}) - } else { - readOnly := false - for _, o := range mount.Options { - if strings.ToLower(o) == "ro" { - readOnly = true - } - } - mdv1 := schema1.MappedDir{HostPath: mount.Source, ContainerPath: mount.Destination, ReadOnly: readOnly} - mdv2 := hcsschema.MappedDirectory{ContainerPath: mount.Destination, ReadOnly: readOnly} - if coi.HostingSystem == nil { - mdv2.HostPath = mount.Source - } else { - uvmPath, err := coi.HostingSystem.GetVSMBUvmPath(ctx, mount.Source, readOnly) - if err != nil { - if err == uvm.ErrNotAttached { - // It could also be a scsi mount. 
- uvmPath, err = coi.HostingSystem.GetScsiUvmPath(ctx, mount.Source) - if err != nil { - return nil, nil, err - } - } else { - return nil, nil, err - } - } - mdv2.HostPath = uvmPath - } - mdsv1 = append(mdsv1, mdv1) - mdsv2 = append(mdsv2, mdv2) - } + mounts, err := createMountsConfig(ctx, coi) + if err != nil { + return nil, nil, err } - - v1.MappedDirectories = mdsv1 - v2Container.MappedDirectories = mdsv2 - if len(mpsv1) > 0 && osversion.Get().Build < osversion.RS3 { + v1.MappedDirectories = mounts.mdsv1 + v2Container.MappedDirectories = mounts.mdsv2 + if len(mounts.mpsv1) > 0 && osversion.Get().Build < osversion.RS3 { return nil, nil, fmt.Errorf("named pipe mounts are not supported on this version of Windows") } - v1.MappedPipes = mpsv1 - v2Container.MappedPipes = mpsv2 + v1.MappedPipes = mounts.mpsv1 + v2Container.MappedPipes = mounts.mpsv2 // add assigned devices to the container definition if err := parseAssignedDevices(ctx, coi, v2Container); err != nil { diff --git a/internal/hcsoci/resources_wcow.go b/internal/hcsoci/resources_wcow.go index 9ace171729..0ef3ea5482 100644 --- a/internal/hcsoci/resources_wcow.go +++ b/internal/hcsoci/resources_wcow.go @@ -60,6 +60,53 @@ func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r r.SetLayers(layers) } + if err := setupMounts(ctx, coi, r); err != nil { + return err + } + + if cs, ok := coi.Spec.Windows.CredentialSpec.(string); ok { + // Only need to create a CCG instance for v2 containers + if schemaversion.IsV21(coi.actualSchemaVersion) { + hypervisorIsolated := coi.HostingSystem != nil + ccgInstance, ccgResource, err := credentials.CreateCredentialGuard(ctx, coi.actualID, cs, hypervisorIsolated) + if err != nil { + return err + } + coi.ccgState = ccgInstance.CredentialGuard + r.Add(ccgResource) + if hypervisorIsolated { + // If hypervisor isolated we need to add an hvsocket service table entry + // By default HVSocket won't allow something inside the VM to connect + // back to a process 
on the host. We need to update the HVSocket service table + // to allow a connection to CCG.exe on the host, so that GMSA can function. + // We need to hot add this here because at UVM creation time we don't know what containers + // will be launched in the UVM, nonetheless if they will ask for GMSA. This is a workaround + // for the previous design requirement for CCG V2 where the service entry + // must be present in the UVM'S HCS document before being sent over as hot adding + // an HvSocket service was not possible. + hvSockConfig := ccgInstance.HvSocketConfig + if err := coi.HostingSystem.UpdateHvSocketService(ctx, hvSockConfig.ServiceId, hvSockConfig.ServiceConfig); err != nil { + return fmt.Errorf("failed to update hvsocket service: %s", err) + } + } + } + } + + if coi.HostingSystem != nil && coi.hasWindowsAssignedDevices() { + windowsDevices, closers, err := handleAssignedDevicesWindows(ctx, coi.HostingSystem, coi.Spec.Annotations, coi.Spec.Windows.Devices) + if err != nil { + return err + } + r.Add(closers...) + coi.Spec.Windows.Devices = windowsDevices + } + + return nil +} + +// setupMount adds the custom mounts requested in the container configuration of this +// request. +func setupMounts(ctx context.Context, coi *createOptionsInternal, r *resources.Resources) error { // Validate each of the mounts. If this is a V2 Xenon, we have to add them as // VSMB shares to the utility VM. For V1 Xenon and Argons, there's nothing for // us to do as it's done by HCS. 
@@ -121,42 +168,5 @@ func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r } } - if cs, ok := coi.Spec.Windows.CredentialSpec.(string); ok { - // Only need to create a CCG instance for v2 containers - if schemaversion.IsV21(coi.actualSchemaVersion) { - hypervisorIsolated := coi.HostingSystem != nil - ccgInstance, ccgResource, err := credentials.CreateCredentialGuard(ctx, coi.actualID, cs, hypervisorIsolated) - if err != nil { - return err - } - coi.ccgState = ccgInstance.CredentialGuard - r.Add(ccgResource) - if hypervisorIsolated { - // If hypervisor isolated we need to add an hvsocket service table entry - // By default HVSocket won't allow something inside the VM to connect - // back to a process on the host. We need to update the HVSocket service table - // to allow a connection to CCG.exe on the host, so that GMSA can function. - // We need to hot add this here because at UVM creation time we don't know what containers - // will be launched in the UVM, nonetheless if they will ask for GMSA. This is a workaround - // for the previous design requirement for CCG V2 where the service entry - // must be present in the UVM'S HCS document before being sent over as hot adding - // an HvSocket service was not possible. - hvSockConfig := ccgInstance.HvSocketConfig - if err := coi.HostingSystem.UpdateHvSocketService(ctx, hvSockConfig.ServiceId, hvSockConfig.ServiceConfig); err != nil { - return fmt.Errorf("failed to update hvsocket service: %s", err) - } - } - } - } - - if coi.HostingSystem != nil && coi.hasWindowsAssignedDevices() { - windowsDevices, closers, err := handleAssignedDevicesWindows(ctx, coi.HostingSystem, coi.Spec.Annotations, coi.Spec.Windows.Devices) - if err != nil { - return err - } - r.Add(closers...) 
- coi.Spec.Windows.Devices = windowsDevices - } - return nil } diff --git a/internal/layers/layers.go b/internal/layers/layers.go index fb5429bcc0..01d8529cc6 100644 --- a/internal/layers/layers.go +++ b/internal/layers/layers.go @@ -125,6 +125,9 @@ func MountContainerLayers(ctx context.Context, layerFolders []string, guestRoot if uvm.OS() == "windows" { options := uvm.DefaultVSMBOptions(true) options.TakeBackupPrivilege = true + if uvm.IsTemplate { + uvm.SetSaveableVSMBOptions(options, options.ReadOnly) + } if _, err := uvm.AddVSMB(ctx, layerPath, options); err != nil { return "", fmt.Errorf("failed to add VSMB layer: %s", err) } diff --git a/internal/oci/uvm.go b/internal/oci/uvm.go index 9c8003cd25..3d57222804 100644 --- a/internal/oci/uvm.go +++ b/internal/oci/uvm.go @@ -3,10 +3,12 @@ package oci import ( "context" "errors" + "fmt" "strconv" "strings" runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + "github.com/Microsoft/hcsshim/internal/clone" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/uvm" @@ -138,6 +140,21 @@ const ( // annotation used to specify the cpugroup ID that a UVM should be assigned to annotationCPUGroupID = "io.microsoft.virtualmachine.cpugroup.id" + + // SaveAsTemplate annotation must be used with a pod & container creation request. + // If this annotation is present in the request then it will save the UVM (pod) + // and the container(s) inside it as a template. However, this also means that this + // pod and the containers inside this pod will permananetly stay in the + // paused/templated state and can not be resumed again. + annotationSaveAsTemplate = "io.microsoft.virtualmachine.saveastemplate" + + // This annotation should be used when creating a pod or a container from a template. 
+ // When creating a pod from a template use the ID of the templated pod as the + // TemplateID and when creating a container use the ID of the templated container as + // the TemplateID. It is the client's responsibility to make sure that the sandbox + // within which a cloned container needs to be created must also be created from the + // same template. + annotationTemplateID = "io.microsoft.virtualmachine.templateid" ) // parseAnnotationsBool searches `a` for `key` and if found verifies that the @@ -328,6 +345,32 @@ func parseAnnotationsString(a map[string]string, key string, def string) string return def } +// ParseAnnotationsSaveAsTemplate searches for the boolean value which specifies +// if this create request should be considered as a template creation request. If value +// is found the returns the actual value, returns false otherwise. +func ParseAnnotationsSaveAsTemplate(ctx context.Context, s *specs.Spec) bool { + return parseAnnotationsBool(ctx, s.Annotations, annotationSaveAsTemplate, false) +} + +// ParseAnnotationsTemplateID searches for the templateID in the create request. If the +// value is found then returns the value otherwise returns the empty string. 
+func ParseAnnotationsTemplateID(ctx context.Context, s *specs.Spec) string { + return parseAnnotationsString(s.Annotations, annotationTemplateID, "") +} + +func ParseCloneAnnotations(ctx context.Context, s *specs.Spec) (isTemplate bool, templateID string, err error) { + templateID = ParseAnnotationsTemplateID(ctx, s) + isTemplate = ParseAnnotationsSaveAsTemplate(ctx, s) + if templateID != "" && isTemplate { + return false, "", fmt.Errorf("templateID and save as template flags can not be passed in the same request") + } + + if (isTemplate || templateID != "") && !IsWCOW(s) { + return false, "", fmt.Errorf("save as template and creating clones is only available for WCOW") + } + return +} + // handleAnnotationKernelDirectBoot handles parsing annotationKernelDirectBoot and setting // implied annotations from the result. func handleAnnotationKernelDirectBoot(ctx context.Context, a map[string]string, lopts *uvm.OptionsLCOW) { @@ -369,6 +412,26 @@ func handleAnnotationFullyPhysicallyBacked(ctx context.Context, a map[string]str } } +// handleCloneAnnotations handles parsing annotations related to template creation and cloning +// Since late cloning is only supported for WCOW this function only deals with WCOW options. +func handleCloneAnnotations(ctx context.Context, a map[string]string, wopts *uvm.OptionsWCOW) (err error) { + wopts.IsTemplate = parseAnnotationsBool(ctx, a, annotationSaveAsTemplate, false) + templateID := parseAnnotationsString(a, annotationTemplateID, "") + if templateID != "" { + tc, err := clone.FetchTemplateConfig(ctx, templateID) + if err != nil { + return err + } + wopts.TemplateConfig = &uvm.UVMTemplateConfig{ + UVMID: tc.TemplateUVMID, + CreateOpts: tc.TemplateUVMCreateOpts, + Resources: tc.TemplateUVMResources, + } + wopts.IsClone = true + } + return nil +} + // SpecToUVMCreateOpts parses `s` and returns either `*uvm.OptionsLCOW` or // `*uvm.OptionsWCOW`. 
func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) (interface{}, error) { @@ -419,6 +482,9 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) ( wopts.DisableCompartmentNamespace = parseAnnotationsBool(ctx, s.Annotations, annotationDisableCompartmentNamespace, wopts.DisableCompartmentNamespace) wopts.CPUGroupID = parseAnnotationsString(s.Annotations, annotationCPUGroupID, wopts.CPUGroupID) handleAnnotationFullyPhysicallyBacked(ctx, s.Annotations, wopts) + if err := handleCloneAnnotations(ctx, s.Annotations, wopts); err != nil { + return nil, err + } return wopts, nil } return nil, errors.New("cannot create UVM opts spec is not LCOW or WCOW") diff --git a/internal/uvm/clone.go b/internal/uvm/clone.go index 7d25713e0e..5a7977b482 100644 --- a/internal/uvm/clone.go +++ b/internal/uvm/clone.go @@ -2,7 +2,9 @@ package uvm import ( "context" + "fmt" + "github.com/Microsoft/hcsshim/internal/cow" hcsschema "github.com/Microsoft/hcsshim/internal/schema2" "github.com/pkg/errors" ) @@ -50,7 +52,7 @@ type Cloneable interface { Clone(ctx context.Context, vm *UtilityVM, cd *cloneData) error } -// cloneData contains all the information that might be required during cloning process of +// A struct to keep all the information that might be required during cloning process of // a resource. type cloneData struct { // doc spec for the clone @@ -68,15 +70,22 @@ type UVMTemplateConfig struct { UVMID string // Array of all resources that will be required while making a clone from this template Resources []Cloneable + // The OptionsWCOW used for template uvm creation + CreateOpts OptionsWCOW } // Captures all the information that is necessary to properly save this UVM as a template // and create clones from this template later. The struct returned by this method must be // later on made available while creating a clone from this template. 
-func (uvm *UtilityVM) GenerateTemplateConfig() *UVMTemplateConfig { +func (uvm *UtilityVM) GenerateTemplateConfig() (*UVMTemplateConfig, error) { + if _, ok := uvm.createOpts.(OptionsWCOW); !ok { + return nil, fmt.Errorf("template config can only be created for a WCOW uvm") + } + // Add all the SCSI Mounts and VSMB shares into the list of clones templateConfig := &UVMTemplateConfig{ - UVMID: uvm.ID(), + UVMID: uvm.ID(), + CreateOpts: uvm.createOpts.(OptionsWCOW), } for _, vsmbShare := range uvm.vsmbDirShares { @@ -95,13 +104,13 @@ func (uvm *UtilityVM) GenerateTemplateConfig() *UVMTemplateConfig { } } - return templateConfig + return templateConfig, nil } // Pauses the uvm and then saves it as a template. This uvm can not be restarted or used // after it is successfully saved. -// uvm must be in the paused state before we attempt to save it. save call will throw the -// VM in incorrect state exception if it is not in the paused state at the time of saving. +// uvm must be in the paused state before it can be saved as a template.save call will throw +// an incorrect uvm state exception if uvm is not in the paused state at the time of saving. 
func (uvm *UtilityVM) SaveAsTemplate(ctx context.Context) error { if err := uvm.hcsSystem.Pause(ctx); err != nil { return errors.Wrap(err, "error pausing the VM") @@ -115,3 +124,16 @@ func (uvm *UtilityVM) SaveAsTemplate(ctx context.Context) error { } return nil } + +// CloneContainer attaches back to a container that is already running inside the UVM +// because the UVM was cloned. +func (uvm *UtilityVM) CloneContainer(ctx context.Context, id string) (cow.Container, error) { + if uvm.gc == nil { + return nil, fmt.Errorf("clone container cannot work without external GCS connection") + } + c, err := uvm.gc.CloneContainer(ctx, id) + if err != nil { + return nil, fmt.Errorf("failed to clone container %s: %s", id, err) + } + return c, nil +} diff --git a/internal/uvm/create.go b/internal/uvm/create.go index 5f69b015b2..47eda3cca9 100644 --- a/internal/uvm/create.go +++ b/internal/uvm/create.go @@ -87,6 +87,37 @@ type Options struct { CPUGroupID string } +// compares the create opts used during template creation with the create opts +// provided for clone creation. If they don't match (except for a few fields) +// then clone creation fails. +func verifyCloneUvmCreateOpts(templateOpts, cloneOpts *OptionsWCOW) bool { + // Following fields can be different in the template and clone configurations. + // 1. the scratch layer path, i.e. the last element of the LayerFolders path. + // 2. IsTemplate, IsClone and TemplateConfig variables. + // 3. ID + // 4. AdditionalHCSDocumentJSON + + // Save the original values of the fields that we want to ignore and replace them with + // the same values as those of the other object, so that we can simply use the `==` operator. + templateIDBackup := templateOpts.ID + templateAdditionalJsonBackup := templateOpts.AdditionHCSDocumentJSON + templateOpts.ID = cloneOpts.ID + templateOpts.AdditionHCSDocumentJSON = cloneOpts.AdditionHCSDocumentJSON + + // We can't use `==` operator on structs which include slices in them. 
So compare the + // Layerfolders separately and then directly compare the Options struct. + result := (len(templateOpts.LayerFolders) == len(cloneOpts.LayerFolders)) + for i := 0; result && i < len(templateOpts.LayerFolders)-1; i++ { + result = result && (templateOpts.LayerFolders[i] == cloneOpts.LayerFolders[i]) + } + result = result && (*templateOpts.Options == *cloneOpts.Options) + + // set original values + templateOpts.ID = templateIDBackup + templateOpts.AdditionHCSDocumentJSON = templateAdditionalJsonBackup + return result +} + // Verifies that the final UVM options are correct and supported. func verifyOptions(ctx context.Context, options interface{}) error { switch opts := options.(type) { @@ -123,9 +154,18 @@ func verifyOptions(ctx context.Context, options interface{}) error { if len(opts.LayerFolders) < 2 { return errors.New("at least 2 LayerFolders must be supplied") } + if opts.IsClone && !verifyCloneUvmCreateOpts(&opts.TemplateConfig.CreateOpts, opts) { + return errors.New("clone configuration doesn't match with template configuration.") + } if opts.IsClone && opts.TemplateConfig == nil { return errors.New("template config can not be nil when creating clone") } + if opts.IsClone && !opts.ExternalGuestConnection { + return errors.New("External gcs connection can not be disabled for clones") + } + if opts.IsTemplate && opts.FullyPhysicallyBacked { + return errors.New("Template can not be created from a full physically backed UVM") + } } return nil } @@ -211,7 +251,7 @@ func (uvm *UtilityVM) Close() (err error) { } if err := uvm.CloseGCSConnection(); err != nil { - log.G(ctx).Errorf("close gcs connection failed: %f", err) + log.G(ctx).Errorf("close GCS connection failed: %s", err) } // outputListener will only be nil for a Create -> Stop without a Start. 
In diff --git a/internal/uvm/create_lcow.go b/internal/uvm/create_lcow.go index 1ce25e01f7..1b3308e0ba 100644 --- a/internal/uvm/create_lcow.go +++ b/internal/uvm/create_lcow.go @@ -168,6 +168,7 @@ func CreateLCOW(ctx context.Context, opts *OptionsLCOW) (_ *UtilityVM, err error physicallyBacked: !opts.AllowOvercommit, devicesPhysicallyBacked: opts.FullyPhysicallyBacked, cpuGroupID: opts.CPUGroupID, + createOpts: opts, } defer func() { diff --git a/internal/uvm/create_wcow.go b/internal/uvm/create_wcow.go index 40a53b72fb..79bd93ec7e 100644 --- a/internal/uvm/create_wcow.go +++ b/internal/uvm/create_wcow.go @@ -215,6 +215,7 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error physicallyBacked: !opts.AllowOvercommit, devicesPhysicallyBacked: opts.FullyPhysicallyBacked, cpuGroupID: opts.CPUGroupID, + createOpts: *opts, } defer func() { @@ -277,11 +278,7 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error }, } - uvm.scsiLocations[0][0] = &SCSIMount{ - vm: uvm, - HostPath: doc.VirtualMachine.Devices.Scsi["0"].Attachments["0"].Path, - refCount: 1, - } + uvm.scsiLocations[0][0] = newSCSIMount(uvm, doc.VirtualMachine.Devices.Scsi["0"].Attachments["0"].Path, "", "", 1, 0, 0, false) } else { doc.VirtualMachine.RestoreState = &hcsschema.RestoreState{} doc.VirtualMachine.RestoreState.TemplateSystemId = opts.TemplateConfig.UVMID @@ -305,14 +302,13 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error nics: make(map[string]*nicInfo), } uvm.IsClone = true + uvm.TemplateID = opts.TemplateConfig.UVMID } // Add appropriate VSMB share options if this UVM needs to be saved as a template if opts.IsTemplate { for _, share := range doc.VirtualMachine.Devices.VirtualSmb.Shares { - share.Options.PseudoDirnotify = true - share.Options.NoLocks = true - share.Options.NoDirectmap = true + uvm.SetSaveableVSMBOptions(share.Options, share.Options.ReadOnly) } uvm.IsTemplate = true } diff --git 
a/internal/uvm/scsi.go b/internal/uvm/scsi.go index 03569ffa39..d211352ad2 100644 --- a/internal/uvm/scsi.go +++ b/internal/uvm/scsi.go @@ -448,17 +448,21 @@ func (sm *SCSIMount) Clone(ctx context.Context, vm *UtilityVM, cd *cloneData) er ) if !sm.readOnly { - // Copy this scsi disk - // TODO(ambarve): This is a writeable SCSI mount. It can either be the - // scratch VHD of the UVM or it can be a SCSI mount that belongs to some - // container which is being automatically cloned here as a part of UVM - // cloning process. We will receive a request for creation of this - // container later on which will specify the storage path for this - // container. However, that storage location is not available now so we - // just use the storage of the uvm instead. Find a better way for handling - // this. Problem with this approach is that the scratch VHD of the container - // will not be automatically cleaned after container exits. It will stay - // there as long as the UVM keeps running. + // This is a writeable SCSI mount. It must be either the + // 1. scratch VHD of the UVM or + // 2. scratch VHD of the container. + // A user provided writable SCSI mount is not allowed on the template UVM + // or container and so this SCSI mount has to be the scratch VHD of the + // UVM or container. The container inside this UVM will automatically be + // cloned here when we are cloning the uvm itself. We will receive a + // request for creation of this container later and that request will + // specify the storage path for this container. However, that storage + // location is not available now so we just use the storage path of the + // uvm instead. + // TODO(ambarve): Find a better way for handling this. Problem with this + // approach is that the scratch VHD of the container will not be + // automatically cleaned after container exits. It will stay there as long + // as the UVM keeps running. 
// For the scratch VHD of the VM (always attached at Controller:0, LUN:0) // clone it in the scratch folder diff --git a/internal/uvm/types.go b/internal/uvm/types.go index 7d25ba2f0e..5f29166edf 100644 --- a/internal/uvm/types.go +++ b/internal/uvm/types.go @@ -122,4 +122,12 @@ type UtilityVM struct { // specifies if this UVM is a cloned from a template IsClone bool + + // ID of the template from which this clone was created. Only applies when IsClone + // is true + TemplateID string + + // The CreateOpts used to create this uvm. These can be either of type + // uvm.OptionsLCOW or uvm.OptionsWCOW + createOpts interface{} } diff --git a/internal/uvm/vsmb.go b/internal/uvm/vsmb.go index 201c7bb3ed..e15fb42554 100644 --- a/internal/uvm/vsmb.go +++ b/internal/uvm/vsmb.go @@ -60,6 +60,28 @@ func (uvm *UtilityVM) DefaultVSMBOptions(readOnly bool) *hcsschema.VirtualSmbSha return opts } +func (uvm *UtilityVM) SetSaveableVSMBOptions(opts *hcsschema.VirtualSmbShareOptions, readOnly bool) { + if readOnly { + opts.ShareRead = true + opts.CacheIo = true + opts.ReadOnly = true + opts.PseudoOplocks = true + opts.NoOplocks = false + } else { + // Using NoOpLocks can cause intermittent Access denied failures due to + // a VSMB bug that was fixed but not backported to RS5/19H1. + opts.ShareRead = false + opts.CacheIo = false + opts.ReadOnly = false + opts.PseudoOplocks = false + opts.NoOplocks = true + } + opts.NoLocks = true + opts.PseudoDirnotify = true + opts.NoDirectmap = true + return +} + // findVSMBShare finds a share by `hostPath`. If not found returns `ErrNotAttached`. func (uvm *UtilityVM) findVSMBShare(ctx context.Context, m map[string]*VSMBShare, shareKey string) (*VSMBShare, error) { share, ok := m[shareKey]