diff --git a/cmd/containerd-shim-runhcs-v1/pod.go b/cmd/containerd-shim-runhcs-v1/pod.go index 2d89b0d8ea..b144c69b34 100644 --- a/cmd/containerd-shim-runhcs-v1/pod.go +++ b/cmd/containerd-shim-runhcs-v1/pod.go @@ -51,18 +51,24 @@ type shimPod interface { // the `shimExecStateRunning, shimExecStateExited` states. If the exec is // not in this state this pod MUST return `errdefs.ErrFailedPrecondition`. KillTask(ctx context.Context, tid, eid string, signal uint32, all bool) error + + // Specifies if this pod is a template + IsTemplate() bool } -func createPod(ctx context.Context, events publisher, req *task.CreateTaskRequest, s *specs.Spec) (shimPod, error) { +// TODO (ambarve): The third return value of this function (the template id) +// is only needed until we implement the late cloning part after that this return +// value can be removed. +func createPod(ctx context.Context, events publisher, req *task.CreateTaskRequest, s *specs.Spec) (shimPod, error, string) { log.G(ctx).WithField("tid", req.ID).Debug("createPod") if osversion.Get().Build < osversion.RS5 { - return nil, errors.Wrapf(errdefs.ErrFailedPrecondition, "pod support is not available on Windows versions previous to RS5 (%d)", osversion.RS5) + return nil, errors.Wrapf(errdefs.ErrFailedPrecondition, "pod support is not available on Windows versions previous to RS5 (%d)", osversion.RS5), "" } ct, sid, err := oci.GetSandboxTypeAndID(s.Annotations) if err != nil { - return nil, err + return nil, err, "" } if ct != oci.KubernetesContainerTypeSandbox { return nil, errors.Wrapf( @@ -70,7 +76,7 @@ func createPod(ctx context.Context, events publisher, req *task.CreateTaskReques "expected annotation: '%s': '%s' got '%s'", oci.KubernetesContainerTypeAnnotation, oci.KubernetesContainerTypeSandbox, - ct) + ct), "" } if sid != req.ID { return nil, errors.Wrapf( @@ -78,26 +84,29 @@ func createPod(ctx context.Context, events publisher, req *task.CreateTaskReques "expected annotation '%s': '%s' got '%s'", oci.KubernetesSandboxIDAnnotation, req.ID, - sid) + sid), "" } owner := filepath.Base(os.Args[0]) isWCOW := oci.IsWCOW(s) var parent *uvm.UtilityVM + var isCreateTemplateRequest bool if oci.IsIsolated(s) { // Create the UVM parent opts, err := oci.SpecToUVMCreateOpts(ctx, s, fmt.Sprintf("%s@vm", req.ID), owner) if err != nil { - return nil, err + return nil, err, "" } switch opts.(type) { case *uvm.OptionsLCOW: lopts := (opts).(*uvm.OptionsLCOW) parent, err = uvm.CreateLCOW(ctx, lopts) if err != nil { - return nil, err + return nil, err, "" } + // TODO(ambarve): implement cloning for LCOW + isCreateTemplateRequest = lopts.Options.SaveAsTemplate case *uvm.OptionsWCOW: wopts := (opts).(*uvm.OptionsWCOW) @@ -111,23 +120,24 @@ func createPod(ctx context.Context, events publisher, req *task.CreateTaskReques vmPath := filepath.Join(layers[layersLen-1], "vm") err := os.MkdirAll(vmPath, 0) if err != nil { - return nil, err + return nil, err, "" } layers[layersLen-1] = vmPath wopts.LayerFolders = layers parent, err = uvm.CreateWCOW(ctx, wopts) if err != nil { - return nil, err + return nil, err, "" } + isCreateTemplateRequest = wopts.Options.SaveAsTemplate } err = parent.Start(ctx) if err != nil { parent.Close() - return nil, err + return nil, err, "" } } else if !isWCOW { - return nil, errors.Wrap(errdefs.ErrFailedPrecondition, "oci spec does not contain WCOW or LCOW spec") + return nil, errors.Wrap(errdefs.ErrFailedPrecondition, "oci spec does not contain WCOW or LCOW spec"), "" } defer func() { // clean up the uvm if we fail any further operations @@ -137,10 +147,22 @@ func createPod(ctx context.Context, events publisher, req *task.CreateTaskReques }() p := pod{ - events: events, - id: req.ID, - host: parent, + events: events, + id: req.ID, + host: parent, + isTemplate: isCreateTemplateRequest, + } + + // For a template creation request return before actually starting + // any containers inside it. + if isCreateTemplateRequest { + err = parent.SaveAsTemplate(ctx) + if err != nil { + return nil, err, "" + } + return &p, nil, parent.ID() } + // TOOD: JTERRY75 - There is a bug in the compartment activation for Windows // Process isolated that requires us to create the real pause container to // hold the network compartment open. This is not required for Windows @@ -160,15 +182,15 @@ func createPod(ctx context.Context, events publisher, req *task.CreateTaskReques if nsid != "" { endpoints, err := hcsoci.GetNamespaceEndpoints(ctx, nsid) if err != nil { - return nil, err + return nil, err, "" } err = parent.AddNetNS(ctx, nsid) if err != nil { - return nil, err + return nil, err, "" } err = parent.AddEndpointsToNS(ctx, nsid, endpoints) if err != nil { - return nil, err + return nil, err, "" } } } @@ -202,12 +224,12 @@ func createPod(ctx context.Context, events publisher, req *task.CreateTaskReques // task for the sandbox. lt, err := newHcsTask(ctx, events, parent, true, req, s) if err != nil { - return nil, err + return nil, err, "" } p.sandboxTask = lt } - return &p, nil + return &p, nil, "" } var _ = (shimPod)(&pod{}) @@ -235,6 +257,9 @@ type pod struct { // to release the lock to allow concurrent creates. wcl sync.Mutex workloadTasks sync.Map + + // specifies if this pod was created as a template. + isTemplate bool } func (p *pod) ID() string { @@ -329,3 +354,7 @@ func (p *pod) KillTask(ctx context.Context, tid, eid string, signal uint32, all }) return eg.Wait() } + +func (p *pod) IsTemplate() bool { + return p.isTemplate +} diff --git a/cmd/containerd-shim-runhcs-v1/service_internal.go b/cmd/containerd-shim-runhcs-v1/service_internal.go index 153a58979e..bc401b1509 100644 --- a/cmd/containerd-shim-runhcs-v1/service_internal.go +++ b/cmd/containerd-shim-runhcs-v1/service_internal.go @@ -79,7 +79,6 @@ func (s *service) createInternal(ctx context.Context, req *task.CreateTaskReques } shimOpts = v.(*runhcsopts.Options) } - var spec specs.Spec f, err := os.Open(filepath.Join(req.Bundle, "config.json")) if err != nil { @@ -163,15 +162,19 @@ func (s *service) createInternal(ctx context.Context, req *task.CreateTaskReques resp.Pid = uint32(e.Pid()) return resp, nil } - pod, err = createPod(ctx, s.events, req, &spec) + pod, err, templateID := createPod(ctx, s.events, req, &spec) if err != nil { s.cl.Unlock() return nil, err } - t, _ := pod.GetTask(req.ID) - e, _ := t.GetExec("") - resp.Pid = uint32(e.Pid()) - s.taskOrPod.Store(pod) + if templateID == "" { + t, _ := pod.GetTask(req.ID) + e, _ := t.GetExec("") + resp.Pid = uint32(e.Pid()) + s.taskOrPod.Store(pod) + } + // TODO(ambarve): A template pod probably should still be + // stored in the taskOrPod map until late cloning is implemented } else { t, err := newHcsStandaloneTask(ctx, s.events, req, &spec) if err != nil { diff --git a/internal/hcs/callback.go b/internal/hcs/callback.go index 62ba81751b..cebbe75ad4 100644 --- a/internal/hcs/callback.go +++ b/internal/hcs/callback.go @@ -106,6 +106,7 @@ func newSystemChannels() notificationChannels { hcsNotificationSystemStartCompleted, hcsNotificationSystemPauseCompleted, hcsNotificationSystemResumeCompleted, + hcsNotificationSystemSaveCompleted, } { channels[notif] = make(notificationChannel, 1) } diff --git a/internal/hcs/system.go b/internal/hcs/system.go index 67a5f7176f..605f84a309 100644 --- a/internal/hcs/system.go +++ b/internal/hcs/system.go @@ -604,3 +604,30 @@ func (computeSystem *System) Modify(ctx context.Context, config interface{}) err return nil } + +// Save the compute system +func (computeSystem *System) Save(ctx context.Context, options string) (err error) { + operation := "hcsshim::System::Save" + + // hcsSaveComputeSystemContext is an async peration. Start the outer span + // here to measure the full save time. + ctx, span := trace.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + if computeSystem.handle == 0 { + return makeSystemError(computeSystem, operation, "", ErrAlreadyClosed, nil) + } + + result, err := vmcompute.HcsSaveComputeSystem(ctx, computeSystem.handle, options) + events, err := processAsyncHcsResult(ctx, err, result, computeSystem.callbackNumber, hcsNotificationSystemSaveCompleted, &timeout.SystemSave) + if err != nil { + return makeSystemError(computeSystem, operation, "", err, events) + } + + return nil +} diff --git a/internal/hcsoci/resources_wcow.go b/internal/hcsoci/resources_wcow.go index f62e494d77..4dce6f57b2 100644 --- a/internal/hcsoci/resources_wcow.go +++ b/internal/hcsoci/resources_wcow.go @@ -19,6 +19,8 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" ) +const wcowGlobalMountPrefix = "C:\\mounts\\m%d" + func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, resources *Resources) error { if coi.Spec == nil || coi.Spec.Windows == nil || coi.Spec.Windows.LayerFolders == nil { return fmt.Errorf("field 'Spec.Windows.Layerfolders' is not populated") @@ -77,8 +79,7 @@ func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r } if coi.HostingSystem != nil && schemaversion.IsV21(coi.actualSchemaVersion) { - uvmPath := fmt.Sprintf("C:\\%s\\%d", coi.actualID, i) - + uvmPath := fmt.Sprintf(wcowGlobalMountPrefix, coi.HostingSystem.UVMMountCounter()) readOnly := false for _, o := range mount.Options { if strings.ToLower(o) == "ro" { diff --git a/internal/oci/uvm.go b/internal/oci/uvm.go index 12c4037dee..dd16eeb720 100644 --- a/internal/oci/uvm.go +++ b/internal/oci/uvm.go @@ -123,6 +123,8 @@ const ( annotationVPCIEnabled = "io.microsoft.virtualmachine.lcow.vpcienabled" annotationStorageQoSBandwidthMaximum = "io.microsoft.virtualmachine.storageqos.bandwidthmaximum" annotationStorageQoSIopsMaximum = "io.microsoft.virtualmachine.storageqos.iopsmaximum" + annotationSaveAsTemplate = "io.microsoft.virtualmachine.saveastemplate" + annotationTemplateID = "io.microsoft.virtualmachine.templateid" ) // parseAnnotationsBool searches `a` for `key` and if found verifies that the @@ -348,6 +350,8 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) ( lopts.KernelFile = uvm.KernelFile } lopts.BootFilesPath = parseAnnotationsString(s.Annotations, annotationBootFilesRootPath, lopts.BootFilesPath) + lopts.SaveAsTemplate = parseAnnotationsBool(ctx, s.Annotations, annotationSaveAsTemplate, false) + lopts.TemplateID = parseAnnotationsString(s.Annotations, annotationTemplateID, lopts.TemplateID) return lopts, nil } else if IsWCOW(s) { wopts := uvm.NewDefaultOptionsWCOW(id, owner) @@ -362,6 +366,8 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) ( wopts.ProcessorWeight = ParseAnnotationsCPUWeight(ctx, s, annotationProcessorWeight, wopts.ProcessorWeight) wopts.StorageQoSBandwidthMaximum = ParseAnnotationsStorageBps(ctx, s, annotationStorageQoSBandwidthMaximum, wopts.StorageQoSBandwidthMaximum) wopts.StorageQoSIopsMaximum = ParseAnnotationsStorageIops(ctx, s, annotationStorageQoSIopsMaximum, wopts.StorageQoSIopsMaximum) + wopts.SaveAsTemplate = parseAnnotationsBool(ctx, s.Annotations, annotationSaveAsTemplate, false) + wopts.TemplateID = parseAnnotationsString(s.Annotations, annotationTemplateID, wopts.TemplateID) return wopts, nil } return nil, errors.New("cannot create UVM opts spec is not LCOW or WCOW") diff --git a/internal/timeout/timeout.go b/internal/timeout/timeout.go index ff3b6572e6..eaf39fa513 100644 --- a/internal/timeout/timeout.go +++ b/internal/timeout/timeout.go @@ -29,6 +29,9 @@ var ( // SystemResume is the timeout for resuming a compute system SystemResume time.Duration = defaultTimeout + // SystemSave is the timeout for saving a compute system + SystemSave time.Duration = defaultTimeout + // SyscallWatcher is the timeout before warning of a potential stuck platform syscall. SyscallWatcher time.Duration = defaultTimeout @@ -51,6 +54,7 @@ func init() { SystemStart = durationFromEnvironment("HCSSHIM_TIMEOUT_SYSTEMSTART", SystemStart) SystemPause = durationFromEnvironment("HCSSHIM_TIMEOUT_SYSTEMPAUSE", SystemPause) SystemResume = durationFromEnvironment("HCSSHIM_TIMEOUT_SYSTEMRESUME", SystemResume) + SystemSave = durationFromEnvironment("HCSSHIM_TIMEOUT_SYSTEMSAVE", SystemSave) SyscallWatcher = durationFromEnvironment("HCSSHIM_TIMEOUT_SYSCALLWATCHER", SyscallWatcher) Tar2VHD = durationFromEnvironment("HCSSHIM_TIMEOUT_TAR2VHD", Tar2VHD) ExternalCommandToStart = durationFromEnvironment("HCSSHIM_TIMEOUT_EXTERNALCOMMANDSTART", ExternalCommandToStart) diff --git a/internal/uvm/clone.go b/internal/uvm/clone.go new file mode 100644 index 0000000000..0eda7cd6fb --- /dev/null +++ b/internal/uvm/clone.go @@ -0,0 +1,157 @@ +package uvm + +import ( + "context" + + "github.com/Microsoft/go-winio/pkg/security" + "github.com/Microsoft/hcsshim/internal/copyfile" + "github.com/Microsoft/hcsshim/internal/regstate" + hcsschema "github.com/Microsoft/hcsshim/internal/schema2" +) + +var err error + +const ( + hcsSaveOptions = "{\"SaveType\": \"AsTemplate\"}" + templateRoot = "troot" + templateKey = "tkey" +) + +type PersistedUVMConfig struct { + ID string + Stored bool + Config hcsschema.ComputeSystem +} + +func NewPersistedUVMConfig(ID string, config hcsschema.ComputeSystem) *PersistedUVMConfig { + return &PersistedUVMConfig{ + ID: ID, + Stored: false, + Config: config, + } +} + +// LoadTemplateConfig loads a persisted template config from the registry that matches +// `templateID`. If not found returns `regstate.NotFoundError` +func LoadPersistedUVMConfig(ID string) (*PersistedUVMConfig, error) { + sk, err := regstate.Open(templateRoot, false) + if err != nil { + return nil, err + } + defer sk.Close() + + var puc PersistedUVMConfig + if err := sk.Get(ID, templateKey, &puc); err != nil { + return nil, err + } + return &puc, nil +} + +// Store stores or updates the in-memory config to its registry state. If the +// store fails returns the store error. +func StorePersistedUVMConfig(puc *PersistedUVMConfig) error { + sk, err := regstate.Open(templateRoot, false) + if err != nil { + return err + } + defer sk.Close() + + if puc.Stored { + if err := sk.Set(puc.ID, templateKey, puc); err != nil { + return err + } + } else { + if err := sk.Create(puc.ID, templateKey, puc); err != nil { + return err + } + } + puc.Stored = true + return nil +} + +// TODO(ambarve): Hook this up with the pod removal functions. +// Remove removes any persisted state associated with this config. If the config +// is not found in the registery `Remove` returns no error. +func RemovePersistedUVMConfig(ID string) error { + sk, err := regstate.Open(templateRoot, false) + if err != nil { + if regstate.IsNotFoundError(err) { + return nil + } + return err + } + defer sk.Close() + + if err := sk.Remove(ID); err != nil { + if regstate.IsNotFoundError(err) { + return nil + } + return err + } + return nil +} + +// Store the current UVM as a template which can be later used for cloning. +// Note: Once this UVM is stored as a template it can not be resumed. It will +// permananetly stay in Saved as template state. +func (uvm *UtilityVM) SaveAsTemplate(ctx context.Context) error { + err := uvm.hcsSystem.Pause(ctx) + if err != nil { + return err + } + + err = uvm.hcsSystem.Save(ctx, hcsSaveOptions) + if err != nil { + return err + } + + err = StorePersistedUVMConfig(NewPersistedUVMConfig(uvm.ID(), *uvm.configDoc)) + if err != nil { + return err + } + return nil +} + +// Get the config of the UVM with given ID +func getUVMConfig(ctx context.Context, uvmID string) (*hcsschema.ComputeSystem, error) { + puc, err := LoadPersistedUVMConfig(uvmID) + if err != nil { + return nil, err + } + return &puc.Config, nil +} + +func (uvm *UtilityVM) clone(ctx context.Context, doc *hcsschema.ComputeSystem, opts *OptionsWCOW) error { + doc.VirtualMachine.RestoreState = &hcsschema.RestoreState{} + doc.VirtualMachine.RestoreState.TemplateSystemId = opts.TemplateID + + templateConfig, err := getUVMConfig(ctx, opts.TemplateID) + if err != nil { + return err + } + + srcVhdPath := templateConfig.VirtualMachine.Devices.Scsi["0"].Attachments["0"].Path + dstVhdPath := doc.VirtualMachine.Devices.Scsi["0"].Attachments["0"].Path + + // copy the VHDX of source VM + err = copyfile.CopyFile(ctx, srcVhdPath, dstVhdPath, true) + if err != nil { + return err + } + + // Guest connection will be done externally for clones + doc.VirtualMachine.GuestConnection = &hcsschema.GuestConnection{} + + // original VHD has VM group access but it is overwritten in the copyFile op above + err = security.GrantVmGroupAccess(dstVhdPath) + if err != nil { + return err + } + + err = uvm.create(ctx, uvm.configDoc) + if err != nil { + return err + } + + return nil +} diff --git a/internal/uvm/create.go b/internal/uvm/create.go index 4c13f7138b..b545a14c93 100644 --- a/internal/uvm/create.go +++ b/internal/uvm/create.go @@ -64,6 +64,13 @@ type Options struct { // ExternalGuestConnection sets whether the guest RPC connection is performed // internally by the OS platform or externally by this package. ExternalGuestConnection bool + + // SaveAsTemplate states if this pod should be created and saved as template + SaveAsTemplate bool + + // TemplateID specifies the ID of the template from which this pod should + // be created + TemplateID string } // newDefaultOptions returns the default base options for WCOW and LCOW. diff --git a/internal/uvm/create_wcow.go b/internal/uvm/create_wcow.go index 7331361e15..5dc2dc7c21 100644 --- a/internal/uvm/create_wcow.go +++ b/internal/uvm/create_wcow.go @@ -59,7 +59,7 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error } span.AddAttributes(trace.StringAttribute(logfields.UVMID, opts.ID)) - log.G(ctx).WithField("options", fmt.Sprintf("%+v", opts)).Debug("uvm::CreateLCOW options") + log.G(ctx).WithField("options", fmt.Sprintf("%+v", opts)).Debug("uvm::CreateWCOW options") uvm := &UtilityVM{ id: opts.ID, @@ -172,6 +172,8 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error ReadOnly: true, PseudoOplocks: true, TakeBackupPrivilege: true, + NoLocks: true, + PseudoDirnotify: true, CacheIo: true, ShareRead: true, }, @@ -193,7 +195,6 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error BandwidthMaximum: opts.StorageQoSBandwidthMaximum, } } - uvm.scsiLocations[0][0].hostPath = doc.VirtualMachine.Devices.Scsi["0"].Attachments["0"].Path fullDoc, err := mergemaps.MergeJSON(doc, ([]byte)(opts.AdditionHCSDocumentJSON)) @@ -201,7 +202,15 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error return nil, fmt.Errorf("failed to merge additional JSON '%s': %s", opts.AdditionHCSDocumentJSON, err) } - err = uvm.create(ctx, fullDoc) + // Save the config doc of this UVM so that it can be used for cloning etc + uvm.configDoc = (fullDoc).(*hcsschema.ComputeSystem) + + if opts.Options.TemplateID != "" { + err = uvm.clone(ctx, (fullDoc).(*hcsschema.ComputeSystem), opts) + } else { + err = uvm.create(ctx, fullDoc) + } + if err != nil { return nil, err } diff --git a/internal/uvm/types.go b/internal/uvm/types.go index af1b9dd8f9..b58cbbdc33 100644 --- a/internal/uvm/types.go +++ b/internal/uvm/types.go @@ -11,6 +11,7 @@ import ( "github.com/Microsoft/hcsshim/internal/hcs" "github.com/Microsoft/hcsshim/internal/hns" "github.com/Microsoft/hcsshim/internal/schema1" + hcsschema "github.com/Microsoft/hcsshim/internal/schema2" "golang.org/x/sys/windows" ) @@ -127,4 +128,7 @@ type UtilityVM struct { // This is used in generating unique mount path inside UVM for every mount. // Access to this variable should be done atomically. mountCounter uint64 + + // The configuration with which this UVM was created + configDoc *hcsschema.ComputeSystem } diff --git a/internal/vmcompute/vmcompute.go b/internal/vmcompute/vmcompute.go index 7c2a0dc280..da77d9f286 100644 --- a/internal/vmcompute/vmcompute.go +++ b/internal/vmcompute/vmcompute.go @@ -28,6 +28,7 @@ import ( //sys hcsModifyComputeSystem(computeSystem HcsSystem, configuration string, result **uint16) (hr error) = vmcompute.HcsModifyComputeSystem? //sys hcsRegisterComputeSystemCallback(computeSystem HcsSystem, callback uintptr, context uintptr, callbackHandle *HcsCallback) (hr error) = vmcompute.HcsRegisterComputeSystemCallback? //sys hcsUnregisterComputeSystemCallback(callbackHandle HcsCallback) (hr error) = vmcompute.HcsUnregisterComputeSystemCallback? +//sys hcsSaveComputeSystem(computeSystem HcsSystem, options string, result **uint16) (hr error) = vmcompute.HcsSaveComputeSystem? //sys hcsCreateProcess(computeSystem HcsSystem, processParameters string, processInformation *HcsProcessInformation, process *HcsProcess, result **uint16) (hr error) = vmcompute.HcsCreateProcess? //sys hcsOpenProcess(computeSystem HcsSystem, pid uint32, process *HcsProcess, result **uint16) (hr error) = vmcompute.HcsOpenProcess? @@ -357,6 +358,28 @@ func HcsUnregisterComputeSystemCallback(ctx gcontext.Context, callbackHandle Hcs }) } +func HcsSaveComputeSystem(ctx gcontext.Context, computeSystem HcsSystem, options string) (result string, hr error) { + ctx, span := trace.StartSpan(ctx, "HcsSaveComputeSystem") + defer span.End() + defer func() { + if result != "" { + span.AddAttributes(trace.StringAttribute("result", result)) + } + if hr != errVmcomputeOperationPending { + oc.SetSpanStatus(span, hr) + } + }() + + return result, execute(ctx, timeout.SyscallWatcher, func() error { + var resultp *uint16 + err := hcsSaveComputeSystem(computeSystem, options, &resultp) + if resultp != nil { + result = interop.ConvertAndFreeCoTaskMemString(resultp) + } + return err + }) +} + func HcsCreateProcess(ctx gcontext.Context, computeSystem HcsSystem, processParameters string) (processInformation HcsProcessInformation, process HcsProcess, result string, hr error) { ctx, span := trace.StartSpan(ctx, "HcsCreateProcess") defer span.End() diff --git a/internal/vmcompute/zsyscall_windows.go b/internal/vmcompute/zsyscall_windows.go index 0f2a69f6ad..7c8d60cda5 100644 --- a/internal/vmcompute/zsyscall_windows.go +++ b/internal/vmcompute/zsyscall_windows.go @@ -52,6 +52,7 @@ var ( procHcsModifyComputeSystem = modvmcompute.NewProc("HcsModifyComputeSystem") procHcsRegisterComputeSystemCallback = modvmcompute.NewProc("HcsRegisterComputeSystemCallback") procHcsUnregisterComputeSystemCallback = modvmcompute.NewProc("HcsUnregisterComputeSystemCallback") + procHcsSaveComputeSystem = modvmcompute.NewProc("HcsSaveComputeSystem") procHcsCreateProcess = modvmcompute.NewProc("HcsCreateProcess") procHcsOpenProcess = modvmcompute.NewProc("HcsOpenProcess") procHcsCloseProcess = modvmcompute.NewProc("HcsCloseProcess") @@ -342,6 +343,29 @@ func hcsUnregisterComputeSystemCallback(callbackHandle HcsCallback) (hr error) { return } +func hcsSaveComputeSystem(computeSystem HcsSystem, options string, result **uint16) (hr error) { + var _p0 *uint16 + _p0, hr = syscall.UTF16PtrFromString(options) + if hr != nil { + return + } + return _hcsSaveComputeSystem(computeSystem, _p0, result) +} + +func _hcsSaveComputeSystem(computeSystem HcsSystem, options *uint16, result **uint16) (hr error) { + if hr = procHcsSaveComputeSystem.Find(); hr != nil { + return + } + r0, _, _ := syscall.Syscall(procHcsSaveComputeSystem.Addr(), 3, uintptr(computeSystem), uintptr(unsafe.Pointer(options)), uintptr(unsafe.Pointer(result))) + if int32(r0) < 0 { + if r0&0x1fff0000 == 0x00070000 { + r0 &= 0xffff + } + hr = syscall.Errno(r0) + } + return +} + func hcsCreateProcess(computeSystem HcsSystem, processParameters string, processInformation *HcsProcessInformation, process *HcsProcess, result **uint16) (hr error) { var _p0 *uint16 _p0, hr = syscall.UTF16PtrFromString(processParameters) diff --git a/test/cri-containerd/createcontainer_test.go b/test/cri-containerd/createcontainer_test.go index c3f6159c6c..c4bf4d7eba 100644 --- a/test/cri-containerd/createcontainer_test.go +++ b/test/cri-containerd/createcontainer_test.go @@ -961,3 +961,53 @@ func Test_CreateContainer_Mount_NamedPipe_WCOW(t *testing.T) { } runCreateContainerTest(t, wcowHypervisorRuntimeHandler, request) } + +// TODO(ambarve): This test doesn't work right now because start container command for template fails. +// It seems that the pod directory is deleted if start pod command fails. So when the test creates a clone +// it can not find the VHD of the template. +// Once Proper responses are implemented for start template requests then this will start working. +func Test_Create_Pod_FromTemplate(t *testing.T) { + pullRequiredImages(t, []string{imageWindowsNanoserver}) + + client := newTestRuntimeClient(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // create template sandbox first + sandboxRequest := &runtime.RunPodSandboxRequest{ + Config: &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: t.Name() + "-template-Sandbox", + Uid: "0", + Namespace: testNamespace, + }, + Annotations: map[string]string{ + "io.microsoft.virtualmachine.saveastemplate": "true", + }, + }, + RuntimeHandler: wcowHypervisorRuntimeHandler, + } + + templatePodID := runPodSandbox(t, client, ctx, sandboxRequest) + defer removePodSandbox(t, client, ctx, templatePodID) + defer stopPodSandbox(t, client, ctx, templatePodID) + + // create clone from previously created template + sandboxRequest = &runtime.RunPodSandboxRequest{ + Config: &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: t.Name() + "-cloned-Sandbox", + Uid: "0", + Namespace: testNamespace, + }, + Annotations: map[string]string{ + "io.microsoft.virtualmachine.templateid": templatePodID + "@vm", + }, + }, + RuntimeHandler: wcowHypervisorRuntimeHandler, + } + + clonePodID := runPodSandbox(t, client, ctx, sandboxRequest) + defer removePodSandbox(t, client, ctx, clonePodID) + defer stopPodSandbox(t, client, ctx, clonePodID) +} diff --git a/test/cri-containerd/runpodsandbox_test.go b/test/cri-containerd/runpodsandbox_test.go index 12dbcfbc1b..f239940443 100644 --- a/test/cri-containerd/runpodsandbox_test.go +++ b/test/cri-containerd/runpodsandbox_test.go @@ -14,9 +14,12 @@ import ( "testing" "time" + "github.com/Microsoft/go-winio/vhd" "github.com/Microsoft/hcsshim/internal/lcow" "github.com/Microsoft/hcsshim/osversion" testutilities "github.com/Microsoft/hcsshim/test/functional/utilities" + "github.com/pkg/errors" + "golang.org/x/sys/windows" runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" ) @@ -996,6 +999,29 @@ func createExt4VHD(ctx context.Context, t *testing.T, path string) { } } +func createNTFSVHD(vhdPath string, sizeGB uint32, t *testing.T) error { + if err := vhd.CreateVhdx(vhdPath, sizeGB, 1); err != nil { + return errors.Wrap(err, "failed to create VHD") + } + + vhd, err := vhd.OpenVirtualDisk(vhdPath, vhd.VirtualDiskAccessNone, vhd.OpenVirtualDiskFlagNone) + if err != nil { + return errors.Wrap(err, "failed to open VHD") + } + defer func() { + err2 := windows.CloseHandle(windows.Handle(vhd)) + if err == nil { + err = errors.Wrap(err2, "failed to close VHD") + } + }() + + if err := hcsFormatWritableLayerVhd(uintptr(vhd)); err != nil { + return errors.Wrap(err, "failed to format VHD") + } + + return nil +} + func Test_RunPodSandbox_MultipleContainersSameVhd_LCOW(t *testing.T) { pullRequiredLcowImages(t, []string{imageLcowK8sPause, imageLcowAlpine}) @@ -1086,6 +1112,104 @@ func Test_RunPodSandbox_MultipleContainersSameVhd_LCOW(t *testing.T) { } } +func Test_RunPodSandbox_MultipleContainersSameVhd_WCOW(t *testing.T) { + pullRequiredImages(t, []string{imageWindowsNanoserver}) + + client := newTestRuntimeClient(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + annotations := map[string]string{ + "io.microsoft.virtualmachine.computetopology.memory.allowovercommit": "true", + } + + vhdHostDir, err := ioutil.TempDir("", "") + if err != nil { + t.Fatalf("failed to create temporary directory: %s", err) + } + defer os.RemoveAll(vhdHostDir) + + vhdHostPath := filepath.Join(vhdHostDir, "temp.vhdx") + + if err := createNTFSVHD(vhdHostPath, 10, t); err != nil { + t.Fatalf("failed to create NTFS VHD: %s", err) + } + + vhdContainerPath := "C:\\containerDir" + + mounts := []*runtime.Mount{ + { + HostPath: "vhd://" + vhdHostPath, + ContainerPath: vhdContainerPath, + }, + } + + sbRequest := &runtime.RunPodSandboxRequest{ + Config: &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: t.Name(), + Uid: "0", + Namespace: testNamespace, + }, + Annotations: annotations, + }, + RuntimeHandler: wcowHypervisorRuntimeHandler, + } + + podID := runPodSandbox(t, client, ctx, sbRequest) + defer removePodSandbox(t, client, ctx, podID) + defer stopPodSandbox(t, client, ctx, podID) + + execCommand := []string{ + "cmd", + "/c", + "dir", + vhdContainerPath, + } + + command := []string{ + "ping", + "-t", + "127.0.0.1", + } + + // create 2 containers with vhd mounts and verify both can mount vhd + for i := 1; i < 3; i++ { + containerName := t.Name() + "-Container-" + strconv.Itoa(i) + containerId := createContainerInSandbox(t, client, ctx, podID, containerName, imageWindowsNanoserver, command, annotations, mounts, sbRequest.Config) + defer removeContainer(t, client, ctx, containerId) + + startContainer(t, client, ctx, containerId) + defer stopContainer(t, client, ctx, containerId) + + _, errorMsg, exitCode := execContainer(t, client, ctx, containerId, execCommand) + + // The dir command will return File Not Found error if the directory is empty. + // Don't fail the test if that happens. It is expected behaviour in this case. + if exitCode != 0 && !strings.Contains(errorMsg, "File Not Found") { + t.Fatalf("Exec into container failed with: %v and exit code: %d, %s", errorMsg, exitCode, containerId) + } + } + + // For the 3rd container don't add any mounts + // this makes sure you can have containers that share vhd mounts and + // at the same time containers in a pod that don't have any mounts + mounts = []*runtime.Mount{} + containerName := t.Name() + "-Container-3" + containerId := createContainerInSandbox(t, client, ctx, podID, containerName, imageWindowsNanoserver, command, annotations, mounts, sbRequest.Config) + defer removeContainer(t, client, ctx, containerId) + + startContainer(t, client, ctx, containerId) + defer stopContainer(t, client, ctx, containerId) + + output, errorMsg, exitCode := execContainer(t, client, ctx, containerId, execCommand) + + // 3rd container should not have the mount and ls should fail + if exitCode != 0 && !strings.Contains(errorMsg, "File Not Found") { + t.Fatalf("Exec into container failed: %v and exit code: %s, %s", errorMsg, output, containerId) + } +} + func createSandboxContainerAndExecForCustomScratch(t *testing.T, annotations map[string]string) (string, string, int) { cmd := []string{ "df", diff --git a/test/cri-containerd/syscall.go b/test/cri-containerd/syscall.go new file mode 100644 index 0000000000..63958ac410 --- /dev/null +++ b/test/cri-containerd/syscall.go @@ -0,0 +1,7 @@ +// +build functional + +package cri_containerd + +//go:generate go run ../../mksyscall_windows.go -output zsyscall_windows.go syscall.go + +//sys hcsFormatWritableLayerVhd(handle uintptr) (hr error) = computestorage.HcsFormatWritableLayerVhd diff --git a/test/cri-containerd/zsyscall_windows.go b/test/cri-containerd/zsyscall_windows.go new file mode 100644 index 0000000000..85531b6207 --- /dev/null +++ b/test/cri-containerd/zsyscall_windows.go @@ -0,0 +1,54 @@ +// Code generated mksyscall_windows.exe DO NOT EDIT + +package cri_containerd + +import ( + "syscall" + "unsafe" + + "golang.org/x/sys/windows" +) + +var _ unsafe.Pointer + +// Do the interface allocations only once for common +// Errno values. +const ( + errnoERROR_IO_PENDING = 997 +) + +var ( + errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING) +) + +// errnoErr returns common boxed Errno values, to prevent +// allocations at runtime. +func errnoErr(e syscall.Errno) error { + switch e { + case 0: + return nil + case errnoERROR_IO_PENDING: + return errERROR_IO_PENDING + } + // TODO: add more here, after collecting data on the common + // error values see on Windows. (perhaps when running + // all.bat?) + return e +} + +var ( + modcomputestorage = windows.NewLazySystemDLL("computestorage.dll") + + procHcsFormatWritableLayerVhd = modcomputestorage.NewProc("HcsFormatWritableLayerVhd") +) + +func hcsFormatWritableLayerVhd(handle uintptr) (hr error) { + r0, _, _ := syscall.Syscall(procHcsFormatWritableLayerVhd.Addr(), 1, uintptr(handle), 0, 0) + if int32(r0) < 0 { + if r0&0x1fff0000 == 0x00070000 { + r0 &= 0xffff + } + hr = syscall.Errno(r0) + } + return +}