diff --git a/cmd/containerd-shim-runhcs-v1/delete.go b/cmd/containerd-shim-runhcs-v1/delete.go index bbeb6f7967..5c8f8313e4 100644 --- a/cmd/containerd-shim-runhcs-v1/delete.go +++ b/cmd/containerd-shim-runhcs-v1/delete.go @@ -4,7 +4,6 @@ package main import ( "context" - "encoding/json" "fmt" "os" "path/filepath" @@ -18,10 +17,8 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" "github.com/Microsoft/hcsshim/internal/hcs" - "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/memory" "github.com/Microsoft/hcsshim/internal/oc" - cimlayer "github.com/Microsoft/hcsshim/internal/wclayer/cim" "github.com/Microsoft/hcsshim/internal/winapi" ) @@ -126,28 +123,8 @@ The delete command will be executed in the container's bundle as its cwd. fmt.Fprintf(os.Stderr, "failed to delete user %q: %v", username, err) } - // cleanup the layers mounted for the container. We currently only handle cleanup of CimFS - // layers here. First n-1 values should be the image layerFolders (topmost layer being at - // index 0) and the last entry should be the scratch layer - var layerFolders []string - f, err := os.Open(filepath.Join(bundleFlag, layersFile)) - if err != nil { - if !errors.Is(err, os.ErrNotExist) { - fmt.Fprintf(os.Stderr, "open layers file: %s", err) - } - } else { - defer f.Close() - if err = json.NewDecoder(f).Decode(&layerFolders); err != nil { - fmt.Fprintf(os.Stderr, "decode layers json: %s", err) - } - } - if err == nil && cimlayer.IsCimLayer(layerFolders[0]) { - scratchLayerFolderPath := layerFolders[len(layerFolders)-1] - err = layers.ReleaseCimFSHostLayers(ctx, scratchLayerFolderPath, idFlag) - if err != nil { - fmt.Fprintf(os.Stderr, "cleanup container %q mounts: %s", idFlag, err) - } - } + // TODO(ambarve): + // correctly handle cleanup of cimfs layers in case of shim process crash here. if data, err := proto.Marshal(&task.DeleteResponse{ ExitedAt: timestamppb.New(time.Now()), diff --git a/cmd/containerd-shim-runhcs-v1/main.go b/cmd/containerd-shim-runhcs-v1/main.go index 5c0abcb522..be5e950f16 100644 --- a/cmd/containerd-shim-runhcs-v1/main.go +++ b/cmd/containerd-shim-runhcs-v1/main.go @@ -29,7 +29,6 @@ import ( const usage = `` const ttrpcAddressEnv = "TTRPC_ADDRESS" -const layersFile = "layers.json" // Add a manifest to get proper Windows version detection. //go:generate go run github.com/josephspurrier/goversioninfo/cmd/goversioninfo -platform-specific diff --git a/cmd/containerd-shim-runhcs-v1/pod.go b/cmd/containerd-shim-runhcs-v1/pod.go index 57aedcfc93..1d2551ee4d 100644 --- a/cmd/containerd-shim-runhcs-v1/pod.go +++ b/cmd/containerd-shim-runhcs-v1/pod.go @@ -10,6 +10,7 @@ import ( "strings" "sync" + "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/oci" "github.com/Microsoft/hcsshim/internal/uvm" @@ -122,22 +123,15 @@ func createPod(ctx context.Context, events publisher, req *task.CreateTaskReques return nil, err } case *uvm.OptionsWCOW: + var layerFolders []string + if s.Windows != nil { + layerFolders = s.Windows.LayerFolders + } wopts := (opts).(*uvm.OptionsWCOW) - - // In order for the UVM sandbox.vhdx not to collide with the actual - // nested Argon sandbox.vhdx we append the \vm folder to the last - // entry in the list. 
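// Illustrative note (not part of the patch hunks): the "append a \vm subfolder to the last
// layer folder" logic removed below is likewise removed from task_hcs.go and
// cmd/runhcs/container.go later in this diff. All three call sites now delegate to
// layers.GetWCOWUVMBootFilesFromLayers and receive the UVM boot files through the WCOW
// options' BootFiles field instead of rewriting the options' LayerFolders.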
- layersLen := len(s.Windows.LayerFolders) - layers := make([]string, layersLen) - copy(layers, s.Windows.LayerFolders) - - vmPath := filepath.Join(layers[layersLen-1], "vm") - err := os.MkdirAll(vmPath, 0) + wopts.BootFiles, err = layers.GetWCOWUVMBootFilesFromLayers(ctx, req.Rootfs, layerFolders) if err != nil { return nil, err } - layers[layersLen-1] = vmPath - wopts.LayerFolders = layers parent, err = uvm.CreateWCOW(ctx, wopts) if err != nil { diff --git a/cmd/containerd-shim-runhcs-v1/rootfs.go b/cmd/containerd-shim-runhcs-v1/rootfs.go deleted file mode 100644 index 50046a3120..0000000000 --- a/cmd/containerd-shim-runhcs-v1/rootfs.go +++ /dev/null @@ -1,144 +0,0 @@ -//go:build windows - -package main - -import ( - "encoding/json" - "fmt" - "path/filepath" - "strings" - - "github.com/Microsoft/hcsshim/internal/layers" - "github.com/containerd/containerd/api/types" - "github.com/containerd/containerd/mount" - "github.com/containerd/errdefs" -) - -// validateRootfsAndLayers checks to ensure we have appropriate information -// for setting up the container's root filesystem. It ensures the following: -// - One and only one of Rootfs or LayerFolders can be provided. -// - If LayerFolders are provided, there are at least two entries. -// - If Rootfs is provided, there is a single entry and it does not have a Target set. -func validateRootfsAndLayers(rootfs []*types.Mount, layerFolders []string) error { - if len(rootfs) > 0 && len(layerFolders) > 0 { - return fmt.Errorf("cannot pass both a rootfs mount and Windows.LayerFolders: %w", errdefs.ErrFailedPrecondition) - } - if len(rootfs) == 0 && len(layerFolders) == 0 { - return fmt.Errorf("must pass either a rootfs mount or Windows.LayerFolders: %w", errdefs.ErrFailedPrecondition) - } - if len(rootfs) > 0 { - // We have a rootfs. - - if len(rootfs) > 1 { - return fmt.Errorf("expected a single rootfs mount: %w", errdefs.ErrFailedPrecondition) - } - if rootfs[0].Target != "" { - return fmt.Errorf("rootfs mount is missing Target path: %w", errdefs.ErrFailedPrecondition) - } - } else { - // We have layerFolders. - - if len(layerFolders) < 2 { - return fmt.Errorf("must pass at least two Windows.LayerFolders: %w", errdefs.ErrFailedPrecondition) - } - } - - return nil -} - -// parseLegacyRootfsMount parses the rootfs mount format that we have traditionally -// used for both Linux and Windows containers. -// The mount format consists of: -// - The scratch folder path in m.Source, which contains sandbox.vhdx. -// - A mount option in the form parentLayerPaths=, where JSON is an array of -// string paths to read-only layer directories. The exact contents of these layer -// directories are intepreteted differently for Linux and Windows containers. -func parseLegacyRootfsMount(m *types.Mount) (string, []string, error) { - // parentLayerPaths are passed in layerN, layerN-1, ..., layer 0 - // - // The OCI spec expects: - // layerN, layerN-1, ..., layer0, scratch - var parentLayerPaths []string - for _, option := range m.Options { - if strings.HasPrefix(option, mount.ParentLayerPathsFlag) { - err := json.Unmarshal([]byte(option[len(mount.ParentLayerPathsFlag):]), &parentLayerPaths) - if err != nil { - return "", nil, fmt.Errorf("unmarshal parent layer paths from mount: %w: %w", err, errdefs.ErrFailedPrecondition) - } - // Would perhaps be worthwhile to check for unrecognized options and return an error, - // but since this is a legacy layer mount we don't do that to avoid breaking anyone. 
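// Illustrative example of the legacy rootfs mount shape described above (paths are
// hypothetical); this is what parseLegacyRootfsMount, and its replacement in internal/layers,
// expect to receive from containerd:
//
//	m := &types.Mount{
//		Type:    "windows-layer", // or "lcow-layer" for Linux containers
//		Source:  `C:\layers\scratch`, // scratch folder that holds sandbox.vhdx
//		Options: []string{`parentLayerPaths=["C:\\layers\\ro1","C:\\layers\\ro0"]`},
//	}
//
// The function returns m.Source as the scratch path plus the JSON-decoded parent layer paths.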
- break - } - } - return m.Source, parentLayerPaths, nil -} - -// getLCOWLayers returns a layers.LCOWLayers describing the rootfs that should be set up -// for an LCOW container. It takes as input the set of rootfs mounts and the layer folders -// from the OCI spec, it is assumed that these were previously checked with validateRootfsAndLayers -// such that only one of them is populated. -func getLCOWLayers(rootfs []*types.Mount, layerFolders []string) (*layers.LCOWLayers, error) { - legacyLayer := func(scratchLayer string, parentLayers []string) *layers.LCOWLayers { - // Each read-only layer should have a layer.vhd, and the scratch layer should have a sandbox.vhdx. - roLayers := make([]*layers.LCOWLayer, 0, len(parentLayers)) - for _, parentLayer := range parentLayers { - roLayers = append( - roLayers, - &layers.LCOWLayer{ - VHDPath: filepath.Join(parentLayer, "layer.vhd"), - }, - ) - } - return &layers.LCOWLayers{ - Layers: roLayers, - ScratchVHDPath: filepath.Join(scratchLayer, "sandbox.vhdx"), - } - } - // Due to previous validation, we know that for a Linux container we either have LayerFolders, or - // a single rootfs mount. - if len(layerFolders) > 0 { - return legacyLayer(layerFolders[len(layerFolders)-1], layerFolders[:len(layerFolders)-1]), nil - } - m := rootfs[0] - switch m.Type { - case "lcow-layer": - scratchLayer, parentLayers, err := parseLegacyRootfsMount(rootfs[0]) - if err != nil { - return nil, err - } - return legacyLayer(scratchLayer, parentLayers), nil - case "lcow-partitioned-layer": - var ( - scratchPath string - layerData []struct { - Path string - Partition uint64 - } - ) - for _, opt := range m.Options { - if optPrefix := "scratch="; strings.HasPrefix(opt, optPrefix) { - scratchPath = strings.TrimPrefix(opt, optPrefix) - } else if optPrefix := "parent-partitioned-layers="; strings.HasPrefix(opt, optPrefix) { - layerJSON := strings.TrimPrefix(opt, optPrefix) - if err := json.Unmarshal([]byte(layerJSON), &layerData); err != nil { - return nil, err - } - } else { - return nil, fmt.Errorf("unrecognized %s mount option: %s", m.Type, opt) - } - } - roLayers := make([]*layers.LCOWLayer, 0, len(layerData)) - for _, layer := range layerData { - roLayers = append( - roLayers, - &layers.LCOWLayer{ - VHDPath: layer.Path, - Partition: layer.Partition, - }, - ) - } - return &layers.LCOWLayers{Layers: roLayers, ScratchVHDPath: scratchPath}, nil - default: - return nil, fmt.Errorf("unrecognized rootfs mount type: %s", m.Type) - } -} diff --git a/cmd/containerd-shim-runhcs-v1/service_internal.go b/cmd/containerd-shim-runhcs-v1/service_internal.go index 73d0f5aaf4..b24e7b139a 100644 --- a/cmd/containerd-shim-runhcs-v1/service_internal.go +++ b/cmd/containerd-shim-runhcs-v1/service_internal.go @@ -27,13 +27,6 @@ import ( var empty = &emptypb.Empty{} -// TODO(ambarve): Once we can vendor containerd 2.0 in hcsshim, we should directly reference these types from -// containerd module -const ( - LegacyMountType string = "windows-layer" - CimFSMountType string = "CimFS" -) - // getPod returns the pod this shim is tracking or else returns `nil`. It is the // callers responsibility to verify that `s.isSandbox == true` before calling // this method. @@ -123,53 +116,6 @@ func (s *service) createInternal(ctx context.Context, req *task.CreateTaskReques } } - var layerFolders []string - if spec.Windows != nil { - layerFolders = spec.Windows.LayerFolders - } - if err := validateRootfsAndLayers(req.Rootfs, layerFolders); err != nil { - return nil, err - } - - // Only work with Windows here. 
- // Parsing of the rootfs mount for Linux containers occurs later. - if spec.Linux == nil && len(req.Rootfs) > 0 { - // For Windows containers, we work with LayerFolders throughout - // much of the creation logic in the shim. If we were given a - // rootfs mount, convert it to LayerFolders here. - m := req.Rootfs[0] - if m.Type != LegacyMountType && m.Type != CimFSMountType { - return nil, fmt.Errorf("unsupported Windows mount type: %s", m.Type) - } else if m.Type == CimFSMountType && (shimOpts.SandboxIsolation == runhcsopts.Options_HYPERVISOR) { - // For CIMFS layers only process isolation is supported right now. - return nil, fmt.Errorf("cimfs doesn't support hyperv isolation") - } - - source, parentLayerPaths, err := parseLegacyRootfsMount(m) - if err != nil { - return nil, err - } - - // Append the parents - spec.Windows.LayerFolders = append(spec.Windows.LayerFolders, parentLayerPaths...) - // Append the scratch - spec.Windows.LayerFolders = append(spec.Windows.LayerFolders, source) - - if m.Type == CimFSMountType { - // write the layers to a file so that it can be used for proper cleanup during shim - // delete. We can't write to the config.json as it is read-only for shim. - f, err = os.Create(filepath.Join(req.Bundle, layersFile)) - if err != nil { - return nil, err - } - if err := json.NewEncoder(f).Encode(spec.Windows.LayerFolders); err != nil { - f.Close() - return nil, err - } - f.Close() - } - } - // This is a Windows Argon make sure that we have a Root filled in. if spec.Windows.HyperV == nil { if spec.Root == nil { diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index 4a9e82d286..d544cb0934 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -35,6 +35,7 @@ import ( hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/hcsoci" "github.com/Microsoft/hcsshim/internal/jobcontainers" + "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/memory" "github.com/Microsoft/hcsshim/internal/oc" @@ -82,23 +83,15 @@ func newHcsStandaloneTask(ctx context.Context, events publisher, req *task.Creat return nil, err } case *uvm.OptionsWCOW: + var layerFolders []string + if s.Windows != nil { + layerFolders = s.Windows.LayerFolders + } wopts := (opts).(*uvm.OptionsWCOW) - - // In order for the UVM sandbox.vhdx not to collide with the actual - // nested Argon sandbox.vhdx we append the \vm folder to the last - // entry in the list. 
- layersLen := len(s.Windows.LayerFolders) - layers := make([]string, layersLen) - copy(layers, s.Windows.LayerFolders) - - vmPath := filepath.Join(layers[layersLen-1], "vm") - err := os.MkdirAll(vmPath, 0) + wopts.BootFiles, err = layers.GetWCOWUVMBootFilesFromLayers(ctx, req.Rootfs, layerFolders) if err != nil { return nil, err } - layers[layersLen-1] = vmPath - wopts.LayerFolders = layers - parent, err = uvm.CreateWCOW(ctx, wopts) if err != nil { return nil, err @@ -140,8 +133,24 @@ func createContainer( resources *resources.Resources ) + var wcowLayers layers.WCOWLayers + var lcowLayers *layers.LCOWLayers + var layerFolders []string + if s.Windows != nil { + layerFolders = s.Windows.LayerFolders + } + if s.Linux != nil { + lcowLayers, err = layers.ParseLCOWLayers(rootfs, layerFolders) + } else { + wcowLayers, err = layers.ParseWCOWLayers(rootfs, layerFolders) + } + if err != nil { + return nil, nil, err + } + if oci.IsJobContainer(s) { - container, resources, err = jobcontainers.Create(ctx, id, s) + opts := jobcontainers.CreateOptions{WCOWLayers: wcowLayers} + container, resources, err = jobcontainers.Create(ctx, id, s, opts) if err != nil { return nil, nil, err } @@ -152,18 +161,10 @@ func createContainer( Spec: s, HostingSystem: parent, NetworkNamespace: netNS, + LCOWLayers: lcowLayers, + WCOWLayers: wcowLayers, } - if s.Linux != nil { - var layerFolders []string - if s.Windows != nil { - layerFolders = s.Windows.LayerFolders - } - lcowLayers, err := getLCOWLayers(rootfs, layerFolders) - if err != nil { - return nil, nil, err - } - opts.LCOWLayers = lcowLayers - } + if shimOpts != nil { opts.ScaleCPULimitsToSandbox = shimOpts.ScaleCpuLimitsToSandbox } diff --git a/cmd/runhcs/container.go b/cmd/runhcs/container.go index c801248d90..2a5f5b7669 100644 --- a/cmd/runhcs/container.go +++ b/cmd/runhcs/container.go @@ -18,6 +18,7 @@ import ( "github.com/Microsoft/hcsshim/internal/cni" "github.com/Microsoft/hcsshim/internal/hcs" "github.com/Microsoft/hcsshim/internal/hcsoci" + "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oci" "github.com/Microsoft/hcsshim/internal/regstate" @@ -402,21 +403,10 @@ func createContainer(cfg *containerConfig) (_ *container, err error) { case *uvm.OptionsLCOW: opts.ConsolePipe = cfg.VMConsolePipe case *uvm.OptionsWCOW: - // In order for the UVM sandbox.vhdx not to collide with the actual - // nested Argon sandbox.vhdx we append the \vm folder to the last entry - // in the list. - layersLen := len(cfg.Spec.Windows.LayerFolders) - layers := make([]string, layersLen) - copy(layers, cfg.Spec.Windows.LayerFolders) - - vmPath := filepath.Join(layers[layersLen-1], "vm") - err := os.MkdirAll(vmPath, 0) + opts.BootFiles, err = layers.GetWCOWUVMBootFilesFromLayers(context.Background(), nil, cfg.Spec.Windows.LayerFolders) if err != nil { return nil, err } - layers[layersLen-1] = vmPath - - opts.LayerFolders = layers } shim, err := c.startVMShim(cfg.VMLogFile, opts) diff --git a/internal/hcsoci/create.go b/internal/hcsoci/create.go index a71c734bed..774449b7ee 100644 --- a/internal/hcsoci/create.go +++ b/internal/hcsoci/create.go @@ -45,6 +45,7 @@ type CreateOptions struct { HostingSystem *uvm.UtilityVM // Utility or service VM in which the container is to be created. NetworkNamespace string // Host network namespace to use (overrides anything in the spec) LCOWLayers *layers.LCOWLayers + WCOWLayers layers.WCOWLayers // This is an advanced debugging parameter. 
It allows for diagnosability by leaving a containers // resources allocated in case of a failure. Thus you would be able to use tools such as hcsdiag @@ -70,6 +71,8 @@ type createOptionsInternal struct { ccgState *hcsschema.ContainerCredentialGuardState // Container Credential Guard information to be attached to HCS container document windowsAdditionalMounts []hcsschema.MappedDirectory // Holds additional mounts based on added devices (such as SCSI). Only used for Windows v2 schema containers. + + mountedWCOWLayers *layers.MountedWCOWLayers } func validateContainerConfig(ctx context.Context, coi *createOptionsInternal) error { diff --git a/internal/hcsoci/hcsdoc_wcow.go b/internal/hcsoci/hcsdoc_wcow.go index 51695ff706..a6a642a282 100644 --- a/internal/hcsoci/hcsdoc_wcow.go +++ b/internal/hcsoci/hcsdoc_wcow.go @@ -20,7 +20,6 @@ import ( "github.com/Microsoft/hcsshim/internal/guestpath" "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" - "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/oci" "github.com/Microsoft/hcsshim/internal/processorinfo" @@ -161,11 +160,6 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter // ID is a property on the create call in V2 rather than part of the schema. v2Container := &hcsschema.Container{Storage: &hcsschema.Storage{}} - // TODO: Still want to revisit this. - if coi.Spec.Windows.LayerFolders == nil || len(coi.Spec.Windows.LayerFolders) < 2 { - return nil, nil, fmt.Errorf("invalid spec - not enough layer folders supplied") - } - if coi.Spec.Hostname != "" { v1.HostName = coi.Spec.Hostname v2Container.GuestOs = &hcsschema.GuestOs{HostName: coi.Spec.Hostname} @@ -310,7 +304,8 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter } // Strip off the top-most RW/scratch layer as that's passed in separately to HCS for v1 - v1.LayerFolderPath = coi.Spec.Windows.LayerFolders[len(coi.Spec.Windows.LayerFolders)-1] + // TODO(ambarve) Understand how this path is exactly used and fix it. + // v1.LayerFolderPath = coi.Spec.Windows.LayerFolders[len(coi.Spec.Windows.LayerFolders)-1] if coi.isV2Argon() || coi.isV1Argon() { // Argon v1 or v2. @@ -334,7 +329,14 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter v1.HvRuntime = &schema1.HvRuntime{ImagePath: coi.Spec.Windows.HyperV.UtilityVMPath} } else { // Client was lazy. Let's locate it from the layer folders instead. - uvmImagePath, err := uvmfolder.LocateUVMFolder(ctx, coi.Spec.Windows.LayerFolders) + // We are using v1xenon so we can't be using CimFS layers, that + // means mounted layers has to have individual layer directory + // paths that can be passed here. + layerFolders := []string{} + for _, ml := range coi.mountedWCOWLayers.MountedLayerPaths { + layerFolders = append(layerFolders, ml.MountedPath) + } + uvmImagePath, err := uvmfolder.LocateUVMFolder(ctx, layerFolders) if err != nil { return nil, nil, err } @@ -344,22 +346,24 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter // Hosting system was supplied, so is v2 Xenon. 
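// Sketch of the mounted-layer data consumed below, inferred from its usage in this patch; the
// authoritative definition lives in internal/layers and is not shown in this section, and the
// element type name here is assumed:
//
//	type MountedWCOWLayers struct {
//		RootFS            string             // container rootfs (volume path, or a path inside the UVM)
//		MountedLayerPaths []MountedWCOWLayer // read-only image layers
//	}
//
//	type MountedWCOWLayer struct {
//		LayerID     string // GUID-style ID used for the HCS schema layer entry
//		MountedPath string // host, VSMB or mounted-CIM path of that layer
//	}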
v2Container.Storage.Path = coi.Spec.Root.Path if coi.HostingSystem.OS() == "windows" { - layers, err := layers.GetHCSLayers(ctx, coi.HostingSystem, coi.Spec.Windows.LayerFolders[:len(coi.Spec.Windows.LayerFolders)-1]) - if err != nil { - return nil, nil, err + layers := []hcsschema.Layer{} + for _, ml := range coi.mountedWCOWLayers.MountedLayerPaths { + layers = append(layers, hcsschema.Layer{ + Id: ml.LayerID, + Path: ml.MountedPath, + }) } v2Container.Storage.Layers = layers } } if coi.isV2Argon() || coi.isV1Argon() { // Argon v1 or v2 - mountedLayers, err := layers.ToHostHcsSchemaLayers(ctx, coi.ID, coi.Spec.Windows.LayerFolders[:len(coi.Spec.Windows.LayerFolders)-1]) - if err != nil { - return nil, nil, err - } - for _, ml := range mountedLayers { - v1.Layers = append(v1.Layers, schema1.Layer{ID: ml.Id, Path: ml.Path}) - v2Container.Storage.Layers = append(v2Container.Storage.Layers, ml) + for _, ml := range coi.mountedWCOWLayers.MountedLayerPaths { + v1.Layers = append(v1.Layers, schema1.Layer{ID: ml.LayerID, Path: ml.MountedPath}) + v2Container.Storage.Layers = append(v2Container.Storage.Layers, hcsschema.Layer{ + Id: ml.LayerID, + Path: ml.MountedPath, + }) } } diff --git a/internal/hcsoci/resources_wcow.go b/internal/hcsoci/resources_wcow.go index b505dcc1e5..0503b371f6 100644 --- a/internal/hcsoci/resources_wcow.go +++ b/internal/hcsoci/resources_wcow.go @@ -9,7 +9,6 @@ import ( "bytes" "context" "fmt" - "os" "path/filepath" "strings" @@ -26,45 +25,23 @@ import ( "github.com/Microsoft/hcsshim/internal/schemaversion" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/Microsoft/hcsshim/internal/uvm/scsi" - "github.com/Microsoft/hcsshim/internal/wclayer" ) const wcowSandboxMountPath = "C:\\SandboxMounts" func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r *resources.Resources, isSandbox bool) error { - if coi.Spec == nil || coi.Spec.Windows == nil || coi.Spec.Windows.LayerFolders == nil { - return errors.New("field 'Spec.Windows.Layerfolders' is not populated") - } - - scratchFolder := coi.Spec.Windows.LayerFolders[len(coi.Spec.Windows.LayerFolders)-1] - - // TODO: Remove this code for auto-creation. Make the caller responsible. - // Create the directory for the RW scratch layer if it doesn't exist - if _, err := os.Stat(scratchFolder); os.IsNotExist(err) { - if err := os.MkdirAll(scratchFolder, 0777); err != nil { - return errors.Wrapf(err, "failed to auto-create container scratch folder %s", scratchFolder) - } - } - - // Create sandbox.vhdx if it doesn't exist in the scratch folder. It's called sandbox.vhdx - // rather than scratch.vhdx as in the v1 schema, it's hard-coded in HCS. 
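// Note: the scratch-folder auto-creation and sandbox.vhdx creation removed from this function
// are not dropped; the same logic is re-added by this patch as ensureScratchVHD in
// internal/layers/helpers.go, presumably called from the new WCOW layer-parsing code that is
// not shown in this section.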
- if _, err := os.Stat(filepath.Join(scratchFolder, "sandbox.vhdx")); os.IsNotExist(err) { - if err := wclayer.CreateScratchLayer(ctx, scratchFolder, coi.Spec.Windows.LayerFolders[:len(coi.Spec.Windows.LayerFolders)-1]); err != nil { - return errors.Wrap(err, "failed to CreateSandboxLayer") - } - } - if coi.Spec.Root == nil { coi.Spec.Root = &specs.Root{} } if coi.Spec.Root.Path == "" && (coi.HostingSystem != nil || coi.Spec.Windows.HyperV == nil) { log.G(ctx).Debug("hcsshim::allocateWindowsResources mounting storage") - containerRootPath, closer, err := layers.MountWCOWLayers(ctx, coi.actualID, coi.Spec.Windows.LayerFolders, "", coi.HostingSystem) + mountedLayers, closer, err := layers.MountWCOWLayers(ctx, coi.actualID, coi.HostingSystem, coi.WCOWLayers) if err != nil { return errors.Wrap(err, "failed to mount container storage") } - coi.Spec.Root.Path = containerRootPath + coi.Spec.Root.Path = mountedLayers.RootFS + coi.mountedWCOWLayers = mountedLayers // If this is the pause container in a hypervisor-isolated pod, we can skip cleanup of // layers, as that happens automatically when the UVM is terminated. if !isSandbox || coi.HostingSystem == nil { diff --git a/internal/jobcontainers/jobcontainer.go b/internal/jobcontainers/jobcontainer.go index 05b66703f2..4de9c40251 100644 --- a/internal/jobcontainers/jobcontainer.go +++ b/internal/jobcontainers/jobcontainer.go @@ -21,6 +21,7 @@ import ( "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/jobobject" + "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/queue" "github.com/Microsoft/hcsshim/internal/resources" @@ -97,8 +98,12 @@ func newJobContainer(id string, s *specs.Spec) *JobContainer { } } +type CreateOptions struct { + WCOWLayers layers.WCOWLayers +} + // Create creates a new JobContainer from the OCI runtime spec `s`. -func Create(ctx context.Context, id string, s *specs.Spec) (_ cow.Container, _ *resources.Resources, err error) { +func Create(ctx context.Context, id string, s *specs.Spec, createOpts CreateOptions) (_ cow.Container, _ *resources.Resources, err error) { log.G(ctx).WithField("id", id).Debug("Creating job container") if s == nil { @@ -116,12 +121,12 @@ func Create(ctx context.Context, id string, s *specs.Spec) (_ cow.Container, _ * container := newJobContainer(id, s) // Create the job object all processes will run in. 
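// Illustrative usage of the new CreateOptions parameter, mirroring the shim's call site in
// task_hcs.go earlier in this diff (variable names assumed):
//
//	wcowLayers, err := layers.ParseWCOWLayers(rootfs, layerFolders)
//	if err != nil {
//		return nil, nil, err
//	}
//	container, res, err := jobcontainers.Create(ctx, id, spec,
//		jobcontainers.CreateOptions{WCOWLayers: wcowLayers})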
- options := &jobobject.Options{ + jobOpts := &jobobject.Options{ Name: fmt.Sprintf(jobContainerNameFmt, id), Notifications: true, EnableIOTracking: true, } - container.job, err = jobobject.Create(ctx, options) + container.job, err = jobobject.Create(ctx, jobOpts) if err != nil { return nil, nil, fmt.Errorf("failed to create job object: %w", err) } @@ -190,9 +195,9 @@ func Create(ctx context.Context, id string, s *specs.Spec) (_ cow.Container, _ * var closer resources.ResourceCloser if fileBindingSupport { - closer, err = container.bindSetup(ctx, s) + closer, err = container.bindSetup(ctx, s, createOpts) } else { - closer, err = container.fallbackSetup(ctx, s) + closer, err = container.fallbackSetup(ctx, s, createOpts) } if err != nil { return nil, nil, err @@ -765,13 +770,13 @@ func (c *JobContainer) replaceWithMountPoint(str string) (string, bool) { return newStr, str != newStr } -func (c *JobContainer) bindSetup(ctx context.Context, s *specs.Spec) (_ resources.ResourceCloser, err error) { +func (c *JobContainer) bindSetup(ctx context.Context, s *specs.Spec, opts CreateOptions) (_ resources.ResourceCloser, err error) { // Must be upgraded to a silo so we can get per silo bindings for the container. if err := c.job.PromoteToSilo(); err != nil { return nil, err } // Union the container layers. - closer, err := c.mountLayers(ctx, c.id, s, "") + closer, err := c.mountLayers(ctx, c.id, s, opts.WCOWLayers, "") if err != nil { return nil, fmt.Errorf("failed to mount container layers: %w", err) } @@ -798,12 +803,12 @@ func (c *JobContainer) bindSetup(ctx context.Context, s *specs.Spec) (_ resource // This handles the fallback case where bind mounting isn't available on the machine. This mounts the // container layers on the host and sets up any mounts present in the OCI runtime spec. -func (c *JobContainer) fallbackSetup(ctx context.Context, s *specs.Spec) (_ resources.ResourceCloser, err error) { +func (c *JobContainer) fallbackSetup(ctx context.Context, s *specs.Spec, opts CreateOptions) (_ resources.ResourceCloser, err error) { rootfsLocation := fmt.Sprintf(fallbackRootfsFormat, c.id) if loc := customRootfsLocation(s.Annotations); loc != "" { rootfsLocation = filepath.Join(loc, c.id) } - closer, err := c.mountLayers(ctx, c.id, s, rootfsLocation) + closer, err := c.mountLayers(ctx, c.id, s, opts.WCOWLayers, rootfsLocation) if err != nil { return nil, fmt.Errorf("failed to mount container layers: %w", err) } diff --git a/internal/jobcontainers/storage.go b/internal/jobcontainers/storage.go index 180c27a862..b4d4d42bcc 100644 --- a/internal/jobcontainers/storage.go +++ b/internal/jobcontainers/storage.go @@ -5,15 +5,11 @@ package jobcontainers import ( "context" "fmt" - "os" - "path/filepath" "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/resources" - "github.com/Microsoft/hcsshim/internal/wclayer" specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" ) // fallbackRootfsFormat is the fallback location for the rootfs if file binding support isn't available. 
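// Context for the hunk below: mountLayers now receives pre-parsed layers.WCOWLayers from its
// caller instead of reading Spec.Windows.LayerFolders itself. The fallback rootfs location is
// unchanged; for a hypothetical container ID "abc" it still expands to C:\hpc\abc\ via
// fmt.Sprintf(fallbackRootfsFormat, id).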
@@ -26,39 +22,46 @@ const fallbackRootfsFormat = `C:\hpc\%s\` // C:\hpc\ const defaultSiloRootfsLocation = `C:\hpc\` -func (c *JobContainer) mountLayers(ctx context.Context, containerID string, s *specs.Spec, volumeMountPath string) (_ resources.ResourceCloser, err error) { - if s == nil || s.Windows == nil || s.Windows.LayerFolders == nil { - return nil, errors.New("field 'Spec.Windows.Layerfolders' is not populated") - } - - // Last layer always contains the sandbox.vhdx, or 'scratch' space for the container. - scratchFolder := s.Windows.LayerFolders[len(s.Windows.LayerFolders)-1] - if _, err := os.Stat(scratchFolder); os.IsNotExist(err) { - if err := os.MkdirAll(scratchFolder, 0777); err != nil { - return nil, fmt.Errorf("failed to auto-create container scratch folder %s: %w", scratchFolder, err) - } - } - - // Create sandbox.vhdx if it doesn't exist in the scratch folder. - if _, err := os.Stat(filepath.Join(scratchFolder, "sandbox.vhdx")); os.IsNotExist(err) { - if err := wclayer.CreateScratchLayer(ctx, scratchFolder, s.Windows.LayerFolders[:len(s.Windows.LayerFolders)-1]); err != nil { - return nil, fmt.Errorf("failed to CreateSandboxLayer: %w", err) - } - } - +func (c *JobContainer) mountLayers(ctx context.Context, containerID string, s *specs.Spec, wl layers.WCOWLayers, volumeMountPath string) (_ resources.ResourceCloser, err error) { if s.Root == nil { s.Root = &specs.Root{} } + if wl == nil { + return nil, fmt.Errorf("layers can not be nil") + } var closer resources.ResourceCloser if s.Root.Path == "" { + var mountedLayers *layers.MountedWCOWLayers log.G(ctx).Debug("mounting job container storage") - var rootPath string - rootPath, closer, err = layers.MountWCOWLayers(ctx, containerID, s.Windows.LayerFolders, volumeMountPath, nil) + mountedLayers, closer, err = layers.MountWCOWLayers(ctx, containerID, nil, wl) if err != nil { return nil, fmt.Errorf("failed to mount job container storage: %w", err) } - s.Root.Path = rootPath + "\\" + defer func() { + if err != nil { + closeErr := closer.Release(ctx) + if closeErr != nil { + log.G(ctx).WithError(closeErr).Errorf("failed to cleanup mounted layers during another failure(%s)", err) + } + } + }() + + s.Root.Path = mountedLayers.RootFS + "\\" + } + + if volumeMountPath != "" { + if err = layers.MountSandboxVolume(ctx, volumeMountPath, s.Root.Path); err != nil { + return nil, err + } + layerCloser := closer + closer = resources.ResourceCloserFunc(func(ctx context.Context) error { + unmountErr := layers.RemoveSandboxMountPoint(ctx, volumeMountPath) + if unmountErr != nil { + return unmountErr + } + return layerCloser.Release(ctx) + }) } return closer, nil diff --git a/internal/layers/helpers.go b/internal/layers/helpers.go new file mode 100644 index 0000000000..2a67a7fb1b --- /dev/null +++ b/internal/layers/helpers.go @@ -0,0 +1,100 @@ +//go:build windows +// +build windows + +package layers + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/Microsoft/hcsshim/internal/wclayer" + "github.com/containerd/containerd/api/types" + "github.com/containerd/errdefs" +) + +// validateRootfsAndLayers checks to ensure we have appropriate information +// for setting up the container's root filesystem. It ensures the following: +// - One and only one of Rootfs or LayerFolders can be provided. +// - If LayerFolders are provided, there are at least two entries. +// - If Rootfs is provided, there is a single entry and it does not have a Target set. 
+func validateRootfsAndLayers(rootfs []*types.Mount, layerFolders []string) error {
+	if len(rootfs) > 0 && len(layerFolders) > 0 {
+		return fmt.Errorf("cannot pass both a rootfs mount and Windows.LayerFolders: %w", errdefs.ErrFailedPrecondition)
+	}
+	if len(rootfs) == 0 && len(layerFolders) == 0 {
+		return fmt.Errorf("must pass either a rootfs mount or Windows.LayerFolders: %w", errdefs.ErrFailedPrecondition)
+	}
+	if len(rootfs) > 0 {
+		// We have a rootfs.
+
+		if len(rootfs) > 1 {
+			return fmt.Errorf("expected a single rootfs mount: %w", errdefs.ErrFailedPrecondition)
+		}
+		if rootfs[0].Target != "" {
+			return fmt.Errorf("rootfs mount is missing Target path: %w", errdefs.ErrFailedPrecondition)
+		}
+	} else {
+		// We have layerFolders.
+
+		if len(layerFolders) < 2 {
+			return fmt.Errorf("must pass at least two Windows.LayerFolders: %w", errdefs.ErrFailedPrecondition)
+		}
+	}
+
+	return nil
+}
+
+func ensureScratchVHD(ctx context.Context, scratchFolder string, layerFolders []string) error {
+	if _, err := os.Stat(scratchFolder); os.IsNotExist(err) {
+		if err := os.MkdirAll(scratchFolder, 0777); err != nil {
+			return fmt.Errorf("failed to auto-create container scratch folder %s: %w", scratchFolder, err)
+		}
+	}
+
+	// Create sandbox.vhdx if it doesn't exist in the scratch folder.
+	if _, err := os.Stat(filepath.Join(scratchFolder, "sandbox.vhdx")); os.IsNotExist(err) {
+		if err := wclayer.CreateScratchLayer(ctx, scratchFolder, layerFolders); err != nil {
+			return fmt.Errorf("failed to CreateSandboxLayer: %w", err)
+		}
+	}
+	return nil
+}
+
+// TODO(ambarve): functions & constants defined below are direct copies of functions already defined
+// in containerd 2.0 snapshotter/mount packages. Once we vendor containerd 2.0 in the shim we can get rid of these.
+const (
+	// parentLayerPathsFlag is the options flag used to represent the JSON encoded
+	// list of parent layers required to use the layer
+	parentLayerPathsFlag = "parentLayerPaths="
+
+	// Similar to parentLayerPathsFlag, this is the options flag used to represent the JSON encoded list of
+	// parent layer CIMs
+	parentLayerCimPathsFlag = "parentCimPaths="
+
+	LegacyMountType string = "windows-layer"
+	CimFSMountType string = "CimFS"
+)
+
+// getOptionAsArray finds if there is an option which has the given prefix and if such an
+// option is found, the prefix is removed from that option string and the remaining string is
+// JSON unmarshalled into a string array. Note that this works because such option values
+// are always stored in the form of `option_name=<JSON encoded string array>`. In this case the
+// optPrefix becomes `option_name=` so that the remaining substring can be directly
+// unmarshalled as JSON.
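+// For example (hypothetical mount contents), given
+//
+//	m := &types.Mount{
+//		Type:    CimFSMountType,
+//		Source:  `C:\layers\scratch`,
+//		Options: []string{`parentCimPaths=["C:\\cim-layers\\1.cim"]`},
+//	}
+//
+// getOptionAsArray(m, parentLayerCimPathsFlag) returns ([]string{`C:\cim-layers\1.cim`}, nil).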
+func getOptionAsArray(m *types.Mount, optPrefix string) ([]string, error) { + var values []string + for _, option := range m.Options { + if val, ok := strings.CutPrefix(option, optPrefix); ok { + err := json.Unmarshal([]byte(val), &values) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal option `%s`: %w", optPrefix, err) + } + break + } + } + return values, nil +} diff --git a/internal/layers/layers.go b/internal/layers/layers.go deleted file mode 100644 index c91303db91..0000000000 --- a/internal/layers/layers.go +++ /dev/null @@ -1,649 +0,0 @@ -//go:build windows -// +build windows - -package layers - -import ( - "context" - "fmt" - "os" - "path/filepath" - "time" - - "github.com/Microsoft/go-winio/pkg/fs" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sys/windows" - - "github.com/Microsoft/hcsshim/computestorage" - "github.com/Microsoft/hcsshim/internal/guestpath" - hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" - "github.com/Microsoft/hcsshim/internal/hcserror" - "github.com/Microsoft/hcsshim/internal/log" - "github.com/Microsoft/hcsshim/internal/ospath" - "github.com/Microsoft/hcsshim/internal/resources" - "github.com/Microsoft/hcsshim/internal/uvm" - "github.com/Microsoft/hcsshim/internal/uvm/scsi" - "github.com/Microsoft/hcsshim/internal/wclayer" - cimlayer "github.com/Microsoft/hcsshim/internal/wclayer/cim" -) - -type LCOWLayer struct { - VHDPath string - Partition uint64 -} - -// LCOWLayers defines a set of LCOW layers. -// For future extensibility, the LCOWLayer type could be swapped for an interface, -// and we could either call some method on the interface to "apply" it directly to the UVM, -// or type cast it to the various types that we support, and use the one it matches. -// This would allow us to support different "types" of mounts, such as raw VHD, VHD+partition, etc. -type LCOWLayers struct { - // Should be in order from top-most layer to bottom-most layer. - Layers []*LCOWLayer - ScratchVHDPath string -} - -type lcowLayersCloser struct { - uvm *uvm.UtilityVM - guestCombinedLayersPath string - scratchMount resources.ResourceCloser - layerClosers []resources.ResourceCloser -} - -func (lc *lcowLayersCloser) Release(ctx context.Context) (retErr error) { - if err := lc.uvm.RemoveCombinedLayersLCOW(ctx, lc.guestCombinedLayersPath); err != nil { - log.G(ctx).WithError(err).Error("failed RemoveCombinedLayersLCOW") - if retErr == nil { //nolint:govet // nilness: consistency with below - retErr = fmt.Errorf("first error: %w", err) - } - } - if err := lc.scratchMount.Release(ctx); err != nil { - log.G(ctx).WithError(err).Error("failed LCOW scratch mount release") - if retErr == nil { - retErr = fmt.Errorf("first error: %w", err) - } - } - for i, closer := range lc.layerClosers { - if err := closer.Release(ctx); err != nil { - log.G(ctx).WithFields(logrus.Fields{ - logrus.ErrorKey: err, - "layerIndex": i, - }).Error("failed releasing LCOW layer") - if retErr == nil { - retErr = fmt.Errorf("first error: %w", err) - } - } - } - return -} - -// MountLCOWLayers is a helper for clients to hide all the complexity of layer mounting for LCOW -// Layer folder are in order: base, [rolayer1..rolayern,] scratch -// Returns the path at which the `rootfs` of the container can be accessed. Also, returns the path inside the -// UVM at which container scratch directory is located. Usually, this path is the path at which the container -// scratch VHD is mounted. However, in case of scratch sharing this is a directory under the UVM scratch. 
-func MountLCOWLayers(ctx context.Context, containerID string, layers *LCOWLayers, guestRoot string, vm *uvm.UtilityVM) (_, _ string, _ resources.ResourceCloser, err error) { - if vm == nil { - return "", "", nil, errors.New("MountLCOWLayers cannot be called for process-isolated containers") - } - - if vm.OS() != "linux" { - return "", "", nil, errors.New("MountLCOWLayers should only be called for LCOW") - } - - // V2 UVM - log.G(ctx).WithField("os", vm.OS()).Debug("hcsshim::MountLCOWLayers V2 UVM") - - var ( - layerClosers []resources.ResourceCloser - lcowUvmLayerPaths []string - ) - defer func() { - if err != nil { - for _, closer := range layerClosers { - if err := closer.Release(ctx); err != nil { - log.G(ctx).WithError(err).Warn("failed to remove lcow layer on cleanup") - } - } - } - }() - - for _, layer := range layers.Layers { - log.G(ctx).WithField("layerPath", layer.VHDPath).Debug("mounting layer") - uvmPath, closer, err := addLCOWLayer(ctx, vm, layer) - if err != nil { - return "", "", nil, fmt.Errorf("failed to add LCOW layer: %w", err) - } - layerClosers = append(layerClosers, closer) - lcowUvmLayerPaths = append(lcowUvmLayerPaths, uvmPath) - } - - hostPath := layers.ScratchVHDPath - hostPath, err = filepath.EvalSymlinks(hostPath) - if err != nil { - return "", "", nil, fmt.Errorf("failed to eval symlinks on scratch path: %w", err) - } - log.G(ctx).WithField("hostPath", hostPath).Debug("mounting scratch VHD") - - mConfig := &scsi.MountConfig{ - Encrypted: vm.ScratchEncryptionEnabled(), - // For scratch disks, we support formatting the disk if it is not already - // formatted. - EnsureFilesystem: true, - Filesystem: "ext4", - } - if vm.ScratchEncryptionEnabled() { - // Encrypted scratch devices are formatted with xfs - mConfig.Filesystem = "xfs" - } - scsiMount, err := vm.SCSIManager.AddVirtualDisk( - ctx, - hostPath, - false, - vm.ID(), - mConfig, - ) - if err != nil { - return "", "", nil, fmt.Errorf("failed to add SCSI scratch VHD: %w", err) - } - - // handles the case where we want to share a scratch disk for multiple containers instead - // of mounting a new one. Pass a unique value for `ScratchPath` to avoid container upper and - // work directories colliding in the UVM. - containerScratchPathInUVM := ospath.Join("linux", scsiMount.GuestPath(), "scratch", containerID) - - defer func() { - if err != nil { - if err := scsiMount.Release(ctx); err != nil { - log.G(ctx).WithError(err).Warn("failed to remove scratch on cleanup") - } - } - }() - - rootfs := ospath.Join(vm.OS(), guestRoot, guestpath.RootfsPath) - err = vm.CombineLayersLCOW(ctx, containerID, lcowUvmLayerPaths, containerScratchPathInUVM, rootfs) - if err != nil { - return "", "", nil, err - } - log.G(ctx).Debug("hcsshim::MountLCOWLayers Succeeded") - closer := &lcowLayersCloser{ - uvm: vm, - guestCombinedLayersPath: rootfs, - scratchMount: scsiMount, - layerClosers: layerClosers, - } - return rootfs, containerScratchPathInUVM, closer, nil -} - -// MountWCOWLayers is a helper for clients to hide all the complexity of layer mounting for WCOW. -// Layer folder are in order: [rolayerN..rolayer1, base] scratch -// -// v1/v2: Argon WCOW: Returns the mount path on the host as a volume GUID. -// v1: Xenon WCOW: Done internally in HCS, so no point calling doing anything here. -// v2: Xenon WCOW: Returns a CombinedLayersV2 structure where ContainerRootPath is a folder -// inside the utility VM which is a GUID mapping of the scratch folder. Each of the layers are -// the VSMB locations where the read-only layers are mounted. 
-// -// Job container: Returns the mount path on the host as a volume guid, with the volume mounted on -// the host at `volumeMountPath`. -func MountWCOWLayers(ctx context.Context, containerID string, layerFolders []string, volumeMountPath string, vm *uvm.UtilityVM) (_ string, _ resources.ResourceCloser, err error) { - if vm == nil { - return mountWCOWHostLayers(ctx, layerFolders, containerID, volumeMountPath) - } - - if vm.OS() != "windows" { - return "", nil, errors.New("MountWCOWLayers should only be called for WCOW") - } - - return mountWCOWIsolatedLayers(ctx, containerID, layerFolders, volumeMountPath, vm) -} - -type wcowHostLayersCloser struct { - containerID string - volumeMountPath string - layers []string -} - -func ReleaseCimFSHostLayers(ctx context.Context, scratchLayerFolderPath, containerID string) error { - mountPath, err := wclayer.GetLayerMountPath(ctx, scratchLayerFolderPath) - if err != nil { - return err - } - - if err = computestorage.DetachOverlayFilter(ctx, mountPath, hcsschema.UnionFS); err != nil { - return err - } - - return cimlayer.CleanupContainerMounts(containerID) -} - -func (lc *wcowHostLayersCloser) Release(ctx context.Context) error { - if lc.volumeMountPath != "" { - if err := RemoveSandboxMountPoint(ctx, lc.volumeMountPath); err != nil { - return err - } - } - scratchLayerFolderPath := lc.layers[len(lc.layers)-1] - var err error - if cimlayer.IsCimLayer(lc.layers[0]) { - err = ReleaseCimFSHostLayers(ctx, scratchLayerFolderPath, lc.containerID) - } else { - err = wclayer.UnprepareLayer(ctx, scratchLayerFolderPath) - } - if err != nil { - return err - } - return wclayer.DeactivateLayer(ctx, scratchLayerFolderPath) -} - -func mountWCOWHostLegacyLayers(ctx context.Context, layerFolders []string, volumeMountPath string) (_ string, err error) { - if len(layerFolders) < 2 { - return "", errors.New("need at least two layers - base and scratch") - } - path := layerFolders[len(layerFolders)-1] - rest := layerFolders[:len(layerFolders)-1] - // Simple retry loop to handle some behavior on RS5. Loopback VHDs used to be mounted in a different manner on RS5 (ws2019) which led to some - // very odd cases where things would succeed when they shouldn't have, or we'd simply timeout if an operation took too long. Many - // parallel invocations of this code path and stressing the machine seem to bring out the issues, but all of the possible failure paths - // that bring about the errors we have observed aren't known. - // - // On 19h1+ this *shouldn't* be needed, but the logic is to break if everything succeeded so this is harmless and shouldn't need a version check. - var lErr error - for i := 0; i < 5; i++ { - lErr = func() (err error) { - if err := wclayer.ActivateLayer(ctx, path); err != nil { - return err - } - - defer func() { - if err != nil { - _ = wclayer.DeactivateLayer(ctx, path) - } - }() - - return wclayer.PrepareLayer(ctx, path, rest) - }() - - if lErr != nil { - // Common errors seen from the RS5 behavior mentioned above is ERROR_NOT_READY and ERROR_DEVICE_NOT_CONNECTED. The former occurs when HCS - // tries to grab the volume path of the disk but it doesn't succeed, usually because the disk isn't actually mounted. DEVICE_NOT_CONNECTED - // has been observed after launching multiple containers in parallel on a machine under high load. This has also been observed to be a trigger - // for ERROR_NOT_READY as well. 
- var hcserr *hcserror.HcsError - if errors.As(lErr, &hcserr) { - if errors.Is(hcserr.Err, windows.ERROR_NOT_READY) || errors.Is(hcserr.Err, windows.ERROR_DEVICE_NOT_CONNECTED) { - log.G(ctx).WithField("path", path).WithError(hcserr.Err).Warning("retrying layer operations after failure") - - // Sleep for a little before a re-attempt. A probable cause for these issues in the first place is events not getting - // reported in time so might be good to give some time for things to "cool down" or get back to a known state. - time.Sleep(time.Millisecond * 100) - continue - } - } - // This was a failure case outside of the commonly known error conditions, don't retry here. - return "", lErr - } - - // No errors in layer setup, we can leave the loop - break - } - // If we got unlucky and ran into one of the two errors mentioned five times in a row and left the loop, we need to check - // the loop error here and fail also. - if lErr != nil { - return "", errors.Wrap(lErr, "layer retry loop failed") - } - - // If any of the below fails, we want to detach the filter and unmount the disk. - defer func() { - if err != nil { - _ = wclayer.UnprepareLayer(ctx, path) - _ = wclayer.DeactivateLayer(ctx, path) - } - }() - - mountPath, err := wclayer.GetLayerMountPath(ctx, path) - if err != nil { - return "", err - } - return mountPath, nil - -} - -func mountWCOWHostCimFSLayers(ctx context.Context, layerFolders []string, containerID, volumeMountPath string) (_ string, err error) { - scratchLayer := layerFolders[len(layerFolders)-1] - topMostLayer := layerFolders[0] - if err = wclayer.ActivateLayer(ctx, scratchLayer); err != nil { - return "", err - } - defer func() { - if err != nil { - _ = wclayer.DeactivateLayer(ctx, scratchLayer) - } - }() - - mountPath, err := wclayer.GetLayerMountPath(ctx, scratchLayer) - if err != nil { - return "", err - } - - volume, err := cimlayer.MountCimLayer(ctx, cimlayer.GetCimPathFromLayer(topMostLayer), containerID) - if err != nil { - return "", fmt.Errorf("mount layer cim for %s: %w", topMostLayer, err) - } - defer func() { - if err != nil { - _ = cimlayer.UnmountCimLayer(ctx, cimlayer.GetCimPathFromLayer(topMostLayer), containerID) - } - }() - - // Use the layer path for GUID rather than the mounted volume path, so that the generated layerID - // remains same. - layerID, err := wclayer.LayerID(ctx, topMostLayer) - if err != nil { - return "", err - } - - layerData := computestorage.LayerData{ - FilterType: hcsschema.UnionFS, - // Container filesystem contents are under a directory named "Files" inside the mounted cim. - // UnionFS needs this path, so append "Files" to the layer path before passing it on. 
- Layers: []hcsschema.Layer{ - { - Id: layerID.String(), - Path: filepath.Join(volume, "Files"), - }, - }, - } - - if err = computestorage.AttachOverlayFilter(ctx, mountPath, layerData); err != nil { - return "", err - } - return mountPath, nil -} - -func mountWCOWHostLayers(ctx context.Context, layerFolders []string, containerID, volumeMountPath string) (_ string, _ resources.ResourceCloser, err error) { - var mountPath string - if cimlayer.IsCimLayer(layerFolders[0]) { - mountPath, err = mountWCOWHostCimFSLayers(ctx, layerFolders, containerID, volumeMountPath) - } else { - mountPath, err = mountWCOWHostLegacyLayers(ctx, layerFolders, volumeMountPath) - } - if err != nil { - return "", nil, err - } - closer := &wcowHostLayersCloser{ - volumeMountPath: volumeMountPath, - layers: layerFolders, - containerID: containerID, - } - defer func() { - if err != nil { - _ = closer.Release(ctx) - } - }() - - // Mount the volume to a directory on the host if requested. This is the case for job containers. - if volumeMountPath != "" { - if err := MountSandboxVolume(ctx, volumeMountPath, mountPath); err != nil { - return "", nil, err - } - } - - return mountPath, closer, nil -} - -type wcowIsolatedLayersCloser struct { - uvm *uvm.UtilityVM - guestCombinedLayersPath string - scratchMount resources.ResourceCloser - layerClosers []resources.ResourceCloser -} - -func (lc *wcowIsolatedLayersCloser) Release(ctx context.Context) (retErr error) { - if err := lc.uvm.RemoveCombinedLayersWCOW(ctx, lc.guestCombinedLayersPath); err != nil { - log.G(ctx).WithError(err).Error("failed RemoveCombinedLayersWCOW") - if retErr == nil { //nolint:govet // nilness: consistency with below - retErr = fmt.Errorf("first error: %w", err) - } - } - if err := lc.scratchMount.Release(ctx); err != nil { - log.G(ctx).WithError(err).Error("failed WCOW scratch mount release") - if retErr == nil { - retErr = fmt.Errorf("first error: %w", err) - } - } - for i, closer := range lc.layerClosers { - if err := closer.Release(ctx); err != nil { - log.G(ctx).WithFields(logrus.Fields{ - logrus.ErrorKey: err, - "layerIndex": i, - }).Error("failed releasing WCOW layer") - if retErr == nil { - retErr = fmt.Errorf("first error: %w", err) - } - } - } - return -} - -func mountWCOWIsolatedLayers(ctx context.Context, containerID string, layerFolders []string, volumeMountPath string, vm *uvm.UtilityVM) (_ string, _ resources.ResourceCloser, err error) { - log.G(ctx).WithField("os", vm.OS()).Debug("hcsshim::MountWCOWLayers V2 UVM") - - var ( - layersAdded []string - layerClosers []resources.ResourceCloser - ) - defer func() { - if err != nil { - for _, l := range layerClosers { - if err := l.Release(ctx); err != nil { - log.G(ctx).WithError(err).Warn("failed to remove wcow layer on cleanup") - } - } - } - }() - - for _, layerPath := range layerFolders[:len(layerFolders)-1] { - log.G(ctx).WithField("layerPath", layerPath).Debug("mounting layer") - options := vm.DefaultVSMBOptions(true) - options.TakeBackupPrivilege = true - mount, err := vm.AddVSMB(ctx, layerPath, options) - if err != nil { - return "", nil, fmt.Errorf("failed to add VSMB layer: %w", err) - } - layersAdded = append(layersAdded, layerPath) - layerClosers = append(layerClosers, mount) - } - - hostPath, err := getScratchVHDPath(layerFolders) - if err != nil { - return "", nil, fmt.Errorf("failed to get scratch VHD path in layer folders: %w", err) - } - log.G(ctx).WithField("hostPath", hostPath).Debug("mounting scratch VHD") - - scsiMount, err := vm.SCSIManager.AddVirtualDisk(ctx, hostPath, 
false, vm.ID(), &scsi.MountConfig{}) - if err != nil { - return "", nil, fmt.Errorf("failed to add SCSI scratch VHD: %w", err) - } - containerScratchPathInUVM := scsiMount.GuestPath() - - defer func() { - if err != nil { - if err := scsiMount.Release(ctx); err != nil { - log.G(ctx).WithError(err).Warn("failed to remove scratch on cleanup") - } - } - }() - - // Load the filter at the C:\s location calculated above. We pass into this - // request each of the read-only layer folders. - var layers []hcsschema.Layer - layers, err = GetHCSLayers(ctx, vm, layersAdded) - if err != nil { - return "", nil, err - } - err = vm.CombineLayersWCOW(ctx, layers, containerScratchPathInUVM) - if err != nil { - return "", nil, err - } - log.G(ctx).Debug("hcsshim::MountWCOWLayers Succeeded") - closer := &wcowIsolatedLayersCloser{ - uvm: vm, - guestCombinedLayersPath: containerScratchPathInUVM, - scratchMount: scsiMount, - layerClosers: layerClosers, - } - return containerScratchPathInUVM, closer, nil -} - -func addLCOWLayer(ctx context.Context, vm *uvm.UtilityVM, layer *LCOWLayer) (uvmPath string, _ resources.ResourceCloser, err error) { - // Don't add as VPMEM when we want additional devices on the UVM to be fully physically backed. - // Also don't use VPMEM when we need to mount a specific partition of the disk, as this is only - // supported for SCSI. - if !vm.DevicesPhysicallyBacked() && layer.Partition == 0 { - // We first try vPMEM and if it is full or the file is too large we - // fall back to SCSI. - mount, err := vm.AddVPMem(ctx, layer.VHDPath) - if err == nil { - log.G(ctx).WithFields(logrus.Fields{ - "layerPath": layer.VHDPath, - "layerType": "vpmem", - }).Debug("Added LCOW layer") - return mount.GuestPath, mount, nil - } else if !errors.Is(err, uvm.ErrNoAvailableLocation) && !errors.Is(err, uvm.ErrMaxVPMemLayerSize) { - return "", nil, fmt.Errorf("failed to add VPMEM layer: %w", err) - } - } - - sm, err := vm.SCSIManager.AddVirtualDisk( - ctx, - layer.VHDPath, - true, - "", - &scsi.MountConfig{ - Partition: layer.Partition, - Options: []string{"ro"}, - }, - ) - if err != nil { - return "", nil, fmt.Errorf("failed to add SCSI layer: %w", err) - } - log.G(ctx).WithFields(logrus.Fields{ - "layerPath": layer.VHDPath, - "layerPartition": layer.Partition, - "layerType": "scsi", - }).Debug("Added LCOW layer") - return sm.GuestPath(), sm, nil -} - -// GetHCSLayers converts host paths corresponding to container layers into HCS schema V2 layers -func GetHCSLayers(ctx context.Context, vm *uvm.UtilityVM, paths []string) (layers []hcsschema.Layer, err error) { - for _, path := range paths { - uvmPath, err := vm.GetVSMBUvmPath(ctx, path, true) - if err != nil { - return nil, err - } - layerID, err := wclayer.LayerID(ctx, path) - if err != nil { - return nil, err - } - layers = append(layers, hcsschema.Layer{Id: layerID.String(), Path: uvmPath}) - } - return layers, nil -} - -// ToHostHcsSchemaLayers converts the layer paths for Argon into HCS schema V2 layers -func ToHostHcsSchemaLayers(ctx context.Context, containerID string, roLayers []string) ([]hcsschema.Layer, error) { - if cimlayer.IsCimLayer(roLayers[0]) { - return cimLayersToHostHcsSchemaLayers(ctx, containerID, roLayers) - } - layers := []hcsschema.Layer{} - for _, layerPath := range roLayers { - layerID, err := wclayer.LayerID(ctx, layerPath) - if err != nil { - return nil, err - } - layers = append(layers, hcsschema.Layer{Id: layerID.String(), Path: layerPath}) - } - return layers, nil -} - -// cimLayersToHostHcsSchemaLayers converts given cimfs Argon 
layers to HCS schema V2 layers. -func cimLayersToHostHcsSchemaLayers(ctx context.Context, containerID string, paths []string) ([]hcsschema.Layer, error) { - topMostLayer := paths[0] - cimPath := cimlayer.GetCimPathFromLayer(topMostLayer) - volume, err := cimlayer.GetCimMountPath(cimPath, containerID) - if err != nil { - return nil, err - } - // Use the layer path for GUID rather than the mounted volume path, so that the generated layerID - // remains same everywhere - layerID, err := wclayer.LayerID(ctx, topMostLayer) - if err != nil { - return nil, err - } - // Note that when passing the hcsschema formatted layer, "Files" SHOULDN'T be appended to the volume - // path. The internal code automatically does that. - return []hcsschema.Layer{{Id: layerID.String(), Path: volume}}, nil - -} -func getScratchVHDPath(layerFolders []string) (string, error) { - hostPath := filepath.Join(layerFolders[len(layerFolders)-1], "sandbox.vhdx") - // For LCOW, we can reuse another container's scratch space (usually the sandbox container's). - // - // When sharing a scratch space, the `hostPath` will be a symlink to the sandbox.vhdx location to use. - // When not sharing a scratch space, `hostPath` will be the path to the sandbox.vhdx to use. - // - // Evaluate the symlink here (if there is one). - hostPath, err := fs.ResolvePath(hostPath) - if err != nil { - return "", errors.Wrap(err, "failed to resolve path") - } - return hostPath, nil -} - -// Mount the sandbox vhd to a user friendly path. -func MountSandboxVolume(ctx context.Context, hostPath, volumeName string) (err error) { - log.G(ctx).WithFields(logrus.Fields{ - "hostpath": hostPath, - "volumeName": volumeName, - }).Debug("mounting volume for container") - - if _, err := os.Stat(hostPath); os.IsNotExist(err) { - if err := os.MkdirAll(hostPath, 0777); err != nil { - return err - } - } - - defer func() { - if err != nil { - os.RemoveAll(hostPath) - } - }() - - // Make sure volumeName ends with a trailing slash as required. - if volumeName[len(volumeName)-1] != '\\' { - volumeName += `\` // Be nice to clients and make sure well-formed for back-compat - } - - if err = windows.SetVolumeMountPoint(windows.StringToUTF16Ptr(hostPath), windows.StringToUTF16Ptr(volumeName)); err != nil { - return errors.Wrapf(err, "failed to mount sandbox volume to %s on host", hostPath) - } - return nil -} - -// Remove volume mount point. And remove folder afterwards. 
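// Note: MountSandboxVolume and RemoveSandboxMountPoint are deleted here together with
// layers.go, yet the new jobcontainers/storage.go code in this patch still calls
// layers.MountSandboxVolume and layers.RemoveSandboxMountPoint, so they are presumably
// re-homed in another internal/layers file that is not shown in this section.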
-func RemoveSandboxMountPoint(ctx context.Context, hostPath string) error { - log.G(ctx).WithFields(logrus.Fields{ - "hostpath": hostPath, - }).Debug("removing volume mount point for container") - - if err := windows.DeleteVolumeMountPoint(windows.StringToUTF16Ptr(hostPath)); err != nil { - return errors.Wrap(err, "failed to delete sandbox volume mount point") - } - if err := os.Remove(hostPath); err != nil { - return errors.Wrapf(err, "failed to remove sandbox mounted folder path %q", hostPath) - } - return nil -} diff --git a/internal/layers/lcow.go b/internal/layers/lcow.go new file mode 100644 index 0000000000..cc934121b4 --- /dev/null +++ b/internal/layers/lcow.go @@ -0,0 +1,285 @@ +//go:build windows +// +build windows + +package layers + +import ( + "context" + "encoding/json" + "fmt" + "path/filepath" + "strings" + + "github.com/containerd/containerd/api/types" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "github.com/Microsoft/hcsshim/internal/guestpath" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/ospath" + "github.com/Microsoft/hcsshim/internal/resources" + "github.com/Microsoft/hcsshim/internal/uvm" + "github.com/Microsoft/hcsshim/internal/uvm/scsi" +) + +type LCOWLayer struct { + VHDPath string + Partition uint64 +} + +// LCOWLayers defines a set of LCOW layers. +// For future extensibility, the LCOWLayer type could be swapped for an interface, +// and we could either call some method on the interface to "apply" it directly to the UVM, +// or type cast it to the various types that we support, and use the one it matches. +// This would allow us to support different "types" of mounts, such as raw VHD, VHD+partition, etc. +type LCOWLayers struct { + // Should be in order from top-most layer to bottom-most layer. + Layers []*LCOWLayer + ScratchVHDPath string +} + +type lcowLayersCloser struct { + uvm *uvm.UtilityVM + guestCombinedLayersPath string + scratchMount resources.ResourceCloser + layerClosers []resources.ResourceCloser +} + +func (lc *lcowLayersCloser) Release(ctx context.Context) (retErr error) { + if err := lc.uvm.RemoveCombinedLayersLCOW(ctx, lc.guestCombinedLayersPath); err != nil { + log.G(ctx).WithError(err).Error("failed RemoveCombinedLayersLCOW") + if retErr == nil { //nolint:govet // nilness: consistency with below + retErr = fmt.Errorf("first error: %w", err) + } + } + if err := lc.scratchMount.Release(ctx); err != nil { + log.G(ctx).WithError(err).Error("failed LCOW scratch mount release") + if retErr == nil { + retErr = fmt.Errorf("first error: %w", err) + } + } + for i, closer := range lc.layerClosers { + if err := closer.Release(ctx); err != nil { + log.G(ctx).WithFields(logrus.Fields{ + logrus.ErrorKey: err, + "layerIndex": i, + }).Error("failed releasing LCOW layer") + if retErr == nil { + retErr = fmt.Errorf("first error: %w", err) + } + } + } + return +} + +// MountLCOWLayers is a helper for clients to hide all the complexity of layer mounting for LCOW +// Layer folder are in order: base, [rolayer1..rolayern,] scratch +// Returns the path at which the `rootfs` of the container can be accessed. Also, returns the path inside the +// UVM at which container scratch directory is located. Usually, this path is the path at which the container +// scratch VHD is mounted. However, in case of scratch sharing this is a directory under the UVM scratch. 
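+// The returned ResourceCloser removes the combined layers, releases the scratch mount and then
+// releases each individual layer mount.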
+func MountLCOWLayers(ctx context.Context, containerID string, layers *LCOWLayers, guestRoot string, vm *uvm.UtilityVM) (_, _ string, _ resources.ResourceCloser, err error) { + if vm == nil { + return "", "", nil, errors.New("MountLCOWLayers cannot be called for process-isolated containers") + } + + if vm.OS() != "linux" { + return "", "", nil, errors.New("MountLCOWLayers should only be called for LCOW") + } + + // V2 UVM + log.G(ctx).WithField("os", vm.OS()).Debug("hcsshim::MountLCOWLayers V2 UVM") + + var ( + layerClosers []resources.ResourceCloser + lcowUvmLayerPaths []string + ) + defer func() { + if err != nil { + for _, closer := range layerClosers { + if err := closer.Release(ctx); err != nil { + log.G(ctx).WithError(err).Warn("failed to remove lcow layer on cleanup") + } + } + } + }() + + for _, layer := range layers.Layers { + log.G(ctx).WithField("layerPath", layer.VHDPath).Debug("mounting layer") + uvmPath, closer, err := addLCOWLayer(ctx, vm, layer) + if err != nil { + return "", "", nil, fmt.Errorf("failed to add LCOW layer: %w", err) + } + layerClosers = append(layerClosers, closer) + lcowUvmLayerPaths = append(lcowUvmLayerPaths, uvmPath) + } + + hostPath := layers.ScratchVHDPath + hostPath, err = filepath.EvalSymlinks(hostPath) + if err != nil { + return "", "", nil, fmt.Errorf("failed to eval symlinks on scratch path: %w", err) + } + log.G(ctx).WithField("hostPath", hostPath).Debug("mounting scratch VHD") + + mConfig := &scsi.MountConfig{ + Encrypted: vm.ScratchEncryptionEnabled(), + // For scratch disks, we support formatting the disk if it is not already + // formatted. + EnsureFilesystem: true, + Filesystem: "ext4", + } + if vm.ScratchEncryptionEnabled() { + // Encrypted scratch devices are formatted with xfs + mConfig.Filesystem = "xfs" + } + scsiMount, err := vm.SCSIManager.AddVirtualDisk( + ctx, + hostPath, + false, + vm.ID(), + mConfig, + ) + if err != nil { + return "", "", nil, fmt.Errorf("failed to add SCSI scratch VHD: %w", err) + } + + // handles the case where we want to share a scratch disk for multiple containers instead + // of mounting a new one. Pass a unique value for `ScratchPath` to avoid container upper and + // work directories colliding in the UVM. + containerScratchPathInUVM := ospath.Join("linux", scsiMount.GuestPath(), "scratch", containerID) + + defer func() { + if err != nil { + if err := scsiMount.Release(ctx); err != nil { + log.G(ctx).WithError(err).Warn("failed to remove scratch on cleanup") + } + } + }() + + rootfs := ospath.Join(vm.OS(), guestRoot, guestpath.RootfsPath) + err = vm.CombineLayersLCOW(ctx, containerID, lcowUvmLayerPaths, containerScratchPathInUVM, rootfs) + if err != nil { + return "", "", nil, err + } + log.G(ctx).Debug("hcsshim::MountLCOWLayers Succeeded") + closer := &lcowLayersCloser{ + uvm: vm, + guestCombinedLayersPath: rootfs, + scratchMount: scsiMount, + layerClosers: layerClosers, + } + return rootfs, containerScratchPathInUVM, closer, nil +} + +func addLCOWLayer(ctx context.Context, vm *uvm.UtilityVM, layer *LCOWLayer) (uvmPath string, _ resources.ResourceCloser, err error) { + // Don't add as VPMEM when we want additional devices on the UVM to be fully physically backed. + // Also don't use VPMEM when we need to mount a specific partition of the disk, as this is only + // supported for SCSI. + if !vm.DevicesPhysicallyBacked() && layer.Partition == 0 { + // We first try vPMEM and if it is full or the file is too large we + // fall back to SCSI. 
+ mount, err := vm.AddVPMem(ctx, layer.VHDPath) + if err == nil { + log.G(ctx).WithFields(logrus.Fields{ + "layerPath": layer.VHDPath, + "layerType": "vpmem", + }).Debug("Added LCOW layer") + return mount.GuestPath, mount, nil + } else if !errors.Is(err, uvm.ErrNoAvailableLocation) && !errors.Is(err, uvm.ErrMaxVPMemLayerSize) { + return "", nil, fmt.Errorf("failed to add VPMEM layer: %w", err) + } + } + + sm, err := vm.SCSIManager.AddVirtualDisk( + ctx, + layer.VHDPath, + true, + "", + &scsi.MountConfig{ + Partition: layer.Partition, + Options: []string{"ro"}, + }, + ) + if err != nil { + return "", nil, fmt.Errorf("failed to add SCSI layer: %w", err) + } + log.G(ctx).WithFields(logrus.Fields{ + "layerPath": layer.VHDPath, + "layerPartition": layer.Partition, + "layerType": "scsi", + }).Debug("Added LCOW layer") + return sm.GuestPath(), sm, nil +} + +// ParseLCOWLayers returns a layers.LCOWLayers describing the rootfs that should be set up +// for an LCOW container. It takes as input the set of rootfs mounts and the layer folders +// from the OCI spec. +func ParseLCOWLayers(rootfs []*types.Mount, layerFolders []string) (*LCOWLayers, error) { + if err := validateRootfsAndLayers(rootfs, layerFolders); err != nil { + return nil, err + } + + legacyLayer := func(scratchLayer string, parentLayers []string) *LCOWLayers { + // Each read-only layer should have a layer.vhd, and the scratch layer should have a sandbox.vhdx. + roLayers := make([]*LCOWLayer, 0, len(parentLayers)) + for _, parentLayer := range parentLayers { + roLayers = append( + roLayers, + &LCOWLayer{ + VHDPath: filepath.Join(parentLayer, "layer.vhd"), + }, + ) + } + return &LCOWLayers{ + Layers: roLayers, + ScratchVHDPath: filepath.Join(scratchLayer, "sandbox.vhdx"), + } + } + // Due to previous validation, we know that for a Linux container we either have LayerFolders, or + // a single rootfs mount. 
+ if len(layerFolders) > 0 { + return legacyLayer(layerFolders[len(layerFolders)-1], layerFolders[:len(layerFolders)-1]), nil + } + m := rootfs[0] + switch m.Type { + case "lcow-layer": + scratchLayer := m.Source + parentLayers, err := getOptionAsArray(m, parentLayerPathsFlag) + if err != nil { + return nil, err + } + return legacyLayer(scratchLayer, parentLayers), nil + case "lcow-partitioned-layer": + var ( + scratchPath string + layerData []struct { + Path string + Partition uint64 + } + ) + for _, opt := range m.Options { + if optPrefix := "scratch="; strings.HasPrefix(opt, optPrefix) { + scratchPath = strings.TrimPrefix(opt, optPrefix) + } else if optPrefix := "parent-partitioned-layers="; strings.HasPrefix(opt, optPrefix) { + layerJSON := strings.TrimPrefix(opt, optPrefix) + if err := json.Unmarshal([]byte(layerJSON), &layerData); err != nil { + return nil, err + } + } else { + return nil, fmt.Errorf("unrecognized %s mount option: %s", m.Type, opt) + } + } + roLayers := make([]*LCOWLayer, 0, len(layerData)) + for _, layer := range layerData { + roLayers = append( + roLayers, + &LCOWLayer{ + VHDPath: layer.Path, + Partition: layer.Partition, + }, + ) + } + return &LCOWLayers{Layers: roLayers, ScratchVHDPath: scratchPath}, nil + default: + return nil, fmt.Errorf("unrecognized rootfs mount type: %s", m.Type) + } +} diff --git a/internal/layers/wcow_mount.go b/internal/layers/wcow_mount.go new file mode 100644 index 0000000000..d12593d179 --- /dev/null +++ b/internal/layers/wcow_mount.go @@ -0,0 +1,430 @@ +//go:build windows +// +build windows + +package layers + +import ( + "context" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/windows" + + "github.com/Microsoft/hcsshim/computestorage" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/hcserror" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/resources" + "github.com/Microsoft/hcsshim/internal/uvm" + "github.com/Microsoft/hcsshim/internal/uvm/scsi" + "github.com/Microsoft/hcsshim/internal/wclayer" + cimlayer "github.com/Microsoft/hcsshim/internal/wclayer/cim" +) + +func MountWCOWLayers(ctx context.Context, containerID string, vm *uvm.UtilityVM, wl WCOWLayers) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) { + switch l := wl.(type) { + case *wcowWCIFSLayers: + if vm == nil { + return mountProcessIsolatedWCIFSLayers(ctx, l) + } + return mountHypervIsolatedWCIFSLayers(ctx, l, vm) + case *wcowForkedCIMLayers: + if vm == nil { + return mountProcessIsolatedForkedCimLayers(ctx, containerID, l) + } + return nil, nil, fmt.Errorf("hyperv isolated containers aren't supported with forked cim layers") + default: + return nil, nil, fmt.Errorf("invalid layer type %T", wl) + } +} + +// Represents a single layer that is mounted and ready to use. Depending on the type of +// layers each individual layer may or may not be mounted. However, HCS still needs paths +// of individual layers and a unique ID for each layer. +type MountedWCOWLayer struct { + // A unique layer GUID is expected by HCS for every layer + LayerID string + // The path at which this layer is mounted. Could be a path on the host or a path + // inside the guest. 
+	MountedPath string
+}
+
+type MountedWCOWLayers struct {
+	// path at which rootfs is setup - this could be a path on the host or a path
+	// inside the guest
+	RootFS string
+	// mounted read-only layer paths are required in the container doc that we send to HCS.
+	// In case of WCIFS based layers these would be layer directory paths, however, in case
+	// of CimFS layers this would be a single volume path at which the CIM is mounted.
+	MountedLayerPaths []MountedWCOWLayer
+}
+
+// layer closers are used to correctly clean up layers once the container exits. Note that
+// these layer closers live in the shim process so they can't cleanup the layer in case of
+// a shim crash.
+//
+// wcowHostWCIFSLayerCloser is used to cleanup WCIFS based layers mounted on the host for
+// process isolated containers.
+type wcowHostWCIFSLayerCloser struct {
+	scratchLayerData
+}
+
+func (l *wcowHostWCIFSLayerCloser) Release(ctx context.Context) error {
+	if err := wclayer.UnprepareLayer(ctx, l.scratchLayerPath); err != nil {
+		return err
+	}
+	return wclayer.DeactivateLayer(ctx, l.scratchLayerPath)
+}
+
+func mountProcessIsolatedWCIFSLayers(ctx context.Context, l *wcowWCIFSLayers) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) {
+	// In some legacy layer use cases the scratch VHD might not be already created by the client;
+	// continue to support those scenarios.
+	if err = ensureScratchVHD(ctx, l.scratchLayerPath, l.layerPaths); err != nil {
+		return nil, nil, err
+	}
+
+	// Simple retry loop to handle some behavior on RS5. Loopback VHDs used to be mounted in a different manner on RS5 (ws2019) which led to some
+	// very odd cases where things would succeed when they shouldn't have, or we'd simply timeout if an operation took too long. Many
+	// parallel invocations of this code path and stressing the machine seem to bring out the issues, but all of the possible failure paths
+	// that bring about the errors we have observed aren't known.
+	//
+	// On 19h1+ this *shouldn't* be needed, but the logic is to break if everything succeeded so this is harmless and shouldn't need a version check.
+	var lErr error
+	for i := 0; i < 5; i++ {
+		lErr = func() (err error) {
+			if err := wclayer.ActivateLayer(ctx, l.scratchLayerPath); err != nil {
+				return err
+			}
+
+			defer func() {
+				if err != nil {
+					_ = wclayer.DeactivateLayer(ctx, l.scratchLayerPath)
+				}
+			}()
+
+			return wclayer.PrepareLayer(ctx, l.scratchLayerPath, l.layerPaths)
+		}()
+
+		if lErr != nil {
+			// Common errors seen from the RS5 behavior mentioned above are ERROR_NOT_READY and ERROR_DEVICE_NOT_CONNECTED. The former occurs when HCS
+			// tries to grab the volume path of the disk but it doesn't succeed, usually because the disk isn't actually mounted. DEVICE_NOT_CONNECTED
+			// has been observed after launching multiple containers in parallel on a machine under high load. This has also been observed to be a trigger
+			// for ERROR_NOT_READY as well.
+			var hcserr *hcserror.HcsError
+			if errors.As(lErr, &hcserr) {
+				if errors.Is(hcserr.Err, windows.ERROR_NOT_READY) || errors.Is(hcserr.Err, windows.ERROR_DEVICE_NOT_CONNECTED) {
+					log.G(ctx).WithField("path", l.scratchLayerPath).WithError(hcserr.Err).Warning("retrying layer operations after failure")
+
+					// Sleep for a little before a re-attempt. A probable cause for these issues in the first place is events not getting
+					// reported in time so might be good to give some time for things to "cool down" or get back to a known state.
+ time.Sleep(time.Millisecond * 100) + continue + } + } + // This was a failure case outside of the commonly known error conditions, don't retry here. + return nil, nil, lErr + } + + // No errors in layer setup, we can leave the loop + break + } + // If we got unlucky and ran into one of the two errors mentioned five times in a row and left the loop, we need to check + // the loop error here and fail also. + if lErr != nil { + return nil, nil, errors.Wrap(lErr, "layer retry loop failed") + } + + // If any of the below fails, we want to detach the filter and unmount the disk. + defer func() { + if err != nil { + _ = wclayer.UnprepareLayer(ctx, l.scratchLayerPath) + _ = wclayer.DeactivateLayer(ctx, l.scratchLayerPath) + } + }() + + mountPath, err := wclayer.GetLayerMountPath(ctx, l.scratchLayerPath) + if err != nil { + return nil, nil, err + } + + layersWithID := []MountedWCOWLayer{} + for _, l := range l.layerPaths { + layerID, err := wclayer.LayerID(ctx, l) + if err != nil { + return nil, nil, err + } + layersWithID = append(layersWithID, MountedWCOWLayer{ + LayerID: layerID.String(), + MountedPath: l, + }) + } + + return &MountedWCOWLayers{ + RootFS: mountPath, + MountedLayerPaths: layersWithID, + }, &wcowHostWCIFSLayerCloser{ + scratchLayerData: l.scratchLayerData, + }, nil +} + +// wcowHostForkedCIMLayerCloser is used to cleanup forked CIM layers mounted on the host for process isolated +// containers +type wcowHostForkedCIMLayerCloser struct { + scratchLayerData + containerID string +} + +func (l *wcowHostForkedCIMLayerCloser) Release(ctx context.Context) error { + mountPath, err := wclayer.GetLayerMountPath(ctx, l.scratchLayerPath) + if err != nil { + return err + } + + if err = computestorage.DetachOverlayFilter(ctx, mountPath, hcsschema.UnionFS); err != nil { + return err + } + + if err = cimlayer.CleanupContainerMounts(l.containerID); err != nil { + return err + } + return wclayer.DeactivateLayer(ctx, l.scratchLayerPath) +} + +func mountProcessIsolatedForkedCimLayers(ctx context.Context, containerID string, l *wcowForkedCIMLayers) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) { + if err = wclayer.ActivateLayer(ctx, l.scratchLayerPath); err != nil { + return nil, nil, err + } + defer func() { + if err != nil { + _ = wclayer.DeactivateLayer(ctx, l.scratchLayerPath) + } + }() + + mountPath, err := wclayer.GetLayerMountPath(ctx, l.scratchLayerPath) + if err != nil { + return nil, nil, err + } + + volume, err := cimlayer.MountCimLayer(ctx, l.layers[0].cimPath, containerID) + if err != nil { + return nil, nil, fmt.Errorf("mount layer cim: %w", err) + } + defer func() { + if err != nil { + _ = cimlayer.UnmountCimLayer(ctx, l.layers[0].cimPath, containerID) + } + }() + + // Use the layer path for GUID rather than the mounted volume path, so that the generated layerID + // remains same. + layerID, err := cimlayer.LayerID(l.layers[0].cimPath, containerID) + if err != nil { + return nil, nil, err + } + + layerData := computestorage.LayerData{ + FilterType: hcsschema.UnionFS, + // Container filesystem contents are under a directory named "Files" inside the mounted cim. + // UnionFS needs this path, so append "Files" to the layer path before passing it on. 
+ Layers: []hcsschema.Layer{ + { + Id: layerID, + Path: filepath.Join(volume, "Files"), + }, + }, + } + + if err = computestorage.AttachOverlayFilter(ctx, mountPath, layerData); err != nil { + return nil, nil, err + } + defer func() { + if err != nil { + _ = computestorage.DetachOverlayFilter(ctx, mountPath, hcsschema.UnionFS) + } + }() + + return &MountedWCOWLayers{ + RootFS: mountPath, + MountedLayerPaths: []MountedWCOWLayer{{ + LayerID: layerID, + MountedPath: volume, + }}, + }, &wcowHostForkedCIMLayerCloser{ + containerID: containerID, + scratchLayerData: l.scratchLayerData, + }, nil +} + +type wcowIsolatedWCIFSLayerCloser struct { + uvm *uvm.UtilityVM + guestCombinedLayersPath string + scratchMount resources.ResourceCloser + layerClosers []resources.ResourceCloser +} + +func (lc *wcowIsolatedWCIFSLayerCloser) Release(ctx context.Context) (retErr error) { + if err := lc.uvm.RemoveCombinedLayersWCOW(ctx, lc.guestCombinedLayersPath); err != nil { + log.G(ctx).WithError(err).Error("failed RemoveCombinedLayersWCOW") + if retErr == nil { //nolint:govet // nilness: consistency with below + retErr = fmt.Errorf("first error: %w", err) + } + } + if err := lc.scratchMount.Release(ctx); err != nil { + log.G(ctx).WithError(err).Error("failed WCOW scratch mount release") + if retErr == nil { + retErr = fmt.Errorf("first error: %w", err) + } + } + for i, closer := range lc.layerClosers { + if err := closer.Release(ctx); err != nil { + log.G(ctx).WithFields(logrus.Fields{ + logrus.ErrorKey: err, + "layerIndex": i, + }).Error("failed releasing WCOW layer") + if retErr == nil { + retErr = fmt.Errorf("first error: %w", err) + } + } + } + return +} + +func mountHypervIsolatedWCIFSLayers(ctx context.Context, l *wcowWCIFSLayers, vm *uvm.UtilityVM) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) { + log.G(ctx).WithField("os", vm.OS()).Debug("hcsshim::MountWCOWLayers V2 UVM") + + // In some legacy layer use cases the scratch VHD might not be already created by the client + // continue to support those scenarios. 
+ if err = ensureScratchVHD(ctx, l.scratchLayerPath, l.layerPaths); err != nil { + return nil, nil, err + } + + var ( + layersAdded []*uvm.VSMBShare + layerClosers []resources.ResourceCloser + ) + defer func() { + if err != nil { + for _, l := range layersAdded { + if err := l.Release(ctx); err != nil { + log.G(ctx).WithError(err).Warn("failed to remove wcow layer on cleanup") + } + } + } + }() + + for _, layerPath := range l.layerPaths { + log.G(ctx).WithField("layerPath", layerPath).Debug("mounting layer") + options := vm.DefaultVSMBOptions(true) + options.TakeBackupPrivilege = true + mount, err := vm.AddVSMB(ctx, layerPath, options) + if err != nil { + return nil, nil, fmt.Errorf("failed to add VSMB layer: %w", err) + } + layersAdded = append(layersAdded, mount) + layerClosers = append(layerClosers, mount) + } + + hostPath := filepath.Join(l.scratchLayerPath, "sandbox.vhdx") + log.G(ctx).WithField("hostPath", hostPath).Debug("mounting scratch VHD") + + scsiMount, err := vm.SCSIManager.AddVirtualDisk(ctx, hostPath, false, vm.ID(), &scsi.MountConfig{}) + if err != nil { + return nil, nil, fmt.Errorf("failed to add SCSI scratch VHD: %w", err) + } + containerScratchPathInUVM := scsiMount.GuestPath() + + defer func() { + if err != nil { + if err := scsiMount.Release(ctx); err != nil { + log.G(ctx).WithError(err).Warn("failed to remove scratch on cleanup") + } + } + }() + + ml := &MountedWCOWLayers{ + RootFS: containerScratchPathInUVM, + } + // Windows GCS needs the layers in the HCS format. Convert to that format before + // sending to GCS + hcsLayers := []hcsschema.Layer{} + for _, a := range layersAdded { + uvmPath, err := vm.GetVSMBUvmPath(ctx, a.HostPath, true) + if err != nil { + return nil, nil, err + } + layerID, err := wclayer.LayerID(ctx, a.HostPath) + if err != nil { + return nil, nil, err + } + ml.MountedLayerPaths = append(ml.MountedLayerPaths, MountedWCOWLayer{ + LayerID: layerID.String(), + MountedPath: uvmPath, + }) + hcsLayers = append(hcsLayers, hcsschema.Layer{ + Id: layerID.String(), + Path: uvmPath, + }) + } + + err = vm.CombineLayersWCOW(ctx, hcsLayers, ml.RootFS) + if err != nil { + return nil, nil, err + } + log.G(ctx).Debug("hcsshim::MountWCOWLayers Succeeded") + + return ml, &wcowIsolatedWCIFSLayerCloser{ + uvm: vm, + guestCombinedLayersPath: ml.RootFS, + scratchMount: scsiMount, + layerClosers: layerClosers, + }, nil +} + +// Mount the sandbox vhd to a user friendly path. +func MountSandboxVolume(ctx context.Context, hostPath, volumeName string) (err error) { + log.G(ctx).WithFields(logrus.Fields{ + "hostpath": hostPath, + "volumeName": volumeName, + }).Debug("mounting volume for container") + + if _, err := os.Stat(hostPath); os.IsNotExist(err) { + if err := os.MkdirAll(hostPath, 0777); err != nil { + return err + } + } + + defer func() { + if err != nil { + os.RemoveAll(hostPath) + } + }() + + // Make sure volumeName ends with a trailing slash as required. + if volumeName[len(volumeName)-1] != '\\' { + volumeName += `\` // Be nice to clients and make sure well-formed for back-compat + } + + if err = windows.SetVolumeMountPoint(windows.StringToUTF16Ptr(hostPath), windows.StringToUTF16Ptr(volumeName)); err != nil { + return errors.Wrapf(err, "failed to mount sandbox volume to %s on host", hostPath) + } + return nil +} + +// Remove volume mount point. And remove folder afterwards. 
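+// RemoveSandboxMountPoint undoes the volume mount point created by MountSandboxVolume.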
+func RemoveSandboxMountPoint(ctx context.Context, hostPath string) error { + log.G(ctx).WithFields(logrus.Fields{ + "hostpath": hostPath, + }).Debug("removing volume mount point for container") + + if err := windows.DeleteVolumeMountPoint(windows.StringToUTF16Ptr(hostPath)); err != nil { + return errors.Wrap(err, "failed to delete sandbox volume mount point") + } + if err := os.Remove(hostPath); err != nil { + return errors.Wrapf(err, "failed to remove sandbox mounted folder path %q", hostPath) + } + return nil +} diff --git a/internal/layers/wcow_parse.go b/internal/layers/wcow_parse.go new file mode 100644 index 0000000000..541766358c --- /dev/null +++ b/internal/layers/wcow_parse.go @@ -0,0 +1,185 @@ +//go:build windows +// +build windows + +package layers + +import ( + "context" + "fmt" + "os" + "path/filepath" + + "github.com/containerd/containerd/api/types" + + "github.com/Microsoft/hcsshim/internal/copyfile" + "github.com/Microsoft/hcsshim/internal/uvm" + "github.com/Microsoft/hcsshim/internal/uvmfolder" +) + +// WCOW image layers is a tagging interface that all WCOW layers MUST implement. This is +// only used so that any random struct cannot be passed as a WCOWLayers type. +type WCOWLayers interface { + IsWCOWLayers() +} + +// scratchLayerData contains data related to the container scratch. Scratch layer format +// (i.e a VHD representing a scratch layer) doesn't change much across different types of +// read-only layers (i.e WCIFS, CIMFS etc.) so this common struct is used across all other +// layer types. +// +// Even though we can simply replace `scratchLayerData` with `scratchLayerPath` +// everywhere, it is a bit convenient to have `scratchLayerData` struct. It implements the +// `WCOWLayers` interface so that we don't have to add it for every other layer +// type. Plus, in the future if we need to include more information for some other type of +// scratch layers we can just add it to this struct. +type scratchLayerData struct { + // Path to the scratch layer. (In most of the cases this will be a path to the + // directory which contains the scratch vhd, however, in future this could be + // volume or a directory that is already setup for writing) + scratchLayerPath string +} + +func (scratchLayerData) IsWCOWLayers() {} + +// Legacy WCIFS based layers. Can be used for process isolated as well as hyperv isolated +// containers. +type wcowWCIFSLayers struct { + scratchLayerData + // layer paths in order [layerN (top-most), layerN-1,..layer0 (base)] + layerPaths []string +} + +// Represents a single forked CIM based layer. In case of a CimFS layer, most of the layer +// files are stored inside the CIM. However, some files (like registry hives) are still +// stored in the layer directory. +type forkedCIMLayer struct { + // Path to the layer directory + layerPath string + // Path to the layer CIM + cimPath string +} + +// Represents CIM layers where each layer CIM is forked from its parent layer +// CIM. Currently can only be used for process isolated containers. 
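+// MountWCOWLayers rejects this layer type for hyperv isolated containers.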
+type wcowForkedCIMLayers struct { + scratchLayerData + // layer paths in order [layerN (top-most), layerN-1,..layer0 (base)] + layers []forkedCIMLayer +} + +func parseForkedCimMount(m *types.Mount) (*wcowForkedCIMLayers, error) { + parentLayerPaths, err := getOptionAsArray(m, parentLayerPathsFlag) + if err != nil { + return nil, err + } + parentCimPaths, err := getOptionAsArray(m, parentLayerCimPathsFlag) + if err != nil { + return nil, err + } + if len(parentLayerPaths) != len(parentCimPaths) { + return nil, fmt.Errorf("invalid mount, number of parent layer paths & cim paths should be same") + } + forkedCimLayers := []forkedCIMLayer{} + for i := 0; i < len(parentCimPaths); i++ { + forkedCimLayers = append(forkedCimLayers, forkedCIMLayer{ + layerPath: parentLayerPaths[i], + cimPath: parentCimPaths[i], + }) + } + return &wcowForkedCIMLayers{ + scratchLayerData: scratchLayerData{ + scratchLayerPath: m.Source, + }, + layers: forkedCimLayers, + }, nil +} + +// ParseWCOWLayers parses the layers provided by containerd into the format understood by hcsshim and prepares +// them for mounting. +func ParseWCOWLayers(rootfs []*types.Mount, layerFolders []string) (WCOWLayers, error) { + if err := validateRootfsAndLayers(rootfs, layerFolders); err != nil { + return nil, err + } + + if len(layerFolders) > 0 { + return &wcowWCIFSLayers{ + scratchLayerData: scratchLayerData{ + scratchLayerPath: layerFolders[len(layerFolders)-1], + }, + layerPaths: layerFolders[:len(layerFolders)-1], + }, nil + } + + m := rootfs[0] + switch m.Type { + case LegacyMountType: + parentLayers, err := getOptionAsArray(m, parentLayerPathsFlag) + if err != nil { + return nil, err + } + return &wcowWCIFSLayers{ + scratchLayerData: scratchLayerData{ + scratchLayerPath: m.Source, + }, + layerPaths: parentLayers, + }, nil + case CimFSMountType: + return parseForkedCimMount(m) + default: + return nil, fmt.Errorf("invalid windows mount type: '%s'", m.Type) + } +} + +// GetWCOWUVMBootFilesFromLayers prepares the UVM boot files from the rootfs or layerFolders. +func GetWCOWUVMBootFilesFromLayers(ctx context.Context, rootfs []*types.Mount, layerFolders []string) (*uvm.WCOWBootFiles, error) { + var parentLayers []string + var scratchLayer string + var err error + + if err = validateRootfsAndLayers(rootfs, layerFolders); err != nil { + return nil, err + } + + if len(layerFolders) > 0 { + parentLayers = layerFolders[:len(layerFolders)-1] + scratchLayer = layerFolders[len(layerFolders)-1] + } else { + m := rootfs[0] + switch m.Type { + case LegacyMountType: + parentLayers, err = getOptionAsArray(m, parentLayerPathsFlag) + if err != nil { + return nil, err + } + scratchLayer = m.Source + default: + return nil, fmt.Errorf("mount type '%s' is not supported for UVM boot", m.Type) + } + } + + uvmFolder, err := uvmfolder.LocateUVMFolder(ctx, parentLayers) + if err != nil { + return nil, fmt.Errorf("failed to locate utility VM folder from layer folders: %w", err) + } + + // In order for the UVM sandbox.vhdx not to collide with the actual + // nested Argon sandbox.vhdx we append the \vm folder to the last + // entry in the list. 
+ scratchLayer = filepath.Join(scratchLayer, "vm") + scratchVHDPath := filepath.Join(scratchLayer, "sandbox.vhdx") + if err = os.MkdirAll(scratchLayer, 0777); err != nil { + return nil, err + } + + if _, err = os.Stat(scratchVHDPath); os.IsNotExist(err) { + sourceScratch := filepath.Join(uvmFolder, `UtilityVM\SystemTemplate.vhdx`) + if err := copyfile.CopyFile(ctx, sourceScratch, scratchVHDPath, true); err != nil { + return nil, err + } + } + return &uvm.WCOWBootFiles{ + OSFilesPath: filepath.Join(uvmFolder, `UtilityVM\Files`), + OSRelativeBootDirPath: `\EFI\Microsoft\Boot`, + ScratchVHDPath: scratchVHDPath, + }, nil +} diff --git a/internal/resources/resources.go b/internal/resources/resources.go index d1f83dbc64..cc08b3a566 100644 --- a/internal/resources/resources.go +++ b/internal/resources/resources.go @@ -97,6 +97,12 @@ type ResourceCloser interface { Release(context.Context) error } +type ResourceCloserFunc func(context.Context) error + +func (f ResourceCloserFunc) Release(ctx context.Context) error { + return f(ctx) +} + // NewContainerResources returns a new empty container Resources struct with the // given container id func NewContainerResources(id string) *Resources { diff --git a/internal/tools/uvmboot/wcow.go b/internal/tools/uvmboot/wcow.go index ce653bfc89..37e0eeedc8 100644 --- a/internal/tools/uvmboot/wcow.go +++ b/internal/tools/uvmboot/wcow.go @@ -15,6 +15,7 @@ import ( "github.com/urfave/cli" "github.com/Microsoft/hcsshim/internal/cmd" + "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/uvm" ) @@ -55,19 +56,19 @@ var wcowCommand = cli.Command{ runMany(c, func(id string) error { options := uvm.NewDefaultOptionsWCOW(id, "") setGlobalOptions(c, options.Options) - var layers []string + var layerFolders []string if wcowImage != "" { layer, err := filepath.Abs(wcowImage) if err != nil { return err } - layers = []string{layer} + layerFolders = []string{layer} } else { if wcowDockerImage == "" { wcowDockerImage = "mcr.microsoft.com/windows/nanoserver:1809" } var err error - layers, err = getLayers(wcowDockerImage) + layerFolders, err = getLayers(wcowDockerImage) if err != nil { return err } @@ -77,7 +78,11 @@ var wcowCommand = cli.Command{ return err } defer os.RemoveAll(tempDir) - options.LayerFolders = append(layers, tempDir) + layerFolders = append(layerFolders, tempDir) + options.BootFiles, err = layers.GetWCOWUVMBootFilesFromLayers(context.TODO(), nil, layerFolders) + if err != nil { + return err + } vm, err := uvm.CreateWCOW(context.TODO(), options) if err != nil { return err diff --git a/internal/uvm/create.go b/internal/uvm/create.go index 26f16f1253..fa28857617 100644 --- a/internal/uvm/create.go +++ b/internal/uvm/create.go @@ -136,9 +136,6 @@ func verifyOptions(_ context.Context, options interface{}) error { if opts.EnableDeferredCommit && !opts.AllowOvercommit { return errors.New("EnableDeferredCommit is not supported on physically backed VMs") } - if len(opts.LayerFolders) < 2 { - return errors.New("at least 2 LayerFolders must be supplied") - } if opts.SCSIControllerCount != 1 { return errors.New("exactly 1 SCSI controller is required for WCOW") } diff --git a/internal/uvm/create_test.go b/internal/uvm/create_test.go index 10577e55ac..b79f37b7e7 100644 --- a/internal/uvm/create_test.go +++ b/internal/uvm/create_test.go @@ -4,7 +4,6 @@ package uvm import ( "context" - "fmt" "testing" ) @@ -20,12 +19,3 @@ func TestCreateBadBootFilesPath(t *testing.T) { t.Fatal(err) } } - -func 
TestCreateWCOWBadLayerFolders(t *testing.T) { - opts := NewDefaultOptionsWCOW(t.Name(), "") - _, err := CreateWCOW(context.Background(), opts) - errMsg := fmt.Sprintf("%s: %s", errBadUVMOpts, "at least 2 LayerFolders must be supplied") - if err == nil || err.Error() != errMsg { - t.Fatal(err) - } -} diff --git a/internal/uvm/create_wcow.go b/internal/uvm/create_wcow.go index 62c2d2bf95..f413720b5c 100644 --- a/internal/uvm/create_wcow.go +++ b/internal/uvm/create_wcow.go @@ -24,9 +24,7 @@ import ( "github.com/Microsoft/hcsshim/internal/schemaversion" "github.com/Microsoft/hcsshim/internal/security" "github.com/Microsoft/hcsshim/internal/uvm/scsi" - "github.com/Microsoft/hcsshim/internal/uvmfolder" "github.com/Microsoft/hcsshim/internal/wclayer" - "github.com/Microsoft/hcsshim/internal/wcow" "github.com/Microsoft/hcsshim/osversion" ) @@ -34,7 +32,7 @@ import ( type OptionsWCOW struct { *Options - LayerFolders []string // Set of folders for base layers and scratch. Ordered from top most read-only through base read-only layer, followed by scratch + BootFiles *WCOWBootFiles // NoDirectMap specifies that no direct mapping should be used for any VSMBs added to the UVM NoDirectMap bool @@ -47,7 +45,7 @@ type OptionsWCOW struct { } // NewDefaultOptionsWCOW creates the default options for a bootable version of -// WCOW. The caller `MUST` set the `LayerFolders` path on the returned value. +// WCOW. The caller `MUST` set the `BootFiles` on the returned value. // // `id` the ID of the compute system. If not passed will generate a new GUID. // @@ -74,7 +72,7 @@ func (uvm *UtilityVM) startExternalGcsListener(ctx context.Context) error { return nil } -func prepareConfigDoc(ctx context.Context, uvm *UtilityVM, opts *OptionsWCOW, uvmFolder string) (*hcsschema.ComputeSystem, error) { +func prepareConfigDoc(ctx context.Context, uvm *UtilityVM, opts *OptionsWCOW) (*hcsschema.ComputeSystem, error) { processorTopology, err := processorinfo.HostProcessorInfo(ctx) if err != nil { return nil, fmt.Errorf("failed to get host processor information: %w", err) @@ -95,7 +93,7 @@ func prepareConfigDoc(ctx context.Context, uvm *UtilityVM, opts *OptionsWCOW, uv Shares: []hcsschema.VirtualSmbShare{ { Name: "os", - Path: filepath.Join(uvmFolder, `UtilityVM\Files`), + Path: opts.BootFiles.OSFilesPath, Options: vsmbOpts, }, }, @@ -175,7 +173,7 @@ func prepareConfigDoc(ctx context.Context, uvm *UtilityVM, opts *OptionsWCOW, uv Chipset: &hcsschema.Chipset{ Uefi: &hcsschema.Uefi{ BootThis: &hcsschema.UefiBootEntry{ - DevicePath: `\EFI\Microsoft\Boot\bootmgfw.efi`, + DevicePath: filepath.Join(opts.BootFiles.OSRelativeBootDirPath, "bootmgfw.efi"), DeviceType: "VmbFs", }, }, @@ -284,42 +282,13 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error return nil, errors.Wrap(err, errBadUVMOpts.Error()) } - uvmFolder, err := uvmfolder.LocateUVMFolder(ctx, opts.LayerFolders) - if err != nil { - return nil, fmt.Errorf("failed to locate utility VM folder from layer folders: %w", err) - } - - // TODO: BUGBUG Remove this. @jhowardmsft - // It should be the responsibility of the caller to do the creation and population. - // - Update runhcs too (vm.go). - // - Remove comment in function header - // - Update tests that rely on this current behavior. - // Create the RW scratch in the top-most layer folder, creating the folder if it doesn't already exist. 
-	scratchFolder := opts.LayerFolders[len(opts.LayerFolders)-1]
-
-	// Create the directory if it doesn't exist
-	if _, err := os.Stat(scratchFolder); os.IsNotExist(err) {
-		if err := os.MkdirAll(scratchFolder, 0777); err != nil {
-			return nil, fmt.Errorf("failed to create utility VM scratch folder: %w", err)
-		}
-	}
-
-	doc, err := prepareConfigDoc(ctx, uvm, opts, uvmFolder)
+	doc, err := prepareConfigDoc(ctx, uvm, opts)
 	if err != nil {
 		return nil, fmt.Errorf("error in preparing config doc: %w", err)
 	}
 
-	// Create sandbox.vhdx in the scratch folder based on the template, granting the correct permissions to it
-	scratchPath := filepath.Join(scratchFolder, "sandbox.vhdx")
-	if _, err := os.Stat(scratchPath); os.IsNotExist(err) {
-		if err := wcow.CreateUVMScratch(ctx, uvmFolder, scratchFolder, uvm.id); err != nil {
-			return nil, fmt.Errorf("failed to create scratch: %w", err)
-		}
-	} else {
-		// Sandbox.vhdx exists, just need to grant vm access to it.
-		if err := wclayer.GrantVmAccess(ctx, uvm.id, scratchPath); err != nil {
-			return nil, errors.Wrap(err, "failed to grant vm access to scratch")
-		}
+	if err := wclayer.GrantVmAccess(ctx, uvm.id, opts.BootFiles.ScratchVHDPath); err != nil {
+		return nil, errors.Wrap(err, "failed to grant vm access to scratch")
 	}
 
 	doc.VirtualMachine.Devices.Scsi = map[string]hcsschema.Scsi{}
@@ -331,7 +300,7 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error
 	doc.VirtualMachine.Devices.Scsi[guestrequest.ScsiControllerGuids[0]].Attachments["0"] = hcsschema.Attachment{
-		Path:  scratchPath,
+		Path:  opts.BootFiles.ScratchVHDPath,
 		Type_: "VirtualDisk",
 	}
diff --git a/internal/uvm/types.go b/internal/uvm/types.go
index 4b99c15843..6d736f837e 100644
--- a/internal/uvm/types.go
+++ b/internal/uvm/types.go
@@ -152,3 +152,13 @@ func (uvm *UtilityVM) ScratchEncryptionEnabled() bool {
 
 type OutputHandler func(io.Reader)
 type OutputHandlerCreator func(*Options) OutputHandler
+
+type WCOWBootFiles struct {
+	// Path to the directory that contains the OS files.
+	OSFilesPath string
+	// Path of the boot directory relative to the `OSFilesPath`. This boot directory MUST
+	// contain the BCD & bootmgfw.efi files.
+	OSRelativeBootDirPath string
+	// Path for the scratch VHD of the UVM
+	ScratchVHDPath string
+}
diff --git a/internal/wclayer/cim/LayerWriter.go b/internal/wclayer/cim/LayerWriter.go
index 386cbcda9e..9315971b64 100644
--- a/internal/wclayer/cim/LayerWriter.go
+++ b/internal/wclayer/cim/LayerWriter.go
@@ -6,16 +6,12 @@ import (
 	"context"
 	"fmt"
 	"io"
-	"os"
 	"path/filepath"
-	"strconv"
 	"strings"
 
 	"github.com/Microsoft/go-winio"
-	"github.com/Microsoft/hcsshim/internal/log"
 	"github.com/Microsoft/hcsshim/internal/oc"
 	"github.com/Microsoft/hcsshim/internal/wclayer"
-	"github.com/Microsoft/hcsshim/osversion"
 	"github.com/Microsoft/hcsshim/pkg/cimfs"
 	"go.opencensus.io/trace"
 )
@@ -30,7 +26,7 @@ type CimLayerWriter struct {
 	// path to the layer (i.e layer's directory) as provided by the caller.
 	// Even if a layer is stored as a cim in the cim directory, some files associated
 	// with a layer are still stored in this path.
- path string + layerPath string // parent layer paths parentLayerPaths []string // Handle to the layer cim - writes to the cim file @@ -92,7 +88,7 @@ func (cw *CimLayerWriter) Add(name string, fileInfo *winio.FileBasicInfo, fileSi // create a pending op for this file cw.pendingOps = append(cw.pendingOps, &addOp{ pathInCim: name, - hostPath: filepath.Join(cw.path, name), + hostPath: filepath.Join(cw.layerPath, name), fileInfo: fileInfo, securityDescriptor: securityDescriptor, extendedAttributes: extendedAttributes, @@ -174,31 +170,8 @@ func (cw *CimLayerWriter) Close(ctx context.Context) (retErr error) { } }() - // Find out the osversion of this layer, both base & non-base layers can have UtilityVM layer. + // UVM based containers aren't supported with CimFS, don't process the UVM layer processUtilityVM := false - if cw.hasUtilityVM { - uvmSoftwareHivePath := filepath.Join(cw.path, wclayer.UtilityVMPath, wclayer.RegFilesPath, "SOFTWARE") - osvStr, err := getOsBuildNumberFromRegistry(uvmSoftwareHivePath) - if err != nil { - return fmt.Errorf("read os version string from UtilityVM SOFTWARE hive: %w", err) - } - - osv, err := strconv.ParseUint(osvStr, 10, 16) - if err != nil { - return fmt.Errorf("parse os version string (%s): %w", osvStr, err) - } - - // write this version to a file for future reference by the shim process - if err = wclayer.WriteLayerUvmBuildFile(cw.path, uint16(osv)); err != nil { - return fmt.Errorf("write uvm build version: %w", err) - } - - // CIMFS for hyperV isolated is only supported after 20348, processing UtilityVM layer on 2048 - // & lower will cause failures since those images won't have CIMFS specific UVM files (mostly - // BCD entries required for CIMFS) - processUtilityVM = (osv > osversion.LTSC2022) - log.G(ctx).Debugf("import image os version %d, processing UtilityVM layer: %t\n", osv, processUtilityVM) - } if len(cw.parentLayerPaths) == 0 { if err := cw.processBaseLayer(ctx, processUtilityVM); err != nil { @@ -218,7 +191,7 @@ func (cw *CimLayerWriter) Close(ctx context.Context) (retErr error) { return nil } -func NewCimLayerWriter(ctx context.Context, path string, parentLayerPaths []string) (_ *CimLayerWriter, err error) { +func NewCimLayerWriter(ctx context.Context, layerPath, cimPath string, parentLayerPaths, parentLayerCimPaths []string) (_ *CimLayerWriter, err error) { if !cimfs.IsCimFSSupported() { return nil, fmt.Errorf("CimFs not supported on this build") } @@ -231,59 +204,36 @@ func NewCimLayerWriter(ctx context.Context, path string, parentLayerPaths []stri } }() span.AddAttributes( - trace.StringAttribute("path", path), + trace.StringAttribute("path", layerPath), + trace.StringAttribute("cimPath", cimPath), + trace.StringAttribute("parentLayerPaths", strings.Join(parentLayerCimPaths, ", ")), trace.StringAttribute("parentLayerPaths", strings.Join(parentLayerPaths, ", "))) parentCim := "" - cimDirPath := GetCimDirFromLayer(path) - if _, err = os.Stat(cimDirPath); os.IsNotExist(err) { - // create cim directory - if err = os.Mkdir(cimDirPath, 0755); err != nil { - return nil, fmt.Errorf("failed while creating cim layers directory: %w", err) - } - } else if err != nil { - return nil, fmt.Errorf("unable to access cim layers directory: %w", err) - - } - if len(parentLayerPaths) > 0 { - parentCim = GetCimNameFromLayer(parentLayerPaths[0]) + if filepath.Dir(cimPath) != filepath.Dir(parentLayerCimPaths[0]) { + return nil, fmt.Errorf("parent cim can not be stored in different directory") + } + // We only need to provide parent CIM name, it is assumed that 
both parent CIM + // and newly created CIM are present in the same directory. + parentCim = filepath.Base(parentLayerCimPaths[0]) } - cim, err := cimfs.Create(cimDirPath, parentCim, GetCimNameFromLayer(path)) + cim, err := cimfs.Create(filepath.Dir(cimPath), parentCim, filepath.Base(cimPath)) if err != nil { return nil, fmt.Errorf("error in creating a new cim: %w", err) } - sfw, err := newStdFileWriter(path, parentLayerPaths) + sfw, err := newStdFileWriter(layerPath, parentLayerPaths) if err != nil { return nil, fmt.Errorf("error in creating new standard file writer: %w", err) } return &CimLayerWriter{ ctx: ctx, s: span, - path: path, + layerPath: layerPath, parentLayerPaths: parentLayerPaths, cimWriter: cim, stdFileWriter: sfw, }, nil } - -// DestroyCimLayer destroys a cim layer i.e it removes all the cimfs files for the given layer as well as -// all of the other files that are stored in the layer directory (at path `layerPath`). -// If this is not a cimfs layer (i.e a cim file for the given layer does not exist) then nothing is done. -func DestroyCimLayer(ctx context.Context, layerPath string) error { - cimPath := GetCimPathFromLayer(layerPath) - - // verify that such a cim exists first, sometimes containerd tries to call - // this with the root snapshot directory as the layer path. We don't want to - // destroy everything inside the snapshots directory. - if _, err := os.Stat(cimPath); err != nil { - if os.IsNotExist(err) { - return nil - } - return err - } - - return cimfs.DestroyCim(ctx, cimPath) -} diff --git a/internal/wclayer/cim/bcd.go b/internal/wclayer/cim/bcd.go deleted file mode 100644 index 23a3ce6776..0000000000 --- a/internal/wclayer/cim/bcd.go +++ /dev/null @@ -1,107 +0,0 @@ -//go:build windows - -package cim - -import ( - "bytes" - "fmt" - "os/exec" - - "github.com/Microsoft/go-winio/pkg/guid" -) - -const ( - bcdFilePath = "UtilityVM\\Files\\EFI\\Microsoft\\Boot\\BCD" - cimfsDeviceOptionsID = "{763e9fea-502d-434f-aad9-5fabe9c91a7b}" - vmbusDeviceID = "{c63c9bdf-5fa5-4208-b03f-6b458b365592}" - compositeDeviceOptionsID = "{e1787220-d17f-49e7-977a-d8fe4c8537e2}" - bootContainerID = "{b890454c-80de-4e98-a7ab-56b74b4fbd0c}" -) - -func bcdExec(storePath string, args ...string) error { - var out bytes.Buffer - argsArr := []string{"/store", storePath, "/offline"} - argsArr = append(argsArr, args...) - cmd := exec.Command("bcdedit.exe", argsArr...) - cmd.Stdout = &out - if err := cmd.Run(); err != nil { - return fmt.Errorf("bcd command (%s) failed: %w", cmd, err) - } - return nil -} - -// A registry configuration required for the uvm. -func setBcdRestartOnFailure(storePath string) error { - return bcdExec(storePath, "/set", "{default}", "restartonfailure", "yes") -} - -func setBcdCimBootDevice(storePath, cimPathRelativeToVSMB string, diskID, partitionID guid.GUID) error { - // create options for cimfs boot device - if err := bcdExec(storePath, "/create", cimfsDeviceOptionsID, "/d", "CimFS Device Options", "/device"); err != nil { - return err - } - - // Set options. For now we need to set 2 options. First is the parent device i.e the device under - // which all cim files will be available. Second is the path of the cim (from which this UVM should - // boot) relative to the parent device. Note that even though the 2nd option is named - // `cimfsrootdirectory` it expects a path to the cim file and not a directory path. 
- if err := bcdExec(storePath, "/set", cimfsDeviceOptionsID, "cimfsparentdevice", fmt.Sprintf("vmbus=%s", vmbusDeviceID)); err != nil { - return err - } - - if err := bcdExec(storePath, "/set", cimfsDeviceOptionsID, "cimfsrootdirectory", fmt.Sprintf("\\%s", cimPathRelativeToVSMB)); err != nil { - return err - } - - // create options for the composite device - if err := bcdExec(storePath, "/create", compositeDeviceOptionsID, "/d", "Composite Device Options", "/device"); err != nil { - return err - } - - // We need to specify the diskID & the partition ID of the boot disk and we need to set the cimfs boot - // options ID - partitionStr := fmt.Sprintf("gpt_partition={%s};{%s}", diskID, partitionID) - if err := bcdExec(storePath, "/set", compositeDeviceOptionsID, "primarydevice", partitionStr); err != nil { - return err - } - - if err := bcdExec(storePath, "/set", compositeDeviceOptionsID, "secondarydevice", fmt.Sprintf("cimfs=%s,%s", bootContainerID, cimfsDeviceOptionsID)); err != nil { - return err - } - - if err := bcdExec(storePath, "/set", "{default}", "device", fmt.Sprintf("composite=0,%s", compositeDeviceOptionsID)); err != nil { - return err - } - - if err := bcdExec(storePath, "/set", "{default}", "osdevice", fmt.Sprintf("composite=0,%s", compositeDeviceOptionsID)); err != nil { - return err - } - - // Since our UVM file are stored under UtilityVM\Files directory inside the CIM we must prepend that - // directory in front of paths used by bootmgr - if err := bcdExec(storePath, "/set", "{default}", "path", "\\UtilityVM\\Files\\Windows\\System32\\winload.efi"); err != nil { - return err - } - - if err := bcdExec(storePath, "/set", "{default}", "systemroot", "\\UtilityVM\\Files\\Windows"); err != nil { - return err - } - - return nil -} - -// updateBcdStoreForBoot Updates the bcd store at path layerPath + UtilityVM\Files\EFI\Microsoft\Boot\BCD` to -// boot with the disk with given ID and given partitionID. cimPathRelativeToVSMB is the path of the cim which -// will be used for booting this UVM relative to the VSMB share. (Usually, the entire snapshots directory will -// be shared over VSMB, so if this is the cim-layers\1.cim under that directory, the value of -// `cimPathRelativeToVSMB` should be cim-layers\1.cim) -func updateBcdStoreForBoot(storePath string, cimPathRelativeToVSMB string, diskID, partitionID guid.GUID) error { - if err := setBcdRestartOnFailure(storePath); err != nil { - return err - } - - if err := setBcdCimBootDevice(storePath, cimPathRelativeToVSMB, diskID, partitionID); err != nil { - return err - } - return nil -} diff --git a/internal/wclayer/cim/common.go b/internal/wclayer/cim/common.go deleted file mode 100644 index bdeebd3c03..0000000000 --- a/internal/wclayer/cim/common.go +++ /dev/null @@ -1,41 +0,0 @@ -//go:build windows - -package cim - -import ( - "os" - "path/filepath" -) - -const ( - // name of the directory in which cims are stored - cimDir = "cim-layers" -) - -// Usually layers are stored at ./root/io.containerd.snapshotter.v1.windows/snapshots/. For cimfs we -// must store all layer cims in the same directory (for forked cims to work). So all cim layers are stored in -// /root/io.containerd.snapshotter.v1.windows/snapshots/cim-layers. 
And the cim file representing each -// individual layer is stored at /root/io.containerd.snapshotter.v1.windows/snapshots/cim-layers/.cim - -// CimName is the filename (.cim) of the file representing the cim -func GetCimNameFromLayer(layerPath string) string { - return filepath.Base(layerPath) + ".cim" -} - -// CimPath is the path to the CimDir/.cim file that represents a layer cim. -func GetCimPathFromLayer(layerPath string) string { - return filepath.Join(GetCimDirFromLayer(layerPath), GetCimNameFromLayer(layerPath)) -} - -// CimDir is the directory inside which all cims are stored. -func GetCimDirFromLayer(layerPath string) string { - dir := filepath.Dir(layerPath) - return filepath.Join(dir, cimDir) -} - -// IsCimLayer returns `true` if the layer at path `layerPath` is a cim layer. Returns `false` otherwise. -func IsCimLayer(layerPath string) bool { - cimPath := GetCimPathFromLayer(layerPath) - _, err := os.Stat(cimPath) - return (err == nil) -} diff --git a/internal/wclayer/cim/mount.go b/internal/wclayer/cim/mount.go index 22f21dd43a..e4b193533f 100644 --- a/internal/wclayer/cim/mount.go +++ b/internal/wclayer/cim/mount.go @@ -6,6 +6,7 @@ import ( "context" "fmt" "os" + "strings" "sync" "github.com/Microsoft/go-winio/pkg/guid" @@ -87,3 +88,15 @@ func CleanupContainerMounts(containerID string) error { } return nil } + +func LayerID(cimPath, containerID string) (string, error) { + cimMountMapLock.Lock() + defer cimMountMapLock.Unlock() + if vol, ok := cimMounts[fmt.Sprintf("%s_%s", containerID, cimPath)]; !ok { + return "", fmt.Errorf("cim %s not mounted", cimPath) + } else if !strings.HasPrefix(vol, "\\\\?\\Volume{") || !strings.HasSuffix(vol, "}\\") { + return "", fmt.Errorf("volume path %s is not in the expected format", vol) + } else { + return strings.TrimSuffix(strings.TrimPrefix(vol, "\\\\?\\Volume{"), "}\\"), nil + } +} diff --git a/internal/wclayer/cim/process.go b/internal/wclayer/cim/process.go index 9ed0428b7e..8fdb3bad3f 100644 --- a/internal/wclayer/cim/process.go +++ b/internal/wclayer/cim/process.go @@ -7,95 +7,16 @@ import ( "fmt" "os" "path/filepath" - "syscall" "time" "github.com/Microsoft/go-winio" - "github.com/Microsoft/go-winio/vhd" - "github.com/Microsoft/hcsshim/computestorage" - "github.com/Microsoft/hcsshim/internal/memory" - "github.com/Microsoft/hcsshim/internal/security" - "github.com/Microsoft/hcsshim/internal/vhdx" "github.com/Microsoft/hcsshim/internal/wclayer" "golang.org/x/sys/windows" ) -const defaultVHDXBlockSizeInMB = 1 - -// processUtilityVMLayer is similar to createContainerBaseLayerVHDs but along with the scratch creation it -// also does some BCD modifications to allow the UVM to boot from the CIM. It expects that the UVM BCD file is -// present at layerPath/`wclayer.BcdFilePath` and a UVM SYSTEM hive is present at -// layerPath/UtilityVM/`wclayer.RegFilesPath`/SYSTEM. The scratch VHDs are created under the `layerPath` -// directory. +// processUtilityVMLayer will handle processing of UVM specific files when we start +// supporting UVM based containers with CimFS in the future. func processUtilityVMLayer(ctx context.Context, layerPath string) error { - // func createUtilityVMLayerVHDs(ctx context.Context, layerPath string) error { - baseVhdPath := filepath.Join(layerPath, wclayer.UtilityVMPath, wclayer.UtilityVMBaseVhd) - diffVhdPath := filepath.Join(layerPath, wclayer.UtilityVMPath, wclayer.UtilityVMScratchVhd) - defaultVhdSize := uint64(10) - - // Just create the vhdx for utilityVM layer, no need to format it. 
- createParams := &vhd.CreateVirtualDiskParameters{ - Version: 2, - Version2: vhd.CreateVersion2{ - MaximumSize: defaultVhdSize * memory.GiB, - BlockSizeInBytes: defaultVHDXBlockSizeInMB * memory.MiB, - }, - } - - handle, err := vhd.CreateVirtualDisk(baseVhdPath, vhd.VirtualDiskAccessNone, vhd.CreateVirtualDiskFlagNone, createParams) - if err != nil { - return fmt.Errorf("failed to create vhdx: %w", err) - } - - defer func() { - if err != nil { - os.RemoveAll(baseVhdPath) - os.RemoveAll(diffVhdPath) - } - }() - - err = computestorage.FormatWritableLayerVhd(ctx, windows.Handle(handle)) - closeErr := syscall.CloseHandle(handle) - if err != nil { - return err - } else if closeErr != nil { - return fmt.Errorf("failed to close vhdx handle: %w", closeErr) - } - - partitionInfo, err := vhdx.GetScratchVhdPartitionInfo(ctx, baseVhdPath) - if err != nil { - return fmt.Errorf("failed to get base vhd layout info: %w", err) - } - // relativeCimPath needs to be the cim path relative to the snapshots directory. The snapshots - // directory is shared inside the UVM over VSMB, so during the UVM boot this relative path will be - // used to find the cim file under that VSMB share. - relativeCimPath := filepath.Join(filepath.Base(GetCimDirFromLayer(layerPath)), GetCimNameFromLayer(layerPath)) - bcdPath := filepath.Join(layerPath, bcdFilePath) - if err = updateBcdStoreForBoot(bcdPath, relativeCimPath, partitionInfo.DiskID, partitionInfo.PartitionID); err != nil { - return fmt.Errorf("failed to update BCD: %w", err) - } - - if err := enableCimBoot(filepath.Join(layerPath, wclayer.UtilityVMPath, wclayer.RegFilesPath, "SYSTEM")); err != nil { - return fmt.Errorf("failed to setup cim image for uvm boot: %w", err) - } - - // Note: diff vhd creation and granting of vm group access must be done AFTER - // getting the partition info of the base VHD. Otherwise it causes the vhd parent - // chain to get corrupted. - // TODO(ambarve): figure out why this happens so that bcd update can be moved to a separate function - - // Create the differencing disk that will be what's copied for the final rw layer - // for a container. - if err = vhd.CreateDiffVhd(diffVhdPath, baseVhdPath, defaultVHDXBlockSizeInMB); err != nil { - return fmt.Errorf("failed to create differencing disk: %w", err) - } - - if err := security.GrantVmGroupAccess(baseVhdPath); err != nil { - return fmt.Errorf("failed to grant vm group access to %s: %w", baseVhdPath, err) - } - if err := security.GrantVmGroupAccess(diffVhdPath); err != nil { - return fmt.Errorf("failed to grant vm group access to %s: %w", diffVhdPath, err) - } return nil } @@ -170,18 +91,18 @@ func processLayoutFile(layerPath string) ([]pendingCimOp, error) { // steps. This function opens the cim file for writing and updates it. func (cw *CimLayerWriter) processBaseLayer(ctx context.Context, processUtilityVM bool) (err error) { if processUtilityVM { - if err = processUtilityVMLayer(ctx, cw.path); err != nil { + if err = processUtilityVMLayer(ctx, cw.layerPath); err != nil { return fmt.Errorf("process utilityVM layer: %w", err) } } - ops, err := processBaseLayerHives(cw.path) + ops, err := processBaseLayerHives(cw.layerPath) if err != nil { return err } cw.pendingOps = append(cw.pendingOps, ops...) 
- ops, err = processLayoutFile(cw.path) + ops, err = processLayoutFile(cw.layerPath) if err != nil { return err } @@ -196,14 +117,14 @@ func (cw *CimLayerWriter) processNonBaseLayer(ctx context.Context, processUtilit for _, hv := range hives { baseHive := filepath.Join(wclayer.HivesPath, hv.base) deltaHive := filepath.Join(wclayer.HivesPath, hv.delta) - _, err := os.Stat(filepath.Join(cw.path, deltaHive)) + _, err := os.Stat(filepath.Join(cw.layerPath, deltaHive)) // merge with parent layer if delta exists. if err != nil && !os.IsNotExist(err) { - return fmt.Errorf("stat delta hive %s: %w", filepath.Join(cw.path, deltaHive), err) + return fmt.Errorf("stat delta hive %s: %w", filepath.Join(cw.layerPath, deltaHive), err) } else if err == nil { // merge base hive of parent layer with the delta hive of this layer and write it as // the base hive of this layer. - err = mergeHive(filepath.Join(cw.parentLayerPaths[0], baseHive), filepath.Join(cw.path, deltaHive), filepath.Join(cw.path, baseHive)) + err = mergeHive(filepath.Join(cw.parentLayerPaths[0], baseHive), filepath.Join(cw.layerPath, deltaHive), filepath.Join(cw.layerPath, baseHive)) if err != nil { return err } @@ -211,7 +132,7 @@ func (cw *CimLayerWriter) processNonBaseLayer(ctx context.Context, processUtilit // the newly created merged file must be added to the cim cw.pendingOps = append(cw.pendingOps, &addOp{ pathInCim: baseHive, - hostPath: filepath.Join(cw.path, baseHive), + hostPath: filepath.Join(cw.layerPath, baseHive), fileInfo: &winio.FileBasicInfo{ CreationTime: windows.NsecToFiletime(time.Now().UnixNano()), LastAccessTime: windows.NsecToFiletime(time.Now().UnixNano()), @@ -224,7 +145,7 @@ func (cw *CimLayerWriter) processNonBaseLayer(ctx context.Context, processUtilit } if processUtilityVM { - return processUtilityVMLayer(ctx, cw.path) + return processUtilityVMLayer(ctx, cw.layerPath) } return nil } diff --git a/internal/wclayer/cim/registry.go b/internal/wclayer/cim/registry.go index dd2af81cf3..c95b03ca37 100644 --- a/internal/wclayer/cim/registry.go +++ b/internal/wclayer/cim/registry.go @@ -3,88 +3,13 @@ package cim import ( - "encoding/binary" "fmt" - "os" - "unsafe" - "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/winapi" "github.com/Microsoft/hcsshim/osversion" "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sys/windows" ) -// enableCimBoot Opens the SYSTEM registry hive at path `hivePath` and updates it to include a CIMFS Start -// registry key. This prepares the uvm to boot from a cim file if requested. The registry changes required to -// actually make the uvm boot from a cim will be added in the uvm config (look at -// addBootFromCimRegistryChanges for details). This registry key needs to be available in the early boot -// phase and so including it in the uvm config doesn't work. 
-func enableCimBoot(hivePath string) (err error) { - dataZero := make([]byte, 4) - dataOne := make([]byte, 4) - binary.LittleEndian.PutUint32(dataOne, 1) - dataFour := make([]byte, 4) - binary.LittleEndian.PutUint32(dataFour, 4) - - bootGUID, err := windows.UTF16FromString(bootContainerID) - if err != nil { - return fmt.Errorf("failed to encode boot guid to utf16: %w", err) - } - - overrideBootPath, err := windows.UTF16FromString("\\Windows\\") - if err != nil { - return fmt.Errorf("failed to encode override boot path to utf16: %w", err) - } - - regChanges := []struct { - keyPath string - valueName string - valueType winapi.RegType - data *byte - dataLen uint32 - }{ - {"ControlSet001\\Control", "BootContainerGuid", winapi.REG_TYPE_SZ, (*byte)(unsafe.Pointer(&bootGUID[0])), 2 * uint32(len(bootGUID))}, - {"ControlSet001\\Services\\UnionFS", "Start", winapi.REG_TYPE_DWORD, &dataZero[0], uint32(len(dataZero))}, - {"ControlSet001\\Services\\wcifs", "Start", winapi.REG_TYPE_DWORD, &dataFour[0], uint32(len(dataZero))}, - // The bootmgr loads the uvm files from the cim and so uses the relative path `UtilityVM\\Files` inside the cim to access the uvm files. However, once the cim is mounted UnionFS will merge the correct directory (UtilityVM\\Files) of the cim with the scratch and then that point onwards we don't need to use the relative path. Below registry key tells the kernel that the boot path that was provided in BCD should now be overriden with this new path. - {"Setup", "BootPathOverride", winapi.REG_TYPE_SZ, (*byte)(unsafe.Pointer(&overrideBootPath[0])), 2 * uint32(len(overrideBootPath))}, - } - - var storeHandle winapi.ORHKey - if err = winapi.OROpenHive(hivePath, &storeHandle); err != nil { - return fmt.Errorf("failed to open registry store at %s: %w", hivePath, err) - } - - for _, change := range regChanges { - var changeKey winapi.ORHKey - if err = winapi.ORCreateKey(storeHandle, change.keyPath, 0, 0, 0, &changeKey, nil); err != nil { - return fmt.Errorf("failed to open reg key %s: %w", change.keyPath, err) - } - - if err = winapi.ORSetValue(changeKey, change.valueName, uint32(change.valueType), change.data, change.dataLen); err != nil { - return fmt.Errorf("failed to set value for regkey %s\\%s : %w", change.keyPath, change.valueName, err) - } - } - - // remove the existing file first - if err := os.Remove(hivePath); err != nil { - return fmt.Errorf("failed to remove existing registry %s: %w", hivePath, err) - } - - if err = winapi.ORSaveHive(winapi.ORHKey(storeHandle), hivePath, uint32(osversion.Get().MajorVersion), uint32(osversion.Get().MinorVersion)); err != nil { - return fmt.Errorf("error saving the registry store: %w", err) - } - - // close hive irrespective of the errors - if err := winapi.ORCloseHive(winapi.ORHKey(storeHandle)); err != nil { - return fmt.Errorf("error closing registry store; %w", err) - } - return nil - -} - // mergeHive merges the hive located at parentHivePath with the hive located at deltaHivePath and stores // the result into the file at mergedHivePath. If a file already exists at path `mergedHivePath` then it // throws an error. @@ -122,51 +47,3 @@ func mergeHive(parentHivePath, deltaHivePath, mergedHivePath string) (err error) } return } - -// getOsBuildNumberFromRegistry fetches the "CurrentBuild" value at path -// "Microsoft\Windows NT\CurrentVersion" from the SOFTWARE registry hive at path -// `regHivePath`. This is used to detect the build version of the uvm. 
-func getOsBuildNumberFromRegistry(regHivePath string) (_ string, err error) {
-	var storeHandle, keyHandle winapi.ORHKey
-	var dataType, dataLen uint32
-	keyPath := "Microsoft\\Windows NT\\CurrentVersion"
-	valueName := "CurrentBuild"
-	dataLen = 16 // build version string can't be more than 5 wide chars?
-	dataBuf := make([]byte, dataLen)
-
-	if err = winapi.OROpenHive(regHivePath, &storeHandle); err != nil {
-		return "", fmt.Errorf("failed to open registry store at %s: %w", regHivePath, err)
-	}
-	defer func() {
-		if closeErr := winapi.ORCloseHive(storeHandle); closeErr != nil {
-			log.L.WithFields(logrus.Fields{
-				"error": closeErr,
-				"hive":  regHivePath,
-			}).Warnf("failed to close hive")
-		}
-	}()
-
-	if err = winapi.OROpenKey(storeHandle, keyPath, &keyHandle); err != nil {
-		return "", fmt.Errorf("failed to open key at %s: %w", keyPath, err)
-	}
-	defer func() {
-		if closeErr := winapi.ORCloseKey(keyHandle); closeErr != nil {
-			log.L.WithFields(logrus.Fields{
-				"error": closeErr,
-				"hive":  regHivePath,
-				"key":   keyPath,
-				"value": valueName,
-			}).Warnf("failed to close hive key")
-		}
-	}()
-
-	if err = winapi.ORGetValue(keyHandle, "", valueName, &dataType, &dataBuf[0], &dataLen); err != nil {
-		return "", fmt.Errorf("failed to get value of %s: %w", valueName, err)
-	}
-
-	if dataType != uint32(winapi.REG_TYPE_SZ) {
-		return "", fmt.Errorf("unexpected build number data type (%d)", dataType)
-	}
-
-	return winapi.ParseUtf16LE(dataBuf[:(dataLen - 2)]), nil
-}
diff --git a/pkg/ociwclayer/cim/import.go b/pkg/ociwclayer/cim/import.go
index a2e8759529..d8f4a1aa95 100644
--- a/pkg/ociwclayer/cim/import.go
+++ b/pkg/ociwclayer/cim/import.go
@@ -23,26 +23,25 @@ import (
 )

 // ImportCimLayerFromTar reads a layer from an OCI layer tar stream and extracts it into
-// the CIM format at the specified path. The caller must specify the parent layers, if
-// any, ordered from lowest to highest layer.
-// This function expects that the layer paths (both the layer that is being imported & the parent layers) are
-// formatted like `.../snapshots/` and the corresponding layer CIMs are located/will be created at
-// `.../snapshots/cim-layers/.cim`. Each CIM file also has corresponding region & objectID files and those
-// files will also be stored inside the `cim-layers` directory.
+// the CIM format at the specified path.
+// `layerPath` is the directory which can be used to store intermediate files generated during layer extraction (these files are also used when extracting child layers of this layer).
+// `cimPath` is the path to the CIM in which layer files must be stored. Note that region & object files are created when writing to a CIM; these files will be created next to `cimPath`.
+// `parentLayerCimPaths` are paths to the parent layer CIMs, ordered from highest to lowest, i.e. the CIM at `parentLayerCimPaths[0]` will be the immediate parent of the layer being extracted here.
+// `parentLayerPaths` are paths to the parent layer directories, ordered from highest to lowest.
 //
 // This function returns the total size of the layer's files, in bytes.
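As an illustration of the reworked API documented above, a hypothetical caller might look roughly like the following. The snapshot directory layout, the file names, and the cimlayer import alias are assumptions made for the example, not part of this change.

package main

import (
	"context"
	"fmt"
	"os"
	"path/filepath"

	cimlayer "github.com/Microsoft/hcsshim/pkg/ociwclayer/cim"
)

func main() {
	ctx := context.Background()

	// Hypothetical snapshotter-style layout: each snapshot directory holds the
	// intermediate files for a layer, and the layer CIMs live side by side in a
	// shared cim-layers directory.
	snapshotsDir := `C:\ProgramData\containerd\io.containerd.snapshotter.v1.windows\snapshots`
	layerPath := filepath.Join(snapshotsDir, "5")
	cimPath := filepath.Join(snapshotsDir, "cim-layers", "5.cim")
	parentLayerPaths := []string{filepath.Join(snapshotsDir, "4")}
	parentLayerCimPaths := []string{filepath.Join(snapshotsDir, "cim-layers", "4.cim")}

	// Hypothetical OCI layer tar to import.
	f, err := os.Open(`C:\layers\layer5.tar`)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// Region & object files for the new CIM are created next to cimPath.
	size, err := cimlayer.ImportCimLayerFromTar(ctx, f, layerPath, cimPath, parentLayerPaths, parentLayerCimPaths)
	if err != nil {
		panic(err)
	}
	fmt.Printf("imported layer, %d bytes\n", size)
}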
-func ImportCimLayerFromTar(ctx context.Context, r io.Reader, layerPath string, parentLayerPaths []string) (int64, error) { +func ImportCimLayerFromTar(ctx context.Context, r io.Reader, layerPath, cimPath string, parentLayerPaths, parentLayerCimPaths []string) (int64, error) { err := os.MkdirAll(layerPath, 0) if err != nil { return 0, err } - w, err := cim.NewCimLayerWriter(ctx, layerPath, parentLayerPaths) + w, err := cim.NewCimLayerWriter(ctx, layerPath, cimPath, parentLayerPaths, parentLayerCimPaths) if err != nil { return 0, err } - n, err := writeCimLayerFromTar(ctx, r, w, layerPath) + n, err := writeCimLayerFromTar(ctx, r, w) cerr := w.Close(ctx) if err != nil { return 0, err @@ -53,7 +52,7 @@ func ImportCimLayerFromTar(ctx context.Context, r io.Reader, layerPath string, p return n, nil } -func writeCimLayerFromTar(ctx context.Context, r io.Reader, w *cim.CimLayerWriter, layerPath string) (int64, error) { +func writeCimLayerFromTar(ctx context.Context, r io.Reader, w *cim.CimLayerWriter) (int64, error) { tr := tar.NewReader(r) buf := bufio.NewWriter(w) size := int64(0) @@ -160,7 +159,3 @@ func writeCimLayerFromTar(ctx context.Context, r io.Reader, w *cim.CimLayerWrite } return size, nil } - -func DestroyCimLayer(layerPath string) error { - return cim.DestroyCimLayer(context.Background(), layerPath) -} diff --git a/test/functional/main_test.go b/test/functional/main_test.go index 463fb645a1..0dd7f478cb 100644 --- a/test/functional/main_test.go +++ b/test/functional/main_test.go @@ -25,6 +25,7 @@ import ( "github.com/urfave/cli/v2" "go.opencensus.io/trace" + "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/sync" @@ -275,9 +276,11 @@ func defaultWCOWOptions(ctx context.Context, tb testing.TB) *uvm.OptionsWCOW { opts := testuvm.DefaultWCOWOptions(ctx, tb, testName(tb), hcsOwner) uvmLayers := windowsImageLayers(ctx, tb) scratchDir := testlayers.WCOWScratchDir(ctx, tb, "") - opts.LayerFolders = append(opts.LayerFolders, uvmLayers...) 
-	opts.LayerFolders = append(opts.LayerFolders, scratchDir)
-
+	bootFiles, err := layers.GetWCOWUVMBootFilesFromLayers(ctx, nil, append(uvmLayers, scratchDir))
+	if err != nil {
+		tb.Fatalf("failed to parse WCOW Boot files: %s", err)
+	}
+	opts.BootFiles = bootFiles
 	return opts
 }
diff --git a/test/functional/wcow_test.go b/test/functional/wcow_test.go
index 513ab742ef..abd5eda5be 100644
--- a/test/functional/wcow_test.go
+++ b/test/functional/wcow_test.go
@@ -380,11 +380,16 @@ func TestWCOWArgonShim(t *testing.T) {
 	layers := generateShimLayersStruct(t, imageLayers)

 	id := "argon"
-	// This is a cheat but stops us re-writing exactly the same code just for test
-	argonShimLocalMountPath, closer, err := layerspkg.MountWCOWLayers(context.Background(), id, append(imageLayers, argonShimScratchDir), "", nil)
+	wcowLayers, err := layerspkg.ParseWCOWLayers(nil, append(imageLayers, argonShimScratchDir))
 	if err != nil {
 		t.Fatal(err)
 	}
+
+	mountedLayers, closer, err := layerspkg.MountWCOWLayers(context.Background(), id, nil, wcowLayers)
+	if err != nil {
+		t.Fatal(err)
+	}
+
 	defer func() {
 		if closer != nil {
 			_ = closer.Release(context.Background())
@@ -393,7 +398,7 @@ func TestWCOWArgonShim(t *testing.T) {
 	argonShim, err := hcsshim.CreateContainer(id, &hcsshim.ContainerConfig{
 		SystemType:      "Container",
 		Name:            "argonShim",
-		VolumePath:      argonShimLocalMountPath,
+		VolumePath:      mountedLayers.RootFS,
 		LayerFolderPath: argonShimScratchDir,
 		Layers:          layers,
 		MappedDirectories: []schema1.MappedDir{
@@ -691,7 +696,10 @@ func TestWCOWXenonOciV2(t *testing.T) {
 	}

 	xenonOciOpts := uvm.NewDefaultOptionsWCOW(xenonOci2UVMId, "")
-	xenonOciOpts.LayerFolders = append(imageLayers, xenonOci2UVMScratchDir)
+	xenonOciOpts.BootFiles, err = layerspkg.GetWCOWUVMBootFilesFromLayers(context.Background(), nil, append(imageLayers, xenonOci2UVMScratchDir))
+	if err != nil {
+		t.Fatalf("Failed to parse UVM boot files: %s", err)
+	}
 	xenonOci2UVM, err = uvm.CreateWCOW(context.Background(), xenonOciOpts)
 	if err != nil {
 		t.Fatalf("Failed create UVM: %s", err)
diff --git a/test/internal/container/container.go b/test/internal/container/container.go
index 5246c257c5..0d3a9a0a5b 100644
--- a/test/internal/container/container.go
+++ b/test/internal/container/container.go
@@ -4,7 +4,6 @@ package container

 import (
 	"context"
-	"path/filepath"
 	"testing"

 	"github.com/opencontainers/runtime-spec/specs-go"
@@ -37,9 +36,20 @@ func Create(
 		tb.Fatalf("improperly configured windows spec for container %q: %#+v", name, spec.Windows)
 	}

+	var wcowLayers layers.WCOWLayers
+	var lcowLayers *layers.LCOWLayers
 	var err error
+	if spec.Linux != nil {
+		lcowLayers, err = layers.ParseLCOWLayers(nil, spec.Windows.LayerFolders)
+	} else {
+		wcowLayers, err = layers.ParseWCOWLayers(nil, spec.Windows.LayerFolders)
+	}
+	if err != nil {
+		tb.Fatalf("layer parsing failed: %s", err)
+	}
+
 	if oci.IsJobContainer(spec) {
-		c, r, err = jobcontainers.Create(ctx, name, spec)
+		c, r, err = jobcontainers.Create(ctx, name, spec, jobcontainers.CreateOptions{WCOWLayers: wcowLayers})
 	} else {
 		co := &hcsoci.CreateOptions{
 			ID:   name,
@@ -51,34 +61,9 @@ func Create(
 			// Additionally, these are "standalone" containers, and not CRI pod/workload containers,
 			// so leave end-to-end testing with namespaces for CRI tests
 			NetworkNamespace: "",
+			WCOWLayers:       wcowLayers,
+			LCOWLayers:       lcowLayers,
 		}
-
-		if co.Spec.Linux != nil {
-			if vm == nil {
-				tb.Fatalf("LCOW requires a uVM")
-			}
-
-			var layerFolders []string
-			if co.Spec.Windows != nil {
-				layerFolders = co.Spec.Windows.LayerFolders
-			}
-			if len(layerFolders) <= 1 {
-				tb.Fatalf("LCOW requires at least 2 layers (including scratch): %v", layerFolders)
-			}
-			scratch := layerFolders[len(layerFolders)-1]
-			parents := layerFolders[:len(layerFolders)-1]
-
-			// todo: support partitioned layers
-			co.LCOWLayers = &layers.LCOWLayers{
-				Layers:         make([]*layers.LCOWLayer, 0, len(parents)),
-				ScratchVHDPath: filepath.Join(scratch, "sandbox.vhdx"),
-			}
-
-			for _, p := range parents {
-				co.LCOWLayers.Layers = append(co.LCOWLayers.Layers, &layers.LCOWLayer{VHDPath: filepath.Join(p, "layer.vhd")})
-			}
-		}
-
 		c, r, err = hcsoci.CreateContainer(ctx, co)
 	}
diff --git a/test/pkg/uvm/wcow.go b/test/pkg/uvm/wcow.go
index 02f9cabe09..2f4644cae8 100644
--- a/test/pkg/uvm/wcow.go
+++ b/test/pkg/uvm/wcow.go
@@ -7,6 +7,7 @@ import (
 	"testing"

 	hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
+	shimlayers "github.com/Microsoft/hcsshim/internal/layers"
 	"github.com/Microsoft/hcsshim/internal/uvm"

 	"github.com/Microsoft/hcsshim/test/internal/layers"
@@ -36,8 +37,8 @@ func CreateWCOWUVM(ctx context.Context, tb testing.TB, id, image string) (*uvm.U
 func CreateWCOW(ctx context.Context, tb testing.TB, opts *uvm.OptionsWCOW) (*uvm.UtilityVM, CleanupFn) {
 	tb.Helper()

-	if opts == nil || len(opts.LayerFolders) < 2 {
-		tb.Fatalf("opts must bet set with LayerFolders")
+	if opts == nil || opts.BootFiles == nil {
+		tb.Fatalf("opts must be set with BootFiles")
 	}

 	vm, err := uvm.CreateWCOW(ctx, opts)
@@ -75,8 +76,11 @@ func CreateWCOWUVMFromOptsWithImage(
 	uvmLayers := img.Layers(ctx, tb)
 	scratchDir := tb.TempDir()
-	opts.LayerFolders = append(opts.LayerFolders, uvmLayers...)
-	opts.LayerFolders = append(opts.LayerFolders, scratchDir)
+	bootFiles, err := shimlayers.GetWCOWUVMBootFilesFromLayers(ctx, nil, append(uvmLayers, scratchDir))
+	if err != nil {
+		tb.Fatalf("failed to parse UVM boot layers: %s", err)
+	}
+	opts.BootFiles = bootFiles

 	vm, cleanup := CreateWCOW(ctx, tb, opts)
 	tb.Cleanup(func() { cleanup(ctx) })
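Taken together, the test updates above all follow the same two patterns: utility-VM boot files are now derived from the layer folders via GetWCOWUVMBootFilesFromLayers, and container layers are parsed and mounted through ParseWCOWLayers and MountWCOWLayers. A rough sketch of both patterns, as they could be written from inside the hcsshim module (these are internal packages), with hypothetical paths and IDs and with cleanup simplified:

package main

import (
	"context"
	"fmt"

	"github.com/Microsoft/hcsshim/internal/layers"
	"github.com/Microsoft/hcsshim/internal/uvm"
)

func main() {
	ctx := context.Background()

	// Hypothetical read-only image layers plus a scratch directory, topmost layer first.
	imageLayers := []string{`C:\layers\2`, `C:\layers\1`, `C:\layers\0`}
	scratchDir := `C:\layers\scratch`
	layerFolders := append(imageLayers, scratchDir)

	// UVM boot files are derived from the layers instead of setting LayerFolders
	// on OptionsWCOW directly; the nil stands in for rootfs mounts, matching the
	// test usage above.
	opts := uvm.NewDefaultOptionsWCOW("wcow-uvm", "")
	bootFiles, err := layers.GetWCOWUVMBootFilesFromLayers(ctx, nil, layerFolders)
	if err != nil {
		panic(err)
	}
	opts.BootFiles = bootFiles

	vm, err := uvm.CreateWCOW(ctx, opts)
	if err != nil {
		panic(err)
	}
	defer vm.Close()

	// For an Argon (process-isolated) container the layers are parsed and mounted
	// on the host; the nil mirrors the Argon test above, which passes no utility VM.
	wcowLayers, err := layers.ParseWCOWLayers(nil, layerFolders)
	if err != nil {
		panic(err)
	}
	mounted, closer, err := layers.MountWCOWLayers(ctx, "argon-container", nil, wcowLayers)
	if err != nil {
		panic(err)
	}
	defer func() { _ = closer.Release(ctx) }()

	fmt.Println("container rootfs volume:", mounted.RootFS)
}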