From ae3dcd27539b168191f3dae0ca81ac753d7096bc Mon Sep 17 00:00:00 2001 From: Amit Barve Date: Fri, 25 Feb 2022 12:10:56 -0800 Subject: [PATCH 1/3] Enable using multiple SCSI controllers Until RS5 we didn't have support for multiple SCSI controllers. However, now we do allow upto 4 SCSI controllers. This commit removes restrictions in code to limit the number of SCSI controllers to 1. This also fixes a bug in the SCSI device ID generation in GCS. Signed-off-by: Amit Barve --- internal/guest/storage/scsi/scsi.go | 5 ++++- internal/uvm/create.go | 4 ++-- internal/uvm/create_lcow.go | 18 +++++++++--------- internal/uvm/create_wcow.go | 22 ++++++++++++---------- internal/uvm/scsi.go | 8 ++++---- 5 files changed, 31 insertions(+), 26 deletions(-) diff --git a/internal/guest/storage/scsi/scsi.go b/internal/guest/storage/scsi/scsi.go index f5231dfb9d..bdb68854b7 100644 --- a/internal/guest/storage/scsi/scsi.go +++ b/internal/guest/storage/scsi/scsi.go @@ -217,7 +217,10 @@ func ControllerLunToName(ctx context.Context, controller, lun uint8) (_ string, trace.Int64Attribute("controller", int64(controller)), trace.Int64Attribute("lun", int64(lun))) - scsiID := fmt.Sprintf("0:0:%d:%d", controller, lun) + // Linux uses following format to Identify a SCSI disk: + // ::: + // In our case bus number and target id will always remain 0. + scsiID := fmt.Sprintf("%d:0:0:%d", controller, lun) // Devices matching the given SCSI code should each have a subdirectory // under /sys/bus/scsi/devices//block. diff --git a/internal/uvm/create.go b/internal/uvm/create.go index 8f293f67fe..c696d9658b 100644 --- a/internal/uvm/create.go +++ b/internal/uvm/create.go @@ -128,8 +128,8 @@ func verifyOptions(ctx context.Context, options interface{}) error { if opts.EnableDeferredCommit && !opts.AllowOvercommit { return errors.New("EnableDeferredCommit is not supported on physically backed VMs") } - if opts.SCSIControllerCount > 1 { - return errors.New("SCSI controller count must be 0 or 1") // Future extension here for up to 4 + if opts.SCSIControllerCount > 4 { + return errors.New("SCSI controller count must be less than 4") } if opts.VPMemDeviceCount > MaxVPMEMCount { return fmt.Errorf("VPMem device count cannot be greater than %d", MaxVPMEMCount) diff --git a/internal/uvm/create_lcow.go b/internal/uvm/create_lcow.go index 5a5628230d..766460126d 100644 --- a/internal/uvm/create_lcow.go +++ b/internal/uvm/create_lcow.go @@ -137,7 +137,7 @@ func NewDefaultOptionsLCOW(id, owner string) *OptionsLCOW { KernelBootOptions: "", EnableGraphicsConsole: false, ConsolePipe: "", - SCSIControllerCount: 1, + SCSIControllerCount: 4, UseGuestConnection: true, ExecCommandLine: fmt.Sprintf("/bin/gcs -v4 -log-format json -loglevel %s", logrus.StandardLogger().Level.String()), ForwardStdout: false, @@ -353,11 +353,11 @@ func makeLCOWVMGSDoc(ctx context.Context, opts *OptionsLCOW, uvm *UtilityVM) (_ } if uvm.scsiControllerCount > 0 { - // TODO: JTERRY75 - this should enumerate scsicount and add an entry per value. - doc.VirtualMachine.Devices.Scsi = map[string]hcsschema.Scsi{ - "0": { + doc.VirtualMachine.Devices.Scsi = map[string]hcsschema.Scsi{} + for i := 0; i < int(uvm.scsiControllerCount); i++ { + doc.VirtualMachine.Devices.Scsi[fmt.Sprintf("%d", i)] = hcsschema.Scsi{ Attachments: make(map[string]hcsschema.Attachment), - }, + } } } @@ -538,11 +538,11 @@ func makeLCOWDoc(ctx context.Context, opts *OptionsLCOW, uvm *UtilityVM) (_ *hcs } if uvm.scsiControllerCount > 0 { - // TODO: JTERRY75 - this should enumerate scsicount and add an entry per value. - doc.VirtualMachine.Devices.Scsi = map[string]hcsschema.Scsi{ - "0": { + doc.VirtualMachine.Devices.Scsi = map[string]hcsschema.Scsi{} + for i := 0; i < int(uvm.scsiControllerCount); i++ { + doc.VirtualMachine.Devices.Scsi[fmt.Sprintf("%d", i)] = hcsschema.Scsi{ Attachments: make(map[string]hcsschema.Attachment), - }, + } } } if uvm.vpmemMaxCount > 0 { diff --git a/internal/uvm/create_wcow.go b/internal/uvm/create_wcow.go index c09b49d9f0..1ee33980c0 100644 --- a/internal/uvm/create_wcow.go +++ b/internal/uvm/create_wcow.go @@ -249,7 +249,7 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error id: opts.ID, owner: opts.Owner, operatingSystem: "windows", - scsiControllerCount: 1, + scsiControllerCount: 4, vsmbDirShares: make(map[string]*VSMBShare), vsmbFileShares: make(map[string]*VSMBShare), vpciDevices: make(map[VPCIDeviceKey]*VPCIDevice), @@ -309,15 +309,17 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error } } - doc.VirtualMachine.Devices.Scsi = map[string]hcsschema.Scsi{ - "0": { - Attachments: map[string]hcsschema.Attachment{ - "0": { - Path: scratchPath, - Type_: "VirtualDisk", - }, - }, - }, + // WCOW will always have at least 1 SCSI controller for the UVM scratch VHD. + doc.VirtualMachine.Devices.Scsi = map[string]hcsschema.Scsi{} + for i := 0; i < int(uvm.scsiControllerCount); i++ { + doc.VirtualMachine.Devices.Scsi[fmt.Sprintf("%d", i)] = hcsschema.Scsi{ + Attachments: make(map[string]hcsschema.Attachment), + } + } + + doc.VirtualMachine.Devices.Scsi["0"].Attachments["0"] = hcsschema.Attachment{ + Path: scratchPath, + Type_: "VirtualDisk", } uvm.scsiLocations[0][0] = newSCSIMount(uvm, diff --git a/internal/uvm/scsi.go b/internal/uvm/scsi.go index 216da8dc43..66beaadc8c 100644 --- a/internal/uvm/scsi.go +++ b/internal/uvm/scsi.go @@ -408,10 +408,10 @@ func (uvm *UtilityVM) addSCSIActual(ctx context.Context, addReq *addSCSIRequest) return nil, ErrNoSCSIControllers } - // Note: Can remove this check post-RS5 if multiple controllers are supported - if sm.Controller > 0 { - return nil, ErrTooManyAttachments - } + // TODO(ambarve): Do this check only if RS5 or lower + // if sm.Controller > 0 { + // return nil, ErrTooManyAttachments + // } SCSIModification := &hcsschema.ModifySettingRequest{ RequestType: guestrequest.RequestTypeAdd, From 82ec8e87ff1e58240ca476be2339c288589aa423 Mon Sep 17 00:00:00 2001 From: Amit Barve Date: Thu, 10 Mar 2022 12:03:24 -0800 Subject: [PATCH 2/3] GCS changes to detect SCSI devices. This commit modifies the GCS code to look for new SCSI devices attached to the system instead of relying on the controller & LUN numbers provided by hcsshim. We add new wrappers on top of the Mount, Unmount and UnplugDevice SCSI functions to identify the correct controller LUN numbers first and then call into the actual functions. Signed-off-by: Amit Barve --- internal/guest/storage/scsi/detect.go | 168 +++++++++++++++++++++++ internal/guest/storage/scsi/scsi.go | 14 +- internal/guest/storage/scsi/scsi_test.go | 38 ++--- internal/uvm/scsi.go | 10 ++ internal/uvm/types.go | 7 + 5 files changed, 211 insertions(+), 26 deletions(-) create mode 100644 internal/guest/storage/scsi/detect.go diff --git a/internal/guest/storage/scsi/detect.go b/internal/guest/storage/scsi/detect.go new file mode 100644 index 0000000000..5262b7ea81 --- /dev/null +++ b/internal/guest/storage/scsi/detect.go @@ -0,0 +1,168 @@ +//go:build linux +// +build linux + +package scsi + +import ( + "context" + "fmt" + "os" + "path" + "strconv" + "strings" + "time" + + "github.com/Microsoft/hcsshim/internal/protocol/guestresource" + "github.com/Microsoft/hcsshim/pkg/securitypolicy" + "github.com/sirupsen/logrus" +) + +var ( + knownScsiDevices = make(map[string]bool) + actualMappings = make(map[string]string) +) + +// parseScsiDeviceName parses the given name into a SCSI device name format to return the +// controller number and LUN number. Returns error if it is not a valid SCSI device name. +// Expected device format: ::: +func parseScsiDeviceName(name string) (controller uint8, lun uint8, _ error) { + tokens := strings.Split(name, ":") + if len(tokens) != 4 { + return 0, 0, fmt.Errorf("invalid scsi device name: %s", name) + } + + for i, tok := range tokens { + n, err := strconv.ParseUint(tok, 10, 8) + if err != nil { + return 0, 0, fmt.Errorf("invalid scsi device name: %s", name) + } + switch i { + case 0: + controller = uint8(n) + case 3: + lun = uint8(n) + } + } + return +} + +// detectNewScsiDevice looks for all the SCSI devices attached on the host compares them with the +// previously known devices and returns the path of that device which can be used for mounting it. +func detectNewScsiDevice(ctx context.Context) (uint8, uint8, error) { + // all scsi devices show up under /sys/bus/scsi/devices + // The devices are named as 0:0:0:0, 0:0:0:1, 1:0:0:0 etc. + // The naming format is as follows: + // ::: + // (Section 3.1 from https://tldp.org/HOWTO/html_single/SCSI-2.4-HOWTO/) + + for { + devices, err := os.ReadDir(scsiDevicesPath) + if err != nil { + return 0, 0, fmt.Errorf("failed to read entries from %s: %w", scsiDevicesPath, err) + } + + for _, dev := range devices { + name := path.Base(dev.Name()) + newController, newLUN, err := parseScsiDeviceName(name) + if err != nil { + continue + } + if _, ok := knownScsiDevices[name]; !ok { + knownScsiDevices[name] = true + return newController, newLUN, nil + } + } + + select { + case <-ctx.Done(): + return 0, 0, fmt.Errorf("context timed out waiting for SCSI device") + default: + time.Sleep(time.Millisecond * 10) + continue + } + + } +} + +func removeKnownDevice(controller, LUN uint8) { + name := fmt.Sprintf("%d:0:0:%d", controller, LUN) + delete(knownScsiDevices, name) +} + +// recordActualSCSIMapping keeps a mapping of SCSI devices that are mapped at different +// controller/LUN number than expected. +func recordActualSCSIMapping(origController, origLUN, actualController, actualLUN uint8) error { + origPath := fmt.Sprintf("%d:0:0:%d", origController, origLUN) + actualPath := fmt.Sprintf("%d:0:0:%d", actualController, actualLUN) + if origPath != actualPath { + logrus.Debugf("SCSI disk expected at %s, actually attached to: %s", origPath, actualPath) + } + if _, ok := actualMappings[origPath]; !ok { + actualMappings[origPath] = actualPath + } else { + return fmt.Errorf("double mapping of scsi device %s", origPath) + } + return nil +} + +func getActualMapping(origController, origLUN uint8) (actualController, actualLUN uint8) { + origPath := fmt.Sprintf("%d:0:0:%d", origController, origLUN) + if actualMapping, ok := actualMappings[origPath]; ok { + // should never return error + actualController, actualLUN, _ = parseScsiDeviceName(actualMapping) + } else { + return origController, origLUN + } + return +} + +func removeActualMapping(origController, origLUN uint8) (actualController, actualLUN uint8) { + origPath := fmt.Sprintf("%d:0:0:%d", origController, origLUN) + actualController, actualLUN = getActualMapping(origController, origLUN) + delete(actualMappings, origPath) + return +} + +// Mount is the wrapper on top of the actual Mount call to detect the correct controller and LUN numbers. +func Mount( + ctx context.Context, + controller, + lun uint8, + target string, + readonly bool, + encrypted bool, + options []string, + verityInfo *guestresource.DeviceVerityInfo, + securityPolicy securitypolicy.SecurityPolicyEnforcer, +) (err error) { + actualController, actualLUN, err := detectNewScsiDevice(ctx) + if err != nil { + return err + } + err = recordActualSCSIMapping(controller, lun, actualController, actualLUN) + if err != nil { + return err + } + return mount(ctx, actualController, actualLUN, target, readonly, encrypted, options, verityInfo, securityPolicy) +} + +// Unmount is the wrapper on top of the actual Unmount call to pass the correct controller and LUN numbers. +func Unmount( + ctx context.Context, + controller, + lun uint8, + target string, + encrypted bool, + verityInfo *guestresource.DeviceVerityInfo, + securityPolicy securitypolicy.SecurityPolicyEnforcer, +) (err error) { + controller, lun = getActualMapping(controller, lun) + return unmount(ctx, controller, lun, target, encrypted, verityInfo, securityPolicy) +} + +// UnplugDevice is the wrapper on top of the actual UnplugDevice call to pass the correct controller and LUN numbers. +func UnplugDevice(ctx context.Context, controller, lun uint8) (err error) { + controller, lun = removeActualMapping(controller, lun) + removeKnownDevice(controller, lun) + return unplugDevice(ctx, controller, lun) +} diff --git a/internal/guest/storage/scsi/scsi.go b/internal/guest/storage/scsi/scsi.go index bdb68854b7..f917f0fa2c 100644 --- a/internal/guest/storage/scsi/scsi.go +++ b/internal/guest/storage/scsi/scsi.go @@ -43,7 +43,7 @@ const ( verityDeviceFmt = "verity-scsi-contr%d-lun%d-%s" ) -// Mount creates a mount from the SCSI device on `controller` index `lun` to +// mount creates a mount from the SCSI device on `controller` index `lun` to // `target` // // `target` will be created. On mount failure the created `target` will be @@ -51,7 +51,7 @@ const ( // // If `encrypted` is set to true, the SCSI device will be encrypted using // dm-crypt. -func Mount( +func mount( ctx context.Context, controller, lun uint8, @@ -159,10 +159,10 @@ func Mount( return nil } -// Unmount unmounts a SCSI device mounted at `target`. +// unmount unmounts a SCSI device mounted at `target`. // // If `encrypted` is true, it removes all its associated dm-crypto state. -func Unmount( +func unmount( ctx context.Context, controller, lun uint8, @@ -252,11 +252,11 @@ func ControllerLunToName(ctx context.Context, controller, lun uint8) (_ string, return devicePath, nil } -// UnplugDevice finds the SCSI device on `controller` index `lun` and issues a +// unplugDevice finds the SCSI device on `controller` index `lun` and issues a // guest initiated unplug. // // If the device is not attached returns no error. -func UnplugDevice(ctx context.Context, controller, lun uint8) (err error) { +func unplugDevice(ctx context.Context, controller, lun uint8) (err error) { _, span := trace.StartSpan(ctx, "scsi::UnplugDevice") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() @@ -265,7 +265,7 @@ func UnplugDevice(ctx context.Context, controller, lun uint8) (err error) { trace.Int64Attribute("controller", int64(controller)), trace.Int64Attribute("lun", int64(lun))) - scsiID := fmt.Sprintf("0:0:%d:%d", controller, lun) + scsiID := fmt.Sprintf("%d:0:0:%d", controller, lun) f, err := os.OpenFile(filepath.Join(scsiDevicesPath, scsiID, "delete"), os.O_WRONLY, 0644) if err != nil { if os.IsNotExist(err) { diff --git a/internal/guest/storage/scsi/scsi_test.go b/internal/guest/storage/scsi/scsi_test.go index 4ac33ebcb4..c4cb2f55ae 100644 --- a/internal/guest/storage/scsi/scsi_test.go +++ b/internal/guest/storage/scsi/scsi_test.go @@ -37,7 +37,7 @@ func Test_Mount_Mkdir_Fails_Error(t *testing.T) { return "", nil } - if err := Mount( + if err := mount( context.Background(), 0, 0, @@ -74,7 +74,7 @@ func Test_Mount_Mkdir_ExpectedPath(t *testing.T) { return nil } - if err := Mount( + if err := mount( context.Background(), 0, 0, @@ -111,7 +111,7 @@ func Test_Mount_Mkdir_ExpectedPerm(t *testing.T) { return nil } - if err := Mount( + if err := mount( context.Background(), 0, 0, @@ -148,7 +148,7 @@ func Test_Mount_ControllerLunToName_Valid_Controller(t *testing.T) { return nil } - if err := Mount( + if err := mount( context.Background(), expectedController, 0, @@ -185,7 +185,7 @@ func Test_Mount_ControllerLunToName_Valid_Lun(t *testing.T) { return nil } - if err := Mount( + if err := mount( context.Background(), 0, expectedLun, @@ -225,7 +225,7 @@ func Test_Mount_Calls_RemoveAll_OnMountFailure(t *testing.T) { return expectedErr } - if err := Mount( + if err := mount( context.Background(), 0, 0, @@ -263,7 +263,7 @@ func Test_Mount_Valid_Source(t *testing.T) { } return nil } - err := Mount(context.Background(), 0, 0, "/fake/path", false, false, nil, nil, openDoorSecurityPolicyEnforcer()) + err := mount(context.Background(), 0, 0, "/fake/path", false, false, nil, nil, openDoorSecurityPolicyEnforcer()) if err != nil { t.Fatalf("expected nil err, got: %v", err) } @@ -290,7 +290,7 @@ func Test_Mount_Valid_Target(t *testing.T) { return nil } - if err := Mount( + if err := mount( context.Background(), 0, 0, @@ -326,7 +326,7 @@ func Test_Mount_Valid_FSType(t *testing.T) { return nil } - if err := Mount( + if err := mount( context.Background(), 0, 0, @@ -362,7 +362,7 @@ func Test_Mount_Valid_Flags(t *testing.T) { return nil } - if err := Mount( + if err := mount( context.Background(), 0, 0, @@ -398,7 +398,7 @@ func Test_Mount_Readonly_Valid_Flags(t *testing.T) { return nil } - if err := Mount( + if err := mount( context.Background(), 0, 0, @@ -433,7 +433,7 @@ func Test_Mount_Valid_Data(t *testing.T) { return nil } - if err := Mount( + if err := mount( context.Background(), 0, 0, @@ -469,7 +469,7 @@ func Test_Mount_Readonly_Valid_Data(t *testing.T) { return nil } - if err := Mount( + if err := mount( context.Background(), 0, 0, @@ -506,7 +506,7 @@ func Test_Read_Only_Security_Policy_Enforcement_Mount_Calls(t *testing.T) { } enforcer := mountMonitoringSecurityPolicyEnforcer() - err := Mount(context.Background(), 0, 0, target, true, false, nil, nil, enforcer) + err := mount(context.Background(), 0, 0, target, true, false, nil, nil, enforcer) if err != nil { t.Fatalf("expected nil err, got: %v", err) } @@ -549,7 +549,7 @@ func Test_Read_Write_Security_Policy_Enforcement_Mount_Calls(t *testing.T) { } enforcer := mountMonitoringSecurityPolicyEnforcer() - err := Mount(context.Background(), 0, 0, target, false, false, nil, nil, enforcer) + err := mount(context.Background(), 0, 0, target, false, false, nil, nil, enforcer) if err != nil { t.Fatalf("expected nil err, got: %v", err) } @@ -592,12 +592,12 @@ func Test_Security_Policy_Enforcement_Unmount_Calls(t *testing.T) { } enforcer := mountMonitoringSecurityPolicyEnforcer() - err := Mount(context.Background(), 0, 0, target, true, false, nil, nil, enforcer) + err := mount(context.Background(), 0, 0, target, true, false, nil, nil, enforcer) if err != nil { t.Fatalf("expected nil err, got: %v", err) } - err = Unmount(context.Background(), 0, 0, target, false, nil, enforcer) + err = unmount(context.Background(), 0, 0, target, false, nil, enforcer) if err != nil { t.Fatalf("expected nil err, got: %v", err) } @@ -668,7 +668,7 @@ func Test_CreateVerityTarget_And_Mount_Called_With_Correct_Parameters(t *testing return nil } - if err := Mount( + if err := mount( context.Background(), 0, 0, @@ -717,7 +717,7 @@ func Test_osMkdirAllFails_And_RemoveDevice_Called(t *testing.T) { return nil } - if err := Mount( + if err := mount( context.Background(), 0, 0, diff --git a/internal/uvm/scsi.go b/internal/uvm/scsi.go index 66beaadc8c..e86a2e1ece 100644 --- a/internal/uvm/scsi.go +++ b/internal/uvm/scsi.go @@ -268,6 +268,11 @@ func (uvm *UtilityVM) RemoveSCSI(ctx context.Context, hostPath string) error { } } + // Do not remove multiple SCSI discs simultaneously. + if uvm.operatingSystem == "linux" { + uvm.scsiModificationLock.Lock() + defer uvm.scsiModificationLock.Unlock() + } if err := uvm.modify(ctx, scsiModification); err != nil { return fmt.Errorf("failed to remove SCSI disk %s from container %s: %s", hostPath, uvm.id, err) } @@ -462,6 +467,11 @@ func (uvm *UtilityVM) addSCSIActual(ctx context.Context, addReq *addSCSIRequest) SCSIModification.GuestRequest = guestReq } + // Do not add multiple SCSI discs simultaneously. + if uvm.operatingSystem == "linux" { + uvm.scsiModificationLock.Lock() + defer uvm.scsiModificationLock.Unlock() + } if err := uvm.modify(ctx, SCSIModification); err != nil { return nil, fmt.Errorf("failed to modify UVM with new SCSI mount: %s", err) } diff --git a/internal/uvm/types.go b/internal/uvm/types.go index 065f61c0ba..ca2e709b59 100644 --- a/internal/uvm/types.go +++ b/internal/uvm/types.go @@ -139,4 +139,11 @@ type UtilityVM struct { // noInheritHostTimezone specifies whether to not inherit the hosts timezone for the UVM. UTC will be set as the default instead. // This only applies for WCOW. noInheritHostTimezone bool + + // scsiModificationLock must be held when attaching or detaching a SCSI device + // from the UVM. This is different than the `m` lock because this lock is only + // required for the multiple scsi controller workaround. It can be removed when + // the workaround isn't necessary anymore. Also, we don't want to block all other + // modification requests by using the same lock. + scsiModificationLock sync.Mutex } From b99461265d6dc0821c0389bc25389957c5f7e89f Mon Sep 17 00:00:00 2001 From: Amit Barve Date: Fri, 11 Mar 2022 09:23:37 -0800 Subject: [PATCH 3/3] Add test, PR suggestions, go mod vendor Signed-off-by: Amit Barve --- internal/guest/storage/scsi/detect.go | 51 +--------- internal/guest/storage/scsi/scsi.go | 44 +++++++++ internal/uvm/constants.go | 3 + internal/uvm/create.go | 8 +- internal/uvm/create_lcow.go | 6 +- internal/uvm/create_wcow.go | 2 +- internal/uvm/scsi.go | 9 +- test/cri-containerd/disable_vpmem_test.go | 95 +++++++++++++++++++ .../hcsshim/internal/uvm/constants.go | 3 + .../Microsoft/hcsshim/internal/uvm/create.go | 8 +- .../hcsshim/internal/uvm/create_lcow.go | 22 +++-- .../hcsshim/internal/uvm/create_wcow.go | 22 +++-- .../Microsoft/hcsshim/internal/uvm/scsi.go | 19 ++-- .../Microsoft/hcsshim/internal/uvm/types.go | 7 ++ 14 files changed, 210 insertions(+), 89 deletions(-) create mode 100644 test/cri-containerd/disable_vpmem_test.go diff --git a/internal/guest/storage/scsi/detect.go b/internal/guest/storage/scsi/detect.go index 5262b7ea81..4015e51489 100644 --- a/internal/guest/storage/scsi/detect.go +++ b/internal/guest/storage/scsi/detect.go @@ -12,8 +12,6 @@ import ( "strings" "time" - "github.com/Microsoft/hcsshim/internal/protocol/guestresource" - "github.com/Microsoft/hcsshim/pkg/securitypolicy" "github.com/sirupsen/logrus" ) @@ -55,6 +53,7 @@ func detectNewScsiDevice(ctx context.Context) (uint8, uint8, error) { // ::: // (Section 3.1 from https://tldp.org/HOWTO/html_single/SCSI-2.4-HOWTO/) + timer := time.NewTimer(2 * time.Second) for { devices, err := os.ReadDir(scsiDevicesPath) if err != nil { @@ -74,8 +73,8 @@ func detectNewScsiDevice(ctx context.Context) (uint8, uint8, error) { } select { - case <-ctx.Done(): - return 0, 0, fmt.Errorf("context timed out waiting for SCSI device") + case <-timer.C: + return 0, 0, fmt.Errorf("timed out waiting for SCSI device") default: time.Sleep(time.Millisecond * 10) continue @@ -122,47 +121,3 @@ func removeActualMapping(origController, origLUN uint8) (actualController, actua delete(actualMappings, origPath) return } - -// Mount is the wrapper on top of the actual Mount call to detect the correct controller and LUN numbers. -func Mount( - ctx context.Context, - controller, - lun uint8, - target string, - readonly bool, - encrypted bool, - options []string, - verityInfo *guestresource.DeviceVerityInfo, - securityPolicy securitypolicy.SecurityPolicyEnforcer, -) (err error) { - actualController, actualLUN, err := detectNewScsiDevice(ctx) - if err != nil { - return err - } - err = recordActualSCSIMapping(controller, lun, actualController, actualLUN) - if err != nil { - return err - } - return mount(ctx, actualController, actualLUN, target, readonly, encrypted, options, verityInfo, securityPolicy) -} - -// Unmount is the wrapper on top of the actual Unmount call to pass the correct controller and LUN numbers. -func Unmount( - ctx context.Context, - controller, - lun uint8, - target string, - encrypted bool, - verityInfo *guestresource.DeviceVerityInfo, - securityPolicy securitypolicy.SecurityPolicyEnforcer, -) (err error) { - controller, lun = getActualMapping(controller, lun) - return unmount(ctx, controller, lun, target, encrypted, verityInfo, securityPolicy) -} - -// UnplugDevice is the wrapper on top of the actual UnplugDevice call to pass the correct controller and LUN numbers. -func UnplugDevice(ctx context.Context, controller, lun uint8) (err error) { - controller, lun = removeActualMapping(controller, lun) - removeKnownDevice(controller, lun) - return unplugDevice(ctx, controller, lun) -} diff --git a/internal/guest/storage/scsi/scsi.go b/internal/guest/storage/scsi/scsi.go index f917f0fa2c..de8ea1de86 100644 --- a/internal/guest/storage/scsi/scsi.go +++ b/internal/guest/storage/scsi/scsi.go @@ -159,6 +159,29 @@ func mount( return nil } +// Mount is the wrapper on top of the actual Mount call to detect the correct controller and LUN numbers. +func Mount( + ctx context.Context, + controller, + lun uint8, + target string, + readonly bool, + encrypted bool, + options []string, + verityInfo *guestresource.DeviceVerityInfo, + securityPolicy securitypolicy.SecurityPolicyEnforcer, +) (err error) { + actualController, actualLUN, err := detectNewScsiDevice(ctx) + if err != nil { + return err + } + err = recordActualSCSIMapping(controller, lun, actualController, actualLUN) + if err != nil { + return err + } + return mount(ctx, actualController, actualLUN, target, readonly, encrypted, options, verityInfo, securityPolicy) +} + // unmount unmounts a SCSI device mounted at `target`. // // If `encrypted` is true, it removes all its associated dm-crypto state. @@ -206,6 +229,20 @@ func unmount( return nil } +// Unmount is the wrapper on top of the actual Unmount call to pass the correct controller and LUN numbers. +func Unmount( + ctx context.Context, + controller, + lun uint8, + target string, + encrypted bool, + verityInfo *guestresource.DeviceVerityInfo, + securityPolicy securitypolicy.SecurityPolicyEnforcer, +) (err error) { + controller, lun = getActualMapping(controller, lun) + return unmount(ctx, controller, lun, target, encrypted, verityInfo, securityPolicy) +} + // ControllerLunToName finds the `/dev/sd*` path to the SCSI device on // `controller` index `lun`. func ControllerLunToName(ctx context.Context, controller, lun uint8) (_ string, err error) { @@ -280,3 +317,10 @@ func unplugDevice(ctx context.Context, controller, lun uint8) (err error) { } return nil } + +// UnplugDevice is the wrapper on top of the actual UnplugDevice call to pass the correct controller and LUN numbers. +func UnplugDevice(ctx context.Context, controller, lun uint8) (err error) { + controller, lun = removeActualMapping(controller, lun) + removeKnownDevice(controller, lun) + return unplugDevice(ctx, controller, lun) +} diff --git a/internal/uvm/constants.go b/internal/uvm/constants.go index 842a8a5e8c..b75485829c 100644 --- a/internal/uvm/constants.go +++ b/internal/uvm/constants.go @@ -29,6 +29,9 @@ const ( WCOWGlobalMountPrefix = "C:\\mounts\\m%d" // RootfsPath is part of the container's rootfs path RootfsPath = "rootfs" + + // Maximum number of SCSI controllers allowed. + MaxSCSIControllers = 4 ) var ( diff --git a/internal/uvm/create.go b/internal/uvm/create.go index c696d9658b..5513ebc108 100644 --- a/internal/uvm/create.go +++ b/internal/uvm/create.go @@ -91,6 +91,9 @@ type Options struct { // applied to all containers. On Windows it's configurable per container, but we can mimic this for // Windows by just applying the location specified here per container. ProcessDumpLocation string + + // The number of SCSI controllers. Defaults to 1 for WCOW and 4 for LCOW + SCSIControllerCount uint32 } // compares the create opts used during template creation with the create opts @@ -128,8 +131,8 @@ func verifyOptions(ctx context.Context, options interface{}) error { if opts.EnableDeferredCommit && !opts.AllowOvercommit { return errors.New("EnableDeferredCommit is not supported on physically backed VMs") } - if opts.SCSIControllerCount > 4 { - return errors.New("SCSI controller count must be less than 4") + if opts.SCSIControllerCount > MaxSCSIControllers { + return fmt.Errorf("SCSI controller count must be less than %d", MaxSCSIControllers) } if opts.VPMemDeviceCount > MaxVPMEMCount { return fmt.Errorf("VPMem device count cannot be greater than %d", MaxVPMEMCount) @@ -184,6 +187,7 @@ func newDefaultOptions(id, owner string) *Options { EnableDeferredCommit: false, ProcessorCount: defaultProcessorCount(), FullyPhysicallyBacked: false, + SCSIControllerCount: 1, } if opts.Owner == "" { diff --git a/internal/uvm/create_lcow.go b/internal/uvm/create_lcow.go index 766460126d..d4b6a3fba6 100644 --- a/internal/uvm/create_lcow.go +++ b/internal/uvm/create_lcow.go @@ -86,7 +86,6 @@ type OptionsLCOW struct { KernelBootOptions string // Additional boot options for the kernel EnableGraphicsConsole bool // If true, enable a graphics console for the utility VM ConsolePipe string // The named pipe path to use for the serial console. eg \\.\pipe\vmpipe - SCSIControllerCount uint32 // The number of SCSI controllers. Defaults to 1. Currently we only support 0 or 1. UseGuestConnection bool // Whether the HCS should connect to the UVM's GCS. Defaults to true ExecCommandLine string // The command line to exec from init. Defaults to GCS ForwardStdout bool // Whether stdout will be forwarded from the executed program. Defaults to false @@ -137,7 +136,6 @@ func NewDefaultOptionsLCOW(id, owner string) *OptionsLCOW { KernelBootOptions: "", EnableGraphicsConsole: false, ConsolePipe: "", - SCSIControllerCount: 4, UseGuestConnection: true, ExecCommandLine: fmt.Sprintf("/bin/gcs -v4 -log-format json -loglevel %s", logrus.StandardLogger().Level.String()), ForwardStdout: false, @@ -156,6 +154,10 @@ func NewDefaultOptionsLCOW(id, owner string) *OptionsLCOW { DisableTimeSyncService: false, } + if osversion.Build() > osversion.RS5 { + opts.SCSIControllerCount = 4 + } + if _, err := os.Stat(filepath.Join(opts.BootFilesPath, VhdFile)); err == nil { // We have a rootfs.vhd in the boot files path. Use it over an initrd.img opts.RootFSFile = VhdFile diff --git a/internal/uvm/create_wcow.go b/internal/uvm/create_wcow.go index 1ee33980c0..2568edc55e 100644 --- a/internal/uvm/create_wcow.go +++ b/internal/uvm/create_wcow.go @@ -249,7 +249,7 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error id: opts.ID, owner: opts.Owner, operatingSystem: "windows", - scsiControllerCount: 4, + scsiControllerCount: opts.SCSIControllerCount, vsmbDirShares: make(map[string]*VSMBShare), vsmbFileShares: make(map[string]*VSMBShare), vpciDevices: make(map[VPCIDeviceKey]*VPCIDevice), diff --git a/internal/uvm/scsi.go b/internal/uvm/scsi.go index e86a2e1ece..b29de0e19c 100644 --- a/internal/uvm/scsi.go +++ b/internal/uvm/scsi.go @@ -169,8 +169,8 @@ func newSCSIMount( // SCSI controllers associated with a utility VM to use. // Lock must be held when calling this function func (uvm *UtilityVM) allocateSCSISlot(ctx context.Context) (int, int, error) { - for controller, luns := range uvm.scsiLocations { - for lun, sm := range luns { + for controller := 0; controller < int(uvm.scsiControllerCount); controller++ { + for lun, sm := range uvm.scsiLocations[controller] { // If sm is nil, we have found an open slot so we allocate a new SCSIMount if sm == nil { return controller, lun, nil @@ -413,11 +413,6 @@ func (uvm *UtilityVM) addSCSIActual(ctx context.Context, addReq *addSCSIRequest) return nil, ErrNoSCSIControllers } - // TODO(ambarve): Do this check only if RS5 or lower - // if sm.Controller > 0 { - // return nil, ErrTooManyAttachments - // } - SCSIModification := &hcsschema.ModifySettingRequest{ RequestType: guestrequest.RequestTypeAdd, Settings: hcsschema.Attachment{ diff --git a/test/cri-containerd/disable_vpmem_test.go b/test/cri-containerd/disable_vpmem_test.go new file mode 100644 index 0000000000..3caaaddaeb --- /dev/null +++ b/test/cri-containerd/disable_vpmem_test.go @@ -0,0 +1,95 @@ +//go:build functional +// +build functional + +package cri_containerd + +import ( + "context" + "fmt" + "sync" + "testing" + + "github.com/Microsoft/hcsshim/pkg/annotations" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" +) + +func Test_70LayerImagesWithNoVPmemForLayers(t *testing.T) { + requireFeatures(t, featureLCOW) + + ubuntu70Image := "cplatpublic.azurecr.io/ubuntu70extra:18.04" + alpine70Image := "cplatpublic.azurecr.io/alpine70extra:latest" + testImages := []string{ubuntu70Image, alpine70Image} + pullRequiredLCOWImages(t, []string{imageLcowK8sPause, ubuntu70Image, alpine70Image}) + + client := newTestRuntimeClient(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + nContainers := 4 + containerIDs := make([]string, nContainers) + + for i := 0; i < nContainers; i++ { + defer cleanupContainer(t, client, ctx, &containerIDs[i]) + } + + sandboxRequest := getRunPodSandboxRequest( + t, + lcowRuntimeHandler, + WithSandboxAnnotations(map[string]string{ + // 1 VPMEM device will be used for UVM VHD and all container layers + // will be attached over SCSI. + annotations.VPMemCount: "1", + // Make sure the 1 VPMEM device isn't used for multimapping. + annotations.VPMemNoMultiMapping: "true", + }), + ) + // override pod name + sandboxRequest.Config.Metadata.Name = fmt.Sprintf("%s-pod", t.Name()) + + response, err := client.RunPodSandbox(ctx, sandboxRequest) + if err != nil { + t.Fatalf("failed RunPodSandbox request with: %v", err) + } + podID := response.PodSandboxId + + var wg sync.WaitGroup + wg.Add(nContainers) + for idx := 0; idx < nContainers; idx++ { + go func(i int) { + request := &runtime.CreateContainerRequest{ + PodSandboxId: podID, + Config: &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: fmt.Sprintf("%s-container-%d", t.Name(), i), + }, + Image: &runtime.ImageSpec{ + Image: testImages[i%2], + }, + Command: []string{ + "/bin/sh", + "-c", + "while true; do echo 'Hello, World!'; sleep 1; done", + }, + }, + SandboxConfig: sandboxRequest.Config, + } + + containerIDs[i] = createContainer(t, client, ctx, request) + startContainer(t, client, ctx, containerIDs[i]) + wg.Done() + }(idx) + } + wg.Wait() + + for i := 0; i < nContainers; i++ { + containerExecReq := &runtime.ExecSyncRequest{ + ContainerId: containerIDs[i], + Cmd: []string{"ls"}, + Timeout: 20, + } + r := execSync(t, client, ctx, containerExecReq) + if r.ExitCode != 0 { + t.Fatalf("failed to exec inside container, exit code: %d", r.ExitCode) + } + } +} diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/constants.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/constants.go index 842a8a5e8c..b75485829c 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/constants.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/constants.go @@ -29,6 +29,9 @@ const ( WCOWGlobalMountPrefix = "C:\\mounts\\m%d" // RootfsPath is part of the container's rootfs path RootfsPath = "rootfs" + + // Maximum number of SCSI controllers allowed. + MaxSCSIControllers = 4 ) var ( diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go index 8f293f67fe..5513ebc108 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create.go @@ -91,6 +91,9 @@ type Options struct { // applied to all containers. On Windows it's configurable per container, but we can mimic this for // Windows by just applying the location specified here per container. ProcessDumpLocation string + + // The number of SCSI controllers. Defaults to 1 for WCOW and 4 for LCOW + SCSIControllerCount uint32 } // compares the create opts used during template creation with the create opts @@ -128,8 +131,8 @@ func verifyOptions(ctx context.Context, options interface{}) error { if opts.EnableDeferredCommit && !opts.AllowOvercommit { return errors.New("EnableDeferredCommit is not supported on physically backed VMs") } - if opts.SCSIControllerCount > 1 { - return errors.New("SCSI controller count must be 0 or 1") // Future extension here for up to 4 + if opts.SCSIControllerCount > MaxSCSIControllers { + return fmt.Errorf("SCSI controller count must be less than %d", MaxSCSIControllers) } if opts.VPMemDeviceCount > MaxVPMEMCount { return fmt.Errorf("VPMem device count cannot be greater than %d", MaxVPMEMCount) @@ -184,6 +187,7 @@ func newDefaultOptions(id, owner string) *Options { EnableDeferredCommit: false, ProcessorCount: defaultProcessorCount(), FullyPhysicallyBacked: false, + SCSIControllerCount: 1, } if opts.Owner == "" { diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go index 5a5628230d..d4b6a3fba6 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_lcow.go @@ -86,7 +86,6 @@ type OptionsLCOW struct { KernelBootOptions string // Additional boot options for the kernel EnableGraphicsConsole bool // If true, enable a graphics console for the utility VM ConsolePipe string // The named pipe path to use for the serial console. eg \\.\pipe\vmpipe - SCSIControllerCount uint32 // The number of SCSI controllers. Defaults to 1. Currently we only support 0 or 1. UseGuestConnection bool // Whether the HCS should connect to the UVM's GCS. Defaults to true ExecCommandLine string // The command line to exec from init. Defaults to GCS ForwardStdout bool // Whether stdout will be forwarded from the executed program. Defaults to false @@ -137,7 +136,6 @@ func NewDefaultOptionsLCOW(id, owner string) *OptionsLCOW { KernelBootOptions: "", EnableGraphicsConsole: false, ConsolePipe: "", - SCSIControllerCount: 1, UseGuestConnection: true, ExecCommandLine: fmt.Sprintf("/bin/gcs -v4 -log-format json -loglevel %s", logrus.StandardLogger().Level.String()), ForwardStdout: false, @@ -156,6 +154,10 @@ func NewDefaultOptionsLCOW(id, owner string) *OptionsLCOW { DisableTimeSyncService: false, } + if osversion.Build() > osversion.RS5 { + opts.SCSIControllerCount = 4 + } + if _, err := os.Stat(filepath.Join(opts.BootFilesPath, VhdFile)); err == nil { // We have a rootfs.vhd in the boot files path. Use it over an initrd.img opts.RootFSFile = VhdFile @@ -353,11 +355,11 @@ func makeLCOWVMGSDoc(ctx context.Context, opts *OptionsLCOW, uvm *UtilityVM) (_ } if uvm.scsiControllerCount > 0 { - // TODO: JTERRY75 - this should enumerate scsicount and add an entry per value. - doc.VirtualMachine.Devices.Scsi = map[string]hcsschema.Scsi{ - "0": { + doc.VirtualMachine.Devices.Scsi = map[string]hcsschema.Scsi{} + for i := 0; i < int(uvm.scsiControllerCount); i++ { + doc.VirtualMachine.Devices.Scsi[fmt.Sprintf("%d", i)] = hcsschema.Scsi{ Attachments: make(map[string]hcsschema.Attachment), - }, + } } } @@ -538,11 +540,11 @@ func makeLCOWDoc(ctx context.Context, opts *OptionsLCOW, uvm *UtilityVM) (_ *hcs } if uvm.scsiControllerCount > 0 { - // TODO: JTERRY75 - this should enumerate scsicount and add an entry per value. - doc.VirtualMachine.Devices.Scsi = map[string]hcsschema.Scsi{ - "0": { + doc.VirtualMachine.Devices.Scsi = map[string]hcsschema.Scsi{} + for i := 0; i < int(uvm.scsiControllerCount); i++ { + doc.VirtualMachine.Devices.Scsi[fmt.Sprintf("%d", i)] = hcsschema.Scsi{ Attachments: make(map[string]hcsschema.Attachment), - }, + } } } if uvm.vpmemMaxCount > 0 { diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go index c09b49d9f0..2568edc55e 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/create_wcow.go @@ -249,7 +249,7 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error id: opts.ID, owner: opts.Owner, operatingSystem: "windows", - scsiControllerCount: 1, + scsiControllerCount: opts.SCSIControllerCount, vsmbDirShares: make(map[string]*VSMBShare), vsmbFileShares: make(map[string]*VSMBShare), vpciDevices: make(map[VPCIDeviceKey]*VPCIDevice), @@ -309,15 +309,17 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error } } - doc.VirtualMachine.Devices.Scsi = map[string]hcsschema.Scsi{ - "0": { - Attachments: map[string]hcsschema.Attachment{ - "0": { - Path: scratchPath, - Type_: "VirtualDisk", - }, - }, - }, + // WCOW will always have at least 1 SCSI controller for the UVM scratch VHD. + doc.VirtualMachine.Devices.Scsi = map[string]hcsschema.Scsi{} + for i := 0; i < int(uvm.scsiControllerCount); i++ { + doc.VirtualMachine.Devices.Scsi[fmt.Sprintf("%d", i)] = hcsschema.Scsi{ + Attachments: make(map[string]hcsschema.Attachment), + } + } + + doc.VirtualMachine.Devices.Scsi["0"].Attachments["0"] = hcsschema.Attachment{ + Path: scratchPath, + Type_: "VirtualDisk", } uvm.scsiLocations[0][0] = newSCSIMount(uvm, diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/scsi.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/scsi.go index 216da8dc43..b29de0e19c 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/scsi.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/scsi.go @@ -169,8 +169,8 @@ func newSCSIMount( // SCSI controllers associated with a utility VM to use. // Lock must be held when calling this function func (uvm *UtilityVM) allocateSCSISlot(ctx context.Context) (int, int, error) { - for controller, luns := range uvm.scsiLocations { - for lun, sm := range luns { + for controller := 0; controller < int(uvm.scsiControllerCount); controller++ { + for lun, sm := range uvm.scsiLocations[controller] { // If sm is nil, we have found an open slot so we allocate a new SCSIMount if sm == nil { return controller, lun, nil @@ -268,6 +268,11 @@ func (uvm *UtilityVM) RemoveSCSI(ctx context.Context, hostPath string) error { } } + // Do not remove multiple SCSI discs simultaneously. + if uvm.operatingSystem == "linux" { + uvm.scsiModificationLock.Lock() + defer uvm.scsiModificationLock.Unlock() + } if err := uvm.modify(ctx, scsiModification); err != nil { return fmt.Errorf("failed to remove SCSI disk %s from container %s: %s", hostPath, uvm.id, err) } @@ -408,11 +413,6 @@ func (uvm *UtilityVM) addSCSIActual(ctx context.Context, addReq *addSCSIRequest) return nil, ErrNoSCSIControllers } - // Note: Can remove this check post-RS5 if multiple controllers are supported - if sm.Controller > 0 { - return nil, ErrTooManyAttachments - } - SCSIModification := &hcsschema.ModifySettingRequest{ RequestType: guestrequest.RequestTypeAdd, Settings: hcsschema.Attachment{ @@ -462,6 +462,11 @@ func (uvm *UtilityVM) addSCSIActual(ctx context.Context, addReq *addSCSIRequest) SCSIModification.GuestRequest = guestReq } + // Do not add multiple SCSI discs simultaneously. + if uvm.operatingSystem == "linux" { + uvm.scsiModificationLock.Lock() + defer uvm.scsiModificationLock.Unlock() + } if err := uvm.modify(ctx, SCSIModification); err != nil { return nil, fmt.Errorf("failed to modify UVM with new SCSI mount: %s", err) } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/types.go b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/types.go index 065f61c0ba..ca2e709b59 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/types.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/uvm/types.go @@ -139,4 +139,11 @@ type UtilityVM struct { // noInheritHostTimezone specifies whether to not inherit the hosts timezone for the UVM. UTC will be set as the default instead. // This only applies for WCOW. noInheritHostTimezone bool + + // scsiModificationLock must be held when attaching or detaching a SCSI device + // from the UVM. This is different than the `m` lock because this lock is only + // required for the multiple scsi controller workaround. It can be removed when + // the workaround isn't necessary anymore. Also, we don't want to block all other + // modification requests by using the same lock. + scsiModificationLock sync.Mutex }