From 222992ede1092f68b524610fc6e0539785f94198 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 8 Sep 2020 15:46:27 +1000 Subject: [PATCH 1/5] vendor: update govmm Bring in the QomGet function. shortlog: Chenbin (1): typo fix David Gibson (1): Add qom-get function Jakob-Naucke (1): Add support for hot-plugging IBM VFIO-AP devices Julio Montes (3): travis: Run coveralls after success github: enable github actions travis: disable amd64 jobs Signed-off-by: David Gibson --- Gopkg.lock | 4 ++-- Gopkg.toml | 2 +- vendor/github.com/intel/govmm/qemu/qemu.go | 3 +++ vendor/github.com/intel/govmm/qemu/qmp.go | 26 +++++++++++++++++++++- 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/Gopkg.lock b/Gopkg.lock index 0634bad00a..bcd5438693 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -420,11 +420,11 @@ revision = "2f1d1f20f75d5404f53b9edf6b53ed5505508675" [[projects]] - digest = "1:e4b04b215d8cb0310c06f4a5af631f7df6648ffebf1911d3f4de7f3ea605db87" + digest = "1:f13392fb2409b31673515b4bd6fe65da34256f1424565020c6eb6a6184d54b92" name = "github.com/intel/govmm" packages = ["qemu"] pruneopts = "NUT" - revision = "547a8518098aaa71c5d5d7a370f211e00161016b" + revision = "6fa954a506730087919a5272ed2a9e4515cd7bc8" [[projects]] digest = "1:ee3d719407ec4bd877eaa4da37e2935298c3d9029ec3c20e502c0e14768b754c" diff --git a/Gopkg.toml b/Gopkg.toml index 2cdbda4adc..368e8e26cc 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -48,7 +48,7 @@ [[constraint]] name = "github.com/intel/govmm" - revision = "547a8518098aaa71c5d5d7a370f211e00161016b" + revision = "6fa954a506730087919a5272ed2a9e4515cd7bc8" [[constraint]] name = "github.com/kata-containers/agent" diff --git a/vendor/github.com/intel/govmm/qemu/qemu.go b/vendor/github.com/intel/govmm/qemu/qemu.go index e326713e1e..9288f59f8a 100644 --- a/vendor/github.com/intel/govmm/qemu/qemu.go +++ b/vendor/github.com/intel/govmm/qemu/qemu.go @@ -123,6 +123,9 @@ const ( // VfioCCW is the vfio driver with CCW transport. VfioCCW DeviceDriver = "vfio-ccw" + // VfioAP is the vfio driver with AP transport. + VfioAP DeviceDriver = "vfio-ap" + // VHostVSockPCI is a generic Vsock vhost device with PCI transport. VHostVSockPCI DeviceDriver = "vhost-vsock-pci" diff --git a/vendor/github.com/intel/govmm/qemu/qmp.go b/vendor/github.com/intel/govmm/qemu/qmp.go index 5585a8792b..fc3f1e3f3a 100644 --- a/vendor/github.com/intel/govmm/qemu/qmp.go +++ b/vendor/github.com/intel/govmm/qemu/qmp.go @@ -281,7 +281,7 @@ func (q *QMP) readLoop(fromVMCh chan<- []byte) { fromVMCh <- sendLine } - q.cfg.Logger.Infof("sanner return error: %v", scanner.Err()) + q.cfg.Logger.Infof("scanner return error: %v", scanner.Err()) close(fromVMCh) } @@ -1217,6 +1217,15 @@ func (q *QMP) ExecutePCIVFIOMediatedDeviceAdd(ctx context.Context, devID, sysfsd return q.executeCommand(ctx, "device_add", args, nil) } +// ExecuteAPVFIOMediatedDeviceAdd adds a VFIO mediated AP device to a QEMU instance using the device_add command. +func (q *QMP) ExecuteAPVFIOMediatedDeviceAdd(ctx context.Context, sysfsdev string) error { + args := map[string]interface{}{ + "driver": VfioAP, + "sysfsdev": sysfsdev, + } + return q.executeCommand(ctx, "device_add", args, nil) +} + // isSocketIDSupported returns if the cpu driver supports the socket id option func isSocketIDSupported(driver string) bool { if driver == "host-s390x-cpu" || driver == "host-powerpc64-cpu" { @@ -1630,3 +1639,18 @@ func (q *QMP) ExecQomSet(ctx context.Context, path, property string, value uint6 return q.executeCommand(ctx, "qom-set", args, nil) } + +// ExecQomGet qom-get path property +func (q *QMP) ExecQomGet(ctx context.Context, path, property string) (interface{}, error) { + args := map[string]interface{}{ + "path": path, + "property": property, + } + + response, err := q.executeCommandWithResponse(ctx, "qom-get", args, nil, nil) + if err != nil { + return "", err + } + + return response, nil +} From 914d31a5cd95d7060a4f337fc730bd0f0b222cae Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 25 Aug 2020 10:56:25 +1000 Subject: [PATCH 2/5] device: Refactor hotplugVFIODevice() to have common exit path hotplugVFIODevice() has several different paths depending if we're plugging into a root port or a PCIE<->PCI bridge and if we're using a regular or mediated VFIO device. We're going to want some common code on the successful exit path here, so refactor the function to allow that without duplication. Signed-off-by: David Gibson --- virtcontainers/qemu.go | 43 ++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index 86c380b2ab..1950f86d55 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -1330,33 +1330,36 @@ func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) (err erro switch device.Type { case config.VFIODeviceNormalType: - return q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, device.Bus, romFile) + err = q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, device.Bus, romFile) case config.VFIODeviceMediatedType: - return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", device.Bus, romFile) + err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", device.Bus, romFile) default: - return fmt.Errorf("Incorrect VFIO device type found") + err = fmt.Errorf("Incorrect VFIO device type found") } - } - - addr, bridge, err := q.arch.addDeviceToBridge(devID, types.PCI) - if err != nil { - return err - } - - defer func() { + } else { + addr, bridge, err := q.arch.addDeviceToBridge(devID, types.PCI) if err != nil { - q.arch.removeDeviceFromBridge(devID) + return err } - }() - switch device.Type { - case config.VFIODeviceNormalType: - return q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, addr, bridge.ID, romFile) - case config.VFIODeviceMediatedType: - return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, addr, bridge.ID, romFile) - default: - return fmt.Errorf("Incorrect VFIO device type found") + defer func() { + if err != nil { + q.arch.removeDeviceFromBridge(devID) + } + }() + + device.Bus = bridge.ID + + switch device.Type { + case config.VFIODeviceNormalType: + err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, addr, device.Bus, romFile) + case config.VFIODeviceMediatedType: + err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, addr, device.Bus, romFile) + default: + err = fmt.Errorf("Incorrect VFIO device type found") + } } + return err } else { q.Logger().WithField("dev-id", devID).Info("Start hot-unplug VFIO device") From 5f212b4c2c24a3a3f1ebb733041e3cda8e91ad60 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 2 Sep 2020 13:37:29 +1000 Subject: [PATCH 3/5] device: Record guest PCI path for VFIO devices For several device types which correspond to a PCI device in the guest we record the device's PCI path in the guest. We don't currently do that for VFIO devices, but we're going to need to for better handling of SR-IOV devices. To accomplish this, we have to determine the guest PCI path from the information the VMM gives us: For qemu, we query the slot of the device and its bridge from QMP. For cloud-hypervisor, the device add interface gives us a guest PCI address. In fact this represents a design error in the clh API - there's no way it can really know the guest PCI address in general. It works in this case, because clh doesn't use PCI bridges, so the device will always be on the root bus. Based on that, the PCI path is simply the device's slot number. Signed-off-by: David Gibson --- virtcontainers/clh.go | 26 +++++++-- virtcontainers/device/config/config.go | 3 + virtcontainers/qemu.go | 79 +++++++++++++++++++++++++- 3 files changed, 103 insertions(+), 5 deletions(-) diff --git a/virtcontainers/clh.go b/virtcontainers/clh.go index 538e92cb73..4195fa5662 100644 --- a/virtcontainers/clh.go +++ b/virtcontainers/clh.go @@ -444,15 +444,33 @@ func (clh *cloudHypervisor) hotplugAddBlockDevice(drive *config.BlockDrive) erro return err } -func (clh *cloudHypervisor) hotPlugVFIODevice(device config.VFIODev) error { +func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error { cl := clh.client() ctx, cancel := context.WithTimeout(context.Background(), clhHotPlugAPITimeout*time.Second) defer cancel() - _, _, err := cl.VmAddDevicePut(ctx, chclient.VmAddDevice{Path: device.SysfsDev, Id: device.ID}) + pciInfo, _, err := cl.VmAddDevicePut(ctx, chclient.VmAddDevice{Path: device.SysfsDev, Id: device.ID}) if err != nil { - err = fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err)) + return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err)) } + + // clh doesn't use bridges, so the PCI path is simply the slot + // number of the device. This will break if clh starts using + // bridges (including PCI-E root ports), but so will the clh + // API, since there's no way it can reliably a guest Bdf when + // bridges are present. + tokens := strings.Split(pciInfo.Bdf, ":") + if len(tokens) != 3 || tokens[0] != "0000" || tokens[1] != "00" { + return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf) + } + + tokens = strings.Split(tokens[2], ".") + if len(tokens) != 2 || tokens[1] != "0" || len(tokens[0]) != 2 { + return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf) + } + + device.GuestPciPath, err = vcTypes.PciPathFromString(tokens[0]) + return err } @@ -466,7 +484,7 @@ func (clh *cloudHypervisor) hotplugAddDevice(devInfo interface{}, devType device return nil, clh.hotplugAddBlockDevice(drive) case vfioDev: device := devInfo.(*config.VFIODev) - return nil, clh.hotPlugVFIODevice(*device) + return nil, clh.hotPlugVFIODevice(device) default: return nil, fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType) } diff --git a/virtcontainers/device/config/config.go b/virtcontainers/device/config/config.go index bcd0d62695..3dd12825ff 100644 --- a/virtcontainers/device/config/config.go +++ b/virtcontainers/device/config/config.go @@ -223,6 +223,9 @@ type VFIODev struct { // Bus of VFIO PCIe device Bus string + + // Guest PCI address as / + GuestPciPath vcTypes.PciPath } // RNGDev represents a random number generator device diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index 1950f86d55..6685de3d82 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -1293,6 +1293,71 @@ func (q *qemu) hotplugVhostUserDevice(vAttr *config.VhostUserDeviceAttrs, op ope return nil } +// Query QMP to find the PCI slot of a device, given its QOM path or ID +func (q *qemu) qomGetSlot(qomPath string) (vcTypes.PciSlot, error) { + addr, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, qomPath, "addr") + if err != nil { + return vcTypes.PciSlot{}, err + } + addrf, ok := addr.(float64) + // XXX going via float makes no real sense, but that's how + // JSON works, and we'll get away with it for the small values + // we have here + if !ok { + return vcTypes.PciSlot{}, fmt.Errorf("addr QOM property of %q is %T not a number", qomPath, addr) + } + addri := int(addrf) + + slotNum, funcNum := addri>>3, addri&0x7 + if funcNum != 0 { + return vcTypes.PciSlot{}, fmt.Errorf("Unexpected non-zero PCI function (%02x.%1x) on %q", + slotNum, funcNum, qomPath) + } + + return vcTypes.PciSlotFromInt(slotNum) +} + +// Query QMP to find a device's PCI path given its QOM path or ID +// "PCI path" means the PCI slot number of each bridge leading to the +// device, and finally the device itself, as a '/' separated list +func (q *qemu) qomGetPciPath(qemuID string) (vcTypes.PciPath, error) { + // XXX: For now we assume there's exactly one bridge, since + // that's always how we configure qemu from Kata for now. It + // would be good to generalize this to different PCI + // topologies + devSlot, err := q.qomGetSlot(qemuID) + if err != nil { + return vcTypes.PciPath{}, err + } + + busq, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, qemuID, "parent_bus") + if err != nil { + return vcTypes.PciPath{}, err + } + + bus, ok := busq.(string) + if !ok { + return vcTypes.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %t not a string", qemuID, busq) + } + + // `bus` is the QOM path of the QOM bus object, but we need + // the PCI bridge which manages that bus. There doesn't seem + // to be a way to get that other than to simply drop the last + // path component. + idx := strings.LastIndex(bus, "/") + if idx == -1 { + return vcTypes.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", bus) + } + bridge := bus[:idx] + + bridgeSlot, err := q.qomGetSlot(bridge) + if err != nil { + return vcTypes.PciPath{}, err + } + + return vcTypes.PciPathFromSlots(bridgeSlot, devSlot) +} + func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) (err error) { err = q.qmpSetup() if err != nil { @@ -1359,7 +1424,19 @@ func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) (err erro err = fmt.Errorf("Incorrect VFIO device type found") } } - return err + if err != nil { + return err + } + // XXX: Depending on whether we're doing root port or + // bridge hotplug, and how the bridge is set up in + // other parts of the code, we may or may not already + // have information about the slot number of the + // bridge and or the device. For simplicity, just + // query both of them back from qemu + device.GuestPciPath, err = q.qomGetPciPath(devID) + if err != nil { + return err + } } else { q.Logger().WithField("dev-id", devID).Info("Start hot-unplug VFIO device") From a27cef9faa293e62880b3fdedd997cfeadd5cc37 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 1 Sep 2020 15:46:18 +1000 Subject: [PATCH 4/5] device: Give the agent information about VFIO devices We send information about several kinds of devices to the agent so that it can apply specific handling. We don't currently do this with VFIO devices. However we need to do that so that the agent can properly wait for VFIO devices to be ready (previously it did that using a PCI rescan which may not be reliable and has some very bad side effects). This patch collates and sends the relevant information. Depends-on: github.com/kata-containers/agent#850 fixes #2664 Signed-off-by: David Gibson --- virtcontainers/kata_agent.go | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/virtcontainers/kata_agent.go b/virtcontainers/kata_agent.go index 934b1bee82..cf952fd957 100644 --- a/virtcontainers/kata_agent.go +++ b/virtcontainers/kata_agent.go @@ -77,6 +77,7 @@ var ( kataSCSIDevType = "scsi" kataNvdimmDevType = "nvdimm" kataVirtioFSDevType = "virtio-fs" + kataVfioVMDevType = "vfio-vm" sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} sharedDirVirtioFSOptions = []string{} sharedDirVirtioFSDaxOptions = "dax" @@ -1253,6 +1254,40 @@ func (k *kataAgent) appendVhostUserBlkDevice(dev ContainerDevice, c *Container) return kataDevice } +func (k *kataAgent) appendVfioDevice(dev ContainerDevice, c *Container) *grpc.Device { + device := c.sandbox.devManager.GetDeviceByID(dev.ID) + + devList, ok := device.GetDeviceInfo().([]*config.VFIODev) + if !ok || devList == nil { + k.Logger().WithField("device", device).Error("malformed vfio device") + return nil + } + + groupNum := filepath.Base(dev.ContainerPath) + + // Each /dev/vfio/NN device represents a VFIO group, which + // could include several PCI devices. So we give group + // information in the main structure, then list each + // individual PCI device in the Options array. + // + // Each option is formatted as "DDDD:BB:DD.F=" + // DDDD:BB:DD.F is the device's PCI address on the + // *host*. is the device's PCI path in the guest + // (see qomGetPciPath() for details). + kataDevice := &grpc.Device{ + ContainerPath: dev.ContainerPath, + Type: kataVfioVMDevType, + Id: groupNum, + Options: make([]string, len(devList)), + } + + for i, pciDev := range devList { + kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDev.BDF, pciDev.GuestPciPath) + } + + return kataDevice +} + func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*grpc.Device { var kataDevice *grpc.Device @@ -1268,6 +1303,8 @@ func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*gr kataDevice = k.appendBlockDevice(dev, c) case config.VhostUserBlk: kataDevice = k.appendVhostUserBlkDevice(dev, c) + case config.DeviceVFIO: + kataDevice = k.appendVfioDevice(dev, c) } if kataDevice == nil { From 74f7fda2496ca74695d4995b3c3f40586646381d Mon Sep 17 00:00:00 2001 From: Julio Montes Date: Thu, 7 May 2020 20:19:49 +0000 Subject: [PATCH 5/5] virtcontainers: Do not lazy-attach devices The "lazy attach" mechanism [1] was added to hotplugs LBS (Large BAR space) devices after re-scanning the PCI bus, fixing LBS hotplug in kata containers. Since PCI rescan is removed in kata-containers/agent#782, lazy attach is not longer needed. fixes #2664 [1] https://github.com/kata-containers/runtime/pull/2461 Signed-off-by: Julio Montes --- virtcontainers/container.go | 54 +---------- virtcontainers/device/drivers/utils.go | 16 ---- virtcontainers/device/drivers/vfio.go | 11 ++- virtcontainers/device/manager/manager.go | 2 - virtcontainers/device/manager/utils.go | 101 -------------------- virtcontainers/device/manager/utils_test.go | 45 --------- virtcontainers/sandbox_test.go | 4 +- 7 files changed, 17 insertions(+), 216 deletions(-) diff --git a/virtcontainers/container.go b/virtcontainers/container.go index 37117d7e38..60835fbf65 100644 --- a/virtcontainers/container.go +++ b/virtcontainers/container.go @@ -883,38 +883,9 @@ func (c *Container) create() (err error) { } } - var ( - machineType = c.sandbox.config.HypervisorConfig.HypervisorMachineType - normalAttachedDevs []ContainerDevice //for q35: normally attached devices - delayAttachedDevs []ContainerDevice //for q35: delay attached devices, for example, large bar space device - ) - // Fix: https://github.com/kata-containers/runtime/issues/2460 - if machineType == QemuQ35 { - // add Large Bar space device to delayAttachedDevs - for _, device := range c.devices { - var isLargeBarSpace bool - isLargeBarSpace, err = manager.IsVFIOLargeBarSpaceDevice(device.ContainerPath) - if err != nil { - return - } - if isLargeBarSpace { - delayAttachedDevs = append(delayAttachedDevs, device) - } else { - normalAttachedDevs = append(normalAttachedDevs, device) - } - } - } else { - normalAttachedDevs = c.devices - } - - c.Logger().WithFields(logrus.Fields{ - "machine_type": machineType, - "devices": normalAttachedDevs, - }).Info("normal attach devices") - if len(normalAttachedDevs) > 0 { - if err = c.attachDevices(normalAttachedDevs); err != nil { - return - } + // Attach devices + if err = c.attachDevices(); err != nil { + return } // Deduce additional system mount info that should be handled by the agent @@ -927,17 +898,6 @@ func (c *Container) create() (err error) { } c.process = *process - // lazy attach device after createContainer for q35 - if machineType == QemuQ35 && len(delayAttachedDevs) > 0 { - c.Logger().WithFields(logrus.Fields{ - "machine_type": machineType, - "devices": delayAttachedDevs, - }).Info("lazy attach devices") - if err = c.attachDevices(delayAttachedDevs); err != nil { - return - } - } - if !rootless.IsRootless() && !c.sandbox.config.SandboxCgroupOnly { if err = c.cgroupsCreate(); err != nil { return @@ -1440,15 +1400,11 @@ func (c *Container) removeDrive() (err error) { return nil } -func (c *Container) attachDevices(devices []ContainerDevice) error { +func (c *Container) attachDevices() error { // there's no need to do rollback when error happens, // because if attachDevices fails, container creation will fail too, // and rollbackFailingContainerCreation could do all the rollbacks - - // since devices with large bar space require delayed attachment, - // the devices need to be split into two lists, normalAttachedDevs and delayAttachedDevs. - // so c.device is not used here. See issue https://github.com/kata-containers/runtime/issues/2460. - for _, dev := range devices { + for _, dev := range c.devices { if err := c.sandbox.devManager.AttachDevice(dev.ID, c.sandbox); err != nil { return err } diff --git a/virtcontainers/device/drivers/utils.go b/virtcontainers/device/drivers/utils.go index 72e27a5e21..27fc4c098f 100644 --- a/virtcontainers/device/drivers/utils.go +++ b/virtcontainers/device/drivers/utils.go @@ -92,19 +92,3 @@ func readPCIProperty(propertyPath string) (string, error) { } return strings.Split(string(buf), "\n")[0], nil } - -func GetVFIODeviceType(deviceFileName string) config.VFIODeviceType { - //For example, 0000:04:00.0 - tokens := strings.Split(deviceFileName, ":") - vfioDeviceType := config.VFIODeviceErrorType - if len(tokens) == 3 { - vfioDeviceType = config.VFIODeviceNormalType - } else { - //For example, 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001 - tokens = strings.Split(deviceFileName, "-") - if len(tokens) == 5 { - vfioDeviceType = config.VFIODeviceMediatedType - } - } - return vfioDeviceType -} diff --git a/virtcontainers/device/drivers/vfio.go b/virtcontainers/device/drivers/vfio.go index ea170cb307..06b792b1c9 100644 --- a/virtcontainers/device/drivers/vfio.go +++ b/virtcontainers/device/drivers/vfio.go @@ -210,7 +210,16 @@ func (device *VFIODevice) Load(ds persistapi.DeviceState) { // It should implement GetAttachCount() and DeviceID() as api.Device implementation // here it shares function from *GenericDevice so we don't need duplicate codes func getVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceSysfsDev string, vfioDeviceType config.VFIODeviceType, err error) { - vfioDeviceType = GetVFIODeviceType(deviceFileName) + tokens := strings.Split(deviceFileName, ":") + vfioDeviceType = config.VFIODeviceErrorType + if len(tokens) == 3 { + vfioDeviceType = config.VFIODeviceNormalType + } else { + tokens = strings.Split(deviceFileName, "-") + if len(tokens) == 5 { + vfioDeviceType = config.VFIODeviceMediatedType + } + } switch vfioDeviceType { case config.VFIODeviceNormalType: diff --git a/virtcontainers/device/manager/manager.go b/virtcontainers/device/manager/manager.go index 6a2b665a5e..8c61f60869 100644 --- a/virtcontainers/device/manager/manager.go +++ b/virtcontainers/device/manager/manager.go @@ -78,8 +78,6 @@ func NewDeviceManager(blockDriver string, vhostUserStoreEnabled bool, vhostUserS dm.blockDriver = VirtioSCSI } - drivers.AllPCIeDevs = make(map[string]bool) - for _, dev := range devices { dm.devices[dev.DeviceID()] = dev } diff --git a/virtcontainers/device/manager/utils.go b/virtcontainers/device/manager/utils.go index 0e287e736c..86bc16da31 100644 --- a/virtcontainers/device/manager/utils.go +++ b/virtcontainers/device/manager/utils.go @@ -7,16 +7,10 @@ package manager import ( - "fmt" - "io/ioutil" "path/filepath" - "strconv" "strings" - "github.com/sirupsen/logrus" - "github.com/kata-containers/runtime/virtcontainers/device/config" - "github.com/kata-containers/runtime/virtcontainers/device/drivers" ) const ( @@ -42,101 +36,6 @@ func isBlock(devInfo config.DeviceInfo) bool { return devInfo.DevType == "b" } -// IsVFIOLargeBarSpaceDevice checks if the device is a large bar space device. -func IsVFIOLargeBarSpaceDevice(hostPath string) (bool, error) { - if !isVFIO(hostPath) { - return false, nil - } - - iommuDevicesPath := filepath.Join(config.SysIOMMUPath, filepath.Base(hostPath), "devices") - deviceFiles, err := ioutil.ReadDir(iommuDevicesPath) - if err != nil { - return false, err - } - - // Pass all devices in iommu group - for _, deviceFile := range deviceFiles { - vfioDeviceType := drivers.GetVFIODeviceType(deviceFile.Name()) - var isLarge bool - switch vfioDeviceType { - case config.VFIODeviceNormalType: - sysfsResource := filepath.Join(iommuDevicesPath, deviceFile.Name(), "resource") - if isLarge, err = isLargeBarSpace(sysfsResource); err != nil { - return false, err - } - deviceLogger().WithFields(logrus.Fields{ - "device-file": deviceFile.Name(), - "device-type": vfioDeviceType, - "resource": sysfsResource, - "large-bar-space": isLarge, - }).Info("Detect large bar space device") - return isLarge, nil - case config.VFIODeviceMediatedType: - //TODO: support VFIODeviceMediatedType - deviceLogger().WithFields(logrus.Fields{ - "device-file": deviceFile.Name(), - "device-type": vfioDeviceType, - }).Warn("Detect large bar space device is not yet supported for VFIODeviceMediatedType") - default: - deviceLogger().WithFields(logrus.Fields{ - "device-file": deviceFile.Name(), - "device-type": vfioDeviceType, - }).Warn("Incorrect token found when detecting large bar space devices") - } - } - - return false, nil -} - -func isLargeBarSpace(resourcePath string) (bool, error) { - buf, err := ioutil.ReadFile(resourcePath) - if err != nil { - return false, fmt.Errorf("failed to read sysfs resource: %v", err) - } - - // The resource file contains host addresses of PCI resources: - // For example: - // $ cat /sys/bus/pci/devices/0000:04:00.0/resource - // 0x00000000c6000000 0x00000000c6ffffff 0x0000000000040200 - // 0x0000383800000000 0x0000383bffffffff 0x000000000014220c - // Refer: - // resource format: https://github.com/torvalds/linux/blob/63623fd44972d1ed2bfb6e0fb631dfcf547fd1e7/drivers/pci/pci-sysfs.c#L145 - // calculate size : https://github.com/pciutils/pciutils/blob/61ecc14a327de030336f1ff3fea9c7e7e55a90ca/lspci.c#L388 - for rIdx, line := range strings.Split(string(buf), "\n") { - cols := strings.Fields(line) - // start and end columns are required to calculate the size - if len(cols) < 2 { - deviceLogger().WithField("resource-line", line).Debug("not enough columns to calculate PCI size") - continue - } - start, _ := strconv.ParseUint(cols[0], 0, 64) - end, _ := strconv.ParseUint(cols[1], 0, 64) - if start > end { - deviceLogger().WithFields(logrus.Fields{ - "start": start, - "end": end, - }).Debug("start is greater than end") - continue - } - // Use right shift to convert Bytes to GBytes - // This is equivalent to ((end - start + 1) / 1024 / 1024 / 1024) - gbSize := (end - start + 1) >> 30 - deviceLogger().WithFields(logrus.Fields{ - "resource": resourcePath, - "region": rIdx, - "start": cols[0], - "end": cols[1], - "gb-size": gbSize, - }).Debug("Check large bar space device") - //size is large than 4G - if gbSize > 4 { - return true, nil - } - } - - return false, nil -} - // isVhostUserBlk checks if the device is a VhostUserBlk device. func isVhostUserBlk(devInfo config.DeviceInfo) bool { return devInfo.DevType == "b" && devInfo.Major == config.VhostUserBlkMajor diff --git a/virtcontainers/device/manager/utils_test.go b/virtcontainers/device/manager/utils_test.go index 80e2fcdd94..87d72bae90 100644 --- a/virtcontainers/device/manager/utils_test.go +++ b/virtcontainers/device/manager/utils_test.go @@ -7,8 +7,6 @@ package manager import ( - "io/ioutil" - "os" "testing" "github.com/kata-containers/runtime/virtcontainers/device/config" @@ -105,46 +103,3 @@ func TestIsVhostUserSCSI(t *testing.T) { assert.Equal(t, d.expected, isVhostUserSCSI) } } - -func TestIsLargeBarSpace(t *testing.T) { - assert := assert.New(t) - - // File not exist - bs, err := isLargeBarSpace("/abc/xyz/123/rgb") - assert.Error(err) - assert.False(bs) - - f, err := ioutil.TempFile("", "pci") - assert.NoError(err) - defer f.Close() - defer os.RemoveAll(f.Name()) - - type testData struct { - resourceInfo string - error bool - result bool - } - - for _, d := range []testData{ - {"", false, false}, - {"\t\n\t ", false, false}, - {"abc zyx", false, false}, - {"abc zyx rgb", false, false}, - {"abc\t zyx \trgb", false, false}, - {"0x00015\n0x0013", false, false}, - {"0x00000000c6000000 0x00000000c6ffffff 0x0000000000040200", false, false}, - {"0x0000383bffffffff 0x0000383800000000", false, false}, // start greater than end - {"0x0000383800000000 0x0000383bffffffff", false, true}, - {"0x0000383800000000 0x0000383bffffffff 0x000000000014220c", false, true}, - } { - f.WriteAt([]byte(d.resourceInfo), 0) - bs, err = isLargeBarSpace(f.Name()) - assert.NoError(f.Truncate(0)) - if d.error { - assert.Error(err, d.resourceInfo) - } else { - assert.NoError(err, d.resourceInfo) - } - assert.Equal(d.result, bs, d.resourceInfo) - } -} diff --git a/virtcontainers/sandbox_test.go b/virtcontainers/sandbox_test.go index 69c2131b06..44a382fa4e 100644 --- a/virtcontainers/sandbox_test.go +++ b/virtcontainers/sandbox_test.go @@ -534,7 +534,7 @@ func TestSandboxAttachDevicesVFIO(t *testing.T) { containers[c.id].sandbox = &sandbox - err = containers[c.id].attachDevices(c.devices) + err = containers[c.id].attachDevices() assert.Nil(t, err, "Error while attaching devices %s", err) err = containers[c.id].detachDevices() @@ -629,7 +629,7 @@ func TestSandboxAttachDevicesVhostUserBlk(t *testing.T) { containers[c.id].sandbox = &sandbox - err = containers[c.id].attachDevices(c.devices) + err = containers[c.id].attachDevices() assert.Nil(t, err, "Error while attaching vhost-user-blk devices %s", err) err = containers[c.id].detachDevices()