diff --git a/Gopkg.lock b/Gopkg.lock index 0634bad00a..bcd5438693 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -420,11 +420,11 @@ revision = "2f1d1f20f75d5404f53b9edf6b53ed5505508675" [[projects]] - digest = "1:e4b04b215d8cb0310c06f4a5af631f7df6648ffebf1911d3f4de7f3ea605db87" + digest = "1:f13392fb2409b31673515b4bd6fe65da34256f1424565020c6eb6a6184d54b92" name = "github.com/intel/govmm" packages = ["qemu"] pruneopts = "NUT" - revision = "547a8518098aaa71c5d5d7a370f211e00161016b" + revision = "6fa954a506730087919a5272ed2a9e4515cd7bc8" [[projects]] digest = "1:ee3d719407ec4bd877eaa4da37e2935298c3d9029ec3c20e502c0e14768b754c" diff --git a/Gopkg.toml b/Gopkg.toml index 2cdbda4adc..368e8e26cc 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -48,7 +48,7 @@ [[constraint]] name = "github.com/intel/govmm" - revision = "547a8518098aaa71c5d5d7a370f211e00161016b" + revision = "6fa954a506730087919a5272ed2a9e4515cd7bc8" [[constraint]] name = "github.com/kata-containers/agent" diff --git a/vendor/github.com/intel/govmm/qemu/qemu.go b/vendor/github.com/intel/govmm/qemu/qemu.go index e326713e1e..9288f59f8a 100644 --- a/vendor/github.com/intel/govmm/qemu/qemu.go +++ b/vendor/github.com/intel/govmm/qemu/qemu.go @@ -123,6 +123,9 @@ const ( // VfioCCW is the vfio driver with CCW transport. VfioCCW DeviceDriver = "vfio-ccw" + // VfioAP is the vfio driver with AP transport. + VfioAP DeviceDriver = "vfio-ap" + // VHostVSockPCI is a generic Vsock vhost device with PCI transport. VHostVSockPCI DeviceDriver = "vhost-vsock-pci" diff --git a/vendor/github.com/intel/govmm/qemu/qmp.go b/vendor/github.com/intel/govmm/qemu/qmp.go index 5585a8792b..fc3f1e3f3a 100644 --- a/vendor/github.com/intel/govmm/qemu/qmp.go +++ b/vendor/github.com/intel/govmm/qemu/qmp.go @@ -281,7 +281,7 @@ func (q *QMP) readLoop(fromVMCh chan<- []byte) { fromVMCh <- sendLine } - q.cfg.Logger.Infof("sanner return error: %v", scanner.Err()) + q.cfg.Logger.Infof("scanner return error: %v", scanner.Err()) close(fromVMCh) } @@ -1217,6 +1217,15 @@ func (q *QMP) ExecutePCIVFIOMediatedDeviceAdd(ctx context.Context, devID, sysfsd return q.executeCommand(ctx, "device_add", args, nil) } +// ExecuteAPVFIOMediatedDeviceAdd adds a VFIO mediated AP device to a QEMU instance using the device_add command. +func (q *QMP) ExecuteAPVFIOMediatedDeviceAdd(ctx context.Context, sysfsdev string) error { + args := map[string]interface{}{ + "driver": VfioAP, + "sysfsdev": sysfsdev, + } + return q.executeCommand(ctx, "device_add", args, nil) +} + // isSocketIDSupported returns if the cpu driver supports the socket id option func isSocketIDSupported(driver string) bool { if driver == "host-s390x-cpu" || driver == "host-powerpc64-cpu" { @@ -1630,3 +1639,18 @@ func (q *QMP) ExecQomSet(ctx context.Context, path, property string, value uint6 return q.executeCommand(ctx, "qom-set", args, nil) } + +// ExecQomGet qom-get path property +func (q *QMP) ExecQomGet(ctx context.Context, path, property string) (interface{}, error) { + args := map[string]interface{}{ + "path": path, + "property": property, + } + + response, err := q.executeCommandWithResponse(ctx, "qom-get", args, nil, nil) + if err != nil { + return "", err + } + + return response, nil +} diff --git a/virtcontainers/clh.go b/virtcontainers/clh.go index 538e92cb73..4195fa5662 100644 --- a/virtcontainers/clh.go +++ b/virtcontainers/clh.go @@ -444,15 +444,33 @@ func (clh *cloudHypervisor) hotplugAddBlockDevice(drive *config.BlockDrive) erro return err } -func (clh *cloudHypervisor) hotPlugVFIODevice(device config.VFIODev) error { +func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error { cl := clh.client() ctx, cancel := context.WithTimeout(context.Background(), clhHotPlugAPITimeout*time.Second) defer cancel() - _, _, err := cl.VmAddDevicePut(ctx, chclient.VmAddDevice{Path: device.SysfsDev, Id: device.ID}) + pciInfo, _, err := cl.VmAddDevicePut(ctx, chclient.VmAddDevice{Path: device.SysfsDev, Id: device.ID}) if err != nil { - err = fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err)) + return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err)) } + + // clh doesn't use bridges, so the PCI path is simply the slot + // number of the device. This will break if clh starts using + // bridges (including PCI-E root ports), but so will the clh + // API, since there's no way it can reliably a guest Bdf when + // bridges are present. + tokens := strings.Split(pciInfo.Bdf, ":") + if len(tokens) != 3 || tokens[0] != "0000" || tokens[1] != "00" { + return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf) + } + + tokens = strings.Split(tokens[2], ".") + if len(tokens) != 2 || tokens[1] != "0" || len(tokens[0]) != 2 { + return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf) + } + + device.GuestPciPath, err = vcTypes.PciPathFromString(tokens[0]) + return err } @@ -466,7 +484,7 @@ func (clh *cloudHypervisor) hotplugAddDevice(devInfo interface{}, devType device return nil, clh.hotplugAddBlockDevice(drive) case vfioDev: device := devInfo.(*config.VFIODev) - return nil, clh.hotPlugVFIODevice(*device) + return nil, clh.hotPlugVFIODevice(device) default: return nil, fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType) } diff --git a/virtcontainers/container.go b/virtcontainers/container.go index 37117d7e38..60835fbf65 100644 --- a/virtcontainers/container.go +++ b/virtcontainers/container.go @@ -883,38 +883,9 @@ func (c *Container) create() (err error) { } } - var ( - machineType = c.sandbox.config.HypervisorConfig.HypervisorMachineType - normalAttachedDevs []ContainerDevice //for q35: normally attached devices - delayAttachedDevs []ContainerDevice //for q35: delay attached devices, for example, large bar space device - ) - // Fix: https://github.com/kata-containers/runtime/issues/2460 - if machineType == QemuQ35 { - // add Large Bar space device to delayAttachedDevs - for _, device := range c.devices { - var isLargeBarSpace bool - isLargeBarSpace, err = manager.IsVFIOLargeBarSpaceDevice(device.ContainerPath) - if err != nil { - return - } - if isLargeBarSpace { - delayAttachedDevs = append(delayAttachedDevs, device) - } else { - normalAttachedDevs = append(normalAttachedDevs, device) - } - } - } else { - normalAttachedDevs = c.devices - } - - c.Logger().WithFields(logrus.Fields{ - "machine_type": machineType, - "devices": normalAttachedDevs, - }).Info("normal attach devices") - if len(normalAttachedDevs) > 0 { - if err = c.attachDevices(normalAttachedDevs); err != nil { - return - } + // Attach devices + if err = c.attachDevices(); err != nil { + return } // Deduce additional system mount info that should be handled by the agent @@ -927,17 +898,6 @@ func (c *Container) create() (err error) { } c.process = *process - // lazy attach device after createContainer for q35 - if machineType == QemuQ35 && len(delayAttachedDevs) > 0 { - c.Logger().WithFields(logrus.Fields{ - "machine_type": machineType, - "devices": delayAttachedDevs, - }).Info("lazy attach devices") - if err = c.attachDevices(delayAttachedDevs); err != nil { - return - } - } - if !rootless.IsRootless() && !c.sandbox.config.SandboxCgroupOnly { if err = c.cgroupsCreate(); err != nil { return @@ -1440,15 +1400,11 @@ func (c *Container) removeDrive() (err error) { return nil } -func (c *Container) attachDevices(devices []ContainerDevice) error { +func (c *Container) attachDevices() error { // there's no need to do rollback when error happens, // because if attachDevices fails, container creation will fail too, // and rollbackFailingContainerCreation could do all the rollbacks - - // since devices with large bar space require delayed attachment, - // the devices need to be split into two lists, normalAttachedDevs and delayAttachedDevs. - // so c.device is not used here. See issue https://github.com/kata-containers/runtime/issues/2460. - for _, dev := range devices { + for _, dev := range c.devices { if err := c.sandbox.devManager.AttachDevice(dev.ID, c.sandbox); err != nil { return err } diff --git a/virtcontainers/device/config/config.go b/virtcontainers/device/config/config.go index bcd0d62695..3dd12825ff 100644 --- a/virtcontainers/device/config/config.go +++ b/virtcontainers/device/config/config.go @@ -223,6 +223,9 @@ type VFIODev struct { // Bus of VFIO PCIe device Bus string + + // Guest PCI address as / + GuestPciPath vcTypes.PciPath } // RNGDev represents a random number generator device diff --git a/virtcontainers/device/drivers/utils.go b/virtcontainers/device/drivers/utils.go index 72e27a5e21..27fc4c098f 100644 --- a/virtcontainers/device/drivers/utils.go +++ b/virtcontainers/device/drivers/utils.go @@ -92,19 +92,3 @@ func readPCIProperty(propertyPath string) (string, error) { } return strings.Split(string(buf), "\n")[0], nil } - -func GetVFIODeviceType(deviceFileName string) config.VFIODeviceType { - //For example, 0000:04:00.0 - tokens := strings.Split(deviceFileName, ":") - vfioDeviceType := config.VFIODeviceErrorType - if len(tokens) == 3 { - vfioDeviceType = config.VFIODeviceNormalType - } else { - //For example, 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001 - tokens = strings.Split(deviceFileName, "-") - if len(tokens) == 5 { - vfioDeviceType = config.VFIODeviceMediatedType - } - } - return vfioDeviceType -} diff --git a/virtcontainers/device/drivers/vfio.go b/virtcontainers/device/drivers/vfio.go index ea170cb307..06b792b1c9 100644 --- a/virtcontainers/device/drivers/vfio.go +++ b/virtcontainers/device/drivers/vfio.go @@ -210,7 +210,16 @@ func (device *VFIODevice) Load(ds persistapi.DeviceState) { // It should implement GetAttachCount() and DeviceID() as api.Device implementation // here it shares function from *GenericDevice so we don't need duplicate codes func getVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceSysfsDev string, vfioDeviceType config.VFIODeviceType, err error) { - vfioDeviceType = GetVFIODeviceType(deviceFileName) + tokens := strings.Split(deviceFileName, ":") + vfioDeviceType = config.VFIODeviceErrorType + if len(tokens) == 3 { + vfioDeviceType = config.VFIODeviceNormalType + } else { + tokens = strings.Split(deviceFileName, "-") + if len(tokens) == 5 { + vfioDeviceType = config.VFIODeviceMediatedType + } + } switch vfioDeviceType { case config.VFIODeviceNormalType: diff --git a/virtcontainers/device/manager/manager.go b/virtcontainers/device/manager/manager.go index 6a2b665a5e..8c61f60869 100644 --- a/virtcontainers/device/manager/manager.go +++ b/virtcontainers/device/manager/manager.go @@ -78,8 +78,6 @@ func NewDeviceManager(blockDriver string, vhostUserStoreEnabled bool, vhostUserS dm.blockDriver = VirtioSCSI } - drivers.AllPCIeDevs = make(map[string]bool) - for _, dev := range devices { dm.devices[dev.DeviceID()] = dev } diff --git a/virtcontainers/device/manager/utils.go b/virtcontainers/device/manager/utils.go index 0e287e736c..86bc16da31 100644 --- a/virtcontainers/device/manager/utils.go +++ b/virtcontainers/device/manager/utils.go @@ -7,16 +7,10 @@ package manager import ( - "fmt" - "io/ioutil" "path/filepath" - "strconv" "strings" - "github.com/sirupsen/logrus" - "github.com/kata-containers/runtime/virtcontainers/device/config" - "github.com/kata-containers/runtime/virtcontainers/device/drivers" ) const ( @@ -42,101 +36,6 @@ func isBlock(devInfo config.DeviceInfo) bool { return devInfo.DevType == "b" } -// IsVFIOLargeBarSpaceDevice checks if the device is a large bar space device. -func IsVFIOLargeBarSpaceDevice(hostPath string) (bool, error) { - if !isVFIO(hostPath) { - return false, nil - } - - iommuDevicesPath := filepath.Join(config.SysIOMMUPath, filepath.Base(hostPath), "devices") - deviceFiles, err := ioutil.ReadDir(iommuDevicesPath) - if err != nil { - return false, err - } - - // Pass all devices in iommu group - for _, deviceFile := range deviceFiles { - vfioDeviceType := drivers.GetVFIODeviceType(deviceFile.Name()) - var isLarge bool - switch vfioDeviceType { - case config.VFIODeviceNormalType: - sysfsResource := filepath.Join(iommuDevicesPath, deviceFile.Name(), "resource") - if isLarge, err = isLargeBarSpace(sysfsResource); err != nil { - return false, err - } - deviceLogger().WithFields(logrus.Fields{ - "device-file": deviceFile.Name(), - "device-type": vfioDeviceType, - "resource": sysfsResource, - "large-bar-space": isLarge, - }).Info("Detect large bar space device") - return isLarge, nil - case config.VFIODeviceMediatedType: - //TODO: support VFIODeviceMediatedType - deviceLogger().WithFields(logrus.Fields{ - "device-file": deviceFile.Name(), - "device-type": vfioDeviceType, - }).Warn("Detect large bar space device is not yet supported for VFIODeviceMediatedType") - default: - deviceLogger().WithFields(logrus.Fields{ - "device-file": deviceFile.Name(), - "device-type": vfioDeviceType, - }).Warn("Incorrect token found when detecting large bar space devices") - } - } - - return false, nil -} - -func isLargeBarSpace(resourcePath string) (bool, error) { - buf, err := ioutil.ReadFile(resourcePath) - if err != nil { - return false, fmt.Errorf("failed to read sysfs resource: %v", err) - } - - // The resource file contains host addresses of PCI resources: - // For example: - // $ cat /sys/bus/pci/devices/0000:04:00.0/resource - // 0x00000000c6000000 0x00000000c6ffffff 0x0000000000040200 - // 0x0000383800000000 0x0000383bffffffff 0x000000000014220c - // Refer: - // resource format: https://github.com/torvalds/linux/blob/63623fd44972d1ed2bfb6e0fb631dfcf547fd1e7/drivers/pci/pci-sysfs.c#L145 - // calculate size : https://github.com/pciutils/pciutils/blob/61ecc14a327de030336f1ff3fea9c7e7e55a90ca/lspci.c#L388 - for rIdx, line := range strings.Split(string(buf), "\n") { - cols := strings.Fields(line) - // start and end columns are required to calculate the size - if len(cols) < 2 { - deviceLogger().WithField("resource-line", line).Debug("not enough columns to calculate PCI size") - continue - } - start, _ := strconv.ParseUint(cols[0], 0, 64) - end, _ := strconv.ParseUint(cols[1], 0, 64) - if start > end { - deviceLogger().WithFields(logrus.Fields{ - "start": start, - "end": end, - }).Debug("start is greater than end") - continue - } - // Use right shift to convert Bytes to GBytes - // This is equivalent to ((end - start + 1) / 1024 / 1024 / 1024) - gbSize := (end - start + 1) >> 30 - deviceLogger().WithFields(logrus.Fields{ - "resource": resourcePath, - "region": rIdx, - "start": cols[0], - "end": cols[1], - "gb-size": gbSize, - }).Debug("Check large bar space device") - //size is large than 4G - if gbSize > 4 { - return true, nil - } - } - - return false, nil -} - // isVhostUserBlk checks if the device is a VhostUserBlk device. func isVhostUserBlk(devInfo config.DeviceInfo) bool { return devInfo.DevType == "b" && devInfo.Major == config.VhostUserBlkMajor diff --git a/virtcontainers/device/manager/utils_test.go b/virtcontainers/device/manager/utils_test.go index 80e2fcdd94..87d72bae90 100644 --- a/virtcontainers/device/manager/utils_test.go +++ b/virtcontainers/device/manager/utils_test.go @@ -7,8 +7,6 @@ package manager import ( - "io/ioutil" - "os" "testing" "github.com/kata-containers/runtime/virtcontainers/device/config" @@ -105,46 +103,3 @@ func TestIsVhostUserSCSI(t *testing.T) { assert.Equal(t, d.expected, isVhostUserSCSI) } } - -func TestIsLargeBarSpace(t *testing.T) { - assert := assert.New(t) - - // File not exist - bs, err := isLargeBarSpace("/abc/xyz/123/rgb") - assert.Error(err) - assert.False(bs) - - f, err := ioutil.TempFile("", "pci") - assert.NoError(err) - defer f.Close() - defer os.RemoveAll(f.Name()) - - type testData struct { - resourceInfo string - error bool - result bool - } - - for _, d := range []testData{ - {"", false, false}, - {"\t\n\t ", false, false}, - {"abc zyx", false, false}, - {"abc zyx rgb", false, false}, - {"abc\t zyx \trgb", false, false}, - {"0x00015\n0x0013", false, false}, - {"0x00000000c6000000 0x00000000c6ffffff 0x0000000000040200", false, false}, - {"0x0000383bffffffff 0x0000383800000000", false, false}, // start greater than end - {"0x0000383800000000 0x0000383bffffffff", false, true}, - {"0x0000383800000000 0x0000383bffffffff 0x000000000014220c", false, true}, - } { - f.WriteAt([]byte(d.resourceInfo), 0) - bs, err = isLargeBarSpace(f.Name()) - assert.NoError(f.Truncate(0)) - if d.error { - assert.Error(err, d.resourceInfo) - } else { - assert.NoError(err, d.resourceInfo) - } - assert.Equal(d.result, bs, d.resourceInfo) - } -} diff --git a/virtcontainers/kata_agent.go b/virtcontainers/kata_agent.go index 934b1bee82..cf952fd957 100644 --- a/virtcontainers/kata_agent.go +++ b/virtcontainers/kata_agent.go @@ -77,6 +77,7 @@ var ( kataSCSIDevType = "scsi" kataNvdimmDevType = "nvdimm" kataVirtioFSDevType = "virtio-fs" + kataVfioVMDevType = "vfio-vm" sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} sharedDirVirtioFSOptions = []string{} sharedDirVirtioFSDaxOptions = "dax" @@ -1253,6 +1254,40 @@ func (k *kataAgent) appendVhostUserBlkDevice(dev ContainerDevice, c *Container) return kataDevice } +func (k *kataAgent) appendVfioDevice(dev ContainerDevice, c *Container) *grpc.Device { + device := c.sandbox.devManager.GetDeviceByID(dev.ID) + + devList, ok := device.GetDeviceInfo().([]*config.VFIODev) + if !ok || devList == nil { + k.Logger().WithField("device", device).Error("malformed vfio device") + return nil + } + + groupNum := filepath.Base(dev.ContainerPath) + + // Each /dev/vfio/NN device represents a VFIO group, which + // could include several PCI devices. So we give group + // information in the main structure, then list each + // individual PCI device in the Options array. + // + // Each option is formatted as "DDDD:BB:DD.F=" + // DDDD:BB:DD.F is the device's PCI address on the + // *host*. is the device's PCI path in the guest + // (see qomGetPciPath() for details). + kataDevice := &grpc.Device{ + ContainerPath: dev.ContainerPath, + Type: kataVfioVMDevType, + Id: groupNum, + Options: make([]string, len(devList)), + } + + for i, pciDev := range devList { + kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDev.BDF, pciDev.GuestPciPath) + } + + return kataDevice +} + func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*grpc.Device { var kataDevice *grpc.Device @@ -1268,6 +1303,8 @@ func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*gr kataDevice = k.appendBlockDevice(dev, c) case config.VhostUserBlk: kataDevice = k.appendVhostUserBlkDevice(dev, c) + case config.DeviceVFIO: + kataDevice = k.appendVfioDevice(dev, c) } if kataDevice == nil { diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index 86c380b2ab..6685de3d82 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -1293,6 +1293,71 @@ func (q *qemu) hotplugVhostUserDevice(vAttr *config.VhostUserDeviceAttrs, op ope return nil } +// Query QMP to find the PCI slot of a device, given its QOM path or ID +func (q *qemu) qomGetSlot(qomPath string) (vcTypes.PciSlot, error) { + addr, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, qomPath, "addr") + if err != nil { + return vcTypes.PciSlot{}, err + } + addrf, ok := addr.(float64) + // XXX going via float makes no real sense, but that's how + // JSON works, and we'll get away with it for the small values + // we have here + if !ok { + return vcTypes.PciSlot{}, fmt.Errorf("addr QOM property of %q is %T not a number", qomPath, addr) + } + addri := int(addrf) + + slotNum, funcNum := addri>>3, addri&0x7 + if funcNum != 0 { + return vcTypes.PciSlot{}, fmt.Errorf("Unexpected non-zero PCI function (%02x.%1x) on %q", + slotNum, funcNum, qomPath) + } + + return vcTypes.PciSlotFromInt(slotNum) +} + +// Query QMP to find a device's PCI path given its QOM path or ID +// "PCI path" means the PCI slot number of each bridge leading to the +// device, and finally the device itself, as a '/' separated list +func (q *qemu) qomGetPciPath(qemuID string) (vcTypes.PciPath, error) { + // XXX: For now we assume there's exactly one bridge, since + // that's always how we configure qemu from Kata for now. It + // would be good to generalize this to different PCI + // topologies + devSlot, err := q.qomGetSlot(qemuID) + if err != nil { + return vcTypes.PciPath{}, err + } + + busq, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, qemuID, "parent_bus") + if err != nil { + return vcTypes.PciPath{}, err + } + + bus, ok := busq.(string) + if !ok { + return vcTypes.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %t not a string", qemuID, busq) + } + + // `bus` is the QOM path of the QOM bus object, but we need + // the PCI bridge which manages that bus. There doesn't seem + // to be a way to get that other than to simply drop the last + // path component. + idx := strings.LastIndex(bus, "/") + if idx == -1 { + return vcTypes.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", bus) + } + bridge := bus[:idx] + + bridgeSlot, err := q.qomGetSlot(bridge) + if err != nil { + return vcTypes.PciPath{}, err + } + + return vcTypes.PciPathFromSlots(bridgeSlot, devSlot) +} + func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) (err error) { err = q.qmpSetup() if err != nil { @@ -1330,32 +1395,47 @@ func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) (err erro switch device.Type { case config.VFIODeviceNormalType: - return q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, device.Bus, romFile) + err = q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, device.Bus, romFile) case config.VFIODeviceMediatedType: - return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", device.Bus, romFile) + err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", device.Bus, romFile) default: - return fmt.Errorf("Incorrect VFIO device type found") + err = fmt.Errorf("Incorrect VFIO device type found") } - } + } else { + addr, bridge, err := q.arch.addDeviceToBridge(devID, types.PCI) + if err != nil { + return err + } + + defer func() { + if err != nil { + q.arch.removeDeviceFromBridge(devID) + } + }() + + device.Bus = bridge.ID - addr, bridge, err := q.arch.addDeviceToBridge(devID, types.PCI) + switch device.Type { + case config.VFIODeviceNormalType: + err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, addr, device.Bus, romFile) + case config.VFIODeviceMediatedType: + err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, addr, device.Bus, romFile) + default: + err = fmt.Errorf("Incorrect VFIO device type found") + } + } if err != nil { return err } - - defer func() { - if err != nil { - q.arch.removeDeviceFromBridge(devID) - } - }() - - switch device.Type { - case config.VFIODeviceNormalType: - return q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, addr, bridge.ID, romFile) - case config.VFIODeviceMediatedType: - return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, addr, bridge.ID, romFile) - default: - return fmt.Errorf("Incorrect VFIO device type found") + // XXX: Depending on whether we're doing root port or + // bridge hotplug, and how the bridge is set up in + // other parts of the code, we may or may not already + // have information about the slot number of the + // bridge and or the device. For simplicity, just + // query both of them back from qemu + device.GuestPciPath, err = q.qomGetPciPath(devID) + if err != nil { + return err } } else { q.Logger().WithField("dev-id", devID).Info("Start hot-unplug VFIO device") diff --git a/virtcontainers/sandbox_test.go b/virtcontainers/sandbox_test.go index 69c2131b06..44a382fa4e 100644 --- a/virtcontainers/sandbox_test.go +++ b/virtcontainers/sandbox_test.go @@ -534,7 +534,7 @@ func TestSandboxAttachDevicesVFIO(t *testing.T) { containers[c.id].sandbox = &sandbox - err = containers[c.id].attachDevices(c.devices) + err = containers[c.id].attachDevices() assert.Nil(t, err, "Error while attaching devices %s", err) err = containers[c.id].detachDevices() @@ -629,7 +629,7 @@ func TestSandboxAttachDevicesVhostUserBlk(t *testing.T) { containers[c.id].sandbox = &sandbox - err = containers[c.id].attachDevices(c.devices) + err = containers[c.id].attachDevices() assert.Nil(t, err, "Error while attaching vhost-user-blk devices %s", err) err = containers[c.id].detachDevices()