From: Keir Fraser Date: Wed, 28 May 2008 13:41:23 +0000 (+0100) Subject: iommu: Handle sibling device assignment correctly X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~14207^2~14 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=491c0ade25702af5fc89d00667687091a52a66b2;p=xen.git iommu: Handle sibling device assignment correctly The domctl interface is extended to allow libxc to retrieve device group information from the hypervisor. The vendor-specific iommu_ops is also extended with a new operation, "get_device_group_id()", which is currently a null pointer for vt-d but could be implemented there later. An error is raised from the tools side when the user tries to assign a PCI device that has a sibling device driven by dom0. The user will keep being prompted until they have hidden the entire device group (at least, the sibling devices driven by dom0) via the dom0 kernel parameter. Hopefully this framework is flexible enough to support both amd iommu and vt-d. The following 2 cases are not covered by this patch, but should be easy to handle. * Checking for hot-plug devices (maybe we can delay calling ImageHandler.signalDeviceModel() until all checks are done?) 
* Checking for splitted device group between different passthru domains Signed-off-by: Wei Wang --- diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c index dc38af89e4..d212c7d020 100644 --- a/tools/libxc/xc_domain.c +++ b/tools/libxc/xc_domain.c @@ -767,6 +767,37 @@ int xc_assign_device( return do_domctl(xc_handle, &domctl); } +int xc_get_device_group( + int xc_handle, + uint32_t domid, + uint32_t machine_bdf, + uint32_t max_sdevs, + uint32_t *num_sdevs, + uint32_t *sdev_array) +{ + int rc; + DECLARE_DOMCTL; + + domctl.cmd = XEN_DOMCTL_get_device_group; + domctl.domain = (domid_t)domid; + + domctl.u.get_device_group.machine_bdf = machine_bdf; + domctl.u.get_device_group.max_sdevs = max_sdevs; + + set_xen_guest_handle(domctl.u.get_device_group.sdev_array, sdev_array); + + if ( lock_pages(sdev_array, max_sdevs * sizeof(*sdev_array)) != 0 ) + { + PERROR("Could not lock memory for xc_get_device_group\n"); + return -ENOMEM; + } + rc = do_domctl(xc_handle, &domctl); + unlock_pages(sdev_array, max_sdevs * sizeof(*sdev_array)); + + *num_sdevs = domctl.u.get_device_group.num_sdevs; + return rc; +} + int xc_test_assign_device( int xc_handle, uint32_t domid, diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h index 1e03025ca6..77dba6ae2d 100644 --- a/tools/libxc/xenctrl.h +++ b/tools/libxc/xenctrl.h @@ -955,6 +955,13 @@ int xc_assign_device(int xc_handle, uint32_t domid, uint32_t machine_bdf); +int xc_get_device_group(int xc_handle, + uint32_t domid, + uint32_t machine_bdf, + uint32_t max_sdevs, + uint32_t *num_sdevs, + uint32_t *sdev_array); + int xc_test_assign_device(int xc_handle, uint32_t domid, uint32_t machine_bdf); diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c index 90d541eca4..51e3ec79c9 100644 --- a/tools/python/xen/lowlevel/xc/xc.c +++ b/tools/python/xen/lowlevel/xc/xc.c @@ -646,6 +646,68 @@ static PyObject *pyxc_deassign_device(XcObject *self, return Py_BuildValue("i", bdf); } +static PyObject 
*pyxc_get_device_group(XcObject *self, + PyObject *args) +{ + domid_t domid; + uint32_t bdf = 0; + uint32_t max_sdevs, num_sdevs; + int seg, bus, dev, func, rc, i; + PyObject *Pystr; + char *group_str; + char dev_str[9]; + uint32_t *sdev_array; + + if ( !PyArg_ParseTuple(args, "iiiii", &domid, &seg, &bus, &dev, &func) ) + return NULL; + + /* Maximum allowed siblings device number per group */ + max_sdevs = 1024; + + if ( (sdev_array = malloc(max_sdevs * sizeof(*sdev_array))) == NULL ) + return PyErr_NoMemory(); + memset(sdev_array, 0, max_sdevs * sizeof(*sdev_array)); + + bdf |= (bus & 0xff) << 16; + bdf |= (dev & 0x1f) << 11; + bdf |= (func & 0x7) << 8; + + rc = xc_get_device_group(self->xc_handle, + domid, bdf, max_sdevs, &num_sdevs, sdev_array); + + if ( rc < 0 ) + { + free(sdev_array); + return pyxc_error_to_exception(); + } + + if ( !num_sdevs ) + { + free(sdev_array); + return Py_BuildValue("s", ""); + } + + if ( (group_str = malloc(num_sdevs * sizeof(dev_str))) == NULL ) + return PyErr_NoMemory(); + memset(group_str, '\0', num_sdevs * sizeof(dev_str)); + + for ( i = 0; i < num_sdevs; i++ ) + { + bus = (sdev_array[i] >> 16) & 0xff; + dev = (sdev_array[i] >> 11) & 0x1f; + func = (sdev_array[i] >> 8) & 0x7; + sprintf(dev_str, "%02x:%02x.%x,", bus, dev, func); + strcat(group_str, dev_str); + } + + Pystr = Py_BuildValue("s", group_str); + + free(sdev_array); + free(group_str); + + return Pystr; +} + #ifdef __ia64__ static PyObject *pyxc_nvram_init(XcObject *self, PyObject *args) @@ -1584,6 +1646,17 @@ static PyMethodDef pyxc_methods[] = { " value [long]: Value of param.\n" "Returns: [int] 0 on success.\n" }, + { "get_device_group", + (PyCFunction)pyxc_get_device_group, + METH_VARARGS, "\n" + "get sibling devices infomation.\n" + " dom [int]: Domain to assign device to.\n" + " seg [int]: PCI segment.\n" + " bus [int]: PCI bus.\n" + " dev [int]: PCI dev.\n" + " func [int]: PCI func.\n" + "Returns: [string]: Sibling devices \n" }, + { "test_assign_device", 
(PyCFunction)pyxc_test_assign_device, METH_VARARGS | METH_KEYWORDS, "\n" diff --git a/tools/python/xen/xend/server/pciif.py b/tools/python/xen/xend/server/pciif.py index 3128adfbee..27159bf6dc 100644 --- a/tools/python/xen/xend/server/pciif.py +++ b/tools/python/xen/xend/server/pciif.py @@ -226,6 +226,39 @@ class PciController(DevController): return sxpr + def CheckSiblingDevices(self, domid, dev): + """ Check if all sibling devices of dev are owned by pciback + """ + if not self.vm.info.is_hvm(): + return + + group_str = xc.get_device_group(domid, dev.domain, dev.bus, dev.slot, dev.func) + if group_str == "": + return + + #group string format xx:xx.x,xx:xx.x, + devstr_len = group_str.find(',') + for i in range(0, len(group_str), devstr_len + 1): + (bus, slotfunc) = group_str[i:i + devstr_len].split(':') + (slot, func) = slotfunc.split('.') + b = parse_hex(bus) + d = parse_hex(slot) + f = parse_hex(func) + try: + sdev = PciDevice(dev.domain, b, d, f) + except Exception, e: + #no dom0 drivers bound to sdev + continue + + if sdev.driver!='pciback': + raise VmError(("pci: PCI Backend does not own\n "+ \ + "sibling device %s of device %s\n"+ \ + "See the pciback.hide kernel "+ \ + "command-line parameter or\n"+ \ + "bind your slot/device to the PCI backend using sysfs" \ + )%(sdev.name, dev.name)) + return + def setupOneDevice(self, domain, bus, slot, func): """ Attach I/O resources for device to frontend domain """ @@ -245,6 +278,8 @@ class PciController(DevController): "bind your slot/device to the PCI backend using sysfs" \ )%(dev.name)) + self.CheckSiblingDevices(fe_domid, dev) + PCIQuirk(dev.vendor, dev.device, dev.subvendor, dev.subdevice, domain, bus, slot, func) diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c index c632b7015e..7b758fe868 100644 --- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -526,6 +526,45 @@ long arch_do_domctl( } break; + case XEN_DOMCTL_get_device_group: + { + struct domain *d; + u32 max_sdevs; + u8 bus, devfn; + 
XEN_GUEST_HANDLE_64(uint32) sdevs; + int num_sdevs; + + ret = -ENOSYS; + if ( !iommu_enabled ) + break; + + ret = -EINVAL; + if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL ) + break; + + bus = (domctl->u.get_device_group.machine_bdf >> 16) & 0xff; + devfn = (domctl->u.get_device_group.machine_bdf >> 8) & 0xff; + max_sdevs = domctl->u.get_device_group.max_sdevs; + sdevs = domctl->u.get_device_group.sdev_array; + + num_sdevs = iommu_get_device_group(d, bus, devfn, sdevs, max_sdevs); + if ( num_sdevs < 0 ) + { + dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n"); + ret = -EFAULT; + domctl->u.get_device_group.num_sdevs = 0; + } + else + { + ret = 0; + domctl->u.get_device_group.num_sdevs = num_sdevs; + } + if ( copy_to_guest(u_domctl, domctl, 1) ) + ret = -EFAULT; + rcu_unlock_domain(d); + } + break; + case XEN_DOMCTL_test_assign_device: { u8 bus, devfn; diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c index af3b68fc91..e32a022e4d 100644 --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -635,6 +635,16 @@ static void amd_iommu_return_device( reassign_device(s, t, bus, devfn); } +static int amd_iommu_group_id(u8 bus, u8 devfn) +{ + int rt; + int bdf = (bus << 8) | devfn; + rt = ( bdf < ivrs_bdf_entries ) ? 
+ ivrs_mappings[bdf].dte_requestor_id : + bdf; + return rt; +} + struct iommu_ops amd_iommu_ops = { .init = amd_iommu_domain_init, .assign_device = amd_iommu_assign_device, @@ -642,4 +652,5 @@ struct iommu_ops amd_iommu_ops = { .map_page = amd_iommu_map_page, .unmap_page = amd_iommu_unmap_page, .reassign_device = amd_iommu_return_device, + .get_device_group_id = amd_iommu_group_id, }; diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c index a41972360c..b6ce7da658 100644 --- a/xen/drivers/passthrough/iommu.c +++ b/xen/drivers/passthrough/iommu.c @@ -16,6 +16,7 @@ #include #include #include +#include extern struct iommu_ops intel_iommu_ops; extern struct iommu_ops amd_iommu_ops; @@ -216,7 +217,41 @@ static int iommu_setup(void) } __initcall(iommu_setup); +int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, + XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + struct pci_dev *pdev; + int group_id, sdev_id; + u32 bdf; + int i = 0; + struct iommu_ops *ops = hd->platform_ops; + + if ( !iommu_enabled || !ops || !ops->get_device_group_id ) + return 0; + + group_id = ops->get_device_group_id(bus, devfn); + + list_for_each_entry(pdev, + &(dom0->arch.hvm_domain.hvm_iommu.pdev_list), list) + { + if ( (pdev->bus == bus) && (pdev->devfn == devfn) ) + continue; + + sdev_id = ops->get_device_group_id(pdev->bus, pdev->devfn); + if ( (sdev_id == group_id) && (i < max_sdevs) ) + { + bdf = 0; + bdf |= (pdev->bus & 0xff) << 16; + bdf |= (pdev->devfn & 0xff) << 8; + if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) ) + return -1; + i++; + } + } + return i; +} /* * Local variables: * mode: C diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c index 2ab599b23b..4e9deca49b 100644 --- a/xen/drivers/passthrough/vtd/iommu.c +++ b/xen/drivers/passthrough/vtd/iommu.c @@ -1955,6 +1955,7 @@ struct iommu_ops intel_iommu_ops = { .map_page = intel_iommu_map_page, .unmap_page = 
intel_iommu_unmap_page, .reassign_device = reassign_device_ownership, + .get_device_group_id = NULL, }; /* diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h index 65e2b64a9c..8e400e2dfc 100644 --- a/xen/include/public/domctl.h +++ b/xen/include/public/domctl.h @@ -448,6 +448,16 @@ struct xen_domctl_assign_device { typedef struct xen_domctl_assign_device xen_domctl_assign_device_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t); +/* Retrieve sibling devices infomation of machine_bdf */ +#define XEN_DOMCTL_get_device_group 50 +struct xen_domctl_get_device_group { + uint32_t machine_bdf; /* IN */ + uint32_t max_sdevs; /* IN */ + uint32_t num_sdevs; /* OUT */ + XEN_GUEST_HANDLE_64(uint32) sdev_array; /* OUT */ +}; +typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t); /* Pass-through interrupts: bind real irq -> hvm devfn. */ #define XEN_DOMCTL_bind_pt_irq 38 @@ -619,6 +629,7 @@ struct xen_domctl { struct xen_domctl_hvmcontext hvmcontext; struct xen_domctl_address_size address_size; struct xen_domctl_sendtrigger sendtrigger; + struct xen_domctl_get_device_group get_device_group; struct xen_domctl_assign_device assign_device; struct xen_domctl_bind_pt_irq bind_pt_irq; struct xen_domctl_memory_mapping memory_mapping; diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h index 6e512044c1..3b0b06ef32 100644 --- a/xen/include/xen/iommu.h +++ b/xen/include/xen/iommu.h @@ -61,6 +61,8 @@ void iommu_domain_destroy(struct domain *d); int device_assigned(u8 bus, u8 devfn); int assign_device(struct domain *d, u8 bus, u8 devfn); void deassign_device(struct domain *d, u8 bus, u8 devfn); +int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, + XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs); void reassign_device_ownership(struct domain *source, struct domain *target, u8 bus, u8 devfn); @@ -98,6 +100,7 @@ struct iommu_ops { int (*unmap_page)(struct domain *d, 
unsigned long gfn); void (*reassign_device)(struct domain *s, struct domain *t, u8 bus, u8 devfn); + int (*get_device_group_id)(u8 bus, u8 devfn); }; #endif /* _IOMMU_H_ */