|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH, v2] IOMMU: don't immediately disable bus mastering on faults
At 16:05 +0000 on 07 Nov (1352304352), Jan Beulich wrote:
> Instead, give the owning domain at least a small opportunity of fixing
> things up, and allow for rare faults to not bring down the device at
> all. The amount of faults tolerated within a given time period (all
> numbers are made up with no specific rationale) is higher for Dom0 than
> for DomU-s.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Looks good to me, though I don't think dom0 should have any special
treatment here. I'd be inclined to set the threshold to 10 for
everyone.
Tim.
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -566,7 +566,7 @@ static hw_irq_controller iommu_maskable_
>
> static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
> {
> - u16 domain_id, device_id, bdf, cword, flags;
> + u16 domain_id, device_id, bdf, flags;
> u32 code;
> u64 *addr;
> int count = 0;
> @@ -620,18 +620,10 @@ static void parse_event_log_entry(struct
> "fault address = %#"PRIx64", flags = %#x\n",
> event_str[code-1], domain_id, device_id, *addr, flags);
>
> - /* Tell the device to stop DMAing; we can't rely on the guest to
> - * control it for us. */
> for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
> if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
> - {
> - cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
> - PCI_SLOT(bdf), PCI_FUNC(bdf),
> - PCI_COMMAND);
> - pci_conf_write16(iommu->seg, PCI_BUS(bdf), PCI_SLOT(bdf),
> - PCI_FUNC(bdf), PCI_COMMAND,
> - cword & ~PCI_COMMAND_MASTER);
> - }
> + pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
> + PCI_DEVFN2(bdf));
> }
> else
> {
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -215,6 +215,7 @@ static int device_assigned(u16 seg, u8 b
> static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
> {
> struct hvm_iommu *hd = domain_hvm_iommu(d);
> + struct pci_dev *pdev;
> int rc = 0;
>
> if ( !iommu_enabled || !hd->platform_ops )
> @@ -228,6 +229,13 @@ static int assign_device(struct domain *
> return -EXDEV;
>
> spin_lock(&pcidevs_lock);
> + pdev = pci_get_pdev(seg, bus, devfn);
> + if ( pdev )
> + {
> + pdev->fault.threshold = PT_FAULT_THRESHOLD_UNPRIV;
> + pdev->fault.count = 0;
> + }
> +
> if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) )
> goto done;
>
> @@ -239,6 +247,8 @@ static int assign_device(struct domain *
> goto done;
> }
> done:
> + if ( pdev && pdev->domain != d )
> + pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
> spin_unlock(&pcidevs_lock);
> return rc;
> }
> @@ -379,6 +389,9 @@ int deassign_device(struct domain *d, u1
> return ret;
> }
>
> + pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
> + pdev->fault.count = 0;
> +
> if ( !has_arch_pdevs(d) && need_iommu(d) )
> {
> d->need_iommu = 0;
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -137,6 +137,7 @@ static struct pci_dev *alloc_pdev(struct
> *((u8*) &pdev->bus) = bus;
> *((u8*) &pdev->devfn) = devfn;
> pdev->domain = NULL;
> + pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
> INIT_LIST_HEAD(&pdev->msi_list);
> list_add(&pdev->alldevs_list, &pseg->alldevs_list);
> spin_lock_init(&pdev->msix_table_lock);
> @@ -672,6 +673,36 @@ int __init pci_device_detect(u16 seg, u8
> return 1;
> }
>
> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
> +{
> + struct pci_dev *pdev;
> + s_time_t now = NOW();
> + u16 cword;
> +
> + spin_lock(&pcidevs_lock);
> + pdev = pci_get_pdev(seg, bus, devfn);
> + if ( pdev )
> + {
> + if ( now < pdev->fault.time ||
> + now - pdev->fault.time > MILLISECS(10) )
> + pdev->fault.count >>= 1;
> + pdev->fault.time = now;
> + if ( ++pdev->fault.count < pdev->fault.threshold )
> + pdev = NULL;
> + }
> + spin_unlock(&pcidevs_lock);
> +
> + if ( !pdev )
> + return;
> +
> + /* Tell the device to stop DMAing; we can't rely on the guest to
> + * control it for us. */
> + cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
> + PCI_COMMAND);
> + pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
> + PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
> +}
> +
> /*
> * scan pci devices to add all existed PCI devices to alldevs_list,
> * and setup pci hierarchy in array bus2bridge.
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -917,7 +917,7 @@ static void __do_iommu_page_fault(struct
> while (1)
> {
> u8 fault_reason;
> - u16 source_id, cword;
> + u16 source_id;
> u32 data;
> u64 guest_addr;
> int type;
> @@ -950,14 +950,8 @@ static void __do_iommu_page_fault(struct
> iommu_page_fault_do_one(iommu, type, fault_reason,
> source_id, guest_addr);
>
> - /* Tell the device to stop DMAing; we can't rely on the guest to
> - * control it for us. */
> - cword = pci_conf_read16(iommu->intel->drhd->segment,
> - PCI_BUS(source_id), PCI_SLOT(source_id),
> - PCI_FUNC(source_id), PCI_COMMAND);
> - pci_conf_write16(iommu->intel->drhd->segment, PCI_BUS(source_id),
> - PCI_SLOT(source_id), PCI_FUNC(source_id),
> - PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
> + pci_check_disable_device(iommu->intel->drhd->segment,
> + PCI_BUS(source_id), PCI_DEVFN2(source_id));
>
> fault_index++;
> if ( fault_index > cap_num_fault_regs(iommu->cap) )
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -64,6 +64,13 @@ struct pci_dev {
> const u8 devfn;
> struct pci_dev_info info;
> struct arch_pci_dev arch;
> + struct {
> + s_time_t time;
> + unsigned int threshold;
> +#define PT_FAULT_THRESHOLD_PRIV 10
> +#define PT_FAULT_THRESHOLD_UNPRIV 3
> + unsigned int count;
> + } fault;
> u64 vf_rlen[6];
> };
>
> @@ -107,6 +114,7 @@ int pci_hide_device(int bus, int devfn);
> struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
> struct pci_dev *pci_get_pdev_by_domain(
> struct domain *, int seg, int bus, int devfn);
> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
>
> uint8_t pci_conf_read8(
> unsigned int seg, unsigned int bus, unsigned int dev, unsigned int func,
>
>
> IOMMU: don't immediately disable bus mastering on faults
>
> Instead, give the owning domain at least a small opportunity of fixing
> things up, and allow for rare faults to not bring down the device at
> all. The amount of faults tolerated within a given time period (all
> numbers are made up with no specific rationale) is higher for Dom0 than
> for DomU-s.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
>
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -566,7 +566,7 @@ static hw_irq_controller iommu_maskable_
>
> static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
> {
> - u16 domain_id, device_id, bdf, cword, flags;
> + u16 domain_id, device_id, bdf, flags;
> u32 code;
> u64 *addr;
> int count = 0;
> @@ -620,18 +620,10 @@ static void parse_event_log_entry(struct
> "fault address = %#"PRIx64", flags = %#x\n",
> event_str[code-1], domain_id, device_id, *addr, flags);
>
> - /* Tell the device to stop DMAing; we can't rely on the guest to
> - * control it for us. */
> for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
> if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
> - {
> - cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
> - PCI_SLOT(bdf), PCI_FUNC(bdf),
> - PCI_COMMAND);
> - pci_conf_write16(iommu->seg, PCI_BUS(bdf), PCI_SLOT(bdf),
> - PCI_FUNC(bdf), PCI_COMMAND,
> - cword & ~PCI_COMMAND_MASTER);
> - }
> + pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
> + PCI_DEVFN2(bdf));
> }
> else
> {
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -215,6 +215,7 @@ static int device_assigned(u16 seg, u8 b
> static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
> {
> struct hvm_iommu *hd = domain_hvm_iommu(d);
> + struct pci_dev *pdev;
> int rc = 0;
>
> if ( !iommu_enabled || !hd->platform_ops )
> @@ -228,6 +229,13 @@ static int assign_device(struct domain *
> return -EXDEV;
>
> spin_lock(&pcidevs_lock);
> + pdev = pci_get_pdev(seg, bus, devfn);
> + if ( pdev )
> + {
> + pdev->fault.threshold = PT_FAULT_THRESHOLD_UNPRIV;
> + pdev->fault.count = 0;
> + }
> +
> if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) )
> goto done;
>
> @@ -239,6 +247,8 @@ static int assign_device(struct domain *
> goto done;
> }
> done:
> + if ( pdev && pdev->domain != d )
> + pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
> spin_unlock(&pcidevs_lock);
> return rc;
> }
> @@ -379,6 +389,9 @@ int deassign_device(struct domain *d, u1
> return ret;
> }
>
> + pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
> + pdev->fault.count = 0;
> +
> if ( !has_arch_pdevs(d) && need_iommu(d) )
> {
> d->need_iommu = 0;
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -137,6 +137,7 @@ static struct pci_dev *alloc_pdev(struct
> *((u8*) &pdev->bus) = bus;
> *((u8*) &pdev->devfn) = devfn;
> pdev->domain = NULL;
> + pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
> INIT_LIST_HEAD(&pdev->msi_list);
> list_add(&pdev->alldevs_list, &pseg->alldevs_list);
> spin_lock_init(&pdev->msix_table_lock);
> @@ -672,6 +673,36 @@ int __init pci_device_detect(u16 seg, u8
> return 1;
> }
>
> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
> +{
> + struct pci_dev *pdev;
> + s_time_t now = NOW();
> + u16 cword;
> +
> + spin_lock(&pcidevs_lock);
> + pdev = pci_get_pdev(seg, bus, devfn);
> + if ( pdev )
> + {
> + if ( now < pdev->fault.time ||
> + now - pdev->fault.time > MILLISECS(10) )
> + pdev->fault.count >>= 1;
> + pdev->fault.time = now;
> + if ( ++pdev->fault.count < pdev->fault.threshold )
> + pdev = NULL;
> + }
> + spin_unlock(&pcidevs_lock);
> +
> + if ( !pdev )
> + return;
> +
> + /* Tell the device to stop DMAing; we can't rely on the guest to
> + * control it for us. */
> + cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
> + PCI_COMMAND);
> + pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
> + PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
> +}
> +
> /*
> * scan pci devices to add all existed PCI devices to alldevs_list,
> * and setup pci hierarchy in array bus2bridge.
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -917,7 +917,7 @@ static void __do_iommu_page_fault(struct
> while (1)
> {
> u8 fault_reason;
> - u16 source_id, cword;
> + u16 source_id;
> u32 data;
> u64 guest_addr;
> int type;
> @@ -950,14 +950,8 @@ static void __do_iommu_page_fault(struct
> iommu_page_fault_do_one(iommu, type, fault_reason,
> source_id, guest_addr);
>
> - /* Tell the device to stop DMAing; we can't rely on the guest to
> - * control it for us. */
> - cword = pci_conf_read16(iommu->intel->drhd->segment,
> - PCI_BUS(source_id), PCI_SLOT(source_id),
> - PCI_FUNC(source_id), PCI_COMMAND);
> - pci_conf_write16(iommu->intel->drhd->segment, PCI_BUS(source_id),
> - PCI_SLOT(source_id), PCI_FUNC(source_id),
> - PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
> + pci_check_disable_device(iommu->intel->drhd->segment,
> + PCI_BUS(source_id), PCI_DEVFN2(source_id));
>
> fault_index++;
> if ( fault_index > cap_num_fault_regs(iommu->cap) )
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -64,6 +64,13 @@ struct pci_dev {
> const u8 devfn;
> struct pci_dev_info info;
> struct arch_pci_dev arch;
> + struct {
> + s_time_t time;
> + unsigned int threshold;
> +#define PT_FAULT_THRESHOLD_PRIV 10
> +#define PT_FAULT_THRESHOLD_UNPRIV 3
> + unsigned int count;
> + } fault;
> u64 vf_rlen[6];
> };
>
> @@ -107,6 +114,7 @@ int pci_hide_device(int bus, int devfn);
> struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
> struct pci_dev *pci_get_pdev_by_domain(
> struct domain *, int seg, int bus, int devfn);
> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
>
> uint8_t pci_conf_read8(
> unsigned int seg, unsigned int bus, unsigned int dev, unsigned int func,
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |