[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] x86/MSI: add mechanism to protect MSI-X table from PV guest accesses
Hi, Jan I think this fix also covers HVM side, right ? For PV side, I recalled there was already a fix in Xend to protect PV guest from accessing the related MMIO range. Also for HVM guest, last year you proposed and submitted a patch to Xen-Qemu for blocking guest's accesses to the related MMIO range. Looks like the previous fixes are not needed any more if this fix got check-in, right ? Thanks! Xiantao > -----Original Message----- > From: xen-devel-bounces@xxxxxxxxxxxxx [mailto:xen-devel- > bounces@xxxxxxxxxxxxx] On Behalf Of Jan Beulich > Sent: Thursday, February 07, 2013 12:51 AM > To: xen-devel > Subject: [Xen-devel] [PATCH] x86/MSI: add mechanism to protect MSI-X > table from PV guest accesses > > This adds two new physdev operations for Dom0 to invoke when resource > allocation for devices is known to be complete, so that the hypervisor > can arrange for the respective MMIO ranges to be marked read-only > before an eventual guest getting such a device assigned even gets > started, such that it won't be able to set up writable mappings for > these MMIO ranges before Xen has a chance to protect them. > > This also addresses another issue with the code being modified here, > in that so far write protection for the address ranges in question got > set up only once during the lifetime of a device (i.e. until either > system shutdown or device hot removal), while teardown happened when > the last interrupt was disposed of by the guest (which at least allowed > the tables to be writable when the device got assigned to a second > guest [instance] after the first terminated). > > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> > > --- a/xen/arch/x86/msi.c > +++ b/xen/arch/x86/msi.c > @@ -656,8 +656,8 @@ static u64 read_pci_mem_bar(u16 seg, u8 > * @entries: pointer to an array of struct msix_entry entries > * @nvec: number of @entries > * > - * Setup the MSI-X capability structure of device function with a > - * single MSI-X irq. A return of zero indicates the successful setup of > + * Setup the MSI-X capability structure of device function with the > requested > + * number MSI-X irqs. A return of zero indicates the successful setup of > * requested MSI-X entries with allocated irqs or non-zero for otherwise. > **/ > static int msix_capability_init(struct pci_dev *dev, > @@ -665,86 +665,69 @@ static int msix_capability_init(struct p > struct msi_desc **desc, > unsigned int nr_entries) > { > - struct msi_desc *entry; > - int pos; > + struct msi_desc *entry = NULL; > + int pos, vf; > u16 control; > - u64 table_paddr, entry_paddr; > - u32 table_offset, entry_offset; > - u8 bir; > - void __iomem *base; > - int idx; > + u64 table_paddr; > + u32 table_offset; > + u8 bir, pbus, pslot, pfunc; > u16 seg = dev->seg; > u8 bus = dev->bus; > u8 slot = PCI_SLOT(dev->devfn); > u8 func = PCI_FUNC(dev->devfn); > > ASSERT(spin_is_locked(&pcidevs_lock)); > - ASSERT(desc); > > pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX); > control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos)); > msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */ > > - /* MSI-X Table Initialization */ > - entry = alloc_msi_entry(); > - if ( !entry ) > - return -ENOMEM; > + if ( desc ) > + { > + entry = alloc_msi_entry(); > + if ( !entry ) > + return -ENOMEM; > + ASSERT(msi); > + } > > - /* Request & Map MSI-X table region */ > + /* Locate MSI-X table region */ > table_offset = pci_conf_read32(seg, bus, slot, func, > msix_table_offset_reg(pos)); > bir = (u8)(table_offset & PCI_MSIX_BIRMASK); > table_offset &= ~PCI_MSIX_BIRMASK; > - entry_offset = msi->entry_nr * PCI_MSIX_ENTRY_SIZE; > > - table_paddr = msi->table_base + table_offset; > - entry_paddr = table_paddr + entry_offset; > - idx = msix_get_fixmap(dev, table_paddr, entry_paddr); > - if ( idx < 0 ) > - { > - xfree(entry); > - return idx; > - } > - base = (void *)(fix_to_virt(idx) + > - ((unsigned long)entry_paddr & ((1UL << PAGE_SHIFT) - 1))); > - > - entry->msi_attrib.type = PCI_CAP_ID_MSIX; > - entry->msi_attrib.is_64 = 1; > - entry->msi_attrib.entry_nr = msi->entry_nr; > - entry->msi_attrib.maskbit = 1; > - entry->msi_attrib.masked = 1; > - entry->msi_attrib.pos = pos; > - entry->irq = msi->irq; > - entry->dev = dev; > - entry->mask_base = base; > - > - list_add_tail(&entry->list, &dev->msi_list); > - > - if ( !dev->msix_nr_entries ) > + if ( !dev->info.is_virtfn ) > { > - u8 pbus, pslot, pfunc; > - int vf; > - u64 pba_paddr; > - u32 pba_offset; > + pbus = bus; > + pslot = slot; > + pfunc = func; > + vf = -1; > + } > + else > + { > + pbus = dev->info.physfn.bus; > + pslot = PCI_SLOT(dev->info.physfn.devfn); > + pfunc = PCI_FUNC(dev->info.physfn.devfn); > + vf = PCI_BDF2(dev->bus, dev->devfn); > + } > > - if ( !dev->info.is_virtfn ) > - { > - pbus = bus; > - pslot = slot; > - pfunc = func; > - vf = -1; > - } > - else > + table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf); > + WARN_ON(msi && msi->table_base != table_paddr); > + if ( !table_paddr ) > + { > + if ( !msi || !msi->table_base ) > { > - pbus = dev->info.physfn.bus; > - pslot = PCI_SLOT(dev->info.physfn.devfn); > - pfunc = PCI_FUNC(dev->info.physfn.devfn); > - vf = PCI_BDF2(dev->bus, dev->devfn); > + xfree(entry); > + return -ENXIO; > } > + table_paddr = msi->table_base; > + } > + table_paddr += table_offset; > > - ASSERT(!dev->msix_used_entries); > - WARN_ON(msi->table_base != > - read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf)); > + if ( !dev->msix_used_entries ) > + { > + u64 pba_paddr; > + u32 pba_offset; > > dev->msix_nr_entries = nr_entries; > dev->msix_table.first = PFN_DOWN(table_paddr); > @@ -765,7 +748,42 @@ static int msix_capability_init(struct p > BITS_TO_LONGS(nr_entries) - 1); > WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, dev- > >msix_pba.first, > dev->msix_pba.last)); > + } > > + if ( entry ) > + { > + /* Map MSI-X table region */ > + u64 entry_paddr = table_paddr + msi->entry_nr * > PCI_MSIX_ENTRY_SIZE; > + int idx = msix_get_fixmap(dev, table_paddr, entry_paddr); > + void __iomem *base; > + > + if ( idx < 0 ) > + { > + xfree(entry); > + return idx; > + } > + base = (void *)(fix_to_virt(idx) + > + ((unsigned long)entry_paddr & (PAGE_SIZE - 1))); > + > + /* Mask interrupt here */ > + writel(1, base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); > + > + entry->msi_attrib.type = PCI_CAP_ID_MSIX; > + entry->msi_attrib.is_64 = 1; > + entry->msi_attrib.entry_nr = msi->entry_nr; > + entry->msi_attrib.maskbit = 1; > + entry->msi_attrib.masked = 1; > + entry->msi_attrib.pos = pos; > + entry->irq = msi->irq; > + entry->dev = dev; > + entry->mask_base = base; > + > + list_add_tail(&entry->list, &dev->msi_list); > + *desc = entry; > + } > + > + if ( !dev->msix_used_entries ) > + { > if ( rangeset_add_range(mmio_ro_ranges, dev->msix_table.first, > dev->msix_table.last) ) > WARN(); > @@ -776,7 +794,7 @@ static int msix_capability_init(struct p > if ( dev->domain ) > p2m_change_entry_type_global(dev->domain, > p2m_mmio_direct, p2m_mmio_direct); > - if ( !dev->domain || !paging_mode_translate(dev->domain) ) > + if ( desc && (!dev->domain || !paging_mode_translate(dev->domain)) ) > { > struct domain *d = dev->domain; > > @@ -790,6 +808,13 @@ static int msix_capability_init(struct p > break; > if ( d ) > { > + if ( !IS_PRIV(d) && dev->msix_warned != d->domain_id ) > + { > + dev->msix_warned = d->domain_id; > + printk(XENLOG_ERR > + "Potentially insecure use of MSI-X on > %04x:%02x:%02x.%u by > Dom%d\n", > + seg, bus, slot, func, d->domain_id); > + } > /* XXX How to deal with existing mappings? */ > } > } > @@ -798,10 +823,6 @@ static int msix_capability_init(struct p > WARN_ON(dev->msix_table.first != (table_paddr >> PAGE_SHIFT)); > ++dev->msix_used_entries; > > - /* Mask interrupt here */ > - writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); > - > - *desc = entry; > /* Restore MSI-X enabled bits */ > pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control); > > @@ -926,6 +947,19 @@ static int __pci_enable_msix(struct msi_ > return status; > } > > +static void _pci_cleanup_msix(struct pci_dev *dev) > +{ > + if ( !--dev->msix_used_entries ) > + { > + if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_table.first, > + dev->msix_table.last) ) > + WARN(); > + if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_pba.first, > + dev->msix_pba.last) ) > + WARN(); > + } > +} > + > static void __pci_disable_msix(struct msi_desc *entry) > { > struct pci_dev *dev; > @@ -949,15 +983,42 @@ static void __pci_disable_msix(struct ms > > pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control); > > - if ( !--dev->msix_used_entries ) > + _pci_cleanup_msix(dev); > +} > + > +int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off) > +{ > + int rc; > + struct pci_dev *pdev; > + u8 slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn); > + unsigned int pos = pci_find_cap_offset(seg, bus, slot, func, > + PCI_CAP_ID_MSIX); > + > + if ( !pos ) > + return -ENODEV; > + > + spin_lock(&pcidevs_lock); > + pdev = pci_get_pdev(seg, bus, devfn); > + if ( !pdev ) > + rc = -ENODEV; > + else if ( pdev->msix_used_entries != !!off ) > + rc = -EBUSY; > + else if ( off ) > { > - if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_table.first, > - dev->msix_table.last) ) > - WARN(); > - if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_pba.first, > - dev->msix_pba.last) ) > - WARN(); > + _pci_cleanup_msix(pdev); > + rc = 0; > } > + else > + { > + u16 control = pci_conf_read16(seg, bus, slot, func, > + msix_control_reg(pos)); > + > + rc = msix_capability_init(pdev, NULL, NULL, > + multi_msix_capable(control)); > + } > + spin_unlock(&pcidevs_lock); > + > + return rc; > } > > /* > --- a/xen/arch/x86/physdev.c > +++ b/xen/arch/x86/physdev.c > @@ -575,6 +575,18 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H > break; > } > > + case PHYSDEVOP_prepare_msix: > + case PHYSDEVOP_release_msix: { > + struct physdev_pci_device dev; > + > + if ( copy_from_guest(&dev, arg, 1) ) > + ret = -EFAULT; > + else > + ret = pci_prepare_msix(dev.seg, dev.bus, dev.devfn, > + cmd != PHYSDEVOP_prepare_msix); > + break; > + } > + > case PHYSDEVOP_pci_mmcfg_reserved: { > struct physdev_pci_mmcfg_reserved info; > > --- a/xen/include/asm-x86/msi.h > +++ b/xen/include/asm-x86/msi.h > @@ -76,6 +76,7 @@ struct msi_desc; > /* Helper functions */ > extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc); > extern void pci_disable_msi(struct msi_desc *desc); > +extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off); > extern void pci_cleanup_msi(struct pci_dev *pdev); > extern void setup_msi_handler(struct irq_desc *, struct msi_desc *); > extern void setup_msi_irq(struct irq_desc *); > --- a/xen/include/public/physdev.h > +++ b/xen/include/public/physdev.h > @@ -303,6 +303,12 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_devi > > #define PHYSDEVOP_pci_device_remove 26 > #define PHYSDEVOP_restore_msi_ext 27 > +/* > + * Dom0 should use these two to announce MMIO resources assigned to > + * MSI-X capable devices won't (prepare) or may (release) change. > + */ > +#define PHYSDEVOP_prepare_msix 30 > +#define PHYSDEVOP_release_msix 31 > struct physdev_pci_device { > /* IN */ > uint16_t seg; > --- a/xen/include/xen/pci.h > +++ b/xen/include/xen/pci.h > @@ -57,6 +57,7 @@ struct pci_dev { > int msix_table_refcnt[MAX_MSIX_TABLE_PAGES]; > int msix_table_idx[MAX_MSIX_TABLE_PAGES]; > spinlock_t msix_table_lock; > + domid_t msix_warned; > > struct domain *domain; > const u16 seg; > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |