[Xen-devel] [PATCH 1/2] hvm: passthrough MSI-X mask bit acceleration
Add a new parameter to DOMCTL_bind_pt_irq so that Xen knows the guest
physical address of the MSI-X table. Also add a new MMIO intercept handler
for that gpa, so that MSI-X vector mask bit operations can be handled in
the hypervisor. This considerably reduces the load on the device model if
the guest masks and unmasks vectors frequently.

Signed-off-by: Qing He <qing.he@xxxxxxxxx>

---
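(For illustration only, not part of the patch: a device model that already
knows the guest physical address of the MSI-X table could use the extended
libxc call roughly as sketched below. This assumes the usual int xc_handle
first argument, and that domid/gvec/pirq/gflags/table_base come from the
existing MSI binding path and the emulated MSI-X capability.)

    #include <stdint.h>
    #include <stdio.h>
    #include <xenctrl.h>

    /* Hypothetical helper sketch: bind a guest MSI-X vector and tell Xen
     * where the guest maps the MSI-X table (gtable), so that mask bit
     * accesses are served in the hypervisor rather than in the device
     * model. All argument values are assumed to be supplied by the caller. */
    static int bind_msix_with_gtable(int xc_handle, uint32_t domid,
                                     uint32_t gvec, uint32_t pirq,
                                     uint32_t gflags, uint64_t table_base)
    {
        int rc = xc_domain_update_msi_irq(xc_handle, domid, gvec, pirq,
                                          gflags, table_base);
        if ( rc )
            fprintf(stderr, "bind_pt_irq (gtable=%#llx) failed: %d\n",
                    (unsigned long long)table_base, rc);
        return rc;
    }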
 tools/libxc/xc_domain.c          |    4 
 tools/libxc/xenctrl.h            |    3 
 xen/arch/x86/hvm/hvm.c           |    7 
 xen/arch/x86/hvm/intercept.c     |    6 
 xen/arch/x86/hvm/vmsi.c          |  280 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/msi.c               |   20 ++
 xen/drivers/passthrough/io.c     |    6 
 xen/include/asm-x86/hvm/domain.h |    4 
 xen/include/asm-x86/msi.h        |    2 
 xen/include/public/domctl.h      |    1 
 xen/include/xen/pci.h            |    3 
 11 files changed, 331 insertions(+), 5 deletions(-)

diff -r f8187a343ad2 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c	Fri Feb 20 17:02:36 2009 +0000
+++ b/tools/libxc/xc_domain.c	Thu Feb 26 13:29:10 2009 +0800
@@ -920,7 +920,8 @@ int xc_domain_update_msi_irq(
     uint32_t domid,
     uint32_t gvec,
     uint32_t pirq,
-    uint32_t gflags)
+    uint32_t gflags,
+    uint64_t gtable)
 {
     int rc;
     xen_domctl_bind_pt_irq_t *bind;
@@ -936,6 +937,7 @@ int xc_domain_update_msi_irq(
     bind->machine_irq = pirq;
     bind->u.msi.gvec = gvec;
     bind->u.msi.gflags = gflags;
+    bind->u.msi.gtable = gtable;
 
     rc = do_domctl(xc_handle, &domctl);
     return rc;
diff -r f8187a343ad2 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h	Fri Feb 20 17:02:36 2009 +0000
+++ b/tools/libxc/xenctrl.h	Thu Feb 26 13:29:10 2009 +0800
@@ -1092,7 +1092,8 @@ int xc_domain_update_msi_irq(
     uint32_t domid,
     uint32_t gvec,
     uint32_t pirq,
-    uint32_t gflags);
+    uint32_t gflags,
+    uint64_t gtable);
 
 int xc_domain_unbind_msi_irq(int xc_handle,
                              uint32_t domid,
diff -r f8187a343ad2 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/arch/x86/hvm/hvm.c	Thu Feb 26 13:29:10 2009 +0800
@@ -308,6 +308,9 @@ int hvm_domain_initialise(struct domain
     spin_lock_init(&d->arch.hvm_domain.irq_lock);
     spin_lock_init(&d->arch.hvm_domain.uc_lock);
 
+    INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);
+    spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock);
+
     hvm_init_guest_time(d);
 
     d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;
@@ -348,10 +351,14 @@ int hvm_domain_initialise(struct domain
     return rc;
 }
 
+extern void msixtbl_pt_cleanup(struct domain *d);
+
 void hvm_domain_relinquish_resources(struct domain *d)
 {
     hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
     hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
+
+    msixtbl_pt_cleanup(d);
 
     /* Stop all asynchronous timer actions. */
     rtc_deinit(d);
diff -r f8187a343ad2 xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/arch/x86/hvm/intercept.c	Thu Feb 26 13:29:10 2009 +0800
@@ -35,14 +35,16 @@ extern struct hvm_mmio_handler hpet_mmio
 extern struct hvm_mmio_handler hpet_mmio_handler;
 extern struct hvm_mmio_handler vlapic_mmio_handler;
 extern struct hvm_mmio_handler vioapic_mmio_handler;
+extern struct hvm_mmio_handler msixtbl_mmio_handler;
 
-#define HVM_MMIO_HANDLER_NR 3
+#define HVM_MMIO_HANDLER_NR 4
 
 static struct hvm_mmio_handler *hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] =
 {
     &hpet_mmio_handler,
     &vlapic_mmio_handler,
-    &vioapic_mmio_handler
+    &vioapic_mmio_handler,
+    &msixtbl_mmio_handler
 };
 
 static int hvm_mmio_access(struct vcpu *v,
diff -r f8187a343ad2 xen/arch/x86/hvm/vmsi.c
--- a/xen/arch/x86/hvm/vmsi.c	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/arch/x86/hvm/vmsi.c	Thu Feb 26 13:29:10 2009 +0800
@@ -193,3 +193,283 @@ int vmsi_deliver(struct domain *d, int p
     return 1;
 }
 
+/* MSI-X mask bit hypervisor interception */
+struct msixtbl_entry
+{
+    struct list_head list;
+    atomic_t refcnt;    /* how many bind_pt_irq called for the device */
+
+    /* TODO: resolve the potential race by destruction of pdev */
+    struct pci_dev *pdev;
+    unsigned long gtable;       /* gpa of msix table */
+    unsigned long table_len;
+    unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
+
+    struct rcu_head rcu;
+};
+
+static struct msixtbl_entry *msixtbl_find_entry(
+    struct vcpu *v, unsigned long addr)
+{
+    struct msixtbl_entry *entry;
+    struct domain *d = v->domain;
+
+    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
+        if ( addr >= entry->gtable &&
+             addr < entry->gtable + entry->table_len )
+            return entry;
+
+    return NULL;
+}
+
+static void __iomem *msixtbl_addr_to_virt(
+    struct msixtbl_entry *entry, unsigned long addr)
+{
+    int idx, nr_page;
+
+    if ( !entry )
+        return NULL;
+
+    nr_page = (addr >> PAGE_SHIFT) -
+              (entry->gtable >> PAGE_SHIFT);
+
+    if ( !entry->pdev )
+        return NULL;
+
+    idx = entry->pdev->msix_table_idx[nr_page];
+    if ( !idx )
+        return NULL;
+
+    return (void *)(fix_to_virt(idx) +
+                    (addr & ((1UL << PAGE_SHIFT) - 1)));
+}
+
+static int msixtbl_read(
+    struct vcpu *v, unsigned long address,
+    unsigned long len, unsigned long *pval)
+{
+    unsigned long offset;
+    struct msixtbl_entry *entry;
+    void *virt;
+    int r = X86EMUL_UNHANDLEABLE;
+
+    rcu_read_lock();
+
+    if ( len != 4 )
+        goto out;
+
+    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
+    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
+        goto out;
+
+    entry = msixtbl_find_entry(v, address);
+    virt = msixtbl_addr_to_virt(entry, address);
+    if ( !virt )
+        goto out;
+
+    *pval = readl(virt);
+    r = X86EMUL_OKAY;
+
+out:
+    rcu_read_unlock();
+    return r;
+}
+
+static int msixtbl_write(struct vcpu *v, unsigned long address,
+                         unsigned long len, unsigned long val)
+{
+    unsigned long offset;
+    struct msixtbl_entry *entry;
+    void *virt;
+    int nr_entry;
+    int r = X86EMUL_UNHANDLEABLE;
+
+    rcu_read_lock();
+
+    if ( len != 4 )
+        goto out;
+
+    entry = msixtbl_find_entry(v, address);
+    nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
+
+    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
+    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
+    {
+        set_bit(nr_entry, &entry->table_flags);
+        goto out;
+    }
+
+    /* exit to device model if address/data has been modified */
+    if ( test_and_clear_bit(nr_entry, &entry->table_flags) )
+        goto out;
+
+    virt = msixtbl_addr_to_virt(entry, address);
+    if ( !virt )
+        goto out;
+
+    writel(val, virt);
+    r = X86EMUL_OKAY;
+
+out:
+    rcu_read_unlock();
+    return r;
+}
+
+static int msixtbl_range(struct vcpu *v, unsigned long addr)
+{
+    struct msixtbl_entry *entry;
+    void *virt;
+
+    rcu_read_lock();
+
+    entry = msixtbl_find_entry(v, addr);
+    virt = msixtbl_addr_to_virt(entry, addr);
+
+    rcu_read_unlock();
+
+    return !!virt;
+}
+
+struct hvm_mmio_handler msixtbl_mmio_handler = {
+    .check_handler = msixtbl_range,
+    .read_handler = msixtbl_read,
+    .write_handler = msixtbl_write
+};
+
+static struct msixtbl_entry *add_msixtbl_entry(struct domain *d,
+                                               struct pci_dev *pdev,
+                                               uint64_t gtable)
+{
+    struct msixtbl_entry *entry;
+    u32 len;
+
+    entry = xmalloc(struct msixtbl_entry);
+    if ( !entry )
+        return NULL;
+
+    memset(entry, 0, sizeof(struct msixtbl_entry));
+
+    INIT_LIST_HEAD(&entry->list);
+    INIT_RCU_HEAD(&entry->rcu);
+    atomic_set(&entry->refcnt, 0);
+
+    len = pci_msix_get_table_len(pdev);
+    entry->table_len = len;
+    entry->pdev = pdev;
+    entry->gtable = (unsigned long) gtable;
+
+    list_add_rcu(&entry->list, &d->arch.hvm_domain.msixtbl_list);
+
+    return entry;
+}
+
+static void free_msixtbl_entry(struct rcu_head *rcu)
+{
+    struct msixtbl_entry *entry;
+
+    entry = container_of(rcu, struct msixtbl_entry, rcu);
+
+    xfree(entry);
+}
+
+static void del_msixtbl_entry(struct msixtbl_entry *entry)
+{
+    list_del_rcu(&entry->list);
+    call_rcu(&entry->rcu, free_msixtbl_entry);
+}
+
+int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable)
+{
+    irq_desc_t *irq_desc;
+    struct msi_desc *msi_desc;
+    struct pci_dev *pdev;
+    struct msixtbl_entry *entry;
+    int r = -EINVAL;
+
+    /* pcidevs_lock already held */
+    irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+
+    if ( irq_desc->handler != &pci_msi_type )
+        goto out;
+
+    msi_desc = irq_desc->msi_desc;
+    if ( !msi_desc )
+        goto out;
+
+    pdev = msi_desc->dev;
+
+    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
+        if ( pdev == entry->pdev )
+            goto found;
+
+    entry = add_msixtbl_entry(d, pdev, gtable);
+    if ( !entry )
+    {
+        spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+        goto out;
+    }
+
+found:
+    atomic_inc(&entry->refcnt);
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+out:
+    spin_unlock_irq(&irq_desc->lock);
+    return r;
+
+}
+
+void msixtbl_pt_unregister(struct domain *d, int pirq)
+{
+    irq_desc_t *irq_desc;
+    struct msi_desc *msi_desc;
+    struct pci_dev *pdev;
+    struct msixtbl_entry *entry;
+
+    /* pcidevs_lock already held */
+    irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+
+    if ( irq_desc->handler != &pci_msi_type )
+        goto out;
+
+    msi_desc = irq_desc->msi_desc;
+    if ( !msi_desc )
+        goto out;
+
+    pdev = msi_desc->dev;
+
+    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
+        if ( pdev == entry->pdev )
+            goto found;
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+out:
+    spin_unlock(&irq_desc->lock);
+    return;
+
+found:
+    if ( !atomic_dec_and_test(&entry->refcnt) )
+        del_msixtbl_entry(entry);
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+    spin_unlock(&irq_desc->lock);
+}
+
+void msixtbl_pt_cleanup(struct domain *d)
+{
+    struct msixtbl_entry *entry, *temp;
+
+    spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+    list_for_each_entry_safe( entry, temp,
+                              &d->arch.hvm_domain.msixtbl_list, list )
+        del_msixtbl_entry(entry);
+
+    spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+}
diff -r f8187a343ad2 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/arch/x86/msi.c	Thu Feb 26 13:29:10 2009 +0800
@@ -839,3 +839,23 @@ int pci_restore_msi_state(struct pci_dev
 
     return 0;
 }
+
+unsigned int pci_msix_get_table_len(struct pci_dev *pdev)
+{
+    int pos;
+    u16 control;
+    u8 bus, slot, func;
+    unsigned int len;
+
+    bus = pdev->bus;
+    slot = PCI_SLOT(pdev->devfn);
+    func = PCI_FUNC(pdev->devfn);
+
+    pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
+    if ( !pos )
+        return 0;
+
+    control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
+    len = msix_table_size(control) * PCI_MSIX_ENTRY_SIZE;
+
+    return len;
+}
diff -r f8187a343ad2 xen/drivers/passthrough/io.c
--- a/xen/drivers/passthrough/io.c	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/drivers/passthrough/io.c	Thu Feb 26 13:29:10 2009 +0800
@@ -58,6 +58,9 @@ static void pt_irq_time_out(void *data)
     pirq_guest_eoi(irq_map->dom, machine_gsi);
 }
 
+extern int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable);
+extern void msixtbl_pt_unregister(struct domain *d, int pirq);
+
 int pt_irq_create_bind_vtd(
     struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
 {
@@ -115,6 +118,8 @@ int pt_irq_create_bind_vtd(
                 spin_unlock(&d->event_lock);
                 return rc;
             }
+            if ( pt_irq_bind->u.msi.gtable )
+                msixtbl_pt_register(d, pirq, pt_irq_bind->u.msi.gtable);
         }
         else if (hvm_irq_dpci->mirq[pirq].gmsi.gvec != pt_irq_bind->u.msi.gvec
                 ||hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] != pirq)
@@ -259,6 +264,7 @@ int pt_irq_destroy_bind_vtd(
         if ( list_empty(&hvm_irq_dpci->mirq[machine_gsi].digl_list) )
         {
             pirq_guest_unbind(d, machine_gsi);
+            msixtbl_pt_unregister(d, machine_gsi);
             if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
                 kill_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, machine_gsi)]);
             hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
diff -r f8187a343ad2 xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/include/asm-x86/hvm/domain.h	Thu Feb 26 13:29:10 2009 +0800
@@ -75,6 +75,10 @@ struct hvm_domain {
     /* Pass-through */
     struct hvm_iommu hvm_iommu;
 
+    /* hypervisor intercepted msix table */
+    struct list_head msixtbl_list;
+    spinlock_t msixtbl_list_lock;
+
     struct viridian_domain viridian;
 
     bool_t hap_enabled;
diff -r f8187a343ad2 xen/include/asm-x86/msi.h
--- a/xen/include/asm-x86/msi.h	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/include/asm-x86/msi.h	Thu Feb 26 13:29:10 2009 +0800
@@ -80,6 +80,8 @@ extern void teardown_msi_vector(int vect
 extern void teardown_msi_vector(int vector);
 extern int msi_free_vector(struct msi_desc *entry);
 extern int pci_restore_msi_state(struct pci_dev *pdev);
+
+extern unsigned int pci_msix_get_table_len(struct pci_dev *pdev);
 
 struct msi_desc {
     struct {
diff -r f8187a343ad2 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/include/public/domctl.h	Thu Feb 26 13:29:10 2009 +0800
@@ -485,6 +485,7 @@ struct xen_domctl_bind_pt_irq {
         struct {
             uint8_t gvec;
             uint32_t gflags;
+            uint64_t gtable;
         } msi;
     } u;
 };
diff -r f8187a343ad2 xen/include/xen/pci.h
--- a/xen/include/xen/pci.h	Fri Feb 20 17:02:36 2009 +0000
+++ b/xen/include/xen/pci.h	Thu Feb 26 13:29:10 2009 +0800
@@ -29,7 +29,8 @@
 #define PCI_BDF(b,d,f)  ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
 #define PCI_BDF2(b,df)  ((((b) & 0xff) << 8) | ((df) & 0xff))
 
-#define MAX_MSIX_TABLE_PAGES 8 /* 2048 entries */
+#define MAX_MSIX_TABLE_ENTRIES 2048
+#define MAX_MSIX_TABLE_PAGES 8
 
 struct pci_dev {
     struct list_head alldevs_list;
     struct list_head domain_list;

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel