[xen staging] xen/pci: move x86 specific code to x86 directory
commit f7e77e55d33b7e52477d65f4c5e67281384b650f Author: Rahul Singh <rahul.singh@xxxxxxx> AuthorDate: Fri Nov 27 18:06:04 2020 +0100 Commit: Jan Beulich <jbeulich@xxxxxxxx> CommitDate: Fri Nov 27 18:06:04 2020 +0100 xen/pci: move x86 specific code to x86 directory passthrough/pci.c file is common for all architecture, but there is x86 specific code in this file. Move x86 specific code to the drivers/passthrough/io.c file to avoid compilation error for other architecture. As drivers/passthrough/io.c is compiled only for x86 move it to x86 directory and rename it to hvm.c. No functional change intended. Signed-off-by: Rahul Singh <rahul.singh@xxxxxxx> Reviewed-by: Stefano Stabellini <sstabellini@xxxxxxxxxx> Reviewed-by: Bertrand Marquis <bertrand.marquis@xxxxxxx> Acked-by: Jan Beulich <jbeulich@xxxxxxxx> --- xen/drivers/passthrough/Makefile | 3 - xen/drivers/passthrough/io.c | 1127 -------------------------------- xen/drivers/passthrough/pci.c | 68 +- xen/drivers/passthrough/x86/Makefile | 1 + xen/drivers/passthrough/x86/hvm.c | 1193 ++++++++++++++++++++++++++++++++++ xen/include/xen/pci.h | 9 + 6 files changed, 1204 insertions(+), 1197 deletions(-) diff --git a/xen/drivers/passthrough/Makefile b/xen/drivers/passthrough/Makefile index e973e16c74..cc646612c7 100644 --- a/xen/drivers/passthrough/Makefile +++ b/xen/drivers/passthrough/Makefile @@ -6,6 +6,3 @@ obj-$(CONFIG_ARM) += arm/ obj-y += iommu.o obj-$(CONFIG_HAS_PCI) += pci.o obj-$(CONFIG_HAS_DEVICE_TREE) += device_tree.o - -x86-$(CONFIG_HVM) := io.o -obj-$(CONFIG_X86) += $(x86-y) diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c deleted file mode 100644 index 6b1305a3e5..0000000000 --- a/xen/drivers/passthrough/io.c +++ /dev/null @@ -1,1127 +0,0 @@ -/* - * Copyright (c) 2006, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; If not, see <http://www.gnu.org/licenses/>. - * - * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx> - * Copyright (C) Xiaohui Xin <xiaohui.xin@xxxxxxxxx> - */ - -#include <xen/event.h> -#include <xen/iommu.h> -#include <xen/cpu.h> -#include <xen/irq.h> -#include <asm/hvm/irq.h> -#include <asm/hvm/support.h> -#include <asm/io_apic.h> - -static DEFINE_PER_CPU(struct list_head, dpci_list); - -/* - * These two bit states help to safely schedule, deschedule, and wait until - * the softirq has finished. - * - * The semantics behind these two bits is as follow: - * - STATE_SCHED - whoever modifies it has to ref-count the domain (->dom). - * - STATE_RUN - only softirq is allowed to set and clear it. If it has - * been set hvm_dirq_assist will RUN with a saved value of the - * 'struct domain' copied from 'pirq_dpci->dom' before STATE_RUN was set. - * - * The usual states are: STATE_SCHED(set) -> STATE_RUN(set) -> - * STATE_SCHED(unset) -> STATE_RUN(unset). - * - * However the states can also diverge such as: STATE_SCHED(set) -> - * STATE_SCHED(unset) -> STATE_RUN(set) -> STATE_RUN(unset). 
That means - * the 'hvm_dirq_assist' never run and that the softirq did not do any - * ref-counting. - */ - -enum { - STATE_SCHED, - STATE_RUN -}; - -/* - * This can be called multiple times, but the softirq is only raised once. - * That is until the STATE_SCHED state has been cleared. The state can be - * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'), - * or by 'pt_pirq_softirq_reset' (which will try to clear the state before - * the softirq had a chance to run). - */ -static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci) -{ - unsigned long flags; - - if ( test_and_set_bit(STATE_SCHED, &pirq_dpci->state) ) - return; - - get_knownalive_domain(pirq_dpci->dom); - - local_irq_save(flags); - list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list)); - local_irq_restore(flags); - - raise_softirq(HVM_DPCI_SOFTIRQ); -} - -/* - * If we are racing with softirq_dpci (STATE_SCHED) we return - * true. Otherwise we return false. - * - * If it is false, it is the callers responsibility to make sure - * that the softirq (with the event_lock dropped) has ran. - */ -bool pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci) -{ - if ( pirq_dpci->state & ((1 << STATE_RUN) | (1 << STATE_SCHED)) ) - return true; - - /* - * If in the future we would call 'raise_softirq_for' right away - * after 'pt_pirq_softirq_active' we MUST reset the list (otherwise it - * might have stale data). - */ - return false; -} - -/* - * Reset the pirq_dpci->dom parameter to NULL. - * - * This function checks the different states to make sure it can do it - * at the right time. If it unschedules the 'hvm_dirq_assist' from running - * it also refcounts (which is what the softirq would have done) properly. - */ -static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci) -{ - struct domain *d = pirq_dpci->dom; - - ASSERT(spin_is_locked(&d->event_lock)); - - switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 0) ) - { - case (1 << STATE_SCHED): - /* - * We are going to try to de-schedule the softirq before it goes in - * STATE_RUN. Whoever clears STATE_SCHED MUST refcount the 'dom'. - */ - put_domain(d); - /* fallthrough. */ - case (1 << STATE_RUN): - case (1 << STATE_RUN) | (1 << STATE_SCHED): - /* - * The reason it is OK to reset 'dom' when STATE_RUN bit is set is due - * to a shortcut the 'dpci_softirq' implements. It stashes the 'dom' - * in local variable before it sets STATE_RUN - and therefore will not - * dereference '->dom' which would crash. - */ - pirq_dpci->dom = NULL; - break; - } - /* - * Inhibit 'hvm_dirq_assist' from doing anything useful and at worst - * calling 'set_timer' which will blow up (as we have called kill_timer - * or never initialized it). Note that we hold the lock that - * 'hvm_dirq_assist' could be spinning on. 
- */ - pirq_dpci->masked = 0; -} - -bool pt_irq_need_timer(uint32_t flags) -{ - return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE | - HVM_IRQ_DPCI_NO_EOI)); -} - -static int pt_irq_guest_eoi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci, - void *arg) -{ - if ( __test_and_clear_bit(_HVM_IRQ_DPCI_EOI_LATCH_SHIFT, - &pirq_dpci->flags) ) - { - pirq_dpci->masked = 0; - pirq_dpci->pending = 0; - pirq_guest_eoi(dpci_pirq(pirq_dpci)); - } - - return 0; -} - -static void pt_irq_time_out(void *data) -{ - struct hvm_pirq_dpci *irq_map = data; - const struct hvm_irq_dpci *dpci; - const struct dev_intx_gsi_link *digl; - - spin_lock(&irq_map->dom->event_lock); - - if ( irq_map->flags & HVM_IRQ_DPCI_IDENTITY_GSI ) - { - ASSERT(is_hardware_domain(irq_map->dom)); - /* - * Identity mapped, no need to iterate over the guest GSI list to find - * other pirqs sharing the same guest GSI. - * - * In the identity mapped case the EOI can also be done now, this way - * the iteration over the list of domain pirqs is avoided. - */ - hvm_gsi_deassert(irq_map->dom, dpci_pirq(irq_map)->pirq); - irq_map->flags |= HVM_IRQ_DPCI_EOI_LATCH; - pt_irq_guest_eoi(irq_map->dom, irq_map, NULL); - spin_unlock(&irq_map->dom->event_lock); - return; - } - - dpci = domain_get_irq_dpci(irq_map->dom); - if ( unlikely(!dpci) ) - { - ASSERT_UNREACHABLE(); - spin_unlock(&irq_map->dom->event_lock); - return; - } - list_for_each_entry ( digl, &irq_map->digl_list, list ) - { - unsigned int guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx); - const struct hvm_girq_dpci_mapping *girq; - - list_for_each_entry ( girq, &dpci->girq[guest_gsi], list ) - { - struct pirq *pirq = pirq_info(irq_map->dom, girq->machine_gsi); - - pirq_dpci(pirq)->flags |= HVM_IRQ_DPCI_EOI_LATCH; - } - hvm_pci_intx_deassert(irq_map->dom, digl->device, digl->intx); - } - - pt_pirq_iterate(irq_map->dom, pt_irq_guest_eoi, NULL); - - spin_unlock(&irq_map->dom->event_lock); -} - -struct hvm_irq_dpci *domain_get_irq_dpci(const struct domain *d) -{ - if ( !d || !is_hvm_domain(d) ) - return NULL; - - return hvm_domain_irq(d)->dpci; -} - -void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci) -{ - xfree(dpci); -} - -/* - * This routine handles lowest-priority interrupts using vector-hashing - * mechanism. As an example, modern Intel CPUs use this method to handle - * lowest-priority interrupts. - * - * Here is the details about the vector-hashing mechanism: - * 1. For lowest-priority interrupts, store all the possible destination - * vCPUs in an array. - * 2. Use "gvec % max number of destination vCPUs" to find the right - * destination vCPU in the array for the lowest-priority interrupt. 
- */ -static struct vcpu *vector_hashing_dest(const struct domain *d, - uint32_t dest_id, - bool dest_mode, - uint8_t gvec) - -{ - unsigned long *dest_vcpu_bitmap; - unsigned int dest_vcpus = 0; - struct vcpu *v, *dest = NULL; - unsigned int i; - - dest_vcpu_bitmap = xzalloc_array(unsigned long, - BITS_TO_LONGS(d->max_vcpus)); - if ( !dest_vcpu_bitmap ) - return NULL; - - for_each_vcpu ( d, v ) - { - if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, APIC_DEST_NOSHORT, - dest_id, dest_mode) ) - continue; - - __set_bit(v->vcpu_id, dest_vcpu_bitmap); - dest_vcpus++; - } - - if ( dest_vcpus != 0 ) - { - unsigned int mod = gvec % dest_vcpus; - unsigned int idx = 0; - - for ( i = 0; i <= mod; i++ ) - { - idx = find_next_bit(dest_vcpu_bitmap, d->max_vcpus, idx) + 1; - BUG_ON(idx > d->max_vcpus); - } - - dest = d->vcpu[idx - 1]; - } - - xfree(dest_vcpu_bitmap); - - return dest; -} - -int pt_irq_create_bind( - struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind) -{ - struct hvm_irq_dpci *hvm_irq_dpci; - struct hvm_pirq_dpci *pirq_dpci; - struct pirq *info; - int rc, pirq = pt_irq_bind->machine_irq; - - if ( pirq < 0 || pirq >= d->nr_pirqs ) - return -EINVAL; - - restart: - spin_lock(&d->event_lock); - - hvm_irq_dpci = domain_get_irq_dpci(d); - if ( !hvm_irq_dpci && !is_hardware_domain(d) ) - { - unsigned int i; - - /* - * NB: the hardware domain doesn't use a hvm_irq_dpci struct because - * it's only allowed to identity map GSIs, and so the data contained in - * that struct (used to map guest GSIs into machine GSIs and perform - * interrupt routing) is completely useless to it. - */ - hvm_irq_dpci = xzalloc(struct hvm_irq_dpci); - if ( hvm_irq_dpci == NULL ) - { - spin_unlock(&d->event_lock); - return -ENOMEM; - } - for ( i = 0; i < NR_HVM_DOMU_IRQS; i++ ) - INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]); - - hvm_domain_irq(d)->dpci = hvm_irq_dpci; - } - - info = pirq_get_info(d, pirq); - if ( !info ) - { - spin_unlock(&d->event_lock); - return -ENOMEM; - } - pirq_dpci = pirq_dpci(info); - - /* - * A crude 'while' loop with us dropping the spinlock and giving - * the softirq_dpci a chance to run. - * We MUST check for this condition as the softirq could be scheduled - * and hasn't run yet. Note that this code replaced tasklet_kill which - * would have spun forever and would do the same thing (wait to flush out - * outstanding hvm_dirq_assist calls. - */ - if ( pt_pirq_softirq_active(pirq_dpci) ) - { - spin_unlock(&d->event_lock); - cpu_relax(); - goto restart; - } - - switch ( pt_irq_bind->irq_type ) - { - case PT_IRQ_TYPE_MSI: - { - uint8_t dest, delivery_mode; - bool dest_mode; - int dest_vcpu_id; - const struct vcpu *vcpu; - uint32_t gflags = pt_irq_bind->u.msi.gflags & - ~XEN_DOMCTL_VMSI_X86_UNMASKED; - - if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) - { - pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | HVM_IRQ_DPCI_MACH_MSI | - HVM_IRQ_DPCI_GUEST_MSI; - pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec; - pirq_dpci->gmsi.gflags = gflags; - /* - * 'pt_irq_create_bind' can be called after 'pt_irq_destroy_bind'. - * The 'pirq_cleanup_check' which would free the structure is only - * called if the event channel for the PIRQ is active. However - * OS-es that use event channels usually bind PIRQs to eventds - * and unbind them before calling 'pt_irq_destroy_bind' - with the - * result that we re-use the 'dpci' structure. This can be - * reproduced with unloading and loading the driver for a device. - * - * As such on every 'pt_irq_create_bind' call we MUST set it. 
- */ - pirq_dpci->dom = d; - /* bind after hvm_irq_dpci is setup to avoid race with irq handler*/ - rc = pirq_guest_bind(d->vcpu[0], info, 0); - if ( rc == 0 && pt_irq_bind->u.msi.gtable ) - { - rc = msixtbl_pt_register(d, info, pt_irq_bind->u.msi.gtable); - if ( unlikely(rc) ) - { - pirq_guest_unbind(d, info); - /* - * Between 'pirq_guest_bind' and before 'pirq_guest_unbind' - * an interrupt can be scheduled. No more of them are going - * to be scheduled but we must deal with the one that may be - * in the queue. - */ - pt_pirq_softirq_reset(pirq_dpci); - } - } - if ( unlikely(rc) ) - { - pirq_dpci->gmsi.gflags = 0; - pirq_dpci->gmsi.gvec = 0; - pirq_dpci->dom = NULL; - pirq_dpci->flags = 0; - pirq_cleanup_check(info, d); - spin_unlock(&d->event_lock); - return rc; - } - } - else - { - uint32_t mask = HVM_IRQ_DPCI_MACH_MSI | HVM_IRQ_DPCI_GUEST_MSI; - - if ( (pirq_dpci->flags & mask) != mask ) - { - spin_unlock(&d->event_lock); - return -EBUSY; - } - - /* If pirq is already mapped as vmsi, update guest data/addr. */ - if ( pirq_dpci->gmsi.gvec != pt_irq_bind->u.msi.gvec || - pirq_dpci->gmsi.gflags != gflags ) - { - /* Directly clear pending EOIs before enabling new MSI info. */ - pirq_guest_eoi(info); - - pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec; - pirq_dpci->gmsi.gflags = gflags; - } - } - /* Calculate dest_vcpu_id for MSI-type pirq migration. */ - dest = MASK_EXTR(pirq_dpci->gmsi.gflags, - XEN_DOMCTL_VMSI_X86_DEST_ID_MASK); - dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK; - delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags, - XEN_DOMCTL_VMSI_X86_DELIV_MASK); - - dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode); - pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id; - spin_unlock(&d->event_lock); - - pirq_dpci->gmsi.posted = false; - vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL; - if ( iommu_intpost ) - { - if ( delivery_mode == dest_LowestPrio ) - vcpu = vector_hashing_dest(d, dest, dest_mode, - pirq_dpci->gmsi.gvec); - if ( vcpu ) - pirq_dpci->gmsi.posted = true; - } - if ( vcpu && is_iommu_enabled(d) ) - hvm_migrate_pirq(pirq_dpci, vcpu); - - /* Use interrupt posting if it is supported. */ - if ( iommu_intpost ) - pi_update_irte(vcpu ? &vcpu->arch.hvm.vmx.pi_desc : NULL, - info, pirq_dpci->gmsi.gvec); - - if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED ) - { - unsigned long flags; - struct irq_desc *desc = pirq_spin_lock_irq_desc(info, &flags); - - if ( !desc ) - { - pt_irq_destroy_bind(d, pt_irq_bind); - return -EINVAL; - } - - guest_mask_msi_irq(desc, false); - spin_unlock_irqrestore(&desc->lock, flags); - } - - break; - } - - case PT_IRQ_TYPE_PCI: - case PT_IRQ_TYPE_MSI_TRANSLATE: - { - struct dev_intx_gsi_link *digl = NULL; - struct hvm_girq_dpci_mapping *girq = NULL; - unsigned int guest_gsi; - - /* - * Mapping GSIs for the hardware domain is different than doing it for - * an unpriviledged guest, the hardware domain is only allowed to - * identity map GSIs, and as such all the data in the u.pci union is - * discarded. 
- */ - if ( hvm_irq_dpci ) - { - unsigned int link; - - digl = xmalloc(struct dev_intx_gsi_link); - girq = xmalloc(struct hvm_girq_dpci_mapping); - - if ( !digl || !girq ) - { - spin_unlock(&d->event_lock); - xfree(girq); - xfree(digl); - return -ENOMEM; - } - - girq->bus = digl->bus = pt_irq_bind->u.pci.bus; - girq->device = digl->device = pt_irq_bind->u.pci.device; - girq->intx = digl->intx = pt_irq_bind->u.pci.intx; - list_add_tail(&digl->list, &pirq_dpci->digl_list); - - guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx); - link = hvm_pci_intx_link(digl->device, digl->intx); - - hvm_irq_dpci->link_cnt[link]++; - - girq->machine_gsi = pirq; - list_add_tail(&girq->list, &hvm_irq_dpci->girq[guest_gsi]); - } - else - { - ASSERT(is_hardware_domain(d)); - - /* MSI_TRANSLATE is not supported for the hardware domain. */ - if ( pt_irq_bind->irq_type != PT_IRQ_TYPE_PCI || - pirq >= hvm_domain_irq(d)->nr_gsis ) - { - spin_unlock(&d->event_lock); - - return -EINVAL; - } - guest_gsi = pirq; - } - - /* Bind the same mirq once in the same domain */ - if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) - { - unsigned int share; - - /* MUST be set, as the pirq_dpci can be re-used. */ - pirq_dpci->dom = d; - if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_TRANSLATE ) - { - pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | - HVM_IRQ_DPCI_MACH_MSI | - HVM_IRQ_DPCI_GUEST_PCI | - HVM_IRQ_DPCI_TRANSLATE; - share = 0; - } - else /* PT_IRQ_TYPE_PCI */ - { - pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | - HVM_IRQ_DPCI_MACH_PCI | - HVM_IRQ_DPCI_GUEST_PCI; - if ( !is_hardware_domain(d) ) - share = BIND_PIRQ__WILL_SHARE; - else - { - int mask = vioapic_get_mask(d, guest_gsi); - int trigger_mode = vioapic_get_trigger_mode(d, guest_gsi); - - if ( mask < 0 || trigger_mode < 0 ) - { - spin_unlock(&d->event_lock); - - ASSERT_UNREACHABLE(); - return -EINVAL; - } - pirq_dpci->flags |= HVM_IRQ_DPCI_IDENTITY_GSI; - /* - * Check if the corresponding vIO APIC pin is configured - * level or edge trigger, level triggered interrupts will - * be marked as shareable. - */ - ASSERT(!mask); - share = trigger_mode; - if ( trigger_mode == VIOAPIC_EDGE_TRIG ) - /* - * Edge IO-APIC interrupt, no EOI or unmask to perform - * and hence no timer needed. - */ - pirq_dpci->flags |= HVM_IRQ_DPCI_NO_EOI; - } - } - - /* Init timer before binding */ - if ( pt_irq_need_timer(pirq_dpci->flags) ) - init_timer(&pirq_dpci->timer, pt_irq_time_out, pirq_dpci, 0); - /* Deal with gsi for legacy devices */ - rc = pirq_guest_bind(d->vcpu[0], info, share); - if ( unlikely(rc) ) - { - if ( pt_irq_need_timer(pirq_dpci->flags) ) - kill_timer(&pirq_dpci->timer); - /* - * There is no path for __do_IRQ to schedule softirq as - * IRQ_GUEST is not set. As such we can reset 'dom' directly. 
- */ - pirq_dpci->dom = NULL; - if ( hvm_irq_dpci ) - { - unsigned int link; - - ASSERT(girq && digl); - list_del(&girq->list); - list_del(&digl->list); - link = hvm_pci_intx_link(digl->device, digl->intx); - hvm_irq_dpci->link_cnt[link]--; - } - pirq_dpci->flags = 0; - pirq_cleanup_check(info, d); - spin_unlock(&d->event_lock); - xfree(girq); - xfree(digl); - return rc; - } - } - - spin_unlock(&d->event_lock); - - if ( iommu_verbose ) - { - char buf[24] = ""; - - if ( digl ) - snprintf(buf, ARRAY_SIZE(buf), " dev=%02x.%02x.%u intx=%u", - digl->bus, PCI_SLOT(digl->device), - PCI_FUNC(digl->device), digl->intx); - - printk(XENLOG_G_INFO "d%d: bind: m_gsi=%u g_gsi=%u%s\n", - d->domain_id, pirq, guest_gsi, buf); - } - break; - } - - default: - spin_unlock(&d->event_lock); - return -EOPNOTSUPP; - } - - return 0; -} - -int pt_irq_destroy_bind( - struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind) -{ - struct hvm_irq_dpci *hvm_irq_dpci; - struct hvm_pirq_dpci *pirq_dpci; - unsigned int machine_gsi = pt_irq_bind->machine_irq; - struct pirq *pirq; - const char *what = NULL; - - switch ( pt_irq_bind->irq_type ) - { - case PT_IRQ_TYPE_PCI: - case PT_IRQ_TYPE_MSI_TRANSLATE: - if ( iommu_verbose ) - { - unsigned int device = pt_irq_bind->u.pci.device; - unsigned int intx = pt_irq_bind->u.pci.intx; - - printk(XENLOG_G_INFO - "d%d: unbind: m_gsi=%u g_gsi=%u dev=%02x:%02x.%u intx=%u\n", - d->domain_id, machine_gsi, hvm_pci_intx_gsi(device, intx), - pt_irq_bind->u.pci.bus, - PCI_SLOT(device), PCI_FUNC(device), intx); - } - break; - case PT_IRQ_TYPE_MSI: - { - unsigned long flags; - struct irq_desc *desc = domain_spin_lock_irq_desc(d, machine_gsi, - &flags); - - if ( !desc ) - return -EINVAL; - /* - * Leave the MSI masked, so that the state when calling - * pt_irq_create_bind is consistent across bind/unbinds. - */ - guest_mask_msi_irq(desc, true); - spin_unlock_irqrestore(&desc->lock, flags); - break; - } - - default: - return -EOPNOTSUPP; - } - - spin_lock(&d->event_lock); - - hvm_irq_dpci = domain_get_irq_dpci(d); - - if ( !hvm_irq_dpci && !is_hardware_domain(d) ) - { - spin_unlock(&d->event_lock); - return -EINVAL; - } - - pirq = pirq_info(d, machine_gsi); - pirq_dpci = pirq_dpci(pirq); - - if ( hvm_irq_dpci && pt_irq_bind->irq_type != PT_IRQ_TYPE_MSI ) - { - unsigned int bus = pt_irq_bind->u.pci.bus; - unsigned int device = pt_irq_bind->u.pci.device; - unsigned int intx = pt_irq_bind->u.pci.intx; - unsigned int guest_gsi = hvm_pci_intx_gsi(device, intx); - unsigned int link = hvm_pci_intx_link(device, intx); - struct hvm_girq_dpci_mapping *girq; - struct dev_intx_gsi_link *digl, *tmp; - - list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list ) - { - if ( girq->bus == bus && - girq->device == device && - girq->intx == intx && - girq->machine_gsi == machine_gsi ) - { - list_del(&girq->list); - xfree(girq); - girq = NULL; - break; - } - } - - if ( girq ) - { - spin_unlock(&d->event_lock); - return -EINVAL; - } - - hvm_irq_dpci->link_cnt[link]--; - - /* clear the mirq info */ - if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) - { - list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list ) - { - if ( digl->bus == bus && - digl->device == device && - digl->intx == intx ) - { - list_del(&digl->list); - xfree(digl); - } - } - what = list_empty(&pirq_dpci->digl_list) ? 
"final" : "partial"; - } - else - what = "bogus"; - } - else if ( pirq_dpci && pirq_dpci->gmsi.posted ) - pi_update_irte(NULL, pirq, 0); - - if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) && - list_empty(&pirq_dpci->digl_list) ) - { - pirq_guest_unbind(d, pirq); - msixtbl_pt_unregister(d, pirq); - if ( pt_irq_need_timer(pirq_dpci->flags) ) - kill_timer(&pirq_dpci->timer); - pirq_dpci->flags = 0; - /* - * See comment in pt_irq_create_bind's PT_IRQ_TYPE_MSI before the - * call to pt_pirq_softirq_reset. - */ - pt_pirq_softirq_reset(pirq_dpci); - - pirq_cleanup_check(pirq, d); - } - - spin_unlock(&d->event_lock); - - if ( what && iommu_verbose ) - { - unsigned int device = pt_irq_bind->u.pci.device; - char buf[24] = ""; - - if ( hvm_irq_dpci ) - snprintf(buf, ARRAY_SIZE(buf), " dev=%02x.%02x.%u intx=%u", - pt_irq_bind->u.pci.bus, PCI_SLOT(device), - PCI_FUNC(device), pt_irq_bind->u.pci.intx); - - printk(XENLOG_G_INFO "d%d %s unmap: m_irq=%u%s\n", - d->domain_id, what, machine_gsi, buf); - } - - return 0; -} - -void pt_pirq_init(struct domain *d, struct hvm_pirq_dpci *dpci) -{ - INIT_LIST_HEAD(&dpci->digl_list); - dpci->gmsi.dest_vcpu_id = -1; -} - -bool pt_pirq_cleanup_check(struct hvm_pirq_dpci *dpci) -{ - if ( !dpci->flags && !pt_pirq_softirq_active(dpci) ) - { - dpci->dom = NULL; - return true; - } - return false; -} - -int pt_pirq_iterate(struct domain *d, - int (*cb)(struct domain *, - struct hvm_pirq_dpci *, void *), - void *arg) -{ - int rc = 0; - unsigned int pirq = 0, n, i; - struct pirq *pirqs[8]; - - ASSERT(spin_is_locked(&d->event_lock)); - - do { - n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq, - ARRAY_SIZE(pirqs)); - for ( i = 0; i < n; ++i ) - { - struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirqs[i]); - - pirq = pirqs[i]->pirq; - if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) - rc = cb(d, pirq_dpci, arg); - } - } while ( !rc && ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) ); - - return rc; -} - -int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq) -{ - struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d); - struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirq); - - ASSERT(is_hvm_domain(d)); - - if ( !is_iommu_enabled(d) || (!is_hardware_domain(d) && !dpci) || - !pirq_dpci || !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) - return 0; - - pirq_dpci->masked = 1; - raise_softirq_for(pirq_dpci); - return 1; -} - -/* called with d->event_lock held */ -static void __msi_pirq_eoi(struct hvm_pirq_dpci *pirq_dpci) -{ - irq_desc_t *desc; - - if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) && - (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) ) - { - struct pirq *pirq = dpci_pirq(pirq_dpci); - - BUG_ON(!local_irq_is_enabled()); - desc = pirq_spin_lock_irq_desc(pirq, NULL); - if ( !desc ) - return; - desc_guest_eoi(desc, pirq); - } -} - -static int _hvm_dpci_msi_eoi(struct domain *d, - struct hvm_pirq_dpci *pirq_dpci, void *arg) -{ - int vector = (long)arg; - - if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) && - (pirq_dpci->gmsi.gvec == vector) ) - { - unsigned int dest = MASK_EXTR(pirq_dpci->gmsi.gflags, - XEN_DOMCTL_VMSI_X86_DEST_ID_MASK); - bool dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK; - - if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest, - dest_mode) ) - { - __msi_pirq_eoi(pirq_dpci); - return 1; - } - } - - return 0; -} - -void hvm_dpci_msi_eoi(struct domain *d, int vector) -{ - if ( !is_iommu_enabled(d) || - (!hvm_domain_irq(d)->dpci && !is_hardware_domain(d)) ) - return; - - spin_lock(&d->event_lock); - pt_pirq_iterate(d, 
_hvm_dpci_msi_eoi, (void *)(long)vector); - spin_unlock(&d->event_lock); -} - -static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci) -{ - if ( unlikely(!hvm_domain_irq(d)->dpci) && !is_hardware_domain(d) ) - { - ASSERT_UNREACHABLE(); - return; - } - - spin_lock(&d->event_lock); - if ( test_and_clear_bool(pirq_dpci->masked) ) - { - struct pirq *pirq = dpci_pirq(pirq_dpci); - const struct dev_intx_gsi_link *digl; - - if ( hvm_domain_use_pirq(d, pirq) ) - { - send_guest_pirq(d, pirq); - - if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI ) - goto out; - } - - if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI ) - { - vmsi_deliver_pirq(d, pirq_dpci); - goto out; - } - - list_for_each_entry ( digl, &pirq_dpci->digl_list, list ) - { - ASSERT(!(pirq_dpci->flags & HVM_IRQ_DPCI_IDENTITY_GSI)); - hvm_pci_intx_assert(d, digl->device, digl->intx); - pirq_dpci->pending++; - } - - if ( pirq_dpci->flags & HVM_IRQ_DPCI_IDENTITY_GSI ) - { - hvm_gsi_assert(d, pirq->pirq); - if ( pirq_dpci->flags & HVM_IRQ_DPCI_NO_EOI ) - goto out; - pirq_dpci->pending++; - } - - if ( pirq_dpci->flags & HVM_IRQ_DPCI_TRANSLATE ) - { - /* for translated MSI to INTx interrupt, eoi as early as possible */ - __msi_pirq_eoi(pirq_dpci); - goto out; - } - - /* - * Set a timer to see if the guest can finish the interrupt or not. For - * example, the guest OS may unmask the PIC during boot, before the - * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the - * guest will never deal with the irq, then the physical interrupt line - * will never be deasserted. - */ - ASSERT(pt_irq_need_timer(pirq_dpci->flags)); - set_timer(&pirq_dpci->timer, NOW() + PT_IRQ_TIME_OUT); - } - - out: - spin_unlock(&d->event_lock); -} - -static void hvm_pirq_eoi(struct pirq *pirq, - const union vioapic_redir_entry *ent) -{ - struct hvm_pirq_dpci *pirq_dpci; - - if ( !pirq ) - { - ASSERT_UNREACHABLE(); - return; - } - - pirq_dpci = pirq_dpci(pirq); - - /* - * No need to get vector lock for timer - * since interrupt is still not EOIed - */ - if ( --pirq_dpci->pending || - (ent && ent->fields.mask) || - !pt_irq_need_timer(pirq_dpci->flags) ) - return; - - stop_timer(&pirq_dpci->timer); - pirq_guest_eoi(pirq); -} - -static void __hvm_dpci_eoi(struct domain *d, - const struct hvm_girq_dpci_mapping *girq, - const union vioapic_redir_entry *ent) -{ - struct pirq *pirq = pirq_info(d, girq->machine_gsi); - - if ( !hvm_domain_use_pirq(d, pirq) ) - hvm_pci_intx_deassert(d, girq->device, girq->intx); - - hvm_pirq_eoi(pirq, ent); -} - -static void hvm_gsi_eoi(struct domain *d, unsigned int gsi, - const union vioapic_redir_entry *ent) -{ - struct pirq *pirq = pirq_info(d, gsi); - - /* Check if GSI is actually mapped. 
*/ - if ( !pirq_dpci(pirq) ) - return; - - hvm_gsi_deassert(d, gsi); - hvm_pirq_eoi(pirq, ent); -} - -void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi, - const union vioapic_redir_entry *ent) -{ - const struct hvm_irq_dpci *hvm_irq_dpci; - const struct hvm_girq_dpci_mapping *girq; - - if ( !is_iommu_enabled(d) ) - return; - - if ( is_hardware_domain(d) ) - { - spin_lock(&d->event_lock); - hvm_gsi_eoi(d, guest_gsi, ent); - goto unlock; - } - - if ( guest_gsi < NR_ISAIRQS ) - { - hvm_dpci_isairq_eoi(d, guest_gsi); - return; - } - - spin_lock(&d->event_lock); - hvm_irq_dpci = domain_get_irq_dpci(d); - - if ( !hvm_irq_dpci ) - goto unlock; - - list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list ) - __hvm_dpci_eoi(d, girq, ent); - -unlock: - spin_unlock(&d->event_lock); -} - -/* - * Note: 'pt_pirq_softirq_reset' can clear the STATE_SCHED before we get to - * doing it. If that is the case we let 'pt_pirq_softirq_reset' do ref-counting. - */ -static void dpci_softirq(void) -{ - unsigned int cpu = smp_processor_id(); - LIST_HEAD(our_list); - - local_irq_disable(); - list_splice_init(&per_cpu(dpci_list, cpu), &our_list); - local_irq_enable(); - - while ( !list_empty(&our_list) ) - { - struct hvm_pirq_dpci *pirq_dpci; - struct domain *d; - - pirq_dpci = list_entry(our_list.next, struct hvm_pirq_dpci, softirq_list); - list_del(&pirq_dpci->softirq_list); - - d = pirq_dpci->dom; - smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */ - if ( test_and_set_bit(STATE_RUN, &pirq_dpci->state) ) - { - unsigned long flags; - - /* Put back on the list and retry. */ - local_irq_save(flags); - list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list)); - local_irq_restore(flags); - - raise_softirq(HVM_DPCI_SOFTIRQ); - continue; - } - /* - * The one who clears STATE_SCHED MUST refcount the domain. - */ - if ( test_and_clear_bit(STATE_SCHED, &pirq_dpci->state) ) - { - hvm_dirq_assist(d, pirq_dpci); - put_domain(d); - } - clear_bit(STATE_RUN, &pirq_dpci->state); - } -} - -static int cpu_callback( - struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - switch ( action ) - { - case CPU_UP_PREPARE: - INIT_LIST_HEAD(&per_cpu(dpci_list, cpu)); - break; - case CPU_UP_CANCELED: - case CPU_DEAD: - /* - * On CPU_DYING this callback is called (on the CPU that is dying) - * with an possible HVM_DPIC_SOFTIRQ pending - at which point we can - * clear out any outstanding domains (by the virtue of the idle loop - * calling the softirq later). In CPU_DEAD case the CPU is deaf and - * there are no pending softirqs for us to handle so we can chill. 
- */ - ASSERT(list_empty(&per_cpu(dpci_list, cpu))); - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block cpu_nfb = { - .notifier_call = cpu_callback, -}; - -static int __init setup_dpci_softirq(void) -{ - unsigned int cpu; - - for_each_online_cpu(cpu) - INIT_LIST_HEAD(&per_cpu(dpci_list, cpu)); - - open_softirq(HVM_DPCI_SOFTIRQ, dpci_softirq); - register_cpu_notifier(&cpu_nfb); - return 0; -} -__initcall(setup_dpci_softirq); diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index 51e584127e..ab590ca398 100644 --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -24,7 +24,6 @@ #include <xen/irq.h> #include <xen/param.h> #include <xen/vm_event.h> -#include <asm/hvm/irq.h> #include <xen/delay.h> #include <xen/keyhandler.h> #include <xen/event.h> @@ -842,71 +841,6 @@ int pci_remove_device(u16 seg, u8 bus, u8 devfn) return ret; } -static int pci_clean_dpci_irq(struct domain *d, - struct hvm_pirq_dpci *pirq_dpci, void *arg) -{ - struct dev_intx_gsi_link *digl, *tmp; - - pirq_guest_unbind(d, dpci_pirq(pirq_dpci)); - - if ( pt_irq_need_timer(pirq_dpci->flags) ) - kill_timer(&pirq_dpci->timer); - - list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list ) - { - list_del(&digl->list); - xfree(digl); - } - - radix_tree_delete(&d->pirq_tree, dpci_pirq(pirq_dpci)->pirq); - - if ( !pt_pirq_softirq_active(pirq_dpci) ) - return 0; - - domain_get_irq_dpci(d)->pending_pirq_dpci = pirq_dpci; - - return -ERESTART; -} - -static int pci_clean_dpci_irqs(struct domain *d) -{ - struct hvm_irq_dpci *hvm_irq_dpci = NULL; - - if ( !is_iommu_enabled(d) ) - return 0; - - if ( !is_hvm_domain(d) ) - return 0; - - spin_lock(&d->event_lock); - hvm_irq_dpci = domain_get_irq_dpci(d); - if ( hvm_irq_dpci != NULL ) - { - int ret = 0; - - if ( hvm_irq_dpci->pending_pirq_dpci ) - { - if ( pt_pirq_softirq_active(hvm_irq_dpci->pending_pirq_dpci) ) - ret = -ERESTART; - else - hvm_irq_dpci->pending_pirq_dpci = NULL; - } - - if ( !ret ) - ret = pt_pirq_iterate(d, pci_clean_dpci_irq, NULL); - if ( ret ) - { - spin_unlock(&d->event_lock); - return ret; - } - - hvm_domain_irq(d)->dpci = NULL; - free_hvm_irq_dpci(hvm_irq_dpci); - } - spin_unlock(&d->event_lock); - return 0; -} - /* Caller should hold the pcidevs_lock */ static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus, uint8_t devfn) @@ -966,7 +900,7 @@ int pci_release_devices(struct domain *d) int ret; pcidevs_lock(); - ret = pci_clean_dpci_irqs(d); + ret = arch_pci_clean_pirqs(d); if ( ret ) { pcidevs_unlock(); diff --git a/xen/drivers/passthrough/x86/Makefile b/xen/drivers/passthrough/x86/Makefile index a70cf9460d..69284a5d19 100644 --- a/xen/drivers/passthrough/x86/Makefile +++ b/xen/drivers/passthrough/x86/Makefile @@ -1,2 +1,3 @@ obj-y += ats.o obj-y += iommu.o +obj-$(CONFIG_HVM) += hvm.o diff --git a/xen/drivers/passthrough/x86/hvm.c b/xen/drivers/passthrough/x86/hvm.c new file mode 100644 index 0000000000..41cfa2e200 --- /dev/null +++ b/xen/drivers/passthrough/x86/hvm.c @@ -0,0 +1,1193 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; If not, see <http://www.gnu.org/licenses/>. + * + * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx> + * Copyright (C) Xiaohui Xin <xiaohui.xin@xxxxxxxxx> + */ + +#include <xen/event.h> +#include <xen/iommu.h> +#include <xen/cpu.h> +#include <xen/irq.h> +#include <asm/hvm/irq.h> +#include <asm/hvm/support.h> +#include <asm/io_apic.h> + +static DEFINE_PER_CPU(struct list_head, dpci_list); + +/* + * These two bit states help to safely schedule, deschedule, and wait until + * the softirq has finished. + * + * The semantics behind these two bits is as follow: + * - STATE_SCHED - whoever modifies it has to ref-count the domain (->dom). + * - STATE_RUN - only softirq is allowed to set and clear it. If it has + * been set hvm_dirq_assist will RUN with a saved value of the + * 'struct domain' copied from 'pirq_dpci->dom' before STATE_RUN was set. + * + * The usual states are: STATE_SCHED(set) -> STATE_RUN(set) -> + * STATE_SCHED(unset) -> STATE_RUN(unset). + * + * However the states can also diverge such as: STATE_SCHED(set) -> + * STATE_SCHED(unset) -> STATE_RUN(set) -> STATE_RUN(unset). That means + * the 'hvm_dirq_assist' never run and that the softirq did not do any + * ref-counting. + */ + +enum { + STATE_SCHED, + STATE_RUN +}; + +/* + * This can be called multiple times, but the softirq is only raised once. + * That is until the STATE_SCHED state has been cleared. The state can be + * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'), + * or by 'pt_pirq_softirq_reset' (which will try to clear the state before + * the softirq had a chance to run). + */ +static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci) +{ + unsigned long flags; + + if ( test_and_set_bit(STATE_SCHED, &pirq_dpci->state) ) + return; + + get_knownalive_domain(pirq_dpci->dom); + + local_irq_save(flags); + list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list)); + local_irq_restore(flags); + + raise_softirq(HVM_DPCI_SOFTIRQ); +} + +/* + * If we are racing with softirq_dpci (STATE_SCHED) we return + * true. Otherwise we return false. + * + * If it is false, it is the callers responsibility to make sure + * that the softirq (with the event_lock dropped) has ran. + */ +bool pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci) +{ + if ( pirq_dpci->state & ((1 << STATE_RUN) | (1 << STATE_SCHED)) ) + return true; + + /* + * If in the future we would call 'raise_softirq_for' right away + * after 'pt_pirq_softirq_active' we MUST reset the list (otherwise it + * might have stale data). + */ + return false; +} + +/* + * Reset the pirq_dpci->dom parameter to NULL. + * + * This function checks the different states to make sure it can do it + * at the right time. If it unschedules the 'hvm_dirq_assist' from running + * it also refcounts (which is what the softirq would have done) properly. + */ +static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci) +{ + struct domain *d = pirq_dpci->dom; + + ASSERT(spin_is_locked(&d->event_lock)); + + switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 0) ) + { + case (1 << STATE_SCHED): + /* + * We are going to try to de-schedule the softirq before it goes in + * STATE_RUN. Whoever clears STATE_SCHED MUST refcount the 'dom'. + */ + put_domain(d); + /* fallthrough. 
*/ + case (1 << STATE_RUN): + case (1 << STATE_RUN) | (1 << STATE_SCHED): + /* + * The reason it is OK to reset 'dom' when STATE_RUN bit is set is due + * to a shortcut the 'dpci_softirq' implements. It stashes the 'dom' + * in local variable before it sets STATE_RUN - and therefore will not + * dereference '->dom' which would crash. + */ + pirq_dpci->dom = NULL; + break; + } + /* + * Inhibit 'hvm_dirq_assist' from doing anything useful and at worst + * calling 'set_timer' which will blow up (as we have called kill_timer + * or never initialized it). Note that we hold the lock that + * 'hvm_dirq_assist' could be spinning on. + */ + pirq_dpci->masked = 0; +} + +bool pt_irq_need_timer(uint32_t flags) +{ + return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE | + HVM_IRQ_DPCI_NO_EOI)); +} + +static int pt_irq_guest_eoi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci, + void *arg) +{ + if ( __test_and_clear_bit(_HVM_IRQ_DPCI_EOI_LATCH_SHIFT, + &pirq_dpci->flags) ) + { + pirq_dpci->masked = 0; + pirq_dpci->pending = 0; + pirq_guest_eoi(dpci_pirq(pirq_dpci)); + } + + return 0; +} + +static void pt_irq_time_out(void *data) +{ + struct hvm_pirq_dpci *irq_map = data; + const struct hvm_irq_dpci *dpci; + const struct dev_intx_gsi_link *digl; + + spin_lock(&irq_map->dom->event_lock); + + if ( irq_map->flags & HVM_IRQ_DPCI_IDENTITY_GSI ) + { + ASSERT(is_hardware_domain(irq_map->dom)); + /* + * Identity mapped, no need to iterate over the guest GSI list to find + * other pirqs sharing the same guest GSI. + * + * In the identity mapped case the EOI can also be done now, this way + * the iteration over the list of domain pirqs is avoided. + */ + hvm_gsi_deassert(irq_map->dom, dpci_pirq(irq_map)->pirq); + irq_map->flags |= HVM_IRQ_DPCI_EOI_LATCH; + pt_irq_guest_eoi(irq_map->dom, irq_map, NULL); + spin_unlock(&irq_map->dom->event_lock); + return; + } + + dpci = domain_get_irq_dpci(irq_map->dom); + if ( unlikely(!dpci) ) + { + ASSERT_UNREACHABLE(); + spin_unlock(&irq_map->dom->event_lock); + return; + } + list_for_each_entry ( digl, &irq_map->digl_list, list ) + { + unsigned int guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx); + const struct hvm_girq_dpci_mapping *girq; + + list_for_each_entry ( girq, &dpci->girq[guest_gsi], list ) + { + struct pirq *pirq = pirq_info(irq_map->dom, girq->machine_gsi); + + pirq_dpci(pirq)->flags |= HVM_IRQ_DPCI_EOI_LATCH; + } + hvm_pci_intx_deassert(irq_map->dom, digl->device, digl->intx); + } + + pt_pirq_iterate(irq_map->dom, pt_irq_guest_eoi, NULL); + + spin_unlock(&irq_map->dom->event_lock); +} + +struct hvm_irq_dpci *domain_get_irq_dpci(const struct domain *d) +{ + if ( !d || !is_hvm_domain(d) ) + return NULL; + + return hvm_domain_irq(d)->dpci; +} + +void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci) +{ + xfree(dpci); +} + +/* + * This routine handles lowest-priority interrupts using vector-hashing + * mechanism. As an example, modern Intel CPUs use this method to handle + * lowest-priority interrupts. + * + * Here is the details about the vector-hashing mechanism: + * 1. For lowest-priority interrupts, store all the possible destination + * vCPUs in an array. + * 2. Use "gvec % max number of destination vCPUs" to find the right + * destination vCPU in the array for the lowest-priority interrupt. 
+ */ +static struct vcpu *vector_hashing_dest(const struct domain *d, + uint32_t dest_id, + bool dest_mode, + uint8_t gvec) + +{ + unsigned long *dest_vcpu_bitmap; + unsigned int dest_vcpus = 0; + struct vcpu *v, *dest = NULL; + unsigned int i; + + dest_vcpu_bitmap = xzalloc_array(unsigned long, + BITS_TO_LONGS(d->max_vcpus)); + if ( !dest_vcpu_bitmap ) + return NULL; + + for_each_vcpu ( d, v ) + { + if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, APIC_DEST_NOSHORT, + dest_id, dest_mode) ) + continue; + + __set_bit(v->vcpu_id, dest_vcpu_bitmap); + dest_vcpus++; + } + + if ( dest_vcpus != 0 ) + { + unsigned int mod = gvec % dest_vcpus; + unsigned int idx = 0; + + for ( i = 0; i <= mod; i++ ) + { + idx = find_next_bit(dest_vcpu_bitmap, d->max_vcpus, idx) + 1; + BUG_ON(idx > d->max_vcpus); + } + + dest = d->vcpu[idx - 1]; + } + + xfree(dest_vcpu_bitmap); + + return dest; +} + +int pt_irq_create_bind( + struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind) +{ + struct hvm_irq_dpci *hvm_irq_dpci; + struct hvm_pirq_dpci *pirq_dpci; + struct pirq *info; + int rc, pirq = pt_irq_bind->machine_irq; + + if ( pirq < 0 || pirq >= d->nr_pirqs ) + return -EINVAL; + + restart: + spin_lock(&d->event_lock); + + hvm_irq_dpci = domain_get_irq_dpci(d); + if ( !hvm_irq_dpci && !is_hardware_domain(d) ) + { + unsigned int i; + + /* + * NB: the hardware domain doesn't use a hvm_irq_dpci struct because + * it's only allowed to identity map GSIs, and so the data contained in + * that struct (used to map guest GSIs into machine GSIs and perform + * interrupt routing) is completely useless to it. + */ + hvm_irq_dpci = xzalloc(struct hvm_irq_dpci); + if ( hvm_irq_dpci == NULL ) + { + spin_unlock(&d->event_lock); + return -ENOMEM; + } + for ( i = 0; i < NR_HVM_DOMU_IRQS; i++ ) + INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]); + + hvm_domain_irq(d)->dpci = hvm_irq_dpci; + } + + info = pirq_get_info(d, pirq); + if ( !info ) + { + spin_unlock(&d->event_lock); + return -ENOMEM; + } + pirq_dpci = pirq_dpci(info); + + /* + * A crude 'while' loop with us dropping the spinlock and giving + * the softirq_dpci a chance to run. + * We MUST check for this condition as the softirq could be scheduled + * and hasn't run yet. Note that this code replaced tasklet_kill which + * would have spun forever and would do the same thing (wait to flush out + * outstanding hvm_dirq_assist calls. + */ + if ( pt_pirq_softirq_active(pirq_dpci) ) + { + spin_unlock(&d->event_lock); + cpu_relax(); + goto restart; + } + + switch ( pt_irq_bind->irq_type ) + { + case PT_IRQ_TYPE_MSI: + { + uint8_t dest, delivery_mode; + bool dest_mode; + int dest_vcpu_id; + const struct vcpu *vcpu; + uint32_t gflags = pt_irq_bind->u.msi.gflags & + ~XEN_DOMCTL_VMSI_X86_UNMASKED; + + if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) + { + pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | HVM_IRQ_DPCI_MACH_MSI | + HVM_IRQ_DPCI_GUEST_MSI; + pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec; + pirq_dpci->gmsi.gflags = gflags; + /* + * 'pt_irq_create_bind' can be called after 'pt_irq_destroy_bind'. + * The 'pirq_cleanup_check' which would free the structure is only + * called if the event channel for the PIRQ is active. However + * OS-es that use event channels usually bind PIRQs to eventds + * and unbind them before calling 'pt_irq_destroy_bind' - with the + * result that we re-use the 'dpci' structure. This can be + * reproduced with unloading and loading the driver for a device. + * + * As such on every 'pt_irq_create_bind' call we MUST set it. 
+ */ + pirq_dpci->dom = d; + /* bind after hvm_irq_dpci is setup to avoid race with irq handler*/ + rc = pirq_guest_bind(d->vcpu[0], info, 0); + if ( rc == 0 && pt_irq_bind->u.msi.gtable ) + { + rc = msixtbl_pt_register(d, info, pt_irq_bind->u.msi.gtable); + if ( unlikely(rc) ) + { + pirq_guest_unbind(d, info); + /* + * Between 'pirq_guest_bind' and before 'pirq_guest_unbind' + * an interrupt can be scheduled. No more of them are going + * to be scheduled but we must deal with the one that may be + * in the queue. + */ + pt_pirq_softirq_reset(pirq_dpci); + } + } + if ( unlikely(rc) ) + { + pirq_dpci->gmsi.gflags = 0; + pirq_dpci->gmsi.gvec = 0; + pirq_dpci->dom = NULL; + pirq_dpci->flags = 0; + pirq_cleanup_check(info, d); + spin_unlock(&d->event_lock); + return rc; + } + } + else + { + uint32_t mask = HVM_IRQ_DPCI_MACH_MSI | HVM_IRQ_DPCI_GUEST_MSI; + + if ( (pirq_dpci->flags & mask) != mask ) + { + spin_unlock(&d->event_lock); + return -EBUSY; + } + + /* If pirq is already mapped as vmsi, update guest data/addr. */ + if ( pirq_dpci->gmsi.gvec != pt_irq_bind->u.msi.gvec || + pirq_dpci->gmsi.gflags != gflags ) + { + /* Directly clear pending EOIs before enabling new MSI info. */ + pirq_guest_eoi(info); + + pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec; + pirq_dpci->gmsi.gflags = gflags; + } + } + /* Calculate dest_vcpu_id for MSI-type pirq migration. */ + dest = MASK_EXTR(pirq_dpci->gmsi.gflags, + XEN_DOMCTL_VMSI_X86_DEST_ID_MASK); + dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK; + delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags, + XEN_DOMCTL_VMSI_X86_DELIV_MASK); + + dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode); + pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id; + spin_unlock(&d->event_lock); + + pirq_dpci->gmsi.posted = false; + vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL; + if ( iommu_intpost ) + { + if ( delivery_mode == dest_LowestPrio ) + vcpu = vector_hashing_dest(d, dest, dest_mode, + pirq_dpci->gmsi.gvec); + if ( vcpu ) + pirq_dpci->gmsi.posted = true; + } + if ( vcpu && is_iommu_enabled(d) ) + hvm_migrate_pirq(pirq_dpci, vcpu); + + /* Use interrupt posting if it is supported. */ + if ( iommu_intpost ) + pi_update_irte(vcpu ? &vcpu->arch.hvm.vmx.pi_desc : NULL, + info, pirq_dpci->gmsi.gvec); + + if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED ) + { + unsigned long flags; + struct irq_desc *desc = pirq_spin_lock_irq_desc(info, &flags); + + if ( !desc ) + { + pt_irq_destroy_bind(d, pt_irq_bind); + return -EINVAL; + } + + guest_mask_msi_irq(desc, false); + spin_unlock_irqrestore(&desc->lock, flags); + } + + break; + } + + case PT_IRQ_TYPE_PCI: + case PT_IRQ_TYPE_MSI_TRANSLATE: + { + struct dev_intx_gsi_link *digl = NULL; + struct hvm_girq_dpci_mapping *girq = NULL; + unsigned int guest_gsi; + + /* + * Mapping GSIs for the hardware domain is different than doing it for + * an unpriviledged guest, the hardware domain is only allowed to + * identity map GSIs, and as such all the data in the u.pci union is + * discarded. 
+ */ + if ( hvm_irq_dpci ) + { + unsigned int link; + + digl = xmalloc(struct dev_intx_gsi_link); + girq = xmalloc(struct hvm_girq_dpci_mapping); + + if ( !digl || !girq ) + { + spin_unlock(&d->event_lock); + xfree(girq); + xfree(digl); + return -ENOMEM; + } + + girq->bus = digl->bus = pt_irq_bind->u.pci.bus; + girq->device = digl->device = pt_irq_bind->u.pci.device; + girq->intx = digl->intx = pt_irq_bind->u.pci.intx; + list_add_tail(&digl->list, &pirq_dpci->digl_list); + + guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx); + link = hvm_pci_intx_link(digl->device, digl->intx); + + hvm_irq_dpci->link_cnt[link]++; + + girq->machine_gsi = pirq; + list_add_tail(&girq->list, &hvm_irq_dpci->girq[guest_gsi]); + } + else + { + ASSERT(is_hardware_domain(d)); + + /* MSI_TRANSLATE is not supported for the hardware domain. */ + if ( pt_irq_bind->irq_type != PT_IRQ_TYPE_PCI || + pirq >= hvm_domain_irq(d)->nr_gsis ) + { + spin_unlock(&d->event_lock); + + return -EINVAL; + } + guest_gsi = pirq; + } + + /* Bind the same mirq once in the same domain */ + if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) + { + unsigned int share; + + /* MUST be set, as the pirq_dpci can be re-used. */ + pirq_dpci->dom = d; + if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_TRANSLATE ) + { + pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | + HVM_IRQ_DPCI_MACH_MSI | + HVM_IRQ_DPCI_GUEST_PCI | + HVM_IRQ_DPCI_TRANSLATE; + share = 0; + } + else /* PT_IRQ_TYPE_PCI */ + { + pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | + HVM_IRQ_DPCI_MACH_PCI | + HVM_IRQ_DPCI_GUEST_PCI; + if ( !is_hardware_domain(d) ) + share = BIND_PIRQ__WILL_SHARE; + else + { + int mask = vioapic_get_mask(d, guest_gsi); + int trigger_mode = vioapic_get_trigger_mode(d, guest_gsi); + + if ( mask < 0 || trigger_mode < 0 ) + { + spin_unlock(&d->event_lock); + + ASSERT_UNREACHABLE(); + return -EINVAL; + } + pirq_dpci->flags |= HVM_IRQ_DPCI_IDENTITY_GSI; + /* + * Check if the corresponding vIO APIC pin is configured + * level or edge trigger, level triggered interrupts will + * be marked as shareable. + */ + ASSERT(!mask); + share = trigger_mode; + if ( trigger_mode == VIOAPIC_EDGE_TRIG ) + /* + * Edge IO-APIC interrupt, no EOI or unmask to perform + * and hence no timer needed. + */ + pirq_dpci->flags |= HVM_IRQ_DPCI_NO_EOI; + } + } + + /* Init timer before binding */ + if ( pt_irq_need_timer(pirq_dpci->flags) ) + init_timer(&pirq_dpci->timer, pt_irq_time_out, pirq_dpci, 0); + /* Deal with gsi for legacy devices */ + rc = pirq_guest_bind(d->vcpu[0], info, share); + if ( unlikely(rc) ) + { + if ( pt_irq_need_timer(pirq_dpci->flags) ) + kill_timer(&pirq_dpci->timer); + /* + * There is no path for __do_IRQ to schedule softirq as + * IRQ_GUEST is not set. As such we can reset 'dom' directly. 
+ */ + pirq_dpci->dom = NULL; + if ( hvm_irq_dpci ) + { + unsigned int link; + + ASSERT(girq && digl); + list_del(&girq->list); + list_del(&digl->list); + link = hvm_pci_intx_link(digl->device, digl->intx); + hvm_irq_dpci->link_cnt[link]--; + } + pirq_dpci->flags = 0; + pirq_cleanup_check(info, d); + spin_unlock(&d->event_lock); + xfree(girq); + xfree(digl); + return rc; + } + } + + spin_unlock(&d->event_lock); + + if ( iommu_verbose ) + { + char buf[24] = ""; + + if ( digl ) + snprintf(buf, ARRAY_SIZE(buf), " dev=%02x.%02x.%u intx=%u", + digl->bus, PCI_SLOT(digl->device), + PCI_FUNC(digl->device), digl->intx); + + printk(XENLOG_G_INFO "d%d: bind: m_gsi=%u g_gsi=%u%s\n", + d->domain_id, pirq, guest_gsi, buf); + } + break; + } + + default: + spin_unlock(&d->event_lock); + return -EOPNOTSUPP; + } + + return 0; +} + +int pt_irq_destroy_bind( + struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind) +{ + struct hvm_irq_dpci *hvm_irq_dpci; + struct hvm_pirq_dpci *pirq_dpci; + unsigned int machine_gsi = pt_irq_bind->machine_irq; + struct pirq *pirq; + const char *what = NULL; + + switch ( pt_irq_bind->irq_type ) + { + case PT_IRQ_TYPE_PCI: + case PT_IRQ_TYPE_MSI_TRANSLATE: + if ( iommu_verbose ) + { + unsigned int device = pt_irq_bind->u.pci.device; + unsigned int intx = pt_irq_bind->u.pci.intx; + + printk(XENLOG_G_INFO + "d%d: unbind: m_gsi=%u g_gsi=%u dev=%02x:%02x.%u intx=%u\n", + d->domain_id, machine_gsi, hvm_pci_intx_gsi(device, intx), + pt_irq_bind->u.pci.bus, + PCI_SLOT(device), PCI_FUNC(device), intx); + } + break; + case PT_IRQ_TYPE_MSI: + { + unsigned long flags; + struct irq_desc *desc = domain_spin_lock_irq_desc(d, machine_gsi, + &flags); + + if ( !desc ) + return -EINVAL; + /* + * Leave the MSI masked, so that the state when calling + * pt_irq_create_bind is consistent across bind/unbinds. + */ + guest_mask_msi_irq(desc, true); + spin_unlock_irqrestore(&desc->lock, flags); + break; + } + + default: + return -EOPNOTSUPP; + } + + spin_lock(&d->event_lock); + + hvm_irq_dpci = domain_get_irq_dpci(d); + + if ( !hvm_irq_dpci && !is_hardware_domain(d) ) + { + spin_unlock(&d->event_lock); + return -EINVAL; + } + + pirq = pirq_info(d, machine_gsi); + pirq_dpci = pirq_dpci(pirq); + + if ( hvm_irq_dpci && pt_irq_bind->irq_type != PT_IRQ_TYPE_MSI ) + { + unsigned int bus = pt_irq_bind->u.pci.bus; + unsigned int device = pt_irq_bind->u.pci.device; + unsigned int intx = pt_irq_bind->u.pci.intx; + unsigned int guest_gsi = hvm_pci_intx_gsi(device, intx); + unsigned int link = hvm_pci_intx_link(device, intx); + struct hvm_girq_dpci_mapping *girq; + struct dev_intx_gsi_link *digl, *tmp; + + list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list ) + { + if ( girq->bus == bus && + girq->device == device && + girq->intx == intx && + girq->machine_gsi == machine_gsi ) + { + list_del(&girq->list); + xfree(girq); + girq = NULL; + break; + } + } + + if ( girq ) + { + spin_unlock(&d->event_lock); + return -EINVAL; + } + + hvm_irq_dpci->link_cnt[link]--; + + /* clear the mirq info */ + if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) + { + list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list ) + { + if ( digl->bus == bus && + digl->device == device && + digl->intx == intx ) + { + list_del(&digl->list); + xfree(digl); + } + } + what = list_empty(&pirq_dpci->digl_list) ? 
"final" : "partial"; + } + else + what = "bogus"; + } + else if ( pirq_dpci && pirq_dpci->gmsi.posted ) + pi_update_irte(NULL, pirq, 0); + + if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) && + list_empty(&pirq_dpci->digl_list) ) + { + pirq_guest_unbind(d, pirq); + msixtbl_pt_unregister(d, pirq); + if ( pt_irq_need_timer(pirq_dpci->flags) ) + kill_timer(&pirq_dpci->timer); + pirq_dpci->flags = 0; + /* + * See comment in pt_irq_create_bind's PT_IRQ_TYPE_MSI before the + * call to pt_pirq_softirq_reset. + */ + pt_pirq_softirq_reset(pirq_dpci); + + pirq_cleanup_check(pirq, d); + } + + spin_unlock(&d->event_lock); + + if ( what && iommu_verbose ) + { + unsigned int device = pt_irq_bind->u.pci.device; + char buf[24] = ""; + + if ( hvm_irq_dpci ) + snprintf(buf, ARRAY_SIZE(buf), " dev=%02x.%02x.%u intx=%u", + pt_irq_bind->u.pci.bus, PCI_SLOT(device), + PCI_FUNC(device), pt_irq_bind->u.pci.intx); + + printk(XENLOG_G_INFO "d%d %s unmap: m_irq=%u%s\n", + d->domain_id, what, machine_gsi, buf); + } + + return 0; +} + +void pt_pirq_init(struct domain *d, struct hvm_pirq_dpci *dpci) +{ + INIT_LIST_HEAD(&dpci->digl_list); + dpci->gmsi.dest_vcpu_id = -1; +} + +bool pt_pirq_cleanup_check(struct hvm_pirq_dpci *dpci) +{ + if ( !dpci->flags && !pt_pirq_softirq_active(dpci) ) + { + dpci->dom = NULL; + return true; + } + return false; +} + +int pt_pirq_iterate(struct domain *d, + int (*cb)(struct domain *, + struct hvm_pirq_dpci *, void *), + void *arg) +{ + int rc = 0; + unsigned int pirq = 0, n, i; + struct pirq *pirqs[8]; + + ASSERT(spin_is_locked(&d->event_lock)); + + do { + n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq, + ARRAY_SIZE(pirqs)); + for ( i = 0; i < n; ++i ) + { + struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirqs[i]); + + pirq = pirqs[i]->pirq; + if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) + rc = cb(d, pirq_dpci, arg); + } + } while ( !rc && ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) ); + + return rc; +} + +int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq) +{ + struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d); + struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirq); + + ASSERT(is_hvm_domain(d)); + + if ( !is_iommu_enabled(d) || (!is_hardware_domain(d) && !dpci) || + !pirq_dpci || !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) + return 0; + + pirq_dpci->masked = 1; + raise_softirq_for(pirq_dpci); + return 1; +} + +/* called with d->event_lock held */ +static void __msi_pirq_eoi(struct hvm_pirq_dpci *pirq_dpci) +{ + irq_desc_t *desc; + + if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) && + (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) ) + { + struct pirq *pirq = dpci_pirq(pirq_dpci); + + BUG_ON(!local_irq_is_enabled()); + desc = pirq_spin_lock_irq_desc(pirq, NULL); + if ( !desc ) + return; + desc_guest_eoi(desc, pirq); + } +} + +static int _hvm_dpci_msi_eoi(struct domain *d, + struct hvm_pirq_dpci *pirq_dpci, void *arg) +{ + int vector = (long)arg; + + if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) && + (pirq_dpci->gmsi.gvec == vector) ) + { + unsigned int dest = MASK_EXTR(pirq_dpci->gmsi.gflags, + XEN_DOMCTL_VMSI_X86_DEST_ID_MASK); + bool dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK; + + if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest, + dest_mode) ) + { + __msi_pirq_eoi(pirq_dpci); + return 1; + } + } + + return 0; +} + +void hvm_dpci_msi_eoi(struct domain *d, int vector) +{ + if ( !is_iommu_enabled(d) || + (!hvm_domain_irq(d)->dpci && !is_hardware_domain(d)) ) + return; + + spin_lock(&d->event_lock); + pt_pirq_iterate(d, 
_hvm_dpci_msi_eoi, (void *)(long)vector); + spin_unlock(&d->event_lock); +} + +static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci) +{ + if ( unlikely(!hvm_domain_irq(d)->dpci) && !is_hardware_domain(d) ) + { + ASSERT_UNREACHABLE(); + return; + } + + spin_lock(&d->event_lock); + if ( test_and_clear_bool(pirq_dpci->masked) ) + { + struct pirq *pirq = dpci_pirq(pirq_dpci); + const struct dev_intx_gsi_link *digl; + + if ( hvm_domain_use_pirq(d, pirq) ) + { + send_guest_pirq(d, pirq); + + if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI ) + goto out; + } + + if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI ) + { + vmsi_deliver_pirq(d, pirq_dpci); + goto out; + } + + list_for_each_entry ( digl, &pirq_dpci->digl_list, list ) + { + ASSERT(!(pirq_dpci->flags & HVM_IRQ_DPCI_IDENTITY_GSI)); + hvm_pci_intx_assert(d, digl->device, digl->intx); + pirq_dpci->pending++; + } + + if ( pirq_dpci->flags & HVM_IRQ_DPCI_IDENTITY_GSI ) + { + hvm_gsi_assert(d, pirq->pirq); + if ( pirq_dpci->flags & HVM_IRQ_DPCI_NO_EOI ) + goto out; + pirq_dpci->pending++; + } + + if ( pirq_dpci->flags & HVM_IRQ_DPCI_TRANSLATE ) + { + /* for translated MSI to INTx interrupt, eoi as early as possible */ + __msi_pirq_eoi(pirq_dpci); + goto out; + } + + /* + * Set a timer to see if the guest can finish the interrupt or not. For + * example, the guest OS may unmask the PIC during boot, before the + * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the + * guest will never deal with the irq, then the physical interrupt line + * will never be deasserted. + */ + ASSERT(pt_irq_need_timer(pirq_dpci->flags)); + set_timer(&pirq_dpci->timer, NOW() + PT_IRQ_TIME_OUT); + } + + out: + spin_unlock(&d->event_lock); +} + +static void hvm_pirq_eoi(struct pirq *pirq, + const union vioapic_redir_entry *ent) +{ + struct hvm_pirq_dpci *pirq_dpci; + + if ( !pirq ) + { + ASSERT_UNREACHABLE(); + return; + } + + pirq_dpci = pirq_dpci(pirq); + + /* + * No need to get vector lock for timer + * since interrupt is still not EOIed + */ + if ( --pirq_dpci->pending || + (ent && ent->fields.mask) || + !pt_irq_need_timer(pirq_dpci->flags) ) + return; + + stop_timer(&pirq_dpci->timer); + pirq_guest_eoi(pirq); +} + +static void __hvm_dpci_eoi(struct domain *d, + const struct hvm_girq_dpci_mapping *girq, + const union vioapic_redir_entry *ent) +{ + struct pirq *pirq = pirq_info(d, girq->machine_gsi); + + if ( !hvm_domain_use_pirq(d, pirq) ) + hvm_pci_intx_deassert(d, girq->device, girq->intx); + + hvm_pirq_eoi(pirq, ent); +} + +static void hvm_gsi_eoi(struct domain *d, unsigned int gsi, + const union vioapic_redir_entry *ent) +{ + struct pirq *pirq = pirq_info(d, gsi); + + /* Check if GSI is actually mapped. 
*/ + if ( !pirq_dpci(pirq) ) + return; + + hvm_gsi_deassert(d, gsi); + hvm_pirq_eoi(pirq, ent); +} + +void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi, + const union vioapic_redir_entry *ent) +{ + const struct hvm_irq_dpci *hvm_irq_dpci; + const struct hvm_girq_dpci_mapping *girq; + + if ( !is_iommu_enabled(d) ) + return; + + if ( is_hardware_domain(d) ) + { + spin_lock(&d->event_lock); + hvm_gsi_eoi(d, guest_gsi, ent); + goto unlock; + } + + if ( guest_gsi < NR_ISAIRQS ) + { + hvm_dpci_isairq_eoi(d, guest_gsi); + return; + } + + spin_lock(&d->event_lock); + hvm_irq_dpci = domain_get_irq_dpci(d); + + if ( !hvm_irq_dpci ) + goto unlock; + + list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list ) + __hvm_dpci_eoi(d, girq, ent); + +unlock: + spin_unlock(&d->event_lock); +} + +static int pci_clean_dpci_irq(struct domain *d, + struct hvm_pirq_dpci *pirq_dpci, void *arg) +{ + struct dev_intx_gsi_link *digl, *tmp; + + pirq_guest_unbind(d, dpci_pirq(pirq_dpci)); + + if ( pt_irq_need_timer(pirq_dpci->flags) ) + kill_timer(&pirq_dpci->timer); + + list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list ) + { + list_del(&digl->list); + xfree(digl); + } + + radix_tree_delete(&d->pirq_tree, dpci_pirq(pirq_dpci)->pirq); + + if ( !pt_pirq_softirq_active(pirq_dpci) ) + return 0; + + domain_get_irq_dpci(d)->pending_pirq_dpci = pirq_dpci; + + return -ERESTART; +} + +int arch_pci_clean_pirqs(struct domain *d) +{ + struct hvm_irq_dpci *hvm_irq_dpci = NULL; + + if ( !is_iommu_enabled(d) ) + return 0; + + if ( !is_hvm_domain(d) ) + return 0; + + spin_lock(&d->event_lock); + hvm_irq_dpci = domain_get_irq_dpci(d); + if ( hvm_irq_dpci != NULL ) + { + int ret = 0; + + if ( hvm_irq_dpci->pending_pirq_dpci ) + { + if ( pt_pirq_softirq_active(hvm_irq_dpci->pending_pirq_dpci) ) + ret = -ERESTART; + else + hvm_irq_dpci->pending_pirq_dpci = NULL; + } + + if ( !ret ) + ret = pt_pirq_iterate(d, pci_clean_dpci_irq, NULL); + if ( ret ) + { + spin_unlock(&d->event_lock); + return ret; + } + + hvm_domain_irq(d)->dpci = NULL; + free_hvm_irq_dpci(hvm_irq_dpci); + } + spin_unlock(&d->event_lock); + + return 0; +} + +/* + * Note: 'pt_pirq_softirq_reset' can clear the STATE_SCHED before we get to + * doing it. If that is the case we let 'pt_pirq_softirq_reset' do ref-counting. + */ +static void dpci_softirq(void) +{ + unsigned int cpu = smp_processor_id(); + LIST_HEAD(our_list); + + local_irq_disable(); + list_splice_init(&per_cpu(dpci_list, cpu), &our_list); + local_irq_enable(); + + while ( !list_empty(&our_list) ) + { + struct hvm_pirq_dpci *pirq_dpci; + struct domain *d; + + pirq_dpci = list_entry(our_list.next, struct hvm_pirq_dpci, softirq_list); + list_del(&pirq_dpci->softirq_list); + + d = pirq_dpci->dom; + smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */ + if ( test_and_set_bit(STATE_RUN, &pirq_dpci->state) ) + { + unsigned long flags; + + /* Put back on the list and retry. */ + local_irq_save(flags); + list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list)); + local_irq_restore(flags); + + raise_softirq(HVM_DPCI_SOFTIRQ); + continue; + } + /* + * The one who clears STATE_SCHED MUST refcount the domain. 
+ */ + if ( test_and_clear_bit(STATE_SCHED, &pirq_dpci->state) ) + { + hvm_dirq_assist(d, pirq_dpci); + put_domain(d); + } + clear_bit(STATE_RUN, &pirq_dpci->state); + } +} + +static int cpu_callback( + struct notifier_block *nfb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + switch ( action ) + { + case CPU_UP_PREPARE: + INIT_LIST_HEAD(&per_cpu(dpci_list, cpu)); + break; + case CPU_UP_CANCELED: + case CPU_DEAD: + /* + * On CPU_DYING this callback is called (on the CPU that is dying) + * with an possible HVM_DPIC_SOFTIRQ pending - at which point we can + * clear out any outstanding domains (by the virtue of the idle loop + * calling the softirq later). In CPU_DEAD case the CPU is deaf and + * there are no pending softirqs for us to handle so we can chill. + */ + ASSERT(list_empty(&per_cpu(dpci_list, cpu))); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block cpu_nfb = { + .notifier_call = cpu_callback, +}; + +static int __init setup_dpci_softirq(void) +{ + unsigned int cpu; + + for_each_online_cpu(cpu) + INIT_LIST_HEAD(&per_cpu(dpci_list, cpu)); + + open_softirq(HVM_DPCI_SOFTIRQ, dpci_softirq); + register_cpu_notifier(&cpu_nfb); + return 0; +} +__initcall(setup_dpci_softirq); diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h index 20a54a5bb4..8e3d4d9454 100644 --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -208,4 +208,13 @@ int msixtbl_pt_register(struct domain *, struct pirq *, uint64_t gtable); void msixtbl_pt_unregister(struct domain *, struct pirq *); void msixtbl_pt_cleanup(struct domain *d); +#ifdef CONFIG_HVM +int arch_pci_clean_pirqs(struct domain *d); +#else +static inline int arch_pci_clean_pirqs(struct domain *d) +{ + return 0; +} +#endif /* CONFIG_HVM */ + #endif /* __XEN_PCI_H__ */ -- generated by git-patchbot for /home/xen/git/xen.git#staging
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |