[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen stable-4.5] Revert "dpci: replace tasklet with softirq"
commit 72f3c1e26e96686a41d2de1663e578538659f99a Author: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx> AuthorDate: Mon Jan 12 11:30:00 2015 -0500 Commit: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx> CommitDate: Mon Jan 12 11:30:00 2015 -0500 Revert "dpci: replace tasklet with softirq" This reverts commit f6dd295381f4b6a66acddacf46bca8940586c8d8. As there are issues with huge amount of MSI-X going off. Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx> --- xen/arch/x86/domain.c | 4 +- xen/drivers/passthrough/io.c | 251 +++++------------------------------------ xen/drivers/passthrough/pci.c | 31 ++---- xen/include/asm-x86/softirq.h | 3 +- xen/include/xen/hvm/irq.h | 5 +- xen/include/xen/pci.h | 2 +- 6 files changed, 41 insertions(+), 255 deletions(-) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 11c7d9f..f1fc993 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1969,9 +1969,7 @@ int domain_relinquish_resources(struct domain *d) switch ( d->arch.relmem ) { case RELMEM_not_started: - ret = pci_release_devices(d); - if ( ret ) - return ret; + pci_release_devices(d); /* Tear down paging-assistance stuff. */ ret = paging_teardown(d); diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c index efc66dc..dceb17e 100644 --- a/xen/drivers/passthrough/io.c +++ b/xen/drivers/passthrough/io.c @@ -20,116 +20,14 @@ #include <xen/event.h> #include <xen/iommu.h> -#include <xen/cpu.h> #include <xen/irq.h> #include <asm/hvm/irq.h> #include <asm/hvm/iommu.h> #include <asm/hvm/support.h> #include <xen/hvm/irq.h> +#include <xen/tasklet.h> -static DEFINE_PER_CPU(struct list_head, dpci_list); - -/* - * These two bit states help to safely schedule, deschedule, and wait until - * the softirq has finished. - * - * The semantics behind these two bits is as follow: - * - STATE_SCHED - whoever modifies it has to ref-count the domain (->dom). - * - STATE_RUN - only softirq is allowed to set and clear it. If it has - * been set hvm_dirq_assist will RUN with a saved value of the - * 'struct domain' copied from 'pirq_dpci->dom' before STATE_RUN was set. - * - * The usual states are: STATE_SCHED(set) -> STATE_RUN(set) -> - * STATE_SCHED(unset) -> STATE_RUN(unset). - * - * However the states can also diverge such as: STATE_SCHED(set) -> - * STATE_SCHED(unset) -> STATE_RUN(set) -> STATE_RUN(unset). That means - * the 'hvm_dirq_assist' never run and that the softirq did not do any - * ref-counting. - */ - -enum { - STATE_SCHED, - STATE_RUN -}; - -/* - * This can be called multiple times, but the softirq is only raised once. - * That is until the STATE_SCHED state has been cleared. The state can be - * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'), - * or by 'pt_pirq_softirq_reset' (which will try to clear the state before - * the softirq had a chance to run). - */ -static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci) -{ - unsigned long flags; - - if ( test_and_set_bit(STATE_SCHED, &pirq_dpci->state) ) - return; - - get_knownalive_domain(pirq_dpci->dom); - - local_irq_save(flags); - list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list)); - local_irq_restore(flags); - - raise_softirq(HVM_DPCI_SOFTIRQ); -} - -/* - * If we are racing with softirq_dpci (STATE_SCHED) we return - * true. Otherwise we return false. - * - * If it is false, it is the callers responsibility to make sure - * that the softirq (with the event_lock dropped) has ran. - */ -bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci) -{ - if ( pirq_dpci->state & ((1 << STATE_RUN) | (1 << STATE_SCHED)) ) - return 1; - - /* - * If in the future we would call 'raise_softirq_for' right away - * after 'pt_pirq_softirq_active' we MUST reset the list (otherwise it - * might have stale data). - */ - return 0; -} - -/* - * Reset the pirq_dpci->dom parameter to NULL. - * - * This function checks the different states to make sure it can do it - * at the right time. If it unschedules the 'hvm_dirq_assist' from running - * it also refcounts (which is what the softirq would have done) properly. - */ -static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci) -{ - struct domain *d = pirq_dpci->dom; - - ASSERT(spin_is_locked(&d->event_lock)); - - switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 0) ) - { - case (1 << STATE_SCHED): - /* - * We are going to try to de-schedule the softirq before it goes in - * STATE_RUN. Whoever clears STATE_SCHED MUST refcount the 'dom'. - */ - put_domain(d); - /* fallthrough. */ - case (1 << STATE_RUN): - case (1 << STATE_RUN) | (1 << STATE_SCHED): - /* - * The reason it is OK to reset 'dom' when STATE_RUN bit is set is due - * to a shortcut the 'dpci_softirq' implements. It stashes the 'dom' - * in local variable before it sets STATE_RUN - and therefore will not - * dereference '->dom' which would crash. - */ - pirq_dpci->dom = NULL; - break; - } -} +static void hvm_dirq_assist(unsigned long arg); bool_t pt_irq_need_timer(uint32_t flags) { @@ -142,7 +40,7 @@ static int pt_irq_guest_eoi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci, if ( __test_and_clear_bit(_HVM_IRQ_DPCI_EOI_LATCH_SHIFT, &pirq_dpci->flags) ) { - pirq_dpci->state = 0; + pirq_dpci->masked = 0; pirq_dpci->pending = 0; pirq_guest_eoi(dpci_pirq(pirq_dpci)); } @@ -203,7 +101,6 @@ int pt_irq_create_bind( if ( pirq < 0 || pirq >= d->nr_pirqs ) return -EINVAL; - restart: spin_lock(&d->event_lock); hvm_irq_dpci = domain_get_irq_dpci(d); @@ -231,21 +128,6 @@ int pt_irq_create_bind( } pirq_dpci = pirq_dpci(info); - /* - * A crude 'while' loop with us dropping the spinlock and giving - * the softirq_dpci a chance to run. - * We MUST check for this condition as the softirq could be scheduled - * and hasn't run yet. Note that this code replaced tasklet_kill which - * would have spun forever and would do the same thing (wait to flush out - * outstanding hvm_dirq_assist calls. - */ - if ( pt_pirq_softirq_active(pirq_dpci) ) - { - spin_unlock(&d->event_lock); - cpu_relax(); - goto restart; - } - switch ( pt_irq_bind->irq_type ) { case PT_IRQ_TYPE_MSI: @@ -277,16 +159,7 @@ int pt_irq_create_bind( { rc = msixtbl_pt_register(d, info, pt_irq_bind->u.msi.gtable); if ( unlikely(rc) ) - { pirq_guest_unbind(d, info); - /* - * Between 'pirq_guest_bind' and before 'pirq_guest_unbind' - * an interrupt can be scheduled. No more of them are going - * to be scheduled but we must deal with the one that may be - * in the queue. - */ - pt_pirq_softirq_reset(pirq_dpci); - } } if ( unlikely(rc) ) { @@ -396,10 +269,6 @@ int pt_irq_create_bind( { if ( pt_irq_need_timer(pirq_dpci->flags) ) kill_timer(&pirq_dpci->timer); - /* - * There is no path for __do_IRQ to schedule softirq as - * IRQ_GUEST is not set. As such we can reset 'dom' directly. - */ pirq_dpci->dom = NULL; list_del(&girq->list); list_del(&digl->list); @@ -533,13 +402,8 @@ int pt_irq_destroy_bind( msixtbl_pt_unregister(d, pirq); if ( pt_irq_need_timer(pirq_dpci->flags) ) kill_timer(&pirq_dpci->timer); + pirq_dpci->dom = NULL; pirq_dpci->flags = 0; - /* - * See comment in pt_irq_create_bind's PT_IRQ_TYPE_MSI before the - * call to pt_pirq_softirq_reset. - */ - pt_pirq_softirq_reset(pirq_dpci); - pirq_cleanup_check(pirq, d); } @@ -562,12 +426,14 @@ void pt_pirq_init(struct domain *d, struct hvm_pirq_dpci *dpci) { INIT_LIST_HEAD(&dpci->digl_list); dpci->gmsi.dest_vcpu_id = -1; + softirq_tasklet_init(&dpci->tasklet, hvm_dirq_assist, (unsigned long)dpci); } bool_t pt_pirq_cleanup_check(struct hvm_pirq_dpci *dpci) { - if ( !dpci->flags && !pt_pirq_softirq_active(dpci) ) + if ( !dpci->flags ) { + tasklet_kill(&dpci->tasklet); dpci->dom = NULL; return 1; } @@ -610,7 +476,8 @@ int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq) !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) ) return 0; - raise_softirq_for(pirq_dpci); + pirq_dpci->masked = 1; + tasklet_schedule(&pirq_dpci->tasklet); return 1; } @@ -664,12 +531,28 @@ void hvm_dpci_msi_eoi(struct domain *d, int vector) spin_unlock(&d->event_lock); } -static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci) +static void hvm_dirq_assist(unsigned long arg) { + struct hvm_pirq_dpci *pirq_dpci = (struct hvm_pirq_dpci *)arg; + struct domain *d = pirq_dpci->dom; + + /* + * We can be racing with 'pt_irq_destroy_bind' - with us being scheduled + * right before 'pirq_guest_unbind' gets called - but us not yet executed. + * + * And '->dom' gets cleared later in the destroy path. We exit and clear + * 'masked' - which is OK as later in this code we would + * do nothing except clear the ->masked field anyhow. + */ + if ( !d ) + { + pirq_dpci->masked = 0; + return; + } ASSERT(d->arch.hvm_domain.irq.dpci); spin_lock(&d->event_lock); - if ( pirq_dpci->state ) + if ( test_and_clear_bool(pirq_dpci->masked) ) { struct pirq *pirq = dpci_pirq(pirq_dpci); const struct dev_intx_gsi_link *digl; @@ -771,83 +654,3 @@ void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi, unlock: spin_unlock(&d->event_lock); } - -/* - * Note: 'pt_pirq_softirq_reset' can clear the STATE_SCHED before we get to - * doing it. If that is the case we let 'pt_pirq_softirq_reset' do ref-counting. - */ -static void dpci_softirq(void) -{ - unsigned int cpu = smp_processor_id(); - LIST_HEAD(our_list); - - local_irq_disable(); - list_splice_init(&per_cpu(dpci_list, cpu), &our_list); - local_irq_enable(); - - while ( !list_empty(&our_list) ) - { - struct hvm_pirq_dpci *pirq_dpci; - struct domain *d; - - pirq_dpci = list_entry(our_list.next, struct hvm_pirq_dpci, softirq_list); - list_del(&pirq_dpci->softirq_list); - - d = pirq_dpci->dom; - smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */ - if ( test_and_set_bit(STATE_RUN, &pirq_dpci->state) ) - BUG(); - /* - * The one who clears STATE_SCHED MUST refcount the domain. - */ - if ( test_and_clear_bit(STATE_SCHED, &pirq_dpci->state) ) - { - hvm_dirq_assist(d, pirq_dpci); - put_domain(d); - } - clear_bit(STATE_RUN, &pirq_dpci->state); - } -} - -static int cpu_callback( - struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - switch ( action ) - { - case CPU_UP_PREPARE: - INIT_LIST_HEAD(&per_cpu(dpci_list, cpu)); - break; - case CPU_UP_CANCELED: - case CPU_DEAD: - /* - * On CPU_DYING this callback is called (on the CPU that is dying) - * with an possible HVM_DPIC_SOFTIRQ pending - at which point we can - * clear out any outstanding domains (by the virtue of the idle loop - * calling the softirq later). In CPU_DEAD case the CPU is deaf and - * there are no pending softirqs for us to handle so we can chill. - */ - ASSERT(list_empty(&per_cpu(dpci_list, cpu))); - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block cpu_nfb = { - .notifier_call = cpu_callback, -}; - -static int __init setup_dpci_softirq(void) -{ - unsigned int cpu; - - for_each_online_cpu(cpu) - INIT_LIST_HEAD(&per_cpu(dpci_list, cpu)); - - open_softirq(HVM_DPCI_SOFTIRQ, dpci_softirq); - register_cpu_notifier(&cpu_nfb); - return 0; -} -__initcall(setup_dpci_softirq); diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index 78c6977..81e8a3a 100644 --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -767,51 +767,40 @@ static int pci_clean_dpci_irq(struct domain *d, xfree(digl); } - return pt_pirq_softirq_active(pirq_dpci) ? -ERESTART : 0; + tasklet_kill(&pirq_dpci->tasklet); + + return 0; } -static int pci_clean_dpci_irqs(struct domain *d) +static void pci_clean_dpci_irqs(struct domain *d) { struct hvm_irq_dpci *hvm_irq_dpci = NULL; if ( !iommu_enabled ) - return 0; + return; if ( !is_hvm_domain(d) ) - return 0; + return; spin_lock(&d->event_lock); hvm_irq_dpci = domain_get_irq_dpci(d); if ( hvm_irq_dpci != NULL ) { - int ret = pt_pirq_iterate(d, pci_clean_dpci_irq, NULL); - - if ( ret ) - { - spin_unlock(&d->event_lock); - return ret; - } + pt_pirq_iterate(d, pci_clean_dpci_irq, NULL); d->arch.hvm_domain.irq.dpci = NULL; free_hvm_irq_dpci(hvm_irq_dpci); } spin_unlock(&d->event_lock); - return 0; } -int pci_release_devices(struct domain *d) +void pci_release_devices(struct domain *d) { struct pci_dev *pdev; u8 bus, devfn; - int ret; spin_lock(&pcidevs_lock); - ret = pci_clean_dpci_irqs(d); - if ( ret ) - { - spin_unlock(&pcidevs_lock); - return ret; - } + pci_clean_dpci_irqs(d); while ( (pdev = pci_get_pdev_by_domain(d, -1, -1, -1)) ) { bus = pdev->bus; @@ -822,8 +811,6 @@ int pci_release_devices(struct domain *d) PCI_SLOT(devfn), PCI_FUNC(devfn)); } spin_unlock(&pcidevs_lock); - - return 0; } #define PCI_CLASS_BRIDGE_HOST 0x0600 diff --git a/xen/include/asm-x86/softirq.h b/xen/include/asm-x86/softirq.h index ec787d6..7225dea 100644 --- a/xen/include/asm-x86/softirq.h +++ b/xen/include/asm-x86/softirq.h @@ -7,8 +7,7 @@ #define MACHINE_CHECK_SOFTIRQ (NR_COMMON_SOFTIRQS + 3) #define PCI_SERR_SOFTIRQ (NR_COMMON_SOFTIRQS + 4) -#define HVM_DPCI_SOFTIRQ (NR_COMMON_SOFTIRQS + 5) -#define NR_ARCH_SOFTIRQS 6 +#define NR_ARCH_SOFTIRQS 5 bool_t arch_skip_send_event_check(unsigned int cpu); diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h index 9709397..94a550a 100644 --- a/xen/include/xen/hvm/irq.h +++ b/xen/include/xen/hvm/irq.h @@ -93,13 +93,13 @@ struct hvm_irq_dpci { /* Machine IRQ to guest device/intx mapping. */ struct hvm_pirq_dpci { uint32_t flags; - unsigned int state; + bool_t masked; uint16_t pending; struct list_head digl_list; struct domain *dom; struct hvm_gmsi_info gmsi; struct timer timer; - struct list_head softirq_list; + struct tasklet tasklet; }; void pt_pirq_init(struct domain *, struct hvm_pirq_dpci *); @@ -109,7 +109,6 @@ int pt_pirq_iterate(struct domain *d, struct hvm_pirq_dpci *, void *arg), void *arg); -bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *); /* Modify state of a PCI INTx wire. */ void hvm_pci_intx_assert( struct domain *d, unsigned int device, unsigned int intx); diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h index 5f295f3..91520bc 100644 --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -99,7 +99,7 @@ struct pci_dev *pci_lock_domain_pdev( void setup_hwdom_pci_devices(struct domain *, int (*)(u8 devfn, struct pci_dev *)); -int pci_release_devices(struct domain *d); +void pci_release_devices(struct domain *d); int pci_add_segment(u16 seg); const unsigned long *pci_get_ro_map(u16 seg); int pci_add_device(u16 seg, u8 bus, u8 devfn, const struct pci_dev_info *); -- generated by git-patchbot for /home/xen/git/xen.git#stable-4.5 _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |