
[Xen-changelog] [xen stable-4.5] Revert "dpci: replace tasklet with softirq"



commit 72f3c1e26e96686a41d2de1663e578538659f99a
Author:     Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
AuthorDate: Mon Jan 12 11:30:00 2015 -0500
Commit:     Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
CommitDate: Mon Jan 12 11:30:00 2015 -0500

    Revert "dpci: replace tasklet with softirq"
    
    This reverts commit f6dd295381f4b6a66acddacf46bca8940586c8d8.
    
    As there are issues with huge amounts of MSI-X interrupts going off.
    
    Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
---
 xen/arch/x86/domain.c         |    4 +-
 xen/drivers/passthrough/io.c  |  251 +++++------------------------------------
 xen/drivers/passthrough/pci.c |   31 ++----
 xen/include/asm-x86/softirq.h |    3 +-
 xen/include/xen/hvm/irq.h     |    5 +-
 xen/include/xen/pci.h         |    2 +-
 6 files changed, 41 insertions(+), 255 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 11c7d9f..f1fc993 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1969,9 +1969,7 @@ int domain_relinquish_resources(struct domain *d)
     switch ( d->arch.relmem )
     {
     case RELMEM_not_started:
-        ret = pci_release_devices(d);
-        if ( ret )
-            return ret;
+        pci_release_devices(d);
 
         /* Tear down paging-assistance stuff. */
         ret = paging_teardown(d);
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index efc66dc..dceb17e 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -20,116 +20,14 @@
 
 #include <xen/event.h>
 #include <xen/iommu.h>
-#include <xen/cpu.h>
 #include <xen/irq.h>
 #include <asm/hvm/irq.h>
 #include <asm/hvm/iommu.h>
 #include <asm/hvm/support.h>
 #include <xen/hvm/irq.h>
+#include <xen/tasklet.h>
 
-static DEFINE_PER_CPU(struct list_head, dpci_list);
-
-/*
- * These two bit states help to safely schedule, deschedule, and wait until
- * the softirq has finished.
- *
- * The semantics behind these two bits is as follow:
- *  - STATE_SCHED - whoever modifies it has to ref-count the domain (->dom).
- *  - STATE_RUN - only softirq is allowed to set and clear it. If it has
- *      been set hvm_dirq_assist will RUN with a saved value of the
- *      'struct domain' copied from 'pirq_dpci->dom' before STATE_RUN was set.
- *
- * The usual states are: STATE_SCHED(set) -> STATE_RUN(set) ->
- * STATE_SCHED(unset) -> STATE_RUN(unset).
- *
- * However the states can also diverge such as: STATE_SCHED(set) ->
- * STATE_SCHED(unset) -> STATE_RUN(set) -> STATE_RUN(unset). That means
- * the 'hvm_dirq_assist' never run and that the softirq did not do any
- * ref-counting.
- */
-
-enum {
-    STATE_SCHED,
-    STATE_RUN
-};
-
-/*
- * This can be called multiple times, but the softirq is only raised once.
- * That is until the STATE_SCHED state has been cleared. The state can be
- * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'),
- * or by 'pt_pirq_softirq_reset' (which will try to clear the state before
- * the softirq had a chance to run).
- */
-static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
-{
-    unsigned long flags;
-
-    if ( test_and_set_bit(STATE_SCHED, &pirq_dpci->state) )
-        return;
-
-    get_knownalive_domain(pirq_dpci->dom);
-
-    local_irq_save(flags);
-    list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list));
-    local_irq_restore(flags);
-
-    raise_softirq(HVM_DPCI_SOFTIRQ);
-}
-
-/*
- * If we are racing with softirq_dpci (STATE_SCHED) we return
- * true. Otherwise we return false.
- *
- * If it is false, it is the callers responsibility to make sure
- * that the softirq (with the event_lock dropped) has ran.
- */
-bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci)
-{
-    if ( pirq_dpci->state & ((1 << STATE_RUN) | (1 << STATE_SCHED)) )
-        return 1;
-
-    /*
-     * If in the future we would call 'raise_softirq_for' right away
-     * after 'pt_pirq_softirq_active' we MUST reset the list (otherwise it
-     * might have stale data).
-     */
-    return 0;
-}
-
-/*
- * Reset the pirq_dpci->dom parameter to NULL.
- *
- * This function checks the different states to make sure it can do it
- * at the right time. If it unschedules the 'hvm_dirq_assist' from running
- * it also refcounts (which is what the softirq would have done) properly.
- */
-static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci)
-{
-    struct domain *d = pirq_dpci->dom;
-
-    ASSERT(spin_is_locked(&d->event_lock));
-
-    switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 0) )
-    {
-    case (1 << STATE_SCHED):
-        /*
-         * We are going to try to de-schedule the softirq before it goes in
-         * STATE_RUN. Whoever clears STATE_SCHED MUST refcount the 'dom'.
-         */
-        put_domain(d);
-        /* fallthrough. */
-    case (1 << STATE_RUN):
-    case (1 << STATE_RUN) | (1 << STATE_SCHED):
-        /*
-         * The reason it is OK to reset 'dom' when STATE_RUN bit is set is due
-         * to a shortcut the 'dpci_softirq' implements. It stashes the 'dom'
-         * in local variable before it sets STATE_RUN - and therefore will not
-         * dereference '->dom' which would crash.
-         */
-        pirq_dpci->dom = NULL;
-        break;
-    }
-}
+static void hvm_dirq_assist(unsigned long arg);
 
 bool_t pt_irq_need_timer(uint32_t flags)
 {
@@ -142,7 +40,7 @@ static int pt_irq_guest_eoi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
     if ( __test_and_clear_bit(_HVM_IRQ_DPCI_EOI_LATCH_SHIFT,
                               &pirq_dpci->flags) )
     {
-        pirq_dpci->state = 0;
+        pirq_dpci->masked = 0;
         pirq_dpci->pending = 0;
         pirq_guest_eoi(dpci_pirq(pirq_dpci));
     }
@@ -203,7 +101,6 @@ int pt_irq_create_bind(
     if ( pirq < 0 || pirq >= d->nr_pirqs )
         return -EINVAL;
 
- restart:
     spin_lock(&d->event_lock);
 
     hvm_irq_dpci = domain_get_irq_dpci(d);
@@ -231,21 +128,6 @@ int pt_irq_create_bind(
     }
     pirq_dpci = pirq_dpci(info);
 
-    /*
-     * A crude 'while' loop with us dropping the spinlock and giving
-     * the softirq_dpci a chance to run.
-     * We MUST check for this condition as the softirq could be scheduled
-     * and hasn't run yet. Note that this code replaced tasklet_kill which
-     * would have spun forever and would do the same thing (wait to flush out
-     * outstanding hvm_dirq_assist calls.
-     */
-    if ( pt_pirq_softirq_active(pirq_dpci) )
-    {
-        spin_unlock(&d->event_lock);
-        cpu_relax();
-        goto restart;
-    }
-
     switch ( pt_irq_bind->irq_type )
     {
     case PT_IRQ_TYPE_MSI:
@@ -277,16 +159,7 @@ int pt_irq_create_bind(
             {
                 rc = msixtbl_pt_register(d, info, pt_irq_bind->u.msi.gtable);
                 if ( unlikely(rc) )
-                {
                     pirq_guest_unbind(d, info);
-                    /*
-                     * Between 'pirq_guest_bind' and before 'pirq_guest_unbind'
-                     * an interrupt can be scheduled. No more of them are going
-                     * to be scheduled but we must deal with the one that may be
-                     * in the queue.
-                     */
-                    pt_pirq_softirq_reset(pirq_dpci);
-                }
             }
             if ( unlikely(rc) )
             {
@@ -396,10 +269,6 @@ int pt_irq_create_bind(
             {
                 if ( pt_irq_need_timer(pirq_dpci->flags) )
                     kill_timer(&pirq_dpci->timer);
-                /*
-                 * There is no path for __do_IRQ to schedule softirq as
-                 * IRQ_GUEST is not set. As such we can reset 'dom' directly.
-                 */
                 pirq_dpci->dom = NULL;
                 list_del(&girq->list);
                 list_del(&digl->list);
@@ -533,13 +402,8 @@ int pt_irq_destroy_bind(
         msixtbl_pt_unregister(d, pirq);
         if ( pt_irq_need_timer(pirq_dpci->flags) )
             kill_timer(&pirq_dpci->timer);
+        pirq_dpci->dom   = NULL;
         pirq_dpci->flags = 0;
-        /*
-         * See comment in pt_irq_create_bind's PT_IRQ_TYPE_MSI before the
-         * call to pt_pirq_softirq_reset.
-         */
-        pt_pirq_softirq_reset(pirq_dpci);
-
         pirq_cleanup_check(pirq, d);
     }
 
@@ -562,12 +426,14 @@ void pt_pirq_init(struct domain *d, struct hvm_pirq_dpci *dpci)
 {
     INIT_LIST_HEAD(&dpci->digl_list);
     dpci->gmsi.dest_vcpu_id = -1;
+    softirq_tasklet_init(&dpci->tasklet, hvm_dirq_assist, (unsigned long)dpci);
 }
 
 bool_t pt_pirq_cleanup_check(struct hvm_pirq_dpci *dpci)
 {
-    if ( !dpci->flags && !pt_pirq_softirq_active(dpci) )
+    if ( !dpci->flags )
     {
+        tasklet_kill(&dpci->tasklet);
         dpci->dom = NULL;
         return 1;
     }
@@ -610,7 +476,8 @@ int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
          !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
         return 0;
 
-    raise_softirq_for(pirq_dpci);
+    pirq_dpci->masked = 1;
+    tasklet_schedule(&pirq_dpci->tasklet);
     return 1;
 }
 
@@ -664,12 +531,28 @@ void hvm_dpci_msi_eoi(struct domain *d, int vector)
     spin_unlock(&d->event_lock);
 }
 
-static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
+static void hvm_dirq_assist(unsigned long arg)
 {
+    struct hvm_pirq_dpci *pirq_dpci = (struct hvm_pirq_dpci *)arg;
+    struct domain *d = pirq_dpci->dom;
+
+    /*
+     * We can be racing with 'pt_irq_destroy_bind' - with us being scheduled
+     * right before 'pirq_guest_unbind' gets called - but us not yet executed.
+     *
+     * And '->dom' gets cleared later in the destroy path. We exit and clear
+     * 'masked' - which is OK as later in this code we would
+     * do nothing except clear the ->masked field anyhow.
+     */
+    if ( !d )
+    {
+        pirq_dpci->masked = 0;
+        return;
+    }
     ASSERT(d->arch.hvm_domain.irq.dpci);
 
     spin_lock(&d->event_lock);
-    if ( pirq_dpci->state )
+    if ( test_and_clear_bool(pirq_dpci->masked) )
     {
         struct pirq *pirq = dpci_pirq(pirq_dpci);
         const struct dev_intx_gsi_link *digl;
@@ -771,83 +654,3 @@ void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
 unlock:
     spin_unlock(&d->event_lock);
 }
-
-/*
- * Note: 'pt_pirq_softirq_reset' can clear the STATE_SCHED before we get to
- * doing it. If that is the case we let 'pt_pirq_softirq_reset' do ref-counting.
- */
-static void dpci_softirq(void)
-{
-    unsigned int cpu = smp_processor_id();
-    LIST_HEAD(our_list);
-
-    local_irq_disable();
-    list_splice_init(&per_cpu(dpci_list, cpu), &our_list);
-    local_irq_enable();
-
-    while ( !list_empty(&our_list) )
-    {
-        struct hvm_pirq_dpci *pirq_dpci;
-        struct domain *d;
-
-        pirq_dpci = list_entry(our_list.next, struct hvm_pirq_dpci, softirq_list);
-        list_del(&pirq_dpci->softirq_list);
-
-        d = pirq_dpci->dom;
-        smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */
-        if ( test_and_set_bit(STATE_RUN, &pirq_dpci->state) )
-            BUG();
-        /*
-         * The one who clears STATE_SCHED MUST refcount the domain.
-         */
-        if ( test_and_clear_bit(STATE_SCHED, &pirq_dpci->state) )
-        {
-            hvm_dirq_assist(d, pirq_dpci);
-            put_domain(d);
-        }
-        clear_bit(STATE_RUN, &pirq_dpci->state);
-    }
-}
-
-static int cpu_callback(
-    struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-    unsigned int cpu = (unsigned long)hcpu;
-
-    switch ( action )
-    {
-    case CPU_UP_PREPARE:
-        INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
-        break;
-    case CPU_UP_CANCELED:
-    case CPU_DEAD:
-        /*
-         * On CPU_DYING this callback is called (on the CPU that is dying)
-         * with an possible HVM_DPIC_SOFTIRQ pending - at which point we can
-         * clear out any outstanding domains (by the virtue of the idle loop
-         * calling the softirq later). In CPU_DEAD case the CPU is deaf and
-         * there are no pending softirqs for us to handle so we can chill.
-         */
-        ASSERT(list_empty(&per_cpu(dpci_list, cpu)));
-        break;
-    }
-
-    return NOTIFY_DONE;
-}
-
-static struct notifier_block cpu_nfb = {
-    .notifier_call = cpu_callback,
-};
-
-static int __init setup_dpci_softirq(void)
-{
-    unsigned int cpu;
-
-    for_each_online_cpu(cpu)
-        INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
-
-    open_softirq(HVM_DPCI_SOFTIRQ, dpci_softirq);
-    register_cpu_notifier(&cpu_nfb);
-    return 0;
-}
-__initcall(setup_dpci_softirq);
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 78c6977..81e8a3a 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -767,51 +767,40 @@ static int pci_clean_dpci_irq(struct domain *d,
         xfree(digl);
     }
 
-    return pt_pirq_softirq_active(pirq_dpci) ? -ERESTART : 0;
+    tasklet_kill(&pirq_dpci->tasklet);
+
+    return 0;
 }
 
-static int pci_clean_dpci_irqs(struct domain *d)
+static void pci_clean_dpci_irqs(struct domain *d)
 {
     struct hvm_irq_dpci *hvm_irq_dpci = NULL;
 
     if ( !iommu_enabled )
-        return 0;
+        return;
 
     if ( !is_hvm_domain(d) )
-        return 0;
+        return;
 
     spin_lock(&d->event_lock);
     hvm_irq_dpci = domain_get_irq_dpci(d);
     if ( hvm_irq_dpci != NULL )
     {
-        int ret = pt_pirq_iterate(d, pci_clean_dpci_irq, NULL);
-
-        if ( ret )
-        {
-            spin_unlock(&d->event_lock);
-            return ret;
-        }
+        pt_pirq_iterate(d, pci_clean_dpci_irq, NULL);
 
         d->arch.hvm_domain.irq.dpci = NULL;
         free_hvm_irq_dpci(hvm_irq_dpci);
     }
     spin_unlock(&d->event_lock);
-    return 0;
 }
 
-int pci_release_devices(struct domain *d)
+void pci_release_devices(struct domain *d)
 {
     struct pci_dev *pdev;
     u8 bus, devfn;
-    int ret;
 
     spin_lock(&pcidevs_lock);
-    ret = pci_clean_dpci_irqs(d);
-    if ( ret )
-    {
-        spin_unlock(&pcidevs_lock);
-        return ret;
-    }
+    pci_clean_dpci_irqs(d);
     while ( (pdev = pci_get_pdev_by_domain(d, -1, -1, -1)) )
     {
         bus = pdev->bus;
@@ -822,8 +811,6 @@ int pci_release_devices(struct domain *d)
                    PCI_SLOT(devfn), PCI_FUNC(devfn));
     }
     spin_unlock(&pcidevs_lock);
-
-    return 0;
 }
 
 #define PCI_CLASS_BRIDGE_HOST    0x0600
diff --git a/xen/include/asm-x86/softirq.h b/xen/include/asm-x86/softirq.h
index ec787d6..7225dea 100644
--- a/xen/include/asm-x86/softirq.h
+++ b/xen/include/asm-x86/softirq.h
@@ -7,8 +7,7 @@
 
 #define MACHINE_CHECK_SOFTIRQ  (NR_COMMON_SOFTIRQS + 3)
 #define PCI_SERR_SOFTIRQ       (NR_COMMON_SOFTIRQS + 4)
-#define HVM_DPCI_SOFTIRQ       (NR_COMMON_SOFTIRQS + 5)
-#define NR_ARCH_SOFTIRQS       6
+#define NR_ARCH_SOFTIRQS       5
 
 bool_t arch_skip_send_event_check(unsigned int cpu);
 
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index 9709397..94a550a 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -93,13 +93,13 @@ struct hvm_irq_dpci {
 /* Machine IRQ to guest device/intx mapping. */
 struct hvm_pirq_dpci {
     uint32_t flags;
-    unsigned int state;
+    bool_t masked;
     uint16_t pending;
     struct list_head digl_list;
     struct domain *dom;
     struct hvm_gmsi_info gmsi;
     struct timer timer;
-    struct list_head softirq_list;
+    struct tasklet tasklet;
 };
 
 void pt_pirq_init(struct domain *, struct hvm_pirq_dpci *);
@@ -109,7 +109,6 @@ int pt_pirq_iterate(struct domain *d,
                               struct hvm_pirq_dpci *, void *arg),
                     void *arg);
 
-bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *);
 /* Modify state of a PCI INTx wire. */
 void hvm_pci_intx_assert(
     struct domain *d, unsigned int device, unsigned int intx);
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 5f295f3..91520bc 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -99,7 +99,7 @@ struct pci_dev *pci_lock_domain_pdev(
 
 void setup_hwdom_pci_devices(struct domain *,
                             int (*)(u8 devfn, struct pci_dev *));
-int pci_release_devices(struct domain *d);
+void pci_release_devices(struct domain *d);
 int pci_add_segment(u16 seg);
 const unsigned long *pci_get_ro_map(u16 seg);
 int pci_add_device(u16 seg, u8 bus, u8 devfn, const struct pci_dev_info *);
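For readers following the STATE_SCHED/STATE_RUN comments in the io.c code removed above, here is a minimal, self-contained sketch of that two-bit scheduling protocol. It is not part of the commit: it uses C11 atomics in place of Xen's test_and_set_bit()/cmpxchg(), a plain counter in place of get_knownalive_domain()/put_domain(), and all names (work_item, schedule_work, cancel_work, worker) are hypothetical and exist only for illustration.

#include <stdatomic.h>
#include <stdio.h>

#define STATE_SCHED  (1u << 0)   /* whoever sets/clears this owns a reference */
#define STATE_RUN    (1u << 1)   /* only the worker sets and clears this      */

struct work_item {
    atomic_uint state;
    int refcnt;                  /* stands in for get/put_domain()            */
};

/* Queue the item; only the first caller since the last run takes a reference. */
static void schedule_work(struct work_item *w)
{
    if ( atomic_fetch_or(&w->state, STATE_SCHED) & STATE_SCHED )
        return;                  /* already queued, nothing more to do        */
    w->refcnt++;                 /* the reference travels with STATE_SCHED    */
}

/* De-schedule before the worker gets to run; mirrors pt_pirq_softirq_reset(). */
static void cancel_work(struct work_item *w)
{
    unsigned int expected = STATE_SCHED;

    if ( atomic_compare_exchange_strong(&w->state, &expected, 0) )
        w->refcnt--;             /* we cleared STATE_SCHED, so we drop the ref */
    /* If STATE_RUN is already set, the worker owns the reference instead.    */
}

/* The deferred-work body; mirrors dpci_softirq() calling hvm_dirq_assist(). */
static void worker(struct work_item *w)
{
    atomic_fetch_or(&w->state, STATE_RUN);
    if ( atomic_fetch_and(&w->state, ~STATE_SCHED) & STATE_SCHED )
    {
        printf("deferred work runs\n");
        w->refcnt--;             /* clearing STATE_SCHED drops the reference  */
    }
    atomic_fetch_and(&w->state, ~STATE_RUN);
}

int main(void)
{
    struct work_item w = { .state = 0, .refcnt = 0 };

    schedule_work(&w);           /* SCHED set, refcnt == 1                    */
    worker(&w);                  /* work runs, refcnt back to 0               */

    schedule_work(&w);
    cancel_work(&w);             /* de-scheduled first: refcnt back to 0      */
    worker(&w);                  /* finds SCHED clear, does nothing           */

    printf("final refcnt: %d\n", w.refcnt);
    return 0;
}

The point of the protocol is that exactly one party drops the reference: either the worker, when it clears STATE_SCHED, or the canceller, when its compare-and-swap wins. That is why the reverted code could safely NULL out ->dom in pt_pirq_softirq_reset() even while STATE_RUN was set, as dpci_softirq() stashes the domain pointer before setting STATE_RUN.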
--
generated by git-patchbot for /home/xen/git/xen.git#stable-4.5

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

