|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen stable-4.5] Revert "dpci: replace tasklet with softirq"
commit 72f3c1e26e96686a41d2de1663e578538659f99a
Author: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
AuthorDate: Mon Jan 12 11:30:00 2015 -0500
Commit: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
CommitDate: Mon Jan 12 11:30:00 2015 -0500
Revert "dpci: replace tasklet with softirq"
This reverts commit f6dd295381f4b6a66acddacf46bca8940586c8d8.
As there are issues with huge amount of MSI-X going off.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
---
xen/arch/x86/domain.c | 4 +-
xen/drivers/passthrough/io.c | 251 +++++------------------------------------
xen/drivers/passthrough/pci.c | 31 ++----
xen/include/asm-x86/softirq.h | 3 +-
xen/include/xen/hvm/irq.h | 5 +-
xen/include/xen/pci.h | 2 +-
6 files changed, 41 insertions(+), 255 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 11c7d9f..f1fc993 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1969,9 +1969,7 @@ int domain_relinquish_resources(struct domain *d)
switch ( d->arch.relmem )
{
case RELMEM_not_started:
- ret = pci_release_devices(d);
- if ( ret )
- return ret;
+ pci_release_devices(d);
/* Tear down paging-assistance stuff. */
ret = paging_teardown(d);
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index efc66dc..dceb17e 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -20,116 +20,14 @@
#include <xen/event.h>
#include <xen/iommu.h>
-#include <xen/cpu.h>
#include <xen/irq.h>
#include <asm/hvm/irq.h>
#include <asm/hvm/iommu.h>
#include <asm/hvm/support.h>
#include <xen/hvm/irq.h>
+#include <xen/tasklet.h>
-static DEFINE_PER_CPU(struct list_head, dpci_list);
-
-/*
- * These two bit states help to safely schedule, deschedule, and wait until
- * the softirq has finished.
- *
- * The semantics behind these two bits is as follow:
- * - STATE_SCHED - whoever modifies it has to ref-count the domain (->dom).
- * - STATE_RUN - only softirq is allowed to set and clear it. If it has
- * been set hvm_dirq_assist will RUN with a saved value of the
- * 'struct domain' copied from 'pirq_dpci->dom' before STATE_RUN was set.
- *
- * The usual states are: STATE_SCHED(set) -> STATE_RUN(set) ->
- * STATE_SCHED(unset) -> STATE_RUN(unset).
- *
- * However the states can also diverge such as: STATE_SCHED(set) ->
- * STATE_SCHED(unset) -> STATE_RUN(set) -> STATE_RUN(unset). That means
- * the 'hvm_dirq_assist' never run and that the softirq did not do any
- * ref-counting.
- */
-
-enum {
- STATE_SCHED,
- STATE_RUN
-};
-
-/*
- * This can be called multiple times, but the softirq is only raised once.
- * That is until the STATE_SCHED state has been cleared. The state can be
- * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'),
- * or by 'pt_pirq_softirq_reset' (which will try to clear the state before
- * the softirq had a chance to run).
- */
-static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
-{
- unsigned long flags;
-
- if ( test_and_set_bit(STATE_SCHED, &pirq_dpci->state) )
- return;
-
- get_knownalive_domain(pirq_dpci->dom);
-
- local_irq_save(flags);
- list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list));
- local_irq_restore(flags);
-
- raise_softirq(HVM_DPCI_SOFTIRQ);
-}
-
-/*
- * If we are racing with softirq_dpci (STATE_SCHED) we return
- * true. Otherwise we return false.
- *
- * If it is false, it is the callers responsibility to make sure
- * that the softirq (with the event_lock dropped) has ran.
- */
-bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci)
-{
- if ( pirq_dpci->state & ((1 << STATE_RUN) | (1 << STATE_SCHED)) )
- return 1;
-
- /*
- * If in the future we would call 'raise_softirq_for' right away
- * after 'pt_pirq_softirq_active' we MUST reset the list (otherwise it
- * might have stale data).
- */
- return 0;
-}
-
-/*
- * Reset the pirq_dpci->dom parameter to NULL.
- *
- * This function checks the different states to make sure it can do it
- * at the right time. If it unschedules the 'hvm_dirq_assist' from running
- * it also refcounts (which is what the softirq would have done) properly.
- */
-static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci)
-{
- struct domain *d = pirq_dpci->dom;
-
- ASSERT(spin_is_locked(&d->event_lock));
-
- switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 0) )
- {
- case (1 << STATE_SCHED):
- /*
- * We are going to try to de-schedule the softirq before it goes in
- * STATE_RUN. Whoever clears STATE_SCHED MUST refcount the 'dom'.
- */
- put_domain(d);
- /* fallthrough. */
- case (1 << STATE_RUN):
- case (1 << STATE_RUN) | (1 << STATE_SCHED):
- /*
- * The reason it is OK to reset 'dom' when STATE_RUN bit is set is due
- * to a shortcut the 'dpci_softirq' implements. It stashes the 'dom'
- * in local variable before it sets STATE_RUN - and therefore will not
- * dereference '->dom' which would crash.
- */
- pirq_dpci->dom = NULL;
- break;
- }
-}
+static void hvm_dirq_assist(unsigned long arg);
bool_t pt_irq_need_timer(uint32_t flags)
{
@@ -142,7 +40,7 @@ static int pt_irq_guest_eoi(struct domain *d, struct
hvm_pirq_dpci *pirq_dpci,
if ( __test_and_clear_bit(_HVM_IRQ_DPCI_EOI_LATCH_SHIFT,
&pirq_dpci->flags) )
{
- pirq_dpci->state = 0;
+ pirq_dpci->masked = 0;
pirq_dpci->pending = 0;
pirq_guest_eoi(dpci_pirq(pirq_dpci));
}
@@ -203,7 +101,6 @@ int pt_irq_create_bind(
if ( pirq < 0 || pirq >= d->nr_pirqs )
return -EINVAL;
- restart:
spin_lock(&d->event_lock);
hvm_irq_dpci = domain_get_irq_dpci(d);
@@ -231,21 +128,6 @@ int pt_irq_create_bind(
}
pirq_dpci = pirq_dpci(info);
- /*
- * A crude 'while' loop with us dropping the spinlock and giving
- * the softirq_dpci a chance to run.
- * We MUST check for this condition as the softirq could be scheduled
- * and hasn't run yet. Note that this code replaced tasklet_kill which
- * would have spun forever and would do the same thing (wait to flush out
- * outstanding hvm_dirq_assist calls.
- */
- if ( pt_pirq_softirq_active(pirq_dpci) )
- {
- spin_unlock(&d->event_lock);
- cpu_relax();
- goto restart;
- }
-
switch ( pt_irq_bind->irq_type )
{
case PT_IRQ_TYPE_MSI:
@@ -277,16 +159,7 @@ int pt_irq_create_bind(
{
rc = msixtbl_pt_register(d, info, pt_irq_bind->u.msi.gtable);
if ( unlikely(rc) )
- {
pirq_guest_unbind(d, info);
- /*
- * Between 'pirq_guest_bind' and before 'pirq_guest_unbind'
- * an interrupt can be scheduled. No more of them are going
- * to be scheduled but we must deal with the one that may
be
- * in the queue.
- */
- pt_pirq_softirq_reset(pirq_dpci);
- }
}
if ( unlikely(rc) )
{
@@ -396,10 +269,6 @@ int pt_irq_create_bind(
{
if ( pt_irq_need_timer(pirq_dpci->flags) )
kill_timer(&pirq_dpci->timer);
- /*
- * There is no path for __do_IRQ to schedule softirq as
- * IRQ_GUEST is not set. As such we can reset 'dom' directly.
- */
pirq_dpci->dom = NULL;
list_del(&girq->list);
list_del(&digl->list);
@@ -533,13 +402,8 @@ int pt_irq_destroy_bind(
msixtbl_pt_unregister(d, pirq);
if ( pt_irq_need_timer(pirq_dpci->flags) )
kill_timer(&pirq_dpci->timer);
+ pirq_dpci->dom = NULL;
pirq_dpci->flags = 0;
- /*
- * See comment in pt_irq_create_bind's PT_IRQ_TYPE_MSI before the
- * call to pt_pirq_softirq_reset.
- */
- pt_pirq_softirq_reset(pirq_dpci);
-
pirq_cleanup_check(pirq, d);
}
@@ -562,12 +426,14 @@ void pt_pirq_init(struct domain *d, struct hvm_pirq_dpci
*dpci)
{
INIT_LIST_HEAD(&dpci->digl_list);
dpci->gmsi.dest_vcpu_id = -1;
+ softirq_tasklet_init(&dpci->tasklet, hvm_dirq_assist, (unsigned long)dpci);
}
bool_t pt_pirq_cleanup_check(struct hvm_pirq_dpci *dpci)
{
- if ( !dpci->flags && !pt_pirq_softirq_active(dpci) )
+ if ( !dpci->flags )
{
+ tasklet_kill(&dpci->tasklet);
dpci->dom = NULL;
return 1;
}
@@ -610,7 +476,8 @@ int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
!(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
return 0;
- raise_softirq_for(pirq_dpci);
+ pirq_dpci->masked = 1;
+ tasklet_schedule(&pirq_dpci->tasklet);
return 1;
}
@@ -664,12 +531,28 @@ void hvm_dpci_msi_eoi(struct domain *d, int vector)
spin_unlock(&d->event_lock);
}
-static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
+static void hvm_dirq_assist(unsigned long arg)
{
+ struct hvm_pirq_dpci *pirq_dpci = (struct hvm_pirq_dpci *)arg;
+ struct domain *d = pirq_dpci->dom;
+
+ /*
+ * We can be racing with 'pt_irq_destroy_bind' - with us being scheduled
+ * right before 'pirq_guest_unbind' gets called - but us not yet executed.
+ *
+ * And '->dom' gets cleared later in the destroy path. We exit and clear
+ * 'masked' - which is OK as later in this code we would
+ * do nothing except clear the ->masked field anyhow.
+ */
+ if ( !d )
+ {
+ pirq_dpci->masked = 0;
+ return;
+ }
ASSERT(d->arch.hvm_domain.irq.dpci);
spin_lock(&d->event_lock);
- if ( pirq_dpci->state )
+ if ( test_and_clear_bool(pirq_dpci->masked) )
{
struct pirq *pirq = dpci_pirq(pirq_dpci);
const struct dev_intx_gsi_link *digl;
@@ -771,83 +654,3 @@ void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
unlock:
spin_unlock(&d->event_lock);
}
-
-/*
- * Note: 'pt_pirq_softirq_reset' can clear the STATE_SCHED before we get to
- * doing it. If that is the case we let 'pt_pirq_softirq_reset' do
ref-counting.
- */
-static void dpci_softirq(void)
-{
- unsigned int cpu = smp_processor_id();
- LIST_HEAD(our_list);
-
- local_irq_disable();
- list_splice_init(&per_cpu(dpci_list, cpu), &our_list);
- local_irq_enable();
-
- while ( !list_empty(&our_list) )
- {
- struct hvm_pirq_dpci *pirq_dpci;
- struct domain *d;
-
- pirq_dpci = list_entry(our_list.next, struct hvm_pirq_dpci,
softirq_list);
- list_del(&pirq_dpci->softirq_list);
-
- d = pirq_dpci->dom;
- smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */
- if ( test_and_set_bit(STATE_RUN, &pirq_dpci->state) )
- BUG();
- /*
- * The one who clears STATE_SCHED MUST refcount the domain.
- */
- if ( test_and_clear_bit(STATE_SCHED, &pirq_dpci->state) )
- {
- hvm_dirq_assist(d, pirq_dpci);
- put_domain(d);
- }
- clear_bit(STATE_RUN, &pirq_dpci->state);
- }
-}
-
-static int cpu_callback(
- struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
-
- switch ( action )
- {
- case CPU_UP_PREPARE:
- INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
- break;
- case CPU_UP_CANCELED:
- case CPU_DEAD:
- /*
- * On CPU_DYING this callback is called (on the CPU that is dying)
- * with an possible HVM_DPIC_SOFTIRQ pending - at which point we can
- * clear out any outstanding domains (by the virtue of the idle loop
- * calling the softirq later). In CPU_DEAD case the CPU is deaf and
- * there are no pending softirqs for us to handle so we can chill.
- */
- ASSERT(list_empty(&per_cpu(dpci_list, cpu)));
- break;
- }
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block cpu_nfb = {
- .notifier_call = cpu_callback,
-};
-
-static int __init setup_dpci_softirq(void)
-{
- unsigned int cpu;
-
- for_each_online_cpu(cpu)
- INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
-
- open_softirq(HVM_DPCI_SOFTIRQ, dpci_softirq);
- register_cpu_notifier(&cpu_nfb);
- return 0;
-}
-__initcall(setup_dpci_softirq);
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 78c6977..81e8a3a 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -767,51 +767,40 @@ static int pci_clean_dpci_irq(struct domain *d,
xfree(digl);
}
- return pt_pirq_softirq_active(pirq_dpci) ? -ERESTART : 0;
+ tasklet_kill(&pirq_dpci->tasklet);
+
+ return 0;
}
-static int pci_clean_dpci_irqs(struct domain *d)
+static void pci_clean_dpci_irqs(struct domain *d)
{
struct hvm_irq_dpci *hvm_irq_dpci = NULL;
if ( !iommu_enabled )
- return 0;
+ return;
if ( !is_hvm_domain(d) )
- return 0;
+ return;
spin_lock(&d->event_lock);
hvm_irq_dpci = domain_get_irq_dpci(d);
if ( hvm_irq_dpci != NULL )
{
- int ret = pt_pirq_iterate(d, pci_clean_dpci_irq, NULL);
-
- if ( ret )
- {
- spin_unlock(&d->event_lock);
- return ret;
- }
+ pt_pirq_iterate(d, pci_clean_dpci_irq, NULL);
d->arch.hvm_domain.irq.dpci = NULL;
free_hvm_irq_dpci(hvm_irq_dpci);
}
spin_unlock(&d->event_lock);
- return 0;
}
-int pci_release_devices(struct domain *d)
+void pci_release_devices(struct domain *d)
{
struct pci_dev *pdev;
u8 bus, devfn;
- int ret;
spin_lock(&pcidevs_lock);
- ret = pci_clean_dpci_irqs(d);
- if ( ret )
- {
- spin_unlock(&pcidevs_lock);
- return ret;
- }
+ pci_clean_dpci_irqs(d);
while ( (pdev = pci_get_pdev_by_domain(d, -1, -1, -1)) )
{
bus = pdev->bus;
@@ -822,8 +811,6 @@ int pci_release_devices(struct domain *d)
PCI_SLOT(devfn), PCI_FUNC(devfn));
}
spin_unlock(&pcidevs_lock);
-
- return 0;
}
#define PCI_CLASS_BRIDGE_HOST 0x0600
diff --git a/xen/include/asm-x86/softirq.h b/xen/include/asm-x86/softirq.h
index ec787d6..7225dea 100644
--- a/xen/include/asm-x86/softirq.h
+++ b/xen/include/asm-x86/softirq.h
@@ -7,8 +7,7 @@
#define MACHINE_CHECK_SOFTIRQ (NR_COMMON_SOFTIRQS + 3)
#define PCI_SERR_SOFTIRQ (NR_COMMON_SOFTIRQS + 4)
-#define HVM_DPCI_SOFTIRQ (NR_COMMON_SOFTIRQS + 5)
-#define NR_ARCH_SOFTIRQS 6
+#define NR_ARCH_SOFTIRQS 5
bool_t arch_skip_send_event_check(unsigned int cpu);
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index 9709397..94a550a 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -93,13 +93,13 @@ struct hvm_irq_dpci {
/* Machine IRQ to guest device/intx mapping. */
struct hvm_pirq_dpci {
uint32_t flags;
- unsigned int state;
+ bool_t masked;
uint16_t pending;
struct list_head digl_list;
struct domain *dom;
struct hvm_gmsi_info gmsi;
struct timer timer;
- struct list_head softirq_list;
+ struct tasklet tasklet;
};
void pt_pirq_init(struct domain *, struct hvm_pirq_dpci *);
@@ -109,7 +109,6 @@ int pt_pirq_iterate(struct domain *d,
struct hvm_pirq_dpci *, void *arg),
void *arg);
-bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *);
/* Modify state of a PCI INTx wire. */
void hvm_pci_intx_assert(
struct domain *d, unsigned int device, unsigned int intx);
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 5f295f3..91520bc 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -99,7 +99,7 @@ struct pci_dev *pci_lock_domain_pdev(
void setup_hwdom_pci_devices(struct domain *,
int (*)(u8 devfn, struct pci_dev *));
-int pci_release_devices(struct domain *d);
+void pci_release_devices(struct domain *d);
int pci_add_segment(u16 seg);
const unsigned long *pci_get_ro_map(u16 seg);
int pci_add_device(u16 seg, u8 bus, u8 devfn, const struct pci_dev_info *);
--
generated by git-patchbot for /home/xen/git/xen.git#stable-4.5
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |