[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] x86: IRQ affinity should track vCPU affinity



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1276755726 -3600
# Node ID 0695a5cdcb42d98dcd4bbda35614753787aa7983
# Parent  b9c541d9c13822e92719ccfe77fbd0241410f2c5
x86: IRQ affinity should track vCPU affinity

When an IRQ is bound to the pCPU that the binding vCPU is currently
running on, quite a bit of extra cross-CPU traffic can result as soon
as that vCPU moves to a different pCPU. Likewise, when a domain re-binds
an event channel associated with a pIRQ, that IRQ's affinity should
also be adjusted.

An open issue is how to break ties for interrupts shared by multiple
domains; currently, the most recent request (at any point in time) is
honored.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 xen/arch/ia64/xen/irq.c    |    5 +++
 xen/arch/x86/hvm/hvm.c     |    2 -
 xen/arch/x86/irq.c         |   22 +++++++++++----
 xen/common/event_channel.c |   65 ++++++++++++++++++++++++++++++++++++++++-----
 xen/common/sched_credit.c  |    8 ++++-
 xen/common/sched_credit2.c |    9 +++++-
 xen/common/sched_sedf.c    |    2 +
 xen/common/schedule.c      |    7 ++++
 xen/include/asm-x86/irq.h  |    2 -
 xen/include/xen/cpumask.h  |    2 -
 xen/include/xen/event.h    |    3 ++
 xen/include/xen/irq.h      |    1 
 xen/include/xen/sched-if.h |    1 
 xen/include/xen/sched.h    |    9 +++++-
 14 files changed, 120 insertions(+), 18 deletions(-)

diff -r b9c541d9c138 -r 0695a5cdcb42 xen/arch/ia64/xen/irq.c
--- a/xen/arch/ia64/xen/irq.c   Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/arch/ia64/xen/irq.c   Thu Jun 17 07:22:06 2010 +0100
@@ -612,6 +612,11 @@ xen_debug_irq(unsigned long vector, stru
        }
 }
 
+void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *mask)
+{
+       /* FIXME */
+}
+
 /*
  * Exit an interrupt context. Process softirqs if needed and possible:
  */
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Thu Jun 17 07:22:06 2010 +0100
@@ -271,7 +271,7 @@ void hvm_migrate_pirqs(struct vcpu *v)
             continue;
         irq = desc - irq_desc;
         ASSERT(MSI_IRQ(irq));
-        irq_set_affinity(irq, *cpumask_of(v->processor));
+        irq_set_affinity(desc, cpumask_of(v->processor));
         spin_unlock_irq(&desc->lock);
     }
     spin_unlock(&d->event_lock);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/arch/x86/irq.c        Thu Jun 17 07:22:06 2010 +0100
@@ -501,16 +501,28 @@ void move_native_irq(int irq)
 }
 
 /* For re-setting irq interrupt affinity for specific irq */
-void irq_set_affinity(int irq, cpumask_t mask)
-{
-    struct irq_desc *desc = irq_to_desc(irq);
-    
+void irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
+{
     if (!desc->handler->set_affinity)
         return;
     
     ASSERT(spin_is_locked(&desc->lock));
+    desc->status &= ~IRQ_MOVE_PENDING;
+    wmb();
+    cpus_copy(desc->pending_mask, *mask);
+    wmb();
     desc->status |= IRQ_MOVE_PENDING;
-    cpus_copy(desc->pending_mask, mask);
+}
+
+void pirq_set_affinity(struct domain *d, int pirq, const cpumask_t *mask)
+{
+    unsigned long flags;
+    struct irq_desc *desc = domain_spin_lock_irq_desc(d, pirq, &flags);
+
+    if ( !desc )
+        return;
+    irq_set_affinity(desc, mask);
+    spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 DEFINE_PER_CPU(unsigned int, irq_count);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/event_channel.c
--- a/xen/common/event_channel.c        Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/event_channel.c        Thu Jun 17 07:22:06 2010 +0100
@@ -295,10 +295,36 @@ static long evtchn_bind_ipi(evtchn_bind_
 }
 
 
+static void link_pirq_port(int port, struct evtchn *chn, struct vcpu *v)
+{
+    chn->u.pirq.prev_port = 0;
+    chn->u.pirq.next_port = v->pirq_evtchn_head;
+    if ( v->pirq_evtchn_head )
+        evtchn_from_port(v->domain, v->pirq_evtchn_head)
+            ->u.pirq.prev_port = port;
+    v->pirq_evtchn_head = port;
+}
+
+static void unlink_pirq_port(struct evtchn *chn, struct vcpu *v)
+{
+    struct domain *d = v->domain;
+
+    if ( chn->u.pirq.prev_port )
+        evtchn_from_port(d, chn->u.pirq.prev_port)->u.pirq.next_port =
+            chn->u.pirq.next_port;
+    else
+        v->pirq_evtchn_head = chn->u.pirq.next_port;
+    if ( chn->u.pirq.next_port )
+        evtchn_from_port(d, chn->u.pirq.next_port)->u.pirq.prev_port =
+            chn->u.pirq.prev_port;
+}
+
+
 static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
 {
     struct evtchn *chn;
     struct domain *d = current->domain;
+    struct vcpu   *v = d->vcpu[0];
     int            port, pirq = bind->pirq;
     long           rc;
 
@@ -319,7 +345,7 @@ static long evtchn_bind_pirq(evtchn_bind
     chn = evtchn_from_port(d, port);
 
     d->pirq_to_evtchn[pirq] = port;
-    rc = pirq_guest_bind(d->vcpu[0], pirq, 
+    rc = pirq_guest_bind(v, pirq,
                          !!(bind->flags & BIND_PIRQ__WILL_SHARE));
     if ( rc != 0 )
     {
@@ -328,7 +354,8 @@ static long evtchn_bind_pirq(evtchn_bind
     }
 
     chn->state  = ECS_PIRQ;
-    chn->u.pirq = pirq;
+    chn->u.pirq.irq = pirq;
+    link_pirq_port(port, chn, v);
 
     bind->port = port;
 
@@ -376,8 +403,9 @@ static long __evtchn_close(struct domain
         break;
 
     case ECS_PIRQ:
-        pirq_guest_unbind(d1, chn1->u.pirq);
-        d1->pirq_to_evtchn[chn1->u.pirq] = 0;
+        pirq_guest_unbind(d1, chn1->u.pirq.irq);
+        d1->pirq_to_evtchn[chn1->u.pirq.irq] = 0;
+        unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]);
         break;
 
     case ECS_VIRQ:
@@ -688,7 +716,7 @@ static long evtchn_status(evtchn_status_
         break;
     case ECS_PIRQ:
         status->status = EVTCHNSTAT_pirq;
-        status->u.pirq = chn->u.pirq;
+        status->u.pirq = chn->u.pirq.irq;
         break;
     case ECS_VIRQ:
         status->status = EVTCHNSTAT_virq;
@@ -747,8 +775,16 @@ long evtchn_bind_vcpu(unsigned int port,
         break;
     case ECS_UNBOUND:
     case ECS_INTERDOMAIN:
+        chn->notify_vcpu_id = vcpu_id;
+        break;
     case ECS_PIRQ:
+        if ( chn->notify_vcpu_id == vcpu_id )
+            break;
+        unlink_pirq_port(chn, d->vcpu[chn->notify_vcpu_id]);
         chn->notify_vcpu_id = vcpu_id;
+        pirq_set_affinity(d, chn->u.pirq.irq,
+                          cpumask_of(d->vcpu[vcpu_id]->processor));
+        link_pirq_port(port, chn, d->vcpu[vcpu_id]);
         break;
     default:
         rc = -EINVAL;
@@ -1061,6 +1097,23 @@ void evtchn_destroy_final(struct domain 
     xfree(d->poll_mask);
     d->poll_mask = NULL;
 #endif
+}
+
+
+void evtchn_move_pirqs(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    const cpumask_t *mask = cpumask_of(v->processor);
+    unsigned int port;
+    struct evtchn *chn;
+
+    spin_lock(&d->event_lock);
+    for ( port = v->pirq_evtchn_head; port; port = chn->u.pirq.next_port )
+    {
+        chn = evtchn_from_port(d, port);
+        pirq_set_affinity(d, chn->u.pirq.irq, mask);
+    }
+    spin_unlock(&d->event_lock);
 }
 
 
@@ -1105,7 +1158,7 @@ static void domain_dump_evtchn_info(stru
                    chn->u.interdomain.remote_port);
             break;
         case ECS_PIRQ:
-            printk(" p=%d", chn->u.pirq);
+            printk(" p=%d", chn->u.pirq.irq);
             break;
         case ECS_VIRQ:
             printk(" v=%d", chn->u.virq);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/sched_credit.c Thu Jun 17 07:22:06 2010 +0100
@@ -1168,7 +1168,7 @@ csched_runq_steal(int peer_cpu, int cpu,
 
 static struct csched_vcpu *
 csched_load_balance(struct csched_private *prv, int cpu,
-    struct csched_vcpu *snext)
+    struct csched_vcpu *snext, bool_t *stolen)
 {
     struct csched_vcpu *speer;
     cpumask_t workers;
@@ -1221,7 +1221,10 @@ csched_load_balance(struct csched_privat
         speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
         spin_unlock(per_cpu(schedule_data, peer_cpu).schedule_lock);
         if ( speer != NULL )
+        {
+            *stolen = 1;
             return speer;
+        }
     }
 
  out:
@@ -1269,6 +1272,7 @@ csched_schedule(
         BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
 
     snext = __runq_elem(runq->next);
+    ret.migrated = 0;
 
     /* Tasklet work (which runs in idle VCPU context) overrides all else. */
     if ( tasklet_work_scheduled )
@@ -1288,7 +1292,7 @@ csched_schedule(
     if ( snext->pri > CSCHED_PRI_TS_OVER )
         __runq_remove(snext);
     else
-        snext = csched_load_balance(prv, cpu, snext);
+        snext = csched_load_balance(prv, cpu, snext, &ret.migrated);
 
     /*
      * Update idlers mask if necessary. When we're idling, other CPUs
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/sched_credit2.c
--- a/xen/common/sched_credit2.c        Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/sched_credit2.c        Thu Jun 17 07:22:06 2010 +0100
@@ -991,10 +991,17 @@ csched_schedule(
     }
 #endif
 
+    ret.migrated = 0;
+
     if ( !is_idle_vcpu(snext->vcpu) )
     {
         snext->start_time = now;
-        snext->vcpu->processor = cpu; /* Safe because lock for old processor is held */
+        /* Safe because lock for old processor is held */
+        if ( snext->vcpu->processor != cpu )
+        {
+            snext->vcpu->processor = cpu;
+            ret.migrated = 1;
+        }
     }
 
     /*
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/sched_sedf.c   Thu Jun 17 07:22:06 2010 +0100
@@ -874,6 +874,8 @@ static struct task_slice sedf_do_schedul
                ret.time);
         ret.time = EXTRA_QUANTUM;
     }
+
+    ret.migrated = 0;
 
     EDOM_INFO(ret.task)->sched_start_abs = now;
     CHECK(ret.time > 0);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/schedule.c
--- a/xen/common/schedule.c     Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/schedule.c     Thu Jun 17 07:22:06 2010 +0100
@@ -272,6 +272,7 @@ int sched_move_domain(struct domain *d, 
         cpus_setall(v->cpu_affinity);
         v->processor = new_p;
         v->sched_priv = vcpu_priv[v->vcpu_id];
+        evtchn_move_pirqs(v);
 
         new_p = cycle_cpu(new_p, c->cpu_valid);
     }
@@ -418,6 +419,9 @@ static void vcpu_migrate(struct vcpu *v)
     v->processor = new_cpu;
     spin_unlock_irqrestore(
         per_cpu(schedule_data, old_cpu).schedule_lock, flags);
+
+    if ( old_cpu != new_cpu )
+        evtchn_move_pirqs(v);
 
     /* Wake on new CPU. */
     vcpu_wake(v);
@@ -1094,6 +1098,9 @@ static void schedule(void)
 
     stop_timer(&prev->periodic_timer);
 
+    if ( next_slice.migrated )
+        evtchn_move_pirqs(next);
+
     /* Ensure that the domain has an up-to-date time base. */
     update_vcpu_system_time(next);
     vcpu_periodic_timer_work(next);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/asm-x86/irq.h Thu Jun 17 07:22:06 2010 +0100
@@ -143,7 +143,7 @@ void move_native_irq(int irq);
 
 void move_masked_irq(int irq);
 
-void irq_set_affinity(int irq, cpumask_t mask);
+void irq_set_affinity(struct irq_desc *, const cpumask_t *mask);
 
 #define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
 #define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/cpumask.h
--- a/xen/include/xen/cpumask.h Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/cpumask.h Thu Jun 17 07:22:06 2010 +0100
@@ -206,7 +206,7 @@ static inline int __cpus_weight(const cp
 }
 
 #define cpus_copy(dest, src) __cpus_copy(&(dest), &(src))
-static inline void __cpus_copy(cpumask_t *dstp, cpumask_t *srcp)
+static inline void __cpus_copy(cpumask_t *dstp, const cpumask_t *srcp)
 {
        bitmap_copy(dstp->bits, srcp->bits, NR_CPUS);
 }
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/event.h
--- a/xen/include/xen/event.h   Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/event.h   Thu Jun 17 07:22:06 2010 +0100
@@ -47,6 +47,9 @@ long evtchn_bind_vcpu(unsigned int port,
 /* Unmask a local event-channel port. */
 int evtchn_unmask(unsigned int port);
 
+/* Move all PIRQs after a vCPU was moved to another pCPU. */
+void evtchn_move_pirqs(struct vcpu *v);
+
 /* Allocate/free a Xen-attached event channel port. */
 int alloc_unbound_xen_event_channel(
     struct vcpu *local_vcpu, domid_t remote_domid);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/irq.h
--- a/xen/include/xen/irq.h     Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/irq.h     Thu Jun 17 07:22:06 2010 +0100
@@ -139,6 +139,7 @@ extern int pirq_guest_unmask(struct doma
 extern int pirq_guest_unmask(struct domain *d);
 extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share);
 extern void pirq_guest_unbind(struct domain *d, int irq);
+extern void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *);
 extern irq_desc_t *domain_spin_lock_irq_desc(
     struct domain *d, int irq, unsigned long *pflags);
 
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/sched-if.h        Thu Jun 17 07:22:06 2010 +0100
@@ -79,6 +79,7 @@ struct task_slice {
 struct task_slice {
     struct vcpu *task;
     s_time_t     time;
+    bool_t       migrated;
 };
 
 struct scheduler {
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/sched.h   Thu Jun 17 07:22:06 2010 +0100
@@ -61,7 +61,11 @@ struct evtchn
             u16            remote_port;
             struct domain *remote_dom;
         } interdomain; /* state == ECS_INTERDOMAIN */
-        u16 pirq;      /* state == ECS_PIRQ */
+        struct {
+            u16            irq;
+            u16            next_port;
+            u16            prev_port;
+        } pirq;        /* state == ECS_PIRQ */
         u16 virq;      /* state == ECS_VIRQ */
     } u;
 #ifdef FLASK_ENABLE
@@ -141,6 +145,9 @@ struct vcpu
      * < 0: multiple ports may be being polled.
      */
     int              poll_evtchn;
+
+    /* (over-)protected by ->domain->event_lock */
+    int              pirq_evtchn_head;
 
     unsigned long    pause_flags;
     atomic_t         pause_count;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.