
[Xen-devel] [PATCH, RFC] x86: IRQ affinity should track vCPU affinity



Since a pIRQ gets bound to the pCPU its binding vCPU happens to run on at
bind time, quite a bit of extra cross-CPU traffic can result as soon as
that vCPU moves to a different pCPU; the IRQ's affinity should therefore
follow the vCPU. Likewise, when a domain re-binds an event channel
associated with a pIRQ to a different vCPU, that IRQ's affinity should be
adjusted accordingly.

The open issue is how to break ties for interrupts shared by multiple
domains - currently, whichever request was made last (at any point in
time) is honored.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
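
For reference, the per-vCPU list of PIRQ-bound event channels introduced
below is a doubly-linked list threaded through the event channels
themselves, keyed by port number, with port 0 acting as the terminator
(port 0 is never a valid PIRQ binding). Here is a minimal standalone
sketch of the idea - simplified stand-in types only, not the real
struct evtchn / struct vcpu:

    /* Standalone sketch; simplified stand-in types, not the Xen ones. */
    #include <stdio.h>

    #define NR_PORTS 16

    static struct {
        unsigned short next_port, prev_port;  /* 0 terminates the list */
    } port_table[NR_PORTS];

    static int head;  /* per-vCPU pirq_evtchn_head in the real patch */

    static void link_port(int port)
    {
        port_table[port].prev_port = 0;
        port_table[port].next_port = head;
        if ( head )
            port_table[head].prev_port = port;
        head = port;
    }

    static void unlink_port(int port)
    {
        if ( port_table[port].prev_port )
            port_table[port_table[port].prev_port].next_port =
                port_table[port].next_port;
        else
            head = port_table[port].next_port;
        if ( port_table[port].next_port )
            port_table[port_table[port].next_port].prev_port =
                port_table[port].prev_port;
    }

    int main(void)
    {
        int port;

        link_port(3);
        link_port(5);
        unlink_port(3);
        /* Walks the list the same way evtchn_move_pirqs() does. */
        for ( port = head; port; port = port_table[port].next_port )
            printf("port %d\n", port);  /* prints: port 5 */
        return 0;
    }

Using u16 port indices instead of pointers keeps struct evtchn small, and
since the list is only touched under d->event_lock (see the sched.h hunk)
no further synchronization is needed.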

--- 2010-06-15.orig/xen/arch/ia64/xen/irq.c     2009-10-29 12:24:48.000000000 +0100
+++ 2010-06-15/xen/arch/ia64/xen/irq.c  2010-06-15 09:21:05.000000000 +0200
@@ -612,6 +612,11 @@ xen_debug_irq(unsigned long vector, stru
        }
 }
 
+void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *mask)
+{
+       /* FIXME */
+}
+
 /*
  * Exit an interrupt context. Process softirqs if needed and possible:
  */
--- 2010-06-15.orig/xen/arch/x86/hvm/hvm.c      2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/arch/x86/hvm/hvm.c   2010-06-15 09:21:05.000000000 +0200
@@ -270,7 +270,7 @@ void hvm_migrate_pirqs(struct vcpu *v)
             continue;
         irq = desc - irq_desc;
         ASSERT(MSI_IRQ(irq));
-        irq_set_affinity(irq, *cpumask_of(v->processor));
+        irq_set_affinity(desc, cpumask_of(v->processor));
         spin_unlock_irq(&desc->lock);
     }
     spin_unlock(&d->event_lock);
--- 2010-06-15.orig/xen/arch/x86/irq.c  2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/arch/x86/irq.c       2010-06-15 09:21:05.000000000 +0200
@@ -501,16 +501,28 @@ void move_native_irq(int irq)
 }
 
 /* For re-setting irq interrupt affinity for specific irq */
-void irq_set_affinity(int irq, cpumask_t mask)
+void irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
 {
-    struct irq_desc *desc = irq_to_desc(irq);
-    
     if (!desc->handler->set_affinity)
         return;
     
     ASSERT(spin_is_locked(&desc->lock));
+    desc->status &= ~IRQ_MOVE_PENDING;
+    wmb();
+    cpus_copy(desc->pending_mask, *mask);
+    wmb();
     desc->status |= IRQ_MOVE_PENDING;
-    cpus_copy(desc->pending_mask, mask);
+}
+
+void pirq_set_affinity(struct domain *d, int pirq, const cpumask_t *mask)
+{
+    unsigned long flags;
+    struct irq_desc *desc = domain_spin_lock_irq_desc(d, pirq, &flags);
+
+    if ( !desc )
+        return;
+    irq_set_affinity(desc, mask);
+    spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 DEFINE_PER_CPU(unsigned int, irq_count);
--- 2010-06-15.orig/xen/common/event_channel.c  2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/common/event_channel.c       2010-06-15 09:21:05.000000000 +0200
@@ -295,10 +295,36 @@ static long evtchn_bind_ipi(evtchn_bind_
 }
 
 
+static void link_pirq_port(int port, struct evtchn *chn, struct vcpu *v)
+{
+    chn->u.pirq.prev_port = 0;
+    chn->u.pirq.next_port = v->pirq_evtchn_head;
+    if ( v->pirq_evtchn_head )
+        evtchn_from_port(v->domain, v->pirq_evtchn_head)
+            ->u.pirq.prev_port = port;
+    v->pirq_evtchn_head = port;
+}
+
+static void unlink_pirq_port(struct evtchn *chn, struct vcpu *v)
+{
+    struct domain *d = v->domain;
+
+    if ( chn->u.pirq.prev_port )
+        evtchn_from_port(d, chn->u.pirq.prev_port)->u.pirq.next_port =
+            chn->u.pirq.next_port;
+    else
+        v->pirq_evtchn_head = chn->u.pirq.next_port;
+    if ( chn->u.pirq.next_port )
+        evtchn_from_port(d, chn->u.pirq.next_port)->u.pirq.prev_port =
+            chn->u.pirq.prev_port;
+}
+
+
 static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
 {
     struct evtchn *chn;
     struct domain *d = current->domain;
+    struct vcpu   *v = d->vcpu[0];
     int            port, pirq = bind->pirq;
     long           rc;
 
@@ -319,7 +345,7 @@ static long evtchn_bind_pirq(evtchn_bind
     chn = evtchn_from_port(d, port);
 
     d->pirq_to_evtchn[pirq] = port;
-    rc = pirq_guest_bind(d->vcpu[0], pirq, 
+    rc = pirq_guest_bind(v, pirq,
                          !!(bind->flags & BIND_PIRQ__WILL_SHARE));
     if ( rc != 0 )
     {
@@ -328,7 +354,8 @@ static long evtchn_bind_pirq(evtchn_bind
     }
 
     chn->state  = ECS_PIRQ;
-    chn->u.pirq = pirq;
+    chn->u.pirq.irq = pirq;
+    link_pirq_port(port, chn, v);
 
     bind->port = port;
 
@@ -376,8 +403,9 @@ static long __evtchn_close(struct domain
         break;
 
     case ECS_PIRQ:
-        pirq_guest_unbind(d1, chn1->u.pirq);
-        d1->pirq_to_evtchn[chn1->u.pirq] = 0;
+        pirq_guest_unbind(d1, chn1->u.pirq.irq);
+        d1->pirq_to_evtchn[chn1->u.pirq.irq] = 0;
+        unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]);
         break;
 
     case ECS_VIRQ:
@@ -688,7 +716,7 @@ static long evtchn_status(evtchn_status_
         break;
     case ECS_PIRQ:
         status->status = EVTCHNSTAT_pirq;
-        status->u.pirq = chn->u.pirq;
+        status->u.pirq = chn->u.pirq.irq;
         break;
     case ECS_VIRQ:
         status->status = EVTCHNSTAT_virq;
@@ -747,8 +775,16 @@ long evtchn_bind_vcpu(unsigned int port,
         break;
     case ECS_UNBOUND:
     case ECS_INTERDOMAIN:
+        chn->notify_vcpu_id = vcpu_id;
+        break;
     case ECS_PIRQ:
+        if ( chn->notify_vcpu_id == vcpu_id )
+            break;
+        unlink_pirq_port(chn, d->vcpu[chn->notify_vcpu_id]);
         chn->notify_vcpu_id = vcpu_id;
+        pirq_set_affinity(d, chn->u.pirq.irq,
+                          cpumask_of(d->vcpu[vcpu_id]->processor));
+        link_pirq_port(port, chn, d->vcpu[vcpu_id]);
         break;
     default:
         rc = -EINVAL;
@@ -1064,6 +1100,23 @@ void evtchn_destroy_final(struct domain 
 }
 
 
+void evtchn_move_pirqs(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    const cpumask_t *mask = cpumask_of(v->processor);
+    unsigned int port;
+    struct evtchn *chn;
+
+    spin_lock(&d->event_lock);
+    for ( port = v->pirq_evtchn_head; port; port = chn->u.pirq.next_port )
+    {
+        chn = evtchn_from_port(d, port);
+        pirq_set_affinity(d, chn->u.pirq.irq, mask);
+    }
+    spin_unlock(&d->event_lock);
+}
+
+
 static void domain_dump_evtchn_info(struct domain *d)
 {
     unsigned int port;
@@ -1105,7 +1158,7 @@ static void domain_dump_evtchn_info(stru
                    chn->u.interdomain.remote_port);
             break;
         case ECS_PIRQ:
-            printk(" p=%d", chn->u.pirq);
+            printk(" p=%d", chn->u.pirq.irq);
             break;
         case ECS_VIRQ:
             printk(" v=%d", chn->u.virq);
--- 2010-06-15.orig/xen/common/schedule.c       2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/common/schedule.c    2010-06-15 09:21:05.000000000 +0200
@@ -272,6 +272,7 @@ int sched_move_domain(struct domain *d, 
         cpus_setall(v->cpu_affinity);
         v->processor = new_p;
         v->sched_priv = vcpu_priv[v->vcpu_id];
+        evtchn_move_pirqs(v);
 
         new_p = cycle_cpu(new_p, c->cpu_valid);
     }
@@ -419,6 +420,9 @@ static void vcpu_migrate(struct vcpu *v)
     spin_unlock_irqrestore(
         per_cpu(schedule_data, old_cpu).schedule_lock, flags);
 
+    if ( old_cpu != new_cpu )
+        evtchn_move_pirqs(v);
+
     /* Wake on new CPU. */
     vcpu_wake(v);
 }
@@ -1094,6 +1098,9 @@ static void schedule(void)
 
     stop_timer(&prev->periodic_timer);
 
+    if ( next_slice.migrated )
+        evtchn_move_pirqs(next);
+
     /* Ensure that the domain has an up-to-date time base. */
     update_vcpu_system_time(next);
     vcpu_periodic_timer_work(next);
--- 2010-06-15.orig/xen/common/sched_credit.c   2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/common/sched_credit.c        2010-06-15 09:21:05.000000000 +0200
@@ -1168,7 +1168,7 @@ csched_runq_steal(int peer_cpu, int cpu,
 
 static struct csched_vcpu *
 csched_load_balance(struct csched_private *prv, int cpu,
-    struct csched_vcpu *snext)
+    struct csched_vcpu *snext, bool_t *stolen)
 {
     struct csched_vcpu *speer;
     cpumask_t workers;
@@ -1221,7 +1221,10 @@ csched_load_balance(struct csched_privat
         speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
         spin_unlock(per_cpu(schedule_data, peer_cpu).schedule_lock);
         if ( speer != NULL )
+        {
+            *stolen = 1;
             return speer;
+        }
     }
 
  out:
@@ -1269,6 +1272,7 @@ csched_schedule(
         BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
 
     snext = __runq_elem(runq->next);
+    ret.migrated = 0;
 
     /* Tasklet work (which runs in idle VCPU context) overrides all else. */
     if ( tasklet_work_scheduled )
@@ -1288,7 +1292,7 @@ csched_schedule(
     if ( snext->pri > CSCHED_PRI_TS_OVER )
         __runq_remove(snext);
     else
-        snext = csched_load_balance(prv, cpu, snext);
+        snext = csched_load_balance(prv, cpu, snext, &ret.migrated);
 
     /*
      * Update idlers mask if necessary. When we're idling, other CPUs
--- 2010-06-15.orig/xen/common/sched_credit2.c  2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/common/sched_credit2.c       2010-06-15 09:22:13.000000000 +0200
@@ -991,10 +991,17 @@ csched_schedule(
     }
 #endif
 
+    ret.migrated = 0;
+
     if ( !is_idle_vcpu(snext->vcpu) )
     {
         snext->start_time = now;
-        snext->vcpu->processor = cpu; /* Safe because lock for old processor is held */
+        /* Safe because lock for old processor is held */
+        if ( snext->vcpu->processor != cpu )
+        {
+            snext->vcpu->processor = cpu;
+            ret.migrated = 1;
+        }
     }
 
     /*
--- 2010-06-15.orig/xen/common/sched_sedf.c     2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/common/sched_sedf.c  2010-06-15 09:21:05.000000000 +0200
@@ -875,6 +875,8 @@ static struct task_slice sedf_do_schedul
         ret.time = EXTRA_QUANTUM;
     }
 
+    ret.migrated = 0;
+
     EDOM_INFO(ret.task)->sched_start_abs = now;
     CHECK(ret.time > 0);
     ASSERT(sedf_runnable(ret.task));
--- 2010-06-15.orig/xen/include/asm-x86/irq.h   2009-12-16 09:14:13.000000000 +0100
+++ 2010-06-15/xen/include/asm-x86/irq.h        2010-06-15 09:21:05.000000000 +0200
@@ -143,7 +143,7 @@ void move_native_irq(int irq);
 
 void move_masked_irq(int irq);
 
-void irq_set_affinity(int irq, cpumask_t mask);
+void irq_set_affinity(struct irq_desc *, const cpumask_t *mask);
 
 #define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
 #define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
--- 2010-06-15.orig/xen/include/xen/cpumask.h   2010-05-17 08:45:28.000000000 +0200
+++ 2010-06-15/xen/include/xen/cpumask.h        2010-06-15 09:21:05.000000000 +0200
@@ -206,7 +206,7 @@ static inline int __cpus_weight(const cp
 }
 
 #define cpus_copy(dest, src) __cpus_copy(&(dest), &(src))
-static inline void __cpus_copy(cpumask_t *dstp, cpumask_t *srcp)
+static inline void __cpus_copy(cpumask_t *dstp, const cpumask_t *srcp)
 {
        bitmap_copy(dstp->bits, srcp->bits, NR_CPUS);
 }
--- 2010-06-15.orig/xen/include/xen/event.h     2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/include/xen/event.h  2010-06-15 09:21:05.000000000 +0200
@@ -47,6 +47,9 @@ long evtchn_bind_vcpu(unsigned int port,
 /* Unmask a local event-channel port. */
 int evtchn_unmask(unsigned int port);
 
+/* Move all PIRQs after a vCPU was moved to another pCPU. */
+void evtchn_move_pirqs(struct vcpu *v);
+
 /* Allocate/free a Xen-attached event channel port. */
 int alloc_unbound_xen_event_channel(
     struct vcpu *local_vcpu, domid_t remote_domid);
--- 2010-06-15.orig/xen/include/xen/irq.h       2009-10-29 12:24:49.000000000 +0100
+++ 2010-06-15/xen/include/xen/irq.h    2010-06-15 09:21:05.000000000 +0200
@@ -138,6 +138,7 @@ extern int pirq_guest_eoi(struct domain 
 extern int pirq_guest_unmask(struct domain *d);
 extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share);
 extern void pirq_guest_unbind(struct domain *d, int irq);
+extern void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *);
 extern irq_desc_t *domain_spin_lock_irq_desc(
     struct domain *d, int irq, unsigned long *pflags);
 
--- 2010-06-15.orig/xen/include/xen/sched.h     2010-06-14 08:49:36.000000000 +0200
+++ 2010-06-15/xen/include/xen/sched.h  2010-06-15 09:21:05.000000000 +0200
@@ -61,7 +61,11 @@ struct evtchn
             u16            remote_port;
             struct domain *remote_dom;
         } interdomain; /* state == ECS_INTERDOMAIN */
-        u16 pirq;      /* state == ECS_PIRQ */
+        struct {
+            u16            irq;
+            u16            next_port;
+            u16            prev_port;
+        } pirq;        /* state == ECS_PIRQ */
         u16 virq;      /* state == ECS_VIRQ */
     } u;
 #ifdef FLASK_ENABLE
@@ -142,6 +146,9 @@ struct vcpu 
      */
     int              poll_evtchn;
 
+    /* (over-)protected by ->domain->event_lock */
+    int              pirq_evtchn_head;
+
     unsigned long    pause_flags;
     atomic_t         pause_count;
 
--- 2010-06-15.orig/xen/include/xen/sched-if.h  2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/include/xen/sched-if.h       2010-06-15 09:21:05.000000000 +0200
@@ -79,6 +79,7 @@ static inline void vcpu_schedule_unlock(
 struct task_slice {
     struct vcpu *task;
     s_time_t     time;
+    bool_t       migrated;
 };
 
 struct scheduler {
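
Note that the reworked irq_set_affinity() above only records the request:
the new mask goes into desc->pending_mask and IRQ_MOVE_PENDING gets set,
while the actual re-targeting is deferred to the interrupt path (in the
spirit of move_native_irq() / move_masked_irq()), where it can be done
safely with the IRQ masked. A rough self-contained sketch of that
handshake - stand-in types, not the real Xen definitions:

    /* Self-contained sketch of the deferred-move handshake; simplified
     * stand-in types, not the real Xen definitions. */
    #include <stdio.h>

    #define IRQ_MOVE_PENDING 0x1

    struct fake_desc {
        unsigned int status;
        unsigned long pending_mask;               /* stand-in for cpumask_t */
        void (*set_affinity)(unsigned long mask); /* stand-in hw hook */
    };

    /* Producer, mirroring the reworked irq_set_affinity(): only record
     * the request (the caller holds the descriptor lock). */
    static void fake_set_affinity(struct fake_desc *desc, unsigned long mask)
    {
        desc->status &= ~IRQ_MOVE_PENDING;
        desc->pending_mask = mask;
        desc->status |= IRQ_MOVE_PENDING;
    }

    /* Consumer: apply a pending move the next time the IRQ is serviced. */
    static void fake_move_if_pending(struct fake_desc *desc)
    {
        if ( !(desc->status & IRQ_MOVE_PENDING) )
            return;
        desc->status &= ~IRQ_MOVE_PENDING;
        desc->set_affinity(desc->pending_mask);
    }

    static void print_mask(unsigned long mask)
    {
        printf("mask %#lx\n", mask);
    }

    int main(void)
    {
        struct fake_desc desc = { 0, 0, print_mask };

        fake_set_affinity(&desc, 0x4);  /* request a move to CPU 2 */
        fake_move_if_pending(&desc);    /* prints: mask 0x4 */
        return 0;
    }

This is also why the producer in the patch clears IRQ_MOVE_PENDING, copies
the mask, and only then sets the flag again, with barriers in between: a
consumer racing with an update must never see the flag set while
pending_mask is only half-written.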

