[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] xen: add hypercall option to temporarily pin a vcpu



Some hardware (e.g. Dell Studio 1555 laptops) requires SMIs to be
issued on physical cpu 0 only. Linux drivers like dcdbas or i8k try
to achieve this by pinning the running thread to cpu 0, but in Dom0
this is not enough: the vcpu must be pinned to physical cpu 0 via
Xen, too.

Add a stable hypercall option SCHEDOP_pin_temp to the sched_op
hypercall to achieve this. It takes a physical cpu number as a
parameter. If pinning is possible (i.e. the calling domain has the
privilege to make the call and the cpu is available in the domain's
cpupool), the calling vcpu is pinned to the specified cpu. The old
cpu affinity is saved. To undo the temporary pinning, a cpu value of
-1 is specified; this restores the original cpu affinity for the vcpu.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
 xen/common/schedule.c      | 98 +++++++++++++++++++++++++++++++++++++++++++---
 xen/include/public/sched.h | 15 +++++++
 xen/include/xsm/dummy.h    |  6 +++
 xen/include/xsm/xsm.h      |  6 +++
 xen/xsm/dummy.c            |  1 +
 xen/xsm/flask/hooks.c      |  7 ++++
 6 files changed, 127 insertions(+), 6 deletions(-)

diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 434dcfc..ddb5989 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -271,6 +271,12 @@ int sched_move_domain(struct domain *d, struct cpupool *c)
     struct scheduler *old_ops;
     void *old_domdata;
 
+    for_each_vcpu ( d, v )
+    {
+        if ( v->affinity_broken )
+            return -EBUSY;
+    }
+
     domdata = SCHED_OP(c->sched, alloc_domdata, d);
     if ( domdata == NULL )
         return -ENOMEM;
@@ -670,7 +676,13 @@ int cpu_disable_scheduler(unsigned int cpu)
             if ( cpumask_empty(&online_affinity) &&
                  cpumask_test_cpu(cpu, v->cpu_hard_affinity) )
             {
-                printk(XENLOG_DEBUG "Breaking affinity for %pv\n", v);
+                if ( v->affinity_broken )
+                {
+                    /* The vcpu is temporarily pinned, can't move it. */
+                    vcpu_schedule_unlock_irqrestore(lock, flags, v);
+                    ret = -EBUSY;
+                    continue;
+                }
 
                 if (system_state == SYS_STATE_suspend)
                 {
@@ -679,6 +691,8 @@ int cpu_disable_scheduler(unsigned int cpu)
                     v->affinity_broken = 1;
                 }
 
+                printk(XENLOG_DEBUG "Breaking affinity for %pv\n", v);
+
                 cpumask_setall(v->cpu_hard_affinity);
             }
 
@@ -753,14 +767,22 @@ static int vcpu_set_affinity(
     struct vcpu *v, const cpumask_t *affinity, cpumask_t *which)
 {
     spinlock_t *lock;
+    int ret = 0;
 
     lock = vcpu_schedule_lock_irq(v);
 
-    cpumask_copy(which, affinity);
+    if ( v->affinity_broken )
+    {
+        ret = -EBUSY;
+    }
+    else
+    {
+        cpumask_copy(which, affinity);
 
-    /* Always ask the scheduler to re-evaluate placement
-     * when changing the affinity */
-    set_bit(_VPF_migrating, &v->pause_flags);
+        /* Always ask the scheduler to re-evaluate placement
+         * when changing the affinity */
+        set_bit(_VPF_migrating, &v->pause_flags);
+    }
 
     vcpu_schedule_unlock_irq(lock, v);
 
@@ -772,7 +794,7 @@ static int vcpu_set_affinity(
         vcpu_migrate(v);
     }
 
-    return 0;
+    return ret;
 }
 
 int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity)
@@ -979,6 +1001,53 @@ void watchdog_domain_destroy(struct domain *d)
         kill_timer(&d->watchdog_timer[i]);
 }
 
+static long do_pin_temp(int cpu)
+{
+    struct vcpu *v = current;
+    spinlock_t *lock;
+    long ret = -EINVAL;
+
+    lock = vcpu_schedule_lock_irq(v);
+
+    if ( cpu == -1 )
+    {
+        if ( v->affinity_broken )
+        {
+            cpumask_copy(v->cpu_hard_affinity, v->cpu_hard_affinity_saved);
+            v->affinity_broken = 0;
+            set_bit(_VPF_migrating, &v->pause_flags);
+            ret = 0;
+        }
+    }
+    else if ( cpu < nr_cpu_ids && cpu >= 0 )
+    {
+        if ( v->affinity_broken )
+        {
+            ret = -EBUSY;
+        }
+        else if ( cpumask_test_cpu(cpu, VCPU2ONLINE(v)) )
+        {
+            cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity);
+            v->affinity_broken = 1;
+            cpumask_copy(v->cpu_hard_affinity, cpumask_of(cpu));
+            set_bit(_VPF_migrating, &v->pause_flags);
+            ret = 0;
+        }
+    }
+
+    vcpu_schedule_unlock_irq(lock, v);
+
+    domain_update_node_affinity(v->domain);
+
+    if ( v->pause_flags & VPF_migrating )
+    {
+        vcpu_sleep_nosync(v);
+        vcpu_migrate(v);
+    }
+
+    return ret;
+}
+
 typedef long ret_t;
 
 #endif /* !COMPAT */
@@ -1088,6 +1157,23 @@ ret_t do_sched_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) 
arg)
         break;
     }
 
+    case SCHEDOP_pin_temp:
+    {
+        struct sched_pin_temp sched_pin_temp;
+
+        ret = -EFAULT;
+        if ( copy_from_guest(&sched_pin_temp, arg, 1) )
+            break;
+
+        ret = xsm_schedop_pin_temp(XSM_PRIV);
+        if ( ret )
+            break;
+
+        ret = do_pin_temp(sched_pin_temp.pcpu);
+
+        break;
+    }
+
     default:
         ret = -ENOSYS;
     }
diff --git a/xen/include/public/sched.h b/xen/include/public/sched.h
index 2219696..acc18d5 100644
--- a/xen/include/public/sched.h
+++ b/xen/include/public/sched.h
@@ -118,6 +118,15 @@
  * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
  */
 #define SCHEDOP_watchdog    6
+
+/*
+ * Temporarily pin the current vcpu to one physical cpu or undo that pinning.
+ * @arg == pointer to sched_pin_temp_t structure.
+ *
+ * Setting pcpu to -1 will undo a previous temporary pinning.
+ * This call is allowed for domains with domain control privilege only.
+ */
+#define SCHEDOP_pin_temp    7
 /* ` } */
 
 struct sched_shutdown {
@@ -148,6 +157,12 @@ struct sched_watchdog {
 typedef struct sched_watchdog sched_watchdog_t;
 DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t);
 
+struct sched_pin_temp {
+    int pcpu;
+};
+typedef struct sched_pin_temp sched_pin_temp_t;
+DEFINE_XEN_GUEST_HANDLE(sched_pin_temp_t);
+
 /*
  * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
  * software to determine the appropriate action. For the most part, Xen does
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index 1d13826..730e112 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -240,6 +240,12 @@ static XSM_INLINE int xsm_schedop_shutdown(XSM_DEFAULT_ARG 
struct domain *d1, st
     return xsm_default_action(action, d1, d2);
 }
 
+static XSM_INLINE int xsm_schedop_pin_temp(XSM_DEFAULT_VOID)
+{
+    XSM_ASSERT_ACTION(XSM_PRIV);
+    return xsm_default_action(action, current->domain, NULL);
+}
+
 static XSM_INLINE int xsm_memory_pin_page(XSM_DEFAULT_ARG struct domain *d1, 
struct domain *d2,
                                           struct page_info *page)
 {
diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
index 3afed70..ac6487f 100644
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -102,6 +102,7 @@ struct xsm_operations {
 
     int (*kexec) (void);
     int (*schedop_shutdown) (struct domain *d1, struct domain *d2);
+    int (*schedop_pin_temp) (void);
 
     char *(*show_irq_sid) (int irq);
     int (*map_domain_pirq) (struct domain *d);
@@ -413,6 +414,11 @@ static inline int xsm_schedop_shutdown (xsm_default_t def, 
struct domain *d1, st
     return xsm_ops->schedop_shutdown(d1, d2);
 }
 
+static inline int xsm_schedop_pin_temp(xsm_default_t def)
+{
+    return xsm_ops->schedop_pin_temp();
+}
+
 static inline char *xsm_show_irq_sid (int irq)
 {
     return xsm_ops->show_irq_sid(irq);
diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c
index 0f32636..2df1167 100644
--- a/xen/xsm/dummy.c
+++ b/xen/xsm/dummy.c
@@ -75,6 +75,7 @@ void xsm_fixup_ops (struct xsm_operations *ops)
 
     set_to_dummy_if_null(ops, kexec);
     set_to_dummy_if_null(ops, schedop_shutdown);
+    set_to_dummy_if_null(ops, schedop_pin_temp);
 
     set_to_dummy_if_null(ops, show_irq_sid);
     set_to_dummy_if_null(ops, map_domain_pirq);
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 4813623..5cfbc30 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -470,6 +470,12 @@ static int flask_schedop_shutdown(struct domain *d1, 
struct domain *d2)
     return domain_has_perm(d1, d2, SECCLASS_DOMAIN, DOMAIN__SHUTDOWN);
 }
 
+static int flask_schedop_pin_temp(void)
+{
+    return domain_has_perm(current->domain, SECCLASS_DOMAIN,
+                           DOMAIN__SETAFFINITY);
+}
+
 static void flask_security_domaininfo(struct domain *d, 
                                       struct xen_domctl_getdomaininfo *info)
 {
@@ -1669,6 +1675,7 @@ static struct xsm_operations flask_ops = {
 
     .kexec = flask_kexec,
     .schedop_shutdown = flask_schedop_shutdown,
+    .schedop_pin_temp = flask_schedop_pin_temp,
 
     .show_irq_sid = flask_show_irq_sid,
 
-- 
2.6.2


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.