[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 1/2] xen/arm: Convert runstate address during hypcall


  • To: xen-devel@xxxxxxxxxxxxxxxxxxxx
  • From: Bertrand Marquis <bertrand.marquis@xxxxxxx>
  • Date: Thu, 11 Jun 2020 12:58:20 +0100
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=arm.com; dmarc=pass action=none header.from=arm.com; dkim=pass header.d=arm.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=q9LeQAfhC/BKsL2smIOUvgONsIexEg/IAzNAQ+yxpOo=; b=leJoQCK2E0kMGqpJvuSxQisz7ezE9Z+lmrDZs6yqT2B1AuyB9jEoqoVDOKydwgoAHuFNwsRf3RmevsMC3TJWzgW0ZzwHwCZr3fFr0HpH+4OoPcgO4GlzjDCdf1VhhJiQizcRq+gIfB7H06zr6rqjpFZsLzg8m1M5/HyyUOk3+xXJSJ8iY/TLB5PVxVFTXssxTYCNV8YHXUCSwiCbwmeGRvjfSVR8/29iv5PmfFDINjXO8C0GRUvxH3YraCBLzy0CufLj794CerJ4XSjnb23t0VzGoY9kMKt04Vz7jua7WBOYfuNB+LzAy5eff4Cw29oYESU7eAgyyutWI3QMf36CbQ==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=PGHQ0jEwQLNPfQ9tSLfo6AIM81aGLaqAwR6g8zPZjIHcqqZMYkEgAkx55KzOSI/uX3Y/qRYk/Xr16TKAu78lkW3NUpqLPNUUh+D9UGs+iZf7vl0z8ISzEtFBV0SH+GdSDekZMp8dGwOP7kJs0WSkcU0B2BoszqwzDXPOYvClbGH+AvobzkspHkxs/QboOUOTeqKnPRRsemUbcoxyce+enA8lVfu89WaNiysiDayO2PHbOrZRm63/4dGVeBPxwksapviKfAa34Clo0qCRdJL2MKDX2MimbAte/e3HVbPbJbI8ILfYzMAiIuUNPBhlfXJ9JEnPWEU3T0pZSYv6euHgbg==
  • Authentication-results-original: lists.xenproject.org; dkim=none (message not signed) header.d=none; lists.xenproject.org; dmarc=none action=none header.from=arm.com;
  • Cc: Stefano Stabellini <sstabellini@xxxxxxxxxx>, Julien Grall <julien@xxxxxxx>, Wei Liu <wl@xxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Ian Jackson <ian.jackson@xxxxxxxxxxxxx>, George Dunlap <george.dunlap@xxxxxxxxxx>, Jan Beulich <jbeulich@xxxxxxxx>, nd@xxxxxxx, Volodymyr Babchuk <Volodymyr_Babchuk@xxxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Thu, 11 Jun 2020 11:59:19 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>
  • Nodisclaimer: true
  • Original-authentication-results: lists.xenproject.org; dkim=none (message not signed) header.d=none;lists.xenproject.org; dmarc=none action=none header.from=arm.com;

At the moment on Arm, a Linux guest running with KTPI enabled will
cause the following error when a context switch happens in user mode:
(XEN) p2m.c:1890: d1v0: Failed to walk page-table va 0xffffff837ebe0cd0

This patch is modifying the register runstate area handling on arm to
convert the guest address during the hypercall. During runtime context
switches the area is mapped to update the guest runstate copy.
The patch is also removing the initial copy which was done during the
hypercall handling as this is done on the current core when the context
is restore to go back to guest execution on arm.

As a consequence if the register runstate hypercall is called on one
vcpu for an other vcpu, the area will not be updated until the vcpu
will be executed (on arm only).

On x86, the behaviour is not modified, the address conversion is done
during the context switch and the area is updated fully during the
hypercall.
inline functions in headers could not be used as the architecture
domain.h is included before the global domain.h making it impossible
to use the struct vcpu inside the architecture header.
This should not have any performance impact as the hypercall is only
called once per vcpu usually.

Signed-off-by: Bertrand Marquis <bertrand.marquis@xxxxxxx>
---
 xen/arch/arm/domain.c        | 109 +++++++++++++++++++++++++++++------
 xen/arch/x86/domain.c        |  30 +++++++++-
 xen/arch/x86/x86_64/domain.c |   4 +-
 xen/common/domain.c          |  19 ++----
 xen/include/asm-arm/domain.h |   8 +++
 xen/include/asm-x86/domain.h |  16 +++++
 xen/include/xen/domain.h     |   4 ++
 xen/include/xen/sched.h      |  16 +----
 8 files changed, 153 insertions(+), 53 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 31169326b2..739059234f 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -19,6 +19,7 @@
 #include <xen/sched.h>
 #include <xen/softirq.h>
 #include <xen/wait.h>
+#include <xen/domain_page.h>
 
 #include <asm/alternative.h>
 #include <asm/cpuerrata.h>
@@ -275,36 +276,104 @@ static void ctxt_switch_to(struct vcpu *n)
     virt_timer_restore(n);
 }
 
-/* Update per-VCPU guest runstate shared memory area (if registered). */
-static void update_runstate_area(struct vcpu *v)
+void arch_cleanup_runstate_guest(struct vcpu *v)
 {
-    void __user *guest_handle = NULL;
-    struct vcpu_runstate_info runstate;
+    spin_lock(&v->arch.runstate_guest.lock);
 
-    if ( guest_handle_is_null(runstate_guest(v)) )
-        return;
+    /* cleanup previous page if any */
+    if ( v->arch.runstate_guest.page )
+    {
+        put_page_and_type(v->arch.runstate_guest.page);
+        v->arch.runstate_guest.page = NULL;
+        v->arch.runstate_guest.offset = 0;
+    }
 
-    memcpy(&runstate, &v->runstate, sizeof(runstate));
+    spin_unlock(&v->arch.runstate_guest.lock);
+}
 
-    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+int arch_setup_runstate_guest(struct vcpu *v,
+                            struct vcpu_register_runstate_memory_area area)
+{
+    struct page_info *page;
+    unsigned offset;
+
+    spin_lock(&v->arch.runstate_guest.lock);
+
+    /* cleanup previous page if any */
+    if ( v->arch.runstate_guest.page )
     {
-        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
-        guest_handle--;
-        runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
-        __raw_copy_to_guest(guest_handle,
-                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
-        smp_wmb();
+        put_page_and_type(v->arch.runstate_guest.page);
+        v->arch.runstate_guest.page = NULL;
+        v->arch.runstate_guest.offset = 0;
+    }
+
+    offset = ((vaddr_t)area.addr.v) & ~PAGE_MASK;
+    if ( offset > (PAGE_SIZE - sizeof(struct vcpu_runstate_info)) )
+    {
+        spin_unlock(&v->arch.runstate_guest.lock);
+        gprintk(XENLOG_DEBUG, "Runstate is crossing pages\n");
+        return -EINVAL;
+    }
+
+    /* provided address must be aligned to a 64bit */
+    if ( offset % alignof(struct vcpu_runstate_info) )
+    {
+        spin_unlock(&v->arch.runstate_guest.lock);
+        gprintk(XENLOG_DEBUG, "Runstate pointer is not aligned\n");
+        return -EINVAL;
+    }
+
+    page = get_page_from_gva(v, (vaddr_t)area.addr.v, GV2M_WRITE);
+    if ( !page )
+    {
+        spin_unlock(&v->arch.runstate_guest.lock);
+        gprintk(XENLOG_DEBUG, "Runstate pointer is not mapped\n");
+        return -EINVAL;
     }
 
-    __copy_to_guest(runstate_guest(v), &runstate, 1);
+    v->arch.runstate_guest.page = page;
+    v->arch.runstate_guest.offset = offset;
+
+    spin_unlock(&v->arch.runstate_guest.lock);
+
+    return 0;
+}
+
+
+/* Update per-VCPU guest runstate shared memory area (if registered). */
+static void update_runstate_area(struct vcpu *v)
+{
+    struct vcpu_runstate_info *guest_runstate;
+    void *p;
+
+    spin_lock(&v->arch.runstate_guest.lock);
 
-    if ( guest_handle )
+    if ( v->arch.runstate_guest.page )
     {
-        runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        p = __map_domain_page(v->arch.runstate_guest.page);
+        guest_runstate = p + v->arch.runstate_guest.offset;
+
+        if ( VM_ASSIST(v->domain, runstate_update_flag) )
+        {
+            v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+            guest_runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
+            smp_wmb();
+        }
+
+        memcpy(guest_runstate, &v->runstate, sizeof(v->runstate));
         smp_wmb();
-        __raw_copy_to_guest(guest_handle,
-                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
+
+        if ( VM_ASSIST(v->domain, runstate_update_flag) )
+        {
+            v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+            guest_runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+            smp_wmb();
+        }
+
+        unmap_domain_page(p);
     }
+
+    spin_unlock(&v->arch.runstate_guest.lock);
 }
 
 static void schedule_tail(struct vcpu *prev)
@@ -560,6 +629,8 @@ int arch_vcpu_create(struct vcpu *v)
     v->arch.saved_context.sp = (register_t)v->arch.cpu_info;
     v->arch.saved_context.pc = (register_t)continue_new_vcpu;
 
+    spin_lock_init(&v->arch.runstate_guest.lock);
+
     /* Idle VCPUs don't need the rest of this setup */
     if ( is_idle_vcpu(v) )
         return rc;
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index fee6c3931a..32bbed87d5 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1642,6 +1642,30 @@ void paravirt_ctxt_switch_to(struct vcpu *v)
         wrmsr_tsc_aux(v->arch.msrs->tsc_aux);
 }
 
+int arch_setup_runstate_guest(struct vcpu *v,
+                             struct vcpu_register_runstate_memory_area area)
+{
+    struct vcpu_runstate_info runstate;
+
+    runstate_guest(v) = area.addr.h;
+
+    if ( v == current )
+    {
+        __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+    }
+    else
+    {
+        vcpu_runstate_get(v, &runstate);
+        __copy_to_guest(runstate_guest(v), &runstate, 1);
+    }
+    return 0;
+}
+
+void arch_cleanup_runstate_guest(struct vcpu *v)
+{
+    set_xen_guest_handle(runstate_guest(v), NULL);
+}
+
 /* Update per-VCPU guest runstate shared memory area (if registered). */
 bool update_runstate_area(struct vcpu *v)
 {
@@ -1660,8 +1684,8 @@ bool update_runstate_area(struct vcpu *v)
     if ( VM_ASSIST(v->domain, runstate_update_flag) )
     {
         guest_handle = has_32bit_shinfo(v->domain)
-            ? &v->runstate_guest.compat.p->state_entry_time + 1
-            : &v->runstate_guest.native.p->state_entry_time + 1;
+            ? &v->arch.runstate_guest.compat.p->state_entry_time + 1
+            : &v->arch.runstate_guest.native.p->state_entry_time + 1;
         guest_handle--;
         runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
         __raw_copy_to_guest(guest_handle,
@@ -1674,7 +1698,7 @@ bool update_runstate_area(struct vcpu *v)
         struct compat_vcpu_runstate_info info;
 
         XLAT_vcpu_runstate_info(&info, &runstate);
-        __copy_to_guest(v->runstate_guest.compat, &info, 1);
+        __copy_to_guest(v->arch.runstate_guest.compat, &info, 1);
         rc = true;
     }
     else
diff --git a/xen/arch/x86/x86_64/domain.c b/xen/arch/x86/x86_64/domain.c
index c46dccc25a..b879e8dd2c 100644
--- a/xen/arch/x86/x86_64/domain.c
+++ b/xen/arch/x86/x86_64/domain.c
@@ -36,7 +36,7 @@ arch_compat_vcpu_op(
             break;
 
         rc = 0;
-        guest_from_compat_handle(v->runstate_guest.compat, area.addr.h);
+        guest_from_compat_handle(v->arch.runstate_guest.compat, area.addr.h);
 
         if ( v == current )
         {
@@ -49,7 +49,7 @@ arch_compat_vcpu_op(
             vcpu_runstate_get(v, &runstate);
             XLAT_vcpu_runstate_info(&info, &runstate);
         }
-        __copy_to_guest(v->runstate_guest.compat, &info, 1);
+        __copy_to_guest(v->arch.runstate_guest.compat, &info, 1);
 
         break;
     }
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 7cc9526139..0ca6bf4dbc 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -727,7 +727,10 @@ int domain_kill(struct domain *d)
         if ( cpupool_move_domain(d, cpupool0) )
             return -ERESTART;
         for_each_vcpu ( d, v )
+        {
+            arch_cleanup_runstate_guest(v);
             unmap_vcpu_info(v);
+        }
         d->is_dying = DOMDYING_dead;
         /* Mem event cleanup has to go here because the rings 
          * have to be put before we call put_domain. */
@@ -1167,7 +1170,7 @@ int domain_soft_reset(struct domain *d)
 
     for_each_vcpu ( d, v )
     {
-        set_xen_guest_handle(runstate_guest(v), NULL);
+        arch_cleanup_runstate_guest(v);
         unmap_vcpu_info(v);
     }
 
@@ -1484,7 +1487,6 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, 
XEN_GUEST_HANDLE_PARAM(void) arg)
     case VCPUOP_register_runstate_memory_area:
     {
         struct vcpu_register_runstate_memory_area area;
-        struct vcpu_runstate_info runstate;
 
         rc = -EFAULT;
         if ( copy_from_guest(&area, arg, 1) )
@@ -1493,18 +1495,7 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, 
XEN_GUEST_HANDLE_PARAM(void) arg)
         if ( !guest_handle_okay(area.addr.h, 1) )
             break;
 
-        rc = 0;
-        runstate_guest(v) = area.addr.h;
-
-        if ( v == current )
-        {
-            __copy_to_guest(runstate_guest(v), &v->runstate, 1);
-        }
-        else
-        {
-            vcpu_runstate_get(v, &runstate);
-            __copy_to_guest(runstate_guest(v), &runstate, 1);
-        }
+        rc = arch_setup_runstate_guest(v, area);
 
         break;
     }
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 4e2f582006..3a7f53e13d 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -11,6 +11,7 @@
 #include <asm/vgic.h>
 #include <asm/vpl011.h>
 #include <public/hvm/params.h>
+#include <public/vcpu.h>
 #include <xen/serial.h>
 #include <xen/rbtree.h>
 
@@ -206,6 +207,13 @@ struct arch_vcpu
      */
     bool need_flush_to_ram;
 
+    /* runstate guest info */
+    struct {
+        struct page_info *page;  /* guest page */
+        unsigned         offset; /* offset in page */
+        spinlock_t       lock;   /* lock to access page */
+    } runstate_guest;
+
 }  __cacheline_aligned;
 
 void vcpu_show_execution_state(struct vcpu *);
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index e8cee3d5e5..f917b48cb8 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -11,6 +11,11 @@
 #include <asm/x86_emulate.h>
 #include <public/vcpu.h>
 #include <public/hvm/hvm_info_table.h>
+#ifdef CONFIG_COMPAT
+#include <compat/vcpu.h>
+DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
+#endif
+
 
 #define has_32bit_shinfo(d)    ((d)->arch.has_32bit_shinfo)
 
@@ -626,6 +631,17 @@ struct arch_vcpu
     struct {
         bool next_interrupt_enabled;
     } monitor;
+
+#ifndef CONFIG_COMPAT
+# define runstate_guest(v) ((v)->arch.runstate_guest)
+    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
+#else
+# define runstate_guest(v) ((v)->arch.runstate_guest.native)
+    union {
+        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
+        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
+    } runstate_guest;
+#endif
 };
 
 struct guest_memory_policy
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index 7e51d361de..d5d73ce997 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -63,6 +63,10 @@ void arch_vcpu_destroy(struct vcpu *v);
 int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset);
 void unmap_vcpu_info(struct vcpu *v);
 
+int arch_setup_runstate_guest(struct vcpu *v,
+                            struct vcpu_register_runstate_memory_area area);
+void arch_cleanup_runstate_guest(struct vcpu *v);
+
 int arch_domain_create(struct domain *d,
                        struct xen_domctl_createdomain *config);
 
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index ac53519d7f..fac030fb83 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -29,11 +29,6 @@
 #include <public/vcpu.h>
 #include <public/event_channel.h>
 
-#ifdef CONFIG_COMPAT
-#include <compat/vcpu.h>
-DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
-#endif
-
 /*
  * Stats
  *
@@ -166,16 +161,7 @@ struct vcpu
     struct sched_unit *sched_unit;
 
     struct vcpu_runstate_info runstate;
-#ifndef CONFIG_COMPAT
-# define runstate_guest(v) ((v)->runstate_guest)
-    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
-#else
-# define runstate_guest(v) ((v)->runstate_guest.native)
-    union {
-        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
-        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
-    } runstate_guest; /* guest address */
-#endif
+
     unsigned int     new_state;
 
     /* Has the FPU been initialised? */
-- 
2.17.1




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.