+        return -EINVAL;
+    }
+
+    page = get_page_from_gva(v, vaddr, GV2M_WRITE);
+    if ( !page )
+    {
+        gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%"PRIvaddr
+                ": Page is not mapped\n", vaddr);
+        return -EINVAL;
+    }
+
+    mfn[0] = page_to_mfn(page);
+    v->arch.runstate_guest_page[0] = page;
+
+    if ( offset > (PAGE_SIZE - sizeof(struct vcpu_runstate_info)) )
+    {
+        /* guest area is crossing pages */
+        page = get_page_from_gva(v, vaddr + PAGE_SIZE, GV2M_WRITE);
+        if ( !page )
+        {
+            put_page(v->arch.runstate_guest_page[0]);
+            gprintk(XENLOG_WARNING,
+                    "Cannot map runstate pointer at 0x%"PRIvaddr
+                    ": 2nd Page is not mapped\n", vaddr);
+            return -EINVAL;
+        }
+        mfn[1] = page_to_mfn(page);
+        v->arch.runstate_guest_page[1] = page;
+        numpages = 2;
+    }
+    else
+    {
+        v->arch.runstate_guest_page[1] = NULL;
+        numpages = 1;
+    }
  
-    memcpy(&runstate, &v->runstate, sizeof(runstate));
+    p = vmap(mfn, numpages);
+    if ( !p )
+    {
+        put_page(v->arch.runstate_guest_page[0]);
+        if ( numpages == 2 )
+            put_page(v->arch.runstate_guest_page[1]);
  
-    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+        gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%"PRIvaddr
+                ": vmap error\n", vaddr);
+        return -EINVAL;
+    }
+
+    v->arch.runstate_guest = p + offset;
+
+    if ( v == current )
+        memcpy(v->arch.runstate_guest, &v->runstate, sizeof(v->runstate));
+    else
      {
-        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
-        guest_handle--;
-        runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
-        __raw_copy_to_guest(guest_handle,
-                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
-        smp_wmb();
+        vcpu_runstate_get(v, &runstate);
+        memcpy(v->arch.runstate_guest, &runstate, sizeof(v->runstate));
      }
  
-    __copy_to_guest(runstate_guest(v), &runstate, 1);
+    return 0;
+}
+
+int arch_vcpu_setup_runstate(struct vcpu *v,
+                             struct vcpu_register_runstate_memory_area area)
+{
+    int rc;
+
+    spin_lock(&v->arch.runstate_guest_lock);
+
+    /* cleanup if we are recalled */
+    cleanup_runstate_vcpu_locked(v);
+
+    rc = setup_runstate_vcpu_locked(v, (vaddr_t)area.addr.v);
+
+    spin_unlock(&v->arch.runstate_guest_lock);
  
-    if ( guest_handle )
+    return rc;
+}
+
+
+/* Update per-VCPU guest runstate shared memory area (if registered). */
+static void update_runstate_area(struct vcpu *v)
+{
+    spin_lock(&v->arch.runstate_guest_lock);
+
+    if ( v->arch.runstate_guest )
      {
-        runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
-        smp_wmb();
-        __raw_copy_to_guest(guest_handle,
-                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
+        if ( VM_ASSIST(v->domain, runstate_update_flag) )
+        {
+            v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+            write_atomic(&v->arch.runstate_guest->state_entry_time,
+                         v->runstate.state_entry_time);
+        }
      }
+
+    spin_unlock(&v->arch.runstate_guest_lock);
  }
  
  static void schedule_tail(struct vcpu *prev)
@@ -560,6 +681,8 @@ int arch_vcpu_create(struct vcpu *v)
      v->arch.saved_context.sp = (register_t)v->arch.cpu_info;
      v->arch.saved_context.pc = (register_t)continue_new_vcpu;
  
+    spin_lock_init(&v->arch.runstate_guest_lock);
+
      /* Idle VCPUs don't need the rest of this setup */
      if ( is_idle_vcpu(v) )
          return rc;
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index fee6c3931a..b9b81e94e5 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1642,6 +1642,29 @@ void paravirt_ctxt_switch_to(struct vcpu *v)
          wrmsr_tsc_aux(v->arch.msrs->tsc_aux);
  }
  
+int arch_vcpu_setup_runstate(struct vcpu *v,
+                             struct vcpu_register_runstate_memory_area area)
+{
+    struct vcpu_runstate_info runstate;
+
+    runstate_guest(v) = area.addr.h;
+
+    if ( v == current )
+        __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+    else
+    {
+        vcpu_runstate_get(v, &runstate);
+        __copy_to_guest(runstate_guest(v), &runstate, 1);
+    }
+
+    return 0;
+}
+
+void arch_vcpu_cleanup_runstate(struct vcpu *v)
+{
+    set_xen_guest_handle(runstate_guest(v), NULL);
+}
+
  /* Update per-VCPU guest runstate shared memory area (if registered). */
  bool update_runstate_area(struct vcpu *v)
  {
@@ -1660,8 +1683,8 @@ bool update_runstate_area(struct vcpu *v)
      if ( VM_ASSIST(v->domain, runstate_update_flag) )
      {
          guest_handle = has_32bit_shinfo(v->domain)
-            ? &v->runstate_guest.compat.p->state_entry_time + 1
-            : &v->runstate_guest.native.p->state_entry_time + 1;
+            ? &v->arch.runstate_guest.compat.p->state_entry_time + 1
+            : &v->arch.runstate_guest.native.p->state_entry_time + 1;
          guest_handle--;
          runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
          __raw_copy_to_guest(guest_handle,
@@ -1674,7 +1697,7 @@ bool update_runstate_area(struct vcpu *v)
          struct compat_vcpu_runstate_info info;
  
          XLAT_vcpu_runstate_info(&info, &runstate);
-        __copy_to_guest(v->runstate_guest.compat, &info, 1);
+        __copy_to_guest(v->arch.runstate_guest.compat, &info, 1);
          rc = true;
      }
      else
diff --git a/xen/arch/x86/x86_64/domain.c b/xen/arch/x86/x86_64/domain.c
index c46dccc25a..b879e8dd2c 100644
--- a/xen/arch/x86/x86_64/domain.c
+++ b/xen/arch/x86/x86_64/domain.c
@@ -36,7 +36,7 @@ arch_compat_vcpu_op(
              break;
  
          rc = 0;
-        guest_from_compat_handle(v->runstate_guest.compat, area.addr.h);
+        guest_from_compat_handle(v->arch.runstate_guest.compat, area.addr.h);
  
          if ( v == current )
          {
@@ -49,7 +49,7 @@ arch_compat_vcpu_op(
              vcpu_runstate_get(v, &runstate);
              XLAT_vcpu_runstate_info(&info, &runstate);
          }
-        __copy_to_guest(v->runstate_guest.compat, &info, 1);
+        __copy_to_guest(v->arch.runstate_guest.compat, &info, 1);
  
          break;
      }
diff --git a/xen/common/domain.c b/xen/common/domain.c
index f0f9c62feb..739c6b7b62 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -727,7 +727,10 @@ int domain_kill(struct domain *d)
          if ( cpupool_move_domain(d, cpupool0) )
              return -ERESTART;
          for_each_vcpu ( d, v )
+        {
+            arch_vcpu_cleanup_runstate(v);
              unmap_vcpu_info(v);
+        }
          d->is_dying = DOMDYING_dead;
          /* Mem event cleanup has to go here because the rings
           * have to be put before we call put_domain. */
@@ -1167,7 +1170,7 @@ int domain_soft_reset(struct domain *d)
  
      for_each_vcpu ( d, v )
      {
-        set_xen_guest_handle(runstate_guest(v), NULL);
+        arch_vcpu_cleanup_runstate(v);
          unmap_vcpu_info(v);
      }
  
@@ -1494,7 +1497,6 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
      case VCPUOP_register_runstate_memory_area:
      {
          struct vcpu_register_runstate_memory_area area;
-        struct vcpu_runstate_info runstate;
  
          rc = -EFAULT;
          if ( copy_from_guest(&area, arg, 1) )
@@ -1503,18 +1505,7 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
          if ( !guest_handle_okay(area.addr.h, 1) )
              break;
  
-        rc = 0;
-        runstate_guest(v) = area.addr.h;
-
-        if ( v == current )
-        {
-            __copy_to_guest(runstate_guest(v), &v->runstate, 1);
-        }
-        else
-        {
-            vcpu_runstate_get(v, &runstate);
-            __copy_to_guest(runstate_guest(v), &runstate, 1);
-        }
+        rc = arch_vcpu_setup_runstate(v, area);
  
          break;
      }
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 6819a3bf38..2f62c3e8f5 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -204,6 +204,15 @@ struct arch_vcpu
       */
      bool need_flush_to_ram;
  
+    /* runstate guest lock */
+    spinlock_t runstate_guest_lock;
+
+    /* runstate guest info */
+    struct vcpu_runstate_info *runstate_guest;
+
+    /* runstate pages mapped for runstate_guest */
+    struct page_info *runstate_guest_page[2];
+
  }  __cacheline_aligned;
  
  void vcpu_show_execution_state(struct vcpu *);
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 635335634d..007ccfbf9f 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -11,6 +11,11 @@
  #include <asm/x86_emulate.h>
  #include <public/vcpu.h>
  #include <public/hvm/hvm_info_table.h>
+#ifdef CONFIG_COMPAT
+#include <compat/vcpu.h>
+DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
+#endif
+
  
  #define has_32bit_shinfo(d)    ((d)->arch.has_32bit_shinfo)
  
@@ -638,6 +643,17 @@ struct arch_vcpu
      struct {
          bool next_interrupt_enabled;
      } monitor;
+
+#ifndef CONFIG_COMPAT
+# define runstate_guest(v) ((v)->arch.runstate_guest)
+    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
+#else
+# define runstate_guest(v) ((v)->arch.runstate_guest.native)
+    union {
+        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
+        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
+    } runstate_guest;
+#endif
  };
  
  struct guest_memory_policy
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index 7e51d361de..5e8cbba31d 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -5,6 +5,7 @@
  #include <xen/types.h>
  
  #include <public/xen.h>
+#include <public/vcpu.h>
  #include <asm/domain.h>
  #include <asm/numa.h>
  
@@ -63,6 +64,10 @@ void arch_vcpu_destroy(struct vcpu *v);
  int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset);
  void unmap_vcpu_info(struct vcpu *v);
  
+int arch_vcpu_setup_runstate(struct vcpu *v,
+                             struct vcpu_register_runstate_memory_area area);
+void arch_vcpu_cleanup_runstate(struct vcpu *v);
+
  int arch_domain_create(struct domain *d,
                         struct xen_domctl_createdomain *config);
  
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index ac53519d7f..fac030fb83 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -29,11 +29,6 @@
  #include <public/vcpu.h>
  #include <public/event_channel.h>
  
-#ifdef CONFIG_COMPAT
-#include <compat/vcpu.h>
-DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
-#endif
-
  /*
   * Stats
   *
@@ -166,16 +161,7 @@ struct vcpu
      struct sched_unit *sched_unit;
  
      struct vcpu_runstate_info runstate;
-#ifndef CONFIG_COMPAT
-# define runstate_guest(v) ((v)->runstate_guest)
-    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
-#else
-# define runstate_guest(v) ((v)->runstate_guest.native)
-    union {
-        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
-        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
-    } runstate_guest; /* guest address */
-#endif
+
      unsigned int     new_state;
  
      /* Has the FPU been initialised? */