
Re: [Xen-devel] [PATCH v2 2/2] xen: implement VCPUOP_register_runstate_phys_memory_area



Hi,

On 23/04/2019 09:10, Andrii Anisov wrote:
From: Andrii Anisov <andrii_anisov@xxxxxxxx>

VCPUOP_register_runstate_phys_memory_area is implemented via runstate
area mapping.
Signed-off-by: Andrii Anisov <andrii_anisov@xxxxxxxx>
---
  xen/arch/arm/domain.c        |  62 +++++++++++++++++--------
  xen/arch/x86/domain.c        | 105 +++++++++++++++++++++++++++++++------------
  xen/common/domain.c          |  80 ++++++++++++++++++++++++++++++++-
  xen/include/asm-arm/domain.h |   2 +
  xen/include/xen/domain.h     |   2 +
  xen/include/xen/sched.h      |   8 ++++
  6 files changed, 210 insertions(+), 49 deletions(-)

This patch is quite hard to read because you are reworking the code and at the same time implementing the new VCPUOP. How about moving the rework into a separate patch? The implementation could then be folded into the previous patch, as suggested by George.


diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 6dc633e..8e24e63 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -275,32 +275,55 @@ static void ctxt_switch_to(struct vcpu *n)
  }
/* Update per-VCPU guest runstate shared memory area (if registered). */
-static void update_runstate_area(struct vcpu *v)
+void update_runstate_area(struct vcpu *v)

Why do you export update_runstate_area()? The function does not seem to be called from outside this file.

  {
-    void __user *guest_handle = NULL;
+    if ( !guest_handle_is_null(runstate_guest(v)) )
+    {
+        void __user *guest_handle = NULL;
+        if ( VM_ASSIST(v->domain, runstate_update_flag) )
+        {
+            guest_handle = &v->runstate_guest.p->state_entry_time + 1;
+            guest_handle--;
+            v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+            __raw_copy_to_guest(guest_handle,
+                                (void *)(&v->runstate.state_entry_time + 1) - 1,
+                                1);
+            smp_wmb();
+        }
- if ( guest_handle_is_null(runstate_guest(v)) )
-        return;
+        __copy_to_guest(runstate_guest(v), &v->runstate, 1);
- if ( VM_ASSIST(v->domain, runstate_update_flag) )
-    {
-        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
-        guest_handle--;
-        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
-        __raw_copy_to_guest(guest_handle,
-                            (void *)(&v->runstate.state_entry_time + 1) - 1, 1);
-        smp_wmb();
+        if ( guest_handle )
+        {
+            v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+            smp_wmb();
+            __raw_copy_to_guest(guest_handle,
+                                (void *)(&v->runstate.state_entry_time + 1) - 1,
+                                1);
+        }
      }
- __copy_to_guest(runstate_guest(v), &v->runstate, 1);
-
-    if ( guest_handle )
+    spin_lock(&v->mapped_runstate_lock);
+    if ( v->mapped_runstate )

The code looks a bit odd to me: you seem to allow a guest to provide two runstate areas, one using a guest virtual address and the other using a guest physical address.

It would be best if we prevent a guest from mixing and matching them. IOW, if the guest provides a physical address first, then *all* subsequent calls should use a physical address. Alternatively this could be a per-vCPU decision.
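
For instance (a rough sketch only; the field and helper names below are made up and not part of this patch), a per-vCPU discriminator checked by both hypercall paths could look like:

enum runstate_area_type {
    RUNSTATE_NONE,
    RUNSTATE_VADDR,  /* VCPUOP_register_runstate_memory_area */
    RUNSTATE_PADDR,  /* VCPUOP_register_runstate_phys_memory_area */
};

/* Latch the first interface the guest used; refuse to switch afterwards. */
static int runstate_check_type(struct vcpu *v, enum runstate_area_type t)
{
    if ( v->runstate_area_type == RUNSTATE_NONE )
        v->runstate_area_type = t;
    else if ( v->runstate_area_type != t )
        return -EBUSY;

    return 0;
}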

      {
-        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
-        smp_wmb();
-        __raw_copy_to_guest(guest_handle,
-                            (void *)(&v->runstate.state_entry_time + 1) - 1, 1);
+        if ( VM_ASSIST(v->domain, runstate_update_flag) )
+        {
+            v->mapped_runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
+            smp_wmb();
+            v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+        }
+
+        memcpy(v->mapped_runstate, &v->runstate, sizeof(v->runstate));
+
+        if ( VM_ASSIST(v->domain, runstate_update_flag) )
+        {
+            v->mapped_runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+            smp_wmb();
+            v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        }
      }
+    spin_unlock(&v->mapped_runstate_lock);
+

NIT: The newline is not necessary here.

  }
static void schedule_tail(struct vcpu *prev)
@@ -998,6 +1021,7 @@ long do_arm_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) a
      {
          case VCPUOP_register_vcpu_info:
          case VCPUOP_register_runstate_memory_area:
+        case VCPUOP_register_runstate_phys_memory_area:
              return do_vcpu_op(cmd, vcpuid, arg);
          default:
              return -EINVAL;


[...]

diff --git a/xen/common/domain.c b/xen/common/domain.c
index ae22049..6df76c6 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -149,6 +149,7 @@ struct vcpu *vcpu_create(
      v->dirty_cpu = VCPU_CPU_CLEAN;
spin_lock_init(&v->virq_lock);
+    spin_lock_init(&v->mapped_runstate_lock);
tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);

@@ -699,6 +700,69 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
      return 0;
  }
+static void _unmap_runstate_area(struct vcpu *v)

A better name would be unmap_runstate_area_locked(), so you avoid the reserved name and make the intended use clear.

+{
+    mfn_t mfn;
+
+    if ( !v->mapped_runstate )
+        return;
+
+    mfn = _mfn(virt_to_mfn(runstate_guest(v).p));

As pointed out by Jan in the previous version:

The pointer is the result of __map_domain_page_global(). So I don't
think you can legitimately use virt_to_mfn() on it, at least not on
x86; domain_page_map_to_mfn() is what you want to use here.
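
FWIW, an untested sketch of the function with Jan's suggestion applied (and the rename from above) would be:

static void unmap_runstate_area_locked(struct vcpu *v)
{
    mfn_t mfn;

    if ( !v->mapped_runstate )
        return;

    /* Derive the MFN from the global mapping itself. */
    mfn = domain_page_map_to_mfn(v->mapped_runstate);

    unmap_domain_page_global((void *)
                             ((unsigned long)v->mapped_runstate &
                              PAGE_MASK));

    v->mapped_runstate = NULL;
    put_page_and_type(mfn_to_page(mfn));
}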

+
+    unmap_domain_page_global((void *)
+                             ((unsigned long)v->mapped_runstate &
+                              PAGE_MASK));
+
+    v->mapped_runstate = NULL;
+    put_page_and_type(mfn_to_page(mfn));
+}

We seem to have this pattern in a few places now (see unmap_guest_page). It would be good to introduce helpers that can be used everywhere (probably lifted from common/event_fifo.c).
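
For reference, the event_fifo.c helpers look roughly like this (they would need to move to common code and be exported under suitable names):

static int map_guest_page(struct domain *d, uint64_t gfn, void **virt)
{
    struct page_info *p;

    p = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
    if ( !p )
        return -EINVAL;

    if ( !get_page_type(p, PGT_writable_page) )
    {
        put_page(p);
        return -EINVAL;
    }

    *virt = __map_domain_page_global(p);
    if ( !*virt )
    {
        put_page_and_type(p);
        return -ENOMEM;
    }

    return 0;
}

static void unmap_guest_page(void *virt)
{
    struct page_info *page;

    if ( !virt )
        return;

    virt = (void *)((unsigned long)virt & PAGE_MASK);
    page = mfn_to_page(domain_page_map_to_mfn(virt));

    unmap_domain_page_global(virt);
    put_page_and_type(page);
}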

+
+static int map_runstate_area(struct vcpu *v,
+                      struct vcpu_register_runstate_memory_area *area)
+{
+    unsigned long offset = area->addr.p & ~PAGE_MASK;
+    gfn_t gfn = gaddr_to_gfn(area->addr.p);
+    struct domain *d = v->domain;
+    void *mapping;
+    struct page_info *page;
+    size_t size = sizeof (struct vcpu_runstate_info );

The space before ')' is not necessary.

But is the variable really necessary?

+
+    if ( offset > (PAGE_SIZE - size) )
+        return -EINVAL;
+
+    page = get_page_from_gfn(d, gfn_x(gfn), NULL, P2M_ALLOC);
+    if ( !page )
+        return -EINVAL;
+
+    if ( !get_page_type(page, PGT_writable_page) )
+    {
+        put_page(page);
+        return -EINVAL;
+    }
+
+    mapping = __map_domain_page_global(page);
+
+    if ( mapping == NULL )
+    {
+        put_page_and_type(page);
+        return -ENOMEM;
+    }
+
+    spin_lock(&v->mapped_runstate_lock);
+    _unmap_runstate_area(v);
+    v->mapped_runstate = mapping + offset;
+    spin_unlock(&v->mapped_runstate_lock);
+
+    return 0;
+}
+
+static void unmap_runstate_area(struct vcpu *v)
+{
+    spin_lock(&v->mapped_runstate_lock);
+    _unmap_runstate_area(v);
+    spin_unlock(&v->mapped_runstate_lock);
+}
+
  int domain_kill(struct domain *d)
  {
      int rc = 0;
@@ -737,7 +801,11 @@ int domain_kill(struct domain *d)
          if ( cpupool_move_domain(d, cpupool0) )
              return -ERESTART;
          for_each_vcpu ( d, v )
+        {
+            set_xen_guest_handle(runstate_guest(v), NULL);
+            unmap_runstate_area(v);
              unmap_vcpu_info(v);
+        }
          d->is_dying = DOMDYING_dead;
          /* Mem event cleanup has to go here because the rings
           * have to be put before we call put_domain. */
@@ -1192,6 +1260,7 @@ int domain_soft_reset(struct domain *d)
      for_each_vcpu ( d, v )
      {
          set_xen_guest_handle(runstate_guest(v), NULL);
+        unmap_runstate_area(v);
          unmap_vcpu_info(v);
      }
@@ -1536,8 +1605,17 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
      }
case VCPUOP_register_runstate_phys_memory_area:
-        rc = -EOPNOTSUPP;
+    {
+        struct vcpu_register_runstate_memory_area area;
+
+        rc = -EFAULT;
+        if ( copy_from_guest(&area, arg, 1) )
+            break;
+
+        rc = map_runstate_area(v, &area);
+
          break;
+    }
#ifdef VCPU_TRAP_NMI
      case VCPUOP_send_nmi:
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 312fec8..3fb6ea2 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -217,6 +217,8 @@ void vcpu_show_execution_state(struct vcpu *);
  void vcpu_show_registers(const struct vcpu *);
  void vcpu_switch_to_aarch64_mode(struct vcpu *);
+void update_runstate_area(struct vcpu *);
+
  /*
   * Due to the restriction of GICv3, the number of vCPUs in AFF0 is
   * limited to 16, thus only the first 4 bits of AFF0 are legal. We will
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index d1bfc82..ecddcfe 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -118,4 +118,6 @@ struct vnuma_info {
void vnuma_destroy(struct vnuma_info *vnuma);

+struct vcpu_register_runstate_memory_area;
+
  #endif /* __XEN_DOMAIN_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 748bb0f..2afe31c 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -163,15 +163,23 @@ struct vcpu
      void            *sched_priv;    /* scheduler-specific data */
struct vcpu_runstate_info runstate;
+
+    spinlock_t      mapped_runstate_lock;
+
  #ifndef CONFIG_COMPAT
  # define runstate_guest(v) ((v)->runstate_guest)
      XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
+    vcpu_runstate_info_t *mapped_runstate;
  #else
  # define runstate_guest(v) ((v)->runstate_guest.native)
      union {
          XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
          XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
      } runstate_guest; /* guest address */
+    union {
+        vcpu_runstate_info_t* native;
+        vcpu_runstate_info_compat_t* compat;
+    } mapped_runstate; /* guest address */

The combination of mapped_runstate and runstate_guest is a bit confusing. I think you want to rework the interface to make clear that only one of them can be in use at a time, and which one. Maybe:

union
{
   /* Legacy interface to be used when the guest provides a virtual address */
   union {
      XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
      ...
   } virt;

   /* Interface used when the guest provides a physical address */
   union {
   } phys;
} runstate_guest;

runstate_guest_type /* could be a bool or enum */
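
Fleshed out (a rough, untested sketch), the struct vcpu fields might then look like:

enum {
    RUNSTATE_GUEST_NONE,
    RUNSTATE_GUEST_VIRT,
    RUNSTATE_GUEST_PHYS,
} runstate_guest_type;

union {
    /* Legacy interface: the guest registered a virtual address. */
    union {
        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
    } virt;

    /* New interface: the guest registered a physical address, mapped by Xen. */
    union {
        vcpu_runstate_info_t *native;
        vcpu_runstate_info_compat_t *compat;
    } phys;
} runstate_guest;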

Jan what do you think?

Cheers,

--
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel