[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] lock down hypercall continuation encoding masks



Andrew validly points out that even if these masks aren't a formal part
of the hypercall interface, we aren't free to change them: A guest
suspended for migration in the middle of a continuation would fail to
work if resumed on a hypervisor using a different value. Hence add
respective comments to their definitions.

Additionally, to help future extensibility as well as in the spirit of
reducing undefined behavior as much as possible, refuse hypercalls made
with the respective bits non-zero when the respective sub-ops don't
make use of those bits.

Reported-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -5458,16 +5458,34 @@ static int hvmop_destroy_ioreq_server(
     return rc;
 }
 
+/*
+ * Note that this value is effectively part of the ABI, even if we don't need
+ * to make it a formal part of it: A guest suspended for migration in the
+ * middle of a continuation would fail to work if resumed on a hypervisor
+ * using a different value.
+ */
 #define HVMOP_op_mask 0xff
 
 long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
 
 {
     struct domain *curr_d = current->domain;
-    unsigned long start_iter = op & ~HVMOP_op_mask;
+    unsigned long start_iter, mask;
     long rc = 0;
 
-    switch ( op &= HVMOP_op_mask )
+    switch ( op & HVMOP_op_mask )
+    {
+    default:
+        mask = ~0UL;
+        break;
+    case HVMOP_modified_memory:
+    case HVMOP_set_mem_type:
+        mask = HVMOP_op_mask;
+        break;
+    }
+
+    start_iter = op & ~mask;
+    switch ( op &= mask )
     {
     case HVMOP_create_ioreq_server:
         rc = hvmop_create_ioreq_server(
@@ -6120,7 +6138,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
 
     if ( rc == -ERESTART )
     {
-        ASSERT(!(start_iter & HVMOP_op_mask));
+        ASSERT(!(start_iter & mask));
         rc = hypercall_create_continuation(__HYPERVISOR_hvm_op, "lh",
                                            op | start_iter, arg);
     }
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4661,9 +4661,8 @@ int xenmem_add_to_physmap_one(
 long arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
     int rc;
-    int op = cmd & MEMOP_CMD_MASK;
 
-    switch ( op )
+    switch ( cmd )
     {
     case XENMEM_set_memory_map:
     {
@@ -4837,7 +4836,7 @@ long arch_memory_op(unsigned long cmd, X
         if ( d == NULL )
             return -ESRCH;
 
-        if ( op == XENMEM_set_pod_target )
+        if ( cmd == XENMEM_set_pod_target )
             rc = xsm_set_pod_target(XSM_PRIV, d);
         else
             rc = xsm_get_pod_target(XSM_PRIV, d);
@@ -4845,7 +4844,7 @@ long arch_memory_op(unsigned long cmd, X
         if ( rc != 0 )
             goto pod_target_out_unlock;
 
-        if ( op == XENMEM_set_pod_target )
+        if ( cmd == XENMEM_set_pod_target )
         {
             if ( target.target_pages > d->max_pages )
             {
@@ -4859,7 +4858,7 @@ long arch_memory_op(unsigned long cmd, X
         if ( rc == -ERESTART )
         {
             rc = hypercall_create_continuation(
-                __HYPERVISOR_memory_op, "lh", op, arg);
+                __HYPERVISOR_memory_op, "lh", cmd, arg);
         }
         else if ( rc >= 0 )
         {
--- a/xen/arch/x86/x86_64/compat/mm.c
+++ b/xen/arch/x86/x86_64/compat/mm.c
@@ -53,9 +53,8 @@ int compat_arch_memory_op(unsigned long 
     compat_pfn_t mfn;
     unsigned int i;
     int rc = 0;
-    int op = cmd & MEMOP_CMD_MASK;
 
-    switch ( op )
+    switch ( cmd )
     {
     case XENMEM_set_memory_map:
     {
@@ -192,7 +191,7 @@ int compat_arch_memory_op(unsigned long 
         xen_mem_event_op_t meo;
         if ( copy_from_guest(&meo, arg, 1) )
             return -EFAULT;
-        rc = do_mem_event_op(op, meo.domain, (void *) &meo);
+        rc = do_mem_event_op(cmd, meo.domain, &meo);
         if ( !rc && __copy_to_guest(arg, &meo, 1) )
             return -EFAULT;
         break;
@@ -205,7 +204,7 @@ int compat_arch_memory_op(unsigned long 
             return -EFAULT;
         if ( mso.op == XENMEM_sharing_op_audit )
             return mem_sharing_audit(); 
-        rc = do_mem_event_op(op, mso.domain, (void *) &mso);
+        rc = do_mem_event_op(cmd, mso.domain, &mso);
         if ( !rc && __copy_to_guest(arg, &mso, 1) )
             return -EFAULT;
         break;
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -906,9 +906,8 @@ long subarch_memory_op(unsigned long cmd
     xen_pfn_t mfn, last_mfn;
     unsigned int i;
     long rc = 0;
-    int op = cmd & MEMOP_CMD_MASK;
 
-    switch ( op )
+    switch ( cmd )
     {
     case XENMEM_machphys_mfn_list:
         if ( copy_from_guest(&xmml, arg, 1) )
@@ -989,7 +988,7 @@ long subarch_memory_op(unsigned long cmd
         xen_mem_event_op_t meo;
         if ( copy_from_guest(&meo, arg, 1) )
             return -EFAULT;
-        rc = do_mem_event_op(op, meo.domain, (void *) &meo);
+        rc = do_mem_event_op(cmd, meo.domain, &meo);
         if ( !rc && __copy_to_guest(arg, &meo, 1) )
             return -EFAULT;
         break;
@@ -1002,7 +1001,7 @@ long subarch_memory_op(unsigned long cmd
             return -EFAULT;
         if ( mso.op == XENMEM_sharing_op_audit )
             return mem_sharing_audit(); 
-        rc = do_mem_event_op(op, mso.domain, (void *) &mso);
+        rc = do_mem_event_op(cmd, mso.domain, &mso);
         if ( !rc && __copy_to_guest(arg, &mso, 1) )
             return -EFAULT;
         break;
--- a/xen/common/compat/grant_table.c
+++ b/xen/common/compat/grant_table.c
@@ -65,6 +65,8 @@ int compat_grant_table_op(unsigned int c
 
     set_xen_guest_handle(cnt_uop, NULL);
     cmd_op = cmd & GNTTABOP_CMD_MASK;
+    if ( cmd_op != GNTTABOP_cache_flush )
+        cmd_op = cmd;
     switch ( cmd_op )
     {
 #define CASE(name) \
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -62,6 +62,12 @@ integer_param("gnttab_max_frames", max_g
 static unsigned int __read_mostly max_maptrack_frames;
 integer_param("gnttab_max_maptrack_frames", max_maptrack_frames);
 
+/*
+ * Note that the three values below are effectively part of the ABI, even if
+ * we don't need to make them a formal part of it: A guest suspended for
+ * migration in the middle of a continuation would fail to work if resumed on
+ * a hypervisor using different values.
+ */
 #define GNTTABOP_CONTINUATION_ARG_SHIFT 12
 #define GNTTABOP_CMD_MASK               ((1<<GNTTABOP_CONTINUATION_ARG_SHIFT)-1)
 #define GNTTABOP_ARG_MASK               (~GNTTABOP_CMD_MASK)
@@ -2624,9 +2630,12 @@ do_grant_table_op(
     
     if ( (int)count < 0 )
         return -EINVAL;
+
+    if ( (cmd &= GNTTABOP_CMD_MASK) != GNTTABOP_cache_flush && opaque_in )
+        return -ENOSYS;
     
     rc = -EFAULT;
-    switch ( cmd &= GNTTABOP_CMD_MASK )
+    switch ( cmd )
     {
     case GNTTABOP_map_grant_ref:
     {
--- a/xen/common/mem_access.c
+++ b/xen/common/mem_access.c
@@ -58,6 +58,7 @@ void mem_access_resume(struct domain *d)
 int mem_access_memop(unsigned long cmd,
                      XEN_GUEST_HANDLE_PARAM(xen_mem_access_op_t) arg)
 {
+    unsigned long start_iter = cmd & ~MEMOP_CMD_MASK;
     long rc;
     xen_mem_access_op_t mao;
     struct domain *d;
@@ -84,14 +85,16 @@ int mem_access_memop(unsigned long cmd,
     switch ( mao.op )
     {
     case XENMEM_access_op_resume:
-        mem_access_resume(d);
-        rc = 0;
+        if ( unlikely(start_iter) )
+            rc = -ENOSYS;
+        else
+        {
+            mem_access_resume(d);
+            rc = 0;
+        }
         break;
 
     case XENMEM_access_op_set_access:
-    {
-        unsigned long start_iter = cmd & ~MEMOP_CMD_MASK;
-
         rc = -EINVAL;
         if ( (mao.pfn != ~0ull) &&
              (mao.nr < start_iter ||
@@ -108,12 +111,15 @@ int mem_access_memop(unsigned long cmd,
                                                XENMEM_access_op | rc, arg);
         }
         break;
-    }
 
     case XENMEM_access_op_get_access:
     {
         xenmem_access_t access;
 
+        rc = -ENOSYS;
+        if ( unlikely(start_iter) )
+            break;
+
         rc = -EINVAL;
         if ( (mao.pfn > domain_get_maximum_gpfn(d)) && mao.pfn != ~0ull )
             break;
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -779,16 +779,25 @@ long do_memory_op(unsigned long cmd, XEN
         break;
 
     case XENMEM_exchange:
+        if ( unlikely(start_extent) )
+            return -ENOSYS;
+
         rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
         break;
 
     case XENMEM_maximum_ram_page:
+        if ( unlikely(start_extent) )
+            return -ENOSYS;
+
         rc = max_page;
         break;
 
     case XENMEM_current_reservation:
     case XENMEM_maximum_reservation:
     case XENMEM_maximum_gpfn:
+        if ( unlikely(start_extent) )
+            return -ENOSYS;
+
         if ( copy_from_guest(&domid, arg, 1) )
             return -EFAULT;
 
@@ -912,6 +921,9 @@ long do_memory_op(unsigned long cmd, XEN
         struct page_info *page;
         struct domain *d;
 
+        if ( unlikely(start_extent) )
+            return -ENOSYS;
+
         if ( copy_from_guest(&xrfp, arg, 1) )
             return -EFAULT;
 
@@ -945,6 +957,9 @@ long do_memory_op(unsigned long cmd, XEN
         break;
 
     case XENMEM_claim_pages:
+        if ( unlikely(start_extent) )
+            return -ENOSYS;
+
         if ( copy_from_guest(&reservation, arg, 1) )
             return -EFAULT;
 
@@ -977,6 +992,9 @@ long do_memory_op(unsigned long cmd, XEN
         unsigned int dom_vnodes, dom_vranges, dom_vcpus;
         struct vnuma_info tmp;
 
+        if ( unlikely(start_extent) )
+            return -ENOSYS;
+
         /*
          * Guest passes nr_vnodes, number of regions and nr_vcpus thus
          * we know how much memory guest has allocated.
--- a/xen/include/xen/hypercall.h
+++ b/xen/include/xen/hypercall.h
@@ -57,6 +57,11 @@ do_platform_op(
  * To allow safe resume of do_memory_op() after preemption, we need to know
  * at what point in the page list to resume. For this purpose I steal the
  * high-order bits of the @cmd parameter, which are otherwise unused and zero.
+ *
+ * Note that both of these values are effectively part of the ABI, even if
+ * we don't need to make them a formal part of it: A guest suspended for
+ * migration in the middle of a continuation would fail to work if resumed on
+ * a hypervisor using different values.
  */
 #define MEMOP_EXTENT_SHIFT 6 /* cmd[:6] == start_extent */
 #define MEMOP_CMD_MASK     ((1 << MEMOP_EXTENT_SHIFT) - 1)


Attachment: continuation-masks.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.