lock down hypercall continuation encoding masks

Andrew validly points out that even if these masks aren't a formal part
of the hypercall interface, we aren't free to change them: A guest
suspended for migration in the middle of a continuation would fail to
work if resumed on a hypervisor using a different value. Hence add
respective comments to their definitions.

Additionally, to help future extensibility as well as in the spirit of
reducing undefined behavior as much as possible, refuse hypercalls made
with the respective bits non-zero when the respective sub-ops don't
make use of those bits.

Reported-by: Andrew Cooper
Signed-off-by: Jan Beulich

--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -5458,16 +5458,34 @@ static int hvmop_destroy_ioreq_server(
     return rc;
 }
 
+/*
+ * Note that this value is effectively part of the ABI, even if we don't need
+ * to make it a formal part of it: A guest suspended for migration in the
+ * middle of a continuation would fail to work if resumed on a hypervisor
+ * using a different value.
+ */
 #define HVMOP_op_mask 0xff
 
 long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
     struct domain *curr_d = current->domain;
-    unsigned long start_iter = op & ~HVMOP_op_mask;
+    unsigned long start_iter, mask;
     long rc = 0;
 
-    switch ( op &= HVMOP_op_mask )
+    switch ( op & HVMOP_op_mask )
+    {
+    default:
+        mask = ~0UL;
+        break;
+    case HVMOP_modified_memory:
+    case HVMOP_set_mem_type:
+        mask = HVMOP_op_mask;
+        break;
+    }
+
+    start_iter = op & ~mask;
+    switch ( op &= mask )
     {
     case HVMOP_create_ioreq_server:
         rc = hvmop_create_ioreq_server(
@@ -6120,7 +6138,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
 
     if ( rc == -ERESTART )
     {
-        ASSERT(!(start_iter & HVMOP_op_mask));
+        ASSERT(!(start_iter & mask));
         rc = hypercall_create_continuation(__HYPERVISOR_hvm_op, "lh",
                                            op | start_iter, arg);
     }
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4661,9 +4661,8 @@ int xenmem_add_to_physmap_one(
 long arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
     int rc;
-    int op = cmd & MEMOP_CMD_MASK;
 
-    switch ( op )
+    switch ( cmd )
     {
     case XENMEM_set_memory_map:
     {
@@ -4837,7 +4836,7 @@ long arch_memory_op(unsigned long cmd, X
         if ( d == NULL )
             return -ESRCH;
 
-        if ( op == XENMEM_set_pod_target )
+        if ( cmd == XENMEM_set_pod_target )
             rc = xsm_set_pod_target(XSM_PRIV, d);
         else
             rc = xsm_get_pod_target(XSM_PRIV, d);
@@ -4845,7 +4844,7 @@ long arch_memory_op(unsigned long cmd, X
         if ( rc != 0 )
             goto pod_target_out_unlock;
 
-        if ( op == XENMEM_set_pod_target )
+        if ( cmd == XENMEM_set_pod_target )
         {
             if ( target.target_pages > d->max_pages )
             {
@@ -4859,7 +4858,7 @@ long arch_memory_op(unsigned long cmd, X
         if ( rc == -ERESTART )
        {
             rc = hypercall_create_continuation(
-                __HYPERVISOR_memory_op, "lh", op, arg);
+                __HYPERVISOR_memory_op, "lh", cmd, arg);
         }
         else if ( rc >= 0 )
         {
--- a/xen/arch/x86/x86_64/compat/mm.c
+++ b/xen/arch/x86/x86_64/compat/mm.c
@@ -53,9 +53,8 @@ int compat_arch_memory_op(unsigned long
     compat_pfn_t mfn;
     unsigned int i;
     int rc = 0;
-    int op = cmd & MEMOP_CMD_MASK;
 
-    switch ( op )
+    switch ( cmd )
     {
     case XENMEM_set_memory_map:
     {
@@ -192,7 +191,7 @@ int compat_arch_memory_op(unsigned long
         xen_mem_event_op_t meo;
         if ( copy_from_guest(&meo, arg, 1) )
             return -EFAULT;
-        rc = do_mem_event_op(op, meo.domain, (void *) &meo);
+        rc = do_mem_event_op(cmd, meo.domain, &meo);
         if ( !rc && __copy_to_guest(arg, &meo, 1) )
             return -EFAULT;
         break;
@@ -205,7 +204,7 @@ int compat_arch_memory_op(unsigned long
             return -EFAULT;
         if ( mso.op == XENMEM_sharing_op_audit )
            return mem_sharing_audit();
-        rc = do_mem_event_op(op, mso.domain, (void *) &mso);
+        rc = do_mem_event_op(cmd, mso.domain, &mso);
         if ( !rc && __copy_to_guest(arg, &mso, 1) )
             return -EFAULT;
         break;
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -906,9 +906,8 @@ long subarch_memory_op(unsigned long cmd
     xen_pfn_t mfn, last_mfn;
     unsigned int i;
     long rc = 0;
-    int op = cmd & MEMOP_CMD_MASK;
 
-    switch ( op )
+    switch ( cmd )
     {
     case XENMEM_machphys_mfn_list:
         if ( copy_from_guest(&xmml, arg, 1) )
@@ -989,7 +988,7 @@ long subarch_memory_op(unsigned long cmd
         xen_mem_event_op_t meo;
         if ( copy_from_guest(&meo, arg, 1) )
             return -EFAULT;
-        rc = do_mem_event_op(op, meo.domain, (void *) &meo);
+        rc = do_mem_event_op(cmd, meo.domain, &meo);
         if ( !rc && __copy_to_guest(arg, &meo, 1) )
             return -EFAULT;
         break;
@@ -1002,7 +1001,7 @@ long subarch_memory_op(unsigned long cmd
             return -EFAULT;
         if ( mso.op == XENMEM_sharing_op_audit )
             return mem_sharing_audit();
-        rc = do_mem_event_op(op, mso.domain, (void *) &mso);
+        rc = do_mem_event_op(cmd, mso.domain, &mso);
         if ( !rc && __copy_to_guest(arg, &mso, 1) )
             return -EFAULT;
         break;
--- a/xen/common/compat/grant_table.c
+++ b/xen/common/compat/grant_table.c
@@ -65,6 +65,8 @@ int compat_grant_table_op(unsigned int c
     set_xen_guest_handle(cnt_uop, NULL);
 
     cmd_op = cmd & GNTTABOP_CMD_MASK;
+    if ( cmd_op != GNTTABOP_cache_flush )
+        cmd_op = cmd;
     switch ( cmd_op )
     {
 #define CASE(name) \
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -62,6 +62,12 @@ integer_param("gnttab_max_frames", max_g
 static unsigned int __read_mostly max_maptrack_frames;
 integer_param("gnttab_max_maptrack_frames", max_maptrack_frames);
 
+/*
+ * Note that the three values below are effectively part of the ABI, even if
+ * we don't need to make them a formal part of it: A guest suspended for
+ * migration in the middle of a continuation would fail to work if resumed on
+ * a hypervisor using different values.
+ */
 #define GNTTABOP_CONTINUATION_ARG_SHIFT 12
 #define GNTTABOP_CMD_MASK               ((1<<GNTTABOP_CONTINUATION_ARG_SHIFT)-1)
 #define GNTTABOP_ARG_MASK               (~GNTTABOP_CMD_MASK)
--- a/xen/common/mem_access.c
+++ b/xen/common/mem_access.c
@@ -67,6 +67,10 @@ int mem_access_memop(unsigned long cmd,
     {
         xenmem_access_t access;
 
+        rc = -ENOSYS;
+        if ( start_iter )
+            break;
+
         rc = -EINVAL;
         if ( (mao.pfn > domain_get_maximum_gpfn(d)) && mao.pfn != ~0ull )
             break;
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -779,16 +779,25 @@ long do_memory_op(unsigned long cmd, XEN
         break;
 
     case XENMEM_exchange:
+        if ( unlikely(start_extent) )
+            return -ENOSYS;
+
         rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
         break;
 
     case XENMEM_maximum_ram_page:
+        if ( unlikely(start_extent) )
+            return -ENOSYS;
+
         rc = max_page;
         break;
 
     case XENMEM_current_reservation:
     case XENMEM_maximum_reservation:
     case XENMEM_maximum_gpfn:
+        if ( unlikely(start_extent) )
+            return -ENOSYS;
+
         if ( copy_from_guest(&domid, arg, 1) )
             return -EFAULT;
 
@@ -912,6 +921,9 @@ long do_memory_op(unsigned long cmd, XEN
         struct page_info *page;
         struct domain *d;
 
+        if ( unlikely(start_extent) )
+            return -ENOSYS;
+
         if ( copy_from_guest(&xrfp, arg, 1) )
             return -EFAULT;
 
@@ -945,6 +957,9 @@ long do_memory_op(unsigned long cmd, XEN
         break;
 
     case XENMEM_claim_pages:
+        if ( unlikely(start_extent) )
+            return -ENOSYS;
+
         if ( copy_from_guest(&reservation, arg, 1) )
             return -EFAULT;
 
@@ -977,6 +992,9 @@ long do_memory_op(unsigned long cmd, XEN
         unsigned int dom_vnodes, dom_vranges, dom_vcpus;
         struct vnuma_info tmp;
 
+        if ( unlikely(start_extent) )
+            return -ENOSYS;
+
         /*
          * Guest passes nr_vnodes, number of regions and nr_vcpus thus
          * we know how much memory guest has allocated.
--- a/xen/include/xen/hypercall.h
+++ b/xen/include/xen/hypercall.h
@@ -57,6 +57,11 @@ do_platform_op(
  * To allow safe resume of do_memory_op() after preemption, we need to know
  * at what point in the page list to resume. For this purpose I steal the
  * high-order bits of the @cmd parameter, which are otherwise unused and zero.
+ *
+ * Note that both of these values are effectively part of the ABI, even if
+ * we don't need to make them a formal part of it: A guest suspended for
+ * migration in the middle of a continuation would fail to work if resumed on
+ * a hypervisor using different values.
  */
 #define MEMOP_EXTENT_SHIFT 6 /* cmd[:6] == start_extent */
 #define MEMOP_CMD_MASK     ((1 << MEMOP_EXTENT_SHIFT) - 1)
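
For readers who want to see the hazard concretely, here is a minimal
stand-alone sketch (illustration only, not Xen code) that mimics the
MEMOP_EXTENT_SHIFT / MEMOP_CMD_MASK encoding from the hypercall.h hunk
above; encode_continuation() and the sub-op value 11 are made up for
this example.

#include <assert.h>
#include <stdio.h>

/* Mirrors xen/include/xen/hypercall.h (see the hunk above). */
#define MEMOP_EXTENT_SHIFT 6 /* cmd[:6] == start_extent */
#define MEMOP_CMD_MASK     ((1UL << MEMOP_EXTENT_SHIFT) - 1)

/* Hypothetical helper: pack sub-op and resume point into one argument. */
static unsigned long encode_continuation(unsigned long op,
                                         unsigned long start_extent)
{
    assert(!(op & ~MEMOP_CMD_MASK)); /* the sub-op must fit below the shift */
    return op | (start_extent << MEMOP_EXTENT_SHIFT);
}

int main(void)
{
    /* Suppose a hypercall with sub-op 11 was preempted after 42 extents. */
    unsigned long cmd = encode_continuation(11, 42);

    /* The resuming hypervisor recovers both halves from the one argument. */
    printf("op=%lu start_extent=%lu\n",
           cmd & MEMOP_CMD_MASK, cmd >> MEMOP_EXTENT_SHIFT);

    /*
     * That only works if the resuming side uses the same shift: a build
     * with MEMOP_EXTENT_SHIFT == 8 would decode this cmd value (2699) as
     * op=139, start_extent=10, so a guest migrated mid-continuation would
     * resume with a corrupted request.  This is why the values are
     * effectively ABI even without being formally specified.
     */
    return 0;
}

Compiled and run, this prints "op=11 start_extent=42".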