[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RESEND PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
From: Andrii Anisov <andrii_anisov@xxxxxxxx> The existing interface to register the runstate area by its virtual address is prone to issues which became more obvious with KPTI enablement in guests. The nature of those issues is the fact that the guest could be interrupted by the hypervisor at any time, and there is no guarantee to have the registered virtual address translated with the currently available guest's page tables. Before the KPTI such a situation was possible in case the guest is caught in the middle of PT processing (e.g. superpage shattering). With the KPTI this happens also when the guest runs userspace, so it has a pretty high probability. So it was agreed to register the runstate area with the guest's physical address so that its mapping is permanent from the hypervisor point of view. [1] The hypercall employs the same vcpu_register_runstate_memory_area structure for the interface, but requires a registered area to not cross a page boundary. [1] https://lists.xenproject.org/archives/html/xen-devel/2019-02/msg00416.html Signed-off-by: Andrii Anisov <andrii_anisov@xxxxxxxx> --- xen/arch/arm/domain.c | 58 ++++++++++++++++++--- xen/arch/x86/domain.c | 99 ++++++++++++++++++++++++++++++++--- xen/arch/x86/x86_64/domain.c | 16 +++++- xen/common/domain.c | 121 +++++++++++++++++++++++++++++++++++++++---- xen/include/public/vcpu.h | 15 ++++++ xen/include/xen/sched.h | 28 +++++++--- 6 files changed, 306 insertions(+), 31 deletions(-) diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c index ff330b3..ecedf1c 100644 --- a/xen/arch/arm/domain.c +++ b/xen/arch/arm/domain.c @@ -274,17 +274,15 @@ static void ctxt_switch_to(struct vcpu *n) virt_timer_restore(n); } -/* Update per-VCPU guest runstate shared memory area (if registered). 
*/ -static void update_runstate_area(struct vcpu *v) +static void update_runstate_by_gvaddr(struct vcpu *v) { void __user *guest_handle = NULL; - if ( guest_handle_is_null(runstate_guest(v)) ) - return; + ASSERT(!guest_handle_is_null(runstate_guest_virt(v))); if ( VM_ASSIST(v->domain, runstate_update_flag) ) { - guest_handle = &v->runstate_guest.p->state_entry_time + 1; + guest_handle = &v->runstate_guest.virt.p->state_entry_time + 1; guest_handle--; v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE; __raw_copy_to_guest(guest_handle, @@ -292,7 +290,7 @@ static void update_runstate_area(struct vcpu *v) smp_wmb(); } - __copy_to_guest(runstate_guest(v), &v->runstate, 1); + __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1); if ( guest_handle ) { @@ -303,6 +301,53 @@ static void update_runstate_area(struct vcpu *v) } } +static void update_runstate_by_gpaddr(struct vcpu *v) +{ + struct vcpu_runstate_info *runstate = + (struct vcpu_runstate_info *)v->runstate_guest.phys; + + ASSERT(runstate != NULL); + + if ( VM_ASSIST(v->domain, runstate_update_flag) ) + { + runstate->state_entry_time |= XEN_RUNSTATE_UPDATE; + smp_wmb(); + v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE; + } + + memcpy(v->runstate_guest.phys, &v->runstate, sizeof(v->runstate)); + + if ( VM_ASSIST(v->domain, runstate_update_flag) ) + { + runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE; + smp_wmb(); + v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE; + } +} + +/* Update per-VCPU guest runstate shared memory area (if registered). 
*/ +static void update_runstate_area(struct vcpu *v) +{ + if ( xchg(&v->runstate_in_use, 1) ) + return; + + switch ( v->runstate_guest_type ) + { + case RUNSTATE_NONE: + break; + + case RUNSTATE_VADDR: + update_runstate_by_gvaddr(v); + break; + + case RUNSTATE_PADDR: + update_runstate_by_gpaddr(v); + break; + } + + xchg(&v->runstate_in_use, 0); +} + static void schedule_tail(struct vcpu *prev) { ASSERT(prev != current); @@ -998,6 +1043,7 @@ long do_arm_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) a { case VCPUOP_register_vcpu_info: case VCPUOP_register_runstate_memory_area: + case VCPUOP_register_runstate_phys_memory_area: return do_vcpu_op(cmd, vcpuid, arg); default: return -EINVAL; diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index ac960dd..fe71776 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1566,22 +1566,21 @@ void paravirt_ctxt_switch_to(struct vcpu *v) } /* Update per-VCPU guest runstate shared memory area (if registered). */ -bool update_runstate_area(struct vcpu *v) +static bool update_runstate_by_gvaddr(struct vcpu *v) { bool rc; struct guest_memory_policy policy = { .nested_guest_mode = false }; void __user *guest_handle = NULL; - if ( guest_handle_is_null(runstate_guest(v)) ) - return true; + ASSERT(!guest_handle_is_null(runstate_guest_virt(v))); update_guest_memory_policy(v, &policy); if ( VM_ASSIST(v->domain, runstate_update_flag) ) { guest_handle = has_32bit_shinfo(v->domain) - ? &v->runstate_guest.compat.p->state_entry_time + 1 - : &v->runstate_guest.native.p->state_entry_time + 1; + ? 
&v->runstate_guest.virt.compat.p->state_entry_time + 1 + : &v->runstate_guest.virt.native.p->state_entry_time + 1; guest_handle--; v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE; __raw_copy_to_guest(guest_handle, @@ -1594,11 +1593,11 @@ bool update_runstate_area(struct vcpu *v) struct compat_vcpu_runstate_info info; XLAT_vcpu_runstate_info(&info, &v->runstate); - __copy_to_guest(v->runstate_guest.compat, &info, 1); + __copy_to_guest(v->runstate_guest.virt.compat, &info, 1); rc = true; } else - rc = __copy_to_guest(runstate_guest(v), &v->runstate, 1) != + rc = __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1) != sizeof(v->runstate); if ( guest_handle ) @@ -1614,6 +1613,92 @@ bool update_runstate_area(struct vcpu *v) return rc; } +static bool update_runstate_by_gpaddr_native(struct vcpu *v) +{ + struct vcpu_runstate_info *runstate = + (struct vcpu_runstate_info *)v->runstate_guest.phys; + + ASSERT(runstate != NULL); + + if ( VM_ASSIST(v->domain, runstate_update_flag) ) + { + runstate->state_entry_time |= XEN_RUNSTATE_UPDATE; + smp_wmb(); + v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE; + } + + memcpy(v->runstate_guest.phys, &v->runstate, sizeof(v->runstate)); + + if ( VM_ASSIST(v->domain, runstate_update_flag) ) + { + runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE; + smp_wmb(); + v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE; + } + + return true; +} + +static bool update_runstate_by_gpaddr_compat(struct vcpu *v) +{ + struct compat_vcpu_runstate_info *runstate = + (struct compat_vcpu_runstate_info *)v->runstate_guest.phys; + + ASSERT(runstate != NULL); + + if ( VM_ASSIST(v->domain, runstate_update_flag) ) + { + runstate->state_entry_time |= XEN_RUNSTATE_UPDATE; + smp_wmb(); + v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE; + } + + { + struct compat_vcpu_runstate_info info; + XLAT_vcpu_runstate_info(&info, &v->runstate); + memcpy(v->runstate_guest.phys, &info, sizeof(info)); + } + else + memcpy(v->runstate_guest.phys, &v->runstate, 
sizeof(v->runstate)); + + if ( VM_ASSIST(v->domain, runstate_update_flag) ) + { + runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE; + smp_wmb(); + v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE; + } + + return true; +} + +bool update_runstate_area(struct vcpu *v) +{ + bool rc = true; + + if ( xchg(&v->runstate_in_use, 1) ) + return rc; + + switch ( v->runstate_guest_type ) + { + case RUNSTATE_NONE: + break; + + case RUNSTATE_VADDR: + rc = update_runstate_by_gvaddr(v); + break; + + case RUNSTATE_PADDR: + if ( has_32bit_shinfo(v->domain) ) + rc = update_runstate_by_gpaddr_compat(v); + else + rc = update_runstate_by_gpaddr_native(v); + break; + } + + xchg(&v->runstate_in_use, 0); + return rc; +} + static void _update_runstate_area(struct vcpu *v) { if ( !update_runstate_area(v) && is_pv_vcpu(v) && diff --git a/xen/arch/x86/x86_64/domain.c b/xen/arch/x86/x86_64/domain.c index c46dccc..85d0072 100644 --- a/xen/arch/x86/x86_64/domain.c +++ b/xen/arch/x86/x86_64/domain.c @@ -12,6 +12,8 @@ CHECK_vcpu_get_physid; #undef xen_vcpu_get_physid +extern void discard_runstate_area(struct vcpu *v); + int arch_compat_vcpu_op( int cmd, struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg) @@ -35,8 +37,16 @@ arch_compat_vcpu_op( !compat_handle_okay(area.addr.h, 1) ) break; + while( xchg(&v->runstate_in_use, 1) == 0); + + discard_runstate_area(v); + rc = 0; - guest_from_compat_handle(v->runstate_guest.compat, area.addr.h); + + guest_from_compat_handle(v->runstate_guest.virt.compat, + area.addr.h); + + v->runstate_guest_type = RUNSTATE_VADDR; if ( v == current ) { @@ -49,7 +59,9 @@ arch_compat_vcpu_op( vcpu_runstate_get(v, &runstate); XLAT_vcpu_runstate_info(&info, &runstate); } - __copy_to_guest(v->runstate_guest.compat, &info, 1); + __copy_to_guest(v->runstate_guest.virt.compat, &info, 1); + + xchg(&v->runstate_in_use, 0); break; } diff --git a/xen/common/domain.c b/xen/common/domain.c index 90c6607..d276b87 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -698,6 
+698,74 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d) return 0; } +static void unmap_runstate_area(struct vcpu *v) +{ + mfn_t mfn; + + if ( ! v->runstate_guest.phys ) + return; + + mfn = domain_page_map_to_mfn(v->runstate_guest.phys); + + unmap_domain_page_global((void *) + ((unsigned long)v->runstate_guest.phys & + PAGE_MASK)); + + v->runstate_guest.phys = NULL; + put_page_and_type(mfn_to_page(mfn)); +} + +static int map_runstate_area(struct vcpu *v, + struct vcpu_register_runstate_memory_area *area) +{ + unsigned long offset = area->addr.p & ~PAGE_MASK; + gfn_t gfn = gaddr_to_gfn(area->addr.p); + struct domain *d = v->domain; + void *mapping; + struct page_info *page; + size_t size = sizeof(struct vcpu_runstate_info); + + if ( offset > (PAGE_SIZE - size) ) + return -EINVAL; + + page = get_page_from_gfn(d, gfn_x(gfn), NULL, P2M_ALLOC); + if ( !page ) + return -EINVAL; + + if ( !get_page_type(page, PGT_writable_page) ) + { + put_page(page); + return -EINVAL; + } + + mapping = __map_domain_page_global(page); + + if ( mapping == NULL ) + { + put_page_and_type(page); + return -ENOMEM; + } + + v->runstate_guest.phys = mapping + offset; + + return 0; +} + +void discard_runstate_area(struct vcpu *v) +{ + if ( v->runstate_guest_type == RUNSTATE_PADDR ) + unmap_runstate_area(v); + + v->runstate_guest_type = RUNSTATE_NONE; +} + +static void discard_runstate_area_locked(struct vcpu *v) +{ + while ( xchg(&v->runstate_in_use, 1) ); + discard_runstate_area(v); + xchg(&v->runstate_in_use, 0); +} + int domain_kill(struct domain *d) { int rc = 0; @@ -734,7 +802,10 @@ int domain_kill(struct domain *d) if ( cpupool_move_domain(d, cpupool0) ) return -ERESTART; for_each_vcpu ( d, v ) + { + discard_runstate_area_locked(v); unmap_vcpu_info(v); + } d->is_dying = DOMDYING_dead; /* Mem event cleanup has to go here because the rings * have to be put before we call put_domain. 
*/ @@ -1188,7 +1259,7 @@ int domain_soft_reset(struct domain *d) for_each_vcpu ( d, v ) { - set_xen_guest_handle(runstate_guest(v), NULL); + discard_runstate_area_locked(v); unmap_vcpu_info(v); } @@ -1518,18 +1589,50 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg) break; rc = 0; - runstate_guest(v) = area.addr.h; - if ( v == current ) - { - __copy_to_guest(runstate_guest(v), &v->runstate, 1); - } - else + while( xchg(&v->runstate_in_use, 1) == 0); + + discard_runstate_area(v); + + if ( !guest_handle_is_null(runstate_guest_virt(v)) ) { - vcpu_runstate_get(v, &runstate); - __copy_to_guest(runstate_guest(v), &runstate, 1); + runstate_guest_virt(v) = area.addr.h; + v->runstate_guest_type = RUNSTATE_VADDR; + + if ( v == current ) + { + __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1); + } + else + { + vcpu_runstate_get(v, &runstate); + __copy_to_guest(runstate_guest_virt(v), &runstate, 1); + } } + xchg(&v->runstate_in_use, 0); + + break; + } + + case VCPUOP_register_runstate_phys_memory_area: + { + struct vcpu_register_runstate_memory_area area; + + rc = -EFAULT; + if ( copy_from_guest(&area, arg, 1) ) + break; + + while( xchg(&v->runstate_in_use, 1) == 0); + + discard_runstate_area(v); + + rc = map_runstate_area(v, &area); + if ( !rc ) + v->runstate_guest_type = RUNSTATE_PADDR; + + xchg(&v->runstate_in_use, 0); + break; } diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h index 3623af9..d7da4a3 100644 --- a/xen/include/public/vcpu.h +++ b/xen/include/public/vcpu.h @@ -235,6 +235,21 @@ struct vcpu_register_time_memory_area { typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t; DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t); +/* + * Register a shared memory area from which the guest may obtain its own + * runstate information without needing to execute a hypercall. + * Notes: + * 1. The registered address must be guest's physical address. + * 2. 
The registered runstate area should not cross page boundary. + * 3. Only one shared area may be registered per VCPU. The shared area is + * updated by the hypervisor each time the VCPU is scheduled. Thus + * runstate.state will always be RUNSTATE_running and + * runstate.state_entry_time will indicate the system time at which the + * VCPU was last scheduled to run. + * @extra_arg == pointer to vcpu_register_runstate_memory_area structure. + */ +#define VCPUOP_register_runstate_phys_memory_area 14 + #endif /* __XEN_PUBLIC_VCPU_H__ */ /* diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 2201fac..6c8de8f 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -163,17 +163,31 @@ struct vcpu void *sched_priv; /* scheduler-specific data */ struct vcpu_runstate_info runstate; + + enum { + RUNSTATE_NONE = 0, + RUNSTATE_PADDR = 1, + RUNSTATE_VADDR = 2, + } runstate_guest_type; + + unsigned long runstate_in_use; + + union + { #ifndef CONFIG_COMPAT -# define runstate_guest(v) ((v)->runstate_guest) - XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */ +# define runstate_guest_virt(v) ((v)->runstate_guest.virt) + XEN_GUEST_HANDLE(vcpu_runstate_info_t) virt; /* guest address */ #else -# define runstate_guest(v) ((v)->runstate_guest.native) - union { - XEN_GUEST_HANDLE(vcpu_runstate_info_t) native; - XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat; - } runstate_guest; /* guest address */ +# define runstate_guest_virt(v) ((v)->runstate_guest.virt.native) + union { + XEN_GUEST_HANDLE(vcpu_runstate_info_t) native; + XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat; + } virt; /* guest address */ #endif + void* phys; + } runstate_guest; + /* last time when vCPU is scheduled out */ uint64_t last_run_time; -- 2.7.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |