
Re: [PATCH v2] xen/arm: Convert runstate address during hypcall



On Tue, 28 Jul 2020, Bertrand Marquis wrote:
> At the moment on Arm, a Linux guest running with KPTI enabled will
> cause the following error when a context switch happens in user mode:
> (XEN) p2m.c:1890: d1v0: Failed to walk page-table va 0xffffff837ebe0cd0
> 
> The error is caused by the fact that, when KPTI is enabled, the
> virtual address of the runstate area registered by the guest is only
> accessible while the guest is running in kernel space.
> 
> To solve this issue, this patch translates the virtual address to a
> physical address during the hypercall and maps the required pages
> using vmap. The virtual-to-physical conversion previously done on
> each context switch is removed, which solves the problem with KPTI.
> 
> This is done only on the Arm architecture; the behaviour on x86 is
> not modified by this patch and the address conversion is still done
> during each context switch, as before.
> 
> This introduces several limitations compared to the previous
> behaviour (on Arm only):
> - If the guest remaps the area at a different physical address, Xen
> will continue to update the area at the previous physical address. As
> the area is in kernel space and usually defined as a global variable,
> this is believed not to happen in practice. If a guest does require
> this, it will have to call the hypercall again with the new area (even
> if it is at the same virtual address).
> - The area needs to be mapped during the hypercall, even if it is
> registered for a different vcpu, for the same reasons as in the
> previous case. Registering an area whose virtual address is unmapped
> is believed not to be something guests do.
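FWIW this matches what Linux already does: the runstate area is a
per-cpu kernel variable registered once per vcpu. If a guest ever did
need to re-register, it would just repeat the registration hypercall,
roughly like this (guest-side sketch modelled on the Linux
implementation in drivers/xen/time.c; the vcpu-id argument is
simplified here):

    struct vcpu_register_runstate_memory_area area;

    /* kernel virtual address of the (possibly remapped) per-cpu area */
    area.addr.v = &per_cpu(xen_runstate, cpu);

    /* registering again simply replaces the previous mapping in Xen */
    HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);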
> 
> Inline functions in headers could not be used because the
> per-architecture domain.h is included before the common domain.h,
> making it impossible to use struct vcpu inside the architecture
> header.
> This should not have any performance impact as the hypercall is
> usually only called once per vcpu.
> 
> Signed-off-by: Bertrand Marquis <bertrand.marquis@xxxxxxx>
> ---
>   Changes in v2
>     - use vmap to map the pages during the hypercall.
>     - reintroduce initial copy during hypercall.
> 
> ---
>  xen/arch/arm/domain.c        | 160 +++++++++++++++++++++++++++++++----
>  xen/arch/x86/domain.c        |  30 ++++++-
>  xen/arch/x86/x86_64/domain.c |   4 +-
>  xen/common/domain.c          |  19 ++---
>  xen/include/asm-arm/domain.h |   9 ++
>  xen/include/asm-x86/domain.h |  16 ++++
>  xen/include/xen/domain.h     |   5 ++
>  xen/include/xen/sched.h      |  16 +---
>  8 files changed, 207 insertions(+), 52 deletions(-)
> 
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 31169326b2..c595438bd9 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -19,6 +19,7 @@
>  #include <xen/sched.h>
>  #include <xen/softirq.h>
>  #include <xen/wait.h>
> +#include <xen/vmap.h>
>  
>  #include <asm/alternative.h>
>  #include <asm/cpuerrata.h>
> @@ -275,36 +276,157 @@ static void ctxt_switch_to(struct vcpu *n)
>      virt_timer_restore(n);
>  }
>  
> -/* Update per-VCPU guest runstate shared memory area (if registered). */
> -static void update_runstate_area(struct vcpu *v)
> +static void cleanup_runstate_vcpu_locked(struct vcpu *v)
> +{
> +    if ( v->arch.runstate_guest )
> +    {
> +        vunmap((void *)((unsigned long)v->arch.runstate_guest & PAGE_MASK));
> +
> +        put_page(v->arch.runstate_guest_page[0]);
> +
> +        if ( v->arch.runstate_guest_page[1] )
> +        {
> +            put_page(v->arch.runstate_guest_page[1]);
> +        }
> +        v->arch.runstate_guest = NULL;
> +    }
> +}
> +
> +void arch_vcpu_cleanup_runstate(struct vcpu *v)
>  {
> -    void __user *guest_handle = NULL;
> +    spin_lock(&v->arch.runstate_guest_lock);
> +
> +    cleanup_runstate_vcpu_locked(v);
> +
> +    spin_unlock(&v->arch.runstate_guest_lock);
> +}
> +
> +static int setup_runstate_vcpu_locked(struct vcpu *v, vaddr_t vaddr)
> +{
> +    unsigned int offset;
> +    mfn_t mfn[2];
> +    struct page_info *page;
> +    unsigned int numpages;
>      struct vcpu_runstate_info runstate;
> +    void *p;
>  
> -    if ( guest_handle_is_null(runstate_guest(v)) )
> -        return;
> +    /* user can pass a NULL address to unregister a previous area */
> +    if ( vaddr == 0 )
> +        return 0;
>  
> -    memcpy(&runstate, &v->runstate, sizeof(runstate));
> +    offset = vaddr & ~PAGE_MASK;
>  
> -    if ( VM_ASSIST(v->domain, runstate_update_flag) )
> +    /* provided address must be aligned to a 64bit */
> +    if ( offset % alignof(struct vcpu_runstate_info) )
>      {
> -        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
> -        guest_handle--;
> -        runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
> -        __raw_copy_to_guest(guest_handle,
> -                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
> -        smp_wmb();
> +        gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
> +                "Invalid offset\n", vaddr);
> +        return -EINVAL;
> +    }
> +
> +    page = get_page_from_gva(v, vaddr, GV2M_WRITE);
> +    if ( !page )
> +    {
> +        gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
> +                "Page is not mapped\n", vaddr);
> +        return -EINVAL;
> +    }
> +    mfn[0] = page_to_mfn(page);
> +    v->arch.runstate_guest_page[0] = page;
> +
> +    if ( offset > (PAGE_SIZE - sizeof(struct vcpu_runstate_info)) )
> +    {
> +        /* guest area is crossing pages */
> +        page = get_page_from_gva(v, vaddr + PAGE_SIZE, GV2M_WRITE);
> +        if ( !page )
> +        {
> +            put_page(v->arch.runstate_guest_page[0]);
> +            gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
> +                    "2nd Page is not mapped\n", vaddr);
> +            return -EINVAL;
> +        }
> +        mfn[1] = page_to_mfn(page);
> +        v->arch.runstate_guest_page[1] = page;
> +        numpages = 2;
> +    }
> +    else
> +    {
> +        v->arch.runstate_guest_page[1] = NULL;
> +        numpages = 1;
> +    }
> +
> +    p = vmap(mfn, numpages);
> +    if ( !p )
> +    {
> +        put_page(v->arch.runstate_guest_page[0]);
> +        if ( numpages == 2 )
> +            put_page(v->arch.runstate_guest_page[1]);
> +
> +        gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
> +                "vmap error\n", vaddr);
> +        return -EINVAL;
>      }
>  
> -    __copy_to_guest(runstate_guest(v), &runstate, 1);
> +    v->arch.runstate_guest = p + offset;
>  
> -    if ( guest_handle )
> +    if (v == current)
>      {
> -        runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
> +        memcpy(v->arch.runstate_guest, &v->runstate, sizeof(v->runstate));
> +    }
> +    else
> +    {
> +        vcpu_runstate_get(v, &runstate);
> +        memcpy(v->arch.runstate_guest, &runstate, sizeof(v->runstate));
> +    }
> +
> +    return 0;
> +}


The arm32 build breaks with:

domain.c: In function 'setup_runstate_vcpu_locked':
domain.c:322:9: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'vaddr_t' [-Werror=format=]
         gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
         ^
domain.c:330:9: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'vaddr_t' [-Werror=format=]
         gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
         ^
domain.c:344:13: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'vaddr_t' [-Werror=format=]
             gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
             ^
domain.c:365:9: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'vaddr_t' [-Werror=format=]
         gprintk(XENLOG_WARNING, "Cannot map runstate pointer at 0x%lx: "
         ^
cc1: all warnings being treated as errors
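
The usual way to print a vaddr_t portably is the PRIvaddr format macro
from asm-arm/types.h (or an explicit cast), e.g. something along these
lines:

    gprintk(XENLOG_WARNING,
            "Cannot map runstate pointer at %#"PRIvaddr": Invalid offset\n",
            vaddr);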


> +int arch_vcpu_setup_runstate(struct vcpu *v,
> +                             struct vcpu_register_runstate_memory_area area)
> +{
> +    int rc;
> +
> +    spin_lock(&v->arch.runstate_guest_lock);
> +
> +    /* cleanup if we are recalled */
> +    cleanup_runstate_vcpu_locked(v);
> +
> +    rc = setup_runstate_vcpu_locked(v, (vaddr_t)area.addr.v);
> +
> +    spin_unlock(&v->arch.runstate_guest_lock);
> +
> +    return rc;
> +}
> +
> +
> +/* Update per-VCPU guest runstate shared memory area (if registered). */
> +static void update_runstate_area(struct vcpu *v)
> +{
> +    spin_lock(&v->arch.runstate_guest_lock);
> +
> +    if ( v->arch.runstate_guest )
> +    {
> +        if ( VM_ASSIST(v->domain, runstate_update_flag) )
> +        {
> +            v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
> +            v->arch.runstate_guest->state_entry_time |= XEN_RUNSTATE_UPDATE;

Please use write_atomic (as suggested by Julien here:
https://marc.info/?l=xen-devel&m=159225391619240)
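
i.e. something along these lines (untested sketch):

    v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
    write_atomic(&v->arch.runstate_guest->state_entry_time,
                 v->runstate.state_entry_time);

so that the guest-visible word is updated with a single atomic store
rather than a read-modify-write on guest memory.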


> +            smp_wmb();
> +        }
> +
> +        memcpy(v->arch.runstate_guest, &v->runstate, sizeof(v->runstate));
> +
> +        /* copy must be done before switching the bit */
>          smp_wmb();
> -        __raw_copy_to_guest(guest_handle,
> -                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
> +
> +        if ( VM_ASSIST(v->domain, runstate_update_flag) )
> +        {
> +            v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
> +            v->arch.runstate_guest->state_entry_time &= ~XEN_RUNSTATE_UPDATE;

and here too
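
i.e., after clearing XEN_RUNSTATE_UPDATE in
v->runstate.state_entry_time, something like (same caveat, untested):

    write_atomic(&v->arch.runstate_guest->state_entry_time,
                 v->runstate.state_entry_time);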

The rest looks OK to me.


> +        }
>      }
> +
> +    spin_unlock(&v->arch.runstate_guest_lock);
>  }
>  
>  static void schedule_tail(struct vcpu *prev)
