
Re: [Xen-devel] [PATCH v5 24/28] xen/x86: allow HVM guests to use hypercalls to bring up vCPUs



On 21/08/15 17:53, Roger Pau Monne wrote:
> Allow the usage of the VCPUOP_initialise, VCPUOP_up, VCPUOP_down and
> VCPUOP_is_up hypercalls from HVM guests.
>
> This patch introduces a new structure (vcpu_hvm_context) that should be used
> in conjunction with the VCPUOP_initialise hypercall in order to initialize
> vCPUs for HVM guests.
>
> Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> Cc: Jan Beulich <jbeulich@xxxxxxxx>
> Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
> Cc: Stefano Stabellini <stefano.stabellini@xxxxxxxxxx>
> ---
> Changes since v4:
>  - Don't assume mode is 64B, add an explicit check.
>  - Don't set TF_kernel_mode, it is only needed for PV guests.
>  - Don't set CR0_ET unconditionally.
> ---
>  xen/arch/arm/domain.c             |  24 ++++++
>  xen/arch/x86/domain.c             | 164 +++++++++++++++++++++++++++++++++++++
>  xen/arch/x86/hvm/hvm.c            |   8 ++
>  xen/common/domain.c               |  16 +---
>  xen/include/public/hvm/hvm_vcpu.h | 168 ++++++++++++++++++++++++++++++++++++++
>  xen/include/xen/domain.h          |   2 +
>  6 files changed, 367 insertions(+), 15 deletions(-)
>  create mode 100644 xen/include/public/hvm/hvm_vcpu.h
>
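
For anyone reading along: guest-side usage would look roughly like the
sketch below.  This is purely illustrative and not part of the patch;
bring_up_vcpu() and hypercall_vcpu_op() are hypothetical stand-ins for
whatever wrapper around HYPERVISOR_vcpu_op the guest environment
provides, and the field names follow the vcpu_hvm_context layout this
patch introduces (headers and include paths depend on the guest).

    /*
     * Illustrative sketch only: bring up a secondary vCPU in 32-bit
     * protected mode with paging disabled.  hypercall_vcpu_op() is a
     * hypothetical guest-side wrapper around HYPERVISOR_vcpu_op.
     */
    static int bring_up_vcpu(unsigned int vcpu_id, uint32_t entry,
                             uint32_t stack)
    {
        struct vcpu_hvm_context ctx = {
            .mode = VCPU_HVM_MODE_32B,
            .cpu_regs.x86_32 = {
                .eip = entry,
                .esp = stack,
                .cr0 = 0x1,  /* PE set: protected mode, paging off. */
                /* Flat cs/ds/ss descriptors would also be filled in here. */
            },
        };
        int rc = hypercall_vcpu_op(VCPUOP_initialise, vcpu_id, &ctx);

        if ( rc )
            return rc;

        /* The vCPU starts paused; VCPUOP_up makes it runnable. */
        return hypercall_vcpu_op(VCPUOP_up, vcpu_id, NULL);
    }
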
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index b2bfc7d..b20035d 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -752,6 +752,30 @@ int arch_set_info_guest(
>      return 0;
>  }
>  
> +int arch_initialize_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
> +{
> +    struct vcpu_guest_context *ctxt;
> +    struct domain *d = current->domain;
> +    int rc;
> +
> +    if ( (ctxt = alloc_vcpu_guest_context()) == NULL )
> +        return -ENOMEM;

I have posted my "remove alloc_vcpu_guest_context()" patch to the list
for reference, as it interacts with this one.  I don't mind rebasing
it, but it might also influence this patch.

> +
> +    if ( copy_from_guest(ctxt, arg, 1) )
> +    {
> +        free_vcpu_guest_context(ctxt);
> +        return -EFAULT;
> +    }
> +
> +    domain_lock(d);
> +    rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, ctxt);
> +    domain_unlock(d);
> +
> +    free_vcpu_guest_context(ctxt);
> +
> +    return rc;
> +}
> +
>  int arch_vcpu_reset(struct vcpu *v)
>  {
>      vcpu_end_shutdown_deferral(v);
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index 8fe95f7..23ff14c 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -37,6 +37,7 @@
>  #include <xen/wait.h>
>  #include <xen/guest_access.h>
>  #include <public/sysctl.h>
> +#include <public/hvm/hvm_vcpu.h>
>  #include <asm/regs.h>
>  #include <asm/mc146818rtc.h>
>  #include <asm/system.h>
> @@ -1140,6 +1141,169 @@ int arch_set_info_guest(
>  #undef c
>  }
>  
> +/* Called by VCPUOP_initialise for HVM guests. */
> +static int arch_set_info_hvm_guest(struct vcpu *v, vcpu_hvm_context_t *ctx)
> +{
> +    struct segment_register seg;
> +
> +#define get_context_seg(ctx, seg, f)                                        \
> +    (ctx)->mode == VCPU_HVM_MODE_16B ? (ctx)->cpu_regs.x86_16.seg##_##f :   \
> +    (ctx)->mode == VCPU_HVM_MODE_32B ? (ctx)->cpu_regs.x86_32.seg##_##f :   \
> +    (ctx)->mode == VCPU_HVM_MODE_64B ? (ctx)->cpu_regs.x86_64.seg##_##f :   \
> +    ({ panic("Invalid vCPU mode %u requested\n", (ctx)->mode); 0; })

panic() is far too severe.  domain_crash() would be better, with an
early exit.
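
Rough sketch of what I mean, assuming the macros are only ever expanded
inside arch_set_info_hvm_guest() (so 'v' is in scope and a bare return
is a valid early exit); with the mode check below retained, the default
case is belt-and-braces:

    #define get_context_field(ctx, field)                                   \
        ({                                                                  \
            typeof((ctx)->cpu_regs.x86_64.field) val_;                      \
            switch ( (ctx)->mode )                                          \
            {                                                               \
            case VCPU_HVM_MODE_16B:                                         \
                val_ = (ctx)->cpu_regs.x86_16.field;                        \
                break;                                                      \
            case VCPU_HVM_MODE_32B:                                         \
                val_ = (ctx)->cpu_regs.x86_32.field;                        \
                break;                                                      \
            case VCPU_HVM_MODE_64B:                                         \
                val_ = (ctx)->cpu_regs.x86_64.field;                        \
                break;                                                      \
            default:                                                        \
                /* Unreachable while the mode check below is in place. */   \
                domain_crash(v->domain);                                    \
                return -EINVAL;                                             \
            }                                                               \
            val_;                                                           \
        })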

> +
> +#define get_context_gpr(ctx, gpr)                                           \
> +    (ctx)->mode == VCPU_HVM_MODE_16B ? (ctx)->cpu_regs.x86_16.gpr    :      \
> +    (ctx)->mode == VCPU_HVM_MODE_32B ? (ctx)->cpu_regs.x86_32.e##gpr :      \
> +    (ctx)->mode == VCPU_HVM_MODE_64B ? (ctx)->cpu_regs.x86_64.r##gpr :      \
> +    ({ panic("Invalid vCPU mode %u requested\n", (ctx)->mode); 0; })
> +
> +#define get_context_field(ctx, field)                                       \
> +    (ctx)->mode == VCPU_HVM_MODE_16B ? (ctx)->cpu_regs.x86_16.field :       \
> +    (ctx)->mode == VCPU_HVM_MODE_32B ? (ctx)->cpu_regs.x86_32.field :       \
> +    (ctx)->mode == VCPU_HVM_MODE_64B ? (ctx)->cpu_regs.x86_64.field :       \
> +    ({ panic("Invalid vCPU mode %u requested\n", (ctx)->mode); 0; })
> +
> +    if ( ctx->mode != VCPU_HVM_MODE_16B && ctx->mode != VCPU_HVM_MODE_32B &&
> +         ctx->mode != VCPU_HVM_MODE_64B )
> +        return -EINVAL;

For readability (and style), I would suggest formatting this as

    if ( !((ctx->mode == VCPU_HVM_MODE_16B) ||
           (ctx->mode == VCPU_HVM_MODE_32B) ||
           (ctx->mode == VCPU_HVM_MODE_64B)) )
        return -EINVAL;

> +
> +    memset(&seg, 0, sizeof(seg));
> +
> +    if ( !paging_mode_hap(v->domain) )
> +        v->arch.guest_table = pagetable_null();
> +
> +    v->arch.user_regs.rax = get_context_gpr(ctx, ax);
> +    v->arch.user_regs.rcx = get_context_gpr(ctx, cx);
> +    v->arch.user_regs.rdx = get_context_gpr(ctx, dx);
> +    v->arch.user_regs.rbx = get_context_gpr(ctx, bx);
> +    v->arch.user_regs.rsp = get_context_gpr(ctx, sp);
> +    v->arch.user_regs.rbp = get_context_gpr(ctx, bp);
> +    v->arch.user_regs.rsi = get_context_gpr(ctx, si);
> +    v->arch.user_regs.rdi = get_context_gpr(ctx, di);
> +    v->arch.user_regs.rip = get_context_gpr(ctx, ip);
> +    v->arch.user_regs.rflags = get_context_gpr(ctx, flags);

All these hidden conditionals cause the compiler to generate a 2K
function, much of which consists of conditional jumps.

I did some experimentation, available from
git://xenbits.xen.org/people/andrewcoop/xen.git wip-dmlite-v5-refactor

Bloat-o-meter indicates:

add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-559 (-559)
function                                     old     new   delta
arch_set_info_hvm_guest                     2209    1650    -559

And looking at the disassembly, those 559 bytes are mostly cmp/jXX
constructs and the dead panic() calls.

The code is now longer, but I don't think it detracts from readability,
and it will certainly be faster to execute.

What do you think?  If others agree, you are welcome to fold the patch
into your series.
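
For reference, the general shape of that refactor is to test ctx->mode
once per block and copy each register bank in straight-line code,
something like the sketch below (illustrative only, using the struct
tags from the new header; not the branch itself):

    switch ( ctx->mode )
    {
    case VCPU_HVM_MODE_32B:
    {
        const struct vcpu_hvm_x86_32 *regs = &ctx->cpu_regs.x86_32;

        v->arch.user_regs.rax = regs->eax;
        v->arch.user_regs.rcx = regs->ecx;
        v->arch.user_regs.rdx = regs->edx;
        v->arch.user_regs.rbx = regs->ebx;
        /* ... remaining GPRs, then segments and control registers ... */
        break;
    }

    case VCPU_HVM_MODE_64B:
        /* Likewise, reading from ctx->cpu_regs.x86_64. */
        break;
    }

The compiler then emits a single dispatch on the mode instead of one
compare/branch pair per field.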

> +
> +    v->arch.hvm_vcpu.guest_cr[0] = get_context_field(ctx, cr0);
> +    hvm_update_guest_cr(v, 0);
> +    v->arch.hvm_vcpu.guest_cr[4] = get_context_field(ctx, cr4);
> +    hvm_update_guest_cr(v, 4);
> +
> +    switch ( ctx->mode )
> +    {
> +    case VCPU_HVM_MODE_32B:
> +        v->arch.hvm_vcpu.guest_efer = ctx->cpu_regs.x86_32.efer;
> +        hvm_update_guest_efer(v);
> +        v->arch.hvm_vcpu.guest_cr[3] = ctx->cpu_regs.x86_32.cr3;
> +        hvm_update_guest_cr(v, 3);
> +        break;
> +
> +    case VCPU_HVM_MODE_64B:
> +        v->arch.user_regs.r8 = ctx->cpu_regs.x86_64.r8;
> +        v->arch.user_regs.r9 = ctx->cpu_regs.x86_64.r9;
> +        v->arch.user_regs.r10 = ctx->cpu_regs.x86_64.r10;
> +        v->arch.user_regs.r11 = ctx->cpu_regs.x86_64.r11;
> +        v->arch.user_regs.r12 = ctx->cpu_regs.x86_64.r12;
> +        v->arch.user_regs.r13 = ctx->cpu_regs.x86_64.r13;
> +        v->arch.user_regs.r14 = ctx->cpu_regs.x86_64.r14;
> +        v->arch.user_regs.r15 = ctx->cpu_regs.x86_64.r15;
> +        v->arch.hvm_vcpu.guest_efer = ctx->cpu_regs.x86_64.efer;
> +        hvm_update_guest_efer(v);
> +        v->arch.hvm_vcpu.guest_cr[3] = ctx->cpu_regs.x86_64.cr3;
> +        hvm_update_guest_cr(v, 3);
> +        break;
> +    }
> +
> +    if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) )
> +    {
> +        /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
> +        struct page_info *page = get_page_from_gfn(v->domain,
> +                                 v->arch.hvm_vcpu.guest_cr[3] >> PAGE_SHIFT,
> +                                 NULL, P2M_ALLOC);
> +        if ( !page )
> +        {
> +            gdprintk(XENLOG_ERR, "Invalid CR3\n");

Please print the bad cr3, rather than just stating that some unknown
value is invalid.
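
Something like (untested):

    gdprintk(XENLOG_ERR, "Invalid CR3: %#lx\n",
             v->arch.hvm_vcpu.guest_cr[3]);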

> +            domain_crash(v->domain);
> +            return -EINVAL;
> +        }
> +
> +        v->arch.guest_table = pagetable_from_page(page);
> +    }
> +
> +    seg.base        = get_context_seg(ctx, cs, base);
> +    seg.limit       = get_context_seg(ctx, cs, limit);
> +    seg.attr.bytes  = get_context_seg(ctx, cs, ar);
> +    hvm_set_segment_register(v, x86_seg_cs, &seg);
> +
> +    seg.base        = get_context_seg(ctx, ds, base);
> +    seg.limit       = get_context_seg(ctx, ds, limit);
> +    seg.attr.bytes  = get_context_seg(ctx, ds, ar);
> +    hvm_set_segment_register(v, x86_seg_ds, &seg);
> +
> +    seg.base        = get_context_seg(ctx, ss, base);
> +    seg.limit       = get_context_seg(ctx, ss, limit);
> +    seg.attr.bytes  = get_context_seg(ctx, ss, ar);
> +    hvm_set_segment_register(v, x86_seg_ss, &seg);
> +
> +    seg.base        = get_context_seg(ctx, tr, base);
> +    seg.limit       = get_context_seg(ctx, tr, limit);
> +    seg.attr.bytes  = get_context_seg(ctx, tr, ar);
> +    hvm_set_segment_register(v, x86_seg_tr, &seg);
> +
> +    /* Sync AP's TSC with BSP's. */
> +    v->arch.hvm_vcpu.cache_tsc_offset =
> +        v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
> +    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset,
> +                             v->domain->arch.hvm_domain.sync_tsc);
> +
> +    v->arch.hvm_vcpu.msr_tsc_adjust = 0;
> +
> +    paging_update_paging_modes(v);
> +
> +    v->is_initialised = 1;
> +    set_bit(_VPF_down, &v->pause_flags);
> +
> +    return 0;
> +#undef get_context_field
> +#undef get_context_gpr
> +#undef get_context_seg
> +}
> +
> +int arch_initialize_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
> +{
> +    struct vcpu_guest_context *ctxt;
> +    struct vcpu_hvm_context hvm_ctx;

These two declarations should have their scope reduced into their
respective if() branches, to avoid accidental cross-use.

> +    struct domain *d = current->domain;

This should come from v->domain, not current.

As PV guests read out of the active pagetables, the PV path should
assert that d == current->domain, but there is no such requirement
(which I can spot) on the HVM side.
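
I.e. roughly the following (a sketch combining the two points above;
the ASSERT placement is my suggestion, not code from the series):

    int arch_initialize_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
    {
        struct domain *d = v->domain;
        int rc;

        if ( is_hvm_vcpu(v) )
        {
            struct vcpu_hvm_context hvm_ctx;

            if ( copy_from_guest(&hvm_ctx, arg, 1) )
                return -EFAULT;

            domain_lock(d);
            rc = v->is_initialised ? -EEXIST
                                   : arch_set_info_hvm_guest(v, &hvm_ctx);
            domain_unlock(d);
        }
        else
        {
            struct vcpu_guest_context *ctxt;

            /* PV initialisation reads out of the caller's pagetables. */
            ASSERT(d == current->domain);

            if ( (ctxt = alloc_vcpu_guest_context()) == NULL )
                return -ENOMEM;

            if ( copy_from_guest(ctxt, arg, 1) )
            {
                free_vcpu_guest_context(ctxt);
                return -EFAULT;
            }

            domain_lock(d);
            rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, ctxt);
            domain_unlock(d);

            free_vcpu_guest_context(ctxt);
        }

        return rc;
    }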

> +    int rc;
> +
> +    if ( is_hvm_vcpu(v) )
> +    {
> +        if ( copy_from_guest(&hvm_ctx, arg, 1) )
> +            return -EFAULT;
> +
> +        domain_lock(d);
> +        rc = v->is_initialised ? -EEXIST : arch_set_info_hvm_guest(v, &hvm_ctx);
> +        domain_unlock(d);
> +    } else {

Style: Xen coding style wants the closing brace and the else on their
own lines, as in the sketch above.

> +        if ( (ctxt = alloc_vcpu_guest_context()) == NULL )
> +            return -ENOMEM;
> +
> +        if ( copy_from_guest(ctxt, arg, 1) )
> +        {
> +            free_vcpu_guest_context(ctxt);
> +            return -EFAULT;
> +        }
> +
> +        domain_lock(d);
> +        rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, ctxt);
> +        domain_unlock(d);
> +
> +        free_vcpu_guest_context(ctxt);
> +    }
> +
> +    return rc;
> +}
> +
>  int arch_vcpu_reset(struct vcpu *v)
>  {
>      if ( is_pv_vcpu(v) )
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 1640b58..8856c72 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -4980,6 +4980,10 @@ static long hvm_vcpu_op(
>      case VCPUOP_stop_singleshot_timer:
>      case VCPUOP_register_vcpu_info:
>      case VCPUOP_register_vcpu_time_memory_area:
> +    case VCPUOP_initialise:
> +    case VCPUOP_up:
> +    case VCPUOP_down:
> +    case VCPUOP_is_up:
>          rc = do_vcpu_op(cmd, vcpuid, arg);
>          break;
>      default:
> @@ -5038,6 +5042,10 @@ static long hvm_vcpu_op_compat32(
>      case VCPUOP_stop_singleshot_timer:
>      case VCPUOP_register_vcpu_info:
>      case VCPUOP_register_vcpu_time_memory_area:
> +    case VCPUOP_initialise:
> +    case VCPUOP_up:
> +    case VCPUOP_down:
> +    case VCPUOP_is_up:
>          rc = compat_vcpu_op(cmd, vcpuid, arg);
>          break;
>      default:
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 1b9fcfc..f97e7f4 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -1173,7 +1173,6 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
>  {
>      struct domain *d = current->domain;
>      struct vcpu *v;
> -    struct vcpu_guest_context *ctxt;
>      long rc = 0;
>  
>      if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
> @@ -1185,20 +1184,7 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
>          if ( v->vcpu_info == &dummy_vcpu_info )
>              return -EINVAL;
>  
> -        if ( (ctxt = alloc_vcpu_guest_context()) == NULL )
> -            return -ENOMEM;
> -
> -        if ( copy_from_guest(ctxt, arg, 1) )
> -        {
> -            free_vcpu_guest_context(ctxt);
> -            return -EFAULT;
> -        }
> -
> -        domain_lock(d);
> -        rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, ctxt);
> -        domain_unlock(d);
> -
> -        free_vcpu_guest_context(ctxt);
> +        rc = arch_initialize_vcpu(v, arg);
>  
>          if ( rc == -ERESTART )
>              rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iuh",
> diff --git a/xen/include/public/hvm/hvm_vcpu.h b/xen/include/public/hvm/hvm_vcpu.h
> new file mode 100644
> index 0000000..db86edd
> --- /dev/null
> +++ b/xen/include/public/hvm/hvm_vcpu.h

Please also patch the public/vcpu.h documentation concerning @extra_arg.
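
I.e. extend the VCPUOP_initialise description of @extra_arg along these
lines (suggested wording only, not committed text):

    /*
     * VCPUOP_initialise: @extra_arg == pointer to a vcpu_guest_context
     * structure containing the initial state for the VCPU.  For x86
     * HVM guests it instead points to a vcpu_hvm_context structure
     * (see public/hvm/hvm_vcpu.h).
     */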

~Andrew
