
Re: [Xen-devel] [V10 PATCH 12/23] PVH xen: Support privileged op emulation for PVH



On Wed, Jul 24, 2013 at 2:59 AM, Mukesh Rathor <mukesh.rathor@xxxxxxxxxx> wrote:
> This patch changes mostly traps.c to support privileged op emulation for PVH.
> A new function read_descriptor_sel() is introduced to read descriptor for PVH
> given a selector.  Another new function vmx_read_selector() reads a selector
> from VMCS, to support read_segment_register() for PVH.
>
> Signed-off-by: Mukesh Rathor <mukesh.rathor@xxxxxxxxxx>
> Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
>  xen/arch/x86/hvm/vmx/vmx.c    |   40 +++++++++++++++++++
>  xen/arch/x86/traps.c          |   86 +++++++++++++++++++++++++++++++++++-----
>  xen/include/asm-x86/hvm/hvm.h |    7 +++
>  xen/include/asm-x86/system.h  |   19 +++++++--
>  4 files changed, 137 insertions(+), 15 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index e3c7515..80109c1 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -664,6 +664,45 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
>          .fields = { .type = 0xb, .s = 0, .dpl = 0, .p = 1, .avl = 0,    \
>                      .l = 0, .db = 0, .g = 0, .pad = 0 } }).bytes)
>
> +u16 vmx_read_selector(struct vcpu *v, enum x86_segment seg)
> +{
> +    u16 sel = 0;
> +
> +    vmx_vmcs_enter(v);
> +    switch ( seg )
> +    {
> +    case x86_seg_cs:
> +        sel = __vmread(GUEST_CS_SELECTOR);
> +        break;
> +
> +    case x86_seg_ss:
> +        sel = __vmread(GUEST_SS_SELECTOR);
> +        break;
> +
> +    case x86_seg_es:
> +        sel = __vmread(GUEST_ES_SELECTOR);
> +        break;
> +
> +    case x86_seg_ds:
> +        sel = __vmread(GUEST_DS_SELECTOR);
> +        break;
> +
> +    case x86_seg_fs:
> +        sel = __vmread(GUEST_FS_SELECTOR);
> +        break;
> +
> +    case x86_seg_gs:
> +        sel = __vmread(GUEST_GS_SELECTOR);
> +        break;
> +
> +    default:
> +        BUG();
> +    }
> +    vmx_vmcs_exit(v);
> +
> +    return sel;
> +}
> +
>  void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg,
>                                struct segment_register *reg)
>  {
> @@ -1563,6 +1602,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
>      .handle_eoi           = vmx_handle_eoi,
>      .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
>      .pvh_set_vcpu_info    = vmx_pvh_set_vcpu_info,
> +    .read_selector        = vmx_read_selector,
>  };
>
>  const struct hvm_function_table * __init start_vmx(void)
> diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
> index a3ca70b..fe8b94c 100644
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -480,6 +480,10 @@ static unsigned int check_guest_io_breakpoint(struct vcpu *v,
>      unsigned int width, i, match = 0;
>      unsigned long start;
>
> +    /* PVH fixme: support io breakpoint. */
> +    if ( is_pvh_vcpu(v) )
> +        return 0;

Does this one, and the IO check below, have anything to do with
privileged op emulation?

> +
>      if ( !(v->arch.debugreg[5]) ||
>           !(v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE) )
>          return 0;
> @@ -1525,6 +1529,49 @@ static int read_descriptor(unsigned int sel,
>      return 1;
>  }
>
> +static int read_descriptor_sel(unsigned int sel,
> +                               enum x86_segment which_sel,
> +                               struct vcpu *v,
> +                               const struct cpu_user_regs *regs,
> +                               unsigned long *base,
> +                               unsigned long *limit,
> +                               unsigned int *ar,
> +                               unsigned int vm86attr)
> +{
> +    struct segment_register seg;
> +    bool_t long_mode;
> +
> +    if ( !is_pvh_vcpu(v) )
> +        return read_descriptor(sel, v, regs, base, limit, ar, vm86attr);

Again, wouldn't it be better to rename read_descriptor() to
pv_read_descriptor(), name this one pvh_read_descriptor(), and give them
the same function signature (e.g., have both take a which_sel and look
the selector up themselves), rather than have this
one-function-calls-another-function thing?
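
(For illustration only -- a minimal sketch of the split suggested above,
using the hypothetical names pv_read_descriptor()/pvh_read_descriptor();
this is not code in the patch or in the tree:)

    /* Sketch: both variants take which_sel and look the selector up
     * themselves, so callers no longer carry a (sel, which_sel) pair
     * and neither function calls through to the other. */
    static int pv_read_descriptor(enum x86_segment which_sel, struct vcpu *v,
                                  const struct cpu_user_regs *regs,
                                  unsigned long *base, unsigned long *limit,
                                  unsigned int *ar, unsigned int vm86attr);

    static int pvh_read_descriptor(enum x86_segment which_sel, struct vcpu *v,
                                   const struct cpu_user_regs *regs,
                                   unsigned long *base, unsigned long *limit,
                                   unsigned int *ar, unsigned int vm86attr);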

> +
> +    hvm_get_segment_register(v, x86_seg_cs, &seg);
> +    long_mode = seg.attr.fields.l;
> +
> +    if ( which_sel != x86_seg_cs )
> +        hvm_get_segment_register(v, which_sel, &seg);
> +
> +    /* "ar" is returned packed as in segment_attributes_t. Fix it up. */
> +    *ar = seg.attr.bytes;
> +    *ar = (*ar & 0xff ) | ((*ar & 0xf00) << 4);
> +    *ar <<= 8;
> +
> +    if ( long_mode )
> +    {
> +        *limit = ~0UL;
> +
> +        if ( which_sel < x86_seg_fs )
> +        {
> +            *base = 0UL;
> +            return 1;
> +        }
> +   }
> +   else
> +       *limit = seg.limit;
> +
> +   *base = seg.base;
> +    return 1;
> +}
> +
>  static int read_gate_descriptor(unsigned int gate_sel,
>                                  const struct vcpu *v,
>                                  unsigned int *sel,
> @@ -1590,6 +1637,13 @@ static int guest_io_okay(
>      int user_mode = !(v->arch.flags & TF_kernel_mode);
>  #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
>
> +    /*
> +     * For PVH we check this in vmexit for EXIT_REASON_IO_INSTRUCTION
> +     * and so don't need to check again here.
> +     */
> +    if ( is_pvh_vcpu(v) )
> +        return 1;

Same question re IO emulation.

> +
>      if ( !vm86_mode(regs) &&
>           (v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) )
>          return 1;
> @@ -1835,7 +1889,7 @@ static inline uint64_t guest_misc_enable(uint64_t val)
>          _ptr = (unsigned int)_ptr;                                          \
>      if ( (limit) < sizeof(_x) - 1 || (eip) > (limit) - (sizeof(_x) - 1) )   \
>          goto fail;                                                          \
> -    if ( (_rc = copy_from_user(&_x, (type *)_ptr, sizeof(_x))) != 0 )       \
> +    if ( (_rc = raw_copy_from_guest(&_x, (type *)_ptr, sizeof(_x))) != 0 )  \

Why is this changed?

>      {                                                                       \
>          propagate_page_fault(_ptr + sizeof(_x) - _rc, 0);                   \
>          goto skip;                                                          \
> @@ -1852,6 +1906,7 @@ static int is_cpufreq_controller(struct domain *d)
>
>  static int emulate_privileged_op(struct cpu_user_regs *regs)
>  {
> +    enum x86_segment which_sel;
>      struct vcpu *v = current;
>      unsigned long *reg, eip = regs->eip;
>      u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0, lock = 0, rex = 0;
> @@ -1874,9 +1929,10 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
>      void (*io_emul)(struct cpu_user_regs *) __attribute__((__regparm__(1)));
>      uint64_t val, msr_content;
>
> -    if ( !read_descriptor(regs->cs, v, regs,
> -                          &code_base, &code_limit, &ar,
> -                          _SEGMENT_CODE|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P) )
> +    if ( !read_descriptor_sel(regs->cs, x86_seg_cs, v, regs,
> +                              &code_base, &code_limit, &ar,
> +                              _SEGMENT_CODE|_SEGMENT_S|
> +                              _SEGMENT_DPL|_SEGMENT_P) )
>          goto fail;
>      op_default = op_bytes = (ar & (_SEGMENT_L|_SEGMENT_DB)) ? 4 : 2;
>      ad_default = ad_bytes = (ar & _SEGMENT_L) ? 8 : op_default;
> @@ -1887,6 +1943,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
>
>      /* emulating only opcodes not allowing SS to be default */
>      data_sel = read_segment_register(v, regs, ds);
> +    which_sel = x86_seg_ds;
>
>      /* Legacy prefixes. */
>      for ( i = 0; i < 8; i++, rex == opcode || (rex = 0) )
> @@ -1902,23 +1959,29 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
>              continue;
>          case 0x2e: /* CS override */
>              data_sel = regs->cs;
> +            which_sel = x86_seg_cs;
>              continue;
>          case 0x3e: /* DS override */
>              data_sel = read_segment_register(v, regs, ds);
> +            which_sel = x86_seg_ds;
>              continue;
>          case 0x26: /* ES override */
>              data_sel = read_segment_register(v, regs, es);
> +            which_sel = x86_seg_es;
>              continue;
>          case 0x64: /* FS override */
>              data_sel = read_segment_register(v, regs, fs);
> +            which_sel = x86_seg_fs;
>              lm_ovr = lm_seg_fs;
>              continue;
>          case 0x65: /* GS override */
>              data_sel = read_segment_register(v, regs, gs);
> +            which_sel = x86_seg_gs;
>              lm_ovr = lm_seg_gs;
>              continue;
>          case 0x36: /* SS override */
>              data_sel = regs->ss;
> +            which_sel = x86_seg_ss;

...If you did that, you wouldn't need this pair of assignments (only one
of which is ever actually used) all the way through this function.

>              continue;
>          case 0xf0: /* LOCK */
>              lock = 1;
> @@ -1962,15 +2025,16 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
>          if ( !(opcode & 2) )
>          {
>              data_sel = read_segment_register(v, regs, es);
> +            which_sel = x86_seg_es;
>              lm_ovr = lm_seg_none;
>          }
>
>          if ( !(ar & _SEGMENT_L) )
>          {
> -            if ( !read_descriptor(data_sel, v, regs,
> -                                  &data_base, &data_limit, &ar,
> -                                  _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|
> -                                  _SEGMENT_P) )
> +            if ( !read_descriptor_sel(data_sel, which_sel, v, regs,
> +                                      &data_base, &data_limit, &ar,
> +                                      _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|
> +                                      _SEGMENT_P) )
>                  goto fail;
>              if ( !(ar & _SEGMENT_S) ||
>                   !(ar & _SEGMENT_P) ||
> @@ -2000,9 +2064,9 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
>                  }
>              }
>              else
> -                read_descriptor(data_sel, v, regs,
> -                                &data_base, &data_limit, &ar,
> -                                0);
> +                read_descriptor_sel(data_sel, which_sel, v, regs,
> +                                    &data_base, &data_limit, &ar,
> +                                    0);
>              data_limit = ~0UL;
>              ar = _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P;
>          }
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index 072a2a7..29ed313 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -195,6 +195,8 @@ struct hvm_function_table {
>                                  bool_t access_w, bool_t access_x);
>
>      int (*pvh_set_vcpu_info)(struct vcpu *v, struct vcpu_guest_context *ctxtp);
> +
> +    u16 (*read_selector)(struct vcpu *v, enum x86_segment seg);
>  };
>
>  extern struct hvm_function_table hvm_funcs;
> @@ -334,6 +336,11 @@ static inline int pvh_set_vcpu_info(struct vcpu *v,
>      return hvm_funcs.pvh_set_vcpu_info(v, ctxtp);
>  }
>
> +static inline u16 pvh_get_selector(struct vcpu *v, enum x86_segment seg)
> +{
> +    return hvm_funcs.read_selector(v, seg);
> +}
> +
>  #define is_viridian_domain(_d)                                             \
>   (is_hvm_domain(_d) && ((_d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN]))
>
> diff --git a/xen/include/asm-x86/system.h b/xen/include/asm-x86/system.h
> index 9bb22cb..1242657 100644
> --- a/xen/include/asm-x86/system.h
> +++ b/xen/include/asm-x86/system.h
> @@ -4,10 +4,21 @@
>  #include <xen/lib.h>
>  #include <xen/bitops.h>
>
> -#define read_segment_register(vcpu, regs, name)                 \
> -({  u16 __sel;                                                  \
> -    asm volatile ( "movw %%" STR(name) ",%0" : "=r" (__sel) );  \
> -    __sel;                                                      \
> +/*
> + * We need vcpu because during context switch, going from PV to PVH,
> + * in save_segments() current has been updated to next, and no longer pointing
> + * to the PV, but the intention is to get selector for the PV. Checking
> + * is_pvh_vcpu(current) will yield incorrect results in such a case.
> + */
> +#define read_segment_register(vcpu, regs, name)                   \
> +({  u16 __sel;                                                    \
> +    struct cpu_user_regs *_regs = (regs);                         \
> +                                                                  \
> +    if ( is_pvh_vcpu(vcpu) && guest_mode(_regs) )                 \
> +        __sel = pvh_get_selector(vcpu, x86_seg_##name);           \
> +    else                                                          \
> +        asm volatile ( "movw %%" #name ",%0" : "=r" (__sel) );    \

Is there a reason you discarded the STR() macro here?
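
(Side note, for illustration: assuming STR() is the usual two-level
stringification wrapper, the two spellings only differ when the argument
is itself a macro -- a direct #name stringifies the token as written,
while STR(name) macro-expands it first.  Standalone sketch, not Xen code:)

    #include <stdio.h>

    #define __STR(x)      #x
    #define STR(x)        __STR(x)   /* expands macros in x, then stringifies */

    #define DIRECT(name)  #name      /* stringifies the argument as written   */
    #define VIA_STR(name) STR(name)  /* additionally expands name if a macro  */

    #define MY_SEG ds

    int main(void)
    {
        printf("%s %s\n", DIRECT(ds), VIA_STR(ds));         /* "ds ds"     */
        printf("%s %s\n", DIRECT(MY_SEG), VIA_STR(MY_SEG)); /* "MY_SEG ds" */
        return 0;
    }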

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel