[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH V4] x86/hvm: fix domain crash when CR3 has the noflush bit set



On Fri, Feb 9, 2018 at 4:01 AM, Razvan Cojocaru
<rcojocaru@xxxxxxxxxxxxxxx> wrote:
> The emulation layers of Xen lack PCID support, and as we only offer
> PCID to HAP guests, all writes to CR3 are handled by hardware,
> except when introspection is involved. Consequently, trying to set
> CR3 when the noflush bit is set in hvm_set_cr3() leads to domain
> crashes. The workaround is to clear the noflush bit in
> hvm_set_cr3(). CR3 values in hvm_monitor_cr() are also sanitized.
> Additionally, a bool parameter now propagates to
> {svm,vmx}_update_guest_cr(), so that no flushes occur when
> the bit was set.
>
> Signed-off-by: Razvan Cojocaru <rcojocaru@xxxxxxxxxxxxxxx>
> Reported-by: Bitweasil <bitweasil@xxxxxxxxxxxxxx>
> Suggested-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

Acked-by: Tamas K Lengyel <tamas@xxxxxxxxxxxxx>

>
> ---
> Changes since V3:
>  - Removed unnecessary !! from "noflush = !!(value & X86_CR3_NOFLUSH);"
>  - Moved the X86_CR3_NOFLUSH #define to x86-defns.h.
>  - Removed X86_CR3_NOFLUSH_DISABLE_MASK (using ~X86_CR3_NOFLUSH).
>  - Reverted changes to hvm_update_guest_cr() and callers, and added
>    hvm_update_guest_cr3() instead.
>  - Added HVM_UPDATE_GUEST_CR3_NO_FLUSH as a potential flag to pass
>    to update_guest_cr(), instead of a simple bool which did not
>    apply to all CRs.
> ---
>  xen/arch/x86/hvm/hvm.c            | 13 ++++++++++---
>  xen/arch/x86/hvm/monitor.c        |  3 +++
>  xen/arch/x86/hvm/svm/svm.c        | 22 ++++++++++++++--------
>  xen/arch/x86/hvm/vmx/vmx.c        | 18 +++++++++++-------
>  xen/arch/x86/mm.c                 |  2 +-
>  xen/arch/x86/mm/hap/hap.c         |  6 +++---
>  xen/arch/x86/mm/shadow/common.c   |  2 +-
>  xen/arch/x86/mm/shadow/multi.c    |  6 +++---
>  xen/arch/x86/mm/shadow/none.c     |  2 +-
>  xen/include/asm-x86/hvm/hvm.h     | 15 +++++++++++++--
>  xen/include/asm-x86/hvm/svm/svm.h |  2 +-
>  xen/include/asm-x86/paging.h      |  7 ++++---
>  xen/include/asm-x86/x86-defns.h   |  5 +++++
>  13 files changed, 70 insertions(+), 33 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 18d721d..f17163e 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -2297,6 +2297,7 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
>      struct vcpu *v = current;
>      struct page_info *page;
>      unsigned long old = v->arch.hvm_vcpu.guest_cr[3];
> +    bool noflush = false;
>
>      if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled 
> &
>                                 monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) )
> @@ -2313,6 +2314,12 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
>          }
>      }
>
> +    if ( hvm_pcid_enabled(v) ) /* Clear the noflush bit. */
> +    {
> +        noflush = value & X86_CR3_NOFLUSH;
> +        value &= ~X86_CR3_NOFLUSH;
> +    }
> +
>      if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
>           (value != v->arch.hvm_vcpu.guest_cr[3]) )
>      {
> @@ -2330,7 +2337,7 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
>      }
>
>      v->arch.hvm_vcpu.guest_cr[3] = value;
> -    paging_update_cr3(v);
> +    paging_update_cr3(v, noflush);
>      return X86EMUL_OKAY;
>
>   bad_cr3:
> @@ -4031,7 +4038,7 @@ static int hvmop_flush_tlb_all(void)
>
>      /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
>      for_each_vcpu ( d, v )
> -        paging_update_cr3(v);
> +        paging_update_cr3(v, false);
>
>      /* Flush all dirty TLBs. */
>      flush_tlb_mask(d->dirty_cpumask);
> @@ -4193,7 +4200,7 @@ static int hvmop_set_param(
>          domain_pause(d);
>          d->arch.hvm_domain.params[a.index] = a.value;
>          for_each_vcpu ( d, v )
> -            paging_update_cr3(v);
> +            paging_update_cr3(v, false);
>          domain_unpause(d);
>
>          domctl_lock_release();
> diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
> index 131b852..87e7e31 100644
> --- a/xen/arch/x86/hvm/monitor.c
> +++ b/xen/arch/x86/hvm/monitor.c
> @@ -36,6 +36,9 @@ bool hvm_monitor_cr(unsigned int index, unsigned long 
> value, unsigned long old)
>      struct arch_domain *ad = &curr->domain->arch;
>      unsigned int ctrlreg_bitmask = monitor_ctrlreg_bitmask(index);
>
> +    if ( index == VM_EVENT_X86_CR3 && hvm_pcid_enabled(curr) )
> +        value &= ~X86_CR3_NOFLUSH; /* Clear the noflush bit. */
> +
>      if ( (ad->monitor.write_ctrlreg_enabled & ctrlreg_bitmask) &&
>           (!(ad->monitor.write_ctrlreg_onchangeonly & ctrlreg_bitmask) ||
>            value != old) &&
> diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
> index 81cf5b8..4e74f62 100644
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -315,9 +315,9 @@ static int svm_vmcb_restore(struct vcpu *v, struct 
> hvm_hw_cpu *c)
>      v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
>      v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
>      v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
> -    svm_update_guest_cr(v, 0);
> -    svm_update_guest_cr(v, 2);
> -    svm_update_guest_cr(v, 4);
> +    svm_update_guest_cr(v, 0, 0);
> +    svm_update_guest_cr(v, 2, 0);
> +    svm_update_guest_cr(v, 4, 0);
>
>      /* Load sysenter MSRs into both VMCB save area and VCPU fields. */
>      vmcb->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs = c->sysenter_cs;
> @@ -533,7 +533,7 @@ static int svm_guest_x86_mode(struct vcpu *v)
>      return likely(vmcb->cs.db) ? 4 : 2;
>  }
>
> -void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
> +void svm_update_guest_cr(struct vcpu *v, unsigned int cr, unsigned int flags)
>  {
>      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
>      uint64_t value;
> @@ -563,13 +563,19 @@ void svm_update_guest_cr(struct vcpu *v, unsigned int 
> cr)
>      case 3:
>          vmcb_set_cr3(vmcb, v->arch.hvm_vcpu.hw_cr[3]);
>          if ( !nestedhvm_enabled(v->domain) )
> -            hvm_asid_flush_vcpu(v);
> +        {
> +            if ( !(flags & HVM_UPDATE_GUEST_CR3_NO_FLUSH) )
> +                hvm_asid_flush_vcpu(v);
> +        }
>          else if ( nestedhvm_vmswitch_in_progress(v) )
>              ; /* CR3 switches during VMRUN/VMEXIT do not flush the TLB. */
>          else
> -            hvm_asid_flush_vcpu_asid(
> -                nestedhvm_vcpu_in_guestmode(v)
> -                ? &vcpu_nestedhvm(v).nv_n2asid : &v->arch.hvm_vcpu.n1asid);
> +        {
> +            if ( !(flags & HVM_UPDATE_GUEST_CR3_NO_FLUSH) )
> +                hvm_asid_flush_vcpu_asid(
> +                    nestedhvm_vcpu_in_guestmode(v)
> +                    ? &vcpu_nestedhvm(v).nv_n2asid : 
> &v->arch.hvm_vcpu.n1asid);
> +        }
>          break;
>      case 4:
>          value = HVM_CR4_HOST_MASK;
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 3dc6a6d..4106d4c 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -70,7 +70,8 @@ static void vmx_ctxt_switch_to(struct vcpu *v);
>  static int  vmx_alloc_vlapic_mapping(struct domain *d);
>  static void vmx_free_vlapic_mapping(struct domain *d);
>  static void vmx_install_vlapic_mapping(struct vcpu *v);
> -static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr);
> +static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
> +                                unsigned int flags);
>  static void vmx_update_guest_efer(struct vcpu *v);
>  static void vmx_wbinvd_intercept(void);
>  static void vmx_fpu_dirty_intercept(void);
> @@ -840,9 +841,9 @@ static int vmx_vmcs_restore(struct vcpu *v, struct 
> hvm_hw_cpu *c)
>
>      v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
>      v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
> -    vmx_update_guest_cr(v, 0);
> -    vmx_update_guest_cr(v, 2);
> -    vmx_update_guest_cr(v, 4);
> +    vmx_update_guest_cr(v, 0, 0);
> +    vmx_update_guest_cr(v, 2, 0);
> +    vmx_update_guest_cr(v, 4, 0);
>
>      v->arch.hvm_vcpu.guest_efer = c->msr_efer;
>      vmx_update_guest_efer(v);
> @@ -1552,7 +1553,8 @@ void vmx_update_debug_state(struct vcpu *v)
>      vmx_vmcs_exit(v);
>  }
>
> -static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
> +static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
> +                                unsigned int flags)
>  {
>      vmx_vmcs_enter(v);
>
> @@ -1704,7 +1706,9 @@ static void vmx_update_guest_cr(struct vcpu *v, 
> unsigned int cr)
>          }
>
>          __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]);
> -        hvm_asid_flush_vcpu(v);
> +
> +        if ( !(flags & HVM_UPDATE_GUEST_CR3_NO_FLUSH) )
> +            hvm_asid_flush_vcpu(v);
>          break;
>
>      default:
> @@ -2656,7 +2660,7 @@ static int vmx_cr_access(unsigned long 
> exit_qualification)
>           */
>          hvm_monitor_crX(CR0, value, old);
>          curr->arch.hvm_vcpu.guest_cr[0] = value;
> -        vmx_update_guest_cr(curr, 0);
> +        vmx_update_guest_cr(curr, 0, 0);
>          HVMTRACE_0D(CLTS);
>          break;
>      }
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index 66ea822..1ef3635 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -526,7 +526,7 @@ void update_cr3(struct vcpu *v)
>
>      if ( paging_mode_enabled(v->domain) )
>      {
> -        paging_update_cr3(v);
> +        paging_update_cr3(v, false);
>          return;
>      }
>
> diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
> index 003c2d8..b76e6b8 100644
> --- a/xen/arch/x86/mm/hap/hap.c
> +++ b/xen/arch/x86/mm/hap/hap.c
> @@ -669,10 +669,10 @@ static bool_t hap_invlpg(struct vcpu *v, unsigned long 
> va)
>      return 1;
>  }
>
> -static void hap_update_cr3(struct vcpu *v, int do_locking)
> +static void hap_update_cr3(struct vcpu *v, int do_locking, bool noflush)
>  {
>      v->arch.hvm_vcpu.hw_cr[3] = v->arch.hvm_vcpu.guest_cr[3];
> -    hvm_update_guest_cr(v, 3);
> +    hvm_update_guest_cr3(v, noflush);
>  }
>
>  const struct paging_mode *
> @@ -708,7 +708,7 @@ static void hap_update_paging_modes(struct vcpu *v)
>      }
>
>      /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
> -    hap_update_cr3(v, 0);
> +    hap_update_cr3(v, 0, false);
>
>      paging_unlock(d);
>      put_gfn(d, cr3_gfn);
> diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
> index c240953..20ded3e 100644
> --- a/xen/arch/x86/mm/shadow/common.c
> +++ b/xen/arch/x86/mm/shadow/common.c
> @@ -3030,7 +3030,7 @@ static void sh_update_paging_modes(struct vcpu *v)
>      }
>  #endif /* OOS */
>
> -    v->arch.paging.mode->update_cr3(v, 0);
> +    v->arch.paging.mode->update_cr3(v, 0, false);
>  }
>
>  void shadow_update_paging_modes(struct vcpu *v)
> diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
> index a6372e3..fcc4fa3 100644
> --- a/xen/arch/x86/mm/shadow/multi.c
> +++ b/xen/arch/x86/mm/shadow/multi.c
> @@ -3173,7 +3173,7 @@ static int sh_page_fault(struct vcpu *v,
>           * In any case, in the PAE case, the ASSERT is not true; it can
>           * happen because of actions the guest is taking. */
>  #if GUEST_PAGING_LEVELS == 3
> -        v->arch.paging.mode->update_cr3(v, 0);
> +        v->arch.paging.mode->update_cr3(v, 0, false);
>  #else
>          ASSERT(d->is_shutting_down);
>  #endif
> @@ -3992,7 +3992,7 @@ sh_set_toplevel_shadow(struct vcpu *v,
>
>
>  static void
> -sh_update_cr3(struct vcpu *v, int do_locking)
> +sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
>  /* Updates vcpu->arch.cr3 after the guest has changed CR3.
>   * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
>   * if appropriate).
> @@ -4234,7 +4234,7 @@ sh_update_cr3(struct vcpu *v, int do_locking)
>          v->arch.hvm_vcpu.hw_cr[3] =
>              pagetable_get_paddr(v->arch.shadow_table[0]);
>  #endif
> -        hvm_update_guest_cr(v, 3);
> +        hvm_update_guest_cr3(v, noflush);
>      }
>
>      /* Fix up the linear pagetable mappings */
> diff --git a/xen/arch/x86/mm/shadow/none.c b/xen/arch/x86/mm/shadow/none.c
> index 9e6ad23..a8c9604 100644
> --- a/xen/arch/x86/mm/shadow/none.c
> +++ b/xen/arch/x86/mm/shadow/none.c
> @@ -50,7 +50,7 @@ static unsigned long _gva_to_gfn(struct vcpu *v, struct 
> p2m_domain *p2m,
>      return gfn_x(INVALID_GFN);
>  }
>
> -static void _update_cr3(struct vcpu *v, int do_locking)
> +static void _update_cr3(struct vcpu *v, int do_locking, bool noflush)
>  {
>      ASSERT_UNREACHABLE();
>  }
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index dd3dd5f..1b4080b 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -80,6 +80,9 @@ enum hvm_intblk {
>  #define HVM_EVENT_VECTOR_UNSET    (-1)
>  #define HVM_EVENT_VECTOR_UPDATING (-2)
>
> +/* update_guest_cr() flags. */
> +#define HVM_UPDATE_GUEST_CR3_NO_FLUSH 0x00000001
> +
>  /*
>   * The hardware virtual machine (HVM) interface abstracts away from the
>   * x86/x86_64 CPU virtualization assist specifics. Currently this interface
> @@ -132,7 +135,8 @@ struct hvm_function_table {
>      /*
>       * Called to inform HVM layer that a guest CRn or EFER has changed.
>       */
> -    void (*update_guest_cr)(struct vcpu *v, unsigned int cr);
> +    void (*update_guest_cr)(struct vcpu *v, unsigned int cr,
> +                            unsigned int flags);
>      void (*update_guest_efer)(struct vcpu *v);
>
>      void (*cpuid_policy_changed)(struct vcpu *v);
> @@ -324,7 +328,14 @@ hvm_update_host_cr3(struct vcpu *v)
>
>  static inline void hvm_update_guest_cr(struct vcpu *v, unsigned int cr)
>  {
> -    hvm_funcs.update_guest_cr(v, cr);
> +    hvm_funcs.update_guest_cr(v, cr, 0);
> +}
> +
> +static inline void hvm_update_guest_cr3(struct vcpu *v, bool noflush)
> +{
> +    unsigned int flags = noflush ? HVM_UPDATE_GUEST_CR3_NO_FLUSH : 0;
> +
> +    hvm_funcs.update_guest_cr(v, 3, flags);
>  }
>
>  static inline void hvm_update_guest_efer(struct vcpu *v)
> diff --git a/xen/include/asm-x86/hvm/svm/svm.h 
> b/xen/include/asm-x86/hvm/svm/svm.h
> index 462cb89..6c050f5 100644
> --- a/xen/include/asm-x86/hvm/svm/svm.h
> +++ b/xen/include/asm-x86/hvm/svm/svm.h
> @@ -51,7 +51,7 @@ static inline void svm_invlpga(unsigned long vaddr, 
> uint32_t asid)
>
>  unsigned long *svm_msrbit(unsigned long *msr_bitmap, uint32_t msr);
>  void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len);
> -void svm_update_guest_cr(struct vcpu *, unsigned int cr);
> +void svm_update_guest_cr(struct vcpu *, unsigned int cr, unsigned int flags);
>
>  extern u32 svm_feature_flags;
>
> diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
> index 5607ab4..dd3e31f 100644
> --- a/xen/include/asm-x86/paging.h
> +++ b/xen/include/asm-x86/paging.h
> @@ -122,7 +122,8 @@ struct paging_mode {
>                                              unsigned long cr3,
>                                              paddr_t ga, uint32_t *pfec,
>                                              unsigned int *page_order);
> -    void          (*update_cr3            )(struct vcpu *v, int do_locking);
> +    void          (*update_cr3            )(struct vcpu *v, int do_locking,
> +                                            bool noflush);
>      void          (*update_paging_modes   )(struct vcpu *v);
>      void          (*write_p2m_entry       )(struct domain *d, unsigned long 
> gfn,
>                                              l1_pgentry_t *p, l1_pgentry_t 
> new,
> @@ -276,9 +277,9 @@ static inline unsigned long paging_ga_to_gfn_cr3(struct 
> vcpu *v,
>  /* Update all the things that are derived from the guest's CR3.
>   * Called when the guest changes CR3; the caller can then use v->arch.cr3
>   * as the value to load into the host CR3 to schedule this vcpu */
> -static inline void paging_update_cr3(struct vcpu *v)
> +static inline void paging_update_cr3(struct vcpu *v, bool noflush)
>  {
> -    paging_get_hostmode(v)->update_cr3(v, 1);
> +    paging_get_hostmode(v)->update_cr3(v, 1, noflush);
>  }
>
>  /* Update all the things that are derived from the guest's CR0/CR3/CR4.
> diff --git a/xen/include/asm-x86/x86-defns.h b/xen/include/asm-x86/x86-defns.h
> index 70453e8..7bb6918 100644
> --- a/xen/include/asm-x86/x86-defns.h
> +++ b/xen/include/asm-x86/x86-defns.h
> @@ -43,6 +43,11 @@
>  #define X86_CR0_PG              0x80000000 /* Paging                   (RW) 
> */
>
>  /*
> + * Intel CPU flags in CR3
> + */
> +#define X86_CR3_NOFLUSH    0x8000000000000000
> +
> +/*
>   * Intel CPU features in CR4
>   */
>  #define X86_CR4_VME        0x00000001 /* enable vm86 extensions */
> --
> 2.7.4

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.