[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH V11 1/3] x86/hvm: pkeys, add pkeys support for guest_walk_tables



On Fri, Feb 19, 2016 at 9:51 AM, Huaitong Han <huaitong.han@xxxxxxxxx> wrote:
> Protection keys define a new 4-bit protection key field(PKEY) in bits 62:59 of
> leaf entries of the page tables.
>
> PKRU register defines 32 bits, there are 16 domains and 2 attribute bits per
> domain in pkru, for each i (0 ≤ i ≤ 15), PKRU[2i] is the access-disable bit 
> for
> protection key i (ADi); PKRU[2i+1] is the write-disable bit for protection key
> i (WDi). PKEY is index to a defined domain.
>
> A fault is considered as a PKU violation if all of the following conditions 
> are
> true:
> 1.CR4_PKE=1.
> 2.EFER_LMA=1.
> 3.Page is present with no reserved bit violations.
> 4.The access is not an instruction fetch.
> 5.The access is to a user page.
> 6.PKRU.AD=1
>     or The access is a data write and PKRU.WD=1
>                 and either CR0.WP=1 or it is a user access.
>
> Signed-off-by: Huaitong Han <huaitong.han@xxxxxxxxx>
> Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
> Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>

Reviewed-by: George Dunlap <george.dunlap@xxxxxxxxxx>

> ---
> Changes in v11:
> *Move pkru_ad/pkru_wd variable initialization position.
> *Undo v10 changes.
>
> Changes in v10:
> *Move PFEC_page_present check.
>
> Changes in v9:
> *Rename _write_cr4 to raw_write_cr4.
>
> Changes in v8:
> *Abstract out _write_cr4.
>
> Changes in v7:
> *Add static for pkey_fault.
> *Add a comment for page present check and adjust indentation.
> *Init pkru_ad and pkru_wd.
> *Delete l3e_get_pkey the outer parentheses.
> *The first parameter of read_pkru_* use uint32_t type.
>
>  xen/arch/x86/mm/guest_walk.c      | 52 
> +++++++++++++++++++++++++++++++++++++++
>  xen/arch/x86/mm/hap/guest_walk.c  |  3 +++
>  xen/include/asm-x86/guest_pt.h    | 12 +++++++++
>  xen/include/asm-x86/hvm/hvm.h     |  2 ++
>  xen/include/asm-x86/page.h        |  5 ++++
>  xen/include/asm-x86/processor.h   | 47 ++++++++++++++++++++++++++++++++++-
>  xen/include/asm-x86/x86_64/page.h | 12 +++++++++
>  7 files changed, 132 insertions(+), 1 deletion(-)
>
> diff --git a/xen/arch/x86/mm/guest_walk.c b/xen/arch/x86/mm/guest_walk.c
> index 18d1acf..01a64ae 100644
> --- a/xen/arch/x86/mm/guest_walk.c
> +++ b/xen/arch/x86/mm/guest_walk.c
> @@ -90,6 +90,52 @@ static uint32_t set_ad_bits(void *guest_p, void *walk_p, 
> int set_dirty)
>      return 0;
>  }
>
> +#if GUEST_PAGING_LEVELS >= 4
> +static bool_t pkey_fault(struct vcpu *vcpu, uint32_t pfec,
> +        uint32_t pte_flags, uint32_t pte_pkey)
> +{
> +    uint32_t pkru;
> +
> +    /* When page isn't present,  PKEY isn't checked. */
> +    if ( !(pfec & PFEC_page_present) || is_pv_vcpu(vcpu) )
> +        return 0;
> +
> +    /*
> +     * PKU:  additional mechanism by which the paging controls
> +     * access to user-mode addresses based on the value in the
> +     * PKRU register. A fault is considered as a PKU violation if all
> +     * of the following conditions are true:
> +     * 1.CR4_PKE=1.
> +     * 2.EFER_LMA=1.
> +     * 3.Page is present with no reserved bit violations.
> +     * 4.The access is not an instruction fetch.
> +     * 5.The access is to a user page.
> +     * 6.PKRU.AD=1 or
> +     *      the access is a data write and PKRU.WD=1 and
> +     *          either CR0.WP=1 or it is a user access.
> +     */
> +    if ( !hvm_pku_enabled(vcpu) ||
> +         !hvm_long_mode_enabled(vcpu) ||
> +         (pfec & PFEC_reserved_bit) ||
> +         (pfec & PFEC_insn_fetch) ||
> +         !(pte_flags & _PAGE_USER) )
> +        return 0;
> +
> +    pkru = read_pkru();
> +    if ( unlikely(pkru) )
> +    {
> +        bool_t pkru_ad = read_pkru_ad(pkru, pte_pkey);
> +        bool_t pkru_wd = read_pkru_wd(pkru, pte_pkey);
> +        /* Condition 6 */
> +        if ( pkru_ad || (pkru_wd && (pfec & PFEC_write_access) &&
> +                    (hvm_wp_enabled(vcpu) || (pfec & PFEC_user_mode))))
> +            return 1;
> +    }
> +
> +    return 0;
> +}
> +#endif
> +
>  /* Walk the guest pagetables, after the manner of a hardware walker. */
>  /* Because the walk is essentially random, it can cause a deadlock
>   * warning in the p2m locking code. Highly unlikely this is an actual
> @@ -107,6 +153,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
>      guest_l3e_t *l3p = NULL;
>      guest_l4e_t *l4p;
>  #endif
> +    unsigned int pkey;
>      uint32_t gflags, mflags, iflags, rc = 0;
>      bool_t smep = 0, smap = 0;
>      bool_t pse1G = 0, pse2M = 0;
> @@ -190,6 +237,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
>          goto out;
>      /* Get the l3e and check its flags*/
>      gw->l3e = l3p[guest_l3_table_offset(va)];
> +    pkey = guest_l3e_get_pkey(gw->l3e);
>      gflags = guest_l3e_get_flags(gw->l3e) ^ iflags;
>      if ( !(gflags & _PAGE_PRESENT) ) {
>          rc |= _PAGE_PRESENT;
> @@ -261,6 +309,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
>
>  #endif /* All levels... */
>
> +    pkey = guest_l2e_get_pkey(gw->l2e);
>      gflags = guest_l2e_get_flags(gw->l2e) ^ iflags;
>      if ( !(gflags & _PAGE_PRESENT) ) {
>          rc |= _PAGE_PRESENT;
> @@ -324,6 +373,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
>          if(l1p == NULL)
>              goto out;
>          gw->l1e = l1p[guest_l1_table_offset(va)];
> +        pkey = guest_l1e_get_pkey(gw->l1e);
>          gflags = guest_l1e_get_flags(gw->l1e) ^ iflags;
>          if ( !(gflags & _PAGE_PRESENT) ) {
>              rc |= _PAGE_PRESENT;
> @@ -334,6 +384,8 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m,
>
>  #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
>  set_ad:
> +    if ( pkey_fault(v, pfec, gflags, pkey) )
> +        rc |= _PAGE_PKEY_BITS;
>  #endif
>      /* Now re-invert the user-mode requirement for SMEP and SMAP */
>      if ( smep || smap )
> diff --git a/xen/arch/x86/mm/hap/guest_walk.c 
> b/xen/arch/x86/mm/hap/guest_walk.c
> index 8aa7e0f..d2716f9 100644
> --- a/xen/arch/x86/mm/hap/guest_walk.c
> +++ b/xen/arch/x86/mm/hap/guest_walk.c
> @@ -130,6 +130,9 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(
>      if ( missing & _PAGE_INVALID_BITS )
>          pfec[0] |= PFEC_reserved_bit;
>
> +    if ( missing & _PAGE_PKEY_BITS )
> +        pfec[0] |= PFEC_prot_key;
> +
>      if ( missing & _PAGE_PAGED )
>          pfec[0] = PFEC_page_paged;
>
> diff --git a/xen/include/asm-x86/guest_pt.h b/xen/include/asm-x86/guest_pt.h
> index 3447973..eb29e62 100644
> --- a/xen/include/asm-x86/guest_pt.h
> +++ b/xen/include/asm-x86/guest_pt.h
> @@ -81,6 +81,11 @@ static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e)
>  static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e)
>  { return gl2e.l2 & 0xfff; }
>
> +static inline u32 guest_l1e_get_pkey(guest_l1e_t gl1e)
> +{ return 0; }
> +static inline u32 guest_l2e_get_pkey(guest_l2e_t gl2e)
> +{ return 0; }
> +
>  static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
>  { return (guest_l1e_t) { (gfn_x(gfn) << PAGE_SHIFT) | flags }; }
>  static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
> @@ -154,6 +159,13 @@ static inline u32 guest_l4e_get_flags(guest_l4e_t gl4e)
>  { return l4e_get_flags(gl4e); }
>  #endif
>
> +static inline u32 guest_l1e_get_pkey(guest_l1e_t gl1e)
> +{ return l1e_get_pkey(gl1e); }
> +static inline u32 guest_l2e_get_pkey(guest_l2e_t gl2e)
> +{ return l2e_get_pkey(gl2e); }
> +static inline u32 guest_l3e_get_pkey(guest_l3e_t gl3e)
> +{ return l3e_get_pkey(gl3e); }
> +
>  static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
>  { return l1e_from_pfn(gfn_x(gfn), flags); }
>  static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index a87224b..731dd44 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -277,6 +277,8 @@ int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t 
> dest, uint8_t dest_mode);
>      (hvm_paging_enabled(v) && ((v)->arch.hvm_vcpu.guest_cr[4] & 
> X86_CR4_SMAP))
>  #define hvm_nx_enabled(v) \
>      (!!((v)->arch.hvm_vcpu.guest_efer & EFER_NX))
> +#define hvm_pku_enabled(v) \
> +    (hvm_paging_enabled(v) && ((v)->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PKE))
>
>  /* Can we use superpages in the HAP p2m table? */
>  #define hap_has_1gb (!!(hvm_funcs.hap_capabilities & HVM_HAP_SUPERPAGE_1GB))
> diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
> index a095a93..9202f3d 100644
> --- a/xen/include/asm-x86/page.h
> +++ b/xen/include/asm-x86/page.h
> @@ -93,6 +93,11 @@
>  #define l3e_get_flags(x)           (get_pte_flags((x).l3))
>  #define l4e_get_flags(x)           (get_pte_flags((x).l4))
>
> +/* Get pte pkeys (unsigned int). */
> +#define l1e_get_pkey(x)           get_pte_pkey((x).l1)
> +#define l2e_get_pkey(x)           get_pte_pkey((x).l2)
> +#define l3e_get_pkey(x)           get_pte_pkey((x).l3)
> +
>  /* Construct an empty pte. */
>  #define l1e_empty()                ((l1_pgentry_t) { 0 })
>  #define l2e_empty()                ((l2_pgentry_t) { 0 })
> diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
> index 26ba141..617a4db 100644
> --- a/xen/include/asm-x86/processor.h
> +++ b/xen/include/asm-x86/processor.h
> @@ -332,6 +332,11 @@ static inline unsigned long read_cr2(void)
>
>  DECLARE_PER_CPU(unsigned long, cr4);
>
> +static inline void raw_write_cr4(unsigned long val)
> +{
> +    asm volatile ( "mov %0,%%cr4" : : "r" (val) );
> +}
> +
>  static inline unsigned long read_cr4(void)
>  {
>      return this_cpu(cr4);
> @@ -340,7 +345,7 @@ static inline unsigned long read_cr4(void)
>  static inline void write_cr4(unsigned long val)
>  {
>      this_cpu(cr4) = val;
> -    asm volatile ( "mov %0,%%cr4" : : "r" (val) );
> +    raw_write_cr4(val);
>  }
>
>  /* Clear and set 'TS' bit respectively */
> @@ -374,6 +379,46 @@ static always_inline void clear_in_cr4 (unsigned long 
> mask)
>      write_cr4(read_cr4() & ~mask);
>  }
>
> +static inline unsigned int read_pkru(void)
> +{
> +    unsigned int pkru;
> +    unsigned long cr4 = read_cr4();
> +
> +    /*
> +     * _PAGE_PKEY_BITS have a conflict with _PAGE_GNTTAB used by PV guests,
> +     * so that X86_CR4_PKE  is disabled on hypervisor. To use RDPKRU, CR4.PKE
> +     * gets temporarily enabled.
> +     */
> +    raw_write_cr4(cr4 | X86_CR4_PKE);
> +    asm volatile (".byte 0x0f,0x01,0xee"
> +        : "=a" (pkru) : "c" (0) : "dx");
> +    raw_write_cr4(cr4);
> +
> +    return pkru;
> +}
> +
> +/* Macros for PKRU domain */
> +#define PKRU_READ  (0)
> +#define PKRU_WRITE (1)
> +#define PKRU_ATTRS (2)
> +
> +/*
> + * PKRU defines 32 bits, there are 16 domains and 2 attribute bits per
> + * domain in pkru, pkeys is index to a defined domain, so the value of
> + * pte_pkeys * PKRU_ATTRS + R/W is offset of a defined domain attribute.
> + */
> +static inline bool_t read_pkru_ad(uint32_t pkru, unsigned int pkey)
> +{
> +    ASSERT(pkey < 16);
> +    return (pkru >> (pkey * PKRU_ATTRS + PKRU_READ)) & 1;
> +}
> +
> +static inline bool_t read_pkru_wd(uint32_t pkru, unsigned int pkey)
> +{
> +    ASSERT(pkey < 16);
> +    return (pkru >> (pkey * PKRU_ATTRS + PKRU_WRITE)) & 1;
> +}
> +
>  /*
>   *      NSC/Cyrix CPU configuration register indexes
>   */
> diff --git a/xen/include/asm-x86/x86_64/page.h 
> b/xen/include/asm-x86/x86_64/page.h
> index 19ab4d0..86abb94 100644
> --- a/xen/include/asm-x86/x86_64/page.h
> +++ b/xen/include/asm-x86/x86_64/page.h
> @@ -134,6 +134,18 @@ typedef l4_pgentry_t root_pgentry_t;
>  #define get_pte_flags(x) (((int)((x) >> 40) & ~0xFFF) | ((int)(x) & 0xFFF))
>  #define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 40) | ((x) & 0xFFF))
>
> +/*
> + * Protection keys define a new 4-bit protection key field
> + * (PKEY) in bits 62:59 of leaf entries of the page tables.
> + * This corresponds to bit 22:19 of a 24-bit flags.
> + *
> + * Notice: Bit 22 is used by _PAGE_GNTTAB which is visible to PV guests,
> + * so Protection keys must be disabled on PV guests.
> + */
> +#define _PAGE_PKEY_BITS  (0x780000)     /* Protection Keys, 22:19 */
> +
> +#define get_pte_pkey(x) (MASK_EXTR(get_pte_flags(x), _PAGE_PKEY_BITS))
> +
>  /* Bit 23 of a 24-bit flag mask. This corresponds to bit 63 of a pte.*/
>  #define _PAGE_NX_BIT (1U<<23)
>
> --
> 2.5.0
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.