[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH V11 1/3] x86/hvm: pkeys, add pkeys support for guest_walk_tables
On Fri, Feb 19, 2016 at 9:51 AM, Huaitong Han <huaitong.han@xxxxxxxxx> wrote: > Protection keys define a new 4-bit protection key field(PKEY) in bits 62:59 of > leaf entries of the page tables. > > PKRU register defines 32 bits, there are 16 domains and 2 attribute bits per > domain in pkru, for each i (0 ≤ i ≤ 15), PKRU[2i] is the access-disable bit > for > protection key i (ADi); PKRU[2i+1] is the write-disable bit for protection key > i (WDi). PKEY is index to a defined domain. > > A fault is considered as a PKU violation if all of the following conditions > are > true: > 1.CR4_PKE=1. > 2.EFER_LMA=1. > 3.Page is present with no reserved bit violations. > 4.The access is not an instruction fetch. > 5.The access is to a user page. > 6.PKRU.AD=1 > or The access is a data write and PKRU.WD=1 > and either CR0.WP=1 or it is a user access. > > Signed-off-by: Huaitong Han <huaitong.han@xxxxxxxxx> > Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx> > Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx> Reviewed-by: George Dunlap <george.dunlap@xxxxxxxxxx> > --- > Changes in v11: > *Move pkru_ad/pkru_wd variable initialization position. > *Undo v10 changes. > > Changes in v10: > *Move PFEC_page_present check. > > Changes in v9: > *Rename _write_cr4 to raw_write_cr4. > > Changes in v8: > *Abstract out _write_cr4. > > Changes in v7: > *Add static for pkey_fault. > *Add a comment for page present check and adjust indentation. > *Init pkru_ad and pkru_wd. > *Delete l3e_get_pkey the outer parentheses. > *The first parameter of read_pkru_* use uint32_t type. 
> > xen/arch/x86/mm/guest_walk.c | 52 > +++++++++++++++++++++++++++++++++++++++ > xen/arch/x86/mm/hap/guest_walk.c | 3 +++ > xen/include/asm-x86/guest_pt.h | 12 +++++++++ > xen/include/asm-x86/hvm/hvm.h | 2 ++ > xen/include/asm-x86/page.h | 5 ++++ > xen/include/asm-x86/processor.h | 47 ++++++++++++++++++++++++++++++++++- > xen/include/asm-x86/x86_64/page.h | 12 +++++++++ > 7 files changed, 132 insertions(+), 1 deletion(-) > > diff --git a/xen/arch/x86/mm/guest_walk.c b/xen/arch/x86/mm/guest_walk.c > index 18d1acf..01a64ae 100644 > --- a/xen/arch/x86/mm/guest_walk.c > +++ b/xen/arch/x86/mm/guest_walk.c > @@ -90,6 +90,52 @@ static uint32_t set_ad_bits(void *guest_p, void *walk_p, > int set_dirty) > return 0; > } > > +#if GUEST_PAGING_LEVELS >= 4 > +static bool_t pkey_fault(struct vcpu *vcpu, uint32_t pfec, > + uint32_t pte_flags, uint32_t pte_pkey) > +{ > + uint32_t pkru; > + > + /* When page isn't present, PKEY isn't checked. */ > + if ( !(pfec & PFEC_page_present) || is_pv_vcpu(vcpu) ) > + return 0; > + > + /* > + * PKU: additional mechanism by which the paging controls > + * access to user-mode addresses based on the value in the > + * PKRU register. A fault is considered as a PKU violation if all > + * of the following conditions are true: > + * 1.CR4_PKE=1. > + * 2.EFER_LMA=1. > + * 3.Page is present with no reserved bit violations. > + * 4.The access is not an instruction fetch. > + * 5.The access is to a user page. > + * 6.PKRU.AD=1 or > + * the access is a data write and PKRU.WD=1 and > + * either CR0.WP=1 or it is a user access. 
> + */ > + if ( !hvm_pku_enabled(vcpu) || > + !hvm_long_mode_enabled(vcpu) || > + (pfec & PFEC_reserved_bit) || > + (pfec & PFEC_insn_fetch) || > + !(pte_flags & _PAGE_USER) ) > + return 0; > + > + pkru = read_pkru(); > + if ( unlikely(pkru) ) > + { > + bool_t pkru_ad = read_pkru_ad(pkru, pte_pkey); > + bool_t pkru_wd = read_pkru_wd(pkru, pte_pkey); > + /* Condition 6 */ > + if ( pkru_ad || (pkru_wd && (pfec & PFEC_write_access) && > + (hvm_wp_enabled(vcpu) || (pfec & PFEC_user_mode)))) > + return 1; > + } > + > + return 0; > +} > +#endif > + > /* Walk the guest pagetables, after the manner of a hardware walker. */ > /* Because the walk is essentially random, it can cause a deadlock > * warning in the p2m locking code. Highly unlikely this is an actual > @@ -107,6 +153,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m, > guest_l3e_t *l3p = NULL; > guest_l4e_t *l4p; > #endif > + unsigned int pkey; > uint32_t gflags, mflags, iflags, rc = 0; > bool_t smep = 0, smap = 0; > bool_t pse1G = 0, pse2M = 0; > @@ -190,6 +237,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m, > goto out; > /* Get the l3e and check its flags*/ > gw->l3e = l3p[guest_l3_table_offset(va)]; > + pkey = guest_l3e_get_pkey(gw->l3e); > gflags = guest_l3e_get_flags(gw->l3e) ^ iflags; > if ( !(gflags & _PAGE_PRESENT) ) { > rc |= _PAGE_PRESENT; > @@ -261,6 +309,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m, > > #endif /* All levels... 
*/ > > + pkey = guest_l2e_get_pkey(gw->l2e); > gflags = guest_l2e_get_flags(gw->l2e) ^ iflags; > if ( !(gflags & _PAGE_PRESENT) ) { > rc |= _PAGE_PRESENT; > @@ -324,6 +373,7 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m, > if(l1p == NULL) > goto out; > gw->l1e = l1p[guest_l1_table_offset(va)]; > + pkey = guest_l1e_get_pkey(gw->l1e); > gflags = guest_l1e_get_flags(gw->l1e) ^ iflags; > if ( !(gflags & _PAGE_PRESENT) ) { > rc |= _PAGE_PRESENT; > @@ -334,6 +384,8 @@ guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m, > > #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ > set_ad: > + if ( pkey_fault(v, pfec, gflags, pkey) ) > + rc |= _PAGE_PKEY_BITS; > #endif > /* Now re-invert the user-mode requirement for SMEP and SMAP */ > if ( smep || smap ) > diff --git a/xen/arch/x86/mm/hap/guest_walk.c > b/xen/arch/x86/mm/hap/guest_walk.c > index 8aa7e0f..d2716f9 100644 > --- a/xen/arch/x86/mm/hap/guest_walk.c > +++ b/xen/arch/x86/mm/hap/guest_walk.c > @@ -130,6 +130,9 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)( > if ( missing & _PAGE_INVALID_BITS ) > pfec[0] |= PFEC_reserved_bit; > > + if ( missing & _PAGE_PKEY_BITS ) > + pfec[0] |= PFEC_prot_key; > + > if ( missing & _PAGE_PAGED ) > pfec[0] = PFEC_page_paged; > > diff --git a/xen/include/asm-x86/guest_pt.h b/xen/include/asm-x86/guest_pt.h > index 3447973..eb29e62 100644 > --- a/xen/include/asm-x86/guest_pt.h > +++ b/xen/include/asm-x86/guest_pt.h > @@ -81,6 +81,11 @@ static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e) > static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e) > { return gl2e.l2 & 0xfff; } > > +static inline u32 guest_l1e_get_pkey(guest_l1e_t gl1e) > +{ return 0; } > +static inline u32 guest_l2e_get_pkey(guest_l2e_t gl2e) > +{ return 0; } > + > static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags) > { return (guest_l1e_t) { (gfn_x(gfn) << PAGE_SHIFT) | flags }; } > static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags) > @@ -154,6 +159,13 @@ static 
inline u32 guest_l4e_get_flags(guest_l4e_t gl4e) > { return l4e_get_flags(gl4e); } > #endif > > +static inline u32 guest_l1e_get_pkey(guest_l1e_t gl1e) > +{ return l1e_get_pkey(gl1e); } > +static inline u32 guest_l2e_get_pkey(guest_l2e_t gl2e) > +{ return l2e_get_pkey(gl2e); } > +static inline u32 guest_l3e_get_pkey(guest_l3e_t gl3e) > +{ return l3e_get_pkey(gl3e); } > + > static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags) > { return l1e_from_pfn(gfn_x(gfn), flags); } > static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags) > diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h > index a87224b..731dd44 100644 > --- a/xen/include/asm-x86/hvm/hvm.h > +++ b/xen/include/asm-x86/hvm/hvm.h > @@ -277,6 +277,8 @@ int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t > dest, uint8_t dest_mode); > (hvm_paging_enabled(v) && ((v)->arch.hvm_vcpu.guest_cr[4] & > X86_CR4_SMAP)) > #define hvm_nx_enabled(v) \ > (!!((v)->arch.hvm_vcpu.guest_efer & EFER_NX)) > +#define hvm_pku_enabled(v) \ > + (hvm_paging_enabled(v) && ((v)->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PKE)) > > /* Can we use superpages in the HAP p2m table? */ > #define hap_has_1gb (!!(hvm_funcs.hap_capabilities & HVM_HAP_SUPERPAGE_1GB)) > diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h > index a095a93..9202f3d 100644 > --- a/xen/include/asm-x86/page.h > +++ b/xen/include/asm-x86/page.h > @@ -93,6 +93,11 @@ > #define l3e_get_flags(x) (get_pte_flags((x).l3)) > #define l4e_get_flags(x) (get_pte_flags((x).l4)) > > +/* Get pte pkeys (unsigned int). */ > +#define l1e_get_pkey(x) get_pte_pkey((x).l1) > +#define l2e_get_pkey(x) get_pte_pkey((x).l2) > +#define l3e_get_pkey(x) get_pte_pkey((x).l3) > + > /* Construct an empty pte. 
*/ > #define l1e_empty() ((l1_pgentry_t) { 0 }) > #define l2e_empty() ((l2_pgentry_t) { 0 }) > diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h > index 26ba141..617a4db 100644 > --- a/xen/include/asm-x86/processor.h > +++ b/xen/include/asm-x86/processor.h > @@ -332,6 +332,11 @@ static inline unsigned long read_cr2(void) > > DECLARE_PER_CPU(unsigned long, cr4); > > +static inline void raw_write_cr4(unsigned long val) > +{ > + asm volatile ( "mov %0,%%cr4" : : "r" (val) ); > +} > + > static inline unsigned long read_cr4(void) > { > return this_cpu(cr4); > @@ -340,7 +345,7 @@ static inline unsigned long read_cr4(void) > static inline void write_cr4(unsigned long val) > { > this_cpu(cr4) = val; > - asm volatile ( "mov %0,%%cr4" : : "r" (val) ); > + raw_write_cr4(val); > } > > /* Clear and set 'TS' bit respectively */ > @@ -374,6 +379,46 @@ static always_inline void clear_in_cr4 (unsigned long > mask) > write_cr4(read_cr4() & ~mask); > } > > +static inline unsigned int read_pkru(void) > +{ > + unsigned int pkru; > + unsigned long cr4 = read_cr4(); > + > + /* > + * _PAGE_PKEY_BITS have a conflict with _PAGE_GNTTAB used by PV guests, > + * so that X86_CR4_PKE is disabled on hypervisor. To use RDPKRU, CR4.PKE > + * gets temporarily enabled. > + */ > + raw_write_cr4(cr4 | X86_CR4_PKE); > + asm volatile (".byte 0x0f,0x01,0xee" > + : "=a" (pkru) : "c" (0) : "dx"); > + raw_write_cr4(cr4); > + > + return pkru; > +} > + > +/* Macros for PKRU domain */ > +#define PKRU_READ (0) > +#define PKRU_WRITE (1) > +#define PKRU_ATTRS (2) > + > +/* > + * PKRU defines 32 bits, there are 16 domains and 2 attribute bits per > + * domain in pkru, pkeys is index to a defined domain, so the value of > + * pte_pkeys * PKRU_ATTRS + R/W is offset of a defined domain attribute. 
> + */ > +static inline bool_t read_pkru_ad(uint32_t pkru, unsigned int pkey) > +{ > + ASSERT(pkey < 16); > + return (pkru >> (pkey * PKRU_ATTRS + PKRU_READ)) & 1; > +} > + > +static inline bool_t read_pkru_wd(uint32_t pkru, unsigned int pkey) > +{ > + ASSERT(pkey < 16); > + return (pkru >> (pkey * PKRU_ATTRS + PKRU_WRITE)) & 1; > +} > + > /* > * NSC/Cyrix CPU configuration register indexes > */ > diff --git a/xen/include/asm-x86/x86_64/page.h > b/xen/include/asm-x86/x86_64/page.h > index 19ab4d0..86abb94 100644 > --- a/xen/include/asm-x86/x86_64/page.h > +++ b/xen/include/asm-x86/x86_64/page.h > @@ -134,6 +134,18 @@ typedef l4_pgentry_t root_pgentry_t; > #define get_pte_flags(x) (((int)((x) >> 40) & ~0xFFF) | ((int)(x) & 0xFFF)) > #define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 40) | ((x) & 0xFFF)) > > +/* > + * Protection keys define a new 4-bit protection key field > + * (PKEY) in bits 62:59 of leaf entries of the page tables. > + * This corresponds to bit 22:19 of a 24-bit flags. > + * > + * Notice: Bit 22 is used by _PAGE_GNTTAB which is visible to PV guests, > + * so Protection keys must be disabled on PV guests. > + */ > +#define _PAGE_PKEY_BITS (0x780000) /* Protection Keys, 22:19 */ > + > +#define get_pte_pkey(x) (MASK_EXTR(get_pte_flags(x), _PAGE_PKEY_BITS)) > + > /* Bit 23 of a 24-bit flag mask. This corresponds to bit 63 of a pte.*/ > #define _PAGE_NX_BIT (1U<<23) > > -- > 2.5.0 > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxx > http://lists.xen.org/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |