x86/EPT: split super pages upon mismatching memory types ... between constituent pages. To detect such mismatches, the page order is passed down to the vMTRR routines, with a negative return value (possible only for pages of non-zero order) indicating a collision. Along the way, some redundant code in ept_set_entry() is consolidated, allowing the new handling to be centralized in a single place there. In order to keep ept_set_entry() fast and simple, the actual splitting is deferred to the EPT_MISCONFIG VM exit handler. Signed-off-by: Jan Beulich Reviewed-by: Tim Deegan --- a/xen/arch/x86/hvm/mtrr.c +++ b/xen/arch/x86/hvm/mtrr.c @@ -222,30 +222,40 @@ void hvm_vcpu_cacheattr_destroy(struct v /* * Get MTRR memory type for physical address pa. + * + * May return a negative value when order > 0, indicating to the caller + * that the respective mapping needs splitting. */ -static uint8_t get_mtrr_type(struct mtrr_state *m, paddr_t pa) +static int get_mtrr_type(const struct mtrr_state *m, + paddr_t pa, unsigned int order) { - int32_t addr, seg, index; uint8_t overlap_mtrr = 0; uint8_t overlap_mtrr_pos = 0; - uint64_t phys_base; - uint64_t phys_mask; - uint8_t num_var_ranges = m->mtrr_cap & 0xff; + uint64_t mask = -(uint64_t)PAGE_SIZE << order; + unsigned int seg, num_var_ranges = m->mtrr_cap & 0xff; if ( unlikely(!(m->enabled & 0x2)) ) return MTRR_TYPE_UNCACHABLE; + pa &= mask; if ( (pa < 0x100000) && (m->enabled & 1) ) { - /* Fixed range MTRR takes effective */ - addr = (uint32_t) pa; + /* Fixed range MTRR takes effect. */ + uint32_t addr = (uint32_t)pa, index; + if ( addr < 0x80000 ) { + /* 0x00000 ... 0x7FFFF in 64k steps */ + if ( order > 4 ) + return -1; seg = (addr >> 16); return m->fixed_ranges[seg]; } else if ( addr < 0xc0000 ) { + /* 0x80000 ... 
0xBFFFF in 16k steps */ + if ( order > 2 ) + return -1; seg = (addr - 0x80000) >> 14; index = (seg >> 3) + 1; seg &= 7; /* select 0-7 segments */ @@ -253,7 +263,9 @@ static uint8_t get_mtrr_type(struct mtrr } else { - /* 0xC0000 --- 0x100000 */ + /* 0xC0000 ... 0xFFFFF in 4k steps */ + if ( order ) + return -1; seg = (addr - 0xc0000) >> 12; index = (seg >> 3) + 3; seg &= 7; /* select 0-7 segments */ @@ -264,14 +276,15 @@ static uint8_t get_mtrr_type(struct mtrr /* Match with variable MTRRs. */ for ( seg = 0; seg < num_var_ranges; seg++ ) { - phys_base = ((uint64_t*)m->var_ranges)[seg*2]; - phys_mask = ((uint64_t*)m->var_ranges)[seg*2 + 1]; + uint64_t phys_base = m->var_ranges[seg].base; + uint64_t phys_mask = m->var_ranges[seg].mask; + if ( phys_mask & MTRR_PHYSMASK_VALID ) { - if ( ((uint64_t) pa & phys_mask) >> MTRR_PHYSMASK_SHIFT == - (phys_base & phys_mask) >> MTRR_PHYSMASK_SHIFT ) + phys_mask &= mask; + if ( (pa & phys_mask) == (phys_base & phys_mask) ) { - if ( unlikely(m->overlapped) ) + if ( unlikely(m->overlapped) || order ) { overlap_mtrr |= 1 << (phys_base & MTRR_PHYSBASE_TYPE_MASK); overlap_mtrr_pos = phys_base & MTRR_PHYSBASE_TYPE_MASK; @@ -285,23 +298,24 @@ static uint8_t get_mtrr_type(struct mtrr } } - /* Overlapped or not found. */ + /* Not found? */ if ( unlikely(overlap_mtrr == 0) ) return m->def_type; - if ( likely(!(overlap_mtrr & ~( ((uint8_t)1) << overlap_mtrr_pos ))) ) - /* Covers both one variable memory range matches and - * two or more identical match. - */ + /* One match, or multiple identical ones? */ + if ( likely(overlap_mtrr == (1 << overlap_mtrr_pos)) ) return overlap_mtrr_pos; + if ( order ) + return -1; + + /* Two or more matches, one being UC? */ if ( overlap_mtrr & (1 << MTRR_TYPE_UNCACHABLE) ) - /* Two or more match, one is UC. */ return MTRR_TYPE_UNCACHABLE; - if ( !(overlap_mtrr & - ~((1 << MTRR_TYPE_WRTHROUGH) | (1 << MTRR_TYPE_WRBACK))) ) - /* Two or more match, WT and WB. */ + /* Two or more matches, all of them WT and WB? 
*/ + if ( overlap_mtrr == + ((1 << MTRR_TYPE_WRTHROUGH) | (1 << MTRR_TYPE_WRBACK)) ) return MTRR_TYPE_WRTHROUGH; /* Behaviour is undefined, but return the last overlapped type. */ @@ -341,7 +355,7 @@ static uint8_t effective_mm_type(struct * just use it */ if ( gmtrr_mtype == NO_HARDCODE_MEM_TYPE ) - mtrr_mtype = get_mtrr_type(m, gpa); + mtrr_mtype = get_mtrr_type(m, gpa, 0); else mtrr_mtype = gmtrr_mtype; @@ -370,7 +384,7 @@ uint32_t get_pat_flags(struct vcpu *v, guest_eff_mm_type = effective_mm_type(g, pat, gpaddr, gl1e_flags, gmtrr_mtype); /* 2. Get the memory type of host physical address, with MTRR */ - shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr); + shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr, 0); /* 3. Find the memory type in PAT, with host MTRR memory type * and guest effective memory type. @@ -703,10 +717,10 @@ void memory_type_changed(struct domain * p2m_memory_type_changed(d); } -uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn, - uint8_t *ipat, bool_t direct_mmio) +int epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn, + unsigned int order, uint8_t *ipat, bool_t direct_mmio) { - uint8_t gmtrr_mtype, hmtrr_mtype; + int gmtrr_mtype, hmtrr_mtype; uint32_t type; struct vcpu *v = current; @@ -747,10 +761,12 @@ uint8_t epte_get_entry_emt(struct domain } gmtrr_mtype = is_hvm_domain(d) && v ? - get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT)) : + get_mtrr_type(&v->arch.hvm_vcpu.mtrr, + gfn << PAGE_SHIFT, order) : MTRR_TYPE_WRBACK; - - hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn_x(mfn) << PAGE_SHIFT)); + hmtrr_mtype = get_mtrr_type(&mtrr_state, mfn_x(mfn) << PAGE_SHIFT, order); + if ( gmtrr_mtype < 0 || hmtrr_mtype < 0 ) + return -1; /* If both types match we're fine. 
*/ if ( likely(gmtrr_mtype == hmtrr_mtype) ) --- a/xen/arch/x86/mm/p2m-ept.c +++ b/xen/arch/x86/mm/p2m-ept.c @@ -289,6 +289,7 @@ ept_set_entry(struct p2m_domain *p2m, un int vtd_pte_present = 0; int needs_sync = 1; ept_entry_t old_entry = { .epte = 0 }; + ept_entry_t new_entry = { .epte = 0 }; struct ept_data *ept = &p2m->ept; struct domain *d = p2m->domain; @@ -338,7 +339,6 @@ ept_set_entry(struct p2m_domain *p2m, un if ( i == target ) { /* We reached the target level. */ - ept_entry_t new_entry = { .epte = 0 }; /* No need to flush if the old entry wasn't valid */ if ( !is_epte_present(ept_entry) ) @@ -349,35 +349,11 @@ ept_set_entry(struct p2m_domain *p2m, un * * Read-then-write is OK because we hold the p2m lock. */ old_entry = *ept_entry; - - if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) || - (p2mt == p2m_ram_paging_in) ) - { - /* Construct the new entry, and then write it once */ - new_entry.emt = epte_get_entry_emt(p2m->domain, gfn, mfn, &ipat, - direct_mmio); - - new_entry.ipat = ipat; - new_entry.sp = !!order; - new_entry.sa_p2mt = p2mt; - new_entry.access = p2ma; - new_entry.snp = (iommu_enabled && iommu_snoop); - - new_entry.mfn = mfn_x(mfn); - - if ( old_entry.mfn == new_entry.mfn ) - need_modify_vtd_table = 0; - - ept_p2m_type_to_flags(&new_entry, p2mt, p2ma); - } - - atomic_write_ept_entry(ept_entry, new_entry); } else { /* We need to split the original page. */ ept_entry_t split_ept_entry; - ept_entry_t new_entry = { .epte = 0 }; ASSERT(is_epte_superpage(ept_entry)); @@ -401,8 +377,19 @@ ept_set_entry(struct p2m_domain *p2m, un ASSERT(i == target); ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER)); + } + + if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) || + (p2mt == p2m_ram_paging_in) ) + { + int emt = epte_get_entry_emt(p2m->domain, gfn, mfn, + i * EPT_TABLE_ORDER, &ipat, direct_mmio); + + if ( emt >= 0 ) + new_entry.emt = emt; + else /* ept_handle_misconfig() will need to take care of this. 
*/ + new_entry.emt = MTRR_NUM_TYPES; - new_entry.emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio); new_entry.ipat = ipat; new_entry.sp = !!i; new_entry.sa_p2mt = p2mt; @@ -417,10 +404,10 @@ ept_set_entry(struct p2m_domain *p2m, un need_modify_vtd_table = 0; ept_p2m_type_to_flags(&new_entry, p2mt, p2ma); - - atomic_write_ept_entry(ept_entry, new_entry); } + atomic_write_ept_entry(ept_entry, new_entry); + /* Track the highest gfn for which we have ever had a valid mapping */ if ( p2mt != p2m_invalid && (gfn + (1UL << order) - 1 > p2m->max_mapped_pfn) ) @@ -738,7 +725,7 @@ bool_t ept_handle_misconfig(uint64_t gpa if ( !is_epte_valid(&e) || !is_epte_present(&e) ) continue; e.emt = epte_get_entry_emt(p2m->domain, gfn + i, - _mfn(e.mfn), &ipat, + _mfn(e.mfn), 0, &ipat, e.sa_p2mt == p2m_mmio_direct); e.ipat = ipat; atomic_write_ept_entry(&epte[i], e); @@ -746,9 +733,22 @@ bool_t ept_handle_misconfig(uint64_t gpa } else { - e.emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn), - &ipat, - e.sa_p2mt == p2m_mmio_direct); + int emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn), + level * EPT_TABLE_ORDER, &ipat, + e.sa_p2mt == p2m_mmio_direct); + if ( unlikely(emt < 0) ) + { + unmap_domain_page(epte); + if ( ept_split_super_page(p2m, &e, level, level - 1) ) + { + mfn = e.mfn; + continue; + } + ept_free_entry(p2m, &e, level); + okay = 0; + break; + } + e.emt = emt; e.ipat = ipat; atomic_write_ept_entry(&epte[i], e); } --- a/xen/include/asm-x86/mtrr.h +++ b/xen/include/asm-x86/mtrr.h @@ -72,8 +72,9 @@ extern int mtrr_del_page(int reg, unsign extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); extern u32 get_pat_flags(struct vcpu *v, u32 gl1e_flags, paddr_t gpaddr, paddr_t spaddr, uint8_t gmtrr_mtype); -extern uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, - mfn_t mfn, uint8_t *ipat, bool_t direct_mmio); +extern int epte_get_entry_emt(struct domain *, unsigned long gfn, mfn_t mfn, + unsigned int order, uint8_t *ipat, + bool_t 
direct_mmio); extern void ept_change_entry_emt_with_range( struct domain *d, unsigned long start_gfn, unsigned long end_gfn); extern unsigned char pat_type_2_pte_flags(unsigned char pat_type);