[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen master] x86: enforce consistent cachability of MMIO mappings
commit c61a6f74f80eb36ed83a82f713db3143159b9009 Author: Jan Beulich <jbeulich@xxxxxxxx> AuthorDate: Wed Feb 17 16:16:53 2016 +0100 Commit: Jan Beulich <jbeulich@xxxxxxxx> CommitDate: Wed Feb 17 16:16:53 2016 +0100 x86: enforce consistent cachability of MMIO mappings We've been told by Intel that inconsistent cachability between multiple mappings of the same page can affect system stability only when the affected page is an MMIO one. Since the stale data issue is of no relevance to the hypervisor (since all guest memory accesses go through proper accessors and validation), handling of RAM pages remains unchanged here. Any MMIO mapping established by domains, however, needs to be done consistently (all cachable mappings or all uncachable ones), in order to avoid Machine Check exceptions. Since converting existing cachable mappings to uncachable (at the time an uncachable mapping gets established) would in the PV case require tracking all mappings, allow MMIO to only get mapped uncachable (UC, UC-, or WC). This also implies that in the PV case we mustn't use the L1 PTE update fast path when cachability flags get altered. Since in the HVM case, at least for now, we want to continue honoring pinned cachability attributes for pages not mapped by the hypervisor, special case handling of r/o MMIO pages (forcing UC) gets added there. Arguably the counterpart change to p2m-pt.c may not be necessary, since UC- (which already gets enforced there) is probably strict enough. Note that the shadow code changes include fixing the write protection of r/o MMIO ranges: shadow_l1e_remove_flags() and its siblings, unlike l1e_remove_flags() and the like, return the new PTE (and hence ignoring their return values makes them no-ops). This is CVE-2016-2270 / XSA-154. 
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> --- docs/misc/xen-command-line.markdown | 9 ++++ xen/arch/x86/hvm/mtrr.c | 11 +++- xen/arch/x86/mm.c | 103 ++++++++++++++++++++++++++---------- xen/arch/x86/mm/p2m-pt.c | 3 ++ xen/arch/x86/mm/shadow/multi.c | 32 +++++++---- xen/arch/x86/mm/shadow/types.h | 3 ++ xen/include/asm-x86/page.h | 3 ++ 7 files changed, 125 insertions(+), 39 deletions(-) diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown index d267a04..6435f8e 100644 --- a/docs/misc/xen-command-line.markdown +++ b/docs/misc/xen-command-line.markdown @@ -1084,6 +1084,15 @@ limit is ignored by Xen. Specify if the MMConfig space should be enabled. +### mmio-relax +> `= <boolean> | all` + +> Default: `false` + +By default, domains may not create cached mappings to MMIO regions. +This option relaxes the check for Domain 0 (or when using `all`, all PV +domains), to permit the use of cacheable MMIO mappings. + ### msi > `= <boolean>` diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c index 595134c..4188ae7 100644 --- a/xen/arch/x86/hvm/mtrr.c +++ b/xen/arch/x86/hvm/mtrr.c @@ -770,8 +770,17 @@ int epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn, if ( v->domain != d ) v = d->vcpu ? d->vcpu[0] : NULL; - if ( !mfn_valid(mfn_x(mfn)) ) + if ( !mfn_valid(mfn_x(mfn)) || + rangeset_contains_range(mmio_ro_ranges, mfn_x(mfn), + mfn_x(mfn) + (1UL << order) - 1) ) + { + *ipat = 1; return MTRR_TYPE_UNCACHABLE; + } + + if ( rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn), + mfn_x(mfn) + (1UL << order) - 1) ) + return -1; switch ( hvm_get_mem_pinned_cacheattr(d, gfn, order, &type) ) { diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index c214fc5..a05edc3 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -178,6 +178,18 @@ static uint32_t base_disallow_mask; is_pv_domain(d)) ? 
\ L1_DISALLOW_MASK : (L1_DISALLOW_MASK & ~PAGE_CACHE_ATTRS)) +static s8 __read_mostly opt_mmio_relax; +static void __init parse_mmio_relax(const char *s) +{ + if ( !*s ) + opt_mmio_relax = 1; + else + opt_mmio_relax = parse_bool(s); + if ( opt_mmio_relax < 0 && strcmp(s, "all") ) + opt_mmio_relax = 0; +} +custom_param("mmio-relax", parse_mmio_relax); + static void __init init_frametable_chunk(void *start, void *end) { unsigned long s = (unsigned long)start; @@ -871,10 +883,7 @@ get_page_from_l1e( if ( !mfn_valid(mfn) || (real_pg_owner = page_get_owner_and_reference(page)) == dom_io ) { -#ifndef NDEBUG - const unsigned long *ro_map; - unsigned int seg, bdf; -#endif + int flip = 0; /* Only needed the reference to confirm dom_io ownership. */ if ( mfn_valid(mfn) ) @@ -908,24 +917,55 @@ get_page_from_l1e( return -EINVAL; } - if ( !(l1f & _PAGE_RW) || - !rangeset_contains_singleton(mmio_ro_ranges, mfn) ) - return 0; + if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) ) + { + /* MMIO pages must not be mapped cachable unless requested so. 
*/ + switch ( opt_mmio_relax ) + { + case 0: + break; + case 1: + if ( is_hardware_domain(l1e_owner) ) + case -1: + return 0; + default: + ASSERT_UNREACHABLE(); + } + } + else if ( l1f & _PAGE_RW ) + { #ifndef NDEBUG - if ( !pci_mmcfg_decode(mfn, &seg, &bdf) || - ((ro_map = pci_get_ro_map(seg)) != NULL && - test_bit(bdf, ro_map)) ) - printk(XENLOG_G_WARNING - "d%d: Forcing read-only access to MFN %lx\n", - l1e_owner->domain_id, mfn); - else - rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, - print_mmio_emul_range, - &(struct mmio_emul_range_ctxt){ - .d = l1e_owner, - .mfn = mfn }); + const unsigned long *ro_map; + unsigned int seg, bdf; + + if ( !pci_mmcfg_decode(mfn, &seg, &bdf) || + ((ro_map = pci_get_ro_map(seg)) != NULL && + test_bit(bdf, ro_map)) ) + printk(XENLOG_G_WARNING + "d%d: Forcing read-only access to MFN %lx\n", + l1e_owner->domain_id, mfn); + else + rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, + print_mmio_emul_range, + &(struct mmio_emul_range_ctxt){ + .d = l1e_owner, + .mfn = mfn }); #endif - return 1; + flip = _PAGE_RW; + } + + switch ( l1f & PAGE_CACHE_ATTRS ) + { + case 0: /* WB */ + flip |= _PAGE_PWT | _PAGE_PCD; + break; + case _PAGE_PWT: /* WT */ + case _PAGE_PWT | _PAGE_PAT: /* WP */ + flip |= _PAGE_PCD | (l1f & _PAGE_PAT); + break; + } + + return flip; } if ( unlikely( (real_pg_owner != pg_owner) && @@ -1315,8 +1355,9 @@ static int alloc_l1_table(struct page_info *page) goto fail; case 0: break; - case 1: - l1e_remove_flags(pl1e[i], _PAGE_RW); + case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS: + ASSERT(!(ret & ~(_PAGE_RW | PAGE_CACHE_ATTRS))); + l1e_flip_flags(pl1e[i], ret); break; } @@ -1849,8 +1890,9 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, return -EINVAL; } - /* Fast path for identical mapping, r/w and presence. */ - if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) ) + /* Fast path for identical mapping, r/w, presence, and cachability. 
*/ + if ( !l1e_has_changed(ol1e, nl1e, + PAGE_CACHE_ATTRS | _PAGE_RW | _PAGE_PRESENT) ) { adjust_guest_l1e(nl1e, pt_dom); if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, @@ -1873,8 +1915,9 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, return rc; case 0: break; - case 1: - l1e_remove_flags(nl1e, _PAGE_RW); + case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS: + ASSERT(!(rc & ~(_PAGE_RW | PAGE_CACHE_ATTRS))); + l1e_flip_flags(nl1e, rc); rc = 0; break; } @@ -5109,6 +5152,7 @@ static int ptwr_emulated_update( l1_pgentry_t pte, ol1e, nl1e, *pl1e; struct vcpu *v = current; struct domain *d = v->domain; + int ret; /* Only allow naturally-aligned stores within the original %cr2 page. */ if ( unlikely(((addr^ptwr_ctxt->cr2) & PAGE_MASK) || (addr & (bytes-1))) ) @@ -5156,7 +5200,7 @@ static int ptwr_emulated_update( /* Check the new PTE. */ nl1e = l1e_from_intpte(val); - switch ( get_page_from_l1e(nl1e, d, d) ) + switch ( ret = get_page_from_l1e(nl1e, d, d) ) { default: if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) && @@ -5180,8 +5224,9 @@ static int ptwr_emulated_update( break; case 0: break; - case 1: - l1e_remove_flags(nl1e, _PAGE_RW); + case _PAGE_RW ... 
_PAGE_RW | PAGE_CACHE_ATTRS: + ASSERT(!(ret & ~(_PAGE_RW | PAGE_CACHE_ATTRS))); + l1e_flip_flags(nl1e, ret); break; } diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c index da546e8..3d80612 100644 --- a/xen/arch/x86/mm/p2m-pt.c +++ b/xen/arch/x86/mm/p2m-pt.c @@ -109,7 +109,10 @@ static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn, if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) ) flags |= _PAGE_RW; else + { + flags |= _PAGE_PWT; ASSERT(!level); + } return flags | P2M_BASE_FLAGS | _PAGE_PCD; } } diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c index aaf8db7..70f2aa5 100644 --- a/xen/arch/x86/mm/shadow/multi.c +++ b/xen/arch/x86/mm/shadow/multi.c @@ -534,6 +534,7 @@ _sh_propagate(struct vcpu *v, gfn_t target_gfn = guest_l1e_get_gfn(guest_entry); u32 pass_thru_flags; u32 gflags, sflags; + bool_t mmio_mfn; /* We don't shadow PAE l3s */ ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3); @@ -574,7 +575,10 @@ _sh_propagate(struct vcpu *v, // mfn means that we can not usefully shadow anything, and so we // return early. 
// - if ( !mfn_valid(target_mfn) + mmio_mfn = !mfn_valid(target_mfn) + || (level == 1 + && page_get_owner(mfn_to_page(target_mfn)) == dom_io); + if ( mmio_mfn && !(level == 1 && (!shadow_mode_refcounts(d) || p2mt == p2m_mmio_direct)) ) { @@ -592,7 +596,7 @@ _sh_propagate(struct vcpu *v, _PAGE_RW | _PAGE_PRESENT); if ( guest_supports_nx(v) ) pass_thru_flags |= _PAGE_NX_BIT; - if ( !shadow_mode_refcounts(d) && !mfn_valid(target_mfn) ) + if ( level == 1 && !shadow_mode_refcounts(d) && mmio_mfn ) pass_thru_flags |= _PAGE_PAT | _PAGE_PCD | _PAGE_PWT; sflags = gflags & pass_thru_flags; @@ -691,10 +695,14 @@ _sh_propagate(struct vcpu *v, } /* Read-only memory */ - if ( p2m_is_readonly(p2mt) || - (p2mt == p2m_mmio_direct && - rangeset_contains_singleton(mmio_ro_ranges, mfn_x(target_mfn))) ) + if ( p2m_is_readonly(p2mt) ) sflags &= ~_PAGE_RW; + else if ( p2mt == p2m_mmio_direct && + rangeset_contains_singleton(mmio_ro_ranges, mfn_x(target_mfn)) ) + { + sflags &= ~(_PAGE_RW | _PAGE_PAT); + sflags |= _PAGE_PCD | _PAGE_PWT; + } // protect guest page tables // @@ -1200,22 +1208,28 @@ static int shadow_set_l1e(struct domain *d, && !sh_l1e_is_magic(new_sl1e) ) { /* About to install a new reference */ - if ( shadow_mode_refcounts(d) ) { + if ( shadow_mode_refcounts(d) ) + { +#define PAGE_FLIPPABLE (_PAGE_RW | _PAGE_PWT | _PAGE_PCD | _PAGE_PAT) + int rc; + TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF); - switch ( shadow_get_page_from_l1e(new_sl1e, d, new_type) ) + switch ( rc = shadow_get_page_from_l1e(new_sl1e, d, new_type) ) { default: /* Doesn't look like a pagetable. */ flags |= SHADOW_SET_ERROR; new_sl1e = shadow_l1e_empty(); break; - case 1: - shadow_l1e_remove_flags(new_sl1e, _PAGE_RW); + case PAGE_FLIPPABLE & -PAGE_FLIPPABLE ... 
PAGE_FLIPPABLE: + ASSERT(!(rc & ~PAGE_FLIPPABLE)); + new_sl1e = shadow_l1e_flip_flags(new_sl1e, rc); /* fall through */ case 0: shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d); break; } +#undef PAGE_FLIPPABLE } } diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h index 503243f..a717d74 100644 --- a/xen/arch/x86/mm/shadow/types.h +++ b/xen/arch/x86/mm/shadow/types.h @@ -99,6 +99,9 @@ static inline u32 shadow_l4e_get_flags(shadow_l4e_t sl4e) static inline shadow_l1e_t shadow_l1e_remove_flags(shadow_l1e_t sl1e, u32 flags) { l1e_remove_flags(sl1e, flags); return sl1e; } +static inline shadow_l1e_t +shadow_l1e_flip_flags(shadow_l1e_t sl1e, u32 flags) +{ l1e_flip_flags(sl1e, flags); return sl1e; } static inline shadow_l1e_t shadow_l1e_empty(void) { return l1e_empty(); } diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h index a095a93..bdf3960 100644 --- a/xen/include/asm-x86/page.h +++ b/xen/include/asm-x86/page.h @@ -157,6 +157,9 @@ static inline l4_pgentry_t l4e_from_paddr(paddr_t pa, unsigned int flags) #define l3e_remove_flags(x, flags) ((x).l3 &= ~put_pte_flags(flags)) #define l4e_remove_flags(x, flags) ((x).l4 &= ~put_pte_flags(flags)) +/* Flip flags in an existing L1 PTE. */ +#define l1e_flip_flags(x, flags) ((x).l1 ^= put_pte_flags(flags)) + /* Check if a pte's page mapping or significant access flags have changed. */ #define l1e_has_changed(x,y,flags) \ ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) ) -- generated by git-patchbot for /home/xen/git/xen.git#master _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |