diff -r 2491691e3e69 xen/include/public/xen.h
--- a/xen/include/public/xen.h Sat Dec 29 17:57:47 2007 +0000
+++ b/xen/include/public/xen.h Fri Jan 04 20:29:31 2008 -0700
@@ -168,9 +168,40 @@
  * ptr[:2]  -- Machine address within the frame whose mapping to modify.
  *             The frame must belong to the FD, if one is specified.
  * val      -- Value to write into the mapping entry.
+ *
+ * ptr[1:0] == MMU_ATOMIC_PT_UPDATE:
+ * Updates an entry in an L1 page table atomically, such that any bits which
+ * the hardware MMU may have modified concurrently (e.g. Accessed/Dirty) are
+ * not lost. The new entry value is given by val. If the new table entry is
+ * valid/present, the mapped frame must belong to the FD, if an FD has been
+ * specified. If attempting to map an I/O page then the caller assumes the
+ * privilege of the FD.
+ * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
+ * FD == DOMID_XEN: Map restricted areas of Xen's heap space.
+ * ptr[:2]  -- Machine address of the page-table entry to modify.
+ * val      -- Value to write.
+ *
+ * ptr[1:0] == MMU_FLAG_RANGE_UPDATE:
+ * Updates a range of entries in an L1 page table atomically, such that any
+ * bits which the hardware MMU may have modified concurrently are not lost.
+ * Each new entry value is formed by updating selected bits of the current
+ * entry, as encoded in val below. If a new table entry is valid/present, the
+ * mapped frame must belong to the FD, if an FD has been specified. If
+ * attempting to map an I/O page then the caller assumes the privilege of the FD.
+ * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
+ * FD == DOMID_XEN: Map restricted areas of Xen's heap space.
+ * ptr[:2]    -- Machine address of the first page-table entry to modify.
+ * val[11:0]  = mask of pte bits 11:0  (1 means update; 0 means don't change)
+ * val[23:12] = mask of pte bits 63:52
+ * val[35:24] = new pte bits 11:0  (where corresponding mask bit == 1)
+ * val[47:36] = new pte bits 63:52
+ * val[57:48] = number of ptes in the range
+ * val[63:58] = MBZ
  */
 #define MMU_NORMAL_PT_UPDATE   0 /* checked '*ptr = val'. ptr is MA.        */
 #define MMU_MACHPHYS_UPDATE    1 /* ptr = MA of frame to modify entry for   */
+#define MMU_ATOMIC_PT_UPDATE   2 /* checked atomic '*ptr = val'. ptr is MA. */
+#define MMU_FLAG_RANGE_UPDATE  3 /* range atomic checked '*ptr flags = val' */
 
 /*
  * MMU EXTENDED OPERATIONS
diff -r 2491691e3e69 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Sat Dec 29 17:57:47 2007 +0000
+++ b/xen/arch/x86/mm.c Fri Jan 04 20:35:08 2008 -0700
@@ -1338,6 +1338,97 @@ static void free_l4_table(struct page_in
 
 /* How to write an entry to the guest pagetables.
  * Returns 0 for failure (pointer not valid), 1 for success. */
+static inline int update_intpte_sync(intpte_t *p,
+                                     intpte_t old,
+                                     intpte_t new,
+                                     unsigned long mfn,
+                                     struct vcpu *v)
+{
+    int rv;
+    intpte_t t = old;
+    for ( ; ; )
+    {
+        rv = paging_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn));
+        if ( unlikely(rv == 0) )
+        {
+            MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
+                    ": saw %" PRIpte, old, new, t);
+            break;
+        }
+
+        if ( t == old )
+            break;
+
+        /* Allowed to change Accessed/Dirty flags only. */
+        BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY));
+
+        new |= t ^ old;
+        old = t;
+    }
+    return rv;
+}
+
+/* Macro that wraps the appropriate type-changes around update_intpte_sync().
+ * Arguments are: type, ptr, old, new, mfn, vcpu */
+#define UPDATE_ENTRY_SYNC(_t,_p,_o,_n,_m,_v)                            \
+    update_intpte_sync(&_t ## e_get_intpte(*(_p)),                      \
+                       _t ## e_get_intpte(_o), _t ## e_get_intpte(_n),  \
+                       (_m), (_v))
+
+/* Update the L1 entry at pl1e to new value nl1e. */
+static int mod_l1_entry_sync(l1_pgentry_t *pl1e, l1_pgentry_t ol1e,
+                             l1_pgentry_t nl1e, unsigned long gl1mfn)
+{
+    struct vcpu *curr = current;
+    struct domain *d = curr->domain;
+    unsigned long mfn;
+
+    if ( unlikely(paging_mode_refcounts(d)) )
+        return UPDATE_ENTRY_SYNC(l1, pl1e, ol1e, nl1e, gl1mfn, curr);
+
+    if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
+    {
+        /* Translate foreign guest addresses. */
+        mfn = gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e));
+        if ( unlikely(mfn == INVALID_MFN) )
+            return 0;
+        ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
+        nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
+
+        if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) )
+        {
+            MEM_LOG("Bad L1 flags %x",
+                    l1e_get_flags(nl1e) & l1_disallow_mask(d));
+            return 0;
+        }
+
+        adjust_guest_l1e(nl1e, d);
+
+        /* Fast path for identical mapping, r/w and presence. */
+        if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
+            return UPDATE_ENTRY_SYNC(l1, pl1e, ol1e, nl1e, gl1mfn, curr);
+
+        if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
+            return 0;
+
+        if ( unlikely(!UPDATE_ENTRY_SYNC(l1, pl1e, ol1e, nl1e, gl1mfn, curr)) )
+        {
+            put_page_from_l1e(nl1e, d);
+            return 0;
+        }
+    }
+    else
+    {
+        if ( unlikely(!UPDATE_ENTRY_SYNC(l1, pl1e, ol1e, nl1e, gl1mfn, curr)) )
+            return 0;
+    }
+
+    put_page_from_l1e(ol1e, d);
+    return 1;
+}
+
+/* How to write an entry to the guest pagetables.
+ * Returns 0 for failure (pointer not valid), 1 for success. */
 static inline int update_intpte(intpte_t *p,
                                 intpte_t old,
                                 intpte_t new,
@@ -2403,11 +2494,13 @@ int do_mmu_update(
     unsigned long gpfn, gmfn, mfn;
     struct page_info *page;
     int rc = 0, okay = 1, i = 0;
-    unsigned int cmd, done = 0;
+    unsigned int cmd, done = 0, pte_cnt;
     struct vcpu *v = current;
     struct domain *d = v->domain;
     unsigned long type_info;
     struct domain_mmap_cache mapcache;
+    uint64_t mask_flags, add_flags;
+    l1_pgentry_t ol1e;
 
     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
     {
@@ -2577,6 +2670,217 @@ int do_mmu_update(
             paging_mark_dirty(FOREIGNDOM, mfn);
 
             put_page(mfn_to_page(mfn));
+            break;
+
+        case MMU_ATOMIC_PT_UPDATE:
+
+            rc = xsm_mmu_atomic_update(d, req.val);
+            if ( rc )
+                break;
+
+            req.ptr -= cmd;
+
+            gmfn = req.ptr >> PAGE_SHIFT;
+            mfn = gmfn_to_mfn(d, gmfn);
+
+            if ( unlikely(!get_page_from_pagenr(mfn, d)) )
+            {
+                MEM_LOG("Could not get page for atomic update");
+                break;
+            }
+
+            va = map_domain_page_with_cache(mfn, &mapcache);
+            va = (void *)((unsigned long)va +
+                          (unsigned long)(req.ptr & ~PAGE_MASK));
+            page = mfn_to_page(mfn);
+
+            switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
+            {
+            case PGT_l1_page_table:
+            {
+                if ( paging_mode_refcounts(d) )
+                {
+                    MEM_LOG("mmu update on auto-refcounted domain!");
+                    break;
+                }
+
+                if ( unlikely(!get_page_type(
+                    page, type_info & (PGT_type_mask|PGT_pae_xen_l2))) )
+                    goto atomic_not_a_pt;
+
+                switch ( type_info & PGT_type_mask )
+                {
+                case PGT_l1_page_table:
+                {
+                    l1_pgentry_t l1e = l1e_from_intpte(req.val);
+                    if ( unlikely(__copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) != 0) )
+                        okay = 0;
+                    else
+                    {
+                        okay = mod_l1_entry_sync(va, ol1e, l1e, mfn);
+                    }
+                }
+                break;
+                case PGT_l2_page_table:
+                case PGT_l3_page_table:
+                case PGT_l4_page_table:
+                    MEM_LOG("Not L1 page table");
+                    break;
+                }
+
+                put_page_type(page);
+            }
+            break;
+
+            case PGT_l2_page_table:
+            case PGT_l3_page_table:
+            case PGT_l4_page_table:
+                MEM_LOG("Not L1 page table");
+                break;
+
+            default:
+            atomic_not_a_pt:
+            {
+                if ( unlikely(!get_page_type(page, PGT_writable_page)) )
+                    break;
+
+                okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
+
+                put_page_type(page);
+            }
+            break;
+            }
+
+            unmap_domain_page_with_cache(va, &mapcache);
+
+            put_page(page);
+            break;
+
+        case MMU_FLAG_RANGE_UPDATE:
+
+            rc = xsm_mmu_flag_range_update(d, req.val);
+            if ( rc )
+                break;
+
+            req.ptr -= cmd;
+
+            pte_cnt = (req.val >> 48) & 0x3ff;
+
+            if ( pte_cnt == 0 )
+            {
+                okay = 1;
+                break;
+            }
+            if ( (req.ptr & ~PAGE_MASK) + (pte_cnt * sizeof(l1_pgentry_t)) > PAGE_SIZE )
+            {
+                /* Count extends beyond the end of the page table. */
+                MEM_LOG("Count value is too large");
+                break;
+            }
+
+            gmfn = req.ptr >> PAGE_SHIFT;
+            mfn = gmfn_to_mfn(d, gmfn);
+
+            page = mfn_to_page(mfn);
+
+            if ( unlikely(!get_page_from_pagenr(mfn, d)) )
+            {
+                MEM_LOG("Could not get page for range update");
+                break;
+            }
+
+            va = map_domain_page_with_cache(mfn, &mapcache);
+
+            mask_flags = ((req.val & 0xfff000ULL) << 40) |
+                         (req.val & 0xfffULL);
+            add_flags = ((req.val & 0xfff000000000ULL) << 16) |
+                        ((req.val >> 24) & 0xfffULL);
+
+            switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
+            {
+            case PGT_l1_page_table:
+            {
+                if ( paging_mode_refcounts(d) )
+                {
+                    MEM_LOG("mmu update on auto-refcounted domain!");
+                    break;
+                }
+
+                if ( unlikely(!get_page_type(
+                    page, type_info & (PGT_type_mask|PGT_pae_xen_l2))) )
+                    goto range_not_a_pt;
+
+                switch ( type_info & PGT_type_mask )
+                {
+                case PGT_l1_page_table:
+                {
+                    va = (void *)((unsigned long)va +
+                                  (unsigned long)(req.ptr & ~PAGE_MASK) -
+                                  sizeof(l1_pgentry_t));
+                    do
+                    {
+                        l1_pgentry_t ol1e;
+                        l1_pgentry_t nl1e;
+
+                        va += sizeof(l1_pgentry_t);
+                        if ( unlikely(__copy_from_user(&ol1e, va, sizeof(ol1e)) != 0) )
+                        {
+                            okay = 0;
+                            break;
+                        }
+                        nl1e = ol1e;
+                        nl1e.l1 &= ~mask_flags;
+                        nl1e.l1 |= add_flags;
+                        okay = mod_l1_entry_sync(va, ol1e, nl1e, mfn);
+                        if ( !okay )
+                            break;
+                    } while ( --pte_cnt );
+                }
+                break;
+                case PGT_l2_page_table:
+                case PGT_l3_page_table:
+                case PGT_l4_page_table:
+                    MEM_LOG("Operation permitted on l1 page table only");
+                }
+
+                put_page_type(page);
+            }
+            break;
+
+            case PGT_l2_page_table:
+            case PGT_l3_page_table:
+            case PGT_l4_page_table:
+                MEM_LOG("Operation permitted on l1 page table only");
+                break;
+
+
+            default:
+            range_not_a_pt:
+            {
+                if ( unlikely(!get_page_type(page, PGT_writable_page)) )
+                    break;
+                va = (void *)((unsigned long)va +
+                              (unsigned long)(req.ptr & ~PAGE_MASK) -
+                              sizeof(l1_pgentry_t));
+                do
+                {
+                    l1_pgentry_t l1e;
+                    va += sizeof(l1_pgentry_t);
+                    l1e = *(l1_pgentry_t *)va;
+                    l1e.l1 &= ~mask_flags;
+                    l1e.l1 |= add_flags;
+                    okay = paging_write_guest_entry(v, va, l1e.l1, _mfn(mfn));
+                    if ( !okay )
+                        break;
+                } while ( --pte_cnt );
+                put_page_type(page);
+            }
+            break;
+            }
+
+            unmap_domain_page_with_cache(va, &mapcache);
+
+            put_page(page);
             break;
 
         default:
diff -r 2491691e3e69 xen/include/xsm/xsm.h
--- a/xen/include/xsm/xsm.h Sat Dec 29 17:57:47 2007 +0000
+++ b/xen/include/xsm/xsm.h Fri Jan 04 20:36:12 2008 -0700
@@ -136,6 +136,8 @@ struct xsm_operations {
     int (*mmu_machphys_update) (struct domain *d, unsigned long mfn);
     int (*update_va_mapping) (struct domain *d, l1_pgentry_t pte);
     int (*add_to_physmap) (struct domain *d1, struct domain *d2);
+    int (*mmu_atomic_update) (struct domain *d, intpte_t fpte);
+    int (*mmu_flag_range_update) (struct domain *d, intpte_t fpte);
 #endif
 };
 
@@ -532,6 +534,16 @@ static inline int xsm_add_to_physmap(str
 {
     return xsm_call(add_to_physmap(d1, d2));
 }
+
+static inline int xsm_mmu_atomic_update (struct domain *d, intpte_t fpte)
+{
+    return xsm_call(mmu_atomic_update(d, fpte));
+}
+
+static inline int xsm_mmu_flag_range_update (struct domain *d, intpte_t fpte)
+{
+    return xsm_call(mmu_flag_range_update(d, fpte));
+}
 #endif /* CONFIG_X86 */
 
 #endif /* __XSM_H */
diff -r 2491691e3e69 xen/xsm/dummy.c
--- a/xen/xsm/dummy.c Sat Dec 29 17:57:47 2007 +0000
+++ b/xen/xsm/dummy.c Fri Jan 04 20:36:49 2008 -0700
@@ -385,6 +385,16 @@ static int dummy_add_to_physmap (struct
 {
     return 0;
 }
+
+static int dummy_mmu_atomic_update (struct domain *d, intpte_t fpte)
+{
+    return 0;
+}
+
+static int dummy_mmu_flag_range_update (struct domain *d, intpte_t fpte)
+{
+    return 0;
+}
 #endif
 
 struct xsm_operations dummy_xsm_ops;
@@ -484,5 +494,7 @@ void xsm_fixup_ops (struct xsm_operation
     set_to_dummy_if_null(ops, mmu_machphys_update);
     set_to_dummy_if_null(ops, update_va_mapping);
     set_to_dummy_if_null(ops, add_to_physmap);
+    set_to_dummy_if_null(ops, mmu_atomic_update);
+    set_to_dummy_if_null(ops, mmu_flag_range_update);
 #endif
 }
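
For illustration only (not part of the patch): a minimal guest-side sketch of how the
MMU_FLAG_RANGE_UPDATE encoding described in xen.h above might be driven.
HYPERVISOR_mmu_update() and struct mmu_update are the existing hypercall wrapper and
request structure from the guest's Xen headers; the helper name, the choice of flag bit
and the lack of retry handling are illustrative assumptions, not something this patch
defines.

/*
 * Illustrative sketch: clear the architectural R/W bit (pte bit 1) on 'nr'
 * consecutive L1 entries, starting at the pte whose machine address is
 * 'pte_ma', without losing Accessed/Dirty bits the MMU may set concurrently.
 * 'nr' must fit in val[57:48] and the range must not cross the end of the
 * page table.
 */
static int make_range_readonly(uint64_t pte_ma, unsigned int nr)
{
    struct mmu_update req;
    int done = 0;

    req.ptr = pte_ma | MMU_FLAG_RANGE_UPDATE;    /* ptr[1:0] selects the op  */
    req.val = (uint64_t)0x002                    /* val[11:0]:  mask bit 1   */
            | ((uint64_t)0x000 << 24)            /* val[35:24]: new value 0  */
            | ((uint64_t)nr    << 48);           /* val[57:48]: pte count    */

    return HYPERVISOR_mmu_update(&req, 1, &done, DOMID_SELF);
}

A single-entry MMU_ATOMIC_PT_UPDATE request is built the same way, except that req.ptr
carries MMU_ATOMIC_PT_UPDATE in its low two bits and req.val holds the complete new pte
value.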