diff -r 61c96456a3e1 include/xen/interface/xen.h --- a/include/xen/interface/xen.h Thu Dec 20 16:58:14 2007 +0000 +++ b/include/xen/interface/xen.h Mon Jan 07 17:46:41 2008 -0700 @@ -168,9 +168,39 @@ * ptr[:2] -- Machine address within the frame whose mapping to modify. * The frame must belong to the FD, if one is specified. * val -- Value to write into the mapping entry. + * + * ptr[1:0] == MMU_ATOMIC_PT_UPDATE: + * Updates an entry in an L1 page table such that concurrent hardware mmu + * updates to that entry are not lost. If the new table entry is valid/present, + * the mapped frame must belong to the FD, if an FD has been specified. If + * attempting to map an I/O page then the caller assumes the privilege of the + * FD. + * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. + * FD == DOMID_XEN: Map restricted areas of Xen's heap space. + * ptr[:2] -- Machine address of the page-table entry to modify. + * val -- Value to write. + * + * ptr[1:0] == MMU_FLAG_RANGE_UPDATE: + * Updates a range of entries in an L1 page table such that concurrent hardware + * mmu updates to those entries are not lost. The update value is created by + * updating specific bits in the current page table entry, as specified by + * val. If the new table entry is valid/present, the mapped frame must belong + * to the FD, if an FD has been specified. If attempting to map an I/O page + * then the caller assumes the privilege of the FD. + * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. + * FD == DOMID_XEN: Map restricted areas of Xen's heap space. + * ptr[:2] -- Machine address of the first page-table entry to modify. + * val[11:0] = mask pte bits 11:0 (1 means update; 0 means don't change) + * val[23:12] = mask pte bits 63:52 + * val[35:24] = new pte bits 11:0 (where corresp. 
Mask bit == 1) + * val[47:36] = new pte bits 63:52 + * val[57:48] = number of ptes in range + * val[63:58] = MBZ */ #define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ #define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_ATOMIC_PT_UPDATE 2 /* checked atomic '*ptr = val'. ptr is MA of the L1 entry. */ +#define MMU_FLAG_RANGE_UPDATE 3 /* checked atomic flag update of a range of L1 entries; ptr is MA of the first entry, val packs mask, new flags and count */ /* * MMU EXTENDED OPERATIONS diff -r 61c96456a3e1 include/asm-i386/mach-xen/asm/pgtable.h --- a/include/asm-i386/mach-xen/asm/pgtable.h Thu Dec 20 16:58:14 2007 +0000 +++ b/include/asm-i386/mach-xen/asm/pgtable.h Mon Jan 07 17:47:31 2008 -0700 @@ -512,6 +512,12 @@ int touch_pte_range(struct mm_struct *mm unsigned long address, unsigned long size); +int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, unsigned long end, pgprot_t newprot); + +#define arch_change_pte_range(mm, pmd, addr, end, newprot) \ + xen_change_pte_range(mm, pmd, addr, end, newprot) + #define io_remap_pfn_range(vma,from,pfn,size,prot) \ direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO) diff -r 61c96456a3e1 include/asm-x86_64/mach-xen/asm/pgtable.h --- a/include/asm-x86_64/mach-xen/asm/pgtable.h Thu Dec 20 16:58:14 2007 +0000 +++ b/include/asm-x86_64/mach-xen/asm/pgtable.h Mon Jan 07 17:47:41 2008 -0700 @@ -541,6 +541,12 @@ int touch_pte_range(struct mm_struct *mm unsigned long address, unsigned long size); +int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, unsigned long end, pgprot_t newprot); + +#define arch_change_pte_range(mm, pmd, addr, end, newprot) \ + xen_change_pte_range(mm, pmd, addr, end, newprot) + #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO) diff -r 61c96456a3e1 include/asm-generic/pgtable.h --- a/include/asm-generic/pgtable.h Thu Dec 20 16:58:14 2007 +0000 +++ b/include/asm-generic/pgtable.h Mon Jan 07 17:48:00 2008 -0700 @@ -188,6 +188,10 @@ 
static inline void ptep_set_wrprotect(st
 })
 #endif
 
+#ifndef arch_change_pte_range
+#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0	/* not handled by the arch */
+#endif
+
 #ifndef __ASSEMBLY__
 /*
  * When walking page tables, we usually want to skip any p?d_none entries;
diff -r 61c96456a3e1 arch/i386/mm/hypervisor.c
--- a/arch/i386/mm/hypervisor.c	Thu Dec 20 16:58:14 2007 +0000
+++ b/arch/i386/mm/hypervisor.c	Mon Jan 07 17:48:30 2008 -0700
@@ -548,3 +548,120 @@ int write_ldt_entry(void *ldt, int entry
 		mach_lp, (u64)entry_a | ((u64)entry_b<<32));
 }
 #endif
+
+#define MAX_BATCHED_FULL_PTES 32
+
+/* Largest count that fits in val[57:48] of an MMU_FLAG_RANGE_UPDATE request. */
+#define MAX_FLAG_RANGE_PTES 0x3ffULL
+
+/* pte bits 63:52 and 11:0: the only bits a flag range update can change. */
+#define PTE_FLAGS 0xfff0000000000fffULL
+
+/* Pack an update mask: pte bits 11:0 -> val[11:0], 63:52 -> val[23:12]. */
+#define map_mask_bits(X) ((((X) & 0xfffULL) << 0) | \
+			  (((X) & PTE_FLAGS) >> 40))
+/* Pack new flag values: pte bits 11:0 -> val[35:24], 63:52 -> val[47:36]. */
+#define map_flag_bits(X) ((((X) & PTE_FLAGS) << 24) | \
+			  (((X) & PTE_FLAGS) >> 16))
+
+/*
+ * Issue the pending MMU_FLAG_RANGE_UPDATE request held in *v, if any.  The
+ * pte count is merged into val[57:48] here and reset to zero on success, so
+ * the next flag-only pte starts a new run.  Returns 0 on success or when
+ * nothing is pending, otherwise the error from HYPERVISOR_mmu_update().
+ */
+static int flush_flag_range(mmu_update_t *v, u64 *nr_flag_ptes)
+{
+	int rc;
+
+	if (*nr_flag_ptes == 0)
+		return 0;
+	v->val |= *nr_flag_ptes << 48;
+	rc = HYPERVISOR_mmu_update(v, 1, NULL, DOMID_SELF);
+	if (rc == 0)
+		*nr_flag_ptes = 0;
+	return rc;
+}
+
+/*
+ * Apply newprot to every present pte in [addr, end) below pmd through
+ * mmu_update hypercalls.
+ *
+ * A pte whose new value differs from the old one at most in PTE_FLAGS bits
+ * starts a run, or joins the current run when its new flags match; each run
+ * is sent as one MMU_FLAG_RANGE_UPDATE.  Every other pte is queued as an
+ * MMU_ATOMIC_PT_UPDATE and sent in batches of MAX_BATCHED_FULL_PTES.  A run
+ * ends at a non-present pte, at a pte needing a full update, at the end of
+ * the range, or once it holds MAX_FLAG_RANGE_PTES entries: the count field is
+ * only 10 bits wide, but a non-PAE i386 pte page has 1024 entries, which
+ * would encode as count 0 with a must-be-zero bit set.
+ *
+ * Returns non-zero when the range was handled here and 0 when the hypervisor
+ * answered -ENOSYS, in which case the caller falls back to the generic code.
+ * Any other hypercall error is a BUG.
+ */
+int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+	unsigned long addr, unsigned long end, pgprot_t newprot)
+{
+	int rc = 0;
+	u64 nr_full_ptes = 0, nr_flag_ptes = 0;
+	mmu_update_t u[MAX_BATCHED_FULL_PTES], v;
+	u64 newval, flags = 0, mask, mask_bits;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	/* Mask of pte bits to rewrite: everything but the frame address and DIRTY. */
+	mask_bits = ~(PTE_MASK | _PAGE_DIRTY);
+#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
+	mask_bits |= ~__supported_pte_mask;
+#endif
+	mask = map_mask_bits(mask_bits);
+
+	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+
+	for (;;) {
+		if (addr != end && pte_present(*pte)) {
+			newval = __pte_val(pte_modify(*pte, newprot));
+			if (((__pte_val(*pte) ^ newval) & ~PTE_FLAGS) ||
+			    (nr_flag_ptes && (newval & PTE_FLAGS) != flags)) {
+				/* Full update: close the current run first. */
+				if ((rc = flush_flag_range(&v, &nr_flag_ptes)) != 0)
+					break;
+				u[nr_full_ptes].ptr = virt_to_machine(pte) | MMU_ATOMIC_PT_UPDATE;
+				u[nr_full_ptes].val = newval;
+				if (++nr_full_ptes == MAX_BATCHED_FULL_PTES) {
+					if ((rc = HYPERVISOR_mmu_update(
+						&u[0], nr_full_ptes, NULL, DOMID_SELF)) != 0)
+						break;
+					nr_full_ptes = 0;
+				}
+			} else {
+				if (nr_flag_ptes++ == 0) {
+					flags = newval & PTE_FLAGS;
+					v.ptr = virt_to_machine(pte) | MMU_FLAG_RANGE_UPDATE;
+					v.val = mask | map_flag_bits(flags);
+				}
+				/* Keep the count within its 10-bit field. */
+				if (nr_flag_ptes == MAX_FLAG_RANGE_PTES &&
+				    (rc = flush_flag_range(&v, &nr_flag_ptes)) != 0)
+					break;
+			}
+		} else {
+			/* Hole or end of range: a run must be contiguous. */
+			if ((rc = flush_flag_range(&v, &nr_flag_ptes)) != 0)
+				break;
+			if (addr == end) {
+				if (nr_full_ptes)
+					rc = HYPERVISOR_mmu_update(
+						&u[0], nr_full_ptes, NULL, DOMID_SELF);
+				break;
+			}
+		}
+		pte++;
+		addr += PAGE_SIZE;
+	}
+	pte_unmap_unlock(pte - 1, ptl);
+	/* -ENOSYS is tolerated so the caller can fall back; anything else is fatal. */
+	BUG_ON(rc && rc != -ENOSYS);
+	return !rc;
+}
diff -r 61c96456a3e1 mm/mprotect.c
--- a/mm/mprotect.c	Thu Dec 20 16:58:14 2007 +0000
+++ b/mm/mprotect.c	Mon Jan 07 17:48:42 2008 -0700
@@ -75,6 +75,8 @@ static inline void change_pmd_range(stru
 	do {
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		if (arch_change_pte_range(mm, pmd, addr, next, newprot))
 			continue;
 		change_pte_range(mm, pmd, addr, next, newprot);
 	} while (pmd++, addr = next, addr != end);