--- xen-unstable//./xen/include/asm-x86/x86_32/page.h	2008-07-17 09:49:27.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/x86_32/page.h	2008-10-02 15:07:34.000000000 -0500
@@ -112,7 +112,7 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
  * Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL.
  * Permit the NX bit if the hardware supports it.
  */
-#define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX)
+#define BASE_DISALLOW_MASK (0xFFFFF118U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
 #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
--- xen-unstable//./xen/include/asm-x86/x86_64/page.h	2008-10-02 14:23:17.000000000 -0500
+++ xen-hpage/./xen/include/asm-x86/x86_64/page.h	2008-10-02 15:07:34.000000000 -0500
@@ -112,7 +112,7 @@ typedef l4_pgentry_t root_pgentry_t;
  * Permit the NX bit if the hardware supports it.
  * Note that range [62:52] is available for software use on x86/64.
  */
-#define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX)
+#define BASE_DISALLOW_MASK (0xFF800118U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
 #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
--- xen-unstable//./xen/arch/x86/mm.c	2008-10-02 14:23:17.000000000 -0500
+++ xen-hpage/./xen/arch/x86/mm.c	2008-10-08 16:56:46.000000000 -0500
@@ -160,6 +160,9 @@ unsigned long total_pages;
 
 #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
 
+static int opt_allow_hugepage = 0;
+boolean_param("allowhugepage", opt_allow_hugepage);
+
 #define l1_disallow_mask(d)                                     \
     ((d != dom_io) &&                                           \
      (rangeset_is_empty((d)->iomem_caps) &&                     \
@@ -584,6 +587,28 @@ static int get_page_and_type_from_pagenr
     return rc;
 }
 
+static int
+get_data_page(struct page_info *page, struct domain *d, int writeable)
+{
+    int rc;
+
+    if (writeable)
+        rc = get_page_and_type(page, d, PGT_writable_page);
+    else
+        rc = get_page(page, d);
+
+    return rc;
+}
+
+static void
+put_data_page(struct page_info *page, int writeable)
+{
+    if (writeable)
+        put_page_and_type(page);
+    else
+        put_page(page);
+}
+
 /*
  * We allow root tables to map each other (a.k.a. linear page tables). It
  * needs some special care with reference counts and access permissions:
@@ -656,6 +681,7 @@ get_page_from_l1e(
     struct vcpu *curr = current;
     struct domain *owner;
     int okay;
+    int writeable;
 
     if ( !(l1f & _PAGE_PRESENT) )
         return 1;
@@ -698,10 +724,9 @@
      * contribute to writeable mapping refcounts. (This allows the
      * qemu-dm helper process in dom0 to map the domain's memory without
      * messing up the count of "real" writable mappings.) */
-    okay = (((l1f & _PAGE_RW) &&
-             !(unlikely(paging_mode_external(d) && (d != curr->domain))))
-            ? get_page_and_type(page, d, PGT_writable_page)
-            : get_page(page, d));
+    writeable = (l1f & _PAGE_RW) &&
+        !(unlikely(paging_mode_external(d) && (d != curr->domain)));
+    okay = get_data_page(page, d, writeable);
     if ( !okay )
     {
         MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
@@ -759,11 +784,42 @@ get_page_from_l2e(
         MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
         return -EINVAL;
     }
 
+    if ( l2e_get_flags(l2e) & _PAGE_PSE ) {
+        unsigned long mfn = l2e_get_pfn(l2e);
+        unsigned long m, me;
+        struct page_info *page = mfn_to_page(mfn);
+        int writeable;
-    rc = get_page_and_type_from_pagenr(
-        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
-    if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
-        rc = 0;
+        if (!opt_allow_hugepage)
+            return -EINVAL;
+
+        writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
+        rc = get_data_page(page, d, writeable);
+        if (unlikely(!rc)) {
+            return -EINVAL;
+        }
+
+        for (m = mfn+1, me = m + (L1_PAGETABLE_ENTRIES-1); m < me; m++) {
+            rc = get_data_page(mfn_to_page(m), d, writeable);
+            if (unlikely(!rc)) {
+                for (--m; m > mfn; --m) {
+                    put_data_page(mfn_to_page(m), writeable);
+                }
+                put_data_page(page, writeable);
+                return -EINVAL;
+            }
+        }
+#ifdef __x86_64__
+        map_pages_to_xen((unsigned long)mfn_to_virt(mfn), mfn, L1_PAGETABLE_ENTRIES,
+                         PAGE_HYPERVISOR | l2e_get_flags(l2e));
+#endif
+    } else {
+        rc = get_page_and_type_from_pagenr(
+            l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
+        if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+            rc = 0;
+    }
 
     return rc;
 }
@@ -955,7 +1011,19 @@ static int put_page_from_l2e(l2_pgentry
     if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
          (l2e_get_pfn(l2e) != pfn) )
     {
-        put_page_and_type(l2e_get_page(l2e));
+        if (l2e_get_flags(l2e) & _PAGE_PSE) {
+            unsigned long mfn = l2e_get_pfn(l2e);
+            unsigned long m, me;
+            struct page_info *page = mfn_to_page(mfn);
+            int writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
+            for (m = mfn+1, me = m + (L1_PAGETABLE_ENTRIES-1); m < me; m++) {
+                put_data_page(mfn_to_page(m), writeable);
+            }
+            put_data_page(page, writeable);
+        } else {
+            put_page_and_type(l2e_get_page(l2e));
+        }
         return 0;
     }
     return 1;