[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] [XEN] Revert changeset 11438. Needs fixing for PAE.
# HG changeset patch # User kaf24@xxxxxxxxxxxxxxxxxxxx # Node ID bfd00b317815f2d1c8989b55a4cfd174da043e43 # Parent 6f36370e373a4a75d0c3a6695a3cef7f1adb8ce6 [XEN] Revert changeset 11438. Needs fixing for PAE. Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx> --- xen/arch/ia64/xen/mm.c | 95 ++++++---- xen/arch/powerpc/mm.c | 82 +++++--- xen/arch/x86/domain_build.c | 8 xen/arch/x86/mm.c | 285 ++++++++++++++++++++----------- xen/arch/x86/mm/shadow/common.c | 10 - xen/arch/x86/mm/shadow/multi.c | 2 xen/include/asm-ia64/mm.h | 8 xen/include/asm-powerpc/mm.h | 8 xen/include/asm-x86/mm.h | 50 +++++ xen/include/asm-x86/x86_32/page-3level.h | 2 10 files changed, 392 insertions(+), 158 deletions(-) diff -r 6f36370e373a -r bfd00b317815 xen/arch/ia64/xen/mm.c --- a/xen/arch/ia64/xen/mm.c Sat Sep 09 20:48:16 2006 +0100 +++ b/xen/arch/ia64/xen/mm.c Mon Sep 11 01:55:03 2006 +0100 @@ -1624,6 +1624,13 @@ void put_page_type(struct page_info *pag nx &= ~PGT_validated; } } + else if ( unlikely(((nx & (PGT_pinned | PGT_count_mask)) == + (PGT_pinned | 1)) && + ((nx & PGT_type_mask) != PGT_writable_page)) ) + { + /* Page is now only pinned. Make the back pointer mutable again. */ + nx |= PGT_va_mutable; + } } while ( unlikely((y = cmpxchg_rel(&page->u.inuse.type_info, x, nx)) != x) ); } @@ -1632,8 +1639,6 @@ int get_page_type(struct page_info *page int get_page_type(struct page_info *page, u32 type) { u32 nx, x, y = page->u.inuse.type_info; - - ASSERT(!(type & ~PGT_type_mask)); again: do { @@ -1646,25 +1651,29 @@ int get_page_type(struct page_info *page } else if ( unlikely((x & PGT_count_mask) == 0) ) { - if ( (x & PGT_type_mask) != type ) + if ( (x & (PGT_type_mask|PGT_va_mask)) != type ) { - /* - * On type change we check to flush stale TLB entries. This - * may be unnecessary (e.g., page was GDT/LDT) but those - * circumstances should be very rare. - */ - cpumask_t mask = - page_get_owner(page)->domain_dirty_cpumask; - tlbflush_filter(mask, page->tlbflush_timestamp); - - if ( unlikely(!cpus_empty(mask)) ) + if ( (x & PGT_type_mask) != (type & PGT_type_mask) ) { - perfc_incrc(need_flush_tlb_flush); - flush_tlb_mask(mask); + /* + * On type change we check to flush stale TLB + * entries. This may be unnecessary (e.g., page + * was GDT/LDT) but those circumstances should be + * very rare. + */ + cpumask_t mask = + page_get_owner(page)->domain_dirty_cpumask; + tlbflush_filter(mask, page->tlbflush_timestamp); + + if ( unlikely(!cpus_empty(mask)) ) + { + perfc_incrc(need_flush_tlb_flush); + flush_tlb_mask(mask); + } } /* We lose existing type, back pointer, and validity. */ - nx &= ~(PGT_type_mask | PGT_validated); + nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated); nx |= type; /* No special validation needed for writable pages. */ @@ -1673,22 +1682,46 @@ int get_page_type(struct page_info *page nx |= PGT_validated; } } - else if ( unlikely((x & PGT_type_mask) != type) ) + else { - if ( ((x & PGT_type_mask) != PGT_l2_page_table) || - (type != PGT_l1_page_table) ) - MEM_LOG("Bad type (saw %08x != exp %08x) " - "for mfn %016lx (pfn %016lx)", - x, type, page_to_mfn(page), - get_gpfn_from_mfn(page_to_mfn(page))); - return 0; - } - else if ( unlikely(!(x & PGT_validated)) ) - { - /* Someone else is updating validation of this page. Wait... */ - while ( (y = page->u.inuse.type_info) == x ) - cpu_relax(); - goto again; + if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) ) + { + if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) ) + { + if ( ((x & PGT_type_mask) != PGT_l2_page_table) || + ((type & PGT_type_mask) != PGT_l1_page_table) ) + MEM_LOG("Bad type (saw %08x != exp %08x) " + "for mfn %016lx (pfn %016lx)", + x, type, page_to_mfn(page), + get_gpfn_from_mfn(page_to_mfn(page))); + return 0; + } + else if ( (x & PGT_va_mask) == PGT_va_mutable ) + { + /* The va backpointer is mutable, hence we update it. */ + nx &= ~PGT_va_mask; + nx |= type; /* we know the actual type is correct */ + } + else if ( ((type & PGT_va_mask) != PGT_va_mutable) && + ((type & PGT_va_mask) != (x & PGT_va_mask)) ) + { +#ifdef CONFIG_X86_PAE + /* We use backptr as extra typing. Cannot be unknown. */ + if ( (type & PGT_type_mask) == PGT_l2_page_table ) + return 0; +#endif + /* This table is possibly mapped at multiple locations. */ + nx &= ~PGT_va_mask; + nx |= PGT_va_unknown; + } + } + if ( unlikely(!(x & PGT_validated)) ) + { + /* Someone else is updating validation of this page. Wait... */ + while ( (y = page->u.inuse.type_info) == x ) + cpu_relax(); + goto again; + } } } while ( unlikely((y = cmpxchg_acq(&page->u.inuse.type_info, x, nx)) != x) ); diff -r 6f36370e373a -r bfd00b317815 xen/arch/powerpc/mm.c --- a/xen/arch/powerpc/mm.c Sat Sep 09 20:48:16 2006 +0100 +++ b/xen/arch/powerpc/mm.c Mon Sep 11 01:55:03 2006 +0100 @@ -87,6 +87,12 @@ void put_page_type(struct page_info *pag /* Record TLB information for flush later. */ page->tlbflush_timestamp = tlbflush_current_time(); } + else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) == + (PGT_pinned | 1)) ) + { + /* Page is now only pinned. Make the back pointer mutable again. */ + nx |= PGT_va_mutable; + } } while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); } @@ -95,8 +101,6 @@ int get_page_type(struct page_info *page int get_page_type(struct page_info *page, unsigned long type) { unsigned long nx, x, y = page->u.inuse.type_info; - - ASSERT(!(type & ~PGT_type_mask)); again: do { @@ -109,25 +113,29 @@ int get_page_type(struct page_info *page } else if ( unlikely((x & PGT_count_mask) == 0) ) { - if ( (x & PGT_type_mask) != type ) + if ( (x & (PGT_type_mask|PGT_va_mask)) != type ) { - /* - * On type change we check to flush stale TLB entries. This - * may be unnecessary (e.g., page was GDT/LDT) but those - * circumstances should be very rare. - */ - cpumask_t mask = - page_get_owner(page)->domain_dirty_cpumask; - tlbflush_filter(mask, page->tlbflush_timestamp); - - if ( unlikely(!cpus_empty(mask)) ) - { - perfc_incrc(need_flush_tlb_flush); - flush_tlb_mask(mask); + if ( (x & PGT_type_mask) != (type & PGT_type_mask) ) + { + /* + * On type change we check to flush stale TLB + * entries. This may be unnecessary (e.g., page + * was GDT/LDT) but those circumstances should be + * very rare. + */ + cpumask_t mask = + page_get_owner(page)->domain_dirty_cpumask; + tlbflush_filter(mask, page->tlbflush_timestamp); + + if ( unlikely(!cpus_empty(mask)) ) + { + perfc_incrc(need_flush_tlb_flush); + flush_tlb_mask(mask); + } } /* We lose existing type, back pointer, and validity. */ - nx &= ~(PGT_type_mask | PGT_validated); + nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated); nx |= type; /* No special validation needed for writable pages. */ @@ -136,16 +144,36 @@ int get_page_type(struct page_info *page nx |= PGT_validated; } } - else if ( unlikely((x & PGT_type_mask) != type) ) - { - return 0; - } - if ( unlikely(!(x & PGT_validated)) ) - { - /* Someone else is updating validation of this page. Wait... */ - while ( (y = page->u.inuse.type_info) == x ) - cpu_relax(); - goto again; + else + { + if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) ) + { + if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) ) + { + return 0; + } + else if ( (x & PGT_va_mask) == PGT_va_mutable ) + { + /* The va backpointer is mutable, hence we update it. */ + nx &= ~PGT_va_mask; + nx |= type; /* we know the actual type is correct */ + } + else if ( (type & PGT_va_mask) != PGT_va_mutable ) + { + ASSERT((type & PGT_va_mask) != (x & PGT_va_mask)); + + /* This table is possibly mapped at multiple locations. */ + nx &= ~PGT_va_mask; + nx |= PGT_va_unknown; + } + } + if ( unlikely(!(x & PGT_validated)) ) + { + /* Someone else is updating validation of this page. Wait... */ + while ( (y = page->u.inuse.type_info) == x ) + cpu_relax(); + goto again; + } } } while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); diff -r 6f36370e373a -r bfd00b317815 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Sat Sep 09 20:48:16 2006 +0100 +++ b/xen/arch/x86/domain_build.c Mon Sep 11 01:55:03 2006 +0100 @@ -510,13 +510,15 @@ int construct_dom0(struct domain *d, case 1 ... 4: page->u.inuse.type_info &= ~PGT_type_mask; page->u.inuse.type_info |= PGT_l2_page_table; - if ( count == 4 ) - page->u.inuse.type_info |= PGT_pae_xen_l2; + page->u.inuse.type_info |= + (count-1) << PGT_va_shift; get_page(page, d); /* an extra ref because of readable mapping */ break; default: page->u.inuse.type_info &= ~PGT_type_mask; page->u.inuse.type_info |= PGT_l1_page_table; + page->u.inuse.type_info |= + ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-5))<<PGT_va_shift; get_page(page, d); /* an extra ref because of readable mapping */ break; } @@ -542,6 +544,8 @@ int construct_dom0(struct domain *d, { page->u.inuse.type_info &= ~PGT_type_mask; page->u.inuse.type_info |= PGT_l1_page_table; + page->u.inuse.type_info |= + ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift; /* * No longer writable: decrement the type_count. diff -r 6f36370e373a -r bfd00b317815 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Sat Sep 09 20:48:16 2006 +0100 +++ b/xen/arch/x86/mm.c Mon Sep 11 01:55:03 2006 +0100 @@ -625,7 +625,8 @@ get_page_from_l1e( /* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */ static int get_page_from_l2e( - l2_pgentry_t l2e, unsigned long pfn, struct domain *d) + l2_pgentry_t l2e, unsigned long pfn, + struct domain *d, unsigned long vaddr) { int rc; @@ -638,7 +639,10 @@ get_page_from_l2e( return 0; } - rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d); + vaddr >>= L2_PAGETABLE_SHIFT; + vaddr <<= PGT_va_shift; + rc = get_page_and_type_from_pagenr( + l2e_get_pfn(l2e), PGT_l1_page_table | vaddr, d); #if CONFIG_PAGING_LEVELS == 2 if ( unlikely(!rc) ) rc = get_linear_pagetable(l2e, pfn, d); @@ -650,7 +654,8 @@ get_page_from_l2e( #if CONFIG_PAGING_LEVELS >= 3 static int get_page_from_l3e( - l3_pgentry_t l3e, unsigned long pfn, struct domain *d) + l3_pgentry_t l3e, unsigned long pfn, + struct domain *d, unsigned long vaddr) { int rc; @@ -663,7 +668,11 @@ get_page_from_l3e( return 0; } - rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d); + vaddr >>= L3_PAGETABLE_SHIFT; + vaddr <<= PGT_va_shift; + rc = get_page_and_type_from_pagenr( + l3e_get_pfn(l3e), + PGT_l2_page_table | vaddr, d); return rc; } #endif /* 3 level */ @@ -671,7 +680,8 @@ get_page_from_l3e( #if CONFIG_PAGING_LEVELS >= 4 static int get_page_from_l4e( - l4_pgentry_t l4e, unsigned long pfn, struct domain *d) + l4_pgentry_t l4e, unsigned long pfn, + struct domain *d, unsigned long vaddr) { int rc; @@ -684,7 +694,11 @@ get_page_from_l4e( return 0; } - rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d); + vaddr >>= L4_PAGETABLE_SHIFT; + vaddr <<= PGT_va_shift; + rc = get_page_and_type_from_pagenr( + l4e_get_pfn(l4e), + PGT_l3_page_table | vaddr, d); if ( unlikely(!rc) ) rc = get_linear_pagetable(l4e, pfn, d); @@ -863,8 +877,8 @@ static int create_pae_xen_mappings(l3_pg /* * The Xen-private mappings include linear mappings. The L2 thus cannot * be shared by multiple L3 tables. The test here is adequate because: - * 1. Cannot appear in slots != 3 because get_page_type() checks the - * PGT_pae_xen_l2 flag, which is asserted iff the L2 appears in slot 3 + * 1. Cannot appear in slots != 3 because the page would then then have + * unknown va backpointer, which get_page_type() explicitly disallows. * 2. Cannot appear in another page table's L3: * a. alloc_l3_table() calls this function and this check will fail * b. mod_l3_entry() disallows updates to slot 3 in an existing table @@ -874,7 +888,6 @@ static int create_pae_xen_mappings(l3_pg page = l3e_get_page(l3e3); BUG_ON(page->u.inuse.type_info & PGT_pinned); BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0); - BUG_ON(!(page->u.inuse.type_info & PGT_pae_xen_l2)); if ( (page->u.inuse.type_info & PGT_count_mask) != 1 ) { MEM_LOG("PAE L3 3rd slot is shared"); @@ -936,17 +949,61 @@ static void pae_flush_pgd( flush_tlb_mask(d->domain_dirty_cpumask); } +static inline int l1_backptr( + unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type) +{ + unsigned long l2_backptr = l2_type & PGT_va_mask; + ASSERT(l2_backptr != PGT_va_unknown); + ASSERT(l2_backptr != PGT_va_mutable); + *backptr = + ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) | + (offset_in_l2 << L2_PAGETABLE_SHIFT); + return 1; +} + #elif CONFIG_X86_64 # define create_pae_xen_mappings(pl3e) (1) # define pae_flush_pgd(mfn, idx, nl3e) ((void)0) + +static inline int l1_backptr( + unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type) +{ + unsigned long l2_backptr = l2_type & PGT_va_mask; + ASSERT(l2_backptr != PGT_va_unknown); + ASSERT(l2_backptr != PGT_va_mutable); + *backptr = ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) | + (offset_in_l2 << L2_PAGETABLE_SHIFT); + return 1; +} + +static inline int l2_backptr( + unsigned long *backptr, unsigned long offset_in_l3, unsigned long l3_type) +{ + unsigned long l3_backptr = l3_type & PGT_va_mask; + ASSERT(l3_backptr != PGT_va_unknown); + ASSERT(l3_backptr != PGT_va_mutable); + *backptr = ((l3_backptr >> PGT_va_shift) << L4_PAGETABLE_SHIFT) | + (offset_in_l3 << L3_PAGETABLE_SHIFT); + return 1; +} + +static inline int l3_backptr( + unsigned long *backptr, unsigned long offset_in_l4, unsigned long l4_type) +{ + *backptr = (offset_in_l4 << L4_PAGETABLE_SHIFT); + return 1; +} #else # define create_pae_xen_mappings(pl3e) (1) +# define l1_backptr(bp,l2o,l2t) \ + ({ *(bp) = (unsigned long)(l2o) << L2_PAGETABLE_SHIFT; 1; }) #endif static int alloc_l2_table(struct page_info *page, unsigned long type) { struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); + unsigned long vaddr; l2_pgentry_t *pl2e; int i; @@ -956,8 +1013,10 @@ static int alloc_l2_table(struct page_in for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { + if ( !l1_backptr(&vaddr, i, type) ) + goto fail; if ( is_guest_l2_slot(type, i) && - unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) ) + unlikely(!get_page_from_l2e(pl2e[i], pfn, d, vaddr)) ) goto fail; adjust_guest_l2e(pl2e[i]); @@ -992,10 +1051,11 @@ static int alloc_l2_table(struct page_in #if CONFIG_PAGING_LEVELS >= 3 -static int alloc_l3_table(struct page_info *page) +static int alloc_l3_table(struct page_info *page, unsigned long type) { struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); + unsigned long vaddr; l3_pgentry_t *pl3e; int i; @@ -1019,21 +1079,14 @@ static int alloc_l3_table(struct page_in pl3e = map_domain_page(pfn); for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) { -#ifdef CONFIG_X86_PAE - if ( i == 3 ) - { - if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) || - (l3e_get_flags(pl3e[i]) & L3_DISALLOW_MASK) || - !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]), - PGT_l2_page_table | - PGT_pae_xen_l2, - d) ) - goto fail; - } - else +#if CONFIG_PAGING_LEVELS >= 4 + if ( !l2_backptr(&vaddr, i, type) ) + goto fail; +#else + vaddr = (unsigned long)i << L3_PAGETABLE_SHIFT; #endif if ( is_guest_l3_slot(i) && - unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) ) + unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) ) goto fail; adjust_guest_l3e(pl3e[i]); @@ -1055,23 +1108,27 @@ static int alloc_l3_table(struct page_in return 0; } #else -#define alloc_l3_table(page) (0) +#define alloc_l3_table(page, type) (0) #endif #if CONFIG_PAGING_LEVELS >= 4 -static int alloc_l4_table(struct page_info *page) +static int alloc_l4_table(struct page_info *page, unsigned long type) { struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); l4_pgentry_t *pl4e = page_to_virt(page); + unsigned long vaddr; int i; ASSERT(!shadow_mode_refcounts(d)); for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ ) { + if ( !l3_backptr(&vaddr, i, type) ) + goto fail; + if ( is_guest_l4_slot(i) && - unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) ) + unlikely(!get_page_from_l4e(pl4e[i], pfn, d, vaddr)) ) goto fail; adjust_guest_l4e(pl4e[i]); @@ -1099,7 +1156,7 @@ static int alloc_l4_table(struct page_in return 0; } #else -#define alloc_l4_table(page) (0) +#define alloc_l4_table(page, type) (0) #endif @@ -1133,8 +1190,6 @@ static void free_l2_table(struct page_in put_page_from_l2e(pl2e[i], pfn); unmap_domain_page(pl2e); - - page->u.inuse.type_info &= ~PGT_pae_xen_l2; } @@ -1302,6 +1357,7 @@ static int mod_l2_entry(l2_pgentry_t *pl unsigned long type) { l2_pgentry_t ol2e; + unsigned long vaddr = 0; if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) ) { @@ -1327,7 +1383,8 @@ static int mod_l2_entry(l2_pgentry_t *pl if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT)) return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn); - if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain)) ) + if ( unlikely(!l1_backptr(&vaddr, pgentry_ptr_to_slot(pl2e), type)) || + unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, vaddr)) ) return 0; if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) ) @@ -1350,9 +1407,11 @@ static int mod_l2_entry(l2_pgentry_t *pl /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */ static int mod_l3_entry(l3_pgentry_t *pl3e, l3_pgentry_t nl3e, - unsigned long pfn) + unsigned long pfn, + unsigned long type) { l3_pgentry_t ol3e; + unsigned long vaddr; int okay; if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) ) @@ -1388,8 +1447,16 @@ static int mod_l3_entry(l3_pgentry_t *pl if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT)) return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn); - if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) ) +#if CONFIG_PAGING_LEVELS >= 4 + if ( unlikely(!l2_backptr(&vaddr, pgentry_ptr_to_slot(pl3e), type)) || + unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) ) return 0; +#else + vaddr = (((unsigned long)pl3e & ~PAGE_MASK) / sizeof(l3_pgentry_t)) + << L3_PAGETABLE_SHIFT; + if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) ) + return 0; +#endif if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) ) { @@ -1418,9 +1485,11 @@ static int mod_l3_entry(l3_pgentry_t *pl /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */ static int mod_l4_entry(l4_pgentry_t *pl4e, l4_pgentry_t nl4e, - unsigned long pfn) + unsigned long pfn, + unsigned long type) { l4_pgentry_t ol4e; + unsigned long vaddr; if ( unlikely(!is_guest_l4_slot(pgentry_ptr_to_slot(pl4e))) ) { @@ -1446,7 +1515,8 @@ static int mod_l4_entry(l4_pgentry_t *pl if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT)) return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn); - if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) ) + if ( unlikely(!l3_backptr(&vaddr, pgentry_ptr_to_slot(pl4e), type)) || + unlikely(!get_page_from_l4e(nl4e, pfn, current->domain, vaddr)) ) return 0; if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) ) @@ -1480,9 +1550,9 @@ int alloc_page_type(struct page_info *pa case PGT_l2_page_table: return alloc_l2_table(page, type); case PGT_l3_page_table: - return alloc_l3_table(page); + return alloc_l3_table(page, type); case PGT_l4_page_table: - return alloc_l4_table(page); + return alloc_l4_table(page, type); case PGT_gdt_page: case PGT_ldt_page: return alloc_segdesc_page(page); @@ -1602,6 +1672,12 @@ void put_page_type(struct page_info *pag /* Record TLB information for flush later. */ page->tlbflush_timestamp = tlbflush_current_time(); } + else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) == + (PGT_pinned|PGT_l1_page_table|1)) ) + { + /* Page is now only pinned. Make the back pointer mutable again. */ + nx |= PGT_va_mutable; + } } while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); } @@ -1610,8 +1686,6 @@ int get_page_type(struct page_info *page int get_page_type(struct page_info *page, unsigned long type) { unsigned long nx, x, y = page->u.inuse.type_info; - - ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2))); again: do { @@ -1624,26 +1698,29 @@ int get_page_type(struct page_info *page } else if ( unlikely((x & PGT_count_mask) == 0) ) { - ASSERT(!(x & PGT_pae_xen_l2)); - if ( (x & PGT_type_mask) != type ) + if ( (x & (PGT_type_mask|PGT_va_mask)) != type ) { - /* - * On type change we check to flush stale TLB entries. This - * may be unnecessary (e.g., page was GDT/LDT) but those - * circumstances should be very rare. - */ - cpumask_t mask = - page_get_owner(page)->domain_dirty_cpumask; - tlbflush_filter(mask, page->tlbflush_timestamp); - - if ( unlikely(!cpus_empty(mask)) ) + if ( (x & PGT_type_mask) != (type & PGT_type_mask) ) { - perfc_incrc(need_flush_tlb_flush); - flush_tlb_mask(mask); + /* + * On type change we check to flush stale TLB + * entries. This may be unnecessary (e.g., page + * was GDT/LDT) but those circumstances should be + * very rare. + */ + cpumask_t mask = + page_get_owner(page)->domain_dirty_cpumask; + tlbflush_filter(mask, page->tlbflush_timestamp); + + if ( unlikely(!cpus_empty(mask)) ) + { + perfc_incrc(need_flush_tlb_flush); + flush_tlb_mask(mask); + } } /* We lose existing type, back pointer, and validity. */ - nx &= ~(PGT_type_mask | PGT_validated); + nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated); nx |= type; /* No special validation needed for writable pages. */ @@ -1652,23 +1729,51 @@ int get_page_type(struct page_info *page nx |= PGT_validated; } } - else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) ) - { - if ( ((x & PGT_type_mask) != PGT_l2_page_table) || - (type != PGT_l1_page_table) ) - MEM_LOG("Bad type (saw %" PRtype_info - " != exp %" PRtype_info ") " - "for mfn %lx (pfn %lx)", - x, type, page_to_mfn(page), - get_gpfn_from_mfn(page_to_mfn(page))); - return 0; - } - else if ( unlikely(!(x & PGT_validated)) ) - { - /* Someone else is updating validation of this page. Wait... */ - while ( (y = page->u.inuse.type_info) == x ) - cpu_relax(); - goto again; + else + { + if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) ) + { + if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) ) + { + if ( ((x & PGT_type_mask) != PGT_l2_page_table) || + ((type & PGT_type_mask) != PGT_l1_page_table) ) + MEM_LOG("Bad type (saw %" PRtype_info + " != exp %" PRtype_info ") " + "for mfn %lx (pfn %lx)", + x, type, page_to_mfn(page), + get_gpfn_from_mfn(page_to_mfn(page))); + return 0; + } + else if ( (x & PGT_va_mask) == PGT_va_mutable ) + { + /* The va backpointer is mutable, hence we update it. */ + nx &= ~PGT_va_mask; + nx |= type; /* we know the actual type is correct */ + } + else if ( (type & PGT_va_mask) != PGT_va_mutable ) + { + ASSERT((type & PGT_va_mask) != (x & PGT_va_mask)); +#ifdef CONFIG_X86_PAE + /* We use backptr as extra typing. Cannot be unknown. */ + if ( (type & PGT_type_mask) == PGT_l2_page_table ) + return 0; +#endif + /* Fixme: add code to propagate va_unknown to subtables. */ + if ( ((type & PGT_type_mask) >= PGT_l2_page_table) && + !shadow_mode_refcounts(page_get_owner(page)) ) + return 0; + /* This table is possibly mapped at multiple locations. */ + nx &= ~PGT_va_mask; + nx |= PGT_va_unknown; + } + } + if ( unlikely(!(x & PGT_validated)) ) + { + /* Someone else is updating validation of this page. Wait... */ + while ( (y = page->u.inuse.type_info) == x ) + cpu_relax(); + goto again; + } } } while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); @@ -1925,25 +2030,19 @@ int do_mmuext_op( switch ( op.cmd ) { case MMUEXT_PIN_L1_TABLE: - type = PGT_l1_page_table; + type = PGT_l1_page_table | PGT_va_mutable; goto pin_page; case MMUEXT_PIN_L2_TABLE: - type = PGT_l2_page_table; - goto pin_page; - case MMUEXT_PIN_L3_TABLE: - type = PGT_l3_page_table; - goto pin_page; - case MMUEXT_PIN_L4_TABLE: - type = PGT_l4_page_table; + /* Ignore pinning of subdirectories. */ + if ( (op.cmd - MMUEXT_PIN_L1_TABLE) != (CONFIG_PAGING_LEVELS - 1) ) + break; + + type = PGT_root_page_table; pin_page: - /* Ignore pinning of invalid paging levels. */ - if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) ) - break; - if ( shadow_mode_refcounts(FOREIGNDOM) ) break; @@ -2227,7 +2326,7 @@ int do_mmu_update( } if ( unlikely(!get_page_type( - page, type_info & PGT_type_mask)) ) + page, type_info & (PGT_type_mask|PGT_va_mask))) ) goto not_a_pt; switch ( type_info & PGT_type_mask ) @@ -2249,7 +2348,7 @@ int do_mmu_update( case PGT_l3_page_table: { l3_pgentry_t l3e = l3e_from_intpte(req.val); - okay = mod_l3_entry(va, l3e, mfn); + okay = mod_l3_entry(va, l3e, mfn, type_info); } break; #endif @@ -2257,7 +2356,7 @@ int do_mmu_update( case PGT_l4_page_table: { l4_pgentry_t l4e = l4e_from_intpte(req.val); - okay = mod_l4_entry(va, l4e, mfn); + okay = mod_l4_entry(va, l4e, mfn, type_info); } break; #endif @@ -2355,7 +2454,7 @@ static int create_grant_pte_mapping( void *va; unsigned long gmfn, mfn; struct page_info *page; - u32 type; + u32 type_info; l1_pgentry_t ol1e; struct domain *d = v->domain; @@ -2376,8 +2475,9 @@ static int create_grant_pte_mapping( va = (void *)((unsigned long)va + (pte_addr & ~PAGE_MASK)); page = mfn_to_page(mfn); - type = page->u.inuse.type_info & PGT_type_mask; - if ( (type != PGT_l1_page_table) || !get_page_type(page, type) ) + type_info = page->u.inuse.type_info; + if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) || + !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) ) { MEM_LOG("Grant map attempted to update a non-L1 page"); rc = GNTST_general_error; @@ -2411,7 +2511,7 @@ static int destroy_grant_pte_mapping( void *va; unsigned long gmfn, mfn; struct page_info *page; - u32 type; + u32 type_info; l1_pgentry_t ol1e; gmfn = addr >> PAGE_SHIFT; @@ -2427,8 +2527,9 @@ static int destroy_grant_pte_mapping( va = (void *)((unsigned long)va + (addr & ~PAGE_MASK)); page = mfn_to_page(mfn); - type = page->u.inuse.type_info & PGT_type_mask; - if ( (type != PGT_l1_page_table) || !get_page_type(page, type) ) + type_info = page->u.inuse.type_info; + if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) || + !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) ) { MEM_LOG("Grant map attempted to update a non-L1 page"); rc = GNTST_general_error; diff -r 6f36370e373a -r bfd00b317815 xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Sat Sep 09 20:48:16 2006 +0100 +++ b/xen/arch/x86/mm/shadow/common.c Mon Sep 11 01:55:03 2006 +0100 @@ -21,6 +21,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define SHADOW 1 + #include <xen/config.h> #include <xen/types.h> #include <xen/mm.h> @@ -223,6 +225,7 @@ struct x86_emulate_ops shadow_emulator_o .cmpxchg8b_emulated = sh_x86_emulate_cmpxchg8b_emulated, }; + /**************************************************************************/ /* Code for "promoting" a guest page to the point where the shadow code is * willing to let it be treated as a guest page table. This generally @@ -232,6 +235,7 @@ void shadow_promote(struct vcpu *v, mfn_ void shadow_promote(struct vcpu *v, mfn_t gmfn, u32 type) { struct page_info *page = mfn_to_page(gmfn); + unsigned long type_info; ASSERT(valid_mfn(gmfn)); @@ -247,8 +251,10 @@ void shadow_promote(struct vcpu *v, mfn_ // vcpu or not, or even what kind of type we get; we just want the type // count to be > 0. // - while ( !get_page_type(page, page->u.inuse.type_info & PGT_type_mask) ) - continue; + do { + type_info = + page->u.inuse.type_info & (PGT_type_mask | PGT_va_mask); + } while ( !get_page_type(page, type_info) ); // Now that the type ref is non-zero, we can safely use the // shadow_flags. diff -r 6f36370e373a -r bfd00b317815 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Sat Sep 09 20:48:16 2006 +0100 +++ b/xen/arch/x86/mm/shadow/multi.c Mon Sep 11 01:55:03 2006 +0100 @@ -34,6 +34,8 @@ // - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address // space for both PV and HVM guests. // + +#define SHADOW 1 #include <xen/config.h> #include <xen/types.h> diff -r 6f36370e373a -r bfd00b317815 xen/include/asm-ia64/mm.h --- a/xen/include/asm-ia64/mm.h Sat Sep 09 20:48:16 2006 +0100 +++ b/xen/include/asm-ia64/mm.h Mon Sep 11 01:55:03 2006 +0100 @@ -102,6 +102,14 @@ struct page_info /* Owning guest has pinned this page to its current type? */ #define _PGT_pinned 27 #define PGT_pinned (1U<<_PGT_pinned) + + /* The 27 most significant bits of virt address if this is a page table. */ +#define PGT_va_shift 32 +#define PGT_va_mask ((unsigned long)((1U<<28)-1)<<PGT_va_shift) + /* Is the back pointer still mutable (i.e. not fixed yet)? */ +#define PGT_va_mutable ((unsigned long)((1U<<28)-1)<<PGT_va_shift) + /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */ +#define PGT_va_unknown ((unsigned long)((1U<<28)-2)<<PGT_va_shift) /* 16-bit count of uses of this frame as its current type. */ #define PGT_count_mask ((1U<<16)-1) diff -r 6f36370e373a -r bfd00b317815 xen/include/asm-powerpc/mm.h --- a/xen/include/asm-powerpc/mm.h Sat Sep 09 20:48:16 2006 +0100 +++ b/xen/include/asm-powerpc/mm.h Mon Sep 11 01:55:03 2006 +0100 @@ -101,6 +101,14 @@ struct page_extents { /* Has this page been validated for use as its current type? */ #define _PGT_validated 27 #define PGT_validated (1U<<_PGT_validated) + + /* The 27 most significant bits of virt address if this is a page table. */ +#define PGT_va_shift 32 +#define PGT_va_mask ((unsigned long)((1U<<28)-1)<<PGT_va_shift) + /* Is the back pointer still mutable (i.e. not fixed yet)? */ +#define PGT_va_mutable ((unsigned long)((1U<<28)-1)<<PGT_va_shift) + /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */ +#define PGT_va_unknown ((unsigned long)((1U<<28)-2)<<PGT_va_shift) /* 16-bit count of uses of this frame as its current type. */ #define PGT_count_mask ((1U<<16)-1) diff -r 6f36370e373a -r bfd00b317815 xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Sat Sep 09 20:48:16 2006 +0100 +++ b/xen/include/asm-x86/mm.h Mon Sep 11 01:55:03 2006 +0100 @@ -75,6 +75,19 @@ struct page_info #define PGT_gdt_page (5U<<29) /* using this page in a GDT? */ #define PGT_ldt_page (6U<<29) /* using this page in an LDT? */ #define PGT_writable_page (7U<<29) /* has writable mappings of this page? */ + +#ifndef SHADOW +#define PGT_l1_shadow PGT_l1_page_table +#define PGT_l2_shadow PGT_l2_page_table +#define PGT_l3_shadow PGT_l3_page_table +#define PGT_l4_shadow PGT_l4_page_table +#define PGT_hl2_shadow (5U<<29) +#define PGT_snapshot (6U<<29) +#define PGT_writable_pred (7U<<29) /* predicted gpfn with writable ref */ + +#define PGT_fl1_shadow (5U<<29) +#endif + #define PGT_type_mask (7U<<29) /* Bits 29-31. */ /* Owning guest has pinned this page to its current type? */ @@ -83,12 +96,43 @@ struct page_info /* Has this page been validated for use as its current type? */ #define _PGT_validated 27 #define PGT_validated (1U<<_PGT_validated) - /* PAE only: is this an L2 page directory containing Xen-private mappings? */ -#define _PGT_pae_xen_l2 26 -#define PGT_pae_xen_l2 (1U<<_PGT_pae_xen_l2) +#if defined(__i386__) + /* The 11 most significant bits of virt address if this is a page table. */ +#define PGT_va_shift 16 +#define PGT_va_mask (((1U<<11)-1)<<PGT_va_shift) + /* Is the back pointer still mutable (i.e. not fixed yet)? */ +#define PGT_va_mutable (((1U<<11)-1)<<PGT_va_shift) + /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */ +#define PGT_va_unknown (((1U<<11)-2)<<PGT_va_shift) +#elif defined(__x86_64__) + /* The 27 most significant bits of virt address if this is a page table. */ +#define PGT_va_shift 32 +#define PGT_va_mask ((unsigned long)((1U<<28)-1)<<PGT_va_shift) + /* Is the back pointer still mutable (i.e. not fixed yet)? */ +#define PGT_va_mutable ((unsigned long)((1U<<28)-1)<<PGT_va_shift) + /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */ +#define PGT_va_unknown ((unsigned long)((1U<<28)-2)<<PGT_va_shift) +#endif /* 16-bit count of uses of this frame as its current type. */ #define PGT_count_mask ((1U<<16)-1) + +#ifndef SHADOW +#ifdef __x86_64__ +#define PGT_high_mfn_shift 52 +#define PGT_high_mfn_mask (0xfffUL << PGT_high_mfn_shift) +#define PGT_mfn_mask (((1U<<27)-1) | PGT_high_mfn_mask) +#define PGT_high_mfn_nx (0x800UL << PGT_high_mfn_shift) +#else + /* 23-bit mfn mask for shadow types: good for up to 32GB RAM. */ +#define PGT_mfn_mask ((1U<<23)-1) + /* NX for PAE xen is not supported yet */ +#define PGT_high_mfn_nx (1ULL << 63) + +#define PGT_score_shift 23 +#define PGT_score_mask (((1U<<4)-1)<<PGT_score_shift) +#endif +#endif /* SHADOW */ /* Cleared when the owning guest 'frees' this page. */ #define _PGC_allocated 31 diff -r 6f36370e373a -r bfd00b317815 xen/include/asm-x86/x86_32/page-3level.h --- a/xen/include/asm-x86/x86_32/page-3level.h Sat Sep 09 20:48:16 2006 +0100 +++ b/xen/include/asm-x86/x86_32/page-3level.h Mon Sep 11 01:55:03 2006 +0100 @@ -49,7 +49,7 @@ typedef l3_pgentry_t root_pgentry_t; /* misc */ #define is_guest_l1_slot(s) (1) #define is_guest_l2_slot(t,s) \ - ( !((t) & PGT_pae_xen_l2) || \ + ( ((((t) & PGT_va_mask) >> PGT_va_shift) != 3) || \ ((s) < (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) ) #define is_guest_l3_slot(s) (1) _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |