[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] [XEN] Add new safe pte update macros and use as necessary when updating active pagetables.
# HG changeset patch # User kfraser@xxxxxxxxxxxxxxxxxxxxx # Node ID 1d83974d08b1e4b6a38e979eda8179a06ffccc81 # Parent 93314655b16f62809c83644eed84592ebe4e8001 [XEN] Add new safe pte update macros and use as necessary when updating active pagetables. Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx> --- xen/arch/x86/mm.c | 54 +++++++++++++++++-------------- xen/arch/x86/mm/shadow/multi.c | 2 + xen/arch/x86/x86_32/domain_page.c | 10 ++--- xen/arch/x86/x86_32/mm.c | 35 ++++++++++---------- xen/arch/x86/x86_64/mm.c | 31 +++++++++-------- xen/include/asm-x86/page.h | 15 ++++++-- xen/include/asm-x86/x86_32/page-2level.h | 3 + xen/include/asm-x86/x86_32/page-3level.h | 7 ++++ xen/include/asm-x86/x86_64/page.h | 3 + 9 files changed, 95 insertions(+), 65 deletions(-) diff -r 93314655b16f -r 1d83974d08b1 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Thu Dec 07 14:40:13 2006 +0000 +++ b/xen/arch/x86/mm.c Thu Dec 07 14:57:14 2006 +0000 @@ -335,7 +335,7 @@ void make_cr3(struct vcpu *v, unsigned l cache->high_mfn = mfn; /* Map the guest L3 table and copy to the chosen low-memory cache. */ - *(fix_pae_highmem_pl1e - cpu) = l1e_from_pfn(mfn, __PAGE_HYPERVISOR); + l1e_write(fix_pae_highmem_pl1e-cpu, l1e_from_pfn(mfn, __PAGE_HYPERVISOR)); /* First check the previous high mapping can't be in the TLB. * (i.e. have we loaded CR3 since we last did this?) 
*/ if ( unlikely(this_cpu(make_cr3_timestamp) == this_cpu(tlbflush_time)) ) @@ -343,7 +343,7 @@ void make_cr3(struct vcpu *v, unsigned l highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu); lowmem_l3tab = cache->table[cache->inuse_idx]; memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0])); - *(fix_pae_highmem_pl1e - cpu) = l1e_empty(); + l1e_write(fix_pae_highmem_pl1e-cpu, l1e_empty()); this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time); v->arch.cr3 = __pa(lowmem_l3tab); @@ -380,7 +380,7 @@ void invalidate_shadow_ldt(struct vcpu * { pfn = l1e_get_pfn(v->arch.perdomain_ptes[i]); if ( pfn == 0 ) continue; - v->arch.perdomain_ptes[i] = l1e_empty(); + l1e_write(&v->arch.perdomain_ptes[i], l1e_empty()); page = mfn_to_page(pfn); ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page); ASSERT_PAGE_IS_DOMAIN(page, v->domain); @@ -449,7 +449,7 @@ int map_ldt_shadow_page(unsigned int off nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW); - v->arch.perdomain_ptes[off + 16] = nl1e; + l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e); v->arch.shadow_ldt_mapcnt++; return 1; @@ -851,7 +851,7 @@ static int create_pae_xen_mappings(l3_pg static int create_pae_xen_mappings(l3_pgentry_t *pl3e) { struct page_info *page; - l2_pgentry_t *pl2e; + l2_pgentry_t *pl2e, l2e; l3_pgentry_t l3e3; int i; @@ -892,15 +892,19 @@ static int create_pae_xen_mappings(l3_pg &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT], L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); for ( i = 0; i < PDPT_L2_ENTRIES; i++ ) - pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] = - l2e_from_page( - virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i, - __PAGE_HYPERVISOR); + { + l2e = l2e_from_page( + virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i, + __PAGE_HYPERVISOR); + l2e_write(&pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i], l2e); + } for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ ) - pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] = - 
(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ? - l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR) : - l2e_empty(); + { + l2e = l2e_empty(); + if ( l3e_get_flags(pl3e[i]) & _PAGE_PRESENT ) + l2e = l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR); + l2e_write(&pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i], l2e); + } unmap_domain_page(pl2e); return 1; @@ -2760,7 +2764,7 @@ void destroy_gdt(struct vcpu *v) { if ( (pfn = l1e_get_pfn(v->arch.perdomain_ptes[i])) != 0 ) put_page_and_type(mfn_to_page(pfn)); - v->arch.perdomain_ptes[i] = l1e_empty(); + l1e_write(&v->arch.perdomain_ptes[i], l1e_empty()); v->arch.guest_context.gdt_frames[i] = 0; } } @@ -2794,8 +2798,8 @@ long set_gdt(struct vcpu *v, for ( i = 0; i < nr_pages; i++ ) { v->arch.guest_context.gdt_frames[i] = frames[i]; - v->arch.perdomain_ptes[i] = - l1e_from_pfn(frames[i], __PAGE_HYPERVISOR); + l1e_write(&v->arch.perdomain_ptes[i], + l1e_from_pfn(frames[i], __PAGE_HYPERVISOR)); } return 0; @@ -3298,8 +3302,8 @@ int map_pages_to_xen( !map_small_pages ) { /* Super-page mapping. 
*/ - ol2e = *pl2e; - *pl2e = l2e_from_pfn(mfn, flags|_PAGE_PSE); + ol2e = *pl2e; + l2e_write(pl2e, l2e_from_pfn(mfn, flags|_PAGE_PSE)); if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) ) { @@ -3319,22 +3323,24 @@ int map_pages_to_xen( { pl1e = page_to_virt(alloc_xen_pagetable()); clear_page(pl1e); - *pl2e = l2e_from_page(virt_to_page(pl1e), __PAGE_HYPERVISOR); + l2e_write(pl2e, l2e_from_page(virt_to_page(pl1e), + __PAGE_HYPERVISOR)); } else if ( l2e_get_flags(*pl2e) & _PAGE_PSE ) { pl1e = page_to_virt(alloc_xen_pagetable()); for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) - pl1e[i] = l1e_from_pfn( - l2e_get_pfn(*pl2e) + i, - l2e_get_flags(*pl2e) & ~_PAGE_PSE); - *pl2e = l2e_from_page(virt_to_page(pl1e), __PAGE_HYPERVISOR); + l1e_write(&pl1e[i], + l1e_from_pfn(l2e_get_pfn(*pl2e) + i, + l2e_get_flags(*pl2e) & ~_PAGE_PSE)); + l2e_write(pl2e, l2e_from_page(virt_to_page(pl1e), + __PAGE_HYPERVISOR)); local_flush_tlb_pge(); } pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(virt); ol1e = *pl1e; - *pl1e = l1e_from_pfn(mfn, flags); + l1e_write(pl1e, l1e_from_pfn(mfn, flags)); if ( (l1e_get_flags(ol1e) & _PAGE_PRESENT) ) local_flush_tlb_one(virt); diff -r 93314655b16f -r 1d83974d08b1 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Thu Dec 07 14:40:13 2006 +0000 +++ b/xen/arch/x86/mm/shadow/multi.c Thu Dec 07 14:57:14 2006 +0000 @@ -851,7 +851,9 @@ static inline void safe_write_entry(void * then writing the high word before the low word. 
*/ BUILD_BUG_ON(sizeof (shadow_l1e_t) != 2 * sizeof (unsigned long)); d[0] = 0; + wmb(); d[1] = s[1]; + wmb(); d[0] = s[0]; #else /* In 32-bit and 64-bit, sizeof(pte) == sizeof(ulong) == 1 word, diff -r 93314655b16f -r 1d83974d08b1 xen/arch/x86/x86_32/domain_page.c --- a/xen/arch/x86/x86_32/domain_page.c Thu Dec 07 14:40:13 2006 +0000 +++ b/xen/arch/x86/x86_32/domain_page.c Thu Dec 07 14:57:14 2006 +0000 @@ -107,7 +107,7 @@ void *map_domain_page(unsigned long mfn) spin_unlock(&cache->lock); - cache->l1tab[idx] = l1e_from_pfn(mfn, __PAGE_HYPERVISOR); + l1e_write(&cache->l1tab[idx], l1e_from_pfn(mfn, __PAGE_HYPERVISOR)); out: va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT); @@ -147,7 +147,7 @@ void unmap_domain_page(void *va) { /* /First/, zap the PTE. */ ASSERT(l1e_get_pfn(cache->l1tab[hashent->idx]) == hashent->mfn); - cache->l1tab[hashent->idx] = l1e_empty(); + l1e_write(&cache->l1tab[hashent->idx], l1e_empty()); /* /Second/, mark as garbage. */ set_bit(hashent->idx, cache->garbage); } @@ -159,7 +159,7 @@ void unmap_domain_page(void *va) else { /* /First/, zap the PTE. */ - cache->l1tab[idx] = l1e_empty(); + l1e_write(&cache->l1tab[idx], l1e_empty()); /* /Second/, mark as garbage. */ set_bit(idx, cache->garbage); } @@ -229,7 +229,7 @@ void *map_domain_page_global(unsigned lo pl2e = virt_to_xen_l2e(va); pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(va); - *pl1e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR); + l1e_write(pl1e, l1e_from_pfn(mfn, __PAGE_HYPERVISOR)); return (void *)va; } @@ -246,7 +246,7 @@ void unmap_domain_page_global(void *va) /* /First/, we zap the PTE. */ pl2e = virt_to_xen_l2e(__va); pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(__va); - *pl1e = l1e_empty(); + l1e_write(pl1e, l1e_empty()); /* /Second/, we add to the garbage map. 
*/ idx = (__va - IOREMAP_VIRT_START) >> PAGE_SHIFT; diff -r 93314655b16f -r 1d83974d08b1 xen/arch/x86/x86_32/mm.c --- a/xen/arch/x86/x86_32/mm.c Thu Dec 07 14:40:13 2006 +0000 +++ b/xen/arch/x86/x86_32/mm.c Thu Dec 07 14:57:14 2006 +0000 @@ -99,11 +99,12 @@ void __init paging_init(void) { if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL ) panic("Not enough memory to bootstrap Xen.\n"); - idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i] = - l2e_from_page(pg, PAGE_HYPERVISOR | _PAGE_PSE); + l2e_write(&idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i], + l2e_from_page(pg, PAGE_HYPERVISOR | _PAGE_PSE)); /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */ - idle_pg_table_l2[l2_linear_offset(RO_MPT_VIRT_START) + i] = - l2e_from_page(pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW); + l2e_write(&idle_pg_table_l2[l2_linear_offset(RO_MPT_VIRT_START) + i], + l2e_from_page( + pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW)); } /* Fill with an obvious debug pattern. 
*/ @@ -115,8 +116,8 @@ void __init paging_init(void) { ioremap_pt = alloc_xenheap_page(); clear_page(ioremap_pt); - idle_pg_table_l2[l2_linear_offset(IOREMAP_VIRT_START) + i] = - l2e_from_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR); + l2e_write(&idle_pg_table_l2[l2_linear_offset(IOREMAP_VIRT_START) + i], + l2e_from_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR)); } } @@ -125,10 +126,10 @@ void __init setup_idle_pagetable(void) int i; for ( i = 0; i < PDPT_L2_ENTRIES; i++ ) - idle_pg_table_l2[l2_linear_offset(PERDOMAIN_VIRT_START) + i] = - l2e_from_page(virt_to_page(idle_vcpu[0]->domain-> - arch.mm_perdomain_pt) + i, - __PAGE_HYPERVISOR); + l2e_write(&idle_pg_table_l2[l2_linear_offset(PERDOMAIN_VIRT_START)+i], + l2e_from_page(virt_to_page(idle_vcpu[0]->domain-> + arch.mm_perdomain_pt) + i, + __PAGE_HYPERVISOR)); } void __init zap_low_mappings(l2_pgentry_t *base) @@ -136,14 +137,16 @@ void __init zap_low_mappings(l2_pgentry_ int i; u32 addr; - for (i = 0; ; i++) { - addr = (i << L2_PAGETABLE_SHIFT); - if (addr >= HYPERVISOR_VIRT_START) + for ( i = 0; ; i++ ) + { + addr = i << L2_PAGETABLE_SHIFT; + if ( addr >= HYPERVISOR_VIRT_START ) break; - if (l2e_get_paddr(base[i]) != addr) + if ( l2e_get_paddr(base[i]) != addr ) continue; - base[i] = l2e_empty(); - } + l2e_write(&base[i], l2e_empty()); + } + flush_tlb_all_pge(); } diff -r 93314655b16f -r 1d83974d08b1 xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Thu Dec 07 14:40:13 2006 +0000 +++ b/xen/arch/x86/x86_64/mm.c Thu Dec 07 14:57:14 2006 +0000 @@ -59,7 +59,7 @@ l2_pgentry_t *virt_to_xen_l2e(unsigned l { pl3e = page_to_virt(alloc_xen_pagetable()); clear_page(pl3e); - *pl4e = l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR); + l4e_write(pl4e, l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR)); } pl3e = l4e_to_l3e(*pl4e) + l3_table_offset(v); @@ -67,7 +67,7 @@ l2_pgentry_t *virt_to_xen_l2e(unsigned l { pl2e = page_to_virt(alloc_xen_pagetable()); clear_page(pl2e); - *pl3e = l3e_from_paddr(__pa(pl2e), 
__PAGE_HYPERVISOR); + l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR)); } pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v); @@ -85,8 +85,8 @@ void __init paging_init(void) if ( (l2_pg = alloc_domheap_page(NULL)) == NULL ) goto nomem; l3_ro_mpt = clear_page(page_to_virt(l2_pg)); - idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)] = - l4e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER); + l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)], + l4e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER)); /* * Allocate and map the machine-to-phys table. @@ -111,18 +111,19 @@ void __init paging_init(void) goto nomem; va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT); l2_ro_mpt = clear_page(page_to_virt(l2_pg)); - l3_ro_mpt[l3_table_offset(va)] = - l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER); + l3e_write(&l3_ro_mpt[l3_table_offset(va)], + l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER)); l2_ro_mpt += l2_table_offset(va); } /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */ - *l2_ro_mpt++ = l2e_from_page( - l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT); + l2e_write(l2_ro_mpt, l2e_from_page( + l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT)); + l2_ro_mpt++; } /* Set up linear page table mapping. */ - idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)] = - l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR); + l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)], + l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR)); return; nomem: @@ -132,15 +133,15 @@ void __init setup_idle_pagetable(void) void __init setup_idle_pagetable(void) { /* Install per-domain mappings for idle domain. 
*/ - idle_pg_table[l4_table_offset(PERDOMAIN_VIRT_START)] = - l4e_from_page( - virt_to_page(idle_vcpu[0]->domain->arch.mm_perdomain_l3), - __PAGE_HYPERVISOR); + l4e_write(&idle_pg_table[l4_table_offset(PERDOMAIN_VIRT_START)], + l4e_from_page( + virt_to_page(idle_vcpu[0]->domain->arch.mm_perdomain_l3), + __PAGE_HYPERVISOR)); } void __init zap_low_mappings(void) { - idle_pg_table[0] = l4e_empty(); + l4e_write(&idle_pg_table[0], l4e_empty()); flush_tlb_all_pge(); } diff -r 93314655b16f -r 1d83974d08b1 xen/include/asm-x86/page.h --- a/xen/include/asm-x86/page.h Thu Dec 07 14:40:13 2006 +0000 +++ b/xen/include/asm-x86/page.h Thu Dec 07 14:57:14 2006 +0000 @@ -33,9 +33,18 @@ /* Write a pte atomically to memory. */ #define l1e_write_atomic(l1ep, l1e) pte_write_atomic(l1ep, l1e_get_intpte(l1e)) -#define l2e_write_atomic(l2ep, l2e) pte_write_atomic(l2ep, l1e_get_intpte(l2e)) -#define l3e_write_atomic(l3ep, l3e) pte_write_atomic(l3ep, l1e_get_intpte(l3e)) -#define l4e_write_atomic(l4ep, l4e) pte_write_atomic(l4ep, l1e_get_intpte(l4e)) +#define l2e_write_atomic(l2ep, l2e) pte_write_atomic(l2ep, l2e_get_intpte(l2e)) +#define l3e_write_atomic(l3ep, l3e) pte_write_atomic(l3ep, l3e_get_intpte(l3e)) +#define l4e_write_atomic(l4ep, l4e) pte_write_atomic(l4ep, l4e_get_intpte(l4e)) + +/* + * Write a pte safely but non-atomically to memory. + * The PTE may become temporarily not-present during the update. + */ +#define l1e_write(l1ep, l1e) pte_write(l1ep, l1e_get_intpte(l1e)) +#define l2e_write(l2ep, l2e) pte_write(l2ep, l2e_get_intpte(l2e)) +#define l3e_write(l3ep, l3e) pte_write(l3ep, l3e_get_intpte(l3e)) +#define l4e_write(l4ep, l4e) pte_write(l4ep, l4e_get_intpte(l4e)) /* Get direct integer representation of a pte's contents (intpte_t). 
*/ #define l1e_get_intpte(x) ((x).l1) diff -r 93314655b16f -r 1d83974d08b1 xen/include/asm-x86/x86_32/page-2level.h --- a/xen/include/asm-x86/x86_32/page-2level.h Thu Dec 07 14:40:13 2006 +0000 +++ b/xen/include/asm-x86/x86_32/page-2level.h Thu Dec 07 14:57:14 2006 +0000 @@ -29,7 +29,8 @@ typedef l2_pgentry_t root_pgentry_t; #endif /* !__ASSEMBLY__ */ #define pte_read_atomic(ptep) (*(intpte_t *)(ptep)) -#define pte_write_atomic(ptep, pte) (*(intpte_t *)(ptep)) = (pte)) +#define pte_write_atomic(ptep, pte) ((*(intpte_t *)(ptep)) = (pte)) +#define pte_write(ptep, pte) ((*(intpte_t *)(ptep)) = (pte)) /* root table */ #define root_get_pfn l2e_get_pfn diff -r 93314655b16f -r 1d83974d08b1 xen/include/asm-x86/x86_32/page-3level.h --- a/xen/include/asm-x86/x86_32/page-3level.h Thu Dec 07 14:40:13 2006 +0000 +++ b/xen/include/asm-x86/x86_32/page-3level.h Thu Dec 07 14:57:14 2006 +0000 @@ -48,6 +48,13 @@ typedef l3_pgentry_t root_pgentry_t; while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, (pte))) != __pte ) \ __pte = __npte; \ } while ( 0 ) +#define pte_write(ptep, pte) do { \ + *((u32 *)(ptep)+0) = 0; \ + wmb(); \ + *((u32 *)(ptep)+1) = (pte) >> 32; \ + wmb(); \ + *((u32 *)(ptep)+0) = (pte) >> 0; \ +} while ( 0 ) /* root table */ #define root_get_pfn l3e_get_pfn diff -r 93314655b16f -r 1d83974d08b1 xen/include/asm-x86/x86_64/page.h --- a/xen/include/asm-x86/x86_64/page.h Thu Dec 07 14:40:13 2006 +0000 +++ b/xen/include/asm-x86/x86_64/page.h Thu Dec 07 14:57:14 2006 +0000 @@ -44,7 +44,8 @@ typedef l4_pgentry_t root_pgentry_t; #endif /* !__ASSEMBLY__ */ #define pte_read_atomic(ptep) (*(intpte_t *)(ptep)) -#define pte_write_atomic(ptep, pte) (*(intpte_t *)(ptep)) = (pte)) +#define pte_write_atomic(ptep, pte) ((*(intpte_t *)(ptep)) = (pte)) +#define pte_write(ptep, pte) ((*(intpte_t *)(ptep)) = (pte)) /* Given a virtual address, get an entry offset into a linear page table. 
*/ #define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> L1_PAGETABLE_SHIFT) _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |