Xen project Mailing List

[Xen-devel] [PATCH 2/2] xen/mm: Introduce PGC_state_uninitialised

From: David Woodhouse <dwmw@xxxxxxxxxxxx> It is possible for pages to enter general circulation without ever being process by init_heap_pages(). For example, pages of the multiboot module containing the initramfs may be assigned via assign_pages() to dom0 as it is created. And some code including map_pages_to_xen() has checks on 'system_state' to determine whether to use the boot or the heap allocator, but it seems impossible to prove that pages allocated by the boot allocator are not subsequently freed with free_heap_pages(). This actually works fine in the majority of cases; there are only a few esoteric corner cases which init_heap_pages() handles before handing the page range off to free_heap_pages(): • Excluding MFN #0 to avoid inappropriate cross-zone merging. • Ensuring that the node information structures exist, when the first page(s) of a given node are handled. • High order allocations crossing from one node to another. To handle this case, shift PG_state_inuse from its current value of zero, to another value. Use zero, which is the initial state of the entire frame table, as PG_state_uninitialised. Fix a couple of assertions which were assuming that PG_state_inuse is zero, and make them cope with the PG_state_uninitialised case too where appopriate. Finally, make free_heap_pages() call through to init_heap_pages() when given a page range which has not been initialised. This cannot keep recursing because init_heap_pages() will set each page state to PGC_state_inuse before passing it back to free_heap_pages() for the second time. Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx> --- xen/arch/x86/mm.c | 3 ++- xen/common/page_alloc.c | 44 +++++++++++++++++++++++++++++----------- xen/include/asm-arm/mm.h | 3 ++- xen/include/asm-x86/mm.h | 3 ++- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 62507ca651..5f0581c072 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -491,7 +491,8 @@ void share_xen_page_with_guest(struct page_info *page, struct domain *d, page_set_owner(page, d); smp_wmb(); /* install valid domain ptr before updating refcnt. */ - ASSERT((page->count_info & ~PGC_xen_heap) == 0); + ASSERT((page->count_info & ~PGC_xen_heap) == PGC_state_inuse || + (page->count_info & ~PGC_xen_heap) == PGC_state_uninitialised); /* Only add to the allocation list if the domain isn't dying. */ if ( !d->is_dying ) diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 8d72a64f4e..4f7971f2a1 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -252,6 +252,8 @@ struct bootmem_region { static struct bootmem_region __initdata bootmem_region_list[PAGE_SIZE / sizeof(struct bootmem_region)]; static unsigned int __initdata nr_bootmem_regions; +static void init_heap_pages(struct page_info *pg, unsigned long nr_pages, + bool scrub); struct scrub_region { unsigned long offset; @@ -1390,6 +1392,17 @@ static void free_heap_pages( ASSERT(order <= MAX_ORDER); ASSERT(node >= 0); + if ( page_state_is(pg, uninitialised) ) + { + init_heap_pages(pg, 1 << order, need_scrub); + /* + * init_heap_pages() will call back into free_heap_pages() for + * each page but cannot keep recursing because each page will + * be set to PGC_state_inuse first. + */ + return; + } + spin_lock(&heap_lock); for ( i = 0; i < (1 << order); i++ ) @@ -1771,11 +1784,10 @@ int query_page_offline(mfn_t mfn, uint32_t *status) * latter is not on a MAX_ORDER boundary, then we reserve the page by * not freeing it to the buddy allocator. */ -static void init_heap_pages( - struct page_info *pg, unsigned long nr_pages) +static void init_heap_pages(struct page_info *pg, unsigned long nr_pages, + bool scrub) { unsigned long i; - bool idle_scrub = false; /* * Keep MFN 0 away from the buddy allocator to avoid crossing zone @@ -1800,7 +1812,7 @@ static void init_heap_pages( spin_unlock(&heap_lock); if ( system_state < SYS_STATE_active && opt_bootscrub == BOOTSCRUB_IDLE ) - idle_scrub = true; + scrub = true; for ( i = 0; i < nr_pages; i++ ) { @@ -1828,7 +1840,8 @@ static void init_heap_pages( nr_pages -= n; } - free_heap_pages(pg + i, 0, scrub_debug || idle_scrub); + pg[i].count_info = PGC_state_inuse; + free_heap_pages(pg + i, 0, scrub_debug || scrub); } } @@ -1864,7 +1877,7 @@ void __init end_boot_allocator(void) if ( (r->s < r->e) && (phys_to_nid(pfn_to_paddr(r->s)) == cpu_to_node(0)) ) { - init_heap_pages(mfn_to_page(_mfn(r->s)), r->e - r->s); + init_heap_pages(mfn_to_page(_mfn(r->s)), r->e - r->s, false); r->e = r->s; break; } @@ -1873,7 +1886,7 @@ void __init end_boot_allocator(void) { struct bootmem_region *r = &bootmem_region_list[i]; if ( r->s < r->e ) - init_heap_pages(mfn_to_page(_mfn(r->s)), r->e - r->s); + init_heap_pages(mfn_to_page(_mfn(r->s)), r->e - r->s, false); } nr_bootmem_regions = 0; @@ -2142,7 +2155,7 @@ void init_xenheap_pages(paddr_t ps, paddr_t pe) memguard_guard_range(maddr_to_virt(ps), pe - ps); - init_heap_pages(maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT); + init_heap_pages(maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT, false); } @@ -2251,7 +2264,7 @@ void init_domheap_pages(paddr_t ps, paddr_t pe) if ( mfn_x(emfn) <= mfn_x(smfn) ) return; - init_heap_pages(mfn_to_page(smfn), mfn_x(emfn) - mfn_x(smfn)); + init_heap_pages(mfn_to_page(smfn), mfn_x(emfn) - mfn_x(smfn), false); } @@ -2280,7 +2293,8 @@ int assign_pages( for ( i = 0; i < (1ul << order); i++ ) { - ASSERT(!(pg[i].count_info & ~PGC_extra)); + ASSERT((pg[i].count_info & ~PGC_extra) == PGC_state_inuse || + (pg[i].count_info & ~PGC_extra) == PGC_state_uninitialised); if ( pg[i].count_info & PGC_extra ) extra_pages++; } @@ -2316,10 +2330,16 @@ int assign_pages( for ( i = 0; i < (1 << order); i++ ) { ASSERT(page_get_owner(&pg[i]) == NULL); + /* + * Note: Not using page_state_is() here. The ASSERT requires that + * all other bits in count_info are zero, in addition to PGC_state + * being appropriate. + */ + ASSERT((pg[i].count_info & ~PGC_extra) == PGC_state_inuse || + (pg[i].count_info & ~PGC_extra) == PGC_state_uninitialised); page_set_owner(&pg[i], d); smp_wmb(); /* Domain pointer must be visible before updating refcnt. */ - pg[i].count_info = - (pg[i].count_info & PGC_extra) | PGC_allocated | 1; + pg[i].count_info = (pg[i].count_info & PGC_state) | PGC_allocated | 1; page_list_add_tail(&pg[i], &d->page_list); } diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h index a877791d1c..49663fa98a 100644 --- a/xen/include/asm-arm/mm.h +++ b/xen/include/asm-arm/mm.h @@ -113,12 +113,13 @@ struct page_info * { inuse, offlining, offlined, free, broken_offlining, broken } */ #define PGC_state PG_mask(7, 9) -#define PGC_state_inuse PG_mask(0, 9) +#define PGC_state_uninitialised PG_mask(0, 9) #define PGC_state_offlining PG_mask(1, 9) #define PGC_state_offlined PG_mask(2, 9) #define PGC_state_free PG_mask(3, 9) #define PGC_state_broken_offlining PG_mask(4, 9) /* Broken and offlining */ #define PGC_state_broken PG_mask(5, 9) /* Broken and offlined */ +#define PGC_state_inuse PG_mask(6, 9) #define pgc_is(pgc, st) (((pgc) & PGC_state) == PGC_state_##st) #define page_state_is(pg, st) pgc_is((pg)->count_info, st) diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 1203f1b179..5fbbca5f05 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -72,12 +72,13 @@ * { inuse, offlining, offlined, free, broken_offlining, broken } */ #define PGC_state PG_mask(7, 9) -#define PGC_state_inuse PG_mask(0, 9) +#define PGC_state_uninitialised PG_mask(0, 9) #define PGC_state_offlining PG_mask(1, 9) #define PGC_state_offlined PG_mask(2, 9) #define PGC_state_free PG_mask(3, 9) #define PGC_state_broken_offlining PG_mask(4, 9) /* Broken and offlining */ #define PGC_state_broken PG_mask(5, 9) /* Broken and offlined */ +#define PGC_state_inuse PG_mask(6, 9) #define pgc_is(pgc, st) (((pgc) & PGC_state) == PGC_state_##st) #define page_state_is(pg, st) pgc_is((pg)->count_info, st) -- 2.21.0 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.