# HG changeset patch
# User yamahata@xxxxxxxxxxxxx
# Date 1173948291 -32400
# Node ID 946572694e6b0564606870609fcb81d523ac87ab
# Parent 817647aff01f2770aa60064cac0445a2169a18b3
This patch fixes the memory exchange hypercall, which has been broken
on ia64, especially in its error recovery path.

Clearing the _PGC_allocated bit and calling guest_physmap_remove_page()
must be done before stealing the page, because the ia64 p2m table
increments the page refcount and guest_physmap_remove_page() behaves
differently depending on _PGC_allocated. So far ia64 has carried
work-around code for this, but c/s 13366:ed73ff8440d8 of xen-unstable.hg
revealed that the work-around is broken: that c/s passes a dma-bit
argument to memory exchange, which results in a hypercall error.

On x86, memory_exchange() is used only for paravirtualized domains, and
guest_physmap_remove_page() and guest_physmap_add_page() are nops for
paravirtualized domains, so reordering them is safe.

PATCHNAME: ia64_memory_exchange_fix

Signed-off-by: Isaku Yamahata
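To make the ordering constraint concrete, below is a minimal standalone
sketch (plain C, compilable on its own) of the sequence this patch
enforces in memory_exchange(). Everything in it -- the page struct,
physmap_remove(), steal() -- is an illustrative stand-in, not the real
Xen API; it only models the refcount bookkeeping that forces
_PGC_allocated to be cleared and the p2m entry to be removed before
steal_page() can succeed.

#include <stdio.h>

struct page { unsigned long count_info; int owner; };

#define PGC_allocated (1UL << 31)

/* Stand-in: on ia64 the p2m table holds its own reference, and
 * guest_physmap_remove_page() drops it. */
static void physmap_remove(struct page *pg) { pg->count_info--; }

/* Stand-in: steal_page() succeeds only on a page with a single
 * remaining reference and _PGC_allocated already cleared. */
static int steal(struct page *pg)
{
    if ((pg->count_info & ~PGC_allocated) != 1 ||
        (pg->count_info & PGC_allocated))
        return -1;
    pg->owner = -1;         /* page no longer owned by the guest */
    return 0;
}

int main(void)
{
    /* One guest page: a general ref plus the ia64 p2m ref. */
    struct page pg = { .count_info = 2 | PGC_allocated, .owner = 0 };

    pg.count_info &= ~PGC_allocated;  /* 1. clear _PGC_allocated      */
    physmap_remove(&pg);              /* 2. tear down the p2m mapping */
    if (steal(&pg) == 0)              /* 3. only now steal the page   */
        printf("steal succeeded\n");
    return 0;
}

With the old ordering (steal first), the p2m reference keeps count_info
at 2, so the steal check could not be satisfied without the ia64
work-around that this patch removes.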
diff -r 817647aff01f -r 946572694e6b xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c	Wed Mar 14 14:18:51 2007 -0700
+++ b/xen/arch/ia64/xen/mm.c	Thu Mar 15 17:44:51 2007 +0900
@@ -1078,21 +1078,20 @@ assign_domain_mach_page(struct domain *d
 
 static void
 domain_put_page(struct domain* d, unsigned long mpaddr,
-                volatile pte_t* ptep, pte_t old_pte, int clear_PGC_allocate)
+                volatile pte_t* ptep, pte_t old_pte)
 {
     unsigned long mfn = pte_pfn(old_pte);
     struct page_info* page = mfn_to_page(mfn);
 
     if (pte_pgc_allocated(old_pte)) {
-        if (page_get_owner(page) == d || page_get_owner(page) == NULL) {
+        if (page_get_owner(page) == d) {
             BUG_ON(get_gpfn_from_mfn(mfn) != (mpaddr >> PAGE_SHIFT));
-            set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+            set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
         } else {
             BUG();
         }
 
-        if (clear_PGC_allocate)
-            try_to_clear_PGC_allocate(d, page);
+        try_to_clear_PGC_allocate(d, page);
     }
     domain_page_flush_and_put(d, mpaddr, ptep, old_pte, page);
 }
@@ -1127,7 +1126,7 @@ assign_domain_page_replace(struct domain
         //   => create_host_mapping()
         //      => assign_domain_page_replace()
         if (mfn != old_mfn) {
-            domain_put_page(d, mpaddr, pte, old_pte, 1);
+            domain_put_page(d, mpaddr, pte, old_pte);
         }
     }
     perfc_incrc(assign_domain_page_replace);
@@ -1221,10 +1220,7 @@ zap_domain_page_one(struct domain *d, un
     pte_t ret_pte;
 
  again:
-    // memory_exchange() calls guest_physmap_remove_page() with
-    // a stealed page. i.e. page owner = NULL.
-    BUG_ON(page_get_owner(mfn_to_page(mfn)) != d &&
-           page_get_owner(mfn_to_page(mfn)) != NULL);
+    BUG_ON(page_get_owner(mfn_to_page(mfn)) != d);
     old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;
     old_pte = pfn_pte(mfn, __pgprot(old_arflags));
     new_pte = __pte(0);
@@ -1249,12 +1245,7 @@ zap_domain_page_one(struct domain *d, un
     page = mfn_to_page(mfn);
     BUG_ON((page->count_info & PGC_count_mask) == 0);
 
-    // exchange_memory() calls
-    //     steal_page()
-    //       page owner is set to NULL
-    //     guest_physmap_remove_page()
-    //       zap_domain_page_one()
-    domain_put_page(d, mpaddr, pte, old_pte, (page_get_owner(page) != NULL));
+    domain_put_page(d, mpaddr, pte, old_pte);
     perfc_incrc(zap_dcomain_page_one);
 }
 
@@ -1639,18 +1630,9 @@ steal_page(struct domain *d, struct page
         // page->u.inused._domain = 0;
         _nd = x >> 32;
 
-        if (
-            // when !MEMF_no_refcount, page might be put_page()'d or
-            // it will be put_page()'d later depending on queued.
-            unlikely(!(memflags & MEMF_no_refcount) &&
-                     ((x & (PGC_count_mask | PGC_allocated)) !=
-                      (1 | PGC_allocated))) ||
-            // when MEMF_no_refcount, page isn't de-assigned from
-            // this domain yet. So count_info = 2
-            unlikely((memflags & MEMF_no_refcount) &&
-                     ((x & (PGC_count_mask | PGC_allocated)) !=
-                      (2 | PGC_allocated))) ||
-
+        if (unlikely((x & (PGC_count_mask | PGC_allocated)) !=
+                     (1 | ((memflags & MEMF_no_refcount)?
+                           0: PGC_allocated))) ||
             unlikely(_nd != _d)) {
             struct domain* nd = unpickle_domptr(_nd);
             if (nd == NULL) {
diff -r 817647aff01f -r 946572694e6b xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c	Wed Mar 14 14:18:51 2007 -0700
+++ b/xen/arch/x86/mm.c	Thu Mar 15 17:44:51 2007 +0900
@@ -2677,8 +2677,10 @@ int steal_page(
     y = page->count_info;
     do {
         x = y;
-        if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
-                     (1 | PGC_allocated)) || unlikely(_nd != _d)) {
+        if (unlikely((x & (PGC_count_mask | PGC_allocated)) !=
+                     (1 | ((memflags & MEMF_no_refcount)?
+                           0: PGC_allocated))) ||
+            unlikely(_nd != _d)) {
             MEM_LOG("gnttab_transfer: Bad page %p: ed=%p(%u), sd=%p,"
                     " caf=%08x, taf=%" PRtype_info "\n",
                     (void *) page_to_mfn(page),
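The ia64 and x86 versions of steal_page() now test the same predicate.
As a sanity illustration, here is a self-contained check of that
predicate; the flag values below are stand-ins, not Xen's real bit
layout.

#include <assert.h>
#include <stdio.h>

#define PGC_count_mask   0x0000ffffUL
#define PGC_allocated    0x80000000UL
#define MEMF_no_refcount 0x1U

/* With MEMF_no_refcount the caller has already cleared _PGC_allocated,
 * so exactly one bare reference is expected; otherwise the single
 * reference must still carry _PGC_allocated. */
static int steal_ok(unsigned long count_info, unsigned int memflags)
{
    return (count_info & (PGC_count_mask | PGC_allocated)) ==
           (1 | ((memflags & MEMF_no_refcount) ? 0 : PGC_allocated));
}

int main(void)
{
    assert( steal_ok(1 | PGC_allocated, 0));
    assert(!steal_ok(1,                 0));  /* bit already cleared */
    assert( steal_ok(1,                 MEMF_no_refcount));
    assert(!steal_ok(2 | PGC_allocated, MEMF_no_refcount)); /* old WA case */
    printf("steal_page() predicate behaves as expected\n");
    return 0;
}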
diff -r 817647aff01f -r 946572694e6b xen/common/memory.c
--- a/xen/common/memory.c	Wed Mar 14 14:18:51 2007 -0700
+++ b/xen/common/memory.c	Thu Mar 15 17:44:51 2007 +0900
@@ -288,6 +288,7 @@ static long memory_exchange(XEN_GUEST_HA
     LIST_HEAD(out_chunk_list);
     unsigned long in_chunk_order, out_chunk_order;
     xen_pfn_t     gpfn, gmfn, mfn;
+    xen_pfn_t    *removed_gmfns;
     unsigned long i, j, k;
     unsigned int  memflags = 0, cpu;
     long          rc = 0;
@@ -356,6 +357,14 @@ static long memory_exchange(XEN_GUEST_HA
 
     cpu = select_local_cpu(d);
 
+    removed_gmfns = xmalloc_array(xen_pfn_t, (1UL << in_chunk_order) *
+                                  (1UL << exch.in.extent_order));
+    if ( removed_gmfns == NULL )
+    {
+        rc = -ENOMEM;
+        goto fail_early;
+    }
+
     for ( i = (exch.nr_exchanged >> in_chunk_order);
           i < (exch.in.nr_extents >> in_chunk_order);
           i++ )
@@ -363,6 +372,7 @@ static long memory_exchange(XEN_GUEST_HA
         if ( hypercall_preempt_check() )
         {
             exch.nr_exchanged = i << in_chunk_order;
+            xfree(removed_gmfns);
            if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
                 return -EFAULT;
             return hypercall_create_continuation(
@@ -379,24 +389,35 @@ static long memory_exchange(XEN_GUEST_HA
                 goto fail;
             }
 
-            for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
+            for ( k = 0; k < (1UL << exch.in.extent_order); k++, gmfn++ )
             {
-                mfn = gmfn_to_mfn(d, gmfn + k);
+                mfn = gmfn_to_mfn(d, gmfn);
                 if ( unlikely(!mfn_valid(mfn)) )
                 {
                     rc = -EINVAL;
                     goto fail;
                 }
+                removed_gmfns[(1UL << exch.in.extent_order) * j + k] = gmfn;
 
                 page = mfn_to_page(mfn);
-
-                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
+                if ( unlikely(!test_and_clear_bit(_PGC_allocated,
+                                                  &page->count_info)) )
                 {
                     rc = -EINVAL;
                     goto fail;
                 }
-
-                list_add(&page->list, &in_chunk_list);
+                guest_physmap_remove_page(d, gmfn, mfn);
+
+                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
+                {
+                    if ( test_and_set_bit(_PGC_allocated, &page->count_info) )
+                        BUG();
+                    guest_physmap_add_page(d, gmfn, mfn);
+                    rc = -EINVAL;
+                    goto fail;
+                }
+
+                list_add_tail(&page->list, &in_chunk_list);
             }
         }
 
@@ -423,10 +444,6 @@ static long memory_exchange(XEN_GUEST_HA
         {
             page = list_entry(in_chunk_list.next, struct page_info, list);
             list_del(&page->list);
-            if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
-                BUG();
-            mfn = page_to_mfn(page);
-            guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
             put_page(page);
         }
 
@@ -463,6 +480,7 @@ static long memory_exchange(XEN_GUEST_HA
         BUG_ON(j != (1UL << out_chunk_order));
     }
 
+    xfree(removed_gmfns);
     exch.nr_exchanged = exch.in.nr_extents;
     if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
         rc = -EFAULT;
@@ -474,13 +492,17 @@ static long memory_exchange(XEN_GUEST_HA
      */
  fail:
     /* Reassign any input pages we managed to steal. */
+    j = 0;
     while ( !list_empty(&in_chunk_list) )
     {
         page = list_entry(in_chunk_list.next, struct page_info, list);
         list_del(&page->list);
         if ( assign_pages(d, page, 0, MEMF_no_refcount) )
             BUG();
-    }
+        guest_physmap_add_page(d, removed_gmfns[j], page_to_mfn(page));
+        j++;
+    }
+    xfree(removed_gmfns);
 
     /* Free any output pages we managed to allocate. */
     while ( !list_empty(&out_chunk_list) )
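Finally, a standalone sketch of the recovery bookkeeping this patch adds
to memory_exchange(): each gmfn is remembered as its page is unmapped,
so the failure path can restore exactly the mappings that were torn
down. Names and values here are hypothetical stand-ins for the real
code.

#include <stdio.h>
#include <stdlib.h>

typedef unsigned long xen_pfn_t;

int main(void)
{
    const unsigned long nr = 4;     /* pages in the exchange, say */
    xen_pfn_t *removed_gmfns = malloc(nr * sizeof(*removed_gmfns));
    unsigned long k, j, stolen = 0;

    if (removed_gmfns == NULL)
        return 1;                   /* mirrors the new -ENOMEM path */

    /* Forward path: record the gmfn of every page actually stolen. */
    for (k = 0; k < nr; k++) {
        if (k == 2)
            break;                  /* simulate a steal_page() failure */
        removed_gmfns[stolen++] = 0x1000 + k;   /* gmfn stand-in */
    }

    /* Failure path: re-add exactly the recorded mappings, in order. */
    for (j = 0; j < stolen; j++)
        printf("guest_physmap_add_page(d, %#lx, mfn)\n", removed_gmfns[j]);

    free(removed_gmfns);
    return 0;
}

This ordering requirement is also why list_add() becomes
list_add_tail(): the fail path walks in_chunk_list front to back, and
tail insertion keeps the list order aligned with the removed_gmfns[]
indices.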