[Xen-devel] [PATCH 9 of 9] Modify all call sites of queries into the p2m to use the new fine-grained locking
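The discipline every converted call site follows is query, use, drop: a p2m
query such as gfn_to_mfn() now locks the queried gfn's p2m entry and takes a
page reference on the returned mfn, and the caller must release both on every
exit path with drop_p2m_gfn_domain(). A minimal sketch of that calling
convention, built from the helper names used in the diff below (it is an
illustration of the pattern, not code taken from the patch):

    /* Illustrative only: a hypothetical caller outside the p2m code,
     * written against the helpers this series converts callers to use. */
    static int use_guest_frame(struct domain *d, unsigned long gfn)
    {
        p2m_type_t p2mt;
        unsigned long mfn;
        int rc = 0;

        /* The query locks the p2m entry for gfn and gets a ref on the mfn */
        mfn = mfn_x(gfn_to_mfn(d, gfn, &p2mt));

        if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) )
            rc = -EINVAL;
        else
        {
            /* While the entry is held, the translation cannot change and
             * the underlying page cannot go away. */
            void *p = map_domain_page(mfn);
            /* ... operate on the frame ... */
            unmap_domain_page(p);
        }

        /* Every exit path must unlock the entry and drop the ref */
        drop_p2m_gfn_domain(d, gfn, mfn);
        return rc;
    }

Failure paths have to drop as well, which is why so many early returns in the
diff below grow a drop_p2m_gfn_domain() call.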
 xen/arch/x86/cpu/mcheck/vmce.c     |    7 +-
 xen/arch/x86/debug.c               |    7 +-
 xen/arch/x86/domain.c              |   24 +++++-
 xen/arch/x86/domctl.c              |    9 ++-
 xen/arch/x86/hvm/emulate.c         |   25 ++++++-
 xen/arch/x86/hvm/hvm.c             |  126 ++++++++++++++++++++++++++++++------
 xen/arch/x86/hvm/mtrr.c            |    2 +-
 xen/arch/x86/hvm/nestedhvm.c       |    2 +-
 xen/arch/x86/hvm/stdvga.c          |    4 +-
 xen/arch/x86/hvm/svm/nestedsvm.c   |   12 ++-
 xen/arch/x86/hvm/svm/svm.c         |   11 ++-
 xen/arch/x86/hvm/viridian.c        |    4 +
 xen/arch/x86/hvm/vmx/vmx.c         |   13 +++-
 xen/arch/x86/hvm/vmx/vvmx.c        |   11 ++-
 xen/arch/x86/mm.c                  |  126 +++++++++++++++++++++++++++++++++---
 xen/arch/x86/mm/guest_walk.c       |   11 +++
 xen/arch/x86/mm/hap/guest_walk.c   |   15 +++-
 xen/arch/x86/mm/mem_event.c        |   28 ++++++-
 xen/arch/x86/mm/mem_sharing.c      |   23 +++++-
 xen/arch/x86/mm/shadow/common.c    |    4 +-
 xen/arch/x86/mm/shadow/multi.c     |   67 +++++++++++++++----
 xen/arch/x86/physdev.c             |    9 ++
 xen/arch/x86/traps.c               |   17 +++-
 xen/common/grant_table.c           |   27 +++++++-
 xen/common/memory.c                |    9 ++
 xen/common/tmem_xen.c              |   21 ++++-
 xen/include/asm-x86/hvm/hvm.h      |    5 +-
 xen/include/asm-x86/hvm/vmx/vvmx.h |    1 +
 28 files changed, 519 insertions(+), 101 deletions(-)

This patch is humongous, unfortunately, given the dozens of call sites
involved.

For callers outside of the p2m code, we also perform a get_page on the
resulting mfn of the query. This ensures that the caller, while operating
on the gfn, has exclusive control of the p2m entry, and that the
underlying mfn will not go away.

We cannot enforce ordering of this fine-grained p2m lock at this point
because there are some inversions present in the current code (pod
sweeps, unshare page) that will take more time to unroot.

Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>

diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/cpu/mcheck/vmce.c
--- a/xen/arch/x86/cpu/mcheck/vmce.c
+++ b/xen/arch/x86/cpu/mcheck/vmce.c
@@ -574,6 +574,7 @@ int unmmap_broken_page(struct domain *d,
 {
     mfn_t r_mfn;
     p2m_type_t pt;
+    int rc;
 
     /* Always trust dom0's MCE handler will prevent future access */
     if ( d == dom0 )
@@ -585,14 +586,16 @@ int unmmap_broken_page(struct domain *d,
     if ( !is_hvm_domain(d) || !paging_mode_hap(d) )
         return -ENOSYS;
 
+    rc = -1;
     r_mfn = gfn_to_mfn_query(d, gfn, &pt);
     if ( p2m_to_mask(pt) & P2M_UNMAP_TYPES)
     {
         ASSERT(mfn_x(r_mfn) == mfn_x(mfn));
         p2m_change_type(d, gfn, pt, p2m_ram_broken);
-        return 0;
+        rc = 0;
     }
+    drop_p2m_gfn_domain(d, gfn, mfn_x(r_mfn));
 
-    return -1;
+    return rc;
 }
 
diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/debug.c
--- a/xen/arch/x86/debug.c
+++ b/xen/arch/x86/debug.c
@@ -45,7 +45,8 @@
 static unsigned long
 dbg_hvm_va2mfn(dbgva_t vaddr, struct domain *dp, int toaddr)
 {
-    unsigned long mfn, gfn;
+    unsigned long gfn;
+    mfn_t mfn;
     uint32_t pfec = PFEC_page_present;
     p2m_type_t gfntype;
 
@@ -58,7 +59,7 @@ dbg_hvm_va2mfn(dbgva_t vaddr, struct dom
         return INVALID_MFN;
     }
 
-    mfn = mfn_x(gfn_to_mfn(dp, gfn, &gfntype));
+    mfn = gfn_to_mfn_unlocked(dp, gfn, &gfntype);
    if ( p2m_is_readonly(gfntype) && toaddr )
    {
        DBGP2("kdb:p2m_is_readonly: gfntype:%x\n", gfntype);
@@ -66,7 +67,7 @@ dbg_hvm_va2mfn(dbgva_t vaddr, struct dom
     }
 
     DBGP2("X: vaddr:%lx domid:%d mfn:%lx\n", vaddr, dp->domain_id, mfn);
-    return mfn;
+    return mfn_x(mfn);
 }
 
 #if defined(__x86_64__)
diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -720,6 +720,7 @@ int arch_set_info_guest(
     struct vcpu *v, vcpu_guest_context_u c)
 {
     struct domain *d = v->domain;
+    unsigned long cr3_gfn;
     unsigned long cr3_pfn = INVALID_MFN;
     unsigned long flags, cr4;
unsigned int i; @@ -931,7 +932,8 @@ int arch_set_info_guest( if ( !compat ) { - cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[3])); + cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[3]); + cr3_pfn = gmfn_to_mfn(d, cr3_gfn); if ( !mfn_valid(cr3_pfn) || (paging_mode_refcounts(d) @@ -939,16 +941,18 @@ int arch_set_info_guest( : !get_page_and_type(mfn_to_page(cr3_pfn), d, PGT_base_page_table)) ) { + drop_p2m_gfn_domain(d, cr3_gfn, cr3_pfn); destroy_gdt(v); return -EINVAL; } v->arch.guest_table = pagetable_from_pfn(cr3_pfn); - + drop_p2m_gfn_domain(d, cr3_gfn, cr3_pfn); #ifdef __x86_64__ if ( c.nat->ctrlreg[1] ) { - cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[1])); + cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[1]); + cr3_pfn = gmfn_to_mfn(d, cr3_gfn); if ( !mfn_valid(cr3_pfn) || (paging_mode_refcounts(d) @@ -962,11 +966,13 @@ int arch_set_info_guest( put_page(mfn_to_page(cr3_pfn)); else put_page_and_type(mfn_to_page(cr3_pfn)); + drop_p2m_gfn_domain(d, cr3_gfn, cr3_pfn); destroy_gdt(v); return -EINVAL; } v->arch.guest_table_user = pagetable_from_pfn(cr3_pfn); + drop_p2m_gfn_domain(d, cr3_gfn, cr3_pfn); } else if ( !(flags & VGCF_in_kernel) ) { @@ -978,7 +984,8 @@ int arch_set_info_guest( { l4_pgentry_t *l4tab; - cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c.cmp->ctrlreg[3])); + cr3_gfn = compat_cr3_to_pfn(c.cmp->ctrlreg[3]); + cr3_pfn = gmfn_to_mfn(d, cr3_gfn); if ( !mfn_valid(cr3_pfn) || (paging_mode_refcounts(d) @@ -986,6 +993,7 @@ int arch_set_info_guest( : !get_page_and_type(mfn_to_page(cr3_pfn), d, PGT_l3_page_table)) ) { + drop_p2m_gfn_domain(d, cr3_gfn, cr3_pfn); destroy_gdt(v); return -EINVAL; } @@ -993,6 +1001,7 @@ int arch_set_info_guest( l4tab = __va(pagetable_get_paddr(v->arch.guest_table)); *l4tab = l4e_from_pfn( cr3_pfn, _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED); + drop_p2m_gfn_domain(d, cr3_gfn, cr3_pfn); #endif } @@ -1058,11 +1067,12 @@ unmap_vcpu_info(struct vcpu *v) * event doesn't get missed. 
*/ static int -map_vcpu_info(struct vcpu *v, unsigned long mfn, unsigned offset) +map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset) { struct domain *d = v->domain; void *mapping; vcpu_info_t *new_info; + unsigned long mfn; int i; if ( offset > (PAGE_SIZE - sizeof(vcpu_info_t)) ) @@ -1075,7 +1085,7 @@ map_vcpu_info(struct vcpu *v, unsigned l if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) ) return -EINVAL; - mfn = gmfn_to_mfn(d, mfn); + mfn = gmfn_to_mfn(d, gfn); if ( !mfn_valid(mfn) || !get_page_and_type(mfn_to_page(mfn), d, PGT_writable_page) ) return -EINVAL; @@ -1084,6 +1094,7 @@ map_vcpu_info(struct vcpu *v, unsigned l if ( mapping == NULL ) { put_page_and_type(mfn_to_page(mfn)); + drop_p2m_gfn_domain(d, gfn, mfn); return -ENOMEM; } @@ -1113,6 +1124,7 @@ map_vcpu_info(struct vcpu *v, unsigned l for ( i = 0; i < BITS_PER_EVTCHN_WORD(d); i++ ) set_bit(i, &vcpu_info(v, evtchn_pending_sel)); + drop_p2m_gfn_domain(d, gfn, mfn); return 0; } diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -235,6 +235,7 @@ long arch_do_domctl( type = XEN_DOMCTL_PFINFO_XTAB; arr[j] = type; + drop_p2m_gfn_domain(d, arr[j], mfn); } if ( copy_to_guest_offset(domctl->u.getpageframeinfo3.array, @@ -299,6 +300,7 @@ long arch_do_domctl( for ( j = 0; j < k; j++ ) { struct page_info *page; + unsigned long gfn = arr32[j]; unsigned long mfn = gmfn_to_mfn(d, arr32[j]); page = mfn_to_page(mfn); @@ -310,8 +312,10 @@ long arch_do_domctl( unlikely(is_xen_heap_mfn(mfn)) ) arr32[j] |= XEN_DOMCTL_PFINFO_XTAB; else if ( xsm_getpageframeinfo(page) != 0 ) + { + drop_p2m_gfn_domain(d, gfn, mfn); continue; - else if ( likely(get_page(page, d)) ) + } else if ( likely(get_page(page, d)) ) { unsigned long type = 0; @@ -339,6 +343,7 @@ long arch_do_domctl( else arr32[j] |= XEN_DOMCTL_PFINFO_XTAB; + drop_p2m_gfn_domain(d, gfn, mfn); } if ( copy_to_guest_offset(domctl->u.getpageframeinfo2.array, @@ -431,6 +436,7 @@ long arch_do_domctl( if ( !mfn_valid(mfn) || !get_page_and_type(mfn_to_page(mfn), d, PGT_writable_page) ) { + drop_p2m_gfn_domain(d, gmfn, mfn); rcu_unlock_domain(d); break; } @@ -443,6 +449,7 @@ long arch_do_domctl( put_page_and_type(mfn_to_page(mfn)); + drop_p2m_gfn_domain(d, gmfn, mfn); rcu_unlock_domain(d); } break; diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/hvm/emulate.c --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -66,10 +66,14 @@ static int hvmemul_do_io( if ( p2m_is_paging(p2mt) ) { p2m_mem_paging_populate(curr->domain, ram_gfn); + drop_p2m_gfn_domain(curr->domain, ram_gfn, mfn_x(ram_mfn)); return X86EMUL_RETRY; } if ( p2m_is_shared(p2mt) ) + { + drop_p2m_gfn_domain(curr->domain, ram_gfn, mfn_x(ram_mfn)); return X86EMUL_RETRY; + } /* * Weird-sized accesses have undefined behaviour: we discard writes @@ -81,6 +85,7 @@ static int hvmemul_do_io( ASSERT(p_data != NULL); /* cannot happen with a REP prefix */ if ( dir == IOREQ_READ ) memset(p_data, ~0, size); + drop_p2m_gfn_domain(curr->domain, ram_gfn, mfn_x(ram_mfn)); return X86EMUL_UNHANDLEABLE; } @@ -98,7 +103,10 @@ static int hvmemul_do_io( paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa; unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes; if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) ) + { + drop_p2m_gfn_domain(curr->domain, ram_gfn, mfn_x(ram_mfn)); return X86EMUL_OKAY; + } } else { @@ -108,6 +116,7 @@ static int hvmemul_do_io( { memcpy(p_data, &curr->arch.hvm_vcpu.mmio_large_read[addr - pa], size); + 
drop_p2m_gfn_domain(curr->domain, ram_gfn, mfn_x(ram_mfn)); return X86EMUL_OKAY; } } @@ -120,15 +129,22 @@ static int hvmemul_do_io( case HVMIO_completed: curr->arch.hvm_vcpu.io_state = HVMIO_none; if ( p_data == NULL ) + { + drop_p2m_gfn_domain(curr->domain, ram_gfn, mfn_x(ram_mfn)); return X86EMUL_UNHANDLEABLE; + } goto finish_access; case HVMIO_dispatched: /* May have to wait for previous cycle of a multi-write to complete. */ if ( is_mmio && !value_is_ptr && (dir == IOREQ_WRITE) && (addr == (curr->arch.hvm_vcpu.mmio_large_write_pa + curr->arch.hvm_vcpu.mmio_large_write_bytes)) ) + { + drop_p2m_gfn_domain(curr->domain, ram_gfn, mfn_x(ram_mfn)); return X86EMUL_RETRY; + } default: + drop_p2m_gfn_domain(curr->domain, ram_gfn, mfn_x(ram_mfn)); return X86EMUL_UNHANDLEABLE; } @@ -136,6 +152,7 @@ static int hvmemul_do_io( { gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n", p->state); + drop_p2m_gfn_domain(curr->domain, ram_gfn, mfn_x(ram_mfn)); return X86EMUL_UNHANDLEABLE; } @@ -186,7 +203,10 @@ static int hvmemul_do_io( } if ( rc != X86EMUL_OKAY ) + { + drop_p2m_gfn_domain(curr->domain, ram_gfn, mfn_x(ram_mfn)); return rc; + } finish_access: if ( p_data != NULL ) @@ -221,6 +241,7 @@ static int hvmemul_do_io( } } + drop_p2m_gfn_domain(curr->domain, ram_gfn, mfn_x(ram_mfn)); return X86EMUL_OKAY; } @@ -669,12 +690,12 @@ static int hvmemul_rep_movs( if ( rc != X86EMUL_OKAY ) return rc; - (void)gfn_to_mfn(current->domain, sgpa >> PAGE_SHIFT, &p2mt); + (void)gfn_to_mfn_unlocked(current->domain, sgpa >> PAGE_SHIFT, &p2mt); if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) ) return hvmemul_do_mmio( sgpa, reps, bytes_per_rep, dgpa, IOREQ_READ, df, NULL); - (void)gfn_to_mfn(current->domain, dgpa >> PAGE_SHIFT, &p2mt); + (void)gfn_to_mfn_unlocked(current->domain, dgpa >> PAGE_SHIFT, &p2mt); if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) ) return hvmemul_do_mmio( dgpa, reps, bytes_per_rep, sgpa, IOREQ_WRITE, df, NULL); diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -357,24 +357,35 @@ static int hvm_set_ioreq_page( mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn, &p2mt)); if ( !p2m_is_ram(p2mt) ) + { + drop_p2m_gfn_domain(d, gmfn, mfn); return -EINVAL; + } if ( p2m_is_paging(p2mt) ) { p2m_mem_paging_populate(d, gmfn); + drop_p2m_gfn_domain(d, gmfn, mfn); return -ENOENT; } if ( p2m_is_shared(p2mt) ) + { + drop_p2m_gfn_domain(d, gmfn, mfn); return -ENOENT; + } ASSERT(mfn_valid(mfn)); page = mfn_to_page(mfn); if ( !get_page_and_type(page, d, PGT_writable_page) ) + { + drop_p2m_gfn_domain(d, gmfn, mfn); return -EINVAL; + } va = map_domain_page_global(mfn); if ( va == NULL ) { put_page_and_type(page); + drop_p2m_gfn_domain(d, gmfn, mfn); return -ENOMEM; } @@ -385,12 +396,14 @@ static int hvm_set_ioreq_page( spin_unlock(&iorp->lock); unmap_domain_page_global(va); put_page_and_type(mfn_to_page(mfn)); + drop_p2m_gfn_domain(d, gmfn, mfn); return -EINVAL; } iorp->va = va; iorp->page = page; + drop_p2m_gfn_domain(d, gmfn, mfn); spin_unlock(&iorp->lock); domain_unpause(d); @@ -1182,6 +1195,7 @@ int hvm_hap_nested_page_fault(unsigned l mfn_t mfn; struct vcpu *v = current; struct p2m_domain *p2m; + int rc; /* On Nested Virtualization, walk the guest page table. * If this succeeds, all is fine. 
@@ -1251,8 +1265,8 @@ int hvm_hap_nested_page_fault(unsigned l if ( violation ) { p2m_mem_access_check(gpa, gla_valid, gla, access_r, access_w, access_x); - - return 1; + rc = 1; + goto out_put_p2m; } } @@ -1264,7 +1278,8 @@ int hvm_hap_nested_page_fault(unsigned l { if ( !handle_mmio() ) hvm_inject_exception(TRAP_gp_fault, 0, 0); - return 1; + rc = 1; + goto out_put_p2m; } #ifdef __x86_64__ @@ -1277,7 +1292,8 @@ int hvm_hap_nested_page_fault(unsigned l { ASSERT(!p2m_is_nestedp2m(p2m)); mem_sharing_unshare_page(p2m->domain, gfn, 0); - return 1; + rc = 1; + goto out_put_p2m; } #endif @@ -1291,7 +1307,8 @@ int hvm_hap_nested_page_fault(unsigned l */ paging_mark_dirty(v->domain, mfn_x(mfn)); p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw); - return 1; + rc = 1; + goto out_put_p2m; } /* Shouldn't happen: Maybe the guest was writing to a r/o grant mapping? */ @@ -1300,10 +1317,14 @@ int hvm_hap_nested_page_fault(unsigned l gdprintk(XENLOG_WARNING, "trying to write to read-only grant mapping\n"); hvm_inject_exception(TRAP_gp_fault, 0, 0); - return 1; + rc = 1; + goto out_put_p2m; } - return 0; + rc = 0; +out_put_p2m: + drop_p2m_gfn(p2m, gfn, mfn_x(mfn)); + return rc; } int hvm_handle_xsetbv(u64 new_bv) @@ -1530,6 +1551,7 @@ int hvm_set_cr0(unsigned long value) if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) { + drop_p2m_gfn_domain(v->domain, gfn, mfn); gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n", v->arch.hvm_vcpu.guest_cr[3], mfn); domain_crash(v->domain); @@ -1541,6 +1563,7 @@ int hvm_set_cr0(unsigned long value) HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", v->arch.hvm_vcpu.guest_cr[3], mfn); + drop_p2m_gfn_domain(v->domain, gfn, mfn); } } else if ( !(value & X86_CR0_PG) && (old_value & X86_CR0_PG) ) @@ -1620,10 +1643,15 @@ int hvm_set_cr3(unsigned long value) mfn = mfn_x(gfn_to_mfn(v->domain, value >> PAGE_SHIFT, &p2mt)); if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) + { + drop_p2m_gfn_domain(v->domain, + value >> PAGE_SHIFT, mfn); goto bad_cr3; + } put_page(pagetable_get_page(v->arch.guest_table)); v->arch.guest_table = pagetable_from_pfn(mfn); + drop_p2m_gfn_domain(v->domain, value >> PAGE_SHIFT, mfn); HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); } @@ -1760,6 +1788,8 @@ int hvm_virtual_to_linear_addr( return 0; } +/* We leave this function holding a lock on the p2m entry and a ref + * on the mapped mfn */ static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable) { unsigned long mfn; @@ -1770,10 +1800,14 @@ static void *__hvm_map_guest_frame(unsig ? gfn_to_mfn_unshare(d, gfn, &p2mt) : gfn_to_mfn(d, gfn, &p2mt)); if ( (p2m_is_shared(p2mt) && writable) || !p2m_is_ram(p2mt) ) + { + drop_p2m_gfn_domain(d, gfn, mfn); return NULL; + } if ( p2m_is_paging(p2mt) ) { p2m_mem_paging_populate(d, gfn); + drop_p2m_gfn_domain(d, gfn, mfn); return NULL; } @@ -1795,10 +1829,39 @@ void *hvm_map_guest_frame_ro(unsigned lo return __hvm_map_guest_frame(gfn, 0); } -void hvm_unmap_guest_frame(void *p) +void hvm_unmap_guest_frame(void *p, unsigned long addr, int is_va) { + /* We enter this function with a map obtained in __hvm_map_guest_frame. + * This map performed a p2m query that locked the gfn entry and got + * a ref on the mfn. 
Must undo */ if ( p ) + { + unsigned long gfn = ~0UL; + + if ( is_va ) + { + if ( addr ) + { + uint32_t pfec = 0; + gfn = paging_gva_to_gfn(current, addr, &pfec); + } else { + gfn = ~0UL; + } + } else { + gfn = addr; + } + + if ( gfn != ~0UL ) + { + /* And we get a recursive lock and second ref */ + p2m_type_t t; + unsigned long mfn = mfn_x(gfn_to_mfn(current->domain, gfn, &t)); + drop_p2m_gfn_domain(current->domain, gfn, mfn); + drop_p2m_gfn_domain(current->domain, gfn, mfn); + } + unmap_domain_page(p); + } } static void *hvm_map_entry(unsigned long va) @@ -1835,9 +1898,9 @@ static void *hvm_map_entry(unsigned long return NULL; } -static void hvm_unmap_entry(void *p) +static void hvm_unmap_entry(void *p, unsigned long va) { - hvm_unmap_guest_frame(p); + hvm_unmap_guest_frame(p, va, 1); } static int hvm_load_segment_selector( @@ -1849,6 +1912,7 @@ static int hvm_load_segment_selector( int fault_type = TRAP_invalid_tss; struct cpu_user_regs *regs = guest_cpu_user_regs(); struct vcpu *v = current; + unsigned long va_desc; if ( regs->eflags & X86_EFLAGS_VM ) { @@ -1882,7 +1946,8 @@ static int hvm_load_segment_selector( if ( ((sel & 0xfff8) + 7) > desctab.limit ) goto fail; - pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8)); + va_desc = desctab.base + (sel & 0xfff8); + pdesc = hvm_map_entry(va_desc); if ( pdesc == NULL ) goto hvm_map_fail; @@ -1942,7 +2007,7 @@ static int hvm_load_segment_selector( desc.b |= 0x100; skip_accessed_flag: - hvm_unmap_entry(pdesc); + hvm_unmap_entry(pdesc, va_desc); segr.base = (((desc.b << 0) & 0xff000000u) | ((desc.b << 16) & 0x00ff0000u) | @@ -1958,7 +2023,7 @@ static int hvm_load_segment_selector( return 0; unmap_and_fail: - hvm_unmap_entry(pdesc); + hvm_unmap_entry(pdesc, va_desc); fail: hvm_inject_exception(fault_type, sel & 0xfffc, 0); hvm_map_fail: @@ -1973,7 +2038,7 @@ void hvm_task_switch( struct cpu_user_regs *regs = guest_cpu_user_regs(); struct segment_register gdt, tr, prev_tr, segr; struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc; - unsigned long eflags; + unsigned long eflags, va_optss = 0, va_nptss = 0; int exn_raised, rc; struct { u16 back_link,__blh; @@ -1999,11 +2064,13 @@ void hvm_task_switch( goto out; } - optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8)); + va_optss = gdt.base + (prev_tr.sel & 0xfff8); + optss_desc = hvm_map_entry(va_optss); if ( optss_desc == NULL ) goto out; - nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8)); + va_nptss = gdt.base + (tss_sel & 0xfff8); + nptss_desc = hvm_map_entry(va_nptss); if ( nptss_desc == NULL ) goto out; @@ -2168,8 +2235,8 @@ void hvm_task_switch( } out: - hvm_unmap_entry(optss_desc); - hvm_unmap_entry(nptss_desc); + hvm_unmap_entry(optss_desc, va_optss); + hvm_unmap_entry(nptss_desc, va_nptss); } #define HVMCOPY_from_guest (0u<<0) @@ -2182,7 +2249,7 @@ static enum hvm_copy_result __hvm_copy( void *buf, paddr_t addr, int size, unsigned int flags, uint32_t pfec) { struct vcpu *curr = current; - unsigned long gfn, mfn; + unsigned long gfn = 0, mfn = 0; /* gcc ... 
*/ p2m_type_t p2mt; char *p; int count, todo = size; @@ -2231,14 +2298,24 @@ static enum hvm_copy_result __hvm_copy( if ( p2m_is_paging(p2mt) ) { p2m_mem_paging_populate(curr->domain, gfn); + drop_p2m_gfn_domain(curr->domain, gfn, mfn); return HVMCOPY_gfn_paged_out; } if ( p2m_is_shared(p2mt) ) + { + drop_p2m_gfn_domain(curr->domain, gfn, mfn); return HVMCOPY_gfn_shared; + } if ( p2m_is_grant(p2mt) ) + { + drop_p2m_gfn_domain(curr->domain, gfn, mfn); return HVMCOPY_unhandleable; + } if ( !p2m_is_ram(p2mt) ) + { + drop_p2m_gfn_domain(curr->domain, gfn, mfn); return HVMCOPY_bad_gfn_to_mfn; + } ASSERT(mfn_valid(mfn)); p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK); @@ -2269,6 +2346,7 @@ static enum hvm_copy_result __hvm_copy( addr += count; buf += count; todo -= count; + drop_p2m_gfn_domain(curr->domain, gfn, mfn); } return HVMCOPY_okay; @@ -3688,7 +3766,7 @@ long do_hvm_op(unsigned long op, XEN_GUE if ( p2m_is_paging(t) ) { p2m_mem_paging_populate(d, pfn); - + drop_p2m_gfn_domain(d, pfn, mfn_x(mfn)); rc = -EINVAL; goto param_fail3; } @@ -3703,6 +3781,7 @@ long do_hvm_op(unsigned long op, XEN_GUE /* don't take a long time and don't die either */ sh_remove_shadows(d->vcpu[0], mfn, 1, 0); } + drop_p2m_gfn_domain(d, pfn, mfn_x(mfn)); } param_fail3: @@ -3726,7 +3805,7 @@ long do_hvm_op(unsigned long op, XEN_GUE rc = -EINVAL; if ( is_hvm_domain(d) ) { - gfn_to_mfn_unshare(d, a.pfn, &t); + gfn_to_mfn_unshare_unlocked(d, a.pfn, &t); if ( p2m_is_mmio(t) ) a.mem_type = HVMMEM_mmio_dm; else if ( p2m_is_readonly(t) ) @@ -3783,16 +3862,19 @@ long do_hvm_op(unsigned long op, XEN_GUE if ( p2m_is_paging(t) ) { p2m_mem_paging_populate(d, pfn); + drop_p2m_gfn_domain(d, pfn, mfn_x(mfn)); rc = -EINVAL; goto param_fail4; } if ( p2m_is_shared(t) ) { + drop_p2m_gfn_domain(d, pfn, mfn_x(mfn)); rc = -EINVAL; goto param_fail4; } if ( p2m_is_grant(t) ) { + drop_p2m_gfn_domain(d, pfn, mfn_x(mfn)); gdprintk(XENLOG_WARNING, "type for pfn 0x%lx changed to grant while " "we were working?\n", pfn); @@ -3803,6 +3885,7 @@ long do_hvm_op(unsigned long op, XEN_GUE nt = p2m_change_type(d, pfn, t, memtype[a.hvmmem_type]); if ( nt != t ) { + drop_p2m_gfn_domain(d, pfn, mfn_x(mfn)); gdprintk(XENLOG_WARNING, "type of pfn 0x%lx changed from %d to %d while " "we were trying to change it to %d\n", @@ -3810,6 +3893,7 @@ long do_hvm_op(unsigned long op, XEN_GUE goto param_fail4; } } + drop_p2m_gfn_domain(d, pfn, mfn_x(mfn)); } rc = 0; diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/hvm/mtrr.c --- a/xen/arch/x86/hvm/mtrr.c +++ b/xen/arch/x86/hvm/mtrr.c @@ -389,7 +389,7 @@ uint32_t get_pat_flags(struct vcpu *v, { struct domain *d = v->domain; p2m_type_t p2mt; - gfn_to_mfn_query(d, paddr_to_pfn(gpaddr), &p2mt); + gfn_to_mfn_query_unlocked(d, paddr_to_pfn(gpaddr), &p2mt); if (p2m_is_ram(p2mt)) gdprintk(XENLOG_WARNING, "Conflict occurs for a given guest l1e flags:%x " diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/hvm/nestedhvm.c --- a/xen/arch/x86/hvm/nestedhvm.c +++ b/xen/arch/x86/hvm/nestedhvm.c @@ -56,7 +56,7 @@ nestedhvm_vcpu_reset(struct vcpu *v) nv->nv_ioportED = 0; if (nv->nv_vvmcx) - hvm_unmap_guest_frame(nv->nv_vvmcx); + hvm_unmap_guest_frame(nv->nv_vvmcx, nv->nv_vvmcxaddr >> PAGE_SHIFT, 0); nv->nv_vvmcx = NULL; nv->nv_vvmcxaddr = VMCX_EADDR; nv->nv_flushp2m = 0; diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/hvm/stdvga.c --- a/xen/arch/x86/hvm/stdvga.c +++ b/xen/arch/x86/hvm/stdvga.c @@ -482,7 +482,7 @@ static int mmio_move(struct hvm_hw_stdvg if ( hvm_copy_to_guest_phys(data, &tmp, p->size) != HVMCOPY_okay ) { - 
(void)gfn_to_mfn(d, data >> PAGE_SHIFT, &p2mt); + (void)gfn_to_mfn_unlocked(d, data >> PAGE_SHIFT, &p2mt); /* * The only case we handle is vga_mem <-> vga_mem. * Anything else disables caching and leaves it to qemu-dm. @@ -504,7 +504,7 @@ static int mmio_move(struct hvm_hw_stdvg if ( hvm_copy_from_guest_phys(&tmp, data, p->size) != HVMCOPY_okay ) { - (void)gfn_to_mfn(d, data >> PAGE_SHIFT, &p2mt); + (void)gfn_to_mfn_unlocked(d, data >> PAGE_SHIFT, &p2mt); if ( (p2mt != p2m_mmio_dm) || (data < VGA_MEM_BASE) || ((data + p->size) > (VGA_MEM_BASE + VGA_MEM_SIZE)) ) return 0; diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/hvm/svm/nestedsvm.c --- a/xen/arch/x86/hvm/svm/nestedsvm.c +++ b/xen/arch/x86/hvm/svm/nestedsvm.c @@ -71,7 +71,7 @@ int nestedsvm_vmcb_map(struct vcpu *v, u if (nv->nv_vvmcx != NULL && nv->nv_vvmcxaddr != vmcbaddr) { ASSERT(nv->nv_vvmcx != NULL); ASSERT(nv->nv_vvmcxaddr != VMCX_EADDR); - hvm_unmap_guest_frame(nv->nv_vvmcx); + hvm_unmap_guest_frame(nv->nv_vvmcx, nv->nv_vvmcxaddr >> PAGE_SHIFT, 0); nv->nv_vvmcx = NULL; nv->nv_vvmcxaddr = VMCX_EADDR; } @@ -353,7 +353,7 @@ static int nsvm_vmrun_permissionmap(stru ASSERT(ns_viomap != NULL); ioport_80 = test_bit(0x80, ns_viomap); ioport_ed = test_bit(0xed, ns_viomap); - hvm_unmap_guest_frame(ns_viomap); + hvm_unmap_guest_frame(ns_viomap, svm->ns_iomap_pa >> PAGE_SHIFT, 0); svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed); @@ -857,23 +857,25 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t ioio_info_t ioinfo; uint16_t port; bool_t enabled; + unsigned long gfn = 0; /* gcc ... */ ioinfo.bytes = exitinfo1; port = ioinfo.fields.port; switch (port) { case 0 ... 32767: /* first 4KB page */ - io_bitmap = hvm_map_guest_frame_ro(iopm_gfn); + gfn = iopm_gfn; break; case 32768 ... 65535: /* second 4KB page */ port -= 32768; - io_bitmap = hvm_map_guest_frame_ro(iopm_gfn+1); + gfn = iopm_gfn + 1; break; default: BUG(); break; } + io_bitmap = hvm_map_guest_frame_ro(gfn); if (io_bitmap == NULL) { gdprintk(XENLOG_ERR, "IOIO intercept: mapping of permission map failed\n"); @@ -881,7 +883,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t } enabled = test_bit(port, io_bitmap); - hvm_unmap_guest_frame(io_bitmap); + hvm_unmap_guest_frame(io_bitmap, gfn, 0); if (!enabled) return NESTEDHVM_VMEXIT_HOST; diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -247,6 +247,8 @@ static int svm_vmcb_restore(struct vcpu mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt)); if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) ) { + drop_p2m_gfn_domain(v->domain, + c->cr3 >> PAGE_SHIFT, mfn); gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3); return -EINVAL; @@ -257,6 +259,10 @@ static int svm_vmcb_restore(struct vcpu put_page(pagetable_get_page(v->arch.guest_table)); v->arch.guest_table = pagetable_from_pfn(mfn); + if ( c->cr0 & X86_CR0_PG ) + { + drop_p2m_gfn_domain(v->domain, c->cr3 >> PAGE_SHIFT, mfn); + } } v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET; @@ -1160,7 +1166,9 @@ static void svm_do_nested_pgfault(struct p2m = p2m_get_p2m(v); _d.gpa = gpa; _d.qualification = 0; - _d.mfn = mfn_x(gfn_to_mfn_type_p2m(p2m, gfn, &_d.p2mt, &p2ma, p2m_query, NULL)); + mfn = gfn_to_mfn_type_p2m(p2m, gfn, &_d.p2mt, &p2ma, p2m_query, NULL); + _d.mfn = mfn_x(mfn); + drop_p2m_gfn(p2m, gfn, mfn_x(mfn)); __trace_var(TRC_HVM_NPF, 0, sizeof(_d), &_d); } @@ -1184,6 +1192,7 @@ static void svm_do_nested_pgfault(struct gdprintk(XENLOG_ERR, "SVM violation gpa 
%#"PRIpaddr", mfn %#lx, type %i\n", gpa, mfn_x(mfn), p2mt); + drop_p2m_gfn(p2m, gfn, mfn_x(mfn)); domain_crash(v->domain); } diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/hvm/viridian.c --- a/xen/arch/x86/hvm/viridian.c +++ b/xen/arch/x86/hvm/viridian.c @@ -140,6 +140,7 @@ static void enable_hypercall_page(struct if ( !mfn_valid(mfn) || !get_page_and_type(mfn_to_page(mfn), d, PGT_writable_page) ) { + drop_p2m_gfn_domain(d, gmfn, mfn); gdprintk(XENLOG_WARNING, "Bad GMFN %lx (MFN %lx)\n", gmfn, mfn); return; } @@ -162,6 +163,7 @@ static void enable_hypercall_page(struct unmap_domain_page(p); put_page_and_type(mfn_to_page(mfn)); + drop_p2m_gfn_domain(d, gmfn, mfn); } void initialize_apic_assist(struct vcpu *v) @@ -184,6 +186,7 @@ void initialize_apic_assist(struct vcpu if ( !mfn_valid(mfn) || !get_page_and_type(mfn_to_page(mfn), d, PGT_writable_page) ) { + drop_p2m_gfn_domain(d, gmfn, mfn); gdprintk(XENLOG_WARNING, "Bad GMFN %lx (MFN %lx)\n", gmfn, mfn); return; } @@ -195,6 +198,7 @@ void initialize_apic_assist(struct vcpu unmap_domain_page(p); put_page_and_type(mfn_to_page(mfn)); + drop_p2m_gfn_domain(d, gmfn, mfn); } int wrmsr_viridian_regs(uint32_t idx, uint64_t val) diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -490,6 +490,7 @@ static int vmx_restore_cr0_cr3( mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt)); if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) ) { + drop_p2m_gfn_domain(v->domain, cr3 >> PAGE_SHIFT, mfn); gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%lx\n", cr3); return -EINVAL; } @@ -499,6 +500,10 @@ static int vmx_restore_cr0_cr3( put_page(pagetable_get_page(v->arch.guest_table)); v->arch.guest_table = pagetable_from_pfn(mfn); + if ( cr0 & X86_CR0_PG ) + { + drop_p2m_gfn_domain(v->domain, cr3 >> PAGE_SHIFT, mfn); + } } v->arch.hvm_vcpu.guest_cr[0] = cr0 | X86_CR0_ET; @@ -1009,7 +1014,10 @@ static void vmx_load_pdptrs(struct vcpu mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt)); if ( !p2m_is_ram(p2mt) ) + { + drop_p2m_gfn_domain(v->domain, cr3 >> PAGE_SHIFT, mfn); goto crash; + } p = map_domain_page(mfn); @@ -1037,6 +1045,7 @@ static void vmx_load_pdptrs(struct vcpu vmx_vmcs_exit(v); unmap_domain_page(p); + drop_p2m_gfn_domain(v->domain, cr3 >> PAGE_SHIFT, mfn); return; crash: @@ -2088,7 +2097,7 @@ static void ept_handle_violation(unsigne _d.gpa = gpa; _d.qualification = qualification; - _d.mfn = mfn_x(gfn_to_mfn_query(d, gfn, &_d.p2mt)); + _d.mfn = mfn_x(gfn_to_mfn_query_unlocked(d, gfn, &_d.p2mt)); __trace_var(TRC_HVM_NPF, 0, sizeof(_d), &_d); } @@ -2104,7 +2113,7 @@ static void ept_handle_violation(unsigne return; /* Everything else is an error. */ - mfn = gfn_to_mfn_guest(d, gfn, &p2mt); + mfn = gfn_to_mfn_guest_unlocked(d, gfn, &p2mt); gdprintk(XENLOG_ERR, "EPT violation %#lx (%c%c%c/%c%c%c), " "gpa %#"PRIpaddr", mfn %#lx, type %i.\n", qualification, diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/hvm/vmx/vvmx.c --- a/xen/arch/x86/hvm/vmx/vvmx.c +++ b/xen/arch/x86/hvm/vmx/vvmx.c @@ -558,8 +558,10 @@ static void __map_io_bitmap(struct vcpu index = vmcs_reg == IO_BITMAP_A ? 
0 : 1; if (nvmx->iobitmap[index]) - hvm_unmap_guest_frame (nvmx->iobitmap[index]); + hvm_unmap_guest_frame (nvmx->iobitmap[index], + nvmx->iobitmap_gfn[index], 0); gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg); + nvmx->iobitmap_gfn[index] = gpa >> PAGE_SHIFT; nvmx->iobitmap[index] = hvm_map_guest_frame_ro (gpa >> PAGE_SHIFT); } @@ -577,13 +579,14 @@ static void nvmx_purge_vvmcs(struct vcpu __clear_current_vvmcs(v); if ( nvcpu->nv_vvmcxaddr != VMCX_EADDR ) - hvm_unmap_guest_frame (nvcpu->nv_vvmcx); + hvm_unmap_guest_frame (nvcpu->nv_vvmcx, nvcpu->nv_vvmcxaddr >> PAGE_SHIFT, 0); nvcpu->nv_vvmcx == NULL; nvcpu->nv_vvmcxaddr = VMCX_EADDR; for (i=0; i<2; i++) { if ( nvmx->iobitmap[i] ) { - hvm_unmap_guest_frame (nvmx->iobitmap[i]); + hvm_unmap_guest_frame (nvmx->iobitmap[i], nvmx->iobitmap_gfn[i], 0); nvmx->iobitmap[i] = NULL; + nvmx->iobitmap_gfn[i] = 0; } } } @@ -1198,7 +1201,7 @@ int nvmx_handle_vmclear(struct cpu_user_ vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT); if ( vvmcs ) __set_vvmcs(vvmcs, NVMX_LAUNCH_STATE, 0); - hvm_unmap_guest_frame(vvmcs); + hvm_unmap_guest_frame(vvmcs, gpa >> PAGE_SHIFT, 0); } vmreturn(regs, VMSUCCEED); diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -665,11 +665,17 @@ int map_ldt_shadow_page(unsigned int off gmfn = l1e_get_pfn(l1e); mfn = gmfn_to_mfn(d, gmfn); if ( unlikely(!mfn_valid(mfn)) ) + { + drop_p2m_gfn_domain(d, gmfn, mfn); return 0; + } okay = get_page_and_type(mfn_to_page(mfn), d, PGT_seg_desc_page); if ( unlikely(!okay) ) + { + drop_p2m_gfn_domain(d, gmfn, mfn); return 0; + } nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW); @@ -678,6 +684,7 @@ int map_ldt_shadow_page(unsigned int off v->arch.pv_vcpu.shadow_ldt_mapcnt++; spin_unlock(&v->arch.pv_vcpu.shadow_ldt_lock); + drop_p2m_gfn_domain(d, gmfn, mfn); return 1; } @@ -1796,7 +1803,6 @@ static int mod_l1_entry(l1_pgentry_t *pl { l1_pgentry_t ol1e; struct domain *pt_dom = pt_vcpu->domain; - unsigned long mfn; p2m_type_t p2mt; int rc = 0; @@ -1813,9 +1819,14 @@ static int mod_l1_entry(l1_pgentry_t *pl if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) { /* Translate foreign guest addresses. 
*/ - mfn = mfn_x(gfn_to_mfn(pg_dom, l1e_get_pfn(nl1e), &p2mt)); + unsigned long mfn, gfn; + gfn = l1e_get_pfn(nl1e); + mfn = mfn_x(gfn_to_mfn(pg_dom, gfn, &p2mt)); if ( !p2m_is_ram(p2mt) || unlikely(mfn == INVALID_MFN) ) + { + drop_p2m_gfn_domain(pg_dom, gfn, mfn); return -EINVAL; + } ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0); nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e)); @@ -1823,6 +1834,7 @@ static int mod_l1_entry(l1_pgentry_t *pl { MEM_LOG("Bad L1 flags %x", l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom)); + drop_p2m_gfn_domain(pg_dom, gfn, mfn); return -EINVAL; } @@ -1833,12 +1845,14 @@ static int mod_l1_entry(l1_pgentry_t *pl if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, preserve_ad) ) return 0; + drop_p2m_gfn_domain(pg_dom, gfn, mfn); return -EBUSY; } switch ( rc = get_page_from_l1e(nl1e, pt_dom, pg_dom) ) { default: + drop_p2m_gfn_domain(pg_dom, gfn, mfn); return rc; case 0: break; @@ -1854,6 +1868,7 @@ static int mod_l1_entry(l1_pgentry_t *pl ol1e = nl1e; rc = -EBUSY; } + drop_p2m_gfn_domain(pg_dom, gfn, mfn); } else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, preserve_ad)) ) @@ -3030,6 +3045,7 @@ int do_mmuext_op( rc = -EAGAIN; else if ( rc != -EAGAIN ) MEM_LOG("Error while pinning mfn %lx", mfn); + drop_p2m_gfn_domain(pg_owner, op.arg1.mfn, mfn); break; } @@ -3038,6 +3054,7 @@ int do_mmuext_op( if ( (rc = xsm_memory_pin_page(d, page)) != 0 ) { put_page_and_type(page); + drop_p2m_gfn_domain(pg_owner, op.arg1.mfn, mfn); okay = 0; break; } @@ -3047,6 +3064,7 @@ int do_mmuext_op( { MEM_LOG("Mfn %lx already pinned", mfn); put_page_and_type(page); + drop_p2m_gfn_domain(pg_owner, op.arg1.mfn, mfn); okay = 0; break; } @@ -3065,6 +3083,7 @@ int do_mmuext_op( spin_unlock(&pg_owner->page_alloc_lock); if ( drop_ref ) put_page_and_type(page); + drop_p2m_gfn_domain(pg_owner, op.arg1.mfn, mfn); } break; @@ -3080,6 +3099,7 @@ int do_mmuext_op( mfn = gmfn_to_mfn(pg_owner, op.arg1.mfn); if ( unlikely(!(okay = get_page_from_pagenr(mfn, pg_owner))) ) { + drop_p2m_gfn_domain(pg_owner, op.arg1.mfn, mfn); MEM_LOG("Mfn %lx bad domain", mfn); break; } @@ -3090,6 +3110,7 @@ int do_mmuext_op( { okay = 0; put_page(page); + drop_p2m_gfn_domain(pg_owner, op.arg1.mfn, mfn); MEM_LOG("Mfn %lx not pinned", mfn); break; } @@ -3100,12 +3121,16 @@ int do_mmuext_op( /* A page is dirtied when its pin status is cleared. 
*/ paging_mark_dirty(pg_owner, mfn); + drop_p2m_gfn_domain(pg_owner, op.arg1.mfn, mfn); break; } - case MMUEXT_NEW_BASEPTR: - okay = new_guest_cr3(gmfn_to_mfn(d, op.arg1.mfn)); + case MMUEXT_NEW_BASEPTR: { + unsigned long mfn = gmfn_to_mfn(d, op.arg1.mfn); + okay = new_guest_cr3(mfn); + drop_p2m_gfn_domain(pg_owner, op.arg1.mfn, mfn); break; + } #ifdef __x86_64__ case MMUEXT_NEW_USER_BASEPTR: { @@ -3121,6 +3146,7 @@ int do_mmuext_op( mfn, PGT_root_page_table, d, 0, 0); if ( unlikely(!okay) ) { + drop_p2m_gfn_domain(pg_owner, op.arg1.mfn, mfn); MEM_LOG("Error while installing new mfn %lx", mfn); break; } @@ -3128,6 +3154,7 @@ int do_mmuext_op( old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); curr->arch.guest_table_user = pagetable_from_pfn(mfn); + drop_p2m_gfn_domain(pg_owner, op.arg1.mfn, mfn); if ( old_mfn != 0 ) { @@ -3249,6 +3276,7 @@ int do_mmuext_op( mfn, PGT_writable_page, d, 0, 0); if ( unlikely(!okay) ) { + drop_p2m_gfn_domain(pg_owner, op.arg1.mfn, mfn); MEM_LOG("Error while clearing mfn %lx", mfn); break; } @@ -3261,6 +3289,7 @@ int do_mmuext_op( fixunmap_domain_page(ptr); put_page_and_type(mfn_to_page(mfn)); + drop_p2m_gfn_domain(pg_owner, op.arg1.mfn, mfn); break; } @@ -3274,6 +3303,8 @@ int do_mmuext_op( okay = get_page_from_pagenr(src_mfn, d); if ( unlikely(!okay) ) { + drop_p2m_gfn_domain(pg_owner, + op.arg2.src_mfn, src_mfn); MEM_LOG("Error while copying from mfn %lx", src_mfn); break; } @@ -3283,7 +3314,10 @@ int do_mmuext_op( mfn, PGT_writable_page, d, 0, 0); if ( unlikely(!okay) ) { + drop_p2m_gfn_domain(pg_owner, op.arg1.mfn, mfn); put_page(mfn_to_page(src_mfn)); + drop_p2m_gfn_domain(pg_owner, + op.arg2.src_mfn, src_mfn); MEM_LOG("Error while copying to mfn %lx", mfn); break; } @@ -3297,8 +3331,11 @@ int do_mmuext_op( fixunmap_domain_page(dst); unmap_domain_page(src); + drop_p2m_gfn_domain(pg_owner, op.arg1.mfn, mfn); put_page_and_type(mfn_to_page(mfn)); put_page(mfn_to_page(src_mfn)); + drop_p2m_gfn_domain(pg_owner, + op.arg2.src_mfn, src_mfn); break; } @@ -3488,12 +3525,18 @@ int do_mmu_update( gmfn = req.ptr >> PAGE_SHIFT; mfn = mfn_x(gfn_to_mfn(pt_owner, gmfn, &p2mt)); if ( !p2m_is_valid(p2mt) ) + { + /* In the odd case we ever got a valid mfn with an invalid type, + * we drop the ref obtained in the p2m lookup */ + if (mfn != INVALID_MFN) + put_page(mfn_to_page(mfn)); mfn = INVALID_MFN; + } if ( p2m_is_paged(p2mt) ) { p2m_mem_paging_populate(pg_owner, gmfn); - + drop_p2m_gfn_domain(pt_owner, gmfn, mfn); rc = -ENOENT; break; } @@ -3501,6 +3544,7 @@ int do_mmu_update( if ( unlikely(!get_page_from_pagenr(mfn, pt_owner)) ) { MEM_LOG("Could not get page for normal update"); + drop_p2m_gfn_domain(pt_owner, gmfn, mfn); break; } @@ -3511,6 +3555,7 @@ int do_mmu_update( rc = xsm_mmu_normal_update(d, req.val, page); if ( rc ) { + drop_p2m_gfn_domain(pt_owner, gmfn, mfn); unmap_domain_page_with_cache(va, &mapcache); put_page(page); break; @@ -3524,16 +3569,20 @@ int do_mmu_update( { l1_pgentry_t l1e = l1e_from_intpte(req.val); p2m_type_t l1e_p2mt; - gfn_to_mfn(pg_owner, l1e_get_pfn(l1e), &l1e_p2mt); + unsigned long l1egfn = l1e_get_pfn(l1e), l1emfn; + + l1emfn = mfn_x(gfn_to_mfn(pg_owner, l1egfn, &l1e_p2mt)); if ( p2m_is_paged(l1e_p2mt) ) { p2m_mem_paging_populate(pg_owner, l1e_get_pfn(l1e)); + drop_p2m_gfn_domain(pg_owner, l1egfn, l1emfn); rc = -ENOENT; break; } else if ( p2m_ram_paging_in_start == l1e_p2mt && !mfn_valid(mfn) ) { + drop_p2m_gfn_domain(pg_owner, l1egfn, l1emfn); rc = -ENOENT; break; } @@ -3550,7 +3599,10 @@ int do_mmu_update( l1e_get_pfn(l1e), 0); 
if ( rc ) + { + drop_p2m_gfn_domain(pg_owner, l1egfn, l1emfn); break; + } } } #endif @@ -3558,27 +3610,33 @@ int do_mmu_update( rc = mod_l1_entry(va, l1e, mfn, cmd == MMU_PT_UPDATE_PRESERVE_AD, v, pg_owner); + drop_p2m_gfn_domain(pg_owner, l1egfn, l1emfn); } break; case PGT_l2_page_table: { l2_pgentry_t l2e = l2e_from_intpte(req.val); p2m_type_t l2e_p2mt; - gfn_to_mfn(pg_owner, l2e_get_pfn(l2e), &l2e_p2mt); + unsigned long l2egfn = l2e_get_pfn(l2e), l2emfn; + + l2emfn = mfn_x(gfn_to_mfn(pg_owner, l2egfn, &l2e_p2mt)); if ( p2m_is_paged(l2e_p2mt) ) { + drop_p2m_gfn_domain(pg_owner, l2egfn, l2emfn); p2m_mem_paging_populate(pg_owner, l2e_get_pfn(l2e)); rc = -ENOENT; break; } else if ( p2m_ram_paging_in_start == l2e_p2mt && !mfn_valid(mfn) ) { + drop_p2m_gfn_domain(pg_owner, l2egfn, l2emfn); rc = -ENOENT; break; } else if ( p2m_ram_shared == l2e_p2mt ) { + drop_p2m_gfn_domain(pg_owner, l2egfn, l2emfn); MEM_LOG("Unexpected attempt to map shared page.\n"); break; } @@ -3586,33 +3644,40 @@ int do_mmu_update( rc = mod_l2_entry(va, l2e, mfn, cmd == MMU_PT_UPDATE_PRESERVE_AD, v); + drop_p2m_gfn_domain(pg_owner, l2egfn, l2emfn); } break; case PGT_l3_page_table: { l3_pgentry_t l3e = l3e_from_intpte(req.val); p2m_type_t l3e_p2mt; - gfn_to_mfn(pg_owner, l3e_get_pfn(l3e), &l3e_p2mt); + unsigned long l3egfn = l3e_get_pfn(l3e), l3emfn; + + l3emfn = mfn_x(gfn_to_mfn(pg_owner, l3egfn, &l3e_p2mt)); if ( p2m_is_paged(l3e_p2mt) ) { p2m_mem_paging_populate(pg_owner, l3e_get_pfn(l3e)); + drop_p2m_gfn_domain(pg_owner, l3egfn, l3emfn); rc = -ENOENT; break; } else if ( p2m_ram_paging_in_start == l3e_p2mt && !mfn_valid(mfn) ) { + drop_p2m_gfn_domain(pg_owner, l3egfn, l3emfn); rc = -ENOENT; break; } else if ( p2m_ram_shared == l3e_p2mt ) { + drop_p2m_gfn_domain(pg_owner, l3egfn, l3emfn); MEM_LOG("Unexpected attempt to map shared page.\n"); break; } rc = mod_l3_entry(va, l3e, mfn, cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v); + drop_p2m_gfn_domain(pg_owner, l3egfn, l3emfn); } break; #if CONFIG_PAGING_LEVELS >= 4 @@ -3620,27 +3685,33 @@ int do_mmu_update( { l4_pgentry_t l4e = l4e_from_intpte(req.val); p2m_type_t l4e_p2mt; - gfn_to_mfn(pg_owner, l4e_get_pfn(l4e), &l4e_p2mt); + unsigned long l4egfn = l4e_get_pfn(l4e), l4emfn; + + l4emfn = mfn_x(gfn_to_mfn(pg_owner, l4egfn, &l4e_p2mt)); if ( p2m_is_paged(l4e_p2mt) ) { p2m_mem_paging_populate(pg_owner, l4e_get_pfn(l4e)); + drop_p2m_gfn_domain(pg_owner, l4egfn, l4emfn); rc = -ENOENT; break; } else if ( p2m_ram_paging_in_start == l4e_p2mt && !mfn_valid(mfn) ) { + drop_p2m_gfn_domain(pg_owner, l4egfn, l4emfn); rc = -ENOENT; break; } else if ( p2m_ram_shared == l4e_p2mt ) { + drop_p2m_gfn_domain(pg_owner, l4egfn, l4emfn); MEM_LOG("Unexpected attempt to map shared page.\n"); break; } rc = mod_l4_entry(va, l4e, mfn, cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v); + drop_p2m_gfn_domain(pg_owner, l4egfn, l4emfn); } break; #endif @@ -3662,6 +3733,7 @@ int do_mmu_update( put_page_type(page); } + drop_p2m_gfn_domain(pt_owner, gmfn, mfn); unmap_domain_page_with_cache(va, &mapcache); put_page(page); } @@ -3754,6 +3826,7 @@ static int create_grant_pte_mapping( if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) ) { + drop_p2m_gfn_domain(d, gmfn, mfn); MEM_LOG("Could not get page for normal update"); return GNTST_general_error; } @@ -3790,6 +3863,7 @@ static int create_grant_pte_mapping( failed: unmap_domain_page(va); + drop_p2m_gfn_domain(d, gmfn, mfn); put_page(page); return rc; @@ -3809,6 +3883,7 @@ static int destroy_grant_pte_mapping( if ( unlikely(!get_page_from_pagenr(mfn, 
current->domain)) ) { + drop_p2m_gfn_domain(d, gmfn, mfn); MEM_LOG("Could not get page for normal update"); return GNTST_general_error; } @@ -3860,6 +3935,7 @@ static int destroy_grant_pte_mapping( failed: unmap_domain_page(va); put_page(page); + drop_p2m_gfn_domain(d, gmfn, mfn); return rc; } @@ -4051,7 +4127,7 @@ static int replace_grant_p2m_mapping( if ( new_addr != 0 || (flags & GNTMAP_contains_pte) ) return GNTST_general_error; - old_mfn = gfn_to_mfn(d, gfn, &type); + old_mfn = gfn_to_mfn_unlocked(d, gfn, &type); if ( !p2m_is_grant(type) || mfn_x(old_mfn) != frame ) { gdprintk(XENLOG_WARNING, @@ -4441,14 +4517,19 @@ long set_gdt(struct vcpu *v, struct domain *d = v->domain; /* NB. There are 512 8-byte entries per GDT page. */ int i, nr_pages = (entries + 511) / 512; - unsigned long mfn; + unsigned long mfn, *pfns; if ( entries > FIRST_RESERVED_GDT_ENTRY ) return -EINVAL; + pfns = xmalloc_array(unsigned long, nr_pages); + if ( !pfns ) + return -ENOMEM; + /* Check the pages in the new GDT. */ for ( i = 0; i < nr_pages; i++ ) { + pfns[i] = frames[i]; mfn = frames[i] = gmfn_to_mfn(d, frames[i]); if ( !mfn_valid(mfn) || !get_page_and_type(mfn_to_page(mfn), d, PGT_seg_desc_page) ) @@ -4465,13 +4546,19 @@ long set_gdt(struct vcpu *v, v->arch.pv_vcpu.gdt_frames[i] = frames[i]; l1e_write(&v->arch.perdomain_ptes[i], l1e_from_pfn(frames[i], __PAGE_HYPERVISOR)); + drop_p2m_gfn_domain(d, pfns[i], frames[i]); } + xfree(pfns); return 0; fail: while ( i-- > 0 ) + { put_page_and_type(mfn_to_page(frames[i])); + drop_p2m_gfn_domain(d, pfns[i], frames[i]); + } + xfree(pfns); return -EINVAL; } @@ -4519,11 +4606,17 @@ long do_update_descriptor(u64 pa, u64 de if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) || !mfn_valid(mfn) || !check_descriptor(dom, &d) ) + { + drop_p2m_gfn_domain(dom, gmfn, mfn); return -EINVAL; + } page = mfn_to_page(mfn); if ( unlikely(!get_page(page, dom)) ) + { + drop_p2m_gfn_domain(dom, gmfn, mfn); return -EINVAL; + } /* Check if the given frame is in use in an unsafe context. */ switch ( page->u.inuse.type_info & PGT_type_mask ) @@ -4551,6 +4644,7 @@ long do_update_descriptor(u64 pa, u64 de out: put_page(page); + drop_p2m_gfn_domain(dom, gmfn, mfn); return ret; } @@ -4592,6 +4686,7 @@ static int handle_iomem_range(unsigned l long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) { struct page_info *page = NULL; + unsigned long gfn = 0; /* gcc ... */ int rc; switch ( op ) @@ -4649,11 +4744,13 @@ long arch_memory_op(int op, XEN_GUEST_HA case XENMAPSPACE_gmfn: { p2m_type_t p2mt; + gfn = xatp.idx; xatp.idx = mfn_x(gfn_to_mfn_unshare(d, xatp.idx, &p2mt)); /* If the page is still shared, exit early */ if ( p2m_is_shared(p2mt) ) { + drop_p2m_gfn_domain(d, gfn, xatp.idx); rcu_unlock_domain(d); return -ENOMEM; } @@ -4671,6 +4768,8 @@ long arch_memory_op(int op, XEN_GUEST_HA { if ( page ) put_page(page); + if ( xatp.space == XENMAPSPACE_gmfn ) + drop_p2m_gfn_domain(d, gfn, mfn); rcu_unlock_domain(d); return -EINVAL; } @@ -4691,6 +4790,8 @@ long arch_memory_op(int op, XEN_GUEST_HA /* Normal domain memory is freed, to avoid leaking memory. */ guest_remove_page(d, xatp.gpfn); } + /* In the XENMAPSPACE_gmfn case we still hold a ref on the old page. */ + drop_p2m_gfn_domain(d, xatp.gpfn, prev_mfn); /* Unmap from old location, if any. */ gpfn = get_gpfn_from_mfn(mfn); @@ -4701,6 +4802,9 @@ long arch_memory_op(int op, XEN_GUEST_HA /* Map at new location. 
*/ rc = guest_physmap_add_page(d, xatp.gpfn, mfn, 0); + /* In the XENMAPSPACE_gmfn, we took a ref and locked the p2m at the top */ + if ( xatp.space == XENMAPSPACE_gmfn ) + drop_p2m_gfn_domain(d, gfn, mfn); domain_unlock(d); rcu_unlock_domain(d); diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/mm/guest_walk.c --- a/xen/arch/x86/mm/guest_walk.c +++ b/xen/arch/x86/mm/guest_walk.c @@ -86,6 +86,8 @@ static uint32_t set_ad_bits(void *guest_ return 0; } +/* We leave this function with a lock on the p2m and a ref on the + * mapped page. Regardless of the map, you need to call drop_p2m_gfn. */ static inline void *map_domain_gfn(struct p2m_domain *p2m, gfn_t gfn, mfn_t *mfn, @@ -120,6 +122,9 @@ static inline void *map_domain_gfn(struc /* Walk the guest pagetables, after the manner of a hardware walker. */ +/* Because the walk is essentially random, it can cause a deadlock + * warning in the p2m locking code. Highly unlikely this is an actual + * deadlock, because who would walk page table in the opposite order? */ uint32_t guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m, unsigned long va, walk_t *gw, @@ -348,11 +353,17 @@ set_ad: out: #if GUEST_PAGING_LEVELS == 4 if ( l3p ) unmap_domain_page(l3p); + drop_p2m_gfn(p2m, gfn_x(guest_l4e_get_gfn(gw->l4e)), + mfn_x(gw->l3mfn)); #endif #if GUEST_PAGING_LEVELS >= 3 if ( l2p ) unmap_domain_page(l2p); + drop_p2m_gfn(p2m, gfn_x(guest_l3e_get_gfn(gw->l3e)), + mfn_x(gw->l2mfn)); #endif if ( l1p ) unmap_domain_page(l1p); + drop_p2m_gfn(p2m, gfn_x(guest_l2e_get_gfn(gw->l2e)), + mfn_x(gw->l1mfn)); return rc; } diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/mm/hap/guest_walk.c --- a/xen/arch/x86/mm/hap/guest_walk.c +++ b/xen/arch/x86/mm/hap/guest_walk.c @@ -56,9 +56,11 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PA p2m_type_t p2mt; p2m_access_t p2ma; walk_t gw; + unsigned long top_gfn; /* Get the top-level table's MFN */ - top_mfn = gfn_to_mfn_type_p2m(p2m, cr3 >> PAGE_SHIFT, + top_gfn = cr3 >> PAGE_SHIFT; + top_mfn = gfn_to_mfn_type_p2m(p2m, top_gfn, &p2mt, &p2ma, p2m_unshare, NULL); if ( p2m_is_paging(p2mt) ) { @@ -66,16 +68,19 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PA p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT); pfec[0] = PFEC_page_paged; + drop_p2m_gfn(p2m, top_gfn, mfn_x(top_mfn)); return INVALID_GFN; } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; + drop_p2m_gfn(p2m, top_gfn, mfn_x(top_mfn)); return INVALID_GFN; } if ( !p2m_is_ram(p2mt) ) { pfec[0] &= ~PFEC_page_present; + drop_p2m_gfn(p2m, top_gfn, mfn_x(top_mfn)); return INVALID_GFN; } @@ -87,26 +92,32 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PA #endif missing = guest_walk_tables(v, p2m, ga, &gw, pfec[0], top_mfn, top_map); unmap_domain_page(top_map); + drop_p2m_gfn(p2m, top_gfn, mfn_x(top_mfn)); /* Interpret the answer */ if ( missing == 0 ) { gfn_t gfn = guest_l1e_get_gfn(gw.l1e); - gfn_to_mfn_type_p2m(p2m, gfn_x(gfn), &p2mt, &p2ma, p2m_unshare, NULL); + mfn_t eff_l1_mfn = gfn_to_mfn_type_p2m(p2m, gfn_x(gfn), &p2mt, + &p2ma, p2m_unshare, NULL); if ( p2m_is_paging(p2mt) ) { ASSERT(!p2m_is_nestedp2m(p2m)); p2m_mem_paging_populate(p2m->domain, gfn_x(gfn)); pfec[0] = PFEC_page_paged; + drop_p2m_gfn(p2m, gfn_x(gfn), mfn_x(eff_l1_mfn)); return INVALID_GFN; } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; + drop_p2m_gfn(p2m, gfn_x(gfn), mfn_x(eff_l1_mfn)); return INVALID_GFN; } + drop_p2m_gfn(p2m, gfn_x(gfn), mfn_x(eff_l1_mfn)); + if ( page_order ) *page_order = guest_walk_to_page_order(&gw); diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/mm/mem_event.c --- 
a/xen/arch/x86/mm/mem_event.c +++ b/xen/arch/x86/mm/mem_event.c @@ -47,7 +47,7 @@ static int mem_event_enable(struct domai unsigned long ring_addr = mec->ring_addr; unsigned long shared_addr = mec->shared_addr; l1_pgentry_t l1e; - unsigned long gfn; + unsigned long shared_gfn = 0, ring_gfn = 0; /* gcc ... */ p2m_type_t p2mt; mfn_t ring_mfn; mfn_t shared_mfn; @@ -60,23 +60,41 @@ static int mem_event_enable(struct domai /* Get MFN of ring page */ guest_get_eff_l1e(v, ring_addr, &l1e); - gfn = l1e_get_pfn(l1e); - ring_mfn = gfn_to_mfn(dom_mem_event, gfn, &p2mt); + ring_gfn = l1e_get_pfn(l1e); + /* We're grabbing these two in an order that could deadlock + * dom0 if 1. it were an hvm 2. there were two concurrent + * enables 3. the two gfn's in each enable criss-crossed + * 2MB regions. Duly noted.... */ + ring_mfn = gfn_to_mfn(dom_mem_event, ring_gfn, &p2mt); if ( unlikely(!mfn_valid(mfn_x(ring_mfn))) ) + { + drop_p2m_gfn_domain(dom_mem_event, + ring_gfn, mfn_x(ring_mfn)); return -EINVAL; + } /* Get MFN of shared page */ guest_get_eff_l1e(v, shared_addr, &l1e); - gfn = l1e_get_pfn(l1e); - shared_mfn = gfn_to_mfn(dom_mem_event, gfn, &p2mt); + shared_gfn = l1e_get_pfn(l1e); + shared_mfn = gfn_to_mfn(dom_mem_event, shared_gfn, &p2mt); if ( unlikely(!mfn_valid(mfn_x(shared_mfn))) ) + { + drop_p2m_gfn_domain(dom_mem_event, + ring_gfn, mfn_x(ring_mfn)); + drop_p2m_gfn_domain(dom_mem_event, + shared_gfn, mfn_x(shared_mfn)); return -EINVAL; + } /* Map ring and shared pages */ med->ring_page = map_domain_page(mfn_x(ring_mfn)); med->shared_page = map_domain_page(mfn_x(shared_mfn)); + drop_p2m_gfn_domain(dom_mem_event, ring_gfn, + mfn_x(ring_mfn)); + drop_p2m_gfn_domain(dom_mem_event, shared_gfn, + mfn_x(shared_mfn)); /* Allocate event channel */ rc = alloc_unbound_xen_event_channel(d->vcpu[0], diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/mm/mem_sharing.c --- a/xen/arch/x86/mm/mem_sharing.c +++ b/xen/arch/x86/mm/mem_sharing.c @@ -227,7 +227,7 @@ static void mem_sharing_audit(void) g->domain, g->gfn, mfn_x(e->mfn)); continue; } - mfn = gfn_to_mfn(d, g->gfn, &t); + mfn = gfn_to_mfn_unlocked(d, g->gfn, &t); if(mfn_x(mfn) != mfn_x(e->mfn)) MEM_SHARING_DEBUG("Incorrect P2M for d=%d, PFN=%lx." "Expecting MFN=%ld, got %ld\n", @@ -335,7 +335,7 @@ int mem_sharing_debug_gfn(struct domain p2m_type_t p2mt; mfn_t mfn; - mfn = gfn_to_mfn(d, gfn, &p2mt); + mfn = gfn_to_mfn_unlocked(d, gfn, &p2mt); printk("Debug for domain=%d, gfn=%lx, ", d->domain_id, @@ -524,6 +524,7 @@ int mem_sharing_nominate_page(struct dom ret = 0; out: + drop_p2m_gfn_domain(d, gfn, mfn_x(mfn)); shr_unlock(); return ret; } @@ -593,14 +594,18 @@ int mem_sharing_unshare_page(struct doma shr_handle_t handle; struct list_head *le; + /* Remove the gfn_info from the list */ + + /* This is one of the reasons why we can't enforce ordering + * between shr_lock and p2m fine-grained locks in mm-lock. + * Callers may walk in here already holding the lock for this gfn */ shr_lock(); mem_sharing_audit(); - - /* Remove the gfn_info from the list */ mfn = gfn_to_mfn(d, gfn, &p2mt); /* Has someone already unshared it? */ if (!p2m_is_shared(p2mt)) { + drop_p2m_gfn_domain(d, gfn, mfn_x(mfn)); shr_unlock(); return 0; } @@ -634,6 +639,7 @@ gfn_found: /* Even though we don't allocate a private page, we have to account * for the MFN that originally backed this PFN. 
*/ atomic_dec(&nr_saved_mfns); + drop_p2m_gfn_domain(d, gfn, mfn_x(mfn)); shr_unlock(); put_page_and_type(page); if(last_gfn && @@ -653,6 +659,7 @@ gfn_found: /* We've failed to obtain memory for private page. Need to re-add the * gfn_info to relevant list */ list_add(&gfn_info->list, &hash_entry->gfns); + drop_p2m_gfn_domain(d, gfn, mfn_x(mfn)); shr_unlock(); return -ENOMEM; } @@ -665,6 +672,13 @@ gfn_found: BUG_ON(set_shared_p2m_entry(d, gfn, page_to_mfn(page)) == 0); put_page_and_type(old_page); + /* After switching the p2m entry we still hold it locked, and + * we have a ref count to the old page (mfn). Drop the ref + * on the old page, and set mfn to invalid, so the refcount is + * no further decremented. We are the only cpu who knows about + * the new page, so we don't need additional refs on it. */ + put_page(mfn_to_page(mfn)); + mfn = _mfn(INVALID_MFN); private_page_found: /* We've got a private page, we can commit the gfn destruction */ @@ -683,6 +697,7 @@ private_page_found: /* Update m2p entry */ set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), gfn); + drop_p2m_gfn_domain(d, gfn, mfn_x(mfn)); shr_unlock(); return 0; } diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c +++ b/xen/arch/x86/mm/shadow/common.c @@ -3741,6 +3741,8 @@ int shadow_track_dirty_vram(struct domai } } + drop_p2m_gfn_domain(d, begin_pfn + i, mfn_x(mfn)); + if ( dirty ) { dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8); @@ -3761,7 +3763,7 @@ int shadow_track_dirty_vram(struct domai /* was clean for more than two seconds, try to disable guest * write access */ for ( i = begin_pfn; i < end_pfn; i++ ) { - mfn_t mfn = gfn_to_mfn_query(d, i, &t); + mfn_t mfn = gfn_to_mfn_query_unlocked(d, i, &t); if (mfn_x(mfn) != INVALID_MFN) flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0); } diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c +++ b/xen/arch/x86/mm/shadow/multi.c @@ -2275,6 +2275,7 @@ static int validate_gl4e(struct vcpu *v, if ( mfn_valid(sl3mfn) ) shadow_resync_all(v); #endif + drop_p2m_gfn_domain(d, gfn_x(gl3gfn), mfn_x(gl3mfn)); } l4e_propagate_from_guest(v, new_gl4e, sl3mfn, &new_sl4e, ft_prefetch); @@ -2332,6 +2333,7 @@ static int validate_gl3e(struct vcpu *v, if ( mfn_valid(sl2mfn) ) shadow_resync_all(v); #endif + drop_p2m_gfn_domain(v->domain, gfn_x(gl2gfn), mfn_x(gl2mfn)); } l3e_propagate_from_guest(v, new_gl3e, sl2mfn, &new_sl3e, ft_prefetch); result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn); @@ -2376,6 +2378,7 @@ static int validate_gl2e(struct vcpu *v, sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow); else if ( p2mt != p2m_populate_on_demand ) result |= SHADOW_SET_ERROR; + drop_p2m_gfn_domain(v->domain, gfn_x(gl1gfn), mfn_x(gl1mfn)); } } l2e_propagate_from_guest(v, new_gl2e, sl1mfn, &new_sl2e, ft_prefetch); @@ -2463,6 +2466,7 @@ static int validate_gl1e(struct vcpu *v, } #endif /* OOS */ + drop_p2m_gfn_domain(v->domain, gfn_x(gfn), mfn_x(gmfn)); return result; } @@ -2505,6 +2509,7 @@ void sh_resync_l1(struct vcpu *v, mfn_t l1e_propagate_from_guest(v, gl1e, gmfn, &nsl1e, ft_prefetch, p2mt); rc |= shadow_set_l1e(v, sl1p, nsl1e, p2mt, sl1mfn); + drop_p2m_gfn_domain(v->domain, gfn_x(gfn), mfn_x(gmfn)); *snpl1p = gl1e; } }); @@ -2834,6 +2839,8 @@ static void sh_prefetch(struct vcpu *v, if ( snpl1p != NULL ) snpl1p[i] = gl1e; #endif /* OOS */ + + drop_p2m_gfn_domain(v->domain, gfn_x(gfn), mfn_x(gmfn)); } if ( gl1p != NULL ) sh_unmap_domain_page(gl1p); @@ -3192,6 +3199,7 @@ static int 
sh_page_fault(struct vcpu *v, SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", gfn_x(gfn), mfn_x(gmfn)); reset_early_unshadow(v); + drop_p2m_gfn_domain(d, gfn_x(gfn), mfn_x(gmfn)); goto propagate; } @@ -3236,6 +3244,7 @@ static int sh_page_fault(struct vcpu *v, if ( rc & GW_RMWR_REWALK ) { paging_unlock(d); + drop_p2m_gfn_domain(d, gfn_x(gfn), mfn_x(gmfn)); goto rewalk; } #endif /* OOS */ @@ -3244,6 +3253,7 @@ static int sh_page_fault(struct vcpu *v, { perfc_incr(shadow_inconsistent_gwalk); paging_unlock(d); + drop_p2m_gfn_domain(d, gfn_x(gfn), mfn_x(gmfn)); goto rewalk; } @@ -3270,6 +3280,7 @@ static int sh_page_fault(struct vcpu *v, ASSERT(d->is_shutting_down); #endif paging_unlock(d); + drop_p2m_gfn_domain(d, gfn_x(gfn), mfn_x(gmfn)); trace_shadow_gen(TRC_SHADOW_DOMF_DYING, va); return 0; } @@ -3287,6 +3298,7 @@ static int sh_page_fault(struct vcpu *v, * failed. We cannot safely continue since some page is still * OOS but not in the hash table anymore. */ paging_unlock(d); + drop_p2m_gfn_domain(d, gfn_x(gfn), mfn_x(gmfn)); return 0; } @@ -3296,6 +3308,7 @@ static int sh_page_fault(struct vcpu *v, { perfc_incr(shadow_inconsistent_gwalk); paging_unlock(d); + drop_p2m_gfn_domain(d, gfn_x(gfn), mfn_x(gmfn)); goto rewalk; } #endif /* OOS */ @@ -3389,6 +3402,7 @@ static int sh_page_fault(struct vcpu *v, SHADOW_PRINTK("fixed\n"); shadow_audit_tables(v); paging_unlock(d); + drop_p2m_gfn_domain(d, gfn_x(gfn), mfn_x(gmfn)); return EXCRET_fault_fixed; emulate: @@ -3457,6 +3471,7 @@ static int sh_page_fault(struct vcpu *v, sh_audit_gw(v, &gw); shadow_audit_tables(v); paging_unlock(d); + drop_p2m_gfn_domain(d, gfn_x(gfn), mfn_x(gmfn)); this_cpu(trace_emulate_write_val) = 0; @@ -3595,6 +3610,7 @@ static int sh_page_fault(struct vcpu *v, shadow_audit_tables(v); reset_early_unshadow(v); paging_unlock(d); + drop_p2m_gfn_domain(d, gfn_x(gfn), mfn_x(gmfn)); trace_shadow_gen(TRC_SHADOW_MMIO, va); return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT) ? 
EXCRET_fault_fixed : 0); @@ -3605,6 +3621,7 @@ static int sh_page_fault(struct vcpu *v, shadow_audit_tables(v); reset_early_unshadow(v); paging_unlock(d); + drop_p2m_gfn_domain(d, gfn_x(gfn), mfn_x(gmfn)); propagate: trace_not_shadow_fault(gw.l1e, va); @@ -4292,7 +4309,7 @@ sh_update_cr3(struct vcpu *v, int do_loc if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT ) { gl2gfn = guest_l3e_get_gfn(gl3e[i]); - gl2mfn = gfn_to_mfn_query(d, gl2gfn, &p2mt); + gl2mfn = gfn_to_mfn_query_unlocked(d, gfn_x(gl2gfn), &p2mt); if ( p2m_is_ram(p2mt) ) flush |= sh_remove_write_access(v, gl2mfn, 2, 0); } @@ -4312,6 +4329,8 @@ sh_update_cr3(struct vcpu *v, int do_loc : SH_type_l2_shadow); else sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); + drop_p2m_gfn_domain(d, gfn_x(gl2gfn), + mfn_x(gl2mfn)); } else sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); @@ -4689,11 +4708,12 @@ static void sh_pagetable_dying(struct vc int flush = 0; int fast_path = 0; paddr_t gcr3 = 0; - mfn_t smfn, gmfn; p2m_type_t p2mt; char *gl3pa = NULL; guest_l3e_t *gl3e = NULL; paddr_t gl2a = 0; + unsigned long l3gfn; + mfn_t l3mfn; paging_lock(v->domain); @@ -4702,8 +4722,9 @@ static void sh_pagetable_dying(struct vc if ( gcr3 == gpa ) fast_path = 1; - gmfn = gfn_to_mfn_query(v->domain, _gfn(gpa >> PAGE_SHIFT), &p2mt); - if ( !mfn_valid(gmfn) || !p2m_is_ram(p2mt) ) + l3gfn = gpa >> PAGE_SHIFT; + l3mfn = gfn_to_mfn_query(v->domain, _gfn(l3gfn), &p2mt); + if ( !mfn_valid(l3mfn) || !p2m_is_ram(p2mt) ) { printk(XENLOG_DEBUG "sh_pagetable_dying: gpa not valid %"PRIpaddr"\n", gpa); @@ -4711,19 +4732,24 @@ static void sh_pagetable_dying(struct vc } if ( !fast_path ) { - gl3pa = sh_map_domain_page(gmfn); + gl3pa = sh_map_domain_page(l3mfn); gl3e = (guest_l3e_t *)(gl3pa + ((unsigned long)gpa & ~PAGE_MASK)); } for ( i = 0; i < 4; i++ ) { + unsigned long gfn; + mfn_t smfn, gmfn; + if ( fast_path ) smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[i])); else { /* retrieving the l2s */ gl2a = guest_l3e_get_paddr(gl3e[i]); - gmfn = gfn_to_mfn_query(v->domain, _gfn(gl2a >> PAGE_SHIFT), &p2mt); + gfn = gl2a >> PAGE_SHIFT; + gmfn = gfn_to_mfn_query(v->domain, _gfn(gfn), &p2mt); smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l2_pae_shadow); + drop_p2m_gfn_domain(v->domain, gfn, mfn_x(gmfn)); } if ( mfn_valid(smfn) ) @@ -4747,6 +4773,7 @@ static void sh_pagetable_dying(struct vc out: if ( !fast_path ) unmap_domain_page(gl3pa); + drop_p2m_gfn_domain(v->domain, l3gfn, mfn_x(l3mfn)); paging_unlock(v->domain); } #else @@ -4763,6 +4790,9 @@ static void sh_pagetable_dying(struct vc #else smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l4_64_shadow); #endif + drop_p2m_gfn_domain(v->domain, + gpa >> PAGE_SHIFT, mfn_x(gmfn)); + if ( mfn_valid(smfn) ) { mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying; @@ -4814,12 +4844,19 @@ static mfn_t emulate_gva_to_mfn(struct v mfn = gfn_to_mfn_guest(v->domain, _gfn(gfn), &p2mt); if ( p2m_is_readonly(p2mt) ) + { + drop_p2m_gfn_domain(v->domain, gfn, mfn_x(mfn)); return _mfn(READONLY_GFN); + } if ( !p2m_is_ram(p2mt) ) + { + drop_p2m_gfn_domain(v->domain, gfn, mfn_x(mfn)); return _mfn(BAD_GFN_TO_MFN); + } ASSERT(mfn_valid(mfn)); v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn); + drop_p2m_gfn_domain(v->domain, gfn, mfn_x(mfn)); return mfn; } @@ -5220,7 +5257,7 @@ int sh_audit_l1_table(struct vcpu *v, mf { gfn = guest_l1e_get_gfn(*gl1e); mfn = shadow_l1e_get_mfn(*sl1e); - gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt); + gmfn = gfn_to_mfn_query_unlocked(v->domain, gfn_x(gfn), &p2mt); if ( 
!p2m_is_grant(p2mt) && mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn " --> %" PRI_mfn " != mfn %" PRI_mfn, @@ -5291,16 +5328,17 @@ int sh_audit_l2_table(struct vcpu *v, mf mfn = shadow_l2e_get_mfn(*sl2e); gmfn = (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? get_fl1_shadow_status(v, gfn) - : get_shadow_status(v, gfn_to_mfn_query(v->domain, gfn, &p2mt), - SH_type_l1_shadow); + : get_shadow_status(v, + gfn_to_mfn_query_unlocked(v->domain, gfn_x(gfn), + &p2mt), SH_type_l1_shadow); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn " (--> %" PRI_mfn ")" " --> %" PRI_mfn " != mfn %" PRI_mfn, gfn_x(gfn), (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0 - : mfn_x(gfn_to_mfn_query(v->domain, - gfn, &p2mt)), mfn_x(gmfn), mfn_x(mfn)); + : mfn_x(gfn_to_mfn_query_unlocked(v->domain, + gfn_x(gfn), &p2mt)), mfn_x(gmfn), mfn_x(mfn)); } }); sh_unmap_domain_page(gp); @@ -5339,7 +5377,8 @@ int sh_audit_l3_table(struct vcpu *v, mf { gfn = guest_l3e_get_gfn(*gl3e); mfn = shadow_l3e_get_mfn(*sl3e); - gmfn = get_shadow_status(v, gfn_to_mfn_query(v->domain, gfn, &p2mt), + gmfn = get_shadow_status(v, gfn_to_mfn_query_unlocked( + v->domain, gfn_x(gfn), &p2mt), ((GUEST_PAGING_LEVELS == 3 || is_pv_32on64_vcpu(v)) && !shadow_mode_external(v->domain) @@ -5387,8 +5426,8 @@ int sh_audit_l4_table(struct vcpu *v, mf { gfn = guest_l4e_get_gfn(*gl4e); mfn = shadow_l4e_get_mfn(*sl4e); - gmfn = get_shadow_status(v, gfn_to_mfn_query(v->domain, - gfn, &p2mt), + gmfn = get_shadow_status(v, gfn_to_mfn_query_unlocked( + v->domain, gfn_x(gfn), &p2mt), SH_type_l3_shadow); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/physdev.c --- a/xen/arch/x86/physdev.c +++ b/xen/arch/x86/physdev.c @@ -288,12 +288,18 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H if ( !mfn_valid(mfn) || !get_page_and_type(mfn_to_page(mfn), v->domain, PGT_writable_page) ) + { + drop_p2m_gfn_domain(current->domain, + info.gmfn, mfn); break; + } if ( cmpxchg(&v->domain->arch.pv_domain.pirq_eoi_map_mfn, 0, mfn) != 0 ) { put_page_and_type(mfn_to_page(mfn)); + drop_p2m_gfn_domain(current->domain, + info.gmfn, mfn); ret = -EBUSY; break; } @@ -303,10 +309,13 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H { v->domain->arch.pv_domain.pirq_eoi_map_mfn = 0; put_page_and_type(mfn_to_page(mfn)); + drop_p2m_gfn_domain(current->domain, + info.gmfn, mfn); ret = -ENOSPC; break; } + drop_p2m_gfn_domain(current->domain, info.gmfn, mfn); ret = 0; break; } diff -r 471d4f2754d6 -r d13f91c2fe18 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -678,6 +678,7 @@ int wrmsr_hypervisor_regs(uint32_t idx, if ( !mfn_valid(mfn) || !get_page_and_type(mfn_to_page(mfn), d, PGT_writable_page) ) { + drop_p2m_gfn_domain(d, gmfn, mfn); gdprintk(XENLOG_WARNING, "Bad GMFN %lx (MFN %lx) to MSR %08x\n", gmfn, mfn, base + idx); @@ -689,6 +690,7 @@ int wrmsr_hypervisor_regs(uint32_t idx, unmap_domain_page(hypercall_page); put_page_and_type(mfn_to_page(mfn)); + drop_p2m_gfn_domain(d, gmfn, mfn); break; } @@ -2347,18 +2349,25 @@ static int emulate_privileged_op(struct arch_set_cr2(v, *reg); break; - case 3: /* Write CR3 */ + case 3: {/* Write CR3 */ + unsigned long mfn, gfn; domain_lock(v->domain); if ( !is_pv_32on64_vcpu(v) ) - rc = new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg))); + { + gfn = xen_cr3_to_pfn(*reg); #ifdef CONFIG_COMPAT - else - rc = new_guest_cr3(gmfn_to_mfn(v->domain, compat_cr3_to_pfn(*reg))); + } else { + gfn = 
compat_cr3_to_pfn(*reg); #endif + } + mfn = gmfn_to_mfn(v->domain, gfn); + rc = new_guest_cr3(mfn); + drop_p2m_gfn_domain(v->domain, gfn, mfn); domain_unlock(v->domain); if ( rc == 0 ) /* not okay */ goto fail; break; + } case 4: /* Write CR4 */ v->arch.pv_vcpu.ctrlreg[4] = pv_guest_cr4_fixup(v, *reg); diff -r 471d4f2754d6 -r d13f91c2fe18 xen/common/grant_table.c --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -164,9 +164,11 @@ static int __get_paged_frame(unsigned lo if ( p2m_is_paging(p2mt) ) { p2m_mem_paging_populate(rd, gfn); + drop_p2m_gfn_domain(rd, gfn, mfn_x(mfn)); rc = GNTST_eagain; } } else { + drop_p2m_gfn_domain(rd, gfn, mfn_x(mfn)); *frame = INVALID_MFN; rc = GNTST_bad_page; } @@ -474,7 +476,7 @@ __gnttab_map_grant_ref( u32 old_pin; u32 act_pin; unsigned int cache_flags; - struct active_grant_entry *act; + struct active_grant_entry *act = NULL; /* gcc ... */ struct grant_mapping *mt; grant_entry_v1_t *sha1; grant_entry_v2_t *sha2; @@ -698,6 +700,7 @@ __gnttab_map_grant_ref( op->handle = handle; op->status = GNTST_okay; + drop_p2m_gfn_domain(rd, act->gfn, act->frame); rcu_unlock_domain(rd); return; @@ -735,6 +738,7 @@ __gnttab_map_grant_ref( gnttab_clear_flag(_GTF_reading, status); unlock_out: + drop_p2m_gfn_domain(rd, act->gfn, act->frame); spin_unlock(&rd->grant_table->lock); op->status = rc; put_maptrack_handle(ld->grant_table, handle); @@ -1454,7 +1458,7 @@ gnttab_transfer( struct page_info *page; int i; struct gnttab_transfer gop; - unsigned long mfn; + unsigned long mfn, drop_mfn; unsigned int max_bitsize; for ( i = 0; i < count; i++ ) @@ -1475,6 +1479,7 @@ gnttab_transfer( /* Check the passed page frame for basic validity. */ if ( unlikely(!mfn_valid(mfn)) ) { + drop_p2m_gfn_domain(d, gop.mfn, mfn); gdprintk(XENLOG_INFO, "gnttab_transfer: out-of-range %lx\n", (unsigned long)gop.mfn); gop.status = GNTST_bad_page; @@ -1484,6 +1489,7 @@ gnttab_transfer( page = mfn_to_page(mfn); if ( unlikely(is_xen_heap_page(page)) ) { + drop_p2m_gfn_domain(d, gop.mfn, mfn); gdprintk(XENLOG_INFO, "gnttab_transfer: xen frame %lx\n", (unsigned long)gop.mfn); gop.status = GNTST_bad_page; @@ -1492,6 +1498,7 @@ gnttab_transfer( if ( steal_page(d, page, 0) < 0 ) { + drop_p2m_gfn_domain(d, gop.mfn, mfn); gop.status = GNTST_bad_page; goto copyback; } @@ -1504,6 +1511,7 @@ gnttab_transfer( /* Find the target domain. */ if ( unlikely((e = rcu_lock_domain_by_id(gop.domid)) == NULL) ) { + drop_p2m_gfn_domain(d, gop.mfn, mfn); gdprintk(XENLOG_INFO, "gnttab_transfer: can't find domain %d\n", gop.domid); page->count_info &= ~(PGC_count_mask|PGC_allocated); @@ -1514,6 +1522,7 @@ gnttab_transfer( if ( xsm_grant_transfer(d, e) ) { + drop_p2m_gfn_domain(d, gop.mfn, mfn); gop.status = GNTST_permission_denied; unlock_and_copyback: rcu_unlock_domain(e); @@ -1542,9 +1551,15 @@ gnttab_transfer( unmap_domain_page(dp); unmap_domain_page(sp); + /* We took a ref on acquiring the p2m entry. Drop the ref */ + put_page(page); + drop_mfn = INVALID_MFN; /* Further drops of the p2m entry won't drop anyone's refcount */ page->count_info &= ~(PGC_count_mask|PGC_allocated); free_domheap_page(page); page = new_page; + /* BY the way, this doesn't update mfn, which is used later below ... 
*/ + } else { + drop_mfn = mfn; } spin_lock(&e->page_alloc_lock); @@ -1566,6 +1581,7 @@ gnttab_transfer( e->tot_pages, e->max_pages, gop.ref, e->is_dying); spin_unlock(&e->page_alloc_lock); rcu_unlock_domain(e); + drop_p2m_gfn_domain(d, gop.mfn, drop_mfn); page->count_info &= ~(PGC_count_mask|PGC_allocated); free_domheap_page(page); gop.status = GNTST_general_error; @@ -1579,6 +1595,7 @@ gnttab_transfer( page_set_owner(page, e); spin_unlock(&e->page_alloc_lock); + drop_p2m_gfn_domain(d, gop.mfn, drop_mfn); TRACE_1D(TRC_MEM_PAGE_GRANT_TRANSFER, e->domain_id); @@ -1852,6 +1869,8 @@ __acquire_grant_for_copy( rc = __get_paged_frame(gfn, &grant_frame, readonly, rd); if ( rc != GNTST_okay ) goto unlock_out; + /* We drop this immediately per the comments at the top */ + drop_p2m_gfn_domain(rd, gfn, grant_frame); act->gfn = gfn; is_sub_page = 0; trans_page_off = 0; @@ -1864,6 +1883,7 @@ __acquire_grant_for_copy( rc = __get_paged_frame(gfn, &grant_frame, readonly, rd); if ( rc != GNTST_okay ) goto unlock_out; + drop_p2m_gfn_domain(rd, gfn, grant_frame); act->gfn = gfn; is_sub_page = 0; trans_page_off = 0; @@ -1876,6 +1896,7 @@ __acquire_grant_for_copy( rc = __get_paged_frame(gfn, &grant_frame, readonly, rd); if ( rc != GNTST_okay ) goto unlock_out; + drop_p2m_gfn_domain(rd, gfn, grant_frame); act->gfn = gfn; is_sub_page = 1; trans_page_off = sha2->sub_page.page_off; @@ -1973,6 +1994,7 @@ __gnttab_copy( { #ifdef CONFIG_X86 rc = __get_paged_frame(op->source.u.gmfn, &s_frame, 1, sd); + drop_p2m_gfn_domain(sd, op->source.u.gmfn, s_frame); if ( rc != GNTST_okay ) goto error_out; #else @@ -2012,6 +2034,7 @@ __gnttab_copy( { #ifdef CONFIG_X86 rc = __get_paged_frame(op->dest.u.gmfn, &d_frame, 0, dd); + drop_p2m_gfn_domain(dd, op->dest.u.gmfn, d_frame); if ( rc != GNTST_okay ) goto error_out; #else diff -r 471d4f2754d6 -r d13f91c2fe18 xen/common/memory.c --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -167,6 +167,7 @@ int guest_remove_page(struct domain *d, { guest_physmap_remove_page(d, gmfn, mfn, 0); p2m_mem_paging_drop_page(d, gmfn); + drop_p2m_gfn_domain(d, gmfn, mfn); return 1; } #else @@ -174,6 +175,7 @@ int guest_remove_page(struct domain *d, #endif if ( unlikely(!mfn_valid(mfn)) ) { + drop_p2m_gfn_domain(d, gmfn, mfn); gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n", d->domain_id, gmfn); return 0; @@ -187,12 +189,14 @@ int guest_remove_page(struct domain *d, { put_page_and_type(page); guest_physmap_remove_page(d, gmfn, mfn, 0); + drop_p2m_gfn_domain(d, gmfn, mfn); return 1; } #endif /* CONFIG_X86 */ if ( unlikely(!get_page(page, d)) ) { + drop_p2m_gfn_domain(d, gmfn, mfn); gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id); return 0; } @@ -204,6 +208,7 @@ int guest_remove_page(struct domain *d, put_page(page); guest_physmap_remove_page(d, gmfn, mfn, 0); + drop_p2m_gfn_domain(d, gmfn, mfn); put_page(page); @@ -366,6 +371,7 @@ static long memory_exchange(XEN_GUEST_HA mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn + k, &p2mt)); if ( p2m_is_shared(p2mt) ) { + drop_p2m_gfn_domain(d, gmfn + k, mfn); rc = -ENOMEM; goto fail; } @@ -374,6 +380,7 @@ static long memory_exchange(XEN_GUEST_HA #endif if ( unlikely(!mfn_valid(mfn)) ) { + drop_p2m_gfn_domain(d, gmfn + k, mfn); rc = -EINVAL; goto fail; } @@ -382,11 +389,13 @@ static long memory_exchange(XEN_GUEST_HA if ( unlikely(steal_page(d, page, MEMF_no_refcount)) ) { + drop_p2m_gfn_domain(d, gmfn + k, mfn); rc = -EINVAL; goto fail; } page_list_add(page, &in_chunk_list); + drop_p2m_gfn_domain(d, gmfn + k, mfn); } } diff -r 
471d4f2754d6 -r d13f91c2fe18 xen/common/tmem_xen.c --- a/xen/common/tmem_xen.c +++ b/xen/common/tmem_xen.c @@ -111,20 +111,28 @@ static inline void *cli_get_page(tmem_cl cli_mfn = mfn_x(gfn_to_mfn(current->domain, cmfn, &t)); if ( t != p2m_ram_rw || !mfn_valid(cli_mfn) ) + { + drop_p2m_gfn_domain(current->domain, + (unsigned long) cmfn, cli_mfn); return NULL; + } page = mfn_to_page(cli_mfn); if ( cli_write ) ret = get_page_and_type(page, current->domain, PGT_writable_page); else ret = get_page(page, current->domain); if ( !ret ) + { + drop_p2m_gfn_domain(current->domain, + (unsigned long) cmfn, cli_mfn); return NULL; + } *pcli_mfn = cli_mfn; *pcli_pfp = (pfp_t *)page; return map_domain_page(cli_mfn); } -static inline void cli_put_page(void *cli_va, pfp_t *cli_pfp, +static inline void cli_put_page(tmem_cli_mfn_t cmfn, void *cli_va, pfp_t *cli_pfp, unsigned long cli_mfn, bool_t mark_dirty) { if ( mark_dirty ) @@ -135,6 +143,7 @@ static inline void cli_put_page(void *cl else put_page((struct page_info *)cli_pfp); unmap_domain_page(cli_va); + drop_p2m_gfn_domain(current->domain, (unsigned long) cmfn, cli_mfn); } #endif @@ -169,7 +178,7 @@ EXPORT int tmh_copy_from_client(pfp_t *p (pfn_offset+len <= PAGE_SIZE) ) memcpy((char *)tmem_va+tmem_offset,(char *)cli_va+pfn_offset,len); if ( !tmemc ) - cli_put_page(cli_va, cli_pfp, cli_mfn, 0); + cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 0); unmap_domain_page(tmem_va); return 1; } @@ -197,7 +206,7 @@ EXPORT int tmh_compress_from_client(tmem ASSERT(ret == LZO_E_OK); *out_va = dmem; if ( !tmemc ) - cli_put_page(cli_va, cli_pfp, cli_mfn, 0); + cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 0); unmap_domain_page(cli_va); return 1; } @@ -225,7 +234,7 @@ EXPORT int tmh_copy_to_client(tmem_cli_m memcpy((char *)cli_va+pfn_offset,(char *)tmem_va+tmem_offset,len); unmap_domain_page(tmem_va); if ( !tmemc ) - cli_put_page(cli_va, cli_pfp, cli_mfn, 1); + cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1); mb(); return 1; } @@ -249,7 +258,7 @@ EXPORT int tmh_decompress_to_client(tmem ASSERT(ret == LZO_E_OK); ASSERT(out_len == PAGE_SIZE); if ( !tmemc ) - cli_put_page(cli_va, cli_pfp, cli_mfn, 1); + cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1); mb(); return 1; } @@ -271,7 +280,7 @@ EXPORT int tmh_copy_tze_to_client(tmem_c memcpy((char *)cli_va,(char *)tmem_va,len); if ( len < PAGE_SIZE ) memset((char *)cli_va+len,0,PAGE_SIZE-len); - cli_put_page(cli_va, cli_pfp, cli_mfn, 1); + cli_put_page(cmfn, cli_va, cli_pfp, cli_mfn, 1); mb(); return 1; } diff -r 471d4f2754d6 -r d13f91c2fe18 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -394,7 +394,10 @@ int hvm_virtual_to_linear_addr( void *hvm_map_guest_frame_rw(unsigned long gfn); void *hvm_map_guest_frame_ro(unsigned long gfn); -void hvm_unmap_guest_frame(void *p); +/* We pass back either the guest virtual or physical frame mapped, + * in order to drop any locks/refcounts we may have had on p2m + * entries or underlying mfn's while using the map */ +void hvm_unmap_guest_frame(void *p, unsigned long addr, int is_va); static inline void hvm_set_info_guest(struct vcpu *v) { diff -r 471d4f2754d6 -r d13f91c2fe18 xen/include/asm-x86/hvm/vmx/vvmx.h --- a/xen/include/asm-x86/hvm/vmx/vvmx.h +++ b/xen/include/asm-x86/hvm/vmx/vvmx.h @@ -25,6 +25,7 @@ struct nestedvmx { paddr_t vmxon_region_pa; + unsigned long iobitmap_gfn[2]; void *iobitmap[2]; /* map (va) of L1 guest I/O bitmap */ /* deferred nested interrupt */ struct { _______________________________________________ Xen-devel 
mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
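
[Editorial note] For readers following the converted call sites above: each one pairs a locking query with a matching drop on every exit path. Below is a minimal sketch of that caller pattern, using only the gfn_to_mfn_query()/drop_p2m_gfn_domain() helpers this series relies on. The surrounding function, its name and its checks are illustrative and not taken from the patch; whether a given site passes the gfn as _gfn() or as a plain unsigned long follows the existing caller.

static int example_touch_gfn(struct domain *d, unsigned long gfn)
{
    p2m_type_t p2mt;
    mfn_t mfn;
    int rc = -EINVAL;

    /* The query variant takes the fine-grained lock on this p2m entry
     * and, for callers outside the p2m code, a reference on the
     * underlying mfn, so neither can change while we use them. */
    mfn = gfn_to_mfn_query(d, _gfn(gfn), &p2mt);

    if ( mfn_valid(mfn) && p2m_is_ram(p2mt) )
    {
        /* ... operate on mfn_x(mfn) here ... */
        rc = 0;
    }

    /* Every exit path must pair the query with a drop, releasing the
     * entry lock and the page reference taken above. */
    drop_p2m_gfn_domain(d, gfn, mfn_x(mfn));

    return rc;
}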
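
[Editorial note] Likewise, the hvm_unmap_guest_frame() signature change in hvm.h means map/unmap pairs now hand the mapped guest frame (or virtual address) back to the unmap so it can drop whatever p2m lock/refcount the map acquired. A hypothetical caller is sketched below; only the map/unmap signatures come from the patch, and is_va == 0 is assumed to mean the address is a guest frame number, as the new header comment suggests.

static int example_peek_guest_frame(unsigned long gfn)
{
    void *p = hvm_map_guest_frame_ro(gfn);

    if ( p == NULL )
        return -EINVAL;

    /* ... read the guest page through p ... */

    /* Pass back which frame was mapped so the unmap can release the
     * p2m entry lock/refcount taken by the map; the third argument
     * flags whether the address is a guest VA or a frame number. */
    hvm_unmap_guest_frame(p, gfn, 0);

    return 0;
}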