[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v9 4/5] x86/ioreq server: Asynchronously reset outstanding p2m_ioreq_server entries.
After an ioreq server has unmapped, the remaining p2m_ioreq_server entries need to be reset back to p2m_ram_rw. This patch does this asynchronously with the current p2m_change_entry_type_global() interface. This patch also disallows live migration, when there's still any outstanding p2m_ioreq_server entry left. The core reason is our current implementation of p2m_change_entry_type_global() can not tell the state of p2m_ioreq_server entries(can not decide if an entry is to be emulated or to be resynced). Note: new field entry_count is introduced in struct p2m_domain, to record the number of p2m_ioreq_server p2m page table entries. One nature of these entries is that they only point to 4K sized page frames, because all p2m_ioreq_server entries are originated from p2m_ram_rw ones in p2m_change_type_one(). We do not need to worry about the counting for 2M/1G sized pages. Signed-off-by: Yu Zhang <yu.c.zhang@xxxxxxxxxxxxxxx> --- Cc: Paul Durrant <paul.durrant@xxxxxxxxxx> Cc: Jan Beulich <jbeulich@xxxxxxxx> Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx> Cc: Jun Nakajima <jun.nakajima@xxxxxxxxx> Cc: Kevin Tian <kevin.tian@xxxxxxxxx> changes in v4: - According to comments from Jan: use ASSERT() instead of 'if' condition in p2m_change_type_one(). - According to comments from Jan: commit message changes to mention the p2m_ioreq_server are all based on 4K sized pages. changes in v3: - Move the synchronously resetting logic into patch 5. - According to comments from Jan: introduce p2m_check_changeable() to clarify the p2m type change code. - According to comments from George: use locks in the same order to avoid deadlock, call p2m_change_entry_type_global() after unmap of the ioreq server is finished. changes in v2: - Move the calculation of ioreq server page entry_cout into p2m_change_type_one() so that we do not need a seperate lock. Note: entry_count is also calculated in resolve_misconfig()/ do_recalc(), fortunately callers of both routines have p2m lock protected already. - Simplify logic in hvmop_set_mem_type(). - Introduce routine p2m_finish_type_change() to walk the p2m table and do the p2m reset. --- xen/arch/x86/hvm/ioreq.c | 8 ++++++++ xen/arch/x86/mm/hap/hap.c | 9 +++++++++ xen/arch/x86/mm/p2m-ept.c | 8 +++++++- xen/arch/x86/mm/p2m-pt.c | 13 +++++++++++-- xen/arch/x86/mm/p2m.c | 20 ++++++++++++++++++++ xen/include/asm-x86/p2m.h | 9 ++++++++- 6 files changed, 63 insertions(+), 4 deletions(-) diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c index 746799f..102c6c2 100644 --- a/xen/arch/x86/hvm/ioreq.c +++ b/xen/arch/x86/hvm/ioreq.c @@ -949,6 +949,14 @@ int hvm_map_mem_type_to_ioreq_server(struct domain *d, ioservid_t id, spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock); + if ( rc == 0 && flags == 0 ) + { + struct p2m_domain *p2m = p2m_get_hostp2m(d); + + if ( read_atomic(&p2m->ioreq.entry_count) ) + p2m_change_entry_type_global(d, p2m_ioreq_server, p2m_ram_rw); + } + return rc; } diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c index a57b385..6ec950a 100644 --- a/xen/arch/x86/mm/hap/hap.c +++ b/xen/arch/x86/mm/hap/hap.c @@ -187,6 +187,15 @@ out: */ static int hap_enable_log_dirty(struct domain *d, bool_t log_global) { + struct p2m_domain *p2m = p2m_get_hostp2m(d); + + /* + * Refuse to turn on global log-dirty mode if + * there's outstanding p2m_ioreq_server pages. + */ + if ( log_global && read_atomic(&p2m->ioreq.entry_count) ) + return -EBUSY; + /* turn on PG_log_dirty bit in paging mode */ paging_lock(d); d->arch.paging.mode |= PG_log_dirty; diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c index cc1eb21..1df3d09 100644 --- a/xen/arch/x86/mm/p2m-ept.c +++ b/xen/arch/x86/mm/p2m-ept.c @@ -544,6 +544,12 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn) e.ipat = ipat; if ( e.recalc && p2m_is_changeable(e.sa_p2mt) ) { + if ( e.sa_p2mt == p2m_ioreq_server ) + { + p2m->ioreq.entry_count--; + ASSERT(p2m->ioreq.entry_count >= 0); + } + e.sa_p2mt = p2m_is_logdirty_range(p2m, gfn + i, gfn + i) ? p2m_ram_logdirty : p2m_ram_rw; ept_p2m_type_to_flags(p2m, &e, e.sa_p2mt, e.access); @@ -965,7 +971,7 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m, if ( is_epte_valid(ept_entry) ) { if ( (recalc || ept_entry->recalc) && - p2m_is_changeable(ept_entry->sa_p2mt) ) + p2m_check_changeable(ept_entry->sa_p2mt) ) *t = p2m_is_logdirty_range(p2m, gfn, gfn) ? p2m_ram_logdirty : p2m_ram_rw; else diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c index f6c45ec..169de75 100644 --- a/xen/arch/x86/mm/p2m-pt.c +++ b/xen/arch/x86/mm/p2m-pt.c @@ -436,11 +436,13 @@ static int do_recalc(struct p2m_domain *p2m, unsigned long gfn) needs_recalc(l1, *pent) ) { l1_pgentry_t e = *pent; + p2m_type_t p2mt_old; if ( !valid_recalc(l1, e) ) P2M_DEBUG("bogus recalc leaf at d%d:%lx:%u\n", p2m->domain->domain_id, gfn, level); - if ( p2m_is_changeable(p2m_flags_to_type(l1e_get_flags(e))) ) + p2mt_old = p2m_flags_to_type(l1e_get_flags(e)); + if ( p2m_is_changeable(p2mt_old) ) { unsigned long mask = ~0UL << (level * PAGETABLE_ORDER); p2m_type_t p2mt = p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask) @@ -460,6 +462,13 @@ static int do_recalc(struct p2m_domain *p2m, unsigned long gfn) mfn &= ~(_PAGE_PSE_PAT >> PAGE_SHIFT); flags |= _PAGE_PSE; } + + if ( p2mt_old == p2m_ioreq_server ) + { + p2m->ioreq.entry_count--; + ASSERT(p2m->ioreq.entry_count >= 0); + } + e = l1e_from_pfn(mfn, flags); p2m_add_iommu_flags(&e, level, (p2mt == p2m_ram_rw) @@ -729,7 +738,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, static inline p2m_type_t recalc_type(bool_t recalc, p2m_type_t t, struct p2m_domain *p2m, unsigned long gfn) { - if ( !recalc || !p2m_is_changeable(t) ) + if ( !recalc || !p2m_check_changeable(t) ) return t; return p2m_is_logdirty_range(p2m, gfn, gfn) ? p2m_ram_logdirty : p2m_ram_rw; diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c index dd4e477..e3e54f1 100644 --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -954,6 +954,26 @@ int p2m_change_type_one(struct domain *d, unsigned long gfn, p2m->default_access) : -EBUSY; + if ( !rc ) + { + switch ( nt ) + { + case p2m_ram_rw: + if ( ot == p2m_ioreq_server ) + { + p2m->ioreq.entry_count--; + ASSERT(p2m->ioreq.entry_count >= 0); + } + break; + case p2m_ioreq_server: + ASSERT(ot == p2m_ram_rw); + p2m->ioreq.entry_count++; + break; + default: + break; + } + } + gfn_unlock(p2m, gfn, 0); return rc; diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h index 3786680..395f125 100644 --- a/xen/include/asm-x86/p2m.h +++ b/xen/include/asm-x86/p2m.h @@ -120,7 +120,10 @@ typedef unsigned int p2m_query_t; /* Types that can be subject to bulk transitions. */ #define P2M_CHANGEABLE_TYPES (p2m_to_mask(p2m_ram_rw) \ - | p2m_to_mask(p2m_ram_logdirty) ) + | p2m_to_mask(p2m_ram_logdirty) \ + | p2m_to_mask(p2m_ioreq_server) ) + +#define P2M_IOREQ_TYPES (p2m_to_mask(p2m_ioreq_server)) #define P2M_POD_TYPES (p2m_to_mask(p2m_populate_on_demand)) @@ -157,6 +160,7 @@ typedef unsigned int p2m_query_t; #define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES) #define p2m_is_discard_write(_t) (p2m_to_mask(_t) & P2M_DISCARD_WRITE_TYPES) #define p2m_is_changeable(_t) (p2m_to_mask(_t) & P2M_CHANGEABLE_TYPES) +#define p2m_is_ioreq(_t) (p2m_to_mask(_t) & P2M_IOREQ_TYPES) #define p2m_is_pod(_t) (p2m_to_mask(_t) & P2M_POD_TYPES) #define p2m_is_grant(_t) (p2m_to_mask(_t) & P2M_GRANT_TYPES) /* Grant types are *not* considered valid, because they can be @@ -178,6 +182,8 @@ typedef unsigned int p2m_query_t; #define p2m_allows_invalid_mfn(t) (p2m_to_mask(t) & P2M_INVALID_MFN_TYPES) +#define p2m_check_changeable(t) (p2m_is_changeable(t) && !p2m_is_ioreq(t)) + typedef enum { p2m_host, p2m_nested, @@ -349,6 +355,7 @@ struct p2m_domain { * are to be emulated by an ioreq server. */ unsigned int flags; + long entry_count; } ioreq; }; -- 1.9.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |