
[Xen-devel] [PATCHv2 3/3] x86/ept: defer the invalidation until the p2m lock is released



From: David Vrabel <dvrabel@xxxxxxxxxx>

Holding the p2m lock while calling ept_sync_domain() is very expensive
since it does an on_selected_cpus() call.  IPIs on many-socket machines
can be very slow and on_selected_cpus() is serialized.

Defer the invalidation until the p2m lock is released.  Since the processor
may cache partial translations, we also need to make sure any page table
pages to be freed are not freed until the invalidation is complete.  Such
pages are temporarily stored in a list.
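
For illustration, the defer-and-flush counting that the hooks below implement
can be sketched as a small standalone C program.  The fake_p2m type and the
request_flush()/do_flush() helpers are simplified stand-ins for this sketch
only, not the real Xen structures or APIs.

/*
 * Minimal sketch of the deferred-flush pattern, using stand-in types.
 * Real code: p2m_lock()/p2m_unlock() in mm-locks.h and ept_sync_domain()
 * below; do_flush() stands in for the INVEPT IPI broadcast.
 */
#include <stdio.h>
#include <stdbool.h>

struct fake_p2m {
    unsigned int defer_flush;   /* nesting count of write-lock holders */
    bool need_flush;            /* a flush was requested while locked */
};

/* Stand-in for the expensive on_selected_cpus()/INVEPT broadcast. */
static void do_flush(struct fake_p2m *p2m)
{
    (void)p2m;
    printf("flush (expensive IPI broadcast)\n");
}

static void fake_p2m_lock(struct fake_p2m *p2m)
{
    /* The real code takes the p2m write lock here. */
    p2m->defer_flush++;
}

/* Called where the real code calls ept_sync_domain(). */
static void request_flush(struct fake_p2m *p2m)
{
    if ( p2m->defer_flush )
        p2m->need_flush = true;     /* lock held: defer the flush */
    else
        do_flush(p2m);              /* lock not held: flush immediately */
}

static void fake_p2m_unlock(struct fake_p2m *p2m)
{
    if ( --p2m->defer_flush == 0 && p2m->need_flush )
    {
        p2m->need_flush = false;
        /* The real code drops the write lock *before* the IPI. */
        do_flush(p2m);
    }
}

int main(void)
{
    struct fake_p2m p2m = { 0, false };

    fake_p2m_lock(&p2m);
    request_flush(&p2m);        /* deferred: nothing happens yet */
    request_flush(&p2m);        /* repeated requests coalesce */
    fake_p2m_unlock(&p2m);      /* single flush runs here, after unlock */
    return 0;
}

However many flushes are requested inside the critical section, only one
runs, and it runs after the write lock has been dropped.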

Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
v2:
- use per-p2m list for deferred pages.
- update synced_mask while holding write lock.
---
 xen/arch/x86/mm/mm-locks.h | 23 +++++++++++++-------
 xen/arch/x86/mm/p2m-ept.c  | 53 ++++++++++++++++++++++++++++++++++++++--------
 xen/arch/x86/mm/p2m.c      | 18 ++++++++++++++++
 xen/include/asm-x86/p2m.h  |  7 ++++++
 4 files changed, 84 insertions(+), 17 deletions(-)

diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
index 76c7217..b5eb560 100644
--- a/xen/arch/x86/mm/mm-locks.h
+++ b/xen/arch/x86/mm/mm-locks.h
@@ -263,14 +263,21 @@ declare_mm_lock(altp2mlist)
  */
 
 declare_mm_rwlock(altp2m);
-#define p2m_lock(p)                         \
-{                                           \
-    if ( p2m_is_altp2m(p) )                 \
-        mm_write_lock(altp2m, &(p)->lock);  \
-    else                                    \
-        mm_write_lock(p2m, &(p)->lock);     \
-}
-#define p2m_unlock(p)         mm_write_unlock(&(p)->lock);
+#define p2m_lock(p)                             \
+    do {                                        \
+        if ( p2m_is_altp2m(p) )                 \
+            mm_write_lock(altp2m, &(p)->lock);  \
+        else                                    \
+            mm_write_lock(p2m, &(p)->lock);     \
+        (p)->defer_flush++;                     \
+    } while (0)
+#define p2m_unlock(p)                                                   \
+    do {                                                                \
+        if ( --(p)->defer_flush == 0 && (p)->need_flush )               \
+            (p)->flush_and_unlock(p);                                   \
+        else                                                            \
+            mm_write_unlock(&(p)->lock);                                \
+    } while (0)
 #define gfn_lock(p,g,o)       p2m_lock(p)
 #define gfn_unlock(p,g,o)     p2m_unlock(p)
 #define p2m_read_lock(p)      mm_read_lock(p2m, &(p)->lock)
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 548f2f2..9f4d6e6 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -263,7 +263,7 @@ static void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int l
         unmap_domain_page(epte);
     }
     
-    p2m_free_ptp(p2m, mfn_to_page(ept_entry->mfn));
+    p2m_free_ptp_defer(p2m, mfn_to_page(ept_entry->mfn));
 }
 
 static bool_t ept_split_super_page(struct p2m_domain *p2m,
@@ -1095,15 +1095,10 @@ static void __ept_sync_domain(void *info)
     __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept), 0);
 }
 
-void ept_sync_domain(struct p2m_domain *p2m)
+static void ept_sync_domain_prepare(struct p2m_domain *p2m)
 {
     struct domain *d = p2m->domain;
     struct ept_data *ept = &p2m->ept;
-    /* Only if using EPT and this domain has some VCPUs to dirty. */
-    if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] )
-        return;
-
-    ASSERT(local_irq_is_enabled());
 
     if ( nestedhvm_enabled(d) && !p2m_is_nestedp2m(p2m) )
         p2m_flush_nestedp2m(d);
@@ -1116,9 +1111,48 @@ void ept_sync_domain(struct p2m_domain *p2m)
      */
     cpumask_and(ept_get_synced_mask(ept),
                 d->domain_dirty_cpumask, &cpu_online_map);
+}
+
+static void ept_sync_domain_mask(struct p2m_domain *p2m, const cpumask_t *mask)
+{
+    on_selected_cpus(mask, __ept_sync_domain, p2m, 1);
+}
+
+void ept_sync_domain(struct p2m_domain *p2m)
+{
+    struct domain *d = p2m->domain;
+    struct ept_data *ept = &p2m->ept;
+
+    /* Only if using EPT and this domain has some VCPUs to dirty. */
+    if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] )
+        return;
+
+    if ( p2m->defer_flush )
+    {
+        p2m->need_flush = 1;
+        return;
+    }
+    else
+        p2m->need_flush = 0;
+
+    ept_sync_domain_prepare(p2m);
+    ept_sync_domain_mask(p2m, ept_get_synced_mask(ept));
+}
+
+static void ept_flush_and_unlock(struct p2m_domain *p2m)
+{
+    struct ept_data *ept = &p2m->ept;
+    PAGE_LIST_HEAD(deferred_pages);
+    cpumask_t flush_mask;
+
+    ept_sync_domain_prepare(p2m);
+    cpumask_copy(&flush_mask, ept_get_synced_mask(ept));
+    page_list_move(&deferred_pages, &p2m->deferred_pages);
+
+    mm_write_unlock(&p2m->lock);
 
-    on_selected_cpus(ept_get_synced_mask(ept),
-                     __ept_sync_domain, p2m, 1);
+    ept_sync_domain_mask(p2m, &flush_mask);
+    p2m_free_ptp_list(p2m, &deferred_pages);
 }
 
 static void ept_enable_pml(struct p2m_domain *p2m)
@@ -1169,6 +1203,7 @@ int ept_p2m_init(struct p2m_domain *p2m)
     p2m->change_entry_type_range = ept_change_entry_type_range;
     p2m->memory_type_changed = ept_memory_type_changed;
     p2m->audit_p2m = NULL;
+    p2m->flush_and_unlock = ept_flush_and_unlock;
 
     /* Set the memory type used when accessing EPT paging structures. */
     ept->ept_mt = EPT_DEFAULT_MT;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index e2c82b1..4080410 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -65,6 +65,7 @@ static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
     mm_lock_init(&p2m->pod.lock);
     INIT_LIST_HEAD(&p2m->np2m_list);
     INIT_PAGE_LIST_HEAD(&p2m->pages);
+    INIT_PAGE_LIST_HEAD(&p2m->deferred_pages);
     INIT_PAGE_LIST_HEAD(&p2m->pod.super);
     INIT_PAGE_LIST_HEAD(&p2m->pod.single);
 
@@ -504,6 +505,23 @@ void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg)
     return;
 }
 
+void p2m_free_ptp_defer(struct p2m_domain *p2m, struct page_info *pg)
+{
+    page_list_del(pg, &p2m->pages);
+    page_list_add(pg, &p2m->deferred_pages);
+}
+
+void p2m_free_ptp_list(struct p2m_domain *p2m, struct page_list_head *list)
+{
+    struct page_info *pg, *tmp;
+
+    page_list_for_each_safe(pg, tmp, list)
+    {
+        page_list_del(pg, list);
+        p2m->domain->arch.paging.free_page(p2m->domain, pg);
+    }
+}
+
 /*
  * Allocate a new p2m table for a domain.
  *
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index d748557..89b7bd1 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -254,6 +254,11 @@ struct p2m_domain {
                                           unsigned long gfn, l1_pgentry_t *p,
                                           l1_pgentry_t new, unsigned int level);
     long               (*audit_p2m)(struct p2m_domain *p2m);
+    void               (*flush_and_unlock)(struct p2m_domain *p2m);
+
+    struct page_list_head deferred_pages;
+    unsigned int defer_flush;
+    bool_t need_flush;
 
     /* Default P2M access type for each page in the the domain: new pages,
      * swapped in pages, cleared pages, and pages that are ambiguously
@@ -681,6 +686,8 @@ static inline bool_t p2m_mem_access_sanity_check(struct domain *d)
 
 struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type);
 void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg);
+void p2m_free_ptp_defer(struct p2m_domain *p2m, struct page_info *pg);
+void p2m_free_ptp_list(struct p2m_domain *p2m, struct page_list_head *list);
 
 /* Directly set a p2m entry: only for use by p2m code. Does not need
  * a call to put_gfn afterwards/ */
-- 
2.1.4

