[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Enormously better process destruction performance. By not running on



ChangeSet 1.1383, 2005/04/25 17:53:12+01:00, kaf24@xxxxxxxxxxxxxxxxxxxx

        Enormously better process destruction performance. By not running on
        pagetables when we are destroying them we massively reduce TLB flushes
        and increase writable p.t. batches. This patch would probably improve
        even native Linux performance, because of the rabid ZAP_BLOCK_SIZE
        changes to unmap_vmas() that were introduced to 2.6.11. :-)
        Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>



 arch/xen/i386/kernel/smp.c             |    6 +++---
 arch/xen/i386/mm/hypervisor.c          |   12 ++++++++----
 arch/xen/x86_64/mm/hypervisor.c        |   12 ++++++++----
 include/asm-xen/asm-i386/mmu_context.h |   19 ++++++++++++++-----
 include/asm-xen/asm-i386/tlbflush.h    |    9 ++++++---
 include/asm-xen/asm-x86_64/tlbflush.h  |    9 ++++++---
 include/asm-xen/hypervisor.h           |    4 ++--
 7 files changed, 47 insertions(+), 24 deletions(-)


diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c        2005-04-25 13:03:18 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c        2005-04-25 13:03:18 -04:00
@@ -448,11 +448,11 @@
                                     struct pt_regs *regs)
 { return 0; }
 void flush_tlb_current_task(void)
-{ xen_tlb_flush_mask(current->mm->cpu_vm_mask); }
+{ xen_tlb_flush_mask(&current->mm->cpu_vm_mask); }
 void flush_tlb_mm(struct mm_struct * mm)
-{ xen_tlb_flush_mask(mm->cpu_vm_mask); }
+{ xen_tlb_flush_mask(&mm->cpu_vm_mask); }
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
-{ xen_invlpg_mask(vma->vm_mm->cpu_vm_mask, va); }
+{ xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); }
 void flush_tlb_all(void)
 { xen_tlb_flush_all(); }
 
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c     2005-04-25 13:03:18 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c     2005-04-25 13:03:18 -04:00
@@ -106,11 +106,13 @@
     BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
-void xen_tlb_flush_mask(cpumask_t mask)
+void xen_tlb_flush_mask(cpumask_t *mask)
 {
     struct mmuext_op op;
+    if ( cpus_empty(*mask) )
+        return;
     op.cmd = MMUEXT_TLB_FLUSH_MULTI;
-    op.cpuset = mask.bits;
+    op.cpuset = mask->bits;
     BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
@@ -122,11 +124,13 @@
     BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
-void xen_invlpg_mask(cpumask_t mask, unsigned long ptr)
+void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
 {
     struct mmuext_op op;
+    if ( cpus_empty(*mask) )
+        return;
     op.cmd = MMUEXT_INVLPG_MULTI;
-    op.cpuset = mask.bits;
+    op.cpuset = mask->bits;
     op.linear_addr = ptr & PAGE_MASK;
     BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/hypervisor.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/hypervisor.c
--- a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/hypervisor.c   2005-04-25 13:03:18 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/hypervisor.c   2005-04-25 13:03:18 -04:00
@@ -116,11 +116,13 @@
     BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
-void xen_tlb_flush_mask(cpumask_t mask)
+void xen_tlb_flush_mask(cpumask_t *mask)
 {
     struct mmuext_op op;
+    if ( cpus_empty(*mask) )
+        return;
     op.cmd = MMUEXT_TLB_FLUSH_MULTI;
-    op.cpuset = mask.bits[0];
+    op.cpuset = mask->bits;
     BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
@@ -132,11 +134,13 @@
     BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
-void xen_invlpg_mask(cpumask_t mask, unsigned long ptr)
+void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
 {
     struct mmuext_op op;
+    if ( cpus_empty(*mask) )
+        return;
     op.cmd = MMUEXT_INVLPG_MULTI;
-    op.cpuset = mask.bits[0];
+    op.cpuset = mask->bits;
     op.linear_addr = ptr & PAGE_MASK;
     BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
diff -Nru a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h    2005-04-25 13:03:18 -04:00
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h    2005-04-25 13:03:18 -04:00
@@ -16,7 +16,7 @@
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
-#if 0 /* XEN */
+#if 0 /* XEN: no lazy tlb */
        unsigned cpu = smp_processor_id();
        if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
                per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_LAZY;
@@ -51,7 +51,7 @@
        if (likely(prev != next)) {
                /* stop flush ipis for the previous mm */
                cpu_clear(cpu, prev->cpu_vm_mask);
-#if 0 /* XEN */
+#if 0 /* XEN: no lazy tlb */
                per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK;
                per_cpu(cpu_tlbstate, cpu).active_mm = next;
 #endif
@@ -76,7 +76,7 @@
 
                BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF));
        }
-#if 0 /* XEN */
+#if 0 /* XEN: no lazy tlb */
        else {
                per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK;
                BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next);
@@ -92,8 +92,17 @@
 #endif
 }
 
-#define deactivate_mm(tsk, mm) \
-       asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
+/*
+ * XEN: We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
+ * *much* faster this way, as no tlb flushes means much bigger wrpt batches.
+ */
+#define deactivate_mm(tsk, mm) do {                                    \
+       asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0));                   \
+       if ((mm) && cpu_isset(smp_processor_id(), (mm)->cpu_vm_mask)) { \
+               cpu_clear(smp_processor_id(), (mm)->cpu_vm_mask);       \
+               load_cr3(swapper_pg_dir);                               \
+       }                                                               \
+} while (0)
 
 #define activate_mm(prev, next) do {           \
        switch_mm((prev),(next),NULL);          \
diff -Nru a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h       2005-04-25 13:03:18 -04:00
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h       2005-04-25 13:03:18 -04:00
@@ -40,21 +40,24 @@
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
-       if (mm == current->active_mm)
+       /* XEN: cpu_vm_mask is more accurate than active_mm. */
+       if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
                __flush_tlb();
 }
 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
        unsigned long addr)
 {
-       if (vma->vm_mm == current->active_mm)
+       /* XEN: cpu_vm_mask is more accurate than active_mm. */
+       if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask))
                __flush_tlb_one(addr);
 }
 
 static inline void flush_tlb_range(struct vm_area_struct *vma,
        unsigned long start, unsigned long end)
 {
-       if (vma->vm_mm == current->active_mm)
+       /* XEN: cpu_vm_mask is more accurate than active_mm. */
+       if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask))
                __flush_tlb();
 }
 
diff -Nru a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h     2005-04-25 13:03:18 -04:00
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h     2005-04-25 13:03:18 -04:00
@@ -44,21 +44,24 @@
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
-       if (mm == current->active_mm)
+       /* XEN: cpu_vm_mask is more accurate than active_mm. */
+       if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
                __flush_tlb();
 }
 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
        unsigned long addr)
 {
-       if (vma->vm_mm == current->active_mm)
+       /* XEN: cpu_vm_mask is more accurate than active_mm. */
+       if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask))
                __flush_tlb_one(addr);
 }
 
 static inline void flush_tlb_range(struct vm_area_struct *vma,
        unsigned long start, unsigned long end)
 {
-       if (vma->vm_mm == current->active_mm)
+       /* XEN: cpu_vm_mask is more accurate than active_mm. */
+       if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask))
                __flush_tlb();
 }
 
diff -Nru a/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h b/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h
--- a/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h      2005-04-25 13:03:18 -04:00
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h      2005-04-25 13:03:18 -04:00
@@ -110,8 +110,8 @@
 #include <linux/cpumask.h>
 void xen_tlb_flush_all(void);
 void xen_invlpg_all(unsigned long ptr);
-void xen_tlb_flush_mask(cpumask_t mask);
-void xen_invlpg_mask(cpumask_t mask, unsigned long ptr);
+void xen_tlb_flush_mask(cpumask_t *mask);
+void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr);
 #endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.