[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen master] x86: introduce and use scratch CPU mask



commit 3b6172645880f6324d0394d0d707f5d76b69ae1f
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Fri Dec 16 14:34:34 2016 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Fri Dec 16 14:34:34 2016 +0100

    x86: introduce and use scratch CPU mask
    
    __get_page_type(), so far using an on-stack CPU mask variable, is
    involved in recursion when e.g. pinning page tables. This means there
    may be up to five instances of the function active at a time, implying
    five instances of the (up to 512 bytes large) CPU mask variable. An IRQ
    happening at the deepest point of the stack has been observed to cause
    a stack overflow with a 4095-pCPU build, when the IRQ handling results
    in send_guest_pirq() being called (leading to vcpu_kick() -> ... ->
    csched_vcpu_wake() -> __runq_tickle() -> cpumask_raise_softirq(), the
    last two of which also have CPU mask variables on their stacks).
    
    Introduce a per-CPU variable instead, which can then be used by any
    code never running in IRQ context.
    
    The mask can then also be used by other MMU code as well as by
    msi_compose_msg() (and quite likely we'll find further uses down the
    road).
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 xen/arch/x86/mm.c         | 48 ++++++++++++++++++++++++++---------------------
 xen/arch/x86/msi.c        | 34 +--------------------------------
 xen/arch/x86/smpboot.c    |  9 +++++++--
 xen/include/asm-x86/smp.h |  1 +
 4 files changed, 36 insertions(+), 56 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 24a5211..8a9fe8f 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2477,6 +2477,7 @@ static int __get_page_type(struct page_info *page, unsigned long type,
     int rc = 0, iommu_ret = 0;
 
     ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
+    ASSERT(!in_irq());
 
     for ( ; ; )
     {
@@ -2509,20 +2510,21 @@ static int __get_page_type(struct page_info *page, unsigned long type,
                  * may be unnecessary (e.g., page was GDT/LDT) but those 
                  * circumstances should be very rare.
                  */
-                cpumask_t mask;
+                cpumask_t *mask = this_cpu(scratch_cpumask);
 
-                cpumask_copy(&mask, d->domain_dirty_cpumask);
+                BUG_ON(in_irq());
+                cpumask_copy(mask, d->domain_dirty_cpumask);
 
                 /* Don't flush if the timestamp is old enough */
-                tlbflush_filter(&mask, page->tlbflush_timestamp);
+                tlbflush_filter(mask, page->tlbflush_timestamp);
 
-                if ( unlikely(!cpumask_empty(&mask)) &&
+                if ( unlikely(!cpumask_empty(mask)) &&
                      /* Shadow mode: track only writable pages. */
                      (!shadow_mode_enabled(page_get_owner(page)) ||
                       ((nx & PGT_type_mask) == PGT_writable_page)) )
                 {
                     perfc_incr(need_flush_tlb_flush);
-                    flush_tlb_mask(&mask);
+                    flush_tlb_mask(mask);
                 }
 
                 /* We lose existing type and validity. */
@@ -3403,22 +3405,22 @@ long do_mmuext_op(
         case MMUEXT_TLB_FLUSH_MULTI:
         case MMUEXT_INVLPG_MULTI:
         {
-            cpumask_t pmask;
+            cpumask_t *mask = this_cpu(scratch_cpumask);
 
             if ( unlikely(d != pg_owner) )
                 rc = -EPERM;
             else if ( unlikely(vcpumask_to_pcpumask(d,
                                    guest_handle_to_param(op.arg2.vcpumask,
                                                          const_void),
-                                   &pmask)) )
+                                   mask)) )
                 rc = -EINVAL;
             if ( unlikely(rc) )
                 break;
 
             if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
-                flush_tlb_mask(&pmask);
+                flush_tlb_mask(mask);
             else if ( __addr_ok(op.arg1.linear_addr) )
-                flush_tlb_one_mask(&pmask, op.arg1.linear_addr);
+                flush_tlb_one_mask(mask, op.arg1.linear_addr);
             break;
         }
 
@@ -3456,14 +3458,14 @@ long do_mmuext_op(
             else if ( likely(cache_flush_permitted(d)) )
             {
                 unsigned int cpu;
-                cpumask_t mask;
+                cpumask_t *mask = this_cpu(scratch_cpumask);
 
-                cpumask_clear(&mask);
+                cpumask_clear(mask);
                 for_each_online_cpu(cpu)
-                    if ( !cpumask_intersects(&mask,
+                    if ( !cpumask_intersects(mask,
                                              per_cpu(cpu_sibling_mask, cpu)) )
-                        __cpumask_set_cpu(cpu, &mask);
-                flush_mask(&mask, FLUSH_CACHE);
+                        __cpumask_set_cpu(cpu, mask);
+                flush_mask(mask, FLUSH_CACHE);
             }
             else
             {
@@ -4458,7 +4460,7 @@ static int __do_update_va_mapping(
     struct page_info *gl1pg;
     l1_pgentry_t  *pl1e;
     unsigned long  bmap_ptr, gl1mfn;
-    cpumask_t      pmask;
+    cpumask_t     *mask = NULL;
     int            rc;
 
     perfc_incr(calls_to_update_va);
@@ -4504,15 +4506,17 @@ static int __do_update_va_mapping(
             flush_tlb_local();
             break;
         case UVMF_ALL:
-            flush_tlb_mask(d->domain_dirty_cpumask);
+            mask = d->domain_dirty_cpumask;
             break;
         default:
+            mask = this_cpu(scratch_cpumask);
             rc = vcpumask_to_pcpumask(d, const_guest_handle_from_ptr(bmap_ptr,
                                                                      void),
-                                      &pmask);
-            flush_tlb_mask(&pmask);
+                                      mask);
             break;
         }
+        if ( mask )
+            flush_tlb_mask(mask);
         break;
 
     case UVMF_INVLPG:
@@ -4522,15 +4526,17 @@ static int __do_update_va_mapping(
             paging_invlpg(v, va);
             break;
         case UVMF_ALL:
-            flush_tlb_one_mask(d->domain_dirty_cpumask, va);
+            mask = d->domain_dirty_cpumask;
             break;
         default:
+            mask = this_cpu(scratch_cpumask);
             rc = vcpumask_to_pcpumask(d, const_guest_handle_from_ptr(bmap_ptr,
                                                                      void),
-                                      &pmask);
-            flush_tlb_one_mask(&pmask, va);
+                                      mask);
             break;
         }
+        if ( mask )
+            flush_tlb_one_mask(mask, va);
         break;
     }
 
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index 2c38adb..6756a14 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -40,7 +40,6 @@ static void __pci_disable_msix(struct msi_desc *);
 /* bitmap indicate which fixed map is free */
 static DEFINE_SPINLOCK(msix_fixmap_lock);
 static DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);
-static DEFINE_PER_CPU(cpumask_var_t, scratch_mask);
 
 static int msix_fixmap_alloc(void)
 {
@@ -167,7 +166,7 @@ void msi_compose_msg(unsigned vector, const cpumask_t *cpu_mask, struct msi_msg
 
     if ( cpu_mask )
     {
-        cpumask_t *mask = this_cpu(scratch_mask);
+        cpumask_t *mask = this_cpu(scratch_cpumask);
 
         if ( !cpumask_intersects(cpu_mask, &cpu_online_map) )
             return;
@@ -1458,43 +1457,12 @@ int pci_restore_msi_state(struct pci_dev *pdev)
     return 0;
 }
 
-static int msi_cpu_callback(
-    struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-    unsigned int cpu = (unsigned long)hcpu;
-
-    switch ( action )
-    {
-    case CPU_UP_PREPARE:
-        if ( !alloc_cpumask_var(&per_cpu(scratch_mask, cpu)) )
-            return notifier_from_errno(ENOMEM);
-        break;
-    case CPU_UP_CANCELED:
-    case CPU_DEAD:
-        free_cpumask_var(per_cpu(scratch_mask, cpu));
-        break;
-    default:
-        break;
-    }
-
-    return NOTIFY_DONE;
-}
-
-static struct notifier_block msi_cpu_nfb = {
-    .notifier_call = msi_cpu_callback
-};
-
 void __init early_msi_init(void)
 {
     if ( use_msi < 0 )
         use_msi = !(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI);
     if ( !use_msi )
         return;
-
-    register_cpu_notifier(&msi_cpu_nfb);
-    if ( msi_cpu_callback(&msi_cpu_nfb, CPU_UP_PREPARE, NULL) &
-         NOTIFY_STOP_MASK )
-        BUG();
 }
 
 static void dump_msi(unsigned char key)
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 3a9dd3e..ba651fb 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -56,6 +56,8 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_mask);
 /* representing HT and core siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_mask);
 
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, scratch_cpumask);
+
 cpumask_t cpu_online_map __read_mostly;
 EXPORT_SYMBOL(cpu_online_map);
 
@@ -646,6 +648,7 @@ static void cpu_smpboot_free(unsigned int cpu)
 
     free_cpumask_var(per_cpu(cpu_sibling_mask, cpu));
     free_cpumask_var(per_cpu(cpu_core_mask, cpu));
+    free_cpumask_var(per_cpu(scratch_cpumask, cpu));
 
     if ( per_cpu(stubs.addr, cpu) )
     {
@@ -734,7 +737,8 @@ static int cpu_smpboot_alloc(unsigned int cpu)
         goto oom;
 
     if ( zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, cpu)) &&
-         zalloc_cpumask_var(&per_cpu(cpu_core_mask, cpu)) )
+         zalloc_cpumask_var(&per_cpu(cpu_core_mask, cpu)) &&
+         alloc_cpumask_var(&per_cpu(scratch_cpumask, cpu)) )
         return 0;
 
  oom:
@@ -791,7 +795,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
         panic("No memory for socket CPU siblings map");
 
     if ( !zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, 0)) ||
-         !zalloc_cpumask_var(&per_cpu(cpu_core_mask, 0)) )
+         !zalloc_cpumask_var(&per_cpu(cpu_core_mask, 0)) ||
+         !alloc_cpumask_var(&per_cpu(scratch_cpumask, 0)) )
         panic("No memory for boot CPU sibling/core maps");
 
     set_cpu_sibling_map(0);
diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h
index e3782bb..95d6cd2 100644
--- a/xen/include/asm-x86/smp.h
+++ b/xen/include/asm-x86/smp.h
@@ -25,6 +25,7 @@
  */
 DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_mask);
 DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
+DECLARE_PER_CPU(cpumask_var_t, scratch_cpumask);
 
 void smp_send_nmi_allbutself(void);
 
--
generated by git-patchbot for /home/xen/git/xen.git#master

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.