
[Xen-changelog] This is the initial patch for SMP PAE guest on x86-64 Xen.



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 0267063e050cbdb0408921c4acc68f09434bac65
# Parent  c3bb51c443a7a1a78a4917fd09e602fa35d9c318
This is the initial patch for SMP PAE guest support on x86-64 Xen.
With vcpus=2, the SMP PAE guest can complete a kernel build successfully.
It also improves the stability of SMP guests.

Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Xiaohui Xin <xiaohui.xin@xxxxxxxxx>
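
[Editor's note] For readers of this changeset: a 32-bit PAE guest running on 64-bit
Xen keys each top-level (L4) shadow not just by the guest frame number but also by a
per-VCPU index derived from CR3 (get_cr3_idxval()). This patch moves that index from
PGT_score_shift to the new PGT_pae_idx_shift, and the resync paths now skip guest
PDPT entries whose reserved bits are set or whose frame number falls outside
PGT_mfn_mask, dropping the corresponding shadow entry instead. The standalone sketch
below is only an illustration of those two checks, not Xen code: the
PGT_pae_idx_shift value of 52 and the simplified 23-bit PGT_mfn_mask are assumptions,
and shadow_key() is a stand-in for the real __shadow_status() hash lookup.

/* Illustrative sketch only; constants other than PAE_PDPT_RESERVED (0x1e6, copied
 * from the shadow_64.h hunk below) are assumed values, not the real Xen ones. */
#include <stdio.h>
#include <stdint.h>

#define PGT_pae_idx_shift   52                  /* assumed PGT_high_mfn_shift */
#define PGT_mfn_mask        ((1UL << 23) - 1)   /* simplified 23-bit mask */
#define PAE_PDPT_RESERVED   0x1e6               /* PDPTE reserved bits [8:5], [2:1] */

/* Key used to look up a PAE guest's L4 shadow: the per-VCPU PDPT index is
 * folded into the bits above the guest frame number. */
static unsigned long shadow_key(unsigned long gpfn, uint32_t cr3_idx)
{
    return ((unsigned long)cr3_idx << PGT_pae_idx_shift) | gpfn;
}

/* A PDPT entry is only resynced if its reserved bits are clear and the frame
 * number it references fits in the mfn field; otherwise the shadow entry is
 * dropped, mirroring the new "continue" paths added to shadow.c. */
static int pdpte_worth_shadowing(uint64_t pdpte)
{
    unsigned long gpfn = (unsigned long)(pdpte >> 12);

    if (pdpte & PAE_PDPT_RESERVED)
        return 0;   /* looks like it's no longer a page table */
    if (gpfn != (gpfn & PGT_mfn_mask))
        return 0;   /* frame number out of range for a page table */
    return 1;
}

int main(void)
{
    uint64_t good = (0x1234ULL << 12) | 0x1;          /* present, reserved bits clear */
    uint64_t bad  = (0x1234ULL << 12) | 0x1 | 0x40;   /* reserved bit 6 set */

    printf("lookup key for idx 3: %#lx\n", shadow_key(0x1234, 3));
    printf("good PDPTE resynced: %d\n", pdpte_worth_shadowing(good));
    printf("bad PDPTE resynced:  %d\n", pdpte_worth_shadowing(bad));
    return 0;
}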

diff -r c3bb51c443a7 -r 0267063e050c xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Thu Apr 13 09:29:27 2006
+++ b/xen/arch/x86/Makefile     Thu Apr 13 09:31:53 2006
@@ -76,6 +76,7 @@
        $(HOSTCC) $(HOSTCFLAGS) -o $@ $<
 
 shadow_guest32.o: shadow.c
+shadow_guest32pae.o: shadow.c
 
 .PHONY: clean
 clean::
diff -r c3bb51c443a7 -r 0267063e050c xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Thu Apr 13 09:29:27 2006
+++ b/xen/arch/x86/shadow.c     Thu Apr 13 09:31:53 2006
@@ -1531,14 +1531,10 @@
 
         idx = get_cr3_idxval(v);
         smfn = __shadow_status(
-            d, ((unsigned long)(idx << PGT_score_shift) | entry->gpfn), PGT_l4_shadow);
-
-#ifndef NDEBUG
+            d, ((unsigned long)(idx << PGT_pae_idx_shift) | entry->gpfn), PGT_l4_shadow);
+
         if ( !smfn ) 
-        {
-            BUG();
-        }
-#endif
+            continue;
 
         guest    = (pgentry_64_t *)map_domain_page(entry->gmfn);
         snapshot = (pgentry_64_t *)map_domain_page(entry->snapshot_mfn);
@@ -1550,9 +1546,35 @@
             if ( entry_has_changed(
                     guest[index], snapshot[index], PAGE_FLAG_MASK) ) 
             {
+                unsigned long gpfn;
+
+                /*
+                 * Looks like it's no longer a page table. 
+                 */
+                if ( unlikely(entry_get_value(guest[index]) & PAE_PDPT_RESERVED) )
+                {
+                    if ( entry_get_flags(shadow_l3[i]) & _PAGE_PRESENT )
+                        put_shadow_ref(entry_get_pfn(shadow_l3[i]));
+
+                    shadow_l3[i] = entry_empty();
+                    continue;
+                }
+
+                gpfn = entry_get_pfn(guest[index]);
+
+                if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
+                {
+                    if ( entry_get_flags(shadow_l3[i]) & _PAGE_PRESENT )
+                        put_shadow_ref(entry_get_pfn(shadow_l3[i]));
+
+                    shadow_l3[i] = entry_empty();
+                    continue;
+                }
+
                 validate_entry_change(d, &guest[index],
                                       &shadow_l3[i], PAGING_L3);
             }
+
             if ( entry_get_value(guest[index]) != 0 )
                 max = i;
 
@@ -1675,6 +1697,19 @@
                     guest_l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
                 {
                     int error;
+
+#if CONFIG_PAGING_LEVELS == 4
+                    unsigned long gpfn;
+
+                    gpfn = guest_l1e_get_paddr(guest1[i]) >> PAGE_SHIFT;
+
+                    if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
+                    {
+                        guest_l1_pgentry_t tmp_gl1e = guest_l1e_empty();
+                        validate_pte_change(d, tmp_gl1e, sl1e_p);
+                        continue;
+                    }
+#endif
 
                     error = validate_pte_change(d, guest1[i], sl1e_p);
                     if ( error ==  -1 )
@@ -1698,6 +1733,7 @@
             perfc_incrc(resync_l1);
             perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
             perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
+
             if ( d->arch.ops->guest_paging_levels >= PAGING_L3 &&
                  unshadow_l1 ) {
                 pgentry_64_t l2e = { 0 };
@@ -1804,18 +1840,22 @@
             for ( i = min_shadow; i <= max_shadow; i++ )
             {
                 if ( (i < min_snapshot) || (i > max_snapshot) ||
-                  entry_has_changed(
-                      guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) )
+                    entry_has_changed(
+                        guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) )
                 {
-
                     unsigned long gpfn;
 
                     gpfn = entry_get_pfn(guest_pt[i]);
                     /*
-                     * Looks like it's longer a page table.
+                     * Looks like it's no longer a page table.
                      */
                     if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
+                    {
+                        if ( entry_get_flags(shadow_pt[i]) & _PAGE_PRESENT )
+                            put_shadow_ref(entry_get_pfn(shadow_pt[i]));
+                         shadow_pt[i] = entry_empty(); 
                         continue;
+                    }
 
                     need_flush |= validate_entry_change(
                         d, &guest_pt[i], &shadow_pt[i],
@@ -1864,11 +1904,17 @@
                     unsigned long gpfn;
 
                     gpfn = l4e_get_pfn(new_root_e);
+
                     /*
-                     * Looks like it's longer a page table.
+                     * Looks like it's no longer a page table.
                      */
                     if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
+                    {
+                        if ( l4e_get_flags(shadow4[i]) & _PAGE_PRESENT )
+                            put_shadow_ref(l4e_get_pfn(shadow4[i]));
+                        shadow4[i] = l4e_empty(); 
                         continue;
+                    }
 
                     if ( d->arch.ops->guest_paging_levels == PAGING_L4 ) 
                     {
@@ -2372,7 +2418,7 @@
     if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 ) 
     {
         u32 index = get_cr3_idxval(v);
-        gpfn = (index << PGT_score_shift) | gpfn;
+        gpfn = ((unsigned long)index << PGT_pae_idx_shift) | gpfn;
     }
 #endif
 
@@ -3233,8 +3279,35 @@
     int i;
 
     for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ )
+    {
+        unsigned long gpfn;
+
+        /*
+         * Looks like it's no longer a page table. 
+         */
+        if ( unlikely(entry_get_value(gple[index*4+i]) & PAE_PDPT_RESERVED) )
+        {
+            if ( entry_get_flags(sple[i]) & _PAGE_PRESENT )
+                put_shadow_ref(entry_get_pfn(sple[i]));
+
+            sple[i] = entry_empty();
+            continue;
+        }
+
+        gpfn = entry_get_pfn(gple[index*4+i]);
+
+        if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
+        {
+            if ( entry_get_flags(sple[i]) & _PAGE_PRESENT )
+                put_shadow_ref(entry_get_pfn(sple[i]));
+
+            sple[i] = entry_empty();
+            continue;
+        }
+
         validate_entry_change(
             v->domain, &gple[index*4+i], &sple[i], PAGING_L3);
+    }
 
     unmap_domain_page(sple);
 }
diff -r c3bb51c443a7 -r 0267063e050c xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Thu Apr 13 09:29:27 2006
+++ b/xen/arch/x86/shadow_public.c      Thu Apr 13 09:31:53 2006
@@ -102,6 +102,15 @@
 
 int shadow_set_guest_paging_levels(struct domain *d, int levels)
 {
+    struct vcpu *v = current;
+
+    /*
+     * Need to wait for VCPU0 to complete the on-going shadow ops.
+     */
+
+    if ( v->vcpu_id )
+        return 1;
+
     shadow_lock(d);
 
     switch(levels) {
@@ -692,7 +701,6 @@
 void free_shadow_page(unsigned long smfn)
 {
     struct page_info *page = mfn_to_page(smfn);
-
     unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
     struct domain *d = page_get_owner(mfn_to_page(gmfn));
     unsigned long gpfn = mfn_to_gmfn(d, gmfn);
@@ -709,10 +717,9 @@
         if ( !mfn )
             gpfn |= (1UL << 63);
     }
-    if (d->arch.ops->guest_paging_levels == PAGING_L3)
-        if (type == PGT_l4_shadow ) {
-            gpfn = ((unsigned long)page->tlbflush_timestamp << PGT_score_shift) | gpfn;
-        }
+    if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+        if ( type == PGT_l4_shadow ) 
+            gpfn = ((unsigned long)page->tlbflush_timestamp << PGT_pae_idx_shift) | gpfn;
 #endif
 
     delete_shadow_status(d, gpfn, gmfn, type);
@@ -743,9 +750,24 @@
 #if CONFIG_PAGING_LEVELS >= 3
     case PGT_l2_shadow:
     case PGT_l3_shadow:
+        shadow_demote(d, gpfn, gmfn);
+        free_shadow_tables(d, smfn, shadow_type_to_level(type));
+        d->arch.shadow_page_count--;
+        break;
+
     case PGT_l4_shadow:
         gpfn = gpfn & PGT_mfn_mask;
-        shadow_demote(d, gpfn, gmfn);
+        if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+        {
+            /*
+             * Since a single PDPT page can have multiple PDPs, it's possible
+             * that shadow_demote() has been already called for gmfn.
+             */
+            if ( mfn_is_page_table(gmfn) )
+                shadow_demote(d, gpfn, gmfn);
+        } else
+            shadow_demote(d, gpfn, gmfn);
+
         free_shadow_tables(d, smfn, shadow_type_to_level(type));
         d->arch.shadow_page_count--;
         break;
@@ -2041,7 +2063,16 @@
 
 void clear_all_shadow_status(struct domain *d)
 {
+    struct vcpu *v = current;
+
+    /*
+     * Don't clean up while other vcpus are working.
+     */
+    if ( v->vcpu_id )
+        return;
+
     shadow_lock(d);
+
     free_shadow_pages(d);
     free_shadow_ht_entries(d);
     d->arch.shadow_ht = 
@@ -2054,6 +2085,7 @@
            shadow_ht_buckets * sizeof(struct shadow_status));
 
     free_out_of_sync_entries(d);
+
     shadow_unlock(d);
 }
 
diff -r c3bb51c443a7 -r 0267063e050c xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Thu Apr 13 09:29:27 2006
+++ b/xen/include/asm-x86/mm.h  Thu Apr 13 09:31:53 2006
@@ -103,11 +103,13 @@
 #define PGT_high_mfn_mask   (0xfffUL << PGT_high_mfn_shift)
 #define PGT_mfn_mask        (((1U<<23)-1) | PGT_high_mfn_mask)
 #define PGT_high_mfn_nx     (0x800UL << PGT_high_mfn_shift)
+#define PGT_pae_idx_shift   PGT_high_mfn_shift
 #else
  /* 23-bit mfn mask for shadow types: good for up to 32GB RAM. */
 #define PGT_mfn_mask        ((1U<<23)-1)
  /* NX for PAE xen is not supported yet */
 #define PGT_high_mfn_nx     (1ULL << 63)
+#define PGT_pae_idx_shift   23
 #endif
 
 #define PGT_score_shift     23
diff -r c3bb51c443a7 -r 0267063e050c xen/include/asm-x86/shadow_64.h
--- a/xen/include/asm-x86/shadow_64.h   Thu Apr 13 09:29:27 2006
+++ b/xen/include/asm-x86/shadow_64.h   Thu Apr 13 09:31:53 2006
@@ -119,6 +119,8 @@
 #define PAE_CR3_IDX_MASK    0x7f
 #define PAE_CR3_IDX_NO      128
 
+#define PAE_PDPT_RESERVED   0x1e6 /* [8:5], [2,1] */
+
 
/******************************************************************************/
 static inline int  table_offset_64(unsigned long va, int level)
 {
