[Xen-changelog] Clean up, fix, and rationalise RAM mapping in Xen.



ChangeSet 1.1462, 2005/05/19 13:36:18+01:00, kaf24@xxxxxxxxxxxxxxxxxxxx

        Clean up, fix, and rationalise RAM mapping in Xen. 
        
        First, x86/64 must take care to map only registered RAM areas and not
        adjacent I/O holes -- otherwise a CPU may cache I/O space and cause
        coherency conflicts on the memory bus.
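        
        For illustration only (a minimal sketch, not part of this
        changeset): how such RAM-only mapping might look, assuming the
        usual e820map/e820entry definitions and the map_pages_to_xen()
        interface introduced below. The helper name map_ram_regions()
        is hypothetical.
        
            /* Map only E820_RAM regions into the direct map, rounding
             * each region inwards to whole pages, so that no adjacent
             * I/O hole is ever mapped (and hence never cached). */
            static void map_ram_regions(void)
            {
                unsigned long start, end;
                int i;
        
                for ( i = 0; i < e820.nr_map; i++ )
                {
                    if ( e820.map[i].type != E820_RAM )
                        continue;
                    start = (unsigned long)(e820.map[i].addr +
                                            PAGE_SIZE - 1) & PAGE_MASK;
                    end   = (unsigned long)(e820.map[i].addr +
                                            e820.map[i].size) & PAGE_MASK;
                    if ( end > start )
                        map_pages_to_xen(PAGE_OFFSET + start, start,
                                         end - start, PAGE_HYPERVISOR);
                }
            }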
        
        Second, map_pages() and the memguard mechanisms are no longer sub-arch
        specific (moved to arch/x86/mm.c:map_pages_to_xen()).
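        
        A hedged usage sketch of the consolidated interface, using two
        calls taken verbatim from the patch below: map_pages_to_xen()
        takes a virtual start address, a physical start address, a byte
        length, and mapping flags. The new MAP_SMALL_PAGES flag forces
        4kB mappings (no superpages), which memguard relies on so that
        protections can later be changed on individual pages.
        
            /* Frame-table mapping, in 4MB chunks (arch/x86/mm.c). */
            map_pages_to_xen(
                FRAMETABLE_VIRT_START + i, p, 4UL << 20, PAGE_HYPERVISOR);
        
            /* Memguard initialisation: map the xenheap with 4kB pages
             * only, so individual pages can be guarded later. */
            map_pages_to_xen(
                PAGE_OFFSET, 0, xenheap_phys_end,
                __PAGE_HYPERVISOR|MAP_SMALL_PAGES);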
        
        Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>



 arch/x86/acpi/boot.c    |   11 +-
 arch/x86/boot/x86_64.S  |    8 -
 arch/x86/domain.c       |    4 
 arch/x86/domain_build.c |    4 
 arch/x86/mm.c           |  180 ++++++++++++++++++++++--------------------
 arch/x86/setup.c        |   55 +++++++------
 arch/x86/x86_32/mm.c    |  142 +++++----------------------------
 arch/x86/x86_64/mm.c    |  203 ++++++++----------------------------------------
 include/asm-x86/mm.h    |    8 -
 include/asm-x86/page.h  |   18 +++-
 10 files changed, 219 insertions(+), 414 deletions(-)


diff -Nru a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c
--- a/xen/arch/x86/acpi/boot.c  2005-05-19 09:05:16 -04:00
+++ b/xen/arch/x86/acpi/boot.c  2005-05-19 09:05:16 -04:00
@@ -89,15 +89,18 @@
  */
 enum acpi_irq_model_id         acpi_irq_model = ACPI_IRQ_MODEL_PIC;
 
-#ifdef CONFIG_X86_64
+#if 0/*def     CONFIG_X86_64*/
 
 /* rely on all ACPI tables being in the direct mapping */
 char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
 {
        if (!phys_addr || !size)
-               return NULL;
-       /* XEN: We map all e820 areas which should include every ACPI table. */
-       return __va(phys_addr);
+       return NULL;
+
+       if (phys_addr < (end_pfn_map << PAGE_SHIFT))
+               return __va(phys_addr);
+
+       return NULL;
 }
 
 #else
diff -Nru a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S        2005-05-19 09:05:15 -04:00
+++ b/xen/arch/x86/boot/x86_64.S        2005-05-19 09:05:15 -04:00
@@ -230,7 +230,7 @@
         .quad 0x0000000000000000     /* unused                            */
         .fill 4*NR_CPUS,8,0          /* space for TSS and LDT per CPU     */
 
-/* Initial PML4 -- level-4 page table */
+/* Initial PML4 -- level-4 page table. */
         .org 0x2000
 ENTRY(idle_pg_table)
 ENTRY(idle_pg_table_4)
@@ -238,15 +238,15 @@
         .fill 261,8,0
         .quad idle_pg_table_l3 - __PAGE_OFFSET + 7 # PML4[262]
 
-/* Initial PDP -- level-3 page table */
+/* Initial PDP -- level-3 page table. */
         .org 0x3000
 ENTRY(idle_pg_table_l3)
         .quad idle_pg_table_l2 - __PAGE_OFFSET + 7
 
-/* Initial PDE -- level-2 page table. */
+/* Initial PDE -- level-2 page table. Maps first 64MB physical memory. */
         .org 0x4000
 ENTRY(idle_pg_table_l2)
-        .macro identmap from=0, count=512
+        .macro identmap from=0, count=32
         .if \count-1
         identmap "(\from+0)","(\count/2)"
         identmap "(\from+(0x200000*(\count/2)))","(\count/2)"
diff -Nru a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     2005-05-19 09:05:15 -04:00
+++ b/xen/arch/x86/domain.c     2005-05-19 09:05:15 -04:00
@@ -264,7 +264,7 @@
     ed->arch.perdomain_ptes = d->arch.mm_perdomain_pt;
     ed->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] =
         l1e_create_pfn(page_to_pfn(virt_to_page(gdt_table)),
-                       __PAGE_HYPERVISOR);
+                       PAGE_HYPERVISOR);
     
     ed->arch.guest_vtable  = __linear_l2_table;
     ed->arch.shadow_vtable = __shadow_linear_l2_table;
@@ -303,7 +303,7 @@
         d->arch.mm_perdomain_pt + (ed->vcpu_id << PDPT_VCPU_SHIFT);
     ed->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] =
         l1e_create_pfn(page_to_pfn(virt_to_page(gdt_table)),
-                       __PAGE_HYPERVISOR);
+                       PAGE_HYPERVISOR);
 }
 
 #ifdef CONFIG_VMX
diff -Nru a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       2005-05-19 09:05:15 -04:00
+++ b/xen/arch/x86/domain_build.c       2005-05-19 09:05:15 -04:00
@@ -574,8 +574,8 @@
             //
             ASSERT( root_get_value(idle_pg_table[1]) == 0 );
             ASSERT( pagetable_val(d->arch.phys_table) );
-            idle_pg_table[1] = root_create_phys(pagetable_val(d->arch.phys_table),
-                                                __PAGE_HYPERVISOR);
+            idle_pg_table[1] = root_create_phys(
+                pagetable_val(d->arch.phys_table), __PAGE_HYPERVISOR);
             translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT),
                                 pagetable_get_pfn(ed->arch.guest_table));
             idle_pg_table[1] = root_empty();
diff -Nru a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c 2005-05-19 09:05:16 -04:00
+++ b/xen/arch/x86/mm.c 2005-05-19 09:05:16 -04:00
@@ -160,8 +160,8 @@
         p = alloc_boot_pages(min(frame_table_size - i, 4UL << 20), 4UL << 20);
         if ( p == 0 )
             panic("Not enough memory for frame table\n");
-        map_pages(idle_pg_table, FRAMETABLE_VIRT_START + i, p, 
-                  4UL << 20, PAGE_HYPERVISOR);
+        map_pages_to_xen(
+            FRAMETABLE_VIRT_START + i, p, 4UL << 20, PAGE_HYPERVISOR);
     }
 
     memset(frame_table, 0, frame_table_size);
@@ -2833,101 +2833,113 @@
     free_xenheap_page((unsigned long)d->arch.ptwr[PTWR_PT_INACTIVE].page);
 }
 
+/* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */
+int map_pages_to_xen(
+    unsigned long v,
+    unsigned long p,
+    unsigned long s,
+    unsigned long flags)
+{
+    l2_pgentry_t *pl2e, ol2e;
+    l1_pgentry_t *pl1e;
+    unsigned int  i;
 
+    unsigned int  map_small_pages = !!(flags & MAP_SMALL_PAGES);
+    flags &= ~MAP_SMALL_PAGES;
 
-/************************************************************************/
-/************************************************************************/
-/************************************************************************/
+    while ( s != 0 )
+    {
+        pl2e = virt_to_xen_l2e(v);
 
-/* Graveyard: stuff below may be useful in future. */
-#if 0
-    case MMUEXT_TRANSFER_PAGE:
-        domid  = (domid_t)(val >> 16);
-        gntref = (grant_ref_t)((val & 0xFF00) | ((ptr >> 2) & 0x00FF));
-        
-        if ( unlikely(IS_XEN_HEAP_FRAME(page)) ||
-             unlikely(!pfn_valid(pfn)) ||
-             unlikely((e = find_domain_by_id(domid)) == NULL) )
+        if ( (((v|p) & ((1 << L2_PAGETABLE_SHIFT) - 1)) == 0) &&
+             (s >= (1 << L2_PAGETABLE_SHIFT)) &&
+             !map_small_pages )
         {
-            MEM_LOG("Bad frame (%p) or bad domid (%d).\n", pfn, domid);
-            okay = 0;
-            break;
+            /* Super-page mapping. */
+            ol2e  = *pl2e;
+            *pl2e = l2e_create_phys(p, flags|_PAGE_PSE);
+
+            if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
+            {
+                local_flush_tlb_pge();
+                if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) )
+                    free_xen_pagetable(l2e_get_page(*pl2e));
+            }
+
+            v += 1 << L2_PAGETABLE_SHIFT;
+            p += 1 << L2_PAGETABLE_SHIFT;
+            s -= 1 << L2_PAGETABLE_SHIFT;
         }
+        else
+        {
+            /* Normal page mapping. */
+            if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
+            {
+                pl1e = page_to_virt(alloc_xen_pagetable());
+                clear_page(pl1e);
+                *pl2e = l2e_create_phys(__pa(pl1e), __PAGE_HYPERVISOR);
+            }
+            else if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
+            {
+                pl1e = page_to_virt(alloc_xen_pagetable());
+                for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+                    pl1e[i] = l1e_create_pfn(
+                        l2e_get_pfn(*pl2e) + i,
+                        l2e_get_flags(*pl2e) & ~_PAGE_PSE);
+                *pl2e = l2e_create_phys(__pa(pl1e), __PAGE_HYPERVISOR);
+                local_flush_tlb_pge();
+            }
+
+            pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(v);
+            if ( (l1e_get_flags(*pl1e) & _PAGE_PRESENT) )
+                local_flush_tlb_one(v);
+            *pl1e = l1e_create_phys(p, flags);
+
+            v += 1 << L1_PAGETABLE_SHIFT;
+            p += 1 << L1_PAGETABLE_SHIFT;
+            s -= 1 << L1_PAGETABLE_SHIFT;       
+        }
+    }
 
-        spin_lock(&d->page_alloc_lock);
+    return 0;
+}
 
-        /*
-         * The tricky bit: atomically release ownership while there is just one
-         * benign reference to the page (PGC_allocated). If that reference
-         * disappears then the deallocation routine will safely spin.
-         */
-        _d  = pickle_domptr(d);
-        _nd = page->u.inuse._domain;
-        y   = page->count_info;
-        do {
-            x = y;
-            if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != 
-                          (1|PGC_allocated)) ||
-                 unlikely(_nd != _d) )
-            {
-                MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p,"
-                        " caf=%08x, taf=%08x\n", page_to_pfn(page),
-                        d, d->domain_id, unpickle_domptr(_nd), x, 
-                        page->u.inuse.type_info);
-                spin_unlock(&d->page_alloc_lock);
-                put_domain(e);
-                return 0;
-            }
-            __asm__ __volatile__(
-                LOCK_PREFIX "cmpxchg8b %2"
-                : "=d" (_nd), "=a" (y),
-                "=m" (*(volatile u64 *)(&page->count_info))
-                : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
-        } 
-        while ( unlikely(_nd != _d) || unlikely(y != x) );
+#ifdef MEMORY_GUARD
 
-        /*
-         * Unlink from 'd'. At least one reference remains (now anonymous), so
-         * noone else is spinning to try to delete this page from 'd'.
-         */
-        d->tot_pages--;
-        list_del(&page->list);
-        
-        spin_unlock(&d->page_alloc_lock);
+void memguard_init(void)
+{
+    map_pages_to_xen(
+        PAGE_OFFSET, 0, xenheap_phys_end, __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
+}
 
-        spin_lock(&e->page_alloc_lock);
+static void __memguard_change_range(void *p, unsigned long l, int guard)
+{
+    unsigned long _p = (unsigned long)p;
+    unsigned long _l = (unsigned long)l;
+    unsigned long flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES;
 
-        /*

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog