[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Memory management fixes. Page tables are created, buddy allocator now



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 98a6eb458c783cbbb8f19e5713a4ad8a1ea830c6
# Parent  b88c5350de57a1fc18943cbb33fa72c348004fc8
Memory management fixes. Page tables are created, buddy allocator now 
has ability to free memory (as well as allocate it).

Signed-off-by: Grzegorz Milos <gm281@xxxxxxxxx>

diff -r b88c5350de57 -r 98a6eb458c78 extras/mini-os/include/hypervisor.h
--- a/extras/mini-os/include/hypervisor.h       Thu Aug 25 16:21:19 2005
+++ b/extras/mini-os/include/hypervisor.h       Thu Aug 25 16:25:26 2005
@@ -80,16 +80,42 @@
 
 static __inline__ int HYPERVISOR_mmu_update(mmu_update_t *req, 
                                             int count, 
-                                            int *success_count)
-{
-    int ret;
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret) : "0" (__HYPERVISOR_mmu_update), 
-        _a1 (req), _a2 (count), _a3 (success_count)  : "memory" );
-
-    return ret;
-}
+                                            int *success_count, 
+                                            domid_t domid)
+{
+    int ret;
+    unsigned long ign1, ign2, ign3, ign4;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+        : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
+          "3" (success_count), "4" (domid)
+        : "memory" );
+
+    return ret;
+}
+
+
+static __inline__ int HYPERVISOR_mmuext_op(struct mmuext_op *op, 
+                                           int count, 
+                                           int *success_count, 
+                                           domid_t domid)
+{
+    int ret;
+    unsigned long ign1, ign2, ign3, ign4;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+        : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count),
+          "3" (success_count), "4" (domid)
+        : "memory" );
+
+    return ret;
+}
+
+
 
 static __inline__ int HYPERVISOR_set_gdt(unsigned long *frame_list, int 
entries)
 {
diff -r b88c5350de57 -r 98a6eb458c78 extras/mini-os/include/mm.h
--- a/extras/mini-os/include/mm.h       Thu Aug 25 16:21:19 2005
+++ b/extras/mini-os/include/mm.h       Thu Aug 25 16:25:26 2005
@@ -43,13 +43,27 @@
 #define PADDR_MASK              ((1UL << PADDR_BITS)-1)
 #define VADDR_MASK              ((1UL << VADDR_BITS)-1)
 
-#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)
+#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> 
L1_PAGETABLE_SHIFT)
+
+#endif
+
+
+
+#ifdef __i386__
+
+#define L1_PAGETABLE_SHIFT      12
+#define L2_PAGETABLE_SHIFT      22
+
+#define L1_PAGETABLE_ENTRIES    1024
+#define L2_PAGETABLE_ENTRIES    1024
+#endif
 
 /* Given a virtual address, get an entry offset into a page table. */
 #define l1_table_offset(_a) \
   (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
 #define l2_table_offset(_a) \
   (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
+#ifdef __x86_64__
 #define l3_table_offset(_a) \
   (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
 #define l4_table_offset(_a) \
@@ -67,13 +81,16 @@
 #define _PAGE_PSE      0x080UL
 #define _PAGE_GLOBAL   0x100UL
 
-#define PAGE_SHIFT      12
-#define PAGE_SIZE       (1UL << PAGE_SHIFT)
+#define L1_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | 
_PAGE_USER)
+
+#define PAGE_SIZE       (1UL << L1_PAGETABLE_SHIFT)
+#define PAGE_SHIFT      L1_PAGETABLE_SHIFT
 #define PAGE_MASK       (~(PAGE_SIZE-1))
 
-#define PFN_UP(x)      (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-#define PFN_DOWN(x)    ((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x)    ((x) << PAGE_SHIFT)
+#define PFN_UP(x)      (((x) + PAGE_SIZE-1) >> L1_PAGETABLE_SHIFT)
+#define PFN_DOWN(x)    ((x) >> L1_PAGETABLE_SHIFT)
+#define PFN_PHYS(x)    ((x) << L1_PAGETABLE_SHIFT)
 
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr)        (((addr)+PAGE_SIZE-1)&PAGE_MASK)
@@ -83,14 +100,14 @@
 #define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)])
 static __inline__ unsigned long phys_to_machine(unsigned long phys)
 {
-    unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT);
-    machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
+    unsigned long machine = pfn_to_mfn(phys >> L1_PAGETABLE_SHIFT);
+    machine = (machine << L1_PAGETABLE_SHIFT) | (phys & ~PAGE_MASK);
     return machine;
 }
 static __inline__ unsigned long machine_to_phys(unsigned long machine)
 {
-    unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT);
-    phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
+    unsigned long phys = mfn_to_pfn(machine >> L1_PAGETABLE_SHIFT);
+    phys = (phys << L1_PAGETABLE_SHIFT) | (machine & ~PAGE_MASK);
     return phys;
 }
 
@@ -105,7 +122,10 @@
 #define __va to_virt
 #define __pa to_phys
 
+#define virt_to_pfn(_virt)         (PFN_DOWN(to_phys(_virt)))
+
 void init_mm(void);
 unsigned long alloc_pages(int order);
+int is_mfn_mapped(unsigned long mfn);
 
 #endif /* _MM_H_ */
diff -r b88c5350de57 -r 98a6eb458c78 extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c   Thu Aug 25 16:21:19 2005
+++ b/extras/mini-os/kernel.c   Thu Aug 25 16:25:26 2005
@@ -133,7 +133,7 @@
     for ( ; ; ) 
     {      
 //        HYPERVISOR_yield();
-        block(1);
+        block(100);
         i++;
     }
 }
diff -r b88c5350de57 -r 98a6eb458c78 extras/mini-os/mm.c
--- a/extras/mini-os/mm.c       Thu Aug 25 16:21:19 2005
+++ b/extras/mini-os/mm.c       Thu Aug 25 16:25:26 2005
@@ -5,9 +5,9 @@
  *
  *        File: mm.c
  *      Author: Rolf Neugebauer (neugebar@xxxxxxxxxxxxx)
- *     Changes: 
+ *     Changes: Grzegorz Milos
  *              
- *        Date: Aug 2003
+ *        Date: Aug 2003, chages Aug 2005
  * 
  * Environment: Xen Minimal OS
  * Description: memory management related functions
@@ -41,86 +41,18 @@
 #include <types.h>
 #include <lib.h>
 
+
+#ifdef MM_DEBUG
+#define DEBUG(_f, _a...) \
+    printk("MINI_OS(file=mm.c, line=%d) " _f "\n", __LINE__, ## _a)
+#else
+#define DEBUG(_f, _a...)    ((void)0)
+#endif
+
 unsigned long *phys_to_machine_mapping;
 extern char *stack;
 extern char _text, _etext, _edata, _end;
 
-static void init_page_allocator(unsigned long min, unsigned long max);
-
-void init_mm(void)
-{
-
-    unsigned long start_pfn, max_pfn, max_free_pfn;
-
-    unsigned long *pgd = (unsigned long *)start_info.pt_base;
-
-    printk("MM: Init\n");
-
-    printk("  _text:        %p\n", &_text);
-    printk("  _etext:       %p\n", &_etext);
-    printk("  _edata:       %p\n", &_edata);
-    printk("  stack start:  %p\n", &stack);
-    printk("  _end:         %p\n", &_end);
-
-    /* set up minimal memory infos */
-    start_pfn = PFN_UP(to_phys(&_end));
-    max_pfn = start_info.nr_pages;
-
-    printk("  start_pfn:    %lx\n", start_pfn);
-    printk("  max_pfn:      %lx\n", max_pfn);
-
-    /*
-     * we know where free tables start (start_pfn) and how many we 
-     * have (max_pfn). 
-     * 
-     * Currently the hypervisor stores page tables it providesin the
-     * high region of the this memory range.
-     * 
-     * next we work out how far down this goes (max_free_pfn)
-     * 
-     * XXX this assumes the hypervisor provided page tables to be in
-     * the upper region of our initial memory. I don't know if this 
-     * is always true.
-     */
-
-    max_free_pfn = PFN_DOWN(to_phys(pgd));
-#ifdef __i386__
-    {
-        unsigned long *pgd = (unsigned long *)start_info.pt_base;
-        unsigned long  pte;
-        int i;
-        printk("  pgd(pa(pgd)): %lx(%lx)", (u_long)pgd, to_phys(pgd));
-
-        for ( i = 0; i < (HYPERVISOR_VIRT_START>>22); i++ )
-        {
-            unsigned long pgde = *pgd++;
-            if ( !(pgde & 1) ) continue;
-            pte = machine_to_phys(pgde & PAGE_MASK);
-            printk("  PT(%x): %lx(%lx)", i, (u_long)to_virt(pte), pte);
-            if (PFN_DOWN(pte) <= max_free_pfn) 
-                max_free_pfn = PFN_DOWN(pte);
-        }
-    }
-    max_free_pfn--;
-    printk("  max_free_pfn: %lx\n", max_free_pfn);
-
-    /*
-     * now we can initialise the page allocator
-     */
-    printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n",
-           (u_long)to_virt(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn), 
-           (u_long)to_virt(PFN_PHYS(max_free_pfn)), PFN_PHYS(max_free_pfn));
-    init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_free_pfn));   
-#endif
-
-
-    /* Now initialise the physical->machine mapping table. */
-
-
-    printk("MM: done\n");
-
-    
-}
 
 /*********************
  * ALLOCATION BITMAP
@@ -213,6 +145,59 @@
 
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+
+#ifdef MM_DEBUG
+/*
+ * Prints allocation[0/1] for @nr_pages, starting at @start
+ * address (virtual).
+ */
+static void print_allocation(void *start, int nr_pages)
+{
+    unsigned long pfn_start = virt_to_pfn(start);
+    int count;
+    for(count = 0; count < nr_pages; count++)
+        if(allocated_in_map(pfn_start + count)) printk("1");
+        else printk("0");
+        
+    printk("\n");        
+}
+
+/*
+ * Prints chunks (making them with letters) for @nr_pages starting
+ * at @start (virtual).
+ */
+static void print_chunks(void *start, int nr_pages)
+{
+    char chunks[1001], current='A';
+    int order, count;
+    chunk_head_t *head;
+    unsigned long pfn_start = virt_to_pfn(start);
+   
+    memset(chunks, (int)'_', 1000);
+    if(nr_pages > 1000) 
+    {
+        DEBUG("Can only pring 1000 pages. Increase buffer size.");
+    }
+    
+    for(order=0; order < FREELIST_SIZE; order++)
+    {
+        head = free_head[order];
+        while(!FREELIST_EMPTY(head))
+        {
+            for(count = 0; count < 1<< head->level; count++)
+            {
+                if(count + virt_to_pfn(head) - pfn_start < 1000)
+                    chunks[count + virt_to_pfn(head) - pfn_start] = current;
+            }
+            head = head->next;
+            current++;
+        }
+    }
+    chunks[nr_pages] = '\0';
+    printk("%s\n", chunks);
+}
+#endif
+
 
 
 /*
@@ -328,3 +313,198 @@
     return 0;
 }
 
+void free_pages(void *pointer, int order)
+{
+    chunk_head_t *freed_ch, *to_merge_ch;
+    chunk_tail_t *freed_ct;
+    unsigned long mask;
+    
+    /* First free the chunk */
+    map_free(virt_to_pfn(pointer), 1 << order);
+    
+    /* Create free chunk */
+    freed_ch = (chunk_head_t *)pointer;
+    freed_ct = (chunk_tail_t *)((char *)pointer + (1<<(order + PAGE_SHIFT)))-1;
+    
+    /* Now, possibly we can conseal chunks together */
+    while(order < FREELIST_SIZE)
+    {
+        mask = 1 << (order + PAGE_SHIFT);
+        if((unsigned long)freed_ch & mask) 
+        {
+            to_merge_ch = (chunk_head_t *)((char *)freed_ch - mask);
+            if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
+                    to_merge_ch->level != order)
+                break;
+            
+            /* Merge with predecessor */
+            freed_ch = to_merge_ch;   
+        }
+        else 
+        {
+            to_merge_ch = (chunk_head_t *)((char *)freed_ch + mask);
+            if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
+                    to_merge_ch->level != order)
+                break;
+            
+            /* Merge with successor */
+            freed_ct = (chunk_tail_t *)((char *)to_merge_ch + mask);
+        }
+        
+        /* We are commited to merging, unlink the chunk */
+        *(to_merge_ch->pprev) = to_merge_ch->next;
+        to_merge_ch->next->pprev = to_merge_ch->pprev;
+        
+        order++;
+    }
+
+    /* Link the new chunk */
+    freed_ch->level = order;
+    freed_ch->next  = free_head[order];
+    freed_ch->pprev = &free_head[order];
+    freed_ct->level = order;
+    
+    freed_ch->next->pprev = &freed_ch->next;
+    free_head[order] = freed_ch;   
+   
+}
+void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
+{
+    unsigned long pfn_to_map, pt_frame;
+    unsigned long mach_ptd, max_mach_ptd;
+    int count;
+    unsigned long mach_pte, virt_pte;
+    unsigned long *ptd = (unsigned long *)start_info.pt_base;
+    mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
+    struct mmuext_op pin_request;
+    
+    /* Firstly work out what is the first pfn that is not yet in page tables
+       NB. Assuming that builder fills whole pt_frames (which it does at the
+       moment)
+     */  
+    pfn_to_map = (start_info.nr_pt_frames - 1) * L1_PAGETABLE_ENTRIES;
+    DEBUG("start_pfn=%ld, first pfn_to_map %ld, max_pfn=%ld", 
+            *start_pfn, pfn_to_map, *max_pfn);
+
+    /* Machine address of page table directory */
+    mach_ptd = phys_to_machine(to_phys(start_info.pt_base));
+    mach_ptd += sizeof(void *) * 
+        l2_table_offset((unsigned long)to_virt(PFN_PHYS(pfn_to_map)));
+  
+    max_mach_ptd = sizeof(void *) * 
+        l2_table_offset((unsigned long)to_virt(PFN_PHYS(*max_pfn)));
+    
+    /* Check that we are not trying to access Xen region */
+    if(max_mach_ptd > sizeof(void *) * l2_table_offset(HYPERVISOR_VIRT_START))
+    {
+        printk("WARNING: mini-os will not use all the memory supplied\n");
+        max_mach_ptd = sizeof(void *) * l2_table_offset(HYPERVISOR_VIRT_START);
+        *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE);
+    }
+    max_mach_ptd += phys_to_machine(to_phys(start_info.pt_base));
+    DEBUG("Max_mach_ptd 0x%lx", max_mach_ptd); 
+   
+    pt_frame = *start_pfn;
+    /* Should not happen - no empty, mapped pages */
+    if(pt_frame >= pfn_to_map)
+    {
+        printk("ERROR: Not even a single empty, mapped page\n");
+        *(int*)0=0;
+    }
+    
+    while(mach_ptd < max_mach_ptd)
+    {
+        /* Correct protection needs to be set for the new page table frame */
+        virt_pte = (unsigned long)to_virt(PFN_PHYS(pt_frame));
+        mach_pte = ptd[l2_table_offset(virt_pte)] & ~(PAGE_SIZE-1);
+        mach_pte += sizeof(void *) * l1_table_offset(virt_pte);
+        DEBUG("New page table page: pfn=0x%lx, mfn=0x%lx, virt_pte=0x%lx, "
+                "mach_pte=0x%lx", pt_frame, pfn_to_mfn(pt_frame), 
+                virt_pte, mach_pte);
+        
+        /* Update the entry */
+        mmu_updates[0].ptr = mach_pte;
+        mmu_updates[0].val = pfn_to_mfn(pt_frame) << PAGE_SHIFT | 
+                                                    (L1_PROT & ~_PAGE_RW);
+        if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0)
+        {
+            printk("PTE for new page table page could not be updated\n");
+            *(int*)0=0;
+        }
+        
+        /* Pin the page to provide correct protection */
+        pin_request.cmd = MMUEXT_PIN_L1_TABLE;
+        pin_request.mfn = pfn_to_mfn(pt_frame);
+        if(HYPERVISOR_mmuext_op(&pin_request, 1, NULL, DOMID_SELF) < 0)
+        {
+            printk("ERROR: pinning failed\n");
+            *(int*)0=0;
+        }
+        
+        /* Now fill the new page table page with entries.
+           Update the page directory as well. */
+        count = 0;
+        mmu_updates[count].ptr = mach_ptd;
+        mmu_updates[count].val = pfn_to_mfn(pt_frame) << PAGE_SHIFT |
+                                                         L2_PROT;
+        count++;
+        mach_ptd += sizeof(void *);
+        mach_pte = phys_to_machine(PFN_PHYS(pt_frame++));
+        
+        for(;count <= L1_PAGETABLE_ENTRIES && pfn_to_map <= *max_pfn; count++)
+        {
+            mmu_updates[count].ptr = mach_pte;
+            mmu_updates[count].val = 
+                pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
+            if(count == 1) DEBUG("mach_pte 0x%lx", mach_pte);
+            mach_pte += sizeof(void *);
+        }
+        if(HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF) < 0) 
+        {            
+            printk("ERROR: mmu_update failed\n");
+            *(int*)0=0;
+        }
+        (*start_pfn)++;
+    }
+
+    *start_pfn = pt_frame;
+}
+
+void init_mm(void)
+{
+
+    unsigned long start_pfn, max_pfn;
+
+    printk("MM: Init\n");
+
+    printk("  _text:        %p\n", &_text);
+    printk("  _etext:       %p\n", &_etext);
+    printk("  _edata:       %p\n", &_edata);
+    printk("  stack start:  %p\n", &stack);
+    printk("  _end:         %p\n", &_end);
+
+    /* set up minimal memory infos */
+    phys_to_machine_mapping = (unsigned long *)start_info.mfn_list;
+   
+    /* First page follows page table pages and 3 more pages (store page etc) */
+    start_pfn = PFN_UP(__pa(start_info.pt_base)) + start_info.nr_pt_frames + 3;
+    max_pfn = start_info.nr_pages;
+
+    printk("  start_pfn:    %lx\n", start_pfn);
+    printk("  max_pfn:      %lx\n", max_pfn);
+
+
+    build_pagetable(&start_pfn, &max_pfn);
+    
+#ifdef __i386__
+    /*
+     * now we can initialise the page allocator
+     */
+    printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n",
+           (u_long)to_virt(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn), 
+           (u_long)to_virt(PFN_PHYS(max_pfn)), PFN_PHYS(max_pfn));
+    init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn));   
+#endif
+
+    printk("MM: done\n");
+}

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.