[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Minios-devel] [PATCH v3 28/43] arm64: init the memory system



Hi,

On 16/04/18 07:32, Huang Shijie wrote:
This patch do followings to initialize the memory system:
     0.) Map extra 2M for the first_free_pfn.

What guarantees you there will be free space after the kernel? And why only 2MB?

As I have already said before, most likely you want to include a couple of more page-table in the image directly.


     1.) add arch_mm_preinit() to setup the page table for Device Tree.

     2.) add functions to setup the page table, such as
         early_alloc_page()/build_pagetable()/build_pud/build_pmd.

     3.) Just as the x86 does, limits the max memory size to MAX_MEM_SIZE,
         the min memory size to MIN_MEM_SIZE,

     4.) and setup the page allocator in arch_init_mm().
         The init_pagetable() will find the best block mapping level to setup
        the page table.

Signed-off-by: Huang Shijie <shijie.huang@xxxxxxx>
---
  arch/arm/arm64/arm64.S |   3 +
  arch/arm/mm.c          | 238 +++++++++++++++++++++++++++++++++++++++++++++++++
  arch/arm/setup.c       |   3 +
  include/arm/arch_mm.h  |   5 ++
  4 files changed, 249 insertions(+)

diff --git a/arch/arm/arm64/arm64.S b/arch/arm/arm64/arm64.S
index 93ffc89..4e9c042 100644
--- a/arch/arm/arm64/arm64.S
+++ b/arch/arm/arm64/arm64.S
@@ -232,6 +232,9 @@ _setup_initial_pgtable:
      ldr     x0, =_text                 /* x0 := vaddr(_text)            */
      ldr     x1, =_end                  /* x1 := vaddr(_end)             */
+ /* Map extra 2M for first_free_pfn */
+    add     x1, x1, L2_SIZE
+
      set_page_table x0, 0, PT_PT
      set_page_table x0, 1, PT_PT
  1:
diff --git a/arch/arm/mm.c b/arch/arm/mm.c
index d98fad8..ed59159 100644
--- a/arch/arm/mm.c
+++ b/arch/arm/mm.c
@@ -6,6 +6,7 @@
  #include <mini-os/posix/limits.h>
  #include <libfdt.h>
  #include <lib.h>
+#include <arm64/pagetable.h>
paddr_t physical_address_offset;
  unsigned mem_blocks = 1;
@@ -23,6 +24,236 @@ unsigned long allocate_ondemand(unsigned long n, unsigned 
long alignment)
      BUG();
  }
+extern lpae_t boot_l0_pgtable[512];
+
+static inline void set_pgt_entry(lpae_t *ptr, lpae_t val)
+{
+    *ptr = val;
+    dsb(ishst);
+    isb();
+}
+
+static void build_pte(lpae_t *pmd, unsigned long vaddr, unsigned long vend,
+                      paddr_t phys, uint64_t mem_type)
+{
+    lpae_t *pte;
+
+    pte = (lpae_t *)to_virt((*pmd) & ~ATTR_MASK_L) + l3_pgt_idx(vaddr);
+    do {
+        set_pgt_entry(pte, (phys & L3_MASK) | mem_type | L3_PAGE);
+
+        vaddr += L3_SIZE;
+        phys += L3_SIZE;
+        pte++;
+    } while (vaddr < vend);
+}
+
+static int build_pmd(lpae_t *pud, unsigned long vaddr, unsigned long vend,

You are using the term pte, pmd, pud (which basically looks very Linuxism...) but I have not idea what you are refer to in Mini-OS context. Please explain it.

+                      paddr_t phys, uint64_t mem_type,
+                      paddr_t (*new_page)(void), int level)

The indentation looks wrong.

+{
+    lpae_t *pmd;
+    unsigned long next;
+
+    pmd = (lpae_t *)to_virt((*pud) & ~ATTR_MASK_L) + l2_pgt_idx(vaddr);
+    do {
+        if (level == 2) {
+             set_pgt_entry(pmd, (phys & L2_MASK) | mem_type | L2_BLOCK);
+        } else {
+             next = vaddr + L2_SIZE;
+             if (next > vend)
+                 next = vend;
+
+             if ((*pmd) == L2_INVAL) {
+                 paddr_t newpage = new_page();
+                 if (!newpage)
+                         return -ENOMEM;
+                 set_pgt_entry(pmd, newpage | PT_PT);
+             }
+
+             build_pte(pmd, vaddr, next, phys, mem_type);
+        }
+
+        vaddr += L2_SIZE;
+        phys += L2_SIZE;
+        pmd++;
+    } while (vaddr < vend);
+
+    return 0;
+}
+
+static int build_pud(lpae_t *pgd, unsigned long vaddr, unsigned long vend,
+                      paddr_t phys, uint64_t mem_type,
+                      paddr_t (*new_page)(void), int level)

indentation.


+{
+    lpae_t *pud;
+    unsigned long next;
+    int ret;
+
+    pud = (lpae_t *)to_virt((*pgd) & ~ATTR_MASK_L) + l1_pgt_idx(vaddr);
+    do {
+        if (level == 1) {
+             set_pgt_entry(pud, (phys & L1_MASK) | mem_type | L1_BLOCK);
+        } else {
+             next = vaddr + L1_SIZE;
+             if (next > vend)
+                 next = vend;
+
+             if ((*pud) == L1_INVAL) {
+                 paddr_t newpage = new_page();
+                 if (!newpage)
+                     return -ENOMEM;
+                 set_pgt_entry(pud, newpage | PT_PT);
+             }
+
+             ret = build_pmd(pud, vaddr, next, phys, mem_type, new_page, 
level);
+             if (ret)
+                 return ret;
+        }
+
+        vaddr += L1_SIZE;
+        phys += L1_SIZE;
+        pud++;
+    } while (vaddr < vend);
+
+    return 0;
+}
+
+static int build_pagetable(unsigned long vaddr, unsigned long start_pfn,
+                     unsigned long max_pfn, uint64_t mem_type,
+                     paddr_t (*new_page)(void), int level)

Indentation.

Also, this is a bit weird to impose the caller to know the level where it stops. How do you ensure the virtual and physical address are going to be aligned properly?

+{
+    paddr_t p_start;
+    unsigned long v_end, next;
+    lpae_t *pgd;
+    int ret;
+
+    v_end = vaddr + max_pfn * PAGE_SIZE;
+    p_start = PFN_PHYS(start_pfn);
+
+    pgd = &boot_l0_pgtable[l0_pgt_idx(vaddr)];
+
+    do {
+        next = (vaddr + L0_SIZE);
+        if (next > v_end)
+            next = v_end;
+
+        if ((*pgd) == L0_INVAL) {
+            paddr_t newpage = new_page();
+            if (!newpage)
+                return -ENOMEM;
+            set_pgt_entry(pgd, newpage | PT_PT);
+        }
+
+        ret = build_pud(pgd, vaddr, next, p_start, mem_type, new_page, level);
+        if (ret)
+            return ret;
+
+        p_start += next - vaddr;
+        vaddr = next;
+        pgd++;
+    } while (vaddr != v_end);
+
+    return 0;
+}
+
+/*
+ * Before the page allocator is ready, we use first_free_pfn to record
+ * the first free page. The first_free_pfn will be increased by
+ * early_alloc_page().
+ */
+static unsigned long first_free_pfn;
+
+/* The pfn for MIN_MEM_SIZE */
+static unsigned long min_mem_pfn;
+
+static paddr_t early_alloc_page(void)
+{
+    paddr_t new_page;
+
+    memset(pfn_to_virt(first_free_pfn), 0, PAGE_SIZE);
+    dsb(ishst);
+
+    new_page = PFN_PHYS(first_free_pfn);
+    first_free_pfn++;
+    ASSERT(first_free_pfn < min_mem_pfn);
+    return new_page;
+}
+
+static int init_pagetable_ok;

Please explain the purpose of the variable in a comment.

+/*
+ * This function will setup the page table for the memory system.
+ */
+void init_pagetable(unsigned long *start_pfn, unsigned long base,
+                    unsigned long size)
+{
+    unsigned long vaddr = (unsigned long)to_virt(base);
+    paddr_t phys = base;
+    paddr_t sz = L1_SIZE;
+    lpae_t *pgd;
+    lpae_t *pud;
+    int level;
+
+    do {
+        /*
+         * We cannot set block mapping for PGD(level 0),
+         * but we can set block mapping for PUD(level 1) and PMD(level 2).
+         * Get the proper level for build_pagetable().

Your API looks wrong, a caller of the PT mapping should not need to know the level it is going to map. The only thing necessary is the size of the mapping.

+         */
+        if (size >= L1_SIZE) {
+            pgd = &boot_l0_pgtable[l0_pgt_idx(vaddr)];
+            if ((*pgd) == L0_INVAL) {

I don't understand this code. Why do you need to check the boot table in order to now the level to map?

+                 level = 1;
+            } else {
+                 pud = (lpae_t *)to_virt((*pgd) & ~ATTR_MASK_L) + 
l1_pgt_idx(vaddr);
+                 if ((*pud) == L1_INVAL)
+                     level = 1;
+                 else
+                     level = 2;
+            }
+        } else {
+             sz = size & L2_MASK;
+             level = 2;
+        }
+
+        build_pagetable(vaddr, PHYS_PFN(phys), PFN_UP(sz),
+                        MEM_DEF_ATTR, early_alloc_page, level);
+
+        vaddr += sz;
+        phys  += sz;
+        size -= sz;
+    } while (size > L2_SIZE);
+
+    /* Use the page mapping (level 3) for the left */
+    if (size)
+        build_pagetable(vaddr, PHYS_PFN(phys), PFN_UP(size),
+                        MEM_DEF_ATTR, early_alloc_page, 3);
+
+    *start_pfn = first_free_pfn;
+    init_pagetable_ok = 1;
+}
+
+void arch_mm_preinit(void *dtb_pointer)
+{
+    paddr_t **dtb_p = dtb_pointer;
+    paddr_t *dtb = *dtb_p;
+    uintptr_t end = (uintptr_t) &_end;
+
+    dtb = to_virt(((paddr_t)dtb));
+    first_free_pfn = PFN_UP(to_phys(end));
+    min_mem_pfn = PFN_UP(to_phys(_text) + MIN_MEM_SIZE);
+
+    /*
+     * Setup the mapping for Device Tree, only map 2M(L2_SIZE) size.
+     *
+     * Note: The early_alloc_page() will increase @first_free_pfn.
+     */
+    build_pagetable((unsigned long)dtb, virt_to_pfn((unsigned long)dtb),
+                    PHYS_PFN(L2_SIZE), MEM_DEF_ATTR, early_alloc_page, 2);
+
+    *dtb_p = dtb;
+}
+
  void arch_init_mm(unsigned long *start_pfn_p, unsigned long *max_pfn_p)
  {
      int memory;
@@ -65,6 +296,11 @@ void arch_init_mm(unsigned long *start_pfn_p, unsigned long 
*max_pfn_p)
      end = (uintptr_t) &_end;
      mem_base = fdt64_to_cpu(regs[0]);
      mem_size = fdt64_to_cpu(regs[1]);
+
+    BUG_ON(mem_size < MIN_MEM_SIZE);
+    if (mem_size > MAX_MEM_SIZE)
+        mem_size = MAX_MEM_SIZE;
+
      printk("Found memory at 0x%llx (len 0x%llx)\n",
              (unsigned long long) mem_base, (unsigned long long) mem_size);
@@ -73,6 +309,8 @@ void arch_init_mm(unsigned long *start_pfn_p, unsigned long *max_pfn_p)
      heap_len = mem_size - (PFN_PHYS(*start_pfn_p) - mem_base);
      *max_pfn_p = *start_pfn_p + PFN_DOWN(heap_len);
+ init_pagetable(start_pfn_p, mem_base, mem_size);
+
      printk("Using pages %lu to %lu as free space for heap.\n", *start_pfn_p, 
*max_pfn_p);
/* The device tree is probably in memory that we're about to hand over to the page
diff --git a/arch/arm/setup.c b/arch/arm/setup.c
index 27bea4a..ab82eda 100644
--- a/arch/arm/setup.c
+++ b/arch/arm/setup.c
@@ -29,6 +29,9 @@ void arch_init(void *dtb_pointer, paddr_t physical_offset)
xprintk("Virtual -> physical offset = %"PRIpaddr" \n", physical_address_offset); + /* Do the preparations */
+    arch_mm_preinit(&dtb_pointer);
+
      xprintk("Checking DTB at %p...\n", dtb_pointer);
if ((r = fdt_check_header(dtb_pointer))) {
diff --git a/include/arm/arch_mm.h b/include/arm/arch_mm.h
index f77a210..db6e781 100644
--- a/include/arm/arch_mm.h
+++ b/include/arm/arch_mm.h
@@ -3,6 +3,10 @@
typedef uint64_t paddr_t;
  #define PRIpaddr "lx"
+#define MIN_MEM_SIZE            (0x400000)

Where does this value come from?

+#define MAX_MEM_SIZE            (1UL << 39)

Same here?

+
+typedef uint64_t lpae_t;
extern char _text, _etext, _erodata, _edata, _end, __bss_start;
  extern int _boot_stack[];
@@ -30,6 +34,7 @@ extern paddr_t physical_address_offset;
#define virtual_to_mfn(_virt) virt_to_mfn(_virt) +void arch_mm_preinit(void *dtb_pointer);
  // FIXME
  #define map_frames(f, n) (NULL)

Cheers,

--
Julien Grall

_______________________________________________
Minios-devel mailing list
Minios-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/minios-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.