[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v6 04/16] xen: arm: allocate dom0 memory separately from preparing the dtb



Mixing these two together is a pain, it forces us to prepare the dtb before
processing the kernel which means we don't know whether the guest is 32- or
64-bit while we construct its DTB.

Instead split out the memory allocation (including 1:1 workaround handling)
and p2m setup into a separate phase and then create a memory node in the DTB
based on the result.

This allows us to move kernel parsing before DTB setup.

As part of this it was also necessary to rework where the decision regarding
the placement of the DTB and initrd in RAM was made. It is now made when
loading the kernel, which allows it to make use of the zImage/ELF specific
information and therefore to make decisions based on complete knowledge and do
it right rather than guessing in prepare_dtb and relying on a later check to
see if things worked.

Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Acked-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
---
v6: Drop spurious +2MB on the module end address
v3: Also rework module placement, v2 broke boot because dtb_paddr wasn't set
    soon enough. This ends up cleaner anyway.
v2: Fixed typo in the commit log
    Handle multiple memory nodes as well as individual nodes with several
    entries in them.
    Strip the original memory node and recreate rather than trying to modify.
---
 xen/arch/arm/domain_build.c |  203 ++++++++++++++++++++++---------------------
 xen/arch/arm/kernel.c       |   78 +++++++++++------
 xen/arch/arm/kernel.h       |    2 -
 3 files changed, 156 insertions(+), 127 deletions(-)

diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index e574fb0..f0def77 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -81,11 +81,8 @@ struct vcpu *__init alloc_dom0_vcpu0(void)
     return alloc_vcpu(dom0, 0, 0);
 }
 
-static int set_memory_reg_11(struct domain *d, struct kernel_info *kinfo,
-                             const struct dt_property *pp,
-                             const struct dt_device_node *np, __be32 *new_cell)
+static void allocate_memory_11(struct domain *d, struct kernel_info *kinfo)
 {
-    int reg_size = dt_cells_to_size(dt_n_addr_cells(np) + dt_n_size_cells(np));
     paddr_t start;
     paddr_t size;
     struct page_info *pg = NULL;
@@ -116,53 +113,61 @@ static int set_memory_reg_11(struct domain *d, struct 
kernel_info *kinfo,
     if ( res )
         panic("Unable to add pages in DOM0: %d\n", res);
 
-    dt_set_range(&new_cell, np, start, size);
-
     kinfo->mem.bank[0].start = start;
     kinfo->mem.bank[0].size = size;
     kinfo->mem.nr_banks = 1;
 
-    return reg_size;
+    kinfo->unassigned_mem -= size;
 }
 
-static int set_memory_reg(struct domain *d, struct kernel_info *kinfo,
-                          const struct dt_property *pp,
-                          const struct dt_device_node *np, __be32 *new_cell)
+static void allocate_memory(struct domain *d, struct kernel_info *kinfo)
 {
-    int reg_size = dt_cells_to_size(dt_n_addr_cells(np) + dt_n_size_cells(np));
-    int l = 0;
+
+    struct dt_device_node *memory = NULL;
+    const void *reg;
+    u32 reg_len, reg_size;
     unsigned int bank = 0;
-    u64 start;
-    u64 size;
-    int ret;
 
     if ( platform_has_quirk(PLATFORM_QUIRK_DOM0_MAPPING_11) )
-        return set_memory_reg_11(d, kinfo, pp, np, new_cell);
+        return allocate_memory_11(d, kinfo);
 
-    while ( kinfo->unassigned_mem > 0 && l + reg_size <= pp->length
-            && kinfo->mem.nr_banks < NR_MEM_BANKS )
+    while ( (memory = dt_find_node_by_type(memory, "memory")) )
     {
-        ret = dt_device_get_address(np, bank, &start, &size);
-        if ( ret )
-            panic("Unable to retrieve the bank %u for %s\n",
-                  bank, dt_node_full_name(np));
-
-        if ( size > kinfo->unassigned_mem )
-            size = kinfo->unassigned_mem;
-        dt_set_range(&new_cell, np, start, size);
-
-        printk("Populate P2M %#"PRIx64"->%#"PRIx64"\n", start, start + size);
-        if ( p2m_populate_ram(d, start, start + size) < 0 )
-            panic("Failed to populate P2M\n");
-        kinfo->mem.bank[kinfo->mem.nr_banks].start = start;
-        kinfo->mem.bank[kinfo->mem.nr_banks].size = size;
-        kinfo->mem.nr_banks++;
-        kinfo->unassigned_mem -= size;
-
-        l += reg_size;
-    }
+        int l;
+
+        DPRINT("memory node\n");
+
+        reg_size = dt_cells_to_size(dt_n_addr_cells(memory) + 
dt_n_size_cells(memory));
+
+        reg = dt_get_property(memory, "reg", &reg_len);
+        if ( reg == NULL )
+            panic("Memory node has no reg property!\n");
+
+        for ( l = 0;
+              kinfo->unassigned_mem > 0 && l + reg_size <= reg_len
+                  && kinfo->mem.nr_banks < NR_MEM_BANKS;
+              l += reg_size )
+        {
+            paddr_t start, size;
 
-    return l;
+            if ( dt_device_get_address(memory, bank, &start, &size) )
+                panic("Unable to retrieve the bank %u for %s\n",
+                      bank, dt_node_full_name(memory));
+
+            if ( size > kinfo->unassigned_mem )
+                size = kinfo->unassigned_mem;
+
+            printk("Populate P2M %#"PRIx64"->%#"PRIx64"\n",
+                   start, start + size);
+            if ( p2m_populate_ram(d, start, start + size) < 0 )
+                panic("Failed to populate P2M\n");
+            kinfo->mem.bank[kinfo->mem.nr_banks].start = start;
+            kinfo->mem.bank[kinfo->mem.nr_banks].size = size;
+            kinfo->mem.nr_banks++;
+
+            kinfo->unassigned_mem -= size;
+        }
+    }
 }
 
 static int write_properties(struct domain *d, struct kernel_info *kinfo,
@@ -211,23 +216,6 @@ static int write_properties(struct domain *d, struct 
kernel_info *kinfo,
                 continue;
             }
         }
-        /*
-         * In a memory node: adjust reg property.
-         * TODO: handle properly memory node (ie: device_type = "memory")
-         */
-        else if ( dt_node_name_is_equal(np, "memory") )
-        {
-            if ( dt_property_name_is_equal(pp, "reg") )
-            {
-                new_data = xzalloc_bytes(pp->length);
-                if ( new_data  == NULL )
-                    return -FDT_ERR_XEN(ENOMEM);
-
-                prop_len = set_memory_reg(d, kinfo, pp, np,
-                                          (__be32 *)new_data);
-                prop_data = new_data;
-            }
-        }
 
         res = fdt_property(kinfo->fdt, pp->name, prop_data, prop_len);
 
@@ -304,6 +292,46 @@ static int fdt_property_interrupts(void *fdt, 
gic_interrupt_t *intr,
     return res;
 }
 
+static int make_memory_node(const struct domain *d,
+                            void *fdt,
+                            const struct kernel_info *kinfo)
+{
+    int res, i;
+    int nr_cells = XEN_FDT_NODE_REG_SIZE*kinfo->mem.nr_banks;
+    __be32 reg[nr_cells];
+    __be32 *cells;
+
+    DPRINT("Create memory node\n");
+
+    /* ePAPR 3.4 */
+    res = fdt_begin_node(fdt, "memory");
+    if ( res )
+        return res;
+
+    res = fdt_property_string(fdt, "device_type", "memory");
+    if ( res )
+        return res;
+
+    cells = &reg[0];
+    for ( i = 0 ; i < kinfo->mem.nr_banks; i++ )
+    {
+        u64 start = kinfo->mem.bank[i].start;
+        u64 size = kinfo->mem.bank[i].size;
+
+        DPRINT("  Bank %d: %#"PRIx64"->%#"PRIx64"\n",
+                i, start, start + size);
+
+        set_xen_range(&cells, start, size);
+    }
+
+    res = fdt_property(fdt, "reg", reg, nr_cells);
+    if ( res )
+        return res;
+
+    res = fdt_end_node(fdt);
+
+    return res;
+}
 
 static int make_hypervisor_node(void *fdt, const struct dt_device_node *parent)
 {
@@ -627,7 +655,8 @@ static int make_timer_node(const struct domain *d, void 
*fdt)
 }
 
 static int make_xen_node(const struct domain *d, void *fdt,
-                         const struct dt_device_node *parent)
+                         const struct dt_device_node *parent,
+                         const struct kernel_info *kinfo)
 {
     int res;
 
@@ -649,6 +678,10 @@ static int make_xen_node(const struct domain *d, void *fdt,
     if ( res )
         return res;
 
+    res = make_memory_node(d, fdt, kinfo);
+    if ( res )
+        return res;
+
     res = make_gic_node(d, fdt);
     if ( res )
         return res;
@@ -750,6 +783,7 @@ static int handle_node(struct domain *d, struct kernel_info 
*kinfo,
         DT_MATCH_COMPATIBLE("xen,multiboot-module"),
         DT_MATCH_COMPATIBLE("arm,psci"),
         DT_MATCH_PATH("/cpus"),
+        DT_MATCH_TYPE("memory"),
         DT_MATCH_GIC,
         DT_MATCH_TIMER,
         { /* sentinel */ },
@@ -826,7 +860,7 @@ static int handle_node(struct domain *d, struct kernel_info 
*kinfo,
         if ( res )
             return res;
 
-        res = make_xen_node(d, kinfo->fdt, np);
+        res = make_xen_node(d, kinfo->fdt, np, kinfo);
         if ( res )
             return res;
     }
@@ -841,14 +875,9 @@ static int prepare_dtb(struct domain *d, struct 
kernel_info *kinfo)
     const void *fdt;
     int new_size;
     int ret;
-    paddr_t end;
-    paddr_t initrd_len;
-    paddr_t dtb_len;
 
     ASSERT(dt_host && (dt_host->sibling == NULL));
 
-    kinfo->unassigned_mem = dom0_mem;
-
     fdt = device_tree_flattened;
 
     new_size = fdt_totalsize(fdt) + DOM0_FDT_EXTRA_SIZE;
@@ -870,36 +899,6 @@ static int prepare_dtb(struct domain *d, struct 
kernel_info *kinfo)
     if ( ret < 0 )
         goto err;
 
-    /* Align DTB and initrd size to 2Mb. Linux only requires 4 byte alignment 
*/
-    initrd_len = ROUNDUP(early_info.modules.module[MOD_INITRD].size, MB(2));
-    dtb_len = ROUNDUP(fdt_totalsize(kinfo->fdt), MB(2));
-    new_size = initrd_len + dtb_len;
-
-    /*
-     * DTB must be loaded such that it does not conflict with the
-     * kernel decompressor. For 32-bit Linux Documentation/arm/Booting
-     * recommends just after the 128MB boundary while for 64-bit Linux
-     * the recommendation in Documentation/arm64/booting.txt is below
-     * 512MB. Place at 128MB, (or, if we have less RAM, as high as
-     * possible) in order to satisfy both.
-     * If the bootloader provides an initrd, it will be loaded just
-     * after the DTB.
-     */
-    end = kinfo->mem.bank[0].start + kinfo->mem.bank[0].size;
-    end = MIN(kinfo->mem.bank[0].start + (128<<20) + new_size, end);
-
-    kinfo->initrd_paddr = end - initrd_len;
-    kinfo->dtb_paddr = kinfo->initrd_paddr - dtb_len;
-
-    if ( kinfo->dtb_paddr < kinfo->mem.bank[0].start ||
-         kinfo->mem.bank[0].start + new_size > end )
-    {
-        printk(XENLOG_ERR "Not enough memory in the first bank for "
-               "the device tree.");
-        ret = -FDT_ERR_XEN(EINVAL);
-        goto err;
-    }
-
     return 0;
 
   err:
@@ -994,11 +993,19 @@ int construct_dom0(struct domain *d)
 
     d->max_pages = ~0U;
 
-    rc = prepare_dtb(d, &kinfo);
+    kinfo.unassigned_mem = dom0_mem;
+
+    allocate_memory(d, &kinfo);
+
+    rc = kernel_prepare(&kinfo);
     if ( rc < 0 )
         return rc;
 
-    rc = kernel_prepare(&kinfo);
+#ifdef CONFIG_ARM_64
+    d->arch.type = kinfo.type;
+#endif
+
+    rc = prepare_dtb(d, &kinfo);
     if ( rc < 0 )
         return rc;
 
@@ -1006,9 +1013,6 @@ int construct_dom0(struct domain *d)
     if ( rc < 0 )
         return rc;
 
-    if ( kinfo.check_overlap )
-        kinfo.check_overlap(&kinfo);
-
     /* The following loads use the domain's p2m */
     p2m_load_VTTBR(d);
 #ifdef CONFIG_ARM_64
@@ -1019,6 +1023,10 @@ int construct_dom0(struct domain *d)
         WRITE_SYSREG(READ_SYSREG(HCR_EL2) | HCR_RW, HCR_EL2);
 #endif
 
+    /*
+     * kernel_load will determine the placement of the initrd & fdt in
+     * RAM, so call it first.
+     */
     kernel_load(&kinfo);
     /* initrd_load will fix up the fdt, so call it before dtb_load */
     initrd_load(&kinfo);
@@ -1033,7 +1041,6 @@ int construct_dom0(struct domain *d)
 
     regs->pc = (register_t)kinfo.entry;
 
-
     if ( is_pv32_domain(d) )
     {
         regs->cpsr = PSR_GUEST32_INIT;
diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c
index 7036d94..9c1c1ad 100644
--- a/xen/arch/arm/kernel.c
+++ b/xen/arch/arm/kernel.c
@@ -68,26 +68,56 @@ void copy_from_paddr(void *dst, paddr_t paddr, unsigned 
long len, int attrindx)
     clear_fixmap(FIXMAP_MISC);
 }
 
-static void kernel_zimage_check_overlap(struct kernel_info *info)
+static void place_modules(struct kernel_info *info,
+                         paddr_t kernel_start,
+                         paddr_t kernel_end)
 {
-    paddr_t zimage_start = info->zimage.load_addr;
-    paddr_t zimage_end = info->zimage.load_addr + info->zimage.len;
-    paddr_t start = info->dtb_paddr;
-    paddr_t end;
+    /* Align DTB and initrd size to 2Mb. Linux only requires 4 byte alignment 
*/
+    const paddr_t initrd_len =
+        ROUNDUP(early_info.modules.module[MOD_INITRD].size, MB(2));
+    const paddr_t dtb_len = ROUNDUP(fdt_totalsize(info->fdt), MB(2));
+    const paddr_t total = initrd_len + dtb_len;
 
-    end = info->initrd_paddr + early_info.modules.module[MOD_INITRD].size;
+    /* Convenient */
+    const paddr_t mem_start = info->mem.bank[0].start;
+    const paddr_t mem_size = info->mem.bank[0].size;
+    const paddr_t mem_end = mem_start + mem_size;
+    const paddr_t kernel_size = kernel_end - kernel_start;
+
+    paddr_t addr;
+
+    if ( total + kernel_size > mem_size )
+        panic("Not enough memory in the first bank for the dtb+initrd.");
 
     /*
-     * In the dom0 memory, the initrd will be just after the DTB. So we
-     * only need to check if the zImage range will overlap the
-     * DTB-initrd range.
+     * DTB must be loaded such that it does not conflict with the
+     * kernel decompressor. For 32-bit Linux Documentation/arm/Booting
+     * recommends just after the 128MB boundary while for 64-bit Linux
+     * the recommendation in Documentation/arm64/booting.txt is below
+     * 512MB.
+     *
+     * If the bootloader provides an initrd, it will be loaded just
+     * after the DTB.
+     *
+     * We try to place dtb+initrd at 128MB, (or, if we have less RAM,
+     * as high as possible). If there is no space then fallback to
+     * just after the kernel, if there is room, otherwise just before.
      */
-    if ( (start > zimage_end) || (end < zimage_start) )
+
+    if ( kernel_end < MIN(mem_start + MB(128), mem_end - total) )
+        addr = MIN(mem_start + MB(128), mem_end - total);
+    else if ( mem_end - ROUNDUP(kernel_end, MB(2)) >= total )
+        addr = ROUNDUP(kernel_end, MB(2));
+    else if ( kernel_start - mem_start >= total )
+        addr = kernel_start - total;
+    else
+    {
+        panic("Unable to find suitable location for dtb+initrd.");
         return;
+    }
 
-    panic(XENLOG_ERR "The kernel(0x%"PRIpaddr"-0x%"PRIpaddr
-          ") is overlapping the DTB-initrd(0x%"PRIpaddr"-0x%"PRIpaddr")\n",
-          zimage_start, zimage_end, start, end);
+    info->dtb_paddr = addr;
+    info->initrd_paddr = info->dtb_paddr + dtb_len;
 }
 
 static void kernel_zimage_load(struct kernel_info *info)
@@ -98,6 +128,8 @@ static void kernel_zimage_load(struct kernel_info *info)
     paddr_t len = info->zimage.len;
     unsigned long offs;
 
+    place_modules(info, load_addr, load_addr + len);
+
     printk("Loading zImage from %"PRIpaddr" to %"PRIpaddr"-%"PRIpaddr"\n",
            paddr, load_addr, load_addr + len);
     for ( offs = 0; offs < len; )
@@ -176,7 +208,6 @@ static int kernel_try_zimage64_prepare(struct kernel_info 
*info,
 
     info->entry = info->zimage.load_addr;
     info->load = kernel_zimage_load;
-    info->check_overlap = kernel_zimage_check_overlap;
 
     info->type = DOMAIN_PV64;
 
@@ -236,17 +267,7 @@ static int kernel_try_zimage32_prepare(struct kernel_info 
*info,
         paddr_t load_end;
 
         load_end = info->mem.bank[0].start + info->mem.bank[0].size;
-        load_end = MIN(info->mem.bank[0].start + (128<<20), load_end);
-
-        /*
-         * FDT is loaded above 128M or as high as possible, so the
-         * only way we can clash is if we have <=128MB, in which case
-         * FDT will be right at the end and so dtb_paddr will be below
-         * the proposed kernel load address. Move the kernel down if
-         * necessary.
-         */
-        if ( load_end >= info->dtb_paddr )
-            load_end = info->dtb_paddr;
+        load_end = MIN(info->mem.bank[0].start + MB(128), load_end);
 
         info->zimage.load_addr = load_end - end;
         /* Align to 2MB */
@@ -258,7 +279,6 @@ static int kernel_try_zimage32_prepare(struct kernel_info 
*info,
 
     info->entry = info->zimage.load_addr;
     info->load = kernel_zimage_load;
-    info->check_overlap = kernel_zimage_check_overlap;
 
 #ifdef CONFIG_ARM_64
     info->type = DOMAIN_PV32;
@@ -269,10 +289,15 @@ static int kernel_try_zimage32_prepare(struct kernel_info 
*info,
 
 static void kernel_elf_load(struct kernel_info *info)
 {
+    place_modules(info,
+                  info->elf.parms.virt_kstart,
+                  info->elf.parms.virt_kend);
+
     printk("Loading ELF image into guest memory\n");
     info->elf.elf.dest_base = (void*)(unsigned 
long)info->elf.parms.virt_kstart;
     info->elf.elf.dest_size =
          info->elf.parms.virt_kend - info->elf.parms.virt_kstart;
+
     elf_load_binary(&info->elf.elf);
 
     printk("Free temporary kernel buffer\n");
@@ -321,7 +346,6 @@ static int kernel_try_elf_prepare(struct kernel_info *info,
      */
     info->entry = info->elf.parms.virt_entry;
     info->load = kernel_elf_load;
-    info->check_overlap = NULL;
 
     if ( elf_check_broken(&info->elf.elf) )
         printk("Xen: warning: ELF kernel broken: %s\n",
diff --git a/xen/arch/arm/kernel.h b/xen/arch/arm/kernel.h
index debf590..b48c2c9 100644
--- a/xen/arch/arm/kernel.h
+++ b/xen/arch/arm/kernel.h
@@ -40,8 +40,6 @@ struct kernel_info {
     };
 
     void (*load)(struct kernel_info *info);
-    /* Callback to check overlap between the kernel and the device tree */
-    void (*check_overlap)(struct kernel_info *kinfo);
     int load_attr;
 };
 
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.