[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v5 07/19] xen: arm: allocate dom0 memory separately from preparing the dtb
Mixing these two together is a pain, it forces us to prepare the dtb before processing the kernel which means we don't know whether the guest is 32- or 64-bit while we construct its DTB. Instead split out the memory allocation (including 1:1 workaround handling) and p2m setup into a separate phase and then create a memory node in the DTB based on the result. This allows us to move kernel parsing before DTB setup. As part of this it was also necessary to rework where the decision regarding the placement of the DTB and initrd in RAM was made. It is now made when loading the kernel, which allows it to make use of the zImage/ELF specific information and therefore to make decisions based on complete knowledge and do it right rather than guessing in prepare_dtb and relying on a later check to see if things worked. Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx> --- v3: Also rework module placement, v2 broke boot because dtb_paddr wasn't set soon enough. This ends up cleaner anyway. v2: Fixed typo in the commit log Handle multiple memory nodes as well as individual nodes with several entries in them. Strip the original memory node and recreate rather than trying to modify. --- xen/arch/arm/domain_build.c | 203 ++++++++++++++++++++++--------------------- xen/arch/arm/kernel.c | 80 +++++++++++------ xen/arch/arm/kernel.h | 2 - 3 files changed, 158 insertions(+), 127 deletions(-) diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c index 8645aa1..edfcf14 100644 --- a/xen/arch/arm/domain_build.c +++ b/xen/arch/arm/domain_build.c @@ -81,11 +81,8 @@ struct vcpu *__init alloc_dom0_vcpu0(void) return alloc_vcpu(dom0, 0, 0); } -static int set_memory_reg_11(struct domain *d, struct kernel_info *kinfo, - const struct dt_property *pp, - const struct dt_device_node *np, __be32 *new_cell) +static void allocate_memory_11(struct domain *d, struct kernel_info *kinfo) { - int reg_size = dt_cells_to_size(dt_n_addr_cells(np) + dt_n_size_cells(np)); paddr_t start; paddr_t size; struct page_info *pg = NULL; @@ -116,53 +113,61 @@ static int set_memory_reg_11(struct domain *d, struct kernel_info *kinfo, if ( res ) panic("Unable to add pages in DOM0: %d\n", res); - dt_set_range(&new_cell, np, start, size); - kinfo->mem.bank[0].start = start; kinfo->mem.bank[0].size = size; kinfo->mem.nr_banks = 1; - return reg_size; + kinfo->unassigned_mem -= size; } -static int set_memory_reg(struct domain *d, struct kernel_info *kinfo, - const struct dt_property *pp, - const struct dt_device_node *np, __be32 *new_cell) +static void allocate_memory(struct domain *d, struct kernel_info *kinfo) { - int reg_size = dt_cells_to_size(dt_n_addr_cells(np) + dt_n_size_cells(np)); - int l = 0; + + struct dt_device_node *memory = NULL; + const void *reg; + u32 reg_len, reg_size; unsigned int bank = 0; - u64 start; - u64 size; - int ret; if ( platform_has_quirk(PLATFORM_QUIRK_DOM0_MAPPING_11) ) - return set_memory_reg_11(d, kinfo, pp, np, new_cell); + return allocate_memory_11(d, kinfo); - while ( kinfo->unassigned_mem > 0 && l + reg_size <= pp->length - && kinfo->mem.nr_banks < NR_MEM_BANKS ) + while ( (memory = dt_find_node_by_type(memory, "memory")) ) { - ret = dt_device_get_address(np, bank, &start, &size); - if ( ret ) - panic("Unable to retrieve the bank %u for %s\n", - bank, dt_node_full_name(np)); - - if ( size > kinfo->unassigned_mem ) - size = kinfo->unassigned_mem; - dt_set_range(&new_cell, np, start, size); - - printk("Populate P2M %#"PRIx64"->%#"PRIx64"\n", start, start + size); - if ( p2m_populate_ram(d, start, start + size) < 0 ) - panic("Failed to populate P2M\n"); - kinfo->mem.bank[kinfo->mem.nr_banks].start = start; - kinfo->mem.bank[kinfo->mem.nr_banks].size = size; - kinfo->mem.nr_banks++; - kinfo->unassigned_mem -= size; - - l += reg_size; - } + int l; + + DPRINT("memory node\n"); + + reg_size = dt_cells_to_size(dt_n_addr_cells(memory) + dt_n_size_cells(memory)); + + reg = dt_get_property(memory, "reg", ®_len); + if ( reg == NULL ) + panic("Memory node has no reg property!\n"); + + for ( l = 0; + kinfo->unassigned_mem > 0 && l + reg_size <= reg_len + && kinfo->mem.nr_banks < NR_MEM_BANKS; + l += reg_size ) + { + paddr_t start, size; - return l; + if ( dt_device_get_address(memory, bank, &start, &size) ) + panic("Unable to retrieve the bank %u for %s\n", + bank, dt_node_full_name(memory)); + + if ( size > kinfo->unassigned_mem ) + size = kinfo->unassigned_mem; + + printk("Populate P2M %#"PRIx64"->%#"PRIx64"\n", + start, start + size); + if ( p2m_populate_ram(d, start, start + size) < 0 ) + panic("Failed to populate P2M\n"); + kinfo->mem.bank[kinfo->mem.nr_banks].start = start; + kinfo->mem.bank[kinfo->mem.nr_banks].size = size; + kinfo->mem.nr_banks++; + + kinfo->unassigned_mem -= size; + } + } } static int write_properties(struct domain *d, struct kernel_info *kinfo, @@ -211,23 +216,6 @@ static int write_properties(struct domain *d, struct kernel_info *kinfo, continue; } } - /* - * In a memory node: adjust reg property. - * TODO: handle properly memory node (ie: device_type = "memory") - */ - else if ( dt_node_name_is_equal(np, "memory") ) - { - if ( dt_property_name_is_equal(pp, "reg") ) - { - new_data = xzalloc_bytes(pp->length); - if ( new_data == NULL ) - return -FDT_ERR_XEN(ENOMEM); - - prop_len = set_memory_reg(d, kinfo, pp, np, - (__be32 *)new_data); - prop_data = new_data; - } - } res = fdt_property(kinfo->fdt, pp->name, prop_data, prop_len); @@ -304,6 +292,46 @@ static int fdt_property_interrupts(void *fdt, gic_interrupt_t *intr, return res; } +static int make_memory_node(const struct domain *d, + void *fdt, + const struct kernel_info *kinfo) +{ + int res, i; + int nr_cells = XEN_FDT_NODE_REG_SIZE*kinfo->mem.nr_banks; + __be32 reg[nr_cells]; + __be32 *cells; + + DPRINT("Create memory node\n"); + + /* ePAPR 3.4 */ + res = fdt_begin_node(fdt, "memory"); + if ( res ) + return res; + + res = fdt_property_string(fdt, "device_type", "memory"); + if ( res ) + return res; + + cells = ®[0]; + for ( i = 0 ; i < kinfo->mem.nr_banks; i++ ) + { + u64 start = kinfo->mem.bank[i].start; + u64 size = kinfo->mem.bank[i].size; + + DPRINT(" Bank %d: %#"PRIx64"->%#"PRIx64"\n", + i, start, start + size); + + set_xen_range(&cells, start, size); + } + + res = fdt_property(fdt, "reg", reg, nr_cells); + if ( res ) + return res; + + res = fdt_end_node(fdt); + + return res; +} static int make_hypervisor_node(void *fdt, const struct dt_device_node *parent) { @@ -627,7 +655,8 @@ static int make_timer_node(const struct domain *d, void *fdt) } static int make_xen_node(const struct domain *d, void *fdt, - const struct dt_device_node *parent) + const struct dt_device_node *parent, + const struct kernel_info *kinfo) { int res; @@ -649,6 +678,10 @@ static int make_xen_node(const struct domain *d, void *fdt, if ( res ) return res; + res = make_memory_node(d, fdt, kinfo); + if ( res ) + return res; + res = make_gic_node(d, fdt); if ( res ) return res; @@ -750,6 +783,7 @@ static int handle_node(struct domain *d, struct kernel_info *kinfo, DT_MATCH_COMPATIBLE("xen,multiboot-module"), DT_MATCH_COMPATIBLE("arm,psci"), DT_MATCH_PATH("/cpus"), + DT_MATCH_TYPE("memory"), DT_MATCH_GIC, DT_MATCH_TIMER, { /* sentinel */ }, @@ -826,7 +860,7 @@ static int handle_node(struct domain *d, struct kernel_info *kinfo, if ( res ) return res; - res = make_xen_node(d, kinfo->fdt, np); + res = make_xen_node(d, kinfo->fdt, np, kinfo); if ( res ) return res; } @@ -841,14 +875,9 @@ static int prepare_dtb(struct domain *d, struct kernel_info *kinfo) const void *fdt; int new_size; int ret; - paddr_t end; - paddr_t initrd_len; - paddr_t dtb_len; ASSERT(dt_host && (dt_host->sibling == NULL)); - kinfo->unassigned_mem = dom0_mem; - fdt = device_tree_flattened; new_size = fdt_totalsize(fdt) + DOM0_FDT_EXTRA_SIZE; @@ -870,36 +899,6 @@ static int prepare_dtb(struct domain *d, struct kernel_info *kinfo) if ( ret < 0 ) goto err; - /* Align DTB and initrd size to 2Mb. Linux only requires 4 byte alignment */ - initrd_len = ROUNDUP(early_info.modules.module[MOD_INITRD].size, MB(2)); - dtb_len = ROUNDUP(fdt_totalsize(kinfo->fdt), MB(2)); - new_size = initrd_len + dtb_len; - - /* - * DTB must be loaded such that it does not conflict with the - * kernel decompressor. For 32-bit Linux Documentation/arm/Booting - * recommends just after the 128MB boundary while for 64-bit Linux - * the recommendation in Documentation/arm64/booting.txt is below - * 512MB. Place at 128MB, (or, if we have less RAM, as high as - * possible) in order to satisfy both. - * If the bootloader provides an initrd, it will be loaded just - * after the DTB. - */ - end = kinfo->mem.bank[0].start + kinfo->mem.bank[0].size; - end = MIN(kinfo->mem.bank[0].start + (128<<20) + new_size, end); - - kinfo->initrd_paddr = end - initrd_len; - kinfo->dtb_paddr = kinfo->initrd_paddr - dtb_len; - - if ( kinfo->dtb_paddr < kinfo->mem.bank[0].start || - kinfo->mem.bank[0].start + new_size > end ) - { - printk(XENLOG_ERR "Not enough memory in the first bank for " - "the device tree."); - ret = -FDT_ERR_XEN(EINVAL); - goto err; - } - return 0; err: @@ -994,11 +993,19 @@ int construct_dom0(struct domain *d) d->max_pages = ~0U; - rc = prepare_dtb(d, &kinfo); + kinfo.unassigned_mem = dom0_mem; + + allocate_memory(d, &kinfo); + + rc = kernel_prepare(&kinfo); if ( rc < 0 ) return rc; - rc = kernel_prepare(&kinfo); +#ifdef CONFIG_ARM_64 + d->arch.type = kinfo.type; +#endif + + rc = prepare_dtb(d, &kinfo); if ( rc < 0 ) return rc; @@ -1006,9 +1013,6 @@ int construct_dom0(struct domain *d) if ( rc < 0 ) return rc; - if ( kinfo.check_overlap ) - kinfo.check_overlap(&kinfo); - /* The following loads use the domain's p2m */ p2m_load_VTTBR(d); #ifdef CONFIG_ARM_64 @@ -1019,6 +1023,10 @@ int construct_dom0(struct domain *d) WRITE_SYSREG(READ_SYSREG(HCR_EL2) | HCR_RW, HCR_EL2); #endif + /* + * kernel_load will determine the placement of the initrd & fdt in + * RAM, so call it first. + */ kernel_load(&kinfo); /* initrd_load will fix up the fdt, so call it before dtb_load */ initrd_load(&kinfo); @@ -1033,7 +1041,6 @@ int construct_dom0(struct domain *d) regs->pc = (register_t)kinfo.entry; - if ( is_pv32_domain(d) ) { regs->cpsr = PSR_GUEST32_INIT; diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c index 7036d94..9046797 100644 --- a/xen/arch/arm/kernel.c +++ b/xen/arch/arm/kernel.c @@ -68,26 +68,56 @@ void copy_from_paddr(void *dst, paddr_t paddr, unsigned long len, int attrindx) clear_fixmap(FIXMAP_MISC); } -static void kernel_zimage_check_overlap(struct kernel_info *info) +static void place_modules(struct kernel_info *info, + paddr_t kernel_start, + paddr_t kernel_end) { - paddr_t zimage_start = info->zimage.load_addr; - paddr_t zimage_end = info->zimage.load_addr + info->zimage.len; - paddr_t start = info->dtb_paddr; - paddr_t end; + /* Align DTB and initrd size to 2Mb. Linux only requires 4 byte alignment */ + const paddr_t initrd_len = + ROUNDUP(early_info.modules.module[MOD_INITRD].size, MB(2)); + const paddr_t dtb_len = ROUNDUP(fdt_totalsize(info->fdt), MB(2)); + const paddr_t total = initrd_len + dtb_len; - end = info->initrd_paddr + early_info.modules.module[MOD_INITRD].size; + /* Convenient */ + const paddr_t mem_start = info->mem.bank[0].start; + const paddr_t mem_size = info->mem.bank[0].size; + const paddr_t mem_end = mem_start + mem_size; + const paddr_t kernel_size = kernel_end - kernel_start; + + paddr_t addr; + + if ( total + kernel_size > mem_size ) + panic("Not enough memory in the first bank for the dtb+initrd."); /* - * In the dom0 memory, the initrd will be just after the DTB. So we - * only need to check if the zImage range will overlap the - * DTB-initrd range. + * DTB must be loaded such that it does not conflict with the + * kernel decompressor. For 32-bit Linux Documentation/arm/Booting + * recommends just after the 128MB boundary while for 64-bit Linux + * the recommendation in Documentation/arm64/booting.txt is below + * 512MB. + * + * If the bootloader provides an initrd, it will be loaded just + * after the DTB. + * + * We try to place dtb+initrd at 128MB, (or, if we have less RAM, + * as high as possible). If there is no space then fallback to + * just after the kernel, if there is room, otherwise just before. */ - if ( (start > zimage_end) || (end < zimage_start) ) + + if ( kernel_end < MIN(mem_start + MB(128), mem_end - total) ) + addr = MIN(mem_start + MB(128), mem_end - total); + else if ( mem_end - ROUNDUP(kernel_end, MB(2)) >= total ) + addr = ROUNDUP(kernel_end, MB(2)); + else if ( kernel_start - mem_start >= total ) + addr = kernel_start - total; + else + { + panic("Unable to find suitable location for dtb+initrd."); return; + } - panic(XENLOG_ERR "The kernel(0x%"PRIpaddr"-0x%"PRIpaddr - ") is overlapping the DTB-initrd(0x%"PRIpaddr"-0x%"PRIpaddr")\n", - zimage_start, zimage_end, start, end); + info->dtb_paddr = addr; + info->initrd_paddr = info->dtb_paddr + dtb_len; } static void kernel_zimage_load(struct kernel_info *info) @@ -98,6 +128,8 @@ static void kernel_zimage_load(struct kernel_info *info) paddr_t len = info->zimage.len; unsigned long offs; + place_modules(info, load_addr, load_addr + len); + printk("Loading zImage from %"PRIpaddr" to %"PRIpaddr"-%"PRIpaddr"\n", paddr, load_addr, load_addr + len); for ( offs = 0; offs < len; ) @@ -176,7 +208,6 @@ static int kernel_try_zimage64_prepare(struct kernel_info *info, info->entry = info->zimage.load_addr; info->load = kernel_zimage_load; - info->check_overlap = kernel_zimage_check_overlap; info->type = DOMAIN_PV64; @@ -236,17 +267,9 @@ static int kernel_try_zimage32_prepare(struct kernel_info *info, paddr_t load_end; load_end = info->mem.bank[0].start + info->mem.bank[0].size; - load_end = MIN(info->mem.bank[0].start + (128<<20), load_end); - - /* - * FDT is loaded above 128M or as high as possible, so the - * only way we can clash is if we have <=128MB, in which case - * FDT will be right at the end and so dtb_paddr will be below - * the proposed kernel load address. Move the kernel down if - * necessary. - */ - if ( load_end >= info->dtb_paddr ) - load_end = info->dtb_paddr; + load_end = MIN(info->mem.bank[0].start + MB(128), load_end); + + load_end += MB(2); info->zimage.load_addr = load_end - end; /* Align to 2MB */ @@ -258,7 +281,6 @@ static int kernel_try_zimage32_prepare(struct kernel_info *info, info->entry = info->zimage.load_addr; info->load = kernel_zimage_load; - info->check_overlap = kernel_zimage_check_overlap; #ifdef CONFIG_ARM_64 info->type = DOMAIN_PV32; @@ -269,10 +291,15 @@ static int kernel_try_zimage32_prepare(struct kernel_info *info, static void kernel_elf_load(struct kernel_info *info) { + place_modules(info, + info->elf.parms.virt_kstart, + info->elf.parms.virt_kend); + printk("Loading ELF image into guest memory\n"); info->elf.elf.dest_base = (void*)(unsigned long)info->elf.parms.virt_kstart; info->elf.elf.dest_size = info->elf.parms.virt_kend - info->elf.parms.virt_kstart; + elf_load_binary(&info->elf.elf); printk("Free temporary kernel buffer\n"); @@ -321,7 +348,6 @@ static int kernel_try_elf_prepare(struct kernel_info *info, */ info->entry = info->elf.parms.virt_entry; info->load = kernel_elf_load; - info->check_overlap = NULL; if ( elf_check_broken(&info->elf.elf) ) printk("Xen: warning: ELF kernel broken: %s\n", diff --git a/xen/arch/arm/kernel.h b/xen/arch/arm/kernel.h index debf590..b48c2c9 100644 --- a/xen/arch/arm/kernel.h +++ b/xen/arch/arm/kernel.h @@ -40,8 +40,6 @@ struct kernel_info { }; void (*load)(struct kernel_info *info); - /* Callback to check overlap between the kernel and the device tree */ - void (*check_overlap)(struct kernel_info *kinfo); int load_attr; }; -- 1.7.10.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |