[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v5 07/19] xen: arm: allocate dom0 memory separately from preparing the dtb
On Wed, 13 Nov 2013, Ian Campbell wrote: > Mixing these two together is a pain, it forces us to prepare the dtb before > processing the kernel which means we don't know whether the guest is 32- or > 64-bit while we construct its DTB. > > Instead split out the memory allocation (including 1:1 workaround handling) > and p2m setup into a separate phase and then create a memory node in the DTB > based on the result. > > This allows us to move kernel parsing before DTB setup. > > As part of this it was also necessary to rework where the decision regarding > the placement of the DTB and initrd in RAM was made. It is now made when > loading the kernel, which allows it to make use of the zImage/ELF specific > information and therefore to make decisions based on complete knowledge and do > it right rather than guessing in prepare_dtb and relying on a later check to > see if things worked. > > Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx> It looks good > v3: Also rework module placement, v2 broke boot because dtb_paddr wasn't set > soon enough. This ends up cleaner anyway. > v2: Fixed typo in the commit log > Handle multiple memory nodes as well as individual nodes with several > entries in them. > Strip the original memory node and recreate rather than trying to modify. > --- > xen/arch/arm/domain_build.c | 203 > ++++++++++++++++++++++--------------------- > xen/arch/arm/kernel.c | 80 +++++++++++------ > xen/arch/arm/kernel.h | 2 - > 3 files changed, 158 insertions(+), 127 deletions(-) > > diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c > index 8645aa1..edfcf14 100644 > --- a/xen/arch/arm/domain_build.c > +++ b/xen/arch/arm/domain_build.c > @@ -81,11 +81,8 @@ struct vcpu *__init alloc_dom0_vcpu0(void) > return alloc_vcpu(dom0, 0, 0); > } > > -static int set_memory_reg_11(struct domain *d, struct kernel_info *kinfo, > - const struct dt_property *pp, > - const struct dt_device_node *np, __be32 > *new_cell) > +static void allocate_memory_11(struct domain *d, struct kernel_info *kinfo) > { > - int reg_size = dt_cells_to_size(dt_n_addr_cells(np) + > dt_n_size_cells(np)); > paddr_t start; > paddr_t size; > struct page_info *pg = NULL; > @@ -116,53 +113,61 @@ static int set_memory_reg_11(struct domain *d, struct > kernel_info *kinfo, > if ( res ) > panic("Unable to add pages in DOM0: %d\n", res); > > - dt_set_range(&new_cell, np, start, size); > - > kinfo->mem.bank[0].start = start; > kinfo->mem.bank[0].size = size; > kinfo->mem.nr_banks = 1; > > - return reg_size; > + kinfo->unassigned_mem -= size; > } > > -static int set_memory_reg(struct domain *d, struct kernel_info *kinfo, > - const struct dt_property *pp, > - const struct dt_device_node *np, __be32 *new_cell) > +static void allocate_memory(struct domain *d, struct kernel_info *kinfo) > { > - int reg_size = dt_cells_to_size(dt_n_addr_cells(np) + > dt_n_size_cells(np)); > - int l = 0; > + > + struct dt_device_node *memory = NULL; > + const void *reg; > + u32 reg_len, reg_size; > unsigned int bank = 0; > - u64 start; > - u64 size; > - int ret; > > if ( platform_has_quirk(PLATFORM_QUIRK_DOM0_MAPPING_11) ) > - return set_memory_reg_11(d, kinfo, pp, np, new_cell); > + return allocate_memory_11(d, kinfo); > > - while ( kinfo->unassigned_mem > 0 && l + reg_size <= pp->length > - && kinfo->mem.nr_banks < NR_MEM_BANKS ) > + while ( (memory = dt_find_node_by_type(memory, "memory")) ) > { > - ret = dt_device_get_address(np, bank, &start, &size); > - if ( ret ) > - panic("Unable to retrieve the bank %u for %s\n", > - bank, dt_node_full_name(np)); > - > - if ( size > kinfo->unassigned_mem ) > - size = kinfo->unassigned_mem; > - dt_set_range(&new_cell, np, start, size); > - > - printk("Populate P2M %#"PRIx64"->%#"PRIx64"\n", start, start + size); > - if ( p2m_populate_ram(d, start, start + size) < 0 ) > - panic("Failed to populate P2M\n"); > - kinfo->mem.bank[kinfo->mem.nr_banks].start = start; > - kinfo->mem.bank[kinfo->mem.nr_banks].size = size; > - kinfo->mem.nr_banks++; > - kinfo->unassigned_mem -= size; > - > - l += reg_size; > - } > + int l; > + > + DPRINT("memory node\n"); > + > + reg_size = dt_cells_to_size(dt_n_addr_cells(memory) + > dt_n_size_cells(memory)); > + > + reg = dt_get_property(memory, "reg", ®_len); > + if ( reg == NULL ) > + panic("Memory node has no reg property!\n"); > + > + for ( l = 0; > + kinfo->unassigned_mem > 0 && l + reg_size <= reg_len > + && kinfo->mem.nr_banks < NR_MEM_BANKS; > + l += reg_size ) > + { > + paddr_t start, size; > > - return l; > + if ( dt_device_get_address(memory, bank, &start, &size) ) > + panic("Unable to retrieve the bank %u for %s\n", > + bank, dt_node_full_name(memory)); > + > + if ( size > kinfo->unassigned_mem ) > + size = kinfo->unassigned_mem; > + > + printk("Populate P2M %#"PRIx64"->%#"PRIx64"\n", > + start, start + size); > + if ( p2m_populate_ram(d, start, start + size) < 0 ) > + panic("Failed to populate P2M\n"); > + kinfo->mem.bank[kinfo->mem.nr_banks].start = start; > + kinfo->mem.bank[kinfo->mem.nr_banks].size = size; > + kinfo->mem.nr_banks++; > + > + kinfo->unassigned_mem -= size; > + } > + } > } > > static int write_properties(struct domain *d, struct kernel_info *kinfo, > @@ -211,23 +216,6 @@ static int write_properties(struct domain *d, struct > kernel_info *kinfo, > continue; > } > } > - /* > - * In a memory node: adjust reg property. > - * TODO: handle properly memory node (ie: device_type = "memory") > - */ > - else if ( dt_node_name_is_equal(np, "memory") ) > - { > - if ( dt_property_name_is_equal(pp, "reg") ) > - { > - new_data = xzalloc_bytes(pp->length); > - if ( new_data == NULL ) > - return -FDT_ERR_XEN(ENOMEM); > - > - prop_len = set_memory_reg(d, kinfo, pp, np, > - (__be32 *)new_data); > - prop_data = new_data; > - } > - } > > res = fdt_property(kinfo->fdt, pp->name, prop_data, prop_len); > > @@ -304,6 +292,46 @@ static int fdt_property_interrupts(void *fdt, > gic_interrupt_t *intr, > return res; > } > > +static int make_memory_node(const struct domain *d, > + void *fdt, > + const struct kernel_info *kinfo) > +{ > + int res, i; > + int nr_cells = XEN_FDT_NODE_REG_SIZE*kinfo->mem.nr_banks; > + __be32 reg[nr_cells]; > + __be32 *cells; > + > + DPRINT("Create memory node\n"); > + > + /* ePAPR 3.4 */ > + res = fdt_begin_node(fdt, "memory"); > + if ( res ) > + return res; > + > + res = fdt_property_string(fdt, "device_type", "memory"); > + if ( res ) > + return res; > + > + cells = ®[0]; > + for ( i = 0 ; i < kinfo->mem.nr_banks; i++ ) > + { > + u64 start = kinfo->mem.bank[i].start; > + u64 size = kinfo->mem.bank[i].size; > + > + DPRINT(" Bank %d: %#"PRIx64"->%#"PRIx64"\n", > + i, start, start + size); > + > + set_xen_range(&cells, start, size); > + } > + > + res = fdt_property(fdt, "reg", reg, nr_cells); > + if ( res ) > + return res; > + > + res = fdt_end_node(fdt); > + > + return res; > +} > > static int make_hypervisor_node(void *fdt, const struct dt_device_node > *parent) > { > @@ -627,7 +655,8 @@ static int make_timer_node(const struct domain *d, void > *fdt) > } > > static int make_xen_node(const struct domain *d, void *fdt, > - const struct dt_device_node *parent) > + const struct dt_device_node *parent, > + const struct kernel_info *kinfo) > { > int res; > > @@ -649,6 +678,10 @@ static int make_xen_node(const struct domain *d, void > *fdt, > if ( res ) > return res; > > + res = make_memory_node(d, fdt, kinfo); > + if ( res ) > + return res; > + > res = make_gic_node(d, fdt); > if ( res ) > return res; > @@ -750,6 +783,7 @@ static int handle_node(struct domain *d, struct > kernel_info *kinfo, > DT_MATCH_COMPATIBLE("xen,multiboot-module"), > DT_MATCH_COMPATIBLE("arm,psci"), > DT_MATCH_PATH("/cpus"), > + DT_MATCH_TYPE("memory"), > DT_MATCH_GIC, > DT_MATCH_TIMER, > { /* sentinel */ }, > @@ -826,7 +860,7 @@ static int handle_node(struct domain *d, struct > kernel_info *kinfo, > if ( res ) > return res; > > - res = make_xen_node(d, kinfo->fdt, np); > + res = make_xen_node(d, kinfo->fdt, np, kinfo); > if ( res ) > return res; > } > @@ -841,14 +875,9 @@ static int prepare_dtb(struct domain *d, struct > kernel_info *kinfo) > const void *fdt; > int new_size; > int ret; > - paddr_t end; > - paddr_t initrd_len; > - paddr_t dtb_len; > > ASSERT(dt_host && (dt_host->sibling == NULL)); > > - kinfo->unassigned_mem = dom0_mem; > - > fdt = device_tree_flattened; > > new_size = fdt_totalsize(fdt) + DOM0_FDT_EXTRA_SIZE; > @@ -870,36 +899,6 @@ static int prepare_dtb(struct domain *d, struct > kernel_info *kinfo) > if ( ret < 0 ) > goto err; > > - /* Align DTB and initrd size to 2Mb. Linux only requires 4 byte > alignment */ > - initrd_len = ROUNDUP(early_info.modules.module[MOD_INITRD].size, MB(2)); > - dtb_len = ROUNDUP(fdt_totalsize(kinfo->fdt), MB(2)); > - new_size = initrd_len + dtb_len; > - > - /* > - * DTB must be loaded such that it does not conflict with the > - * kernel decompressor. For 32-bit Linux Documentation/arm/Booting > - * recommends just after the 128MB boundary while for 64-bit Linux > - * the recommendation in Documentation/arm64/booting.txt is below > - * 512MB. Place at 128MB, (or, if we have less RAM, as high as > - * possible) in order to satisfy both. > - * If the bootloader provides an initrd, it will be loaded just > - * after the DTB. > - */ > - end = kinfo->mem.bank[0].start + kinfo->mem.bank[0].size; > - end = MIN(kinfo->mem.bank[0].start + (128<<20) + new_size, end); > - > - kinfo->initrd_paddr = end - initrd_len; > - kinfo->dtb_paddr = kinfo->initrd_paddr - dtb_len; > - > - if ( kinfo->dtb_paddr < kinfo->mem.bank[0].start || > - kinfo->mem.bank[0].start + new_size > end ) > - { > - printk(XENLOG_ERR "Not enough memory in the first bank for " > - "the device tree."); > - ret = -FDT_ERR_XEN(EINVAL); > - goto err; > - } > - > return 0; > > err: > @@ -994,11 +993,19 @@ int construct_dom0(struct domain *d) > > d->max_pages = ~0U; > > - rc = prepare_dtb(d, &kinfo); > + kinfo.unassigned_mem = dom0_mem; > + > + allocate_memory(d, &kinfo); > + > + rc = kernel_prepare(&kinfo); > if ( rc < 0 ) > return rc; > > - rc = kernel_prepare(&kinfo); > +#ifdef CONFIG_ARM_64 > + d->arch.type = kinfo.type; > +#endif > + > + rc = prepare_dtb(d, &kinfo); > if ( rc < 0 ) > return rc; > > @@ -1006,9 +1013,6 @@ int construct_dom0(struct domain *d) > if ( rc < 0 ) > return rc; > > - if ( kinfo.check_overlap ) > - kinfo.check_overlap(&kinfo); > - > /* The following loads use the domain's p2m */ > p2m_load_VTTBR(d); > #ifdef CONFIG_ARM_64 > @@ -1019,6 +1023,10 @@ int construct_dom0(struct domain *d) > WRITE_SYSREG(READ_SYSREG(HCR_EL2) | HCR_RW, HCR_EL2); > #endif > > + /* > + * kernel_load will determine the placement of the initrd & fdt in > + * RAM, so call it first. > + */ > kernel_load(&kinfo); > /* initrd_load will fix up the fdt, so call it before dtb_load */ > initrd_load(&kinfo); > @@ -1033,7 +1041,6 @@ int construct_dom0(struct domain *d) > > regs->pc = (register_t)kinfo.entry; > > - > if ( is_pv32_domain(d) ) > { > regs->cpsr = PSR_GUEST32_INIT; > diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c > index 7036d94..9046797 100644 > --- a/xen/arch/arm/kernel.c > +++ b/xen/arch/arm/kernel.c > @@ -68,26 +68,56 @@ void copy_from_paddr(void *dst, paddr_t paddr, unsigned > long len, int attrindx) > clear_fixmap(FIXMAP_MISC); > } > > -static void kernel_zimage_check_overlap(struct kernel_info *info) > +static void place_modules(struct kernel_info *info, > + paddr_t kernel_start, > + paddr_t kernel_end) > { > - paddr_t zimage_start = info->zimage.load_addr; > - paddr_t zimage_end = info->zimage.load_addr + info->zimage.len; > - paddr_t start = info->dtb_paddr; > - paddr_t end; > + /* Align DTB and initrd size to 2Mb. Linux only requires 4 byte > alignment */ > + const paddr_t initrd_len = > + ROUNDUP(early_info.modules.module[MOD_INITRD].size, MB(2)); > + const paddr_t dtb_len = ROUNDUP(fdt_totalsize(info->fdt), MB(2)); > + const paddr_t total = initrd_len + dtb_len; > > - end = info->initrd_paddr + early_info.modules.module[MOD_INITRD].size; > + /* Convenient */ > + const paddr_t mem_start = info->mem.bank[0].start; > + const paddr_t mem_size = info->mem.bank[0].size; > + const paddr_t mem_end = mem_start + mem_size; > + const paddr_t kernel_size = kernel_end - kernel_start; > + > + paddr_t addr; > + > + if ( total + kernel_size > mem_size ) > + panic("Not enough memory in the first bank for the dtb+initrd."); > > /* > - * In the dom0 memory, the initrd will be just after the DTB. So we > - * only need to check if the zImage range will overlap the > - * DTB-initrd range. > + * DTB must be loaded such that it does not conflict with the > + * kernel decompressor. For 32-bit Linux Documentation/arm/Booting > + * recommends just after the 128MB boundary while for 64-bit Linux > + * the recommendation in Documentation/arm64/booting.txt is below > + * 512MB. > + * > + * If the bootloader provides an initrd, it will be loaded just > + * after the DTB. > + * > + * We try to place dtb+initrd at 128MB, (or, if we have less RAM, > + * as high as possible). If there is no space then fallback to > + * just after the kernel, if there is room, otherwise just before. > */ > - if ( (start > zimage_end) || (end < zimage_start) ) > + > + if ( kernel_end < MIN(mem_start + MB(128), mem_end - total) ) > + addr = MIN(mem_start + MB(128), mem_end - total); > + else if ( mem_end - ROUNDUP(kernel_end, MB(2)) >= total ) > + addr = ROUNDUP(kernel_end, MB(2)); > + else if ( kernel_start - mem_start >= total ) > + addr = kernel_start - total; > + else > + { > + panic("Unable to find suitable location for dtb+initrd."); > return; > + } > > - panic(XENLOG_ERR "The kernel(0x%"PRIpaddr"-0x%"PRIpaddr > - ") is overlapping the DTB-initrd(0x%"PRIpaddr"-0x%"PRIpaddr")\n", > - zimage_start, zimage_end, start, end); > + info->dtb_paddr = addr; > + info->initrd_paddr = info->dtb_paddr + dtb_len; > } > > static void kernel_zimage_load(struct kernel_info *info) > @@ -98,6 +128,8 @@ static void kernel_zimage_load(struct kernel_info *info) > paddr_t len = info->zimage.len; > unsigned long offs; > > + place_modules(info, load_addr, load_addr + len); > + > printk("Loading zImage from %"PRIpaddr" to %"PRIpaddr"-%"PRIpaddr"\n", > paddr, load_addr, load_addr + len); > for ( offs = 0; offs < len; ) > @@ -176,7 +208,6 @@ static int kernel_try_zimage64_prepare(struct kernel_info > *info, > > info->entry = info->zimage.load_addr; > info->load = kernel_zimage_load; > - info->check_overlap = kernel_zimage_check_overlap; > > info->type = DOMAIN_PV64; > > @@ -236,17 +267,9 @@ static int kernel_try_zimage32_prepare(struct > kernel_info *info, > paddr_t load_end; > > load_end = info->mem.bank[0].start + info->mem.bank[0].size; > - load_end = MIN(info->mem.bank[0].start + (128<<20), load_end); > - > - /* > - * FDT is loaded above 128M or as high as possible, so the > - * only way we can clash is if we have <=128MB, in which case > - * FDT will be right at the end and so dtb_paddr will be below > - * the proposed kernel load address. Move the kernel down if > - * necessary. > - */ > - if ( load_end >= info->dtb_paddr ) > - load_end = info->dtb_paddr; > + load_end = MIN(info->mem.bank[0].start + MB(128), load_end); > + > + load_end += MB(2); > > info->zimage.load_addr = load_end - end; > /* Align to 2MB */ > @@ -258,7 +281,6 @@ static int kernel_try_zimage32_prepare(struct kernel_info > *info, > > info->entry = info->zimage.load_addr; > info->load = kernel_zimage_load; > - info->check_overlap = kernel_zimage_check_overlap; > > #ifdef CONFIG_ARM_64 > info->type = DOMAIN_PV32; > @@ -269,10 +291,15 @@ static int kernel_try_zimage32_prepare(struct > kernel_info *info, > > static void kernel_elf_load(struct kernel_info *info) > { > + place_modules(info, > + info->elf.parms.virt_kstart, > + info->elf.parms.virt_kend); > + > printk("Loading ELF image into guest memory\n"); > info->elf.elf.dest_base = (void*)(unsigned > long)info->elf.parms.virt_kstart; > info->elf.elf.dest_size = > info->elf.parms.virt_kend - info->elf.parms.virt_kstart; > + > elf_load_binary(&info->elf.elf); > > printk("Free temporary kernel buffer\n"); > @@ -321,7 +348,6 @@ static int kernel_try_elf_prepare(struct kernel_info > *info, > */ > info->entry = info->elf.parms.virt_entry; > info->load = kernel_elf_load; > - info->check_overlap = NULL; > > if ( elf_check_broken(&info->elf.elf) ) > printk("Xen: warning: ELF kernel broken: %s\n", > diff --git a/xen/arch/arm/kernel.h b/xen/arch/arm/kernel.h > index debf590..b48c2c9 100644 > --- a/xen/arch/arm/kernel.h > +++ b/xen/arch/arm/kernel.h > @@ -40,8 +40,6 @@ struct kernel_info { > }; > > void (*load)(struct kernel_info *info); > - /* Callback to check overlap between the kernel and the device tree */ > - void (*check_overlap)(struct kernel_info *kinfo); > int load_attr; > }; > > -- > 1.7.10.4 > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |