[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v5 07/19] xen: arm: allocate dom0 memory separately from preparing the dtb



On Wed, 13 Nov 2013, Ian Campbell wrote:
> Mixing these two together is a pain, it forces us to prepare the dtb before
> processing the kernel which means we don't know whether the guest is 32- or
> 64-bit while we construct its DTB.
> 
> Instead split out the memory allocation (including 1:1 workaround handling)
> and p2m setup into a separate phase and then create a memory node in the DTB
> based on the result.
> 
> This allows us to move kernel parsing before DTB setup.
> 
> As part of this it was also necessary to rework where the decision regarding
> the placement of the DTB and initrd in RAM was made. It is now made when
> loading the kernel, which allows it to make use of the zImage/ELF specific
> information and therefore to make decisions based on complete knowledge and do
> it right rather than guessing in prepare_dtb and relying on a later check to
> see if things worked.
> 
> Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

It looks good


> v3: Also rework module placement, v2 broke boot because dtb_paddr wasn't set
>     soon enough. This ends up cleaner anyway.
> v2: Fixed typo in the commit log
>     Handle multiple memory nodes as well as individual nodes with several
>     entries in them.
>     Strip the original memory node and recreate rather than trying to modify.
> ---
>  xen/arch/arm/domain_build.c |  203 
> ++++++++++++++++++++++---------------------
>  xen/arch/arm/kernel.c       |   80 +++++++++++------
>  xen/arch/arm/kernel.h       |    2 -
>  3 files changed, 158 insertions(+), 127 deletions(-)
> 
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index 8645aa1..edfcf14 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -81,11 +81,8 @@ struct vcpu *__init alloc_dom0_vcpu0(void)
>      return alloc_vcpu(dom0, 0, 0);
>  }
>  
> -static int set_memory_reg_11(struct domain *d, struct kernel_info *kinfo,
> -                             const struct dt_property *pp,
> -                             const struct dt_device_node *np, __be32 
> *new_cell)
> +static void allocate_memory_11(struct domain *d, struct kernel_info *kinfo)
>  {
> -    int reg_size = dt_cells_to_size(dt_n_addr_cells(np) + 
> dt_n_size_cells(np));
>      paddr_t start;
>      paddr_t size;
>      struct page_info *pg = NULL;
> @@ -116,53 +113,61 @@ static int set_memory_reg_11(struct domain *d, struct 
> kernel_info *kinfo,
>      if ( res )
>          panic("Unable to add pages in DOM0: %d\n", res);
>  
> -    dt_set_range(&new_cell, np, start, size);
> -
>      kinfo->mem.bank[0].start = start;
>      kinfo->mem.bank[0].size = size;
>      kinfo->mem.nr_banks = 1;
>  
> -    return reg_size;
> +    kinfo->unassigned_mem -= size;
>  }
>  
> -static int set_memory_reg(struct domain *d, struct kernel_info *kinfo,
> -                          const struct dt_property *pp,
> -                          const struct dt_device_node *np, __be32 *new_cell)
> +static void allocate_memory(struct domain *d, struct kernel_info *kinfo)
>  {
> -    int reg_size = dt_cells_to_size(dt_n_addr_cells(np) + 
> dt_n_size_cells(np));
> -    int l = 0;
> +
> +    struct dt_device_node *memory = NULL;
> +    const void *reg;
> +    u32 reg_len, reg_size;
>      unsigned int bank = 0;
> -    u64 start;
> -    u64 size;
> -    int ret;
>  
>      if ( platform_has_quirk(PLATFORM_QUIRK_DOM0_MAPPING_11) )
> -        return set_memory_reg_11(d, kinfo, pp, np, new_cell);
> +        return allocate_memory_11(d, kinfo);
>  
> -    while ( kinfo->unassigned_mem > 0 && l + reg_size <= pp->length
> -            && kinfo->mem.nr_banks < NR_MEM_BANKS )
> +    while ( (memory = dt_find_node_by_type(memory, "memory")) )
>      {
> -        ret = dt_device_get_address(np, bank, &start, &size);
> -        if ( ret )
> -            panic("Unable to retrieve the bank %u for %s\n",
> -                  bank, dt_node_full_name(np));
> -
> -        if ( size > kinfo->unassigned_mem )
> -            size = kinfo->unassigned_mem;
> -        dt_set_range(&new_cell, np, start, size);
> -
> -        printk("Populate P2M %#"PRIx64"->%#"PRIx64"\n", start, start + size);
> -        if ( p2m_populate_ram(d, start, start + size) < 0 )
> -            panic("Failed to populate P2M\n");
> -        kinfo->mem.bank[kinfo->mem.nr_banks].start = start;
> -        kinfo->mem.bank[kinfo->mem.nr_banks].size = size;
> -        kinfo->mem.nr_banks++;
> -        kinfo->unassigned_mem -= size;
> -
> -        l += reg_size;
> -    }
> +        int l;
> +
> +        DPRINT("memory node\n");
> +
> +        reg_size = dt_cells_to_size(dt_n_addr_cells(memory) + 
> dt_n_size_cells(memory));
> +
> +        reg = dt_get_property(memory, "reg", &reg_len);
> +        if ( reg == NULL )
> +            panic("Memory node has no reg property!\n");
> +
> +        for ( l = 0;
> +              kinfo->unassigned_mem > 0 && l + reg_size <= reg_len
> +                  && kinfo->mem.nr_banks < NR_MEM_BANKS;
> +              l += reg_size )
> +        {
> +            paddr_t start, size;
>  
> -    return l;
> +            if ( dt_device_get_address(memory, bank, &start, &size) )
> +                panic("Unable to retrieve the bank %u for %s\n",
> +                      bank, dt_node_full_name(memory));
> +
> +            if ( size > kinfo->unassigned_mem )
> +                size = kinfo->unassigned_mem;
> +
> +            printk("Populate P2M %#"PRIx64"->%#"PRIx64"\n",
> +                   start, start + size);
> +            if ( p2m_populate_ram(d, start, start + size) < 0 )
> +                panic("Failed to populate P2M\n");
> +            kinfo->mem.bank[kinfo->mem.nr_banks].start = start;
> +            kinfo->mem.bank[kinfo->mem.nr_banks].size = size;
> +            kinfo->mem.nr_banks++;
> +
> +            kinfo->unassigned_mem -= size;
> +        }
> +    }
>  }
>  
>  static int write_properties(struct domain *d, struct kernel_info *kinfo,
> @@ -211,23 +216,6 @@ static int write_properties(struct domain *d, struct 
> kernel_info *kinfo,
>                  continue;
>              }
>          }
> -        /*
> -         * In a memory node: adjust reg property.
> -         * TODO: handle properly memory node (ie: device_type = "memory")
> -         */
> -        else if ( dt_node_name_is_equal(np, "memory") )
> -        {
> -            if ( dt_property_name_is_equal(pp, "reg") )
> -            {
> -                new_data = xzalloc_bytes(pp->length);
> -                if ( new_data  == NULL )
> -                    return -FDT_ERR_XEN(ENOMEM);
> -
> -                prop_len = set_memory_reg(d, kinfo, pp, np,
> -                                          (__be32 *)new_data);
> -                prop_data = new_data;
> -            }
> -        }
>  
>          res = fdt_property(kinfo->fdt, pp->name, prop_data, prop_len);
>  
> @@ -304,6 +292,46 @@ static int fdt_property_interrupts(void *fdt, 
> gic_interrupt_t *intr,
>      return res;
>  }
>  
> +static int make_memory_node(const struct domain *d,
> +                            void *fdt,
> +                            const struct kernel_info *kinfo)
> +{
> +    int res, i;
> +    int nr_cells = XEN_FDT_NODE_REG_SIZE*kinfo->mem.nr_banks;
> +    __be32 reg[nr_cells];
> +    __be32 *cells;
> +
> +    DPRINT("Create memory node\n");
> +
> +    /* ePAPR 3.4 */
> +    res = fdt_begin_node(fdt, "memory");
> +    if ( res )
> +        return res;
> +
> +    res = fdt_property_string(fdt, "device_type", "memory");
> +    if ( res )
> +        return res;
> +
> +    cells = &reg[0];
> +    for ( i = 0 ; i < kinfo->mem.nr_banks; i++ )
> +    {
> +        u64 start = kinfo->mem.bank[i].start;
> +        u64 size = kinfo->mem.bank[i].size;
> +
> +        DPRINT("  Bank %d: %#"PRIx64"->%#"PRIx64"\n",
> +                i, start, start + size);
> +
> +        set_xen_range(&cells, start, size);
> +    }
> +
> +    res = fdt_property(fdt, "reg", reg, nr_cells);
> +    if ( res )
> +        return res;
> +
> +    res = fdt_end_node(fdt);
> +
> +    return res;
> +}
>  
>  static int make_hypervisor_node(void *fdt, const struct dt_device_node 
> *parent)
>  {
> @@ -627,7 +655,8 @@ static int make_timer_node(const struct domain *d, void 
> *fdt)
>  }
>  
>  static int make_xen_node(const struct domain *d, void *fdt,
> -                         const struct dt_device_node *parent)
> +                         const struct dt_device_node *parent,
> +                         const struct kernel_info *kinfo)
>  {
>      int res;
>  
> @@ -649,6 +678,10 @@ static int make_xen_node(const struct domain *d, void 
> *fdt,
>      if ( res )
>          return res;
>  
> +    res = make_memory_node(d, fdt, kinfo);
> +    if ( res )
> +        return res;
> +
>      res = make_gic_node(d, fdt);
>      if ( res )
>          return res;
> @@ -750,6 +783,7 @@ static int handle_node(struct domain *d, struct 
> kernel_info *kinfo,
>          DT_MATCH_COMPATIBLE("xen,multiboot-module"),
>          DT_MATCH_COMPATIBLE("arm,psci"),
>          DT_MATCH_PATH("/cpus"),
> +        DT_MATCH_TYPE("memory"),
>          DT_MATCH_GIC,
>          DT_MATCH_TIMER,
>          { /* sentinel */ },
> @@ -826,7 +860,7 @@ static int handle_node(struct domain *d, struct 
> kernel_info *kinfo,
>          if ( res )
>              return res;
>  
> -        res = make_xen_node(d, kinfo->fdt, np);
> +        res = make_xen_node(d, kinfo->fdt, np, kinfo);
>          if ( res )
>              return res;
>      }
> @@ -841,14 +875,9 @@ static int prepare_dtb(struct domain *d, struct 
> kernel_info *kinfo)
>      const void *fdt;
>      int new_size;
>      int ret;
> -    paddr_t end;
> -    paddr_t initrd_len;
> -    paddr_t dtb_len;
>  
>      ASSERT(dt_host && (dt_host->sibling == NULL));
>  
> -    kinfo->unassigned_mem = dom0_mem;
> -
>      fdt = device_tree_flattened;
>  
>      new_size = fdt_totalsize(fdt) + DOM0_FDT_EXTRA_SIZE;
> @@ -870,36 +899,6 @@ static int prepare_dtb(struct domain *d, struct 
> kernel_info *kinfo)
>      if ( ret < 0 )
>          goto err;
>  
> -    /* Align DTB and initrd size to 2Mb. Linux only requires 4 byte 
> alignment */
> -    initrd_len = ROUNDUP(early_info.modules.module[MOD_INITRD].size, MB(2));
> -    dtb_len = ROUNDUP(fdt_totalsize(kinfo->fdt), MB(2));
> -    new_size = initrd_len + dtb_len;
> -
> -    /*
> -     * DTB must be loaded such that it does not conflict with the
> -     * kernel decompressor. For 32-bit Linux Documentation/arm/Booting
> -     * recommends just after the 128MB boundary while for 64-bit Linux
> -     * the recommendation in Documentation/arm64/booting.txt is below
> -     * 512MB. Place at 128MB, (or, if we have less RAM, as high as
> -     * possible) in order to satisfy both.
> -     * If the bootloader provides an initrd, it will be loaded just
> -     * after the DTB.
> -     */
> -    end = kinfo->mem.bank[0].start + kinfo->mem.bank[0].size;
> -    end = MIN(kinfo->mem.bank[0].start + (128<<20) + new_size, end);
> -
> -    kinfo->initrd_paddr = end - initrd_len;
> -    kinfo->dtb_paddr = kinfo->initrd_paddr - dtb_len;
> -
> -    if ( kinfo->dtb_paddr < kinfo->mem.bank[0].start ||
> -         kinfo->mem.bank[0].start + new_size > end )
> -    {
> -        printk(XENLOG_ERR "Not enough memory in the first bank for "
> -               "the device tree.");
> -        ret = -FDT_ERR_XEN(EINVAL);
> -        goto err;
> -    }
> -
>      return 0;
>  
>    err:
> @@ -994,11 +993,19 @@ int construct_dom0(struct domain *d)
>  
>      d->max_pages = ~0U;
>  
> -    rc = prepare_dtb(d, &kinfo);
> +    kinfo.unassigned_mem = dom0_mem;
> +
> +    allocate_memory(d, &kinfo);
> +
> +    rc = kernel_prepare(&kinfo);
>      if ( rc < 0 )
>          return rc;
>  
> -    rc = kernel_prepare(&kinfo);
> +#ifdef CONFIG_ARM_64
> +    d->arch.type = kinfo.type;
> +#endif
> +
> +    rc = prepare_dtb(d, &kinfo);
>      if ( rc < 0 )
>          return rc;
>  
> @@ -1006,9 +1013,6 @@ int construct_dom0(struct domain *d)
>      if ( rc < 0 )
>          return rc;
>  
> -    if ( kinfo.check_overlap )
> -        kinfo.check_overlap(&kinfo);
> -
>      /* The following loads use the domain's p2m */
>      p2m_load_VTTBR(d);
>  #ifdef CONFIG_ARM_64
> @@ -1019,6 +1023,10 @@ int construct_dom0(struct domain *d)
>          WRITE_SYSREG(READ_SYSREG(HCR_EL2) | HCR_RW, HCR_EL2);
>  #endif
>  
> +    /*
> +     * kernel_load will determine the placement of the initrd & fdt in
> +     * RAM, so call it first.
> +     */
>      kernel_load(&kinfo);
>      /* initrd_load will fix up the fdt, so call it before dtb_load */
>      initrd_load(&kinfo);
> @@ -1033,7 +1041,6 @@ int construct_dom0(struct domain *d)
>  
>      regs->pc = (register_t)kinfo.entry;
>  
> -
>      if ( is_pv32_domain(d) )
>      {
>          regs->cpsr = PSR_GUEST32_INIT;
> diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c
> index 7036d94..9046797 100644
> --- a/xen/arch/arm/kernel.c
> +++ b/xen/arch/arm/kernel.c
> @@ -68,26 +68,56 @@ void copy_from_paddr(void *dst, paddr_t paddr, unsigned 
> long len, int attrindx)
>      clear_fixmap(FIXMAP_MISC);
>  }
>  
> -static void kernel_zimage_check_overlap(struct kernel_info *info)
> +static void place_modules(struct kernel_info *info,
> +                         paddr_t kernel_start,
> +                         paddr_t kernel_end)
>  {
> -    paddr_t zimage_start = info->zimage.load_addr;
> -    paddr_t zimage_end = info->zimage.load_addr + info->zimage.len;
> -    paddr_t start = info->dtb_paddr;
> -    paddr_t end;
> +    /* Align DTB and initrd size to 2Mb. Linux only requires 4 byte 
> alignment */
> +    const paddr_t initrd_len =
> +        ROUNDUP(early_info.modules.module[MOD_INITRD].size, MB(2));
> +    const paddr_t dtb_len = ROUNDUP(fdt_totalsize(info->fdt), MB(2));
> +    const paddr_t total = initrd_len + dtb_len;
>  
> -    end = info->initrd_paddr + early_info.modules.module[MOD_INITRD].size;
> +    /* Convenient */
> +    const paddr_t mem_start = info->mem.bank[0].start;
> +    const paddr_t mem_size = info->mem.bank[0].size;
> +    const paddr_t mem_end = mem_start + mem_size;
> +    const paddr_t kernel_size = kernel_end - kernel_start;
> +
> +    paddr_t addr;
> +
> +    if ( total + kernel_size > mem_size )
> +        panic("Not enough memory in the first bank for the dtb+initrd.");
>  
>      /*
> -     * In the dom0 memory, the initrd will be just after the DTB. So we
> -     * only need to check if the zImage range will overlap the
> -     * DTB-initrd range.
> +     * DTB must be loaded such that it does not conflict with the
> +     * kernel decompressor. For 32-bit Linux Documentation/arm/Booting
> +     * recommends just after the 128MB boundary while for 64-bit Linux
> +     * the recommendation in Documentation/arm64/booting.txt is below
> +     * 512MB.
> +     *
> +     * If the bootloader provides an initrd, it will be loaded just
> +     * after the DTB.
> +     *
> +     * We try to place dtb+initrd at 128MB, (or, if we have less RAM,
> +     * as high as possible). If there is no space then fallback to
> +     * just after the kernel, if there is room, otherwise just before.
>       */
> -    if ( (start > zimage_end) || (end < zimage_start) )
> +
> +    if ( kernel_end < MIN(mem_start + MB(128), mem_end - total) )
> +        addr = MIN(mem_start + MB(128), mem_end - total);
> +    else if ( mem_end - ROUNDUP(kernel_end, MB(2)) >= total )
> +        addr = ROUNDUP(kernel_end, MB(2));
> +    else if ( kernel_start - mem_start >= total )
> +        addr = kernel_start - total;
> +    else
> +    {
> +        panic("Unable to find suitable location for dtb+initrd.");
>          return;
> +    }
>  
> -    panic(XENLOG_ERR "The kernel(0x%"PRIpaddr"-0x%"PRIpaddr
> -          ") is overlapping the DTB-initrd(0x%"PRIpaddr"-0x%"PRIpaddr")\n",
> -          zimage_start, zimage_end, start, end);
> +    info->dtb_paddr = addr;
> +    info->initrd_paddr = info->dtb_paddr + dtb_len;
>  }
>  
>  static void kernel_zimage_load(struct kernel_info *info)
> @@ -98,6 +128,8 @@ static void kernel_zimage_load(struct kernel_info *info)
>      paddr_t len = info->zimage.len;
>      unsigned long offs;
>  
> +    place_modules(info, load_addr, load_addr + len);
> +
>      printk("Loading zImage from %"PRIpaddr" to %"PRIpaddr"-%"PRIpaddr"\n",
>             paddr, load_addr, load_addr + len);
>      for ( offs = 0; offs < len; )
> @@ -176,7 +208,6 @@ static int kernel_try_zimage64_prepare(struct kernel_info 
> *info,
>  
>      info->entry = info->zimage.load_addr;
>      info->load = kernel_zimage_load;
> -    info->check_overlap = kernel_zimage_check_overlap;
>  
>      info->type = DOMAIN_PV64;
>  
> @@ -236,17 +267,9 @@ static int kernel_try_zimage32_prepare(struct 
> kernel_info *info,
>          paddr_t load_end;
>  
>          load_end = info->mem.bank[0].start + info->mem.bank[0].size;
> -        load_end = MIN(info->mem.bank[0].start + (128<<20), load_end);
> -
> -        /*
> -         * FDT is loaded above 128M or as high as possible, so the
> -         * only way we can clash is if we have <=128MB, in which case
> -         * FDT will be right at the end and so dtb_paddr will be below
> -         * the proposed kernel load address. Move the kernel down if
> -         * necessary.
> -         */
> -        if ( load_end >= info->dtb_paddr )
> -            load_end = info->dtb_paddr;
> +        load_end = MIN(info->mem.bank[0].start + MB(128), load_end);
> +
> +        load_end += MB(2);
>  
>          info->zimage.load_addr = load_end - end;
>          /* Align to 2MB */
> @@ -258,7 +281,6 @@ static int kernel_try_zimage32_prepare(struct kernel_info 
> *info,
>  
>      info->entry = info->zimage.load_addr;
>      info->load = kernel_zimage_load;
> -    info->check_overlap = kernel_zimage_check_overlap;
>  
>  #ifdef CONFIG_ARM_64
>      info->type = DOMAIN_PV32;
> @@ -269,10 +291,15 @@ static int kernel_try_zimage32_prepare(struct 
> kernel_info *info,
>  
>  static void kernel_elf_load(struct kernel_info *info)
>  {
> +    place_modules(info,
> +                  info->elf.parms.virt_kstart,
> +                  info->elf.parms.virt_kend);
> +
>      printk("Loading ELF image into guest memory\n");
>      info->elf.elf.dest_base = (void*)(unsigned 
> long)info->elf.parms.virt_kstart;
>      info->elf.elf.dest_size =
>           info->elf.parms.virt_kend - info->elf.parms.virt_kstart;
> +
>      elf_load_binary(&info->elf.elf);
>  
>      printk("Free temporary kernel buffer\n");
> @@ -321,7 +348,6 @@ static int kernel_try_elf_prepare(struct kernel_info 
> *info,
>       */
>      info->entry = info->elf.parms.virt_entry;
>      info->load = kernel_elf_load;
> -    info->check_overlap = NULL;
>  
>      if ( elf_check_broken(&info->elf.elf) )
>          printk("Xen: warning: ELF kernel broken: %s\n",
> diff --git a/xen/arch/arm/kernel.h b/xen/arch/arm/kernel.h
> index debf590..b48c2c9 100644
> --- a/xen/arch/arm/kernel.h
> +++ b/xen/arch/arm/kernel.h
> @@ -40,8 +40,6 @@ struct kernel_info {
>      };
>  
>      void (*load)(struct kernel_info *info);
> -    /* Callback to check overlap between the kernel and the device tree */
> -    void (*check_overlap)(struct kernel_info *kinfo);
>      int load_attr;
>  };
>  
> -- 
> 1.7.10.4
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.