[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v4 5/6] xen/x86: move NUMA scan nodes codes from x86 to common


  • To: Wei Chen <wei.chen@xxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Thu, 8 Sep 2022 15:02:53 +0200
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=suse.com; dmarc=pass action=none header.from=suse.com; dkim=pass header.d=suse.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=4ZezF46r6xIcbGdDNQ19SoK7mLvv0hwZ/KPQ5tp41J0=; b=hDISNb/QaWtLKgGRbd5sAobn/l98Tma0LwTm6KjrDc4mju76OpvOCBD5mgofjInQ190ETDX0T7lVvU9C8rC7N2vIlP4fH1kHYmanz93J08Pp0jJijTWlcAAh1+kKVW+FIfGKa9o9k+8VolZt09HUk7BOefNJeA0RAQ5HufsOFZvwXBQrxWkD20/QDhOcqt73uf0KXhxrv+iwSl9hHoJl07GyHLv+BWPSd6qtGH/IwBfJJHP0/do8MbetnLnC/uVHO6wDUCh9rFYeMeZc2f2FNpl/XnWkpDGKpRZxNbDO5innnR/X+WYVtaxquijJz5Ht4hec8l4eAWDyBLgsqi9Enw==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=gEVSfDU1c4Q7q4oSmY7IlV3KVK32ynungNzQ6WM1Kwxu1FOaHoyWjAhFXhQElIElZ3z4ky2BAUGmmMgEJ+SwsC3xyJI+dTLCZBDedLiN85GZyFIKgjysADh4p2VIywli7430iM00AtsgZWBUR9a7oNRFLxPcP2yn9qk76i+qUkrFkzD+waHz/jUoadENWMJ+giM/s78vd9fRBMYdCygYytHVzrnrR7X16Af/yAKoIsdoc3MVNUSeM514+Y9E0maR7Nhi7N7c7W6BVj6OmrUNhbisAm2Q/sCE3NtMcC9fWoPItf+BUoTQPSGo4cPj9q0axocAwHj7OsFX48djKmW2Kw==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=suse.com;
  • Cc: nd@xxxxxxx, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, George Dunlap <george.dunlap@xxxxxxxxxx>, Julien Grall <julien@xxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxxx
  • Delivery-date: Thu, 08 Sep 2022 13:03:07 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 02.09.2022 05:31, Wei Chen wrote:
> --- a/xen/arch/x86/numa.c
> +++ b/xen/arch/x86/numa.c
> @@ -41,9 +41,12 @@ int __init arch_numa_setup(const char *opt)
>      return -EINVAL;
>  }
>  
> -bool arch_numa_disabled(void)
> +bool arch_numa_disabled(bool init_as_disable)

I'm afraid my question as to the meaning of the name of the parameter has
remained unanswered.

> @@ -306,32 +218,27 @@ acpi_numa_processor_affinity_init(const struct 
> acpi_srat_cpu_affinity *pa)
>  void __init
>  acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
>  {
> -     struct node *nd;
> -     paddr_t nd_start, nd_end;
> -     paddr_t start, end;
>       unsigned pxm;
>       nodeid_t node;
> -     unsigned int i;
>  
>       if (numa_disabled())
>               return;
>       if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
> -             bad_srat();
> +             numa_fw_bad();
>               return;
>       }
>       if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
>               return;
>  
> -     start = ma->base_address;
> -     end = start + ma->length;
>       /* Supplement the heuristics in l1tf_calculations(). */
> -     l1tf_safe_maddr = max(l1tf_safe_maddr, ROUNDUP(end, PAGE_SIZE));
> +     l1tf_safe_maddr = max(l1tf_safe_maddr,
> +                           ROUNDUP(ma->base_address + ma->length,
> +                           PAGE_SIZE));

Indentation:

        l1tf_safe_maddr = max(l1tf_safe_maddr,
                              ROUNDUP(ma->base_address + ma->length,
                                      PAGE_SIZE));

> @@ -33,7 +48,309 @@ bool __read_mostly numa_off;
>  
>  bool numa_disabled(void)
>  {
> -    return numa_off || arch_numa_disabled();
> +    return numa_off || arch_numa_disabled(false);
> +}
> +
> +void __init numa_set_processor_nodes_parsed(nodeid_t node)
> +{
> +    node_set(node, processor_nodes_parsed);
> +}
> +
> +bool valid_numa_range(paddr_t start, paddr_t end, nodeid_t node)
> +{
> +    unsigned int i;
> +
> +    for ( i = 0; i < num_node_memblks; i++ )
> +    {
> +        struct node *nd = &node_memblk_range[i];

const? (This is particularly relevant with __ro_after_init.)

> +bool __init numa_update_node_memblks(nodeid_t node, unsigned int arch_nid,
> +                                     paddr_t start, paddr_t size,
> +                                     const char *prefix,
> +                                     bool hotplug)
> +{
> +    unsigned int i;
> +    paddr_t end = start + size;
> +    paddr_t nd_start = start;
> +    paddr_t nd_end = end;
> +    struct node *nd = &nodes[node];
> +
> +    /*
> +     * For the node that already has some memory blocks, we will
> +     * expand the node memory range temporarily to check memory
> +     * interleaves with other nodes. We will not use this node
> +     * temp memory range to check overlaps, because it will mask
> +     * the overlaps in same node.
> +     *
> +     * Node with 0 bytes memory doesn't need this expandsion.

Mind taking the opportunity and drop the 'd' from "expansion"?

> +     */
> +    if ( nd->start != nd->end )
> +    {
> +        if ( nd_start > nd->start )
> +            nd_start = nd->start;
> +
> +        if ( nd_end < nd->end )
> +            nd_end = nd->end;
> +    }
> +
> +    /* It is fine to add this area to the nodes data it will be used later */
> +    switch ( conflicting_memblks(node, start, end, nd_start, nd_end, &i) )
> +    {
> +    case OVERLAP:
> +        if ( memblk_nodeid[i] == node )
> +        {
> +            bool mismatch = !(hotplug) != !test_bit(i, memblk_hotplug);

No need to parenthesize "hotplug" here.

> +            printk("%sNUMA: %s %u [%"PRIpaddr", %"PRIpaddr"] overlaps with 
> itself [%"PRIpaddr", %"PRIpaddr"]\n",
> +                   mismatch ? KERN_ERR : KERN_WARNING, prefix,
> +                   arch_nid, start, end - 1,
> +                   node_memblk_range[i].start, node_memblk_range[i].end - 1);
> +            if ( mismatch )
> +                return false;
> +            break;
> +        }
> +
> +        printk(KERN_ERR
> +               "NUMA: %s %u [%"PRIpaddr", %"PRIpaddr"] overlaps with %s %u 
> [%"PRIpaddr", %"PRIpaddr"]\n",
> +               prefix, arch_nid, start, end - 1, prefix,
> +               numa_node_to_arch_nid(memblk_nodeid[i]),
> +               node_memblk_range[i].start, node_memblk_range[i].end - 1);
> +        return false;
> +
> +
> +    case INTERLEAVE:

Please don't add double blank lines anywhere (original code didn't have
these); there's at least one more instance below.

> +static int __init nodes_cover_memory(void)
> +{
> +    unsigned int i;
> +
> +    for ( i = 0; ; i++ )
> +    {
> +        int err;
> +        bool found;
> +        unsigned int j;
> +        paddr_t start, end;
> +
> +        /* Try to loop memory map from index 0 to end to get RAM ranges. */
> +        err = arch_get_ram_range(i, &start, &end);
> +
> +        /* Reach the end of arch's memory map */
> +        if ( err == -ENOENT )
> +            break;
> +
> +        /* Index relate entry is not RAM, skip it. */
> +        if ( err )
> +            continue;
> +
> +        do {
> +            found = false;
> +            for_each_node_mask( j, memory_nodes_parsed )

Please be consistent with style for constructs like this one: Either
you consider for_each_node_mask a pseudo-keyword (along the lines of
for(;;)), then there's a blank missing ahead of the opening
parenthesis. Or you consider this an ordinary identifier (i.e. the
function-like macro that it is), then there shouldn't be blanks
immediately inside the parentheses. (Same issue elsewhere.)

> +                if ( start < nodes[j].end
> +                    && end > nodes[j].start )
> +                {
> +                    if ( start >= nodes[j].start )
> +                    {
> +                        start = nodes[j].end;
> +                        found = true;
> +                    }
> +
> +                    if ( end <= nodes[j].end )
> +                    {
> +                        end = nodes[j].start;
> +                        found = true;
> +                    }
> +                }
> +        } while ( found && start < end );
> +
> +        if ( start < end )
> +        {
> +            printk(KERN_ERR "NUMA: No node for RAM range: "
> +                   "[%"PRIpaddr", %"PRIpaddr"]\n", start, end - 1);
> +            return 0;
> +        }
> +    }
> +    return 1;
> +}

Seeing the two returns (and no further ones in the function) - did
you not mean to also switch to bool/true/false here?

> +/* Use the information discovered above to actually set up the nodes. */
> +static bool __init numa_scan_nodes(paddr_t start, paddr_t end)

Is "above" in the comment actually still accurate? Aiui the discovery
is now in a different CU. Then perhaps "Use discovered information to
actually set up the nodes."

> +{
> +    unsigned int i;
> +    nodemask_t all_nodes_parsed;
> +
> +    /* First clean up the node list */
> +    for ( i = 0; i < MAX_NUMNODES; i++ )
> +        cutoff_node(i, start, end);
> +
> +    /* When numa is on with good firmware, we can do numa scan nodes. */
> +    if ( arch_numa_disabled(true) )
> +        return false;

Btw - the comment here doesn't help me figure your choice of
"init_as_disabled". The wording towards the end is also a little
odd, considering we're already in numa_scan_nodes(). Which further
points out that really there's no scanning here, just processing,
so maybe the earlier patch wants to rename the function to
numa_process_nodes()?

> +    if ( !nodes_cover_memory() )
> +    {
> +        numa_fw_bad();
> +        return false;
> +    }
> +
> +    memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
> +                                       memblk_nodeid);
> +
> +    if ( memnode_shift < 0 )

As previously pointed out: As of patch 2 memnode_shift is unsigned,
so this comparison is always false (and the latest Coverity will
point this out). You can't get away here without using an intermediate
(signed, i.e. plain int) variable.

> +    {
> +        printk(KERN_ERR
> +               "NUMA: No NUMA node hash function found. Contact 
> maintainer\n");
> +        numa_fw_bad();
> +        return false;
> +    }
> +
> +    nodes_or(all_nodes_parsed, memory_nodes_parsed, processor_nodes_parsed);
> +
> +    /* Finally register nodes */
> +    for_each_node_mask( i, all_nodes_parsed )
> +    {
> +        if ( nodes[i].end - nodes[i].start == 0 )

nodes[i].end == nodes[i].start ?

> +            printk(KERN_INFO "NUMA: node %u has no memory\n", i);
> +
> +        setup_node_bootmem(i, nodes[i].start, nodes[i].end);
> +    }
> +
> +    for ( i = 0; i < nr_cpu_ids; i++ )
> +    {
> +        if ( cpu_to_node[i] == NUMA_NO_NODE )
> +            continue;
> +        if ( !nodemask_test(cpu_to_node[i], &processor_nodes_parsed) )
> +            numa_set_node(i, NUMA_NO_NODE);
> +    }
> +    numa_init_array();
> +    return true;
>  }

While you said you'd check elsewhere as well, just to be sure: Please
add a blank line before the function's main "return". And perhaps
another one between loop and function call.

> --- a/xen/drivers/acpi/Kconfig
> +++ b/xen/drivers/acpi/Kconfig
> @@ -7,4 +7,5 @@ config ACPI_LEGACY_TABLES_LOOKUP
>  
>  config ACPI_NUMA
>       bool
> +     select HAS_NUMA_NODE_FWID

Are you selecting an option here which doesn't exist anywhere? Or
am I overlooking where this new option is being added?

Jan



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.