Xen project Mailing List

Re: [PATCH v4 5/6] xen/x86: move NUMA scan nodes codes from x86 to common

Date: Thu, 8 Sep 2022 15:02:53 +0200

Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=suse.com; dmarc=pass action=none header.from=suse.com; dkim=pass header.d=suse.com; arc=none

Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=4ZezF46r6xIcbGdDNQ19SoK7mLvv0hwZ/KPQ5tp41J0=; b=hDISNb/QaWtLKgGRbd5sAobn/l98Tma0LwTm6KjrDc4mju76OpvOCBD5mgofjInQ190ETDX0T7lVvU9C8rC7N2vIlP4fH1kHYmanz93J08Pp0jJijTWlcAAh1+kKVW+FIfGKa9o9k+8VolZt09HUk7BOefNJeA0RAQ5HufsOFZvwXBQrxWkD20/QDhOcqt73uf0KXhxrv+iwSl9hHoJl07GyHLv+BWPSd6qtGH/IwBfJJHP0/do8MbetnLnC/uVHO6wDUCh9rFYeMeZc2f2FNpl/XnWkpDGKpRZxNbDO5innnR/X+WYVtaxquijJz5Ht4hec8l4eAWDyBLgsqi9Enw==

Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=gEVSfDU1c4Q7q4oSmY7IlV3KVK32ynungNzQ6WM1Kwxu1FOaHoyWjAhFXhQElIElZ3z4ky2BAUGmmMgEJ+SwsC3xyJI+dTLCZBDedLiN85GZyFIKgjysADh4p2VIywli7430iM00AtsgZWBUR9a7oNRFLxPcP2yn9qk76i+qUkrFkzD+waHz/jUoadENWMJ+giM/s78vd9fRBMYdCygYytHVzrnrR7X16Af/yAKoIsdoc3MVNUSeM514+Y9E0maR7Nhi7N7c7W6BVj6OmrUNhbisAm2Q/sCE3NtMcC9fWoPItf+BUoTQPSGo4cPj9q0axocAwHj7OsFX48djKmW2Kw==

Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=suse.com;

Cc: nd@xxxxxxx, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, George Dunlap <george.dunlap@xxxxxxxxxx>, Julien Grall <julien@xxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxxx

Delivery-date: Thu, 08 Sep 2022 13:03:07 +0000

List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 02.09.2022 05:31, Wei Chen wrote: > --- a/xen/arch/x86/numa.c > +++ b/xen/arch/x86/numa.c > @@ -41,9 +41,12 @@ int __init arch_numa_setup(const char *opt) > return -EINVAL; > } > > -bool arch_numa_disabled(void) > +bool arch_numa_disabled(bool init_as_disable) I'm afraid my question as to the meaning of the name of the parameter has remained unanswered. > @@ -306,32 +218,27 @@ acpi_numa_processor_affinity_init(const struct > acpi_srat_cpu_affinity *pa) > void __init > acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma) > { > - struct node *nd; > - paddr_t nd_start, nd_end; > - paddr_t start, end; > unsigned pxm; > nodeid_t node; > - unsigned int i; > > if (numa_disabled()) > return; > if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) { > - bad_srat(); > + numa_fw_bad(); > return; > } > if (!(ma->flags & ACPI_SRAT_MEM_ENABLED)) > return; > > - start = ma->base_address; > - end = start + ma->length; > /* Supplement the heuristics in l1tf_calculations(). */ > - l1tf_safe_maddr = max(l1tf_safe_maddr, ROUNDUP(end, PAGE_SIZE)); > + l1tf_safe_maddr = max(l1tf_safe_maddr, > + ROUNDUP(ma->base_address + ma->length, > + PAGE_SIZE)); Indentation: l1tf_safe_maddr = max(l1tf_safe_maddr, ROUNDUP(ma->base_address + ma->length, PAGE_SIZE)); > @@ -33,7 +48,309 @@ bool __read_mostly numa_off; > > bool numa_disabled(void) > { > - return numa_off || arch_numa_disabled(); > + return numa_off || arch_numa_disabled(false); > +} > + > +void __init numa_set_processor_nodes_parsed(nodeid_t node) > +{ > + node_set(node, processor_nodes_parsed); > +} > + > +bool valid_numa_range(paddr_t start, paddr_t end, nodeid_t node) > +{ > + unsigned int i; > + > + for ( i = 0; i < num_node_memblks; i++ ) > + { > + struct node *nd = &node_memblk_range[i]; const? (This is particularly relevant with __ro_after_init.) > +bool __init numa_update_node_memblks(nodeid_t node, unsigned int arch_nid, > + paddr_t start, paddr_t size, > + const char *prefix, > + bool hotplug) > +{ > + unsigned int i; > + paddr_t end = start + size; > + paddr_t nd_start = start; > + paddr_t nd_end = end; > + struct node *nd = &nodes[node]; > + > + /* > + * For the node that already has some memory blocks, we will > + * expand the node memory range temporarily to check memory > + * interleaves with other nodes. We will not use this node > + * temp memory range to check overlaps, because it will mask > + * the overlaps in same node. > + * > + * Node with 0 bytes memory doesn't need this expandsion. Mind taking the opportunity and drop the 'd' from "expansion"? > + */ > + if ( nd->start != nd->end ) > + { > + if ( nd_start > nd->start ) > + nd_start = nd->start; > + > + if ( nd_end < nd->end ) > + nd_end = nd->end; > + } > + > + /* It is fine to add this area to the nodes data it will be used later */ > + switch ( conflicting_memblks(node, start, end, nd_start, nd_end, &i) ) > + { > + case OVERLAP: > + if ( memblk_nodeid[i] == node ) > + { > + bool mismatch = !(hotplug) != !test_bit(i, memblk_hotplug); No need to parenthesize "hotplug" here. > + printk("%sNUMA: %s %u [%"PRIpaddr", %"PRIpaddr"] overlaps with > itself [%"PRIpaddr", %"PRIpaddr"]\n", > + mismatch ? KERN_ERR : KERN_WARNING, prefix, > + arch_nid, start, end - 1, > + node_memblk_range[i].start, node_memblk_range[i].end - 1); > + if ( mismatch ) > + return false; > + break; > + } > + > + printk(KERN_ERR > + "NUMA: %s %u [%"PRIpaddr", %"PRIpaddr"] overlaps with %s %u > [%"PRIpaddr", %"PRIpaddr"]\n", > + prefix, arch_nid, start, end - 1, prefix, > + numa_node_to_arch_nid(memblk_nodeid[i]), > + node_memblk_range[i].start, node_memblk_range[i].end - 1); > + return false; > + > + > + case INTERLEAVE: Please don't add double blank lines anywhere (original code didn't have these); there's at least one more instance below. > +static int __init nodes_cover_memory(void) > +{ > + unsigned int i; > + > + for ( i = 0; ; i++ ) > + { > + int err; > + bool found; > + unsigned int j; > + paddr_t start, end; > + > + /* Try to loop memory map from index 0 to end to get RAM ranges. */ > + err = arch_get_ram_range(i, &start, &end); > + > + /* Reach the end of arch's memory map */ > + if ( err == -ENOENT ) > + break; > + > + /* Index relate entry is not RAM, skip it. */ > + if ( err ) > + continue; > + > + do { > + found = false; > + for_each_node_mask( j, memory_nodes_parsed ) Please be consistent with style for constructs like this one: Either you consider for_each_node_mask a pseudo-keyword (along the lines of for(;;)), then there's a blank missing ahead of the opening parenthesis. Or you consider this an ordinary identifier (i.e. the function-like macro that it is), then there shouldn't be blanks immediately inside the parentheses. (Same issue elsewhere.) > + if ( start < nodes[j].end > + && end > nodes[j].start ) > + { > + if ( start >= nodes[j].start ) > + { > + start = nodes[j].end; > + found = true; > + } > + > + if ( end <= nodes[j].end ) > + { > + end = nodes[j].start; > + found = true; > + } > + } > + } while ( found && start < end ); > + > + if ( start < end ) > + { > + printk(KERN_ERR "NUMA: No node for RAM range: " > + "[%"PRIpaddr", %"PRIpaddr"]\n", start, end - 1); > + return 0; > + } > + } > + return 1; > +} Seeing the two returns (and no further ones in the function) - did you not mean to also switch to bool/true/false here? > +/* Use the information discovered above to actually set up the nodes. */ > +static bool __init numa_scan_nodes(paddr_t start, paddr_t end) Is "above" in the comment actually still accurate? Aiui the discovery is now in a different CU. Then perhaps "Use discovered information to actually set up the nodes." > +{ > + unsigned int i; > + nodemask_t all_nodes_parsed; > + > + /* First clean up the node list */ > + for ( i = 0; i < MAX_NUMNODES; i++ ) > + cutoff_node(i, start, end); > + > + /* When numa is on with good firmware, we can do numa scan nodes. */ > + if ( arch_numa_disabled(true) ) > + return false; Btw - the comment here doesn't help me figure your choice of "init_as_disabled". The wording towards the end is also a little odd, considering we're already in numa_scan_nodes(). Which further points out that really there's no scanning here, just processing, so maybe the earlier patch wants to rename the function to numa_process_nodes()? > + if ( !nodes_cover_memory() ) > + { > + numa_fw_bad(); > + return false; > + } > + > + memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, > + memblk_nodeid); > + > + if ( memnode_shift < 0 ) As previously pointed out: As of patch 2 memnode_shift is unsigned, so this comparison is always false (and the latest Coverity will point this out). You can't get away here without using an intermediate (signed, i.e. plain int) variable. > + { > + printk(KERN_ERR > + "NUMA: No NUMA node hash function found. Contact > maintainer\n"); > + numa_fw_bad(); > + return false; > + } > + > + nodes_or(all_nodes_parsed, memory_nodes_parsed, processor_nodes_parsed); > + > + /* Finally register nodes */ > + for_each_node_mask( i, all_nodes_parsed ) > + { > + if ( nodes[i].end - nodes[i].start == 0 ) nodes[i].end == nodes[i].start ? > + printk(KERN_INFO "NUMA: node %u has no memory\n", i); > + > + setup_node_bootmem(i, nodes[i].start, nodes[i].end); > + } > + > + for ( i = 0; i < nr_cpu_ids; i++ ) > + { > + if ( cpu_to_node[i] == NUMA_NO_NODE ) > + continue; > + if ( !nodemask_test(cpu_to_node[i], &processor_nodes_parsed) ) > + numa_set_node(i, NUMA_NO_NODE); > + } > + numa_init_array(); > + return true; > } While you said you'd check elsewhere as well, just to be sure: Please add a blank line before the function's main "return". And perhaps another one between loop and function call. > --- a/xen/drivers/acpi/Kconfig > +++ b/xen/drivers/acpi/Kconfig > @@ -7,4 +7,5 @@ config ACPI_LEGACY_TABLES_LOOKUP > > config ACPI_NUMA > bool > + select HAS_NUMA_NODE_FWID Are you selecting an option here which doesn't exist anywhere? Or am I overlooking where this new option is being added? Jan

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.