releases/4.9.104/powerpc-numa-ensure-nodes-initialized-for-hotplug.patch - pub/scm/linux/kernel/git/stable/stable-queue - Git at Google

 From foo@baz Sun May 27 17:33:38 CEST 2018
 From: Michael Bringmann <mwb@linux.vnet.ibm.com>
 Date: Tue, 28 Nov 2017 16:58:40 -0600
 Subject: powerpc/numa: Ensure nodes initialized for hotplug

 From: Michael Bringmann <mwb@linux.vnet.ibm.com>

 [ Upstream commit ea05ba7c559c8e5a5946c3a94a2a266e9a6680a6 ]

 This patch fixes some problems encountered at runtime with
 configurations that support memory-less nodes, or that hot-add CPUs
 into nodes that are memoryless during system execution after boot. The
 problems of interest include:

 * Nodes known to powerpc to be memoryless at boot, but to have CPUs in
   them are allowed to be 'possible' and 'online'. Memory allocations
   for those nodes are taken from another node that does have memory
   until and if memory is hot-added to the node.

 * Nodes which have no resources assigned at boot, but which may still
   be referenced subsequently by affinity or associativity attributes,
   are kept in the list of 'possible' nodes for powerpc. Hot-add of
   memory or CPUs to the system can reference these nodes and bring
   them online instead of redirecting the references to one of the set
   of nodes known to have memory at boot.

 Note that this software operates under the context of CPU hotplug. We
 are not doing memory hotplug in this code, but rather updating the
 kernel's CPU topology (i.e. arch_update_cpu_topology /
 numa_update_cpu_topology). We are initializing a node that may be used
 by CPUs or memory before it can be referenced as invalid by a CPU
 hotplug operation. CPU hotplug operations are protected by a range of
 APIs including cpu_maps_update_begin/cpu_maps_update_done,
 cpus_read/write_lock / cpus_read/write_unlock, device locks, and more.
 Memory hotplug operations, including try_online_node, are protected by
 mem_hotplug_begin/mem_hotplug_done, device locks, and more. In the
 case of CPUs being hot-added to a previously memoryless node, the
 try_online_node operation occurs wholly within the CPU locks with no
 overlap. Using HMC hot-add/hot-remove operations, we have been able to
 add and remove CPUs to any possible node without failures. HMC
 operations involve a degree of self-serialization, though.

 Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
 Reviewed-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
 Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
 Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 ---
  arch/powerpc/mm/numa.c |   47 +++++++++++++++++++++++++++++++++++++----------
  1 file changed, 37 insertions(+), 10 deletions(-)

 --- a/arch/powerpc/mm/numa.c
 +++ b/arch/powerpc/mm/numa.c
 @@ -551,7 +551,7 @@ static int numa_setup_cpu(unsigned long
  	nid = of_node_to_nid_single(cpu);

  out_present:
 -	if (nid < 0 || !node_online(nid))
 +	if (nid < 0 || !node_possible(nid))
  		nid = first_online_node;

  	map_cpu_to_node(lcpu, nid);
 @@ -922,10 +922,8 @@ static void __init find_possible_nodes(v
  		goto out;

  	for (i = 0; i < numnodes; i++) {
 -		if (!node_possible(i)) {
 -			setup_node_data(i, 0, 0);
 +		if (!node_possible(i))
  			node_set(i, node_possible_map);
 -		}
  	}

  out:
 @@ -1305,6 +1303,40 @@ static long vphn_get_associativity(unsig
  	return rc;
  }

 +static inline int find_and_online_cpu_nid(int cpu)
 +{
 +	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
 +	int new_nid;
 +
 +	/* Use associativity from first thread for all siblings */
 +	vphn_get_associativity(cpu, associativity);
 +	new_nid = associativity_to_nid(associativity);
 +	if (new_nid < 0 || !node_possible(new_nid))
 +		new_nid = first_online_node;
 +
 +	if (NODE_DATA(new_nid) == NULL) {
 +#ifdef CONFIG_MEMORY_HOTPLUG
 +		/*
 +		 * Need to ensure that NODE_DATA is initialized for a node from
 +		 * available memory (see memblock_alloc_try_nid). If unable to
 +		 * init the node, then default to nearest node that has memory
 +		 * installed.
 +		 */
 +		if (try_online_node(new_nid))
 +			new_nid = first_online_node;
 +#else
 +		/*
 +		 * Default to using the nearest node that has memory installed.
 +		 * Otherwise, it would be necessary to patch the kernel MM code
 +		 * to deal with more memoryless-node error conditions.
 +		 */
 +		new_nid = first_online_node;
 +#endif
 +	}
 +
 +	return new_nid;
 +}
 +
  /*
   * Update the CPU maps and sysfs entries for a single CPU when its NUMA
   * characteristics change. This function doesn't perform any locking and is
 @@ -1370,7 +1402,6 @@ int arch_update_cpu_topology(void)
  {
  	unsigned int cpu, sibling, changed = 0;
  	struct topology_update_data *updates, *ud;
 -	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
  	cpumask_t updated_cpus;
  	struct device *dev;
  	int weight, new_nid, i = 0;
 @@ -1405,11 +1436,7 @@ int arch_update_cpu_topology(void)
  			continue;
  		}

 -		/* Use associativity from first thread for all siblings */
 -		vphn_get_associativity(cpu, associativity);
 -		new_nid = associativity_to_nid(associativity);
 -		if (new_nid < 0 || !node_online(new_nid))
 -			new_nid = first_online_node;
 +		new_nid = find_and_online_cpu_nid(cpu);

  		if (new_nid == numa_cpu_lookup_table[cpu]) {
  			cpumask_andnot(&cpu_associativity_changes_mask,
	From foo@baz Sun May 27 17:33:38 CEST 2018
	From: Michael Bringmann <mwb@linux.vnet.ibm.com>
	Date: Tue, 28 Nov 2017 16:58:40 -0600
	Subject: powerpc/numa: Ensure nodes initialized for hotplug

	From: Michael Bringmann <mwb@linux.vnet.ibm.com>

	[ Upstream commit ea05ba7c559c8e5a5946c3a94a2a266e9a6680a6 ]

	This patch fixes some problems encountered at runtime with
	configurations that support memory-less nodes, or that hot-add CPUs
	into nodes that are memoryless during system execution after boot. The
	problems of interest include:

	* Nodes known to powerpc to be memoryless at boot, but to have CPUs in
	them are allowed to be 'possible' and 'online'. Memory allocations
	for those nodes are taken from another node that does have memory
	until and if memory is hot-added to the node.

	* Nodes which have no resources assigned at boot, but which may still
	be referenced subsequently by affinity or associativity attributes,
	are kept in the list of 'possible' nodes for powerpc. Hot-add of
	memory or CPUs to the system can reference these nodes and bring
	them online instead of redirecting the references to one of the set
	of nodes known to have memory at boot.

	Note that this software operates under the context of CPU hotplug. We
	are not doing memory hotplug in this code, but rather updating the
	kernel's CPU topology (i.e. arch_update_cpu_topology /
	numa_update_cpu_topology). We are initializing a node that may be used
	by CPUs or memory before it can be referenced as invalid by a CPU
	hotplug operation. CPU hotplug operations are protected by a range of
	APIs including cpu_maps_update_begin/cpu_maps_update_done,
	cpus_read/write_lock / cpus_read/write_unlock, device locks, and more.
	Memory hotplug operations, including try_online_node, are protected by
	mem_hotplug_begin/mem_hotplug_done, device locks, and more. In the
	case of CPUs being hot-added to a previously memoryless node, the
	try_online_node operation occurs wholly within the CPU locks with no
	overlap. Using HMC hot-add/hot-remove operations, we have been able to
	add and remove CPUs to any possible node without failures. HMC
	operations involve a degree of self-serialization, though.

	Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
	Reviewed-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
	Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
	Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
	Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	---
	arch/powerpc/mm/numa.c | 47 +++++++++++++++++++++++++++++++++++++----------
	1 file changed, 37 insertions(+), 10 deletions(-)

	--- a/arch/powerpc/mm/numa.c
	+++ b/arch/powerpc/mm/numa.c
	@@ -551,7 +551,7 @@ static int numa_setup_cpu(unsigned long
	nid = of_node_to_nid_single(cpu);

	out_present:
	- if (nid < 0 || !node_online(nid))
	+ if (nid < 0 || !node_possible(nid))
	nid = first_online_node;

	map_cpu_to_node(lcpu, nid);
	@@ -922,10 +922,8 @@ static void __init find_possible_nodes(v
	goto out;

	for (i = 0; i < numnodes; i++) {
	- if (!node_possible(i)) {
	- setup_node_data(i, 0, 0);
	+ if (!node_possible(i))
	node_set(i, node_possible_map);
	- }
	}

	out:
	@@ -1305,6 +1303,40 @@ static long vphn_get_associativity(unsig
	return rc;
	}

	+static inline int find_and_online_cpu_nid(int cpu)
	+{
	+ __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
	+ int new_nid;
	+
	+ /* Use associativity from first thread for all siblings */
	+ vphn_get_associativity(cpu, associativity);
	+ new_nid = associativity_to_nid(associativity);
	+ if (new_nid < 0 || !node_possible(new_nid))
	+ new_nid = first_online_node;
	+
	+ if (NODE_DATA(new_nid) == NULL) {
	+#ifdef CONFIG_MEMORY_HOTPLUG
	+ /*
	+ * Need to ensure that NODE_DATA is initialized for a node from
	+ * available memory (see memblock_alloc_try_nid). If unable to
	+ * init the node, then default to nearest node that has memory
	+ * installed.
	+ */
	+ if (try_online_node(new_nid))
	+ new_nid = first_online_node;
	+#else
	+ /*
	+ * Default to using the nearest node that has memory installed.
	+ * Otherwise, it would be necessary to patch the kernel MM code
	+ * to deal with more memoryless-node error conditions.
	+ */
	+ new_nid = first_online_node;
	+#endif
	+ }
	+
	+ return new_nid;
	+}
	+
	/*
	* Update the CPU maps and sysfs entries for a single CPU when its NUMA
	* characteristics change. This function doesn't perform any locking and is
	@@ -1370,7 +1402,6 @@ int arch_update_cpu_topology(void)
	{
	unsigned int cpu, sibling, changed = 0;
	struct topology_update_data *updates, *ud;
	- __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
	cpumask_t updated_cpus;
	struct device *dev;
	int weight, new_nid, i = 0;
	@@ -1405,11 +1436,7 @@ int arch_update_cpu_topology(void)
	continue;
	}

	- /* Use associativity from first thread for all siblings */
	- vphn_get_associativity(cpu, associativity);
	- new_nid = associativity_to_nid(associativity);
	- if (new_nid < 0 || !node_online(new_nid))
	- new_nid = first_online_node;
	+ new_nid = find_and_online_cpu_nid(cpu);

	if (new_nid == numa_cpu_lookup_table[cpu]) {
	cpumask_andnot(&cpu_associativity_changes_mask,