linux/arch/x86/mm/numa.c

290 lines
6.5 KiB
C

/* Common code for 32 and 64-bit NUMA */
#include <linux/topology.h>
#include <linux/module.h>
#include <linux/bootmem.h>
#include <asm/numa.h>
#include <asm/acpi.h>
int __initdata numa_off;
static __init int numa_setup(char *opt)
{
if (!opt)
return -EINVAL;
if (!strncmp(opt, "off", 3))
numa_off = 1;
#ifdef CONFIG_NUMA_EMU
if (!strncmp(opt, "fake=", 5))
numa_emu_cmdline(opt + 5);
#endif
#ifdef CONFIG_ACPI_NUMA
if (!strncmp(opt, "noacpi", 6))
acpi_numa = -1;
#endif
return 0;
}
early_param("numa", numa_setup);
/*
* apicid, cpu, node mappings
*/
s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};
cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
EXPORT_SYMBOL(node_to_cpumask_map);
/*
* Map cpu index to node index
*/
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
void __cpuinit numa_set_node(int cpu, int node)
{
int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
/* early setting, no percpu area yet */
if (cpu_to_node_map) {
cpu_to_node_map[cpu] = node;
return;
}
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
dump_stack();
return;
}
#endif
per_cpu(x86_cpu_to_node_map, cpu) = node;
if (node != NUMA_NO_NODE)
set_cpu_numa_node(cpu, node);
}
void __cpuinit numa_clear_node(int cpu)
{
numa_set_node(cpu, NUMA_NO_NODE);
}
/*
* Allocate node_to_cpumask_map based on number of available nodes
* Requires node_possible_map to be valid.
*
* Note: node_to_cpumask() is not valid until after this is done.
* (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
*/
void __init setup_node_to_cpumask_map(void)
{
unsigned int node, num = 0;
/* setup nr_node_ids if not done yet */
if (nr_node_ids == MAX_NUMNODES) {
for_each_node_mask(node, node_possible_map)
num = node;
nr_node_ids = num + 1;
}
/* allocate the map */
for (node = 0; node < nr_node_ids; node++)
alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
/* cpumask_of_node() will now work */
pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
}
/*
* There are unfortunately some poorly designed mainboards around that
* only connect memory to a single CPU. This breaks the 1:1 cpu->node
* mapping. To avoid this fill in the mapping for all possible CPUs,
* as the number of CPUs is not known yet. We round robin the existing
* nodes.
*/
void __init numa_init_array(void)
{
int rr, i;
rr = first_node(node_online_map);
for (i = 0; i < nr_cpu_ids; i++) {
if (early_cpu_to_node(i) != NUMA_NO_NODE)
continue;
numa_set_node(i, rr);
rr = next_node(rr, node_online_map);
if (rr == MAX_NUMNODES)
rr = first_node(node_online_map);
}
}
static __init int find_near_online_node(int node)
{
int n, val;
int min_val = INT_MAX;
int best_node = -1;
for_each_online_node(n) {
val = node_distance(node, n);
if (val < min_val) {
min_val = val;
best_node = n;
}
}
return best_node;
}
/*
* Setup early cpu_to_node.
*
* Populate cpu_to_node[] only if x86_cpu_to_apicid[],
* and apicid_to_node[] tables have valid entries for a CPU.
* This means we skip cpu_to_node[] initialisation for NUMA
* emulation and faking node case (when running a kernel compiled
* for NUMA on a non NUMA box), which is OK as cpu_to_node[]
* is already initialized in a round robin manner at numa_init_array,
* prior to this call, and this initialization is good enough
* for the fake NUMA cases.
*
* Called before the per_cpu areas are setup.
*/
void __init init_cpu_to_node(void)
{
int cpu;
u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
BUG_ON(cpu_to_apicid == NULL);
for_each_possible_cpu(cpu) {
int node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE)
continue;
if (!node_online(node))
node = find_near_online_node(node);
numa_set_node(cpu, node);
}
}
#ifndef CONFIG_DEBUG_PER_CPU_MAPS
# ifndef CONFIG_NUMA_EMU
void __cpuinit numa_add_cpu(int cpu)
{
cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
void __cpuinit numa_remove_cpu(int cpu)
{
cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
# endif /* !CONFIG_NUMA_EMU */
#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
int __cpu_to_node(int cpu)
{
if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
printk(KERN_WARNING
"cpu_to_node(%d): usage too early!\n", cpu);
dump_stack();
return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
}
return per_cpu(x86_cpu_to_node_map, cpu);
}
EXPORT_SYMBOL(__cpu_to_node);
/*
* Same function as cpu_to_node() but used if called before the
* per_cpu areas are setup.
*/
int early_cpu_to_node(int cpu)
{
if (early_per_cpu_ptr(x86_cpu_to_node_map))
return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
if (!cpu_possible(cpu)) {
printk(KERN_WARNING
"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
dump_stack();
return NUMA_NO_NODE;
}
return per_cpu(x86_cpu_to_node_map, cpu);
}
struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
{
int node = early_cpu_to_node(cpu);
struct cpumask *mask;
char buf[64];
if (node == NUMA_NO_NODE) {
/* early_cpu_to_node() already emits a warning and trace */
return NULL;
}
mask = node_to_cpumask_map[node];
if (!mask) {
pr_err("node_to_cpumask_map[%i] NULL\n", node);
dump_stack();
return NULL;
}
cpulist_scnprintf(buf, sizeof(buf), mask);
printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
enable ? "numa_add_cpu" : "numa_remove_cpu",
cpu, node, buf);
return mask;
}
# ifndef CONFIG_NUMA_EMU
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
struct cpumask *mask;
mask = debug_cpumask_set_cpu(cpu, enable);
if (!mask)
return;
if (enable)
cpumask_set_cpu(cpu, mask);
else
cpumask_clear_cpu(cpu, mask);
}
void __cpuinit numa_add_cpu(int cpu)
{
numa_set_cpumask(cpu, 1);
}
void __cpuinit numa_remove_cpu(int cpu)
{
numa_set_cpumask(cpu, 0);
}
# endif /* !CONFIG_NUMA_EMU */
/*
* Returns a pointer to the bitmask of CPUs on Node 'node'.
*/
const struct cpumask *cpumask_of_node(int node)
{
if (node >= nr_node_ids) {
printk(KERN_WARNING
"cpumask_of_node(%d): node > nr_node_ids(%d)\n",
node, nr_node_ids);
dump_stack();
return cpu_none_mask;
}
if (node_to_cpumask_map[node] == NULL) {
printk(KERN_WARNING
"cpumask_of_node(%d): no node_to_cpumask_map!\n",
node);
dump_stack();
return cpu_online_mask;
}
return node_to_cpumask_map[node];
}
EXPORT_SYMBOL(cpumask_of_node);
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */