diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index cfa3d5c3144a..9c9fe1b0bc4e 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -190,6 +190,7 @@ extern int x86_acpi_numa_init(void); #ifdef CONFIG_NUMA_EMU extern void acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes); +extern int acpi_emu_node_distance(int a, int b); #endif #endif /* CONFIG_ACPI_NUMA */ diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index f42710bd3e73..5361c5947986 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -28,6 +28,7 @@ extern nodemask_t numa_nodes_parsed __initdata; extern int __cpuinit numa_cpu_node(int cpu); extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); +extern void __init numa_set_distance(int from, int to, int distance); #ifdef CONFIG_NUMA_EMU #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index b101c17861f5..910a7084f7f2 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -138,7 +138,7 @@ extern unsigned long node_remap_size[]; .balance_interval = 1, \ } -#ifdef CONFIG_X86_64_ACPI_NUMA +#ifdef CONFIG_X86_64 extern int __node_distance(int, int); #define node_distance(a, b) __node_distance(a, b) #endif diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 8b1f178a866e..a3621f2953d6 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -45,6 +45,13 @@ static unsigned long __initdata nodemap_size; static struct numa_meminfo numa_meminfo __initdata; +static int numa_distance_cnt; +static u8 *numa_distance; + +#ifdef CONFIG_NUMA_EMU +static bool numa_emu_dist; +#endif + /* * Given a shift value, try to populate memnodemap[] * Returns : @@ -356,6 +363,92 @@ static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, node_set(mi->blk[i].nid, *nodemask); } +/* + * Reset distance table. The current table is freed. The next + * numa_set_distance() call will create a new one. + */ +static void __init numa_reset_distance(void) +{ + size_t size; + + size = numa_distance_cnt * sizeof(numa_distance[0]); + memblock_x86_free_range(__pa(numa_distance), + __pa(numa_distance) + size); + numa_distance = NULL; + numa_distance_cnt = 0; +} + +/* + * Set the distance between node @from to @to to @distance. If distance + * table doesn't exist, one which is large enough to accomodate all the + * currently known nodes will be created. + */ +void __init numa_set_distance(int from, int to, int distance) +{ + if (!numa_distance) { + nodemask_t nodes_parsed; + size_t size; + int i, j, cnt = 0; + u64 phys; + + /* size the new table and allocate it */ + nodes_parsed = numa_nodes_parsed; + numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo); + + for_each_node_mask(i, nodes_parsed) + cnt = i; + size = ++cnt * sizeof(numa_distance[0]); + + phys = memblock_find_in_range(0, + (u64)max_pfn_mapped << PAGE_SHIFT, + size, PAGE_SIZE); + if (phys == MEMBLOCK_ERROR) { + pr_warning("NUMA: Warning: can't allocate distance table!\n"); + /* don't retry until explicitly reset */ + numa_distance = (void *)1LU; + return; + } + memblock_x86_reserve_range(phys, phys + size, "NUMA DIST"); + + numa_distance = __va(phys); + numa_distance_cnt = cnt; + + /* fill with the default distances */ + for (i = 0; i < cnt; i++) + for (j = 0; j < cnt; j++) + numa_distance[i * cnt + j] = i == j ? + LOCAL_DISTANCE : REMOTE_DISTANCE; + printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); + } + + if (from >= numa_distance_cnt || to >= numa_distance_cnt) { + printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n", + from, to, distance); + return; + } + + if ((u8)distance != distance || + (from == to && distance != LOCAL_DISTANCE)) { + pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", + from, to, distance); + return; + } + + numa_distance[from * numa_distance_cnt + to] = distance; +} + +int __node_distance(int from, int to) +{ +#if defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA_EMU) + if (numa_emu_dist) + return acpi_emu_node_distance(from, to); +#endif + if (from >= numa_distance_cnt || to >= numa_distance_cnt) + return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; + return numa_distance[from * numa_distance_cnt + to]; +} +EXPORT_SYMBOL(__node_distance); + /* * Sanity check to catch more bad NUMA configurations (they are amazingly * common). Make sure the nodes cover all memory. @@ -826,6 +919,7 @@ static int __init numa_emulation(unsigned long start_pfn, setup_physnodes(addr, max_addr); fake_physnodes(acpi, amd, num_nodes); numa_init_array(); + numa_emu_dist = true; return 0; } #endif /* CONFIG_NUMA_EMU */ @@ -869,6 +963,7 @@ void __init initmem_init(void) nodes_clear(node_online_map); memset(&numa_meminfo, 0, sizeof(numa_meminfo)); remove_all_active_ranges(); + numa_reset_distance(); if (numa_init[i]() < 0) continue; diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 4f8e6cde9bf6..d2f53f35d86a 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -50,9 +50,16 @@ static __init inline int srat_disabled(void) /* Callback for SLIT parsing */ void __init acpi_numa_slit_init(struct acpi_table_slit *slit) { + int i, j; unsigned length; unsigned long phys; + for (i = 0; i < slit->locality_count; i++) + for (j = 0; j < slit->locality_count; j++) + numa_set_distance(pxm_to_node(i), pxm_to_node(j), + slit->entry[slit->locality_count * i + j]); + + /* acpi_slit is used only by emulation */ length = slit->header.length; phys = memblock_find_in_range(0, max_pfn_mapped<locality_count * node_to_pxm(a); return acpi_slit->entry[index + node_to_pxm(b)]; } - -EXPORT_SYMBOL(__node_distance); +#endif /* CONFIG_NUMA_EMU */ #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) int memory_add_physaddr_to_nid(u64 start)