From a0842b704b0f7c2d925473676f2012ea5dc39976 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 19 Jul 2013 11:47:48 -0600 Subject: [PATCH 1/4] mm/hotplug, x86: Disable ARCH_MEMORY_PROBE by default CONFIG_ARCH_MEMORY_PROBE enables the /sys/devices/system/memory/probe interface, which allows a given memory address to be hot-added as follows: # echo start_address_of_new_memory > /sys/devices/system/memory/probe (See Documentation/memory-hotplug.txt for more details.) This probe interface is required on powerpc. On x86, however, ACPI notifies a memory hotplug event to the kernel, which performs its hotplug operation as the result. Therefore, regular users do not need this interface on x86. This probe interface is also error-prone and misleading that the kernel blindly adds a given memory address without checking if the memory is present on the system; no probing is done despite of its name. The kernel crashes when a user requests to online a memory block that is not present on the system. This interface is currently used for testing as it can fake a hotplug event. This patch disables CONFIG_ARCH_MEMORY_PROBE by default on x86, adds its Kconfig menu entry on x86, and clarifies its use in Documentation/ memory-hotplug.txt. Signed-off-by: Toshi Kani Acked-by: KOSAKI Motohiro Cc: linux-mm@kvack.org Cc: dave@sr71.net Cc: isimatu.yasuaki@jp.fujitsu.com Cc: tangchen@cn.fujitsu.com Cc: vasilis.liaskovitis@profitbricks.com Link: http://lkml.kernel.org/r/1374256068-26016-1-git-send-email-toshi.kani@hp.com [ Edited it slightly. ] Signed-off-by: Ingo Molnar --- Documentation/memory-hotplug.txt | 14 ++++++++------ arch/x86/Kconfig | 6 +++++- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index 8e5eacbdcfa3..8fd254c73589 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt @@ -210,13 +210,15 @@ If memory device is found, memory hotplug code will be called. 4.2 Notify memory hot-add event by hand ------------ -In some environments, especially virtualized environment, firmware will not -notify memory hotplug event to the kernel. For such environment, "probe" -interface is supported. This interface depends on CONFIG_ARCH_MEMORY_PROBE. +On powerpc, the firmware does not notify a memory hotplug event to the kernel. +Therefore, "probe" interface is supported to notify the event to the kernel. +This interface depends on CONFIG_ARCH_MEMORY_PROBE. -Now, CONFIG_ARCH_MEMORY_PROBE is supported only by powerpc but it does not -contain highly architecture codes. Please add config if you need "probe" -interface. +CONFIG_ARCH_MEMORY_PROBE is supported on powerpc only. On x86, this config +option is disabled by default since ACPI notifies a memory hotplug event to +the kernel, which performs its hotplug operation as the result. Please +enable this option if you need the "probe" interface for testing purposes +on x86. Probe interface is located at /sys/devices/system/memory/probe diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b32ebf92b0ce..2f9d95775d27 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1344,8 +1344,12 @@ config ARCH_SELECT_MEMORY_MODEL depends on ARCH_SPARSEMEM_ENABLE config ARCH_MEMORY_PROBE - def_bool y + bool "Enable sysfs memory/probe interface" depends on X86_64 && MEMORY_HOTPLUG + help + This option enables a sysfs memory/probe interface for testing. + See Documentation/memory-hotplug.txt for more information. + If you are unsure how to answer this question, answer N. config ARCH_PROC_KCORE_TEXT def_bool y From d4f5228c01c130ff2c1f9240f1de22a5dfc61554 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Tue, 13 Aug 2013 11:01:07 +0800 Subject: [PATCH 2/4] mm: Remove unused variable idx0 in __early_ioremap() After commit: 8827247ffcc ("x86: don't define __this_fixmap_does_not_exist()") variable idx0 is no longer needed, so just remove it. Signed-off-by: Jianguo Wu Cc: Joonsoo Kim Cc: Cc: Cc: Hanjun Guo Link: http://lkml.kernel.org/r/5209A173.3090600@huawei.com Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0215e2c563ef..799580cabc78 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -487,7 +487,7 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) unsigned long offset; resource_size_t last_addr; unsigned int nrpages; - enum fixed_addresses idx0, idx; + enum fixed_addresses idx; int i, slot; WARN_ON(system_state != SYSTEM_BOOTING); @@ -540,8 +540,7 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) /* * Ok, go for it.. */ - idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; - idx = idx0; + idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; while (nrpages > 0) { early_set_fixmap(idx, phys_addr, prot); phys_addr += PAGE_SIZE; From 2449f343e4adc778de1c3d45b5aa14fe788663f5 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 14 Aug 2013 11:44:04 +0800 Subject: [PATCH 3/4] x86: Use memblock_set_current_limit() to set limit for memblock. In setup_arch() of x86, it set memblock.current_limit directly. We should use memblock_set_current_limit(). If the implementation is changed, it is easy to maintain. Signed-off-by: Tang Chen Link: http://lkml.kernel.org/r/1376451844-15682-1-git-send-email-tangchen@cn.fujitsu.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f8ec57815c05..de337983f679 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1070,7 +1070,7 @@ void __init setup_arch(char **cmdline_p) cleanup_highmap(); - memblock.current_limit = ISA_END_ADDRESS; + memblock_set_current_limit(ISA_END_ADDRESS); memblock_x86_fill(); /* @@ -1103,7 +1103,7 @@ void __init setup_arch(char **cmdline_p) setup_real_mode(); - memblock.current_limit = get_max_mapped(); + memblock_set_current_limit(get_max_mapped()); dma_contiguous_reserve(0); /* From 30e46b574a1db7d14404e52dca8e1aa5f5155fd2 Mon Sep 17 00:00:00 2001 From: Linn Crosetto Date: Tue, 13 Aug 2013 15:46:41 -0600 Subject: [PATCH 4/4] x86: avoid remapping data in parse_setup_data() Type SETUP_PCI, added by setup_efi_pci(), may advertise a ROM size larger than early_memremap() is able to handle, which is currently limited to 256kB. If this occurs it leads to a NULL dereference in parse_setup_data(). To avoid this, remap the setup_data header and allow parsing functions for individual types to handle their own data remapping. Signed-off-by: Linn Crosetto Link: http://lkml.kernel.org/r/1376430401-67445-1-git-send-email-linn@hp.com Acked-by: Yinghai Lu Reviewed-by: Pekka Enberg Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/e820.h | 2 +- arch/x86/kernel/e820.c | 5 ++++- arch/x86/kernel/setup.c | 19 ++++++++----------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index cccd07fa5e3a..779c2efe2e97 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -29,7 +29,7 @@ extern void e820_setup_gap(void); extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, unsigned long start_addr, unsigned long long end_addr); struct setup_data; -extern void parse_e820_ext(struct setup_data *data); +extern void parse_e820_ext(u64 phys_addr, u32 data_len); #if defined(CONFIG_X86_64) || \ (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d32abeabbda5..174da5fc5a7b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -658,15 +658,18 @@ __init void e820_setup_gap(void) * boot_params.e820_map, others are passed via SETUP_E820_EXT node of * linked list of struct setup_data, which is parsed here. */ -void __init parse_e820_ext(struct setup_data *sdata) +void __init parse_e820_ext(u64 phys_addr, u32 data_len) { int entries; struct e820entry *extmap; + struct setup_data *sdata; + sdata = early_memremap(phys_addr, data_len); entries = sdata->len / sizeof(struct e820entry); extmap = (struct e820entry *)(sdata->data); __append_e820_map(extmap, entries); sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + early_iounmap(sdata, data_len); printk(KERN_INFO "e820: extended physical RAM map:\n"); e820_print_map("extended"); } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index de337983f679..b6b45e4d6d3e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -426,25 +426,23 @@ static void __init reserve_initrd(void) static void __init parse_setup_data(void) { struct setup_data *data; - u64 pa_data; + u64 pa_data, pa_next; pa_data = boot_params.hdr.setup_data; while (pa_data) { - u32 data_len, map_len; + u32 data_len, map_len, data_type; map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK), (u64)sizeof(struct setup_data)); data = early_memremap(pa_data, map_len); data_len = data->len + sizeof(struct setup_data); - if (data_len > map_len) { - early_iounmap(data, map_len); - data = early_memremap(pa_data, data_len); - map_len = data_len; - } + data_type = data->type; + pa_next = data->next; + early_iounmap(data, map_len); - switch (data->type) { + switch (data_type) { case SETUP_E820_EXT: - parse_e820_ext(data); + parse_e820_ext(pa_data, data_len); break; case SETUP_DTB: add_dtb(pa_data); @@ -452,8 +450,7 @@ static void __init parse_setup_data(void) default: break; } - pa_data = data->next; - early_iounmap(data, map_len); + pa_data = pa_next; } }