From ab2180a15ce54739fed381efb4cb12e78dfb1561 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Thu, 29 Nov 2018 18:12:27 +0100 Subject: [PATCH 01/10] firmware/efi: Add NULL pointer checks in efivars API functions Since commit: ce2e6db554fa ("brcmfmac: Add support for getting nvram contents from EFI variables") we have a device driver accessing the efivars API. Several functions in the efivars API assume __efivars is set, i.e., that they will be accessed only after efivars_register() has been called. However, the following NULL pointer access was reported calling efivar_entry_size() from the brcmfmac device driver: Unable to handle kernel NULL pointer dereference at virtual address 00000008 pgd = 60bfa5f1 [00000008] *pgd=00000000 Internal error: Oops: 5 [#1] SMP ARM ... Hardware name: NVIDIA Tegra SoC (Flattened Device Tree) Workqueue: events request_firmware_work_func PC is at efivar_entry_size+0x28/0x90 LR is at brcmf_fw_complete_request+0x3f8/0x8d4 [brcmfmac] pc : [] lr : [] psr: a00d0113 sp : ede7fe28 ip : ee983410 fp : c1787f30 r10: 00000000 r9 : 00000000 r8 : bf2b2258 r7 : ee983000 r6 : c1604c48 r5 : ede7fe88 r4 : edf337c0 r3 : 00000000 r2 : 00000000 r1 : ede7fe88 r0 : c17712c8 Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: ad16804a DAC: 00000051 Disassembly showed that the local static variable __efivars is NULL, which is not entirely unexpected given that it is a non-EFI platform. So add a NULL pointer check to efivar_entry_size(), and to related functions while at it. In efivars_register() a couple of sanity checks are added as well. Reported-by: Jon Hunter Signed-off-by: Arend van Spriel Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Julien Thierry Cc: Linus Torvalds Cc: Marc Zyngier Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Sedat Dilek Cc: Thomas Gleixner Cc: YiFei Zhu Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-9-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/vars.c | 99 +++++++++++++++++++++++++++++-------- 1 file changed, 78 insertions(+), 21 deletions(-) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 9336ffdf6e2c..fceaafd67ec6 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -318,7 +318,12 @@ EXPORT_SYMBOL_GPL(efivar_variable_is_removable); static efi_status_t check_var_size(u32 attributes, unsigned long size) { - const struct efivar_operations *fops = __efivars->ops; + const struct efivar_operations *fops; + + if (!__efivars) + return EFI_UNSUPPORTED; + + fops = __efivars->ops; if (!fops->query_variable_store) return EFI_UNSUPPORTED; @@ -329,7 +334,12 @@ check_var_size(u32 attributes, unsigned long size) static efi_status_t check_var_size_nonblocking(u32 attributes, unsigned long size) { - const struct efivar_operations *fops = __efivars->ops; + const struct efivar_operations *fops; + + if (!__efivars) + return EFI_UNSUPPORTED; + + fops = __efivars->ops; if (!fops->query_variable_store) return EFI_UNSUPPORTED; @@ -429,13 +439,18 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid, int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), void *data, bool duplicates, struct list_head *head) { - const struct efivar_operations *ops = __efivars->ops; + const struct efivar_operations *ops; unsigned long variable_name_size = 1024; efi_char16_t *variable_name; efi_status_t status; efi_guid_t vendor_guid; int err = 0; + if (!__efivars) + return -EFAULT; + + ops = __efivars->ops; + variable_name = kzalloc(variable_name_size, GFP_KERNEL); if (!variable_name) { printk(KERN_ERR "efivars: Memory allocation failed.\n"); @@ -583,12 +598,14 @@ static void efivar_entry_list_del_unlock(struct efivar_entry *entry) */ int __efivar_entry_delete(struct efivar_entry *entry) { - const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - status = ops->set_variable(entry->var.VariableName, - &entry->var.VendorGuid, - 0, 0, NULL); + if (!__efivars) + return -EINVAL; + + status = __efivars->ops->set_variable(entry->var.VariableName, + &entry->var.VendorGuid, + 0, 0, NULL); return efi_status_to_err(status); } @@ -607,12 +624,17 @@ EXPORT_SYMBOL_GPL(__efivar_entry_delete); */ int efivar_entry_delete(struct efivar_entry *entry) { - const struct efivar_operations *ops = __efivars->ops; + const struct efivar_operations *ops; efi_status_t status; if (down_interruptible(&efivars_lock)) return -EINTR; + if (!__efivars) { + up(&efivars_lock); + return -EINVAL; + } + ops = __efivars->ops; status = ops->set_variable(entry->var.VariableName, &entry->var.VendorGuid, 0, 0, NULL); @@ -650,13 +672,19 @@ EXPORT_SYMBOL_GPL(efivar_entry_delete); int efivar_entry_set(struct efivar_entry *entry, u32 attributes, unsigned long size, void *data, struct list_head *head) { - const struct efivar_operations *ops = __efivars->ops; + const struct efivar_operations *ops; efi_status_t status; efi_char16_t *name = entry->var.VariableName; efi_guid_t vendor = entry->var.VendorGuid; if (down_interruptible(&efivars_lock)) return -EINTR; + + if (!__efivars) { + up(&efivars_lock); + return -EINVAL; + } + ops = __efivars->ops; if (head && efivar_entry_find(name, vendor, head, false)) { up(&efivars_lock); return -EEXIST; @@ -687,12 +715,17 @@ static int efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor, u32 attributes, unsigned long size, void *data) { - const struct efivar_operations *ops = __efivars->ops; + const struct efivar_operations *ops; efi_status_t status; if (down_trylock(&efivars_lock)) return -EBUSY; + if (!__efivars) { + up(&efivars_lock); + return -EINVAL; + } + status = check_var_size_nonblocking(attributes, size + ucs2_strsize(name, 1024)); if (status != EFI_SUCCESS) { @@ -700,6 +733,7 @@ efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor, return -ENOSPC; } + ops = __efivars->ops; status = ops->set_variable_nonblocking(name, &vendor, attributes, size, data); @@ -727,9 +761,13 @@ efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor, int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, bool block, unsigned long size, void *data) { - const struct efivar_operations *ops = __efivars->ops; + const struct efivar_operations *ops; efi_status_t status; + if (!__efivars) + return -EINVAL; + + ops = __efivars->ops; if (!ops->query_variable_store) return -ENOSYS; @@ -829,13 +867,18 @@ EXPORT_SYMBOL_GPL(efivar_entry_find); */ int efivar_entry_size(struct efivar_entry *entry, unsigned long *size) { - const struct efivar_operations *ops = __efivars->ops; + const struct efivar_operations *ops; efi_status_t status; *size = 0; if (down_interruptible(&efivars_lock)) return -EINTR; + if (!__efivars) { + up(&efivars_lock); + return -EINVAL; + } + ops = __efivars->ops; status = ops->get_variable(entry->var.VariableName, &entry->var.VendorGuid, NULL, size, NULL); up(&efivars_lock); @@ -861,12 +904,14 @@ EXPORT_SYMBOL_GPL(efivar_entry_size); int __efivar_entry_get(struct efivar_entry *entry, u32 *attributes, unsigned long *size, void *data) { - const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - status = ops->get_variable(entry->var.VariableName, - &entry->var.VendorGuid, - attributes, size, data); + if (!__efivars) + return -EINVAL; + + status = __efivars->ops->get_variable(entry->var.VariableName, + &entry->var.VendorGuid, + attributes, size, data); return efi_status_to_err(status); } @@ -882,14 +927,19 @@ EXPORT_SYMBOL_GPL(__efivar_entry_get); int efivar_entry_get(struct efivar_entry *entry, u32 *attributes, unsigned long *size, void *data) { - const struct efivar_operations *ops = __efivars->ops; efi_status_t status; if (down_interruptible(&efivars_lock)) return -EINTR; - status = ops->get_variable(entry->var.VariableName, - &entry->var.VendorGuid, - attributes, size, data); + + if (!__efivars) { + up(&efivars_lock); + return -EINVAL; + } + + status = __efivars->ops->get_variable(entry->var.VariableName, + &entry->var.VendorGuid, + attributes, size, data); up(&efivars_lock); return efi_status_to_err(status); @@ -921,7 +971,7 @@ EXPORT_SYMBOL_GPL(efivar_entry_get); int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, unsigned long *size, void *data, bool *set) { - const struct efivar_operations *ops = __efivars->ops; + const struct efivar_operations *ops; efi_char16_t *name = entry->var.VariableName; efi_guid_t *vendor = &entry->var.VendorGuid; efi_status_t status; @@ -940,6 +990,11 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, if (down_interruptible(&efivars_lock)) return -EINTR; + if (!__efivars) { + err = -EINVAL; + goto out; + } + /* * Ensure that the available space hasn't shrunk below the safe level */ @@ -956,6 +1011,8 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, } } + ops = __efivars->ops; + status = ops->set_variable(name, vendor, attributes, *size, data); if (status != EFI_SUCCESS) { err = efi_status_to_err(status); From 6935b3c43da96bb48017b2a3bc1d4f93899f9b28 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 29 Nov 2018 18:12:21 +0100 Subject: [PATCH 02/10] efi/fdt: Indentation fix Closing bracket seems to end a for statement when it is actually ending the contained if. Add some brackets to have clear delimitation of each scope. No functional change/fix, just fix the indentation. Signed-off-by: Julien Thierry Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Jon Hunter Cc: Linus Torvalds Cc: Marc Zyngier Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Sedat Dilek Cc: Thomas Gleixner Cc: YiFei Zhu Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/fdt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index 0c0d2312f4a8..a3614f9b5f75 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -376,7 +376,7 @@ void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size) tables = (efi_config_table_t *) sys_table->tables; fdt = NULL; - for (i = 0; i < sys_table->nr_tables; i++) + for (i = 0; i < sys_table->nr_tables; i++) { if (efi_guidcmp(tables[i].guid, fdt_guid) == 0) { fdt = (void *) tables[i].table; if (fdt_check_header(fdt) != 0) { @@ -385,7 +385,8 @@ void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size) } *fdt_size = fdt_totalsize(fdt); break; - } + } + } return fdt; } From 8c25db0a5a67986106aa3da7ce165ff961aa7847 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 29 Nov 2018 18:12:22 +0100 Subject: [PATCH 03/10] efi/fdt: Simplify the get_fdt() flow Reorganize the get_fdt() lookup loop, clearly showing that: - Nothing is done for table entries that do not have fdt_guid - Once an entry with fdt_guid is found, break out of the loop No functional changes. Suggested-by: Joe Perches Signed-off-by: Julien Thierry Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Jon Hunter Cc: Linus Torvalds Cc: Marc Zyngier Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Sedat Dilek Cc: Thomas Gleixner Cc: YiFei Zhu Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-4-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/fdt.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index a3614f9b5f75..0dc7b4987cc2 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -370,23 +370,24 @@ void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size) { efi_guid_t fdt_guid = DEVICE_TREE_GUID; efi_config_table_t *tables; - void *fdt; int i; - tables = (efi_config_table_t *) sys_table->tables; - fdt = NULL; + tables = (efi_config_table_t *)sys_table->tables; for (i = 0; i < sys_table->nr_tables; i++) { - if (efi_guidcmp(tables[i].guid, fdt_guid) == 0) { - fdt = (void *) tables[i].table; - if (fdt_check_header(fdt) != 0) { - pr_efi_err(sys_table, "Invalid header detected on UEFI supplied FDT, ignoring ...\n"); - return NULL; - } - *fdt_size = fdt_totalsize(fdt); - break; + void *fdt; + + if (efi_guidcmp(tables[i].guid, fdt_guid) != 0) + continue; + + fdt = (void *)tables[i].table; + if (fdt_check_header(fdt) != 0) { + pr_efi_err(sys_table, "Invalid header detected on UEFI supplied FDT, ignoring ...\n"); + return NULL; } + *fdt_size = fdt_totalsize(fdt); + return fdt; } - return fdt; + return NULL; } From 7e0dabd3010d6041ee0a952c1146b2150a11f1be Mon Sep 17 00:00:00 2001 From: Sai Praneeth Prakhya Date: Thu, 29 Nov 2018 18:12:23 +0100 Subject: [PATCH 04/10] x86/mm/pageattr: Introduce helper function to unmap EFI boot services Ideally, after kernel assumes control of the platform, firmware shouldn't access EFI boot services code/data regions. But, it's noticed that this is not so true in many x86 platforms. Hence, during boot, kernel reserves EFI boot services code/data regions [1] and maps [2] them to efi_pgd so that call to set_virtual_address_map() doesn't fail. After returning from set_virtual_address_map(), kernel frees the reserved regions [3] but they still remain mapped. Hence, introduce kernel_unmap_pages_in_pgd() which will later be used to unmap EFI boot services code/data regions. While at it modify kernel_map_pages_in_pgd() by: 1. Adding __init modifier because it's always used *only* during boot. 2. Add a warning if it's used after SMP is initialized because it uses __flush_tlb_all() which flushes mappings only on current CPU. Unmapping EFI boot services code/data regions will result in clearing PAGE_PRESENT bit and it shouldn't bother L1TF cases because it's already handled by protnone_mask() at arch/x86/include/asm/pgtable-invert.h. [1] efi_reserve_boot_services() [2] efi_map_region() -> __map_region() -> kernel_map_pages_in_pgd() [3] efi_free_boot_services() Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Ard Biesheuvel Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Jon Hunter Cc: Julien Thierry Cc: Linus Torvalds Cc: Marc Zyngier Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sedat Dilek Cc: YiFei Zhu Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-5-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_types.h | 8 ++++-- arch/x86/mm/pageattr.c | 40 ++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 106b7d0e2dae..d6ff0bbdb394 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -564,8 +564,12 @@ extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, unsigned int *level); extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); -extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, - unsigned numpages, unsigned long page_flags); +extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, + unsigned long address, + unsigned numpages, + unsigned long page_flags); +extern int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, + unsigned long numpages); #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index db7a10082238..bac35001d896 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -2338,8 +2338,8 @@ bool kernel_page_present(struct page *page) #endif /* CONFIG_DEBUG_PAGEALLOC */ -int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, - unsigned numpages, unsigned long page_flags) +int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, + unsigned numpages, unsigned long page_flags) { int retval = -EINVAL; @@ -2353,6 +2353,8 @@ int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, .flags = 0, }; + WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP"); + if (!(__supported_pte_mask & _PAGE_NX)) goto out; @@ -2374,6 +2376,40 @@ out: return retval; } +/* + * __flush_tlb_all() flushes mappings only on current CPU and hence this + * function shouldn't be used in an SMP environment. Presently, it's used only + * during boot (way before smp_init()) by EFI subsystem and hence is ok. + */ +int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, + unsigned long numpages) +{ + int retval; + + /* + * The typical sequence for unmapping is to find a pte through + * lookup_address_in_pgd() (ideally, it should never return NULL because + * the address is already mapped) and change it's protections. As pfn is + * the *target* of a mapping, it's not useful while unmapping. + */ + struct cpa_data cpa = { + .vaddr = &address, + .pfn = 0, + .pgd = pgd, + .numpages = numpages, + .mask_set = __pgprot(0), + .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), + .flags = 0, + }; + + WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP"); + + retval = __change_page_attr_set_clr(&cpa, 0); + __flush_tlb_all(); + + return retval; +} + /* * The testcases use internal knowledge of the implementation that shouldn't * be exposed to the rest of the kernel. Include these directly here. From 08cfb38f3ef49cfd1bba11a00401451606477d80 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Prakhya Date: Thu, 29 Nov 2018 18:12:24 +0100 Subject: [PATCH 05/10] x86/efi: Unmap EFI boot services code/data regions from efi_pgd efi_free_boot_services(), as the name suggests, frees EFI boot services code/data regions but forgets to unmap these regions from efi_pgd. This means that any code that's running in efi_pgd address space (e.g: any EFI runtime service) would still be able to access these regions but the contents of these regions would have long been over written by someone else. So, it's important to unmap these regions. Hence, introduce efi_unmap_pages() to unmap these regions from efi_pgd. After unmapping EFI boot services code/data regions, any illegal access by buggy firmware to these regions would result in page fault which will be handled by EFI specific fault handler. Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Ard Biesheuvel Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Jon Hunter Cc: Julien Thierry Cc: Linus Torvalds Cc: Marc Zyngier Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sedat Dilek Cc: YiFei Zhu Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-6-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/quirks.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 95e77a667ba5..09e811b9da26 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -369,6 +369,24 @@ void __init efi_reserve_boot_services(void) } } +/* + * Apart from having VA mappings for EFI boot services code/data regions, + * (duplicate) 1:1 mappings were also created as a quirk for buggy firmware. So, + * unmap both 1:1 and VA mappings. + */ +static void __init efi_unmap_pages(efi_memory_desc_t *md) +{ + pgd_t *pgd = efi_mm.pgd; + u64 pa = md->phys_addr; + u64 va = md->virt_addr; + + if (kernel_unmap_pages_in_pgd(pgd, pa, md->num_pages)) + pr_err("Failed to unmap 1:1 mapping for 0x%llx\n", pa); + + if (kernel_unmap_pages_in_pgd(pgd, va, md->num_pages)) + pr_err("Failed to unmap VA mapping for 0x%llx\n", va); +} + void __init efi_free_boot_services(void) { phys_addr_t new_phys, new_size; @@ -393,6 +411,13 @@ void __init efi_free_boot_services(void) continue; } + /* + * Before calling set_virtual_address_map(), EFI boot services + * code/data regions were mapped as a quirk for buggy firmware. + * Unmap them from efi_pgd before freeing them up. + */ + efi_unmap_pages(md); + /* * Nasty quirk: if all sub-1MB memory is used for boot * services, we can get here without having allocated the From 47c33a095e1fae376d74b4160a0d73c1a4e73969 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Prakhya Date: Thu, 29 Nov 2018 18:12:25 +0100 Subject: [PATCH 06/10] x86/efi: Move efi__boot_services() to arch/x86 efi__boot_services() are x86 specific quirks and as such should be in asm/efi.h, so move them from linux/efi.h. Also, call efi_free_boot_services() from __efi_enter_virtual_mode() as it is x86 specific call and ideally shouldn't be part of init/main.c Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Ard Biesheuvel Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Jon Hunter Cc: Julien Thierry Cc: Linus Torvalds Cc: Marc Zyngier Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sedat Dilek Cc: YiFei Zhu Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-7-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 2 ++ arch/x86/platform/efi/efi.c | 2 ++ include/linux/efi.h | 3 --- init/main.c | 4 ---- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index eea40d52ca78..d1e64ac80b9c 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -141,6 +141,8 @@ extern int __init efi_reuse_config(u64 tables, int nr_tables); extern void efi_delete_dummy_variable(void); extern void efi_switch_mm(struct mm_struct *mm); extern void efi_recover_from_page_fault(unsigned long phys_addr); +extern void efi_free_boot_services(void); +extern void efi_reserve_boot_services(void); struct efi_setup_data { u64 fw_vendor; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 7ae939e353cd..e1cb01a22fa8 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -993,6 +993,8 @@ static void __init __efi_enter_virtual_mode(void) panic("EFI call to SetVirtualAddressMap() failed!"); } + efi_free_boot_services(); + /* * Now that EFI is in virtual mode, update the function * pointers in the runtime service table to the new virtual addresses. diff --git a/include/linux/efi.h b/include/linux/efi.h index 100ce4a4aff6..2b3b33c83b05 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1000,13 +1000,11 @@ extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg); extern void efi_gettimeofday (struct timespec64 *ts); extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ #ifdef CONFIG_X86 -extern void efi_free_boot_services(void); extern efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, bool nonblocking); extern void efi_find_mirror(void); #else -static inline void efi_free_boot_services(void) {} static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, @@ -1046,7 +1044,6 @@ extern void efi_mem_reserve(phys_addr_t addr, u64 size); extern int efi_mem_reserve_persistent(phys_addr_t addr, u64 size); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); -extern void efi_reserve_boot_services(void); extern int efi_get_fdt_params(struct efi_fdt_params *params); extern struct kobject *efi_kobj; diff --git a/init/main.c b/init/main.c index ee147103ba1b..ccefcd8e855f 100644 --- a/init/main.c +++ b/init/main.c @@ -737,10 +737,6 @@ asmlinkage __visible void __init start_kernel(void) arch_post_acpi_subsys_init(); sfi_init_late(); - if (efi_enabled(EFI_RUNTIME_SERVICES)) { - efi_free_boot_services(); - } - /* Do the rest non-__init'ed, we're now alive */ arch_call_rest_init(); } From 3db5e0ba8b8f4aee631d7ee04b7a11c56cfdc213 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 29 Nov 2018 18:12:26 +0100 Subject: [PATCH 07/10] efi/libstub: Disable some warnings for x86{,_64} When building the kernel with Clang, some disabled warnings appear because this Makefile overrides KBUILD_CFLAGS for x86{,_64}. Add them to this list so that the build is clean again. -Wpointer-sign was disabled for the whole kernel before the beginning of Git history. -Waddress-of-packed-member was disabled for the whole kernel and for the early boot code in these commits: bfb38988c51e ("kbuild: clang: Disable 'address-of-packed-member' warning") 20c6c1890455 ("x86/boot: Disable the address-of-packed-member compiler warning"). -Wgnu was disabled for the whole kernel and for the early boot code in these commits: 61163efae020 ("kbuild: LLVMLinux: Add Kbuild support for building kernel with Clang") 6c3b56b19730 ("x86/boot: Disable Clang warnings about GNU extensions"). [ mingo: Made the changelog more readable. ] Tested-by: Sedat Dilek Signed-off-by: Nathan Chancellor Signed-off-by: Ard Biesheuvel Reviewed-by: Sedat Dilek Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Jon Hunter Cc: Julien Thierry Cc: Linus Torvalds Cc: Marc Zyngier Cc: Matt Fleming Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: YiFei Zhu Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-8-ard.biesheuvel@linaro.org Link: https://github.com/ClangBuiltLinux/linux/issues/112 Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index c51627660dbb..d9845099635e 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -9,7 +9,10 @@ cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ -O2 \ -fPIC -fno-strict-aliasing -mno-red-zone \ - -mno-mmx -mno-sse -fshort-wchar + -mno-mmx -mno-sse -fshort-wchar \ + -Wno-pointer-sign \ + $(call cc-disable-warning, address-of-packed-member) \ + $(call cc-disable-warning, gnu) # arm64 uses the full KBUILD_CFLAGS so it's necessary to explicitly # disable the stackleak plugin From 5f0b0ecf043a5319e729c11a53bc8294df12dab3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 29 Nov 2018 18:12:28 +0100 Subject: [PATCH 08/10] efi: Permit multiple entries in persistent memreserve data structure In preparation of updating efi_mem_reserve_persistent() to cause less fragmentation when dealing with many persistent reservations, update the struct definition and the code that handles it currently so it can describe an arbitrary number of reservations using a single linked list entry. The actual optimization will be implemented in a subsequent patch. Tested-by: Marc Zyngier Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Jon Hunter Cc: Julien Thierry Cc: Linus Torvalds Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Sedat Dilek Cc: Thomas Gleixner Cc: YiFei Zhu Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-10-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/efi.c | 37 +++++++++++++++++-------- drivers/firmware/efi/libstub/arm-stub.c | 2 +- include/linux/efi.h | 13 +++++++-- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 415849bab233..80b11521627a 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -602,21 +602,33 @@ int __init efi_apply_persistent_mem_reservations(void) while (prsv) { struct linux_efi_memreserve *rsv; + u8 *p; + int i; - /* reserve the entry itself */ - memblock_reserve(prsv, sizeof(*rsv)); - - rsv = early_memremap(prsv, sizeof(*rsv)); - if (rsv == NULL) { + /* + * Just map a full page: that is what we will get + * anyway, and it permits us to map the entire entry + * before knowing its size. + */ + p = early_memremap(ALIGN_DOWN(prsv, PAGE_SIZE), + PAGE_SIZE); + if (p == NULL) { pr_err("Could not map UEFI memreserve entry!\n"); return -ENOMEM; } - if (rsv->size) - memblock_reserve(rsv->base, rsv->size); + rsv = (void *)(p + prsv % PAGE_SIZE); + + /* reserve the entry itself */ + memblock_reserve(prsv, EFI_MEMRESERVE_SIZE(rsv->size)); + + for (i = 0; i < atomic_read(&rsv->count); i++) { + memblock_reserve(rsv->entry[i].base, + rsv->entry[i].size); + } prsv = rsv->next; - early_memunmap(rsv, sizeof(*rsv)); + early_memunmap(p, PAGE_SIZE); } } @@ -985,6 +997,7 @@ static int __init efi_memreserve_map_root(void) int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) { struct linux_efi_memreserve *rsv; + int rsvsize = EFI_MEMRESERVE_SIZE(1); int rc; if (efi_memreserve_root == (void *)ULONG_MAX) @@ -996,12 +1009,14 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) return rc; } - rsv = kmalloc(sizeof(*rsv), GFP_ATOMIC); + rsv = kmalloc(rsvsize, GFP_ATOMIC); if (!rsv) return -ENOMEM; - rsv->base = addr; - rsv->size = size; + rsv->size = 1; + atomic_set(&rsv->count, 1); + rsv->entry[0].base = addr; + rsv->entry[0].size = size; spin_lock(&efi_mem_reserve_persistent_lock); rsv->next = efi_memreserve_root->next; diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 3d36142cf812..9e20159ea5f5 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -86,8 +86,8 @@ void install_memreserve_table(efi_system_table_t *sys_table_arg) } rsv->next = 0; - rsv->base = 0; rsv->size = 0; + atomic_set(&rsv->count, 0); status = efi_call_early(install_configuration_table, &memreserve_table_guid, diff --git a/include/linux/efi.h b/include/linux/efi.h index 2b3b33c83b05..4f27640fdcdc 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1712,9 +1712,16 @@ extern struct efi_runtime_work efi_rts_work; extern struct workqueue_struct *efi_rts_wq; struct linux_efi_memreserve { - phys_addr_t next; - phys_addr_t base; - phys_addr_t size; + int size; // allocated size of the array + atomic_t count; // number of entries used + phys_addr_t next; // pa of next struct instance + struct { + phys_addr_t base; + phys_addr_t size; + } entry[0]; }; +#define EFI_MEMRESERVE_SIZE(count) (sizeof(struct linux_efi_memreserve) + \ + (count) * sizeof(((struct linux_efi_memreserve *)0)->entry[0])) + #endif /* _LINUX_EFI_H */ From 80424b02d42bb22f8ff8839cb93a84ade53b39c0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 29 Nov 2018 18:12:29 +0100 Subject: [PATCH 09/10] efi: Reduce the amount of memblock reservations for persistent allocations The current implementation of efi_mem_reserve_persistent() is rather naive, in the sense that for each invocation, it creates a separate linked list entry to describe the reservation. Since the linked list entries themselves need to persist across subsequent kexec reboots, every reservation created this way results in two memblock_reserve() calls at the next boot. On arm64 systems with 100s of CPUs, this may result in a excessive number of memblock reservations, and needless fragmentation. So instead, make use of the newly updated struct linux_efi_memreserve layout to put multiple reservations into a single linked list entry. This should get rid of the numerous tiny memblock reservations, and effectively cut the total number of reservations in half on arm64 systems with many CPUs. [ mingo: build warning fix. ] Tested-by: Marc Zyngier Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Jon Hunter Cc: Julien Thierry Cc: Linus Torvalds Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Sedat Dilek Cc: Thomas Gleixner Cc: YiFei Zhu Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-11-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/efi.c | 21 +++++++++++++++++---- include/linux/efi.h | 3 +++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 80b11521627a..4c46ff6f2242 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -997,8 +997,8 @@ static int __init efi_memreserve_map_root(void) int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) { struct linux_efi_memreserve *rsv; - int rsvsize = EFI_MEMRESERVE_SIZE(1); - int rc; + unsigned long prsv; + int rc, index; if (efi_memreserve_root == (void *)ULONG_MAX) return -ENODEV; @@ -1009,11 +1009,24 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) return rc; } - rsv = kmalloc(rsvsize, GFP_ATOMIC); + /* first try to find a slot in an existing linked list entry */ + for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { + rsv = __va(prsv); + index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size); + if (index < rsv->size) { + rsv->entry[index].base = addr; + rsv->entry[index].size = size; + + return 0; + } + } + + /* no slot found - allocate a new linked list entry */ + rsv = (struct linux_efi_memreserve *)__get_free_page(GFP_ATOMIC); if (!rsv) return -ENOMEM; - rsv->size = 1; + rsv->size = EFI_MEMRESERVE_COUNT(PAGE_SIZE); atomic_set(&rsv->count, 1); rsv->entry[0].base = addr; rsv->entry[0].size = size; diff --git a/include/linux/efi.h b/include/linux/efi.h index 4f27640fdcdc..becd5d76a207 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1724,4 +1724,7 @@ struct linux_efi_memreserve { #define EFI_MEMRESERVE_SIZE(count) (sizeof(struct linux_efi_memreserve) + \ (count) * sizeof(((struct linux_efi_memreserve *)0)->entry[0])) +#define EFI_MEMRESERVE_COUNT(size) (((size) - sizeof(struct linux_efi_memreserve)) \ + / sizeof(((struct linux_efi_memreserve *)0)->entry[0])) + #endif /* _LINUX_EFI_H */ From 1debf0958fa27b7c469dbf22754929ec59a7c0e7 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Prakhya Date: Fri, 21 Dec 2018 18:22:34 -0800 Subject: [PATCH 10/10] x86/efi: Don't unmap EFI boot services code/data regions for EFI_OLD_MEMMAP and EFI_MIXED_MODE The following commit: d5052a7130a6 ("x86/efi: Unmap EFI boot services code/data regions from efi_pgd") forgets to take two EFI modes into consideration, namely EFI_OLD_MEMMAP and EFI_MIXED_MODE: - EFI_OLD_MEMMAP is a legacy way of mapping EFI regions into swapper_pg_dir using ioremap() and init_memory_mapping(). This feature can be enabled by passing "efi=old_map" as kernel command line argument. But, efi_unmap_pages() unmaps EFI boot services code/data regions *only* from efi_pgd and hence cannot be used for unmapping EFI boot services code/data regions from swapper_pg_dir. Introduce a temporary fix to not unmap EFI boot services code/data regions when EFI_OLD_MEMMAP is enabled while working on a real fix. - EFI_MIXED_MODE is another feature where a 64-bit kernel runs on a 64-bit platform crippled by a 32-bit firmware. To support EFI_MIXED_MODE, all RAM (i.e. namely EFI regions like EFI_CONVENTIONAL_MEMORY, EFI_LOADER_, EFI_BOOT_SERVICES_ and EFI_RUNTIME_CODE/DATA regions) is mapped into efi_pgd all the time to facilitate EFI runtime calls access it's arguments in 1:1 mode. Hence, don't unmap EFI boot services code/data regions when booted in mixed mode. Signed-off-by: Sai Praneeth Prakhya Acked-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181222022234.7573-1-sai.praneeth.prakhya@intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/quirks.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 09e811b9da26..17456a1d3f04 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -380,6 +380,22 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md) u64 pa = md->phys_addr; u64 va = md->virt_addr; + /* + * To Do: Remove this check after adding functionality to unmap EFI boot + * services code/data regions from direct mapping area because + * "efi=old_map" maps EFI regions in swapper_pg_dir. + */ + if (efi_enabled(EFI_OLD_MEMMAP)) + return; + + /* + * EFI mixed mode has all RAM mapped to access arguments while making + * EFI runtime calls, hence don't unmap EFI boot services code/data + * regions. + */ + if (!efi_is_native()) + return; + if (kernel_unmap_pages_in_pgd(pgd, pa, md->num_pages)) pr_err("Failed to unmap 1:1 mapping for 0x%llx\n", pa);