diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9a53c929f017..000336733a6a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3491,6 +3491,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ro [KNL] Mount root device read-only on boot + rodata= [KNL] + on Mark read-only kernel memory as read-only (default). + off Leave read-only kernel memory writable for debugging. + root= [KNL] Root filesystem See name_to_dev_t comment in init/do_mounts.c. diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index d5525bfc7e3e..9156fc303afd 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -491,7 +491,6 @@ static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif #ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); void set_kernel_text_rw(void); void set_kernel_text_ro(void); #else diff --git a/arch/arm/vdso/vdso.S b/arch/arm/vdso/vdso.S index b2b97e3e7bab..a62a7b64f49c 100644 --- a/arch/arm/vdso/vdso.S +++ b/arch/arm/vdso/vdso.S @@ -23,9 +23,8 @@ #include #include - __PAGE_ALIGNED_DATA - .globl vdso_start, vdso_end + .section .data..ro_after_init .balign PAGE_SIZE vdso_start: .incbin "arch/arm/vdso/vdso.so" diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 7fc294c3bc5b..22dda613f9c9 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -156,8 +156,4 @@ int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); -#endif - #endif diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 3d0e17bcc8e9..df0f52bd18b4 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -22,6 +22,9 @@ #define __read_mostly __attribute__((__section__(".data..read_mostly"))) +/* Read-only memory is marked before mark_rodata_ro() is called. */ +#define __ro_after_init __read_mostly + void parisc_cache_init(void); /* initializes cache-flushing */ void disable_sr_hashing_asm(int); /* low level support for above */ void disable_sr_hashing(void); /* turns off space register hashing */ diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 845272ce9cc5..7bd69bd43a01 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -121,10 +121,6 @@ flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vma } } -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); -#endif - #include #define ARCH_HAS_KMAP diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c46662f64c39..b1051057e5b0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -303,6 +303,9 @@ config ARCH_SUPPORTS_UPROBES config FIX_EARLYCON_MEM def_bool y +config DEBUG_RODATA + def_bool y + config PGTABLE_LEVELS int default 4 if X86_64 diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 9b18ed97a8a2..7816b7b276f4 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -74,28 +74,16 @@ config EFI_PGT_DUMP issues with the mapping of the EFI runtime regions into that table. -config DEBUG_RODATA - bool "Write protect kernel read-only data structures" - default y - depends on DEBUG_KERNEL - ---help--- - Mark the kernel read-only data as write-protected in the pagetables, - in order to catch accidental (and incorrect) writes to such const - data. This is recommended so that we can catch kernel bugs sooner. - If in doubt, say "Y". - config DEBUG_RODATA_TEST - bool "Testcase for the DEBUG_RODATA feature" - depends on DEBUG_RODATA + bool "Testcase for the marking rodata read-only" default y ---help--- - This option enables a testcase for the DEBUG_RODATA - feature as well as for the change_page_attr() infrastructure. + This option enables a testcase for the setting rodata read-only + as well as for the change_page_attr() infrastructure. If in doubt, say "N" config DEBUG_WX bool "Warn on W+X mappings at boot" - depends on DEBUG_RODATA select X86_PTDUMP_CORE ---help--- Generate a warning if any W+X mappings are found at boot. diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index 0224987556ce..3f69326ed545 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h @@ -140,7 +140,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, fprintf(outfile, "#include \n"); fprintf(outfile, "\n"); fprintf(outfile, - "static unsigned char raw_data[%lu] __page_aligned_data = {", + "static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {", mapping_size); for (j = 0; j < stripped_len; j++) { if (j % 10 == 0) diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index e63aa38e85fb..61518cf79437 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -91,16 +91,10 @@ void clflush_cache_range(void *addr, unsigned int size); #define mmio_flush_range(addr, size) clflush_cache_range(addr, size) -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); extern const int rodata_test_data; extern int kernel_set_to_readonly; void set_kernel_text_rw(void); void set_kernel_text_ro(void); -#else -static inline void set_kernel_text_rw(void) { } -static inline void set_kernel_text_ro(void) { } -#endif #ifdef CONFIG_DEBUG_RODATA_TEST int rodata_test(void); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c1adf33fdd0d..bc62e7cbf1b1 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -17,15 +17,8 @@ static inline bool kvm_check_and_clear_guest_paused(void) } #endif /* CONFIG_KVM_GUEST */ -#ifdef CONFIG_DEBUG_RODATA #define KVM_HYPERCALL \ ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) -#else -/* On AMD processors, vmcall will generate a trap that we will - * then rewrite to the appropriate instruction. - */ -#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" -#endif /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 0a5242428659..13b6cdd0af57 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -7,7 +7,7 @@ extern char __brk_base[], __brk_limit[]; extern struct exception_table_entry __stop___ex_table[]; -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; #endif diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 29408d6d6626..05c9e3f5b6d7 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -81,9 +81,9 @@ within(unsigned long addr, unsigned long start, unsigned long end) static unsigned long text_ip_addr(unsigned long ip) { /* - * On x86_64, kernel text mappings are mapped read-only with - * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead - * of the kernel text mapping to modify the kernel text. + * On x86_64, kernel text mappings are mapped read-only, so we use + * the kernel identity mapping instead of the kernel text mapping + * to modify the kernel text. * * For 32bit kernels, these mappings are same and we can use * kernel identity mapping to modify code. diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 44256a62702b..ed15cd486d06 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -750,9 +750,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; -#ifdef CONFIG_DEBUG_RODATA char opc[BREAK_INSTR_SIZE]; -#endif /* CONFIG_DEBUG_RODATA */ bpt->type = BP_BREAKPOINT; err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, @@ -761,7 +759,6 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) return err; err = probe_kernel_write((char *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); -#ifdef CONFIG_DEBUG_RODATA if (!err) return err; /* @@ -778,13 +775,12 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE)) return -EINVAL; bpt->type = BP_POKE_BREAKPOINT; -#endif /* CONFIG_DEBUG_RODATA */ + return err; } int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { -#ifdef CONFIG_DEBUG_RODATA int err; char opc[BREAK_INSTR_SIZE]; @@ -801,8 +797,8 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE)) goto knl_write; return err; + knl_write: -#endif /* CONFIG_DEBUG_RODATA */ return probe_kernel_write((char *)bpt->bpt_addr, (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c index 3f92ce07e525..27538f183c3b 100644 --- a/arch/x86/kernel/test_nx.c +++ b/arch/x86/kernel/test_nx.c @@ -142,7 +142,6 @@ static int test_NX(void) * by the error message */ -#ifdef CONFIG_DEBUG_RODATA /* Test 3: Check if the .rodata section is executable */ if (rodata_test_data != 0xC3) { printk(KERN_ERR "test_nx: .rodata marker has invalid value\n"); @@ -151,7 +150,6 @@ static int test_NX(void) printk(KERN_ERR "test_nx: .rodata section is executable\n"); ret = -ENODEV; } -#endif #if 0 /* Test 4: Check if the .data section of a module is executable */ diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c index 5ecbfe5099da..cb4a01b41e27 100644 --- a/arch/x86/kernel/test_rodata.c +++ b/arch/x86/kernel/test_rodata.c @@ -76,5 +76,5 @@ int rodata_test(void) } MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure"); +MODULE_DESCRIPTION("Testcase for marking rodata as read-only"); MODULE_AUTHOR("Arjan van de Ven "); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 74e4bf11f562..fe133b710bef 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -41,29 +41,28 @@ ENTRY(phys_startup_64) jiffies_64 = jiffies; #endif -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) /* - * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA - * we retain large page mappings for boundaries spanning kernel text, rodata - * and data sections. + * On 64-bit, align RODATA to 2MB so we retain large page mappings for + * boundaries spanning kernel text, rodata and data sections. * * However, kernel identity mappings will have different RWX permissions * to the pages mapping to text and to the pages padding (which are freed) the * text section. Hence kernel identity mappings will be broken to smaller * pages. For 64-bit, kernel text and kernel identity mappings are different, - * so we can enable protection checks that come with CONFIG_DEBUG_RODATA, - * as well as retain 2MB large page mappings for kernel text. + * so we can enable protection checks as well as retain 2MB large page + * mappings for kernel text. */ -#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); +#define X64_ALIGN_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); -#define X64_ALIGN_DEBUG_RODATA_END \ +#define X64_ALIGN_RODATA_END \ . = ALIGN(HPAGE_SIZE); \ __end_rodata_hpage_align = .; #else -#define X64_ALIGN_DEBUG_RODATA_BEGIN -#define X64_ALIGN_DEBUG_RODATA_END +#define X64_ALIGN_RODATA_BEGIN +#define X64_ALIGN_RODATA_END #endif @@ -112,13 +111,11 @@ SECTIONS EXCEPTION_TABLE(16) :text = 0x9090 -#if defined(CONFIG_DEBUG_RODATA) /* .text should occupy whole number of pages */ . = ALIGN(PAGE_SIZE); -#endif - X64_ALIGN_DEBUG_RODATA_BEGIN + X64_ALIGN_RODATA_BEGIN RO_DATA(PAGE_SIZE) - X64_ALIGN_DEBUG_RODATA_END + X64_ALIGN_RODATA_END /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index cb4ef3de61f9..2ebfbaf61142 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -871,7 +871,6 @@ static noinline int do_test_wp_bit(void) return flag; } -#ifdef CONFIG_DEBUG_RODATA const int rodata_test_data = 0xC3; EXPORT_SYMBOL_GPL(rodata_test_data); @@ -960,5 +959,3 @@ void mark_rodata_ro(void) if (__supported_pte_mask & _PAGE_NX) debug_checkwx(); } -#endif - diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5488d21123bd..a40b755c67e3 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1074,7 +1074,6 @@ void __init mem_init(void) mem_init_print_info(NULL); } -#ifdef CONFIG_DEBUG_RODATA const int rodata_test_data = 0xC3; EXPORT_SYMBOL_GPL(rodata_test_data); @@ -1166,8 +1165,6 @@ void mark_rodata_ro(void) debug_checkwx(); } -#endif - int kern_addr_valid(unsigned long addr) { unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 9cf96d82147a..1c37e650acac 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -283,7 +283,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, __pa_symbol(__end_rodata) >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_RW; -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) /* * Once the kernel maps the text as RO (kernel_set_to_readonly is set), * kernel text mappings for the large page aligned text, rodata sections diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c index 11fdadc68e53..2a6eaf1122b4 100644 --- a/drivers/misc/lkdtm.c +++ b/drivers/misc/lkdtm.c @@ -103,6 +103,7 @@ enum ctype { CT_EXEC_USERSPACE, CT_ACCESS_USERSPACE, CT_WRITE_RO, + CT_WRITE_RO_AFTER_INIT, CT_WRITE_KERN, }; @@ -140,6 +141,7 @@ static char* cp_type[] = { "EXEC_USERSPACE", "ACCESS_USERSPACE", "WRITE_RO", + "WRITE_RO_AFTER_INIT", "WRITE_KERN", }; @@ -162,6 +164,7 @@ static DEFINE_SPINLOCK(lock_me_up); static u8 data_area[EXEC_SIZE]; static const unsigned long rodata = 0xAA55AA55; +static unsigned long ro_after_init __ro_after_init = 0x55AA5500; module_param(recur_count, int, 0644); MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test"); @@ -503,11 +506,28 @@ static void lkdtm_do_action(enum ctype which) break; } case CT_WRITE_RO: { - unsigned long *ptr; + /* Explicitly cast away "const" for the test. */ + unsigned long *ptr = (unsigned long *)&rodata; - ptr = (unsigned long *)&rodata; + pr_info("attempting bad rodata write at %p\n", ptr); + *ptr ^= 0xabcd1234; - pr_info("attempting bad write at %p\n", ptr); + break; + } + case CT_WRITE_RO_AFTER_INIT: { + unsigned long *ptr = &ro_after_init; + + /* + * Verify we were written to during init. Since an Oops + * is considered a "success", a failure is to just skip the + * real test. + */ + if ((*ptr & 0xAA) != 0xAA) { + pr_info("%p was NOT written during init!?\n", ptr); + break; + } + + pr_info("attempting bad ro_after_init write at %p\n", ptr); *ptr ^= 0xabcd1234; break; @@ -817,6 +837,9 @@ static int __init lkdtm_module_init(void) int n_debugfs_entries = 1; /* Assume only the direct entry */ int i; + /* Make sure we can write to __ro_after_init values during __init */ + ro_after_init |= 0xAA; + /* Register debugfs interface */ lkdtm_debugfs_root = debugfs_create_dir("provoke-crash", NULL); if (!lkdtm_debugfs_root) { diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c4bd0e2c173c..772c784ba763 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -256,6 +256,7 @@ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_rodata) = .; \ *(.rodata) *(.rodata.*) \ + *(.data..ro_after_init) /* Read only after init */ \ *(__vermagic) /* Kernel version magic */ \ . = ALIGN(8); \ VMLINUX_SYMBOL(__start___tracepoints_ptrs) = .; \ diff --git a/include/linux/cache.h b/include/linux/cache.h index 17e7e82d2aa7..1be04f8c563a 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -12,10 +12,24 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif +/* + * __read_mostly is used to keep rarely changing variables out of frequently + * updated cachelines. If an architecture doesn't support it, ignore the + * hint. + */ #ifndef __read_mostly #define __read_mostly #endif +/* + * __ro_after_init is used to mark things that are read-only after init (i.e. + * after mark_rodata_ro() has been called). These are effectively read-only, + * but may get written to during init, so can't live in .rodata (via "const"). + */ +#ifndef __ro_after_init +#define __ro_after_init __attribute__((__section__(".data..ro_after_init"))) +#endif + #ifndef ____cacheline_aligned #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) #endif diff --git a/include/linux/init.h b/include/linux/init.h index b449f378f995..aedb254abc37 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -142,6 +142,10 @@ void prepare_namespace(void); void __init load_default_modules(void); int __init init_rootfs(void); +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void); +#endif + extern void (*late_time_init)(void); extern bool initcall_debug; diff --git a/init/main.c b/init/main.c index b3008bcfb1dc..7c27de4577ed 100644 --- a/init/main.c +++ b/init/main.c @@ -93,9 +93,6 @@ static int kernel_init(void *); extern void init_IRQ(void); extern void fork_init(void); extern void radix_tree_init(void); -#ifndef CONFIG_DEBUG_RODATA -static inline void mark_rodata_ro(void) { } -#endif /* * Debug helper: via this flag we know that we are in 'early bootup code' @@ -924,6 +921,28 @@ static int try_to_run_init_process(const char *init_filename) static noinline void __init kernel_init_freeable(void); +#ifdef CONFIG_DEBUG_RODATA +static bool rodata_enabled = true; +static int __init set_debug_rodata(char *str) +{ + return strtobool(str, &rodata_enabled); +} +__setup("rodata=", set_debug_rodata); + +static void mark_readonly(void) +{ + if (rodata_enabled) + mark_rodata_ro(); + else + pr_info("Kernel memory protection disabled.\n"); +} +#else +static inline void mark_readonly(void) +{ + pr_warn("This architecture does not have kernel memory protection.\n"); +} +#endif + static int __ref kernel_init(void *unused) { int ret; @@ -932,7 +951,7 @@ static int __ref kernel_init(void *unused) /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); free_initmem(); - mark_rodata_ro(); + mark_readonly(); system_state = SYSTEM_RUNNING; numa_default_policy(); diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c index e1dbf4a2c69e..90ff129c88a2 100644 --- a/kernel/debug/kdb/kdb_bp.c +++ b/kernel/debug/kdb/kdb_bp.c @@ -153,13 +153,11 @@ static int _kdb_bp_install(struct pt_regs *regs, kdb_bp_t *bp) } else { kdb_printf("%s: failed to set breakpoint at 0x%lx\n", __func__, bp->bp_addr); -#ifdef CONFIG_DEBUG_RODATA if (!bp->bp_type) { kdb_printf("Software breakpoints are unavailable.\n" - " Change the kernel CONFIG_DEBUG_RODATA=n\n" + " Boot the kernel with rodata=off\n" " OR use hw breaks: help bph\n"); } -#endif return 1; } return 0;