From 9922c1deff915c2b67ec79ea6b87c289772c6492 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 14 Aug 2020 17:30:04 -0700 Subject: [PATCH 01/35] asm-generic: pgalloc.h: use correct #ifdef to enable pud_alloc_one() The #ifdef statement that guards the generic version of pud_alloc_one() by mistake used __HAVE_ARCH_PUD_FREE instead of __HAVE_ARCH_PUD_ALLOC_ONE. Fix it. Fixes: d9e8b929670b ("asm-generic: pgalloc: provide generic pud_alloc_one() and pud_free_one()") Reported-by: kernel test robot Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/20200812191415.GE163101@linux.ibm.com Signed-off-by: Linus Torvalds --- include/asm-generic/pgalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 6f44810921aa..02932efad3ab 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -147,7 +147,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #if CONFIG_PGTABLE_LEVELS > 3 -#ifndef __HAVE_ARCH_PUD_FREE +#ifndef __HAVE_ARCH_PUD_ALLOC_ONE /** * pud_alloc_one - allocate a page for PUD-level page table * @mm: the mm_struct of the current context From a8a4b7aeaf841311cb13ff0f6c4710c7a00e68d4 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 14 Aug 2020 17:30:07 -0700 Subject: [PATCH 02/35] Revert "mm/vmstat.c: do not show lowmem reserve protection information of empty zone" This reverts commit 26e7deadaae175. Sonny reported that one of their tests started failing on the latest kernel on their Chrome OS platform. The root cause is that the above commit removed the protection line of empty zone, while the parser used in the test relies on the protection line to mark the end of each zone. Let's revert it to avoid breaking userspace testing or applications. Fixes: 26e7deadaae175 ("mm/vmstat.c: do not show lowmem reserve protection information of empty zone)" Reported-by: Sonny Rao Signed-off-by: Baoquan He Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Acked-by: David Rientjes Cc: [5.8.x] Link: http://lkml.kernel.org/r/20200811075412.12872-1-bhe@redhat.com Signed-off-by: Linus Torvalds --- mm/vmstat.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 727a26d1ec1d..e670f910cd2f 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1642,12 +1642,6 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, zone->present_pages, zone_managed_pages(zone)); - /* If unpopulated, no other information is useful */ - if (!populated_zone(zone)) { - seq_putc(m, '\n'); - return; - } - seq_printf(m, "\n protection: (%ld", zone->lowmem_reserve[0]); @@ -1655,6 +1649,12 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, seq_printf(m, ", %ld", zone->lowmem_reserve[i]); seq_putc(m, ')'); + /* If unpopulated, no other information is useful */ + if (!populated_zone(zone)) { + seq_putc(m, '\n'); + return; + } + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) seq_printf(m, "\n %-12s %lu", zone_stat_name(i), zone_page_state(zone, i)); From b1a3e75e466d96383508634f3d2e477ac45f2fc1 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 14 Aug 2020 17:30:10 -0700 Subject: [PATCH 03/35] lz4: fix kernel decompression speed This patch replaces all memcpy() calls with LZ4_memcpy() which calls __builtin_memcpy() so the compiler can inline it. LZ4 relies heavily on memcpy() with a constant size being inlined. In x86 and i386 pre-boot environments memcpy() cannot be inlined because memcpy() doesn't get defined as __builtin_memcpy(). An equivalent patch has been applied upstream so that the next import won't lose this change [1]. I've measured the kernel decompression speed using QEMU before and after this patch for the x86_64 and i386 architectures. The speed-up is about 10x as shown below. Code Arch Kernel Size Time Speed v5.8 x86_64 11504832 B 148 ms 79 MB/s patch x86_64 11503872 B 13 ms 885 MB/s v5.8 i386 9621216 B 91 ms 106 MB/s patch i386 9620224 B 10 ms 962 MB/s I also measured the time to decompress the initramfs on x86_64, i386, and arm. All three show the same decompression speed before and after, as expected. [1] https://github.com/lz4/lz4/pull/890 Signed-off-by: Nick Terrell Signed-off-by: Andrew Morton Cc: Yann Collet Cc: Gao Xiang Cc: Sven Schmidt <4sschmid@informatik.uni-hamburg.de> Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Arvind Sankar Link: http://lkml.kernel.org/r/20200803194022.2966806-1-nickrterrell@gmail.com Signed-off-by: Linus Torvalds --- lib/lz4/lz4_compress.c | 4 ++-- lib/lz4/lz4_decompress.c | 18 +++++++++--------- lib/lz4/lz4defs.h | 10 ++++++++++ lib/lz4/lz4hc_compress.c | 2 +- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c index cc7b6d4cc7c7..90bb67994688 100644 --- a/lib/lz4/lz4_compress.c +++ b/lib/lz4/lz4_compress.c @@ -446,7 +446,7 @@ _last_literals: *op++ = (BYTE)(lastRun << ML_BITS); } - memcpy(op, anchor, lastRun); + LZ4_memcpy(op, anchor, lastRun); op += lastRun; } @@ -708,7 +708,7 @@ _last_literals: } else { *op++ = (BYTE)(lastRunSize<= 8) && (dict == withPrefix64k || match >= lowPrefix)) { /* Copy the match. */ - memcpy(op + 0, match + 0, 8); - memcpy(op + 8, match + 8, 8); - memcpy(op + 16, match + 16, 2); + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op + 16, match + 16, 2); op += length + MINMATCH; /* Both stages worked, load the next token. */ continue; @@ -263,7 +263,7 @@ static FORCE_INLINE int LZ4_decompress_generic( } } - memcpy(op, ip, length); + LZ4_memcpy(op, ip, length); ip += length; op += length; @@ -350,7 +350,7 @@ _copy_match: size_t const copySize = (size_t)(lowPrefix - match); size_t const restSize = length - copySize; - memcpy(op, dictEnd - copySize, copySize); + LZ4_memcpy(op, dictEnd - copySize, copySize); op += copySize; if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ @@ -360,7 +360,7 @@ _copy_match: while (op < endOfMatch) *op++ = *copyFrom++; } else { - memcpy(op, lowPrefix, restSize); + LZ4_memcpy(op, lowPrefix, restSize); op += restSize; } } @@ -386,7 +386,7 @@ _copy_match: while (op < copyEnd) *op++ = *match++; } else { - memcpy(op, match, mlen); + LZ4_memcpy(op, match, mlen); } op = copyEnd; if (op == oend) @@ -400,7 +400,7 @@ _copy_match: op[2] = match[2]; op[3] = match[3]; match += inc32table[offset]; - memcpy(op + 4, match, 4); + LZ4_memcpy(op + 4, match, 4); match -= dec64table[offset]; } else { LZ4_copy8(op, match); diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index 1a7fa9d9170f..c91dd96ef629 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -137,6 +137,16 @@ static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value) return put_unaligned_le16(value, memPtr); } +/* + * LZ4 relies on memcpy with a constant size being inlined. In freestanding + * environments, the compiler can't assume the implementation of memcpy() is + * standard compliant, so apply its specialized memcpy() inlining logic. When + * possible, use __builtin_memcpy() to tell the compiler to analyze memcpy() + * as-if it were standard compliant, so it can inline it in freestanding + * environments. This is needed when decompressing the Linux Kernel, for example. + */ +#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) + static FORCE_INLINE void LZ4_copy8(void *dst, const void *src) { #if LZ4_ARCH64 diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c index 1b61d874e337..e7ac8694b797 100644 --- a/lib/lz4/lz4hc_compress.c +++ b/lib/lz4/lz4hc_compress.c @@ -570,7 +570,7 @@ _Search3: *op++ = (BYTE) lastRun; } else *op++ = (BYTE)(lastRun< Date: Fri, 14 Aug 2020 17:30:14 -0700 Subject: [PATCH 04/35] exec: restore EACCES of S_ISDIR execve() Patch series "Fix S_ISDIR execve() errno". Fix an errno change for execve() of directories, noticed by Marc Zyngier. Along with the fix, include a regression test to avoid seeing this return in the future. This patch (of 2): The return code for attempting to execute a directory has always been EACCES. Adjust the S_ISDIR exec test to reflect the old errno instead of the general EISDIR for other kinds of "open" attempts on directories. Fixes: 633fb6ac3980 ("exec: move S_ISREG() check earlier") Reported-by: Marc Zyngier Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Tested-by: Greg Kroah-Hartman Reviewed-by: Greg Kroah-Hartman Link: http://lkml.kernel.org/r/20200813231723.2725102-2-keescook@chromium.org Link: https://lore.kernel.org/lkml/20200813151305.6191993b@why Signed-off-by: Linus Torvalds --- fs/namei.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 2112e578dccc..e99e2a9da0f7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2849,8 +2849,10 @@ static int may_open(const struct path *path, int acc_mode, int flag) case S_IFLNK: return -ELOOP; case S_IFDIR: - if (acc_mode & (MAY_WRITE | MAY_EXEC)) + if (acc_mode & MAY_WRITE) return -EISDIR; + if (acc_mode & MAY_EXEC) + return -EACCES; break; case S_IFBLK: case S_IFCHR: From 0f71241a8e32baf9ac290cf915bc9b3ff265f7d3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 14 Aug 2020 17:30:17 -0700 Subject: [PATCH 05/35] selftests/exec: add file type errno tests Make sure execve() returns the expected errno values for non-regular files. Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Cc: Marc Zyngier Link: http://lkml.kernel.org/r/20200813231723.2725102-3-keescook@chromium.org Signed-off-by: Linus Torvalds --- tools/testing/selftests/exec/.gitignore | 1 + tools/testing/selftests/exec/Makefile | 5 +- tools/testing/selftests/exec/non-regular.c | 196 +++++++++++++++++++++ 3 files changed, 200 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/exec/non-regular.c diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore index 94b02a18f230..344a99c6da1b 100644 --- a/tools/testing/selftests/exec/.gitignore +++ b/tools/testing/selftests/exec/.gitignore @@ -10,3 +10,4 @@ execveat.denatured /recursion-depth xxxxxxxx* pipe +S_I*.test diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 4453b8f8def3..0a13b110c1e6 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -3,7 +3,7 @@ CFLAGS = -Wall CFLAGS += -Wno-nonnull CFLAGS += -D_GNU_SOURCE -TEST_PROGS := binfmt_script +TEST_PROGS := binfmt_script non-regular TEST_GEN_PROGS := execveat TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe # Makefile is a run-time dependency, since it's accessed by the execveat test @@ -11,7 +11,8 @@ TEST_FILES := Makefile TEST_GEN_PROGS += recursion-depth -EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* +EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* \ + $(OUTPUT)/S_I*.test include ../lib.mk diff --git a/tools/testing/selftests/exec/non-regular.c b/tools/testing/selftests/exec/non-regular.c new file mode 100644 index 000000000000..cd3a34aca93e --- /dev/null +++ b/tools/testing/selftests/exec/non-regular.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest_harness.h" + +/* Remove a file, ignoring the result if it didn't exist. */ +void rm(struct __test_metadata *_metadata, const char *pathname, + int is_dir) +{ + int rc; + + if (is_dir) + rc = rmdir(pathname); + else + rc = unlink(pathname); + + if (rc < 0) { + ASSERT_EQ(errno, ENOENT) { + TH_LOG("Not ENOENT: %s", pathname); + } + } else { + ASSERT_EQ(rc, 0) { + TH_LOG("Failed to remove: %s", pathname); + } + } +} + +FIXTURE(file) { + char *pathname; + int is_dir; +}; + +FIXTURE_VARIANT(file) +{ + const char *name; + int expected; + int is_dir; + void (*setup)(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant); + int major, minor, mode; /* for mknod() */ +}; + +void setup_link(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + const char * const paths[] = { + "/bin/true", + "/usr/bin/true", + }; + int i; + + for (i = 0; i < ARRAY_SIZE(paths); i++) { + if (access(paths[i], X_OK) == 0) { + ASSERT_EQ(symlink(paths[i], self->pathname), 0); + return; + } + } + ASSERT_EQ(1, 0) { + TH_LOG("Could not find viable 'true' binary"); + } +} + +FIXTURE_VARIANT_ADD(file, S_IFLNK) +{ + .name = "S_IFLNK", + .expected = ELOOP, + .setup = setup_link, +}; + +void setup_dir(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + ASSERT_EQ(mkdir(self->pathname, 0755), 0); +} + +FIXTURE_VARIANT_ADD(file, S_IFDIR) +{ + .name = "S_IFDIR", + .is_dir = 1, + .expected = EACCES, + .setup = setup_dir, +}; + +void setup_node(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + dev_t dev; + int rc; + + dev = makedev(variant->major, variant->minor); + rc = mknod(self->pathname, 0755 | variant->mode, dev); + ASSERT_EQ(rc, 0) { + if (errno == EPERM) + SKIP(return, "Please run as root; cannot mknod(%s)", + variant->name); + } +} + +FIXTURE_VARIANT_ADD(file, S_IFBLK) +{ + .name = "S_IFBLK", + .expected = EACCES, + .setup = setup_node, + /* /dev/loop0 */ + .major = 7, + .minor = 0, + .mode = S_IFBLK, +}; + +FIXTURE_VARIANT_ADD(file, S_IFCHR) +{ + .name = "S_IFCHR", + .expected = EACCES, + .setup = setup_node, + /* /dev/zero */ + .major = 1, + .minor = 5, + .mode = S_IFCHR, +}; + +void setup_fifo(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + ASSERT_EQ(mkfifo(self->pathname, 0755), 0); +} + +FIXTURE_VARIANT_ADD(file, S_IFIFO) +{ + .name = "S_IFIFO", + .expected = EACCES, + .setup = setup_fifo, +}; + +FIXTURE_SETUP(file) +{ + ASSERT_GT(asprintf(&self->pathname, "%s.test", variant->name), 6); + self->is_dir = variant->is_dir; + + rm(_metadata, self->pathname, variant->is_dir); + variant->setup(_metadata, self, variant); +} + +FIXTURE_TEARDOWN(file) +{ + rm(_metadata, self->pathname, self->is_dir); +} + +TEST_F(file, exec_errno) +{ + char * const argv[2] = { (char * const)self->pathname, NULL }; + + EXPECT_LT(execv(argv[0], argv), 0); + EXPECT_EQ(errno, variant->expected); +} + +/* S_IFSOCK */ +FIXTURE(sock) +{ + int fd; +}; + +FIXTURE_SETUP(sock) +{ + self->fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(self->fd, 0); +} + +FIXTURE_TEARDOWN(sock) +{ + if (self->fd >= 0) + ASSERT_EQ(close(self->fd), 0); +} + +TEST_F(sock, exec_errno) +{ + char * const argv[2] = { " magic socket ", NULL }; + char * const envp[1] = { NULL }; + + EXPECT_LT(fexecve(self->fd, argv, envp), 0); + EXPECT_EQ(errno, EACCES); +} + +TEST_HARNESS_MAIN From 14a36a435343931296277f4a3c5adf4e29a8f46d Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 14 Aug 2020 17:30:20 -0700 Subject: [PATCH 06/35] mailmap: add entry for Greg Kurz I had stopped using gkurz@linux.vnet.ibm.com a while back already but this email address was shutdown last June when I quit IBM. It's about time to map it to groug@kaod.org. Signed-off-by: Greg Kurz Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/159724692879.76040.4938578139173154028.stgit@bahia.lan Signed-off-by: Linus Torvalds --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 83d8d0158f3b..57fe0085b465 100644 --- a/.mailmap +++ b/.mailmap @@ -104,6 +104,7 @@ Gerald Schaefer Greg Kroah-Hartman Greg Kroah-Hartman Greg Kroah-Hartman +Greg Kurz Gregory CLEMENT Hanjun Guo Heiko Carstens From 1378a5ee451a5e87d0d8dd6356a0b7844db231f6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:23 -0700 Subject: [PATCH 07/35] mm: store compound_nr as well as compound_order Patch series "THP prep patches". These are some generic cleanups and improvements, which I would like merged into mmotm soon. The first one should be a performance improvement for all users of compound pages, and the others are aimed at getting code to compile away when CONFIG_TRANSPARENT_HUGEPAGE is disabled (ie small systems). Also better documented / less confusing than the current prefix mixture of compound, hpage and thp. This patch (of 7): This removes a few instructions from functions which need to know how many pages are in a compound page. The storage used is either page->mapping on 64-bit or page->index on 32-bit. Both of these are fine to overlay on tail pages. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-1-willy@infradead.org Link: http://lkml.kernel.org/r/20200629151959.15779-2-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 ++++- include/linux/mm_types.h | 1 + mm/page_alloc.c | 5 +++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e7602a3bcef1..10b1e6e5f885 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -922,12 +922,15 @@ static inline int compound_pincount(struct page *page) static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; + page[1].compound_nr = 1U << order; } /* Returns the number of pages in this potentially compound page. */ static inline unsigned long compound_nr(struct page *page) { - return 1UL << compound_order(page); + if (!PageHead(page)) + return 1; + return page[1].compound_nr; } /* Returns the number of bytes in this potentially compound page. */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0277fbab7c93..496c3ff97cce 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -134,6 +134,7 @@ struct page { unsigned char compound_dtor; unsigned char compound_order; atomic_t compound_mapcount; + unsigned int compound_nr; /* 1 << compound_order */ }; struct { /* Second tail page of compound page */ unsigned long _compound_pad_1; /* compound_head */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8b7d0ecf30b1..0e2bab486fea 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -666,8 +666,6 @@ void prep_compound_page(struct page *page, unsigned int order) int i; int nr_pages = 1 << order; - set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); - set_compound_order(page, order); __SetPageHead(page); for (i = 1; i < nr_pages; i++) { struct page *p = page + i; @@ -675,6 +673,9 @@ void prep_compound_page(struct page *page, unsigned int order) p->mapping = TAIL_MAPPING; set_compound_head(p, page); } + + set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); + set_compound_order(page, order); atomic_set(compound_mapcount_ptr(page), -1); if (hpage_pincount_available(page)) atomic_set(compound_pincount_ptr(page), 0); From 419015675fef6c4b28689bd2ace559564c2e106c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:26 -0700 Subject: [PATCH 08/35] mm: move page-flags include to top of file Give up on the notion that we can remove page-flags.h from mm.h. There are currently 14 inline functions which use a PageFoo function. Also, two of the files directly included by mm.h include page-flags.h themselves, and there are probably more indirect inclusions. So just include it at the top like any other header file. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-3-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 10b1e6e5f885..fd0cd4e93029 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -668,11 +669,6 @@ int vma_is_stack_for_current(struct vm_area_struct *vma); struct mmu_gather; struct inode; -/* - * FIXME: take this include out, include page-flags.h in - * files which need it (119 of them) - */ -#include #include /* From 6ffbb45826f5d9ae09aa60cd88594b7816c96190 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:30 -0700 Subject: [PATCH 09/35] mm: add thp_order This function returns the order of a transparent huge page. It compiles to 0 if CONFIG_TRANSPARENT_HUGEPAGE is disabled. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-4-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 467302056e17..9521cfdf18ca 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -258,6 +258,19 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, else return NULL; } + +/** + * thp_order - Order of a transparent huge page. + * @page: Head page of a transparent huge page. + */ +static inline unsigned int thp_order(struct page *page) +{ + VM_BUG_ON_PGFLAGS(PageTail(page), page); + if (PageHead(page)) + return HPAGE_PMD_ORDER; + return 0; +} + static inline int hpage_nr_pages(struct page *page) { if (unlikely(PageTransHuge(page))) @@ -317,6 +330,12 @@ static inline struct list_head *page_deferred_list(struct page *page) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) +static inline unsigned int thp_order(struct page *page) +{ + VM_BUG_ON_PGFLAGS(PageTail(page), page); + return 0; +} + static inline int hpage_nr_pages(struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); From af3bbc12df80e8c279b94c752b6edca29841f4f5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:33 -0700 Subject: [PATCH 10/35] mm: add thp_size This function returns the number of bytes in a THP. It is like page_size(), but compiles to just PAGE_SIZE if CONFIG_TRANSPARENT_HUGEPAGE is disabled. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-5-willy@infradead.org Signed-off-by: Linus Torvalds --- drivers/nvdimm/btt.c | 4 +--- drivers/nvdimm/pmem.c | 6 ++---- include/linux/huge_mm.h | 11 +++++++++++ mm/internal.h | 2 +- mm/page_io.c | 2 +- mm/page_vma_mapped.c | 4 ++-- 6 files changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 412d21d8f643..0ff610e728ff 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1490,10 +1490,8 @@ static int btt_rw_page(struct block_device *bdev, sector_t sector, { struct btt *btt = bdev->bd_disk->private_data; int rc; - unsigned int len; - len = hpage_nr_pages(page) * PAGE_SIZE; - rc = btt_do_bvec(btt, NULL, page, len, 0, op, sector); + rc = btt_do_bvec(btt, NULL, page, thp_size(page), 0, op, sector); if (rc == 0) page_endio(page, op_is_write(op), 0); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 94790e6e0e4c..fab29b514372 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -238,11 +238,9 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, blk_status_t rc; if (op_is_write(op)) - rc = pmem_do_write(pmem, page, 0, sector, - hpage_nr_pages(page) * PAGE_SIZE); + rc = pmem_do_write(pmem, page, 0, sector, thp_size(page)); else - rc = pmem_do_read(pmem, page, 0, sector, - hpage_nr_pages(page) * PAGE_SIZE); + rc = pmem_do_read(pmem, page, 0, sector, thp_size(page)); /* * The ->rw_page interface is subtle and tricky. The core * retries on any error, so we can only invoke page_endio() in diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9521cfdf18ca..9b33ac774fdd 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -462,4 +462,15 @@ static inline bool thp_migration_supported(void) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +/** + * thp_size - Size of a transparent huge page. + * @page: Head page of a transparent huge page. + * + * Return: Number of bytes in this page. + */ +static inline unsigned long thp_size(struct page *page) +{ + return PAGE_SIZE << thp_order(page); +} + #endif /* _LINUX_HUGE_MM_H */ diff --git a/mm/internal.h b/mm/internal.h index d11a9a8d2135..912bb1a1c10e 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -396,7 +396,7 @@ vma_address(struct page *page, struct vm_area_struct *vma) unsigned long start, end; start = __vma_address(page, vma); - end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1); + end = start + thp_size(page) - PAGE_SIZE; /* page should be within @vma mapping range */ VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma); diff --git a/mm/page_io.c b/mm/page_io.c index 9e362567d454..f5e8bec8a8c7 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -40,7 +40,7 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; bio->bi_end_io = end_io; - bio_add_page(bio, page, PAGE_SIZE * hpage_nr_pages(page), 0); + bio_add_page(bio, page, thp_size(page), 0); } return bio; } diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 719c35246cfa..e65629c056e8 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -227,7 +227,7 @@ next_pte: if (pvmw->address >= pvmw->vma->vm_end || pvmw->address >= __vma_address(pvmw->page, pvmw->vma) + - hpage_nr_pages(pvmw->page) * PAGE_SIZE) + thp_size(pvmw->page)) return not_found(pvmw); /* Did we cross page table boundary? */ if (pvmw->address % PMD_SIZE == 0) { @@ -268,7 +268,7 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) unsigned long start, end; start = __vma_address(page, vma); - end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1); + end = start + thp_size(page) - PAGE_SIZE; if (unlikely(end < vma->vm_start || start >= vma->vm_end)) return 0; From 6c357848b44b4016ca422178aa368a7472245f6f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:37 -0700 Subject: [PATCH 11/35] mm: replace hpage_nr_pages with thp_nr_pages The thp prefix is more frequently used than hpage and we should be consistent between the various functions. [akpm@linux-foundation.org: fix mm/migrate.c] Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: Mike Kravetz Cc: David Hildenbrand Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-6-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 13 +++++++++---- include/linux/mm_inline.h | 6 +++--- include/linux/pagemap.h | 6 +++--- mm/compaction.c | 2 +- mm/filemap.c | 2 +- mm/gup.c | 2 +- mm/internal.h | 2 +- mm/memcontrol.c | 10 +++++----- mm/memory_hotplug.c | 7 +++---- mm/mempolicy.c | 2 +- mm/migrate.c | 18 +++++++++--------- mm/mlock.c | 9 ++++----- mm/page_io.c | 2 +- mm/page_vma_mapped.c | 2 +- mm/rmap.c | 8 ++++---- mm/swap.c | 16 ++++++++-------- mm/swap_state.c | 6 +++--- mm/swapfile.c | 2 +- mm/vmscan.c | 6 +++--- mm/workingset.c | 6 +++--- 20 files changed, 65 insertions(+), 62 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9b33ac774fdd..229f986d535a 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -271,9 +271,14 @@ static inline unsigned int thp_order(struct page *page) return 0; } -static inline int hpage_nr_pages(struct page *page) +/** + * thp_nr_pages - The number of regular pages in this huge page. + * @page: The head page of a huge page. + */ +static inline int thp_nr_pages(struct page *page) { - if (unlikely(PageTransHuge(page))) + VM_BUG_ON_PGFLAGS(PageTail(page), page); + if (PageHead(page)) return HPAGE_PMD_NR; return 1; } @@ -336,9 +341,9 @@ static inline unsigned int thp_order(struct page *page) return 0; } -static inline int hpage_nr_pages(struct page *page) +static inline int thp_nr_pages(struct page *page) { - VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PGFLAGS(PageTail(page), page); return 1; } diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 219bef41d87c..8fc71e9d7bb0 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -48,14 +48,14 @@ static __always_inline void update_lru_size(struct lruvec *lruvec, static __always_inline void add_page_to_lru_list(struct page *page, struct lruvec *lruvec, enum lru_list lru) { - update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page)); + update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); list_add(&page->lru, &lruvec->lists[lru]); } static __always_inline void add_page_to_lru_list_tail(struct page *page, struct lruvec *lruvec, enum lru_list lru) { - update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page)); + update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); list_add_tail(&page->lru, &lruvec->lists[lru]); } @@ -63,7 +63,7 @@ static __always_inline void del_page_from_lru_list(struct page *page, struct lruvec *lruvec, enum lru_list lru) { list_del(&page->lru); - update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page)); + update_lru_size(lruvec, lru, page_zonenum(page), -thp_nr_pages(page)); } /** diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d1f4eff605ad..7de11dcd534d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -381,7 +381,7 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) if (PageHuge(head)) return head; - return head + (index & (hpage_nr_pages(head) - 1)); + return head + (index & (thp_nr_pages(head) - 1)); } struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); @@ -773,7 +773,7 @@ static inline struct page *readahead_page(struct readahead_control *rac) page = xa_load(&rac->mapping->i_pages, rac->_index); VM_BUG_ON_PAGE(!PageLocked(page), page); - rac->_batch_count = hpage_nr_pages(page); + rac->_batch_count = thp_nr_pages(page); return page; } @@ -796,7 +796,7 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageTail(page), page); array[i++] = page; - rac->_batch_count += hpage_nr_pages(page); + rac->_batch_count += thp_nr_pages(page); /* * The page cache isn't using multi-index entries yet, diff --git a/mm/compaction.c b/mm/compaction.c index b89581bf859c..176dcded298e 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1009,7 +1009,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, del_page_from_lru_list(page, lruvec, page_lru(page)); mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_is_file_lru(page), - hpage_nr_pages(page)); + thp_nr_pages(page)); isolate_success: list_add(&page->lru, &cc->migratepages); diff --git a/mm/filemap.c b/mm/filemap.c index 8e75bce0346d..653190943aa7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -198,7 +198,7 @@ static void unaccount_page_cache_page(struct address_space *mapping, if (PageHuge(page)) return; - nr = hpage_nr_pages(page); + nr = thp_nr_pages(page); __mod_lruvec_page_state(page, NR_FILE_PAGES, -nr); if (PageSwapBacked(page)) { diff --git a/mm/gup.c b/mm/gup.c index 39e58df6925d..ae096ea7583f 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1637,7 +1637,7 @@ check_again: mod_node_page_state(page_pgdat(head), NR_ISOLATED_ANON + page_is_file_lru(head), - hpage_nr_pages(head)); + thp_nr_pages(head)); } } } diff --git a/mm/internal.h b/mm/internal.h index 912bb1a1c10e..10c677655912 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -369,7 +369,7 @@ extern void clear_page_mlock(struct page *page); static inline void mlock_migrate_page(struct page *newpage, struct page *page) { if (TestClearPageMlocked(page)) { - int nr_pages = hpage_nr_pages(page); + int nr_pages = thp_nr_pages(page); /* Holding pmd lock, no change in irq context: __mod is safe */ __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9d87082e64aa..b807952b4d43 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5589,7 +5589,7 @@ static int mem_cgroup_move_account(struct page *page, { struct lruvec *from_vec, *to_vec; struct pglist_data *pgdat; - unsigned int nr_pages = compound ? hpage_nr_pages(page) : 1; + unsigned int nr_pages = compound ? thp_nr_pages(page) : 1; int ret; VM_BUG_ON(from == to); @@ -6682,7 +6682,7 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root, */ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { - unsigned int nr_pages = hpage_nr_pages(page); + unsigned int nr_pages = thp_nr_pages(page); struct mem_cgroup *memcg = NULL; int ret = 0; @@ -6912,7 +6912,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) return; /* Force-charge the new page. The old one will be freed soon */ - nr_pages = hpage_nr_pages(newpage); + nr_pages = thp_nr_pages(newpage); page_counter_charge(&memcg->memory, nr_pages); if (do_memsw_account()) @@ -7114,7 +7114,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) * ancestor for the swap instead and transfer the memory+swap charge. */ swap_memcg = mem_cgroup_id_get_online(memcg); - nr_entries = hpage_nr_pages(page); + nr_entries = thp_nr_pages(page); /* Get references for the tail pages, too */ if (nr_entries > 1) mem_cgroup_id_get_many(swap_memcg, nr_entries - 1); @@ -7158,7 +7158,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) */ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) { - unsigned int nr_pages = hpage_nr_pages(page); + unsigned int nr_pages = thp_nr_pages(page); struct page_counter *counter; struct mem_cgroup *memcg; unsigned short oldid; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c32ead89c911..e9d5ab5d3ca0 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1299,7 +1299,7 @@ static int do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) { unsigned long pfn; - struct page *page; + struct page *page, *head; int ret = 0; LIST_HEAD(source); @@ -1307,15 +1307,14 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (!pfn_valid(pfn)) continue; page = pfn_to_page(pfn); + head = compound_head(page); if (PageHuge(page)) { - struct page *head = compound_head(page); pfn = page_to_pfn(head) + compound_nr(head) - 1; isolate_huge_page(head, &source); continue; } else if (PageTransHuge(page)) - pfn = page_to_pfn(compound_head(page)) - + hpage_nr_pages(page) - 1; + pfn = page_to_pfn(head) + thp_nr_pages(page) - 1; /* * HWPoison pages have elevated reference counts so the migration would diff --git a/mm/mempolicy.c b/mm/mempolicy.c index afaa09ff9f6c..eddbe4e56c73 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1049,7 +1049,7 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist, list_add_tail(&head->lru, pagelist); mod_node_page_state(page_pgdat(head), NR_ISOLATED_ANON + page_is_file_lru(head), - hpage_nr_pages(head)); + thp_nr_pages(head)); } else if (flags & MPOL_MF_STRICT) { /* * Non-movable page may reach here. And, there may be diff --git a/mm/migrate.c b/mm/migrate.c index 5053439be6ab..34a842a8eb6a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -193,7 +193,7 @@ void putback_movable_pages(struct list_head *l) put_page(page); } else { mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + - page_is_file_lru(page), -hpage_nr_pages(page)); + page_is_file_lru(page), -thp_nr_pages(page)); putback_lru_page(page); } } @@ -386,7 +386,7 @@ static int expected_page_refs(struct address_space *mapping, struct page *page) */ expected_count += is_device_private_page(page); if (mapping) - expected_count += hpage_nr_pages(page) + page_has_private(page); + expected_count += thp_nr_pages(page) + page_has_private(page); return expected_count; } @@ -441,7 +441,7 @@ int migrate_page_move_mapping(struct address_space *mapping, */ newpage->index = page->index; newpage->mapping = page->mapping; - page_ref_add(newpage, hpage_nr_pages(page)); /* add cache reference */ + page_ref_add(newpage, thp_nr_pages(page)); /* add cache reference */ if (PageSwapBacked(page)) { __SetPageSwapBacked(newpage); if (PageSwapCache(page)) { @@ -474,7 +474,7 @@ int migrate_page_move_mapping(struct address_space *mapping, * to one less reference. * We know this isn't the last reference. */ - page_ref_unfreeze(page, expected_count - hpage_nr_pages(page)); + page_ref_unfreeze(page, expected_count - thp_nr_pages(page)); xas_unlock(&xas); /* Leave irq disabled to prevent preemption while updating stats */ @@ -591,7 +591,7 @@ static void copy_huge_page(struct page *dst, struct page *src) } else { /* thp page */ BUG_ON(!PageTransHuge(src)); - nr_pages = hpage_nr_pages(src); + nr_pages = thp_nr_pages(src); } for (i = 0; i < nr_pages; i++) { @@ -1213,7 +1213,7 @@ out: */ if (likely(!__PageMovable(page))) mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + - page_is_file_lru(page), -hpage_nr_pages(page)); + page_is_file_lru(page), -thp_nr_pages(page)); } /* @@ -1446,7 +1446,7 @@ retry: * during migration. */ is_thp = PageTransHuge(page); - nr_subpages = hpage_nr_pages(page); + nr_subpages = thp_nr_pages(page); cond_resched(); if (PageHuge(page)) @@ -1670,7 +1670,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, list_add_tail(&head->lru, pagelist); mod_node_page_state(page_pgdat(head), NR_ISOLATED_ANON + page_is_file_lru(head), - hpage_nr_pages(head)); + thp_nr_pages(head)); } out_putpage: /* @@ -2034,7 +2034,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) page_lru = page_is_file_lru(page); mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru, - hpage_nr_pages(page)); + thp_nr_pages(page)); /* * Isolating the page has taken another reference, so the diff --git a/mm/mlock.c b/mm/mlock.c index f8736136fad7..93ca2bf30b4f 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -61,8 +61,7 @@ void clear_page_mlock(struct page *page) if (!TestClearPageMlocked(page)) return; - mod_zone_page_state(page_zone(page), NR_MLOCK, - -hpage_nr_pages(page)); + mod_zone_page_state(page_zone(page), NR_MLOCK, -thp_nr_pages(page)); count_vm_event(UNEVICTABLE_PGCLEARED); /* * The previous TestClearPageMlocked() corresponds to the smp_mb() @@ -95,7 +94,7 @@ void mlock_vma_page(struct page *page) if (!TestSetPageMlocked(page)) { mod_zone_page_state(page_zone(page), NR_MLOCK, - hpage_nr_pages(page)); + thp_nr_pages(page)); count_vm_event(UNEVICTABLE_PGMLOCKED); if (!isolate_lru_page(page)) putback_lru_page(page); @@ -192,7 +191,7 @@ unsigned int munlock_vma_page(struct page *page) /* * Serialize with any parallel __split_huge_page_refcount() which * might otherwise copy PageMlocked to part of the tail pages before - * we clear it in the head page. It also stabilizes hpage_nr_pages(). + * we clear it in the head page. It also stabilizes thp_nr_pages(). */ spin_lock_irq(&pgdat->lru_lock); @@ -202,7 +201,7 @@ unsigned int munlock_vma_page(struct page *page) goto unlock_out; } - nr_pages = hpage_nr_pages(page); + nr_pages = thp_nr_pages(page); __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); if (__munlock_isolate_lru_page(page, true)) { diff --git a/mm/page_io.c b/mm/page_io.c index f5e8bec8a8c7..454b70d8cda7 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -274,7 +274,7 @@ static inline void count_swpout_vm_event(struct page *page) if (unlikely(PageTransHuge(page))) count_vm_event(THP_SWPOUT); #endif - count_vm_events(PSWPOUT, hpage_nr_pages(page)); + count_vm_events(PSWPOUT, thp_nr_pages(page)); } #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index e65629c056e8..5e77b269c330 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -61,7 +61,7 @@ static inline bool pfn_is_match(struct page *page, unsigned long pfn) return page_pfn == pfn; /* THP can be referenced by any subpage */ - return pfn >= page_pfn && pfn - page_pfn < hpage_nr_pages(page); + return pfn >= page_pfn && pfn - page_pfn < thp_nr_pages(page); } /** diff --git a/mm/rmap.c b/mm/rmap.c index 6cce9ef06753..4ace1e32f705 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1130,7 +1130,7 @@ void do_page_add_anon_rmap(struct page *page, } if (first) { - int nr = compound ? hpage_nr_pages(page) : 1; + int nr = compound ? thp_nr_pages(page) : 1; /* * We use the irq-unsafe __{inc|mod}_zone_page_stat because * these counters are not modified in interrupt context, and @@ -1169,7 +1169,7 @@ void do_page_add_anon_rmap(struct page *page, void page_add_new_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address, bool compound) { - int nr = compound ? hpage_nr_pages(page) : 1; + int nr = compound ? thp_nr_pages(page) : 1; VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma); __SetPageSwapBacked(page); @@ -1860,7 +1860,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, return; pgoff_start = page_to_pgoff(page); - pgoff_end = pgoff_start + hpage_nr_pages(page) - 1; + pgoff_end = pgoff_start + thp_nr_pages(page) - 1; anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff_start, pgoff_end) { struct vm_area_struct *vma = avc->vma; @@ -1913,7 +1913,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, return; pgoff_start = page_to_pgoff(page); - pgoff_end = pgoff_start + hpage_nr_pages(page) - 1; + pgoff_end = pgoff_start + thp_nr_pages(page) - 1; if (!locked) i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, diff --git a/mm/swap.c b/mm/swap.c index 9285e60c7d6e..d26c22baf7c5 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -241,7 +241,7 @@ static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec, del_page_from_lru_list(page, lruvec, page_lru(page)); ClearPageActive(page); add_page_to_lru_list_tail(page, lruvec, page_lru(page)); - (*pgmoved) += hpage_nr_pages(page); + (*pgmoved) += thp_nr_pages(page); } } @@ -312,7 +312,7 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) void lru_note_cost_page(struct page *page) { lru_note_cost(mem_cgroup_page_lruvec(page, page_pgdat(page)), - page_is_file_lru(page), hpage_nr_pages(page)); + page_is_file_lru(page), thp_nr_pages(page)); } static void __activate_page(struct page *page, struct lruvec *lruvec, @@ -320,7 +320,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec, { if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { int lru = page_lru_base_type(page); - int nr_pages = hpage_nr_pages(page); + int nr_pages = thp_nr_pages(page); del_page_from_lru_list(page, lruvec, lru); SetPageActive(page); @@ -500,7 +500,7 @@ void lru_cache_add_inactive_or_unevictable(struct page *page, * lock is held(spinlock), which implies preemption disabled. */ __mod_zone_page_state(page_zone(page), NR_MLOCK, - hpage_nr_pages(page)); + thp_nr_pages(page)); count_vm_event(UNEVICTABLE_PGMLOCKED); } lru_cache_add(page); @@ -532,7 +532,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, { int lru; bool active; - int nr_pages = hpage_nr_pages(page); + int nr_pages = thp_nr_pages(page); if (!PageLRU(page)) return; @@ -580,7 +580,7 @@ static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, { if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { int lru = page_lru_base_type(page); - int nr_pages = hpage_nr_pages(page); + int nr_pages = thp_nr_pages(page); del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE); ClearPageActive(page); @@ -599,7 +599,7 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page) && !PageUnevictable(page)) { bool active = PageActive(page); - int nr_pages = hpage_nr_pages(page); + int nr_pages = thp_nr_pages(page); del_page_from_lru_list(page, lruvec, LRU_INACTIVE_ANON + active); @@ -972,7 +972,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, { enum lru_list lru; int was_unevictable = TestClearPageUnevictable(page); - int nr_pages = hpage_nr_pages(page); + int nr_pages = thp_nr_pages(page); VM_BUG_ON_PAGE(PageLRU(page), page); diff --git a/mm/swap_state.c b/mm/swap_state.c index b73aabdfd35a..d9d4a49f3241 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -130,7 +130,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, struct address_space *address_space = swap_address_space(entry); pgoff_t idx = swp_offset(entry); XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page)); - unsigned long i, nr = hpage_nr_pages(page); + unsigned long i, nr = thp_nr_pages(page); void *old; VM_BUG_ON_PAGE(!PageLocked(page), page); @@ -183,7 +183,7 @@ void __delete_from_swap_cache(struct page *page, swp_entry_t entry, void *shadow) { struct address_space *address_space = swap_address_space(entry); - int i, nr = hpage_nr_pages(page); + int i, nr = thp_nr_pages(page); pgoff_t idx = swp_offset(entry); XA_STATE(xas, &address_space->i_pages, idx); @@ -278,7 +278,7 @@ void delete_from_swap_cache(struct page *page) xa_unlock_irq(&address_space->i_pages); put_swap_page(page, entry); - page_ref_sub(page, hpage_nr_pages(page)); + page_ref_sub(page, thp_nr_pages(page)); } void clear_shadow_from_swap_cache(int type, unsigned long begin, diff --git a/mm/swapfile.c b/mm/swapfile.c index e653eea1eb88..eb410d3c8de8 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1370,7 +1370,7 @@ void put_swap_page(struct page *page, swp_entry_t entry) unsigned char *map; unsigned int i, free_entries = 0; unsigned char val; - int size = swap_entry_size(hpage_nr_pages(page)); + int size = swap_entry_size(thp_nr_pages(page)); si = _swap_info_get(entry); if (!si) diff --git a/mm/vmscan.c b/mm/vmscan.c index 738115ed75e2..99e1796eb833 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1354,7 +1354,7 @@ static unsigned int shrink_page_list(struct list_head *page_list, case PAGE_ACTIVATE: goto activate_locked; case PAGE_SUCCESS: - stat->nr_pageout += hpage_nr_pages(page); + stat->nr_pageout += thp_nr_pages(page); if (PageWriteback(page)) goto keep; @@ -1862,7 +1862,7 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec, SetPageLRU(page); lru = page_lru(page); - nr_pages = hpage_nr_pages(page); + nr_pages = thp_nr_pages(page); update_lru_size(lruvec, lru, page_zonenum(page), nr_pages); list_move(&page->lru, &lruvec->lists[lru]); @@ -2065,7 +2065,7 @@ static void shrink_active_list(unsigned long nr_to_scan, * so we ignore them here. */ if ((vm_flags & VM_EXEC) && page_is_file_lru(page)) { - nr_rotated += hpage_nr_pages(page); + nr_rotated += thp_nr_pages(page); list_add(&page->lru, &l_active); continue; } diff --git a/mm/workingset.c b/mm/workingset.c index 8cbe4e3cbe5c..92e66113a577 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -263,7 +263,7 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) VM_BUG_ON_PAGE(!PageLocked(page), page); lruvec = mem_cgroup_lruvec(target_memcg, pgdat); - workingset_age_nonresident(lruvec, hpage_nr_pages(page)); + workingset_age_nonresident(lruvec, thp_nr_pages(page)); /* XXX: target_memcg can be NULL, go through lruvec */ memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); eviction = atomic_long_read(&lruvec->nonresident_age); @@ -374,7 +374,7 @@ void workingset_refault(struct page *page, void *shadow) goto out; SetPageActive(page); - workingset_age_nonresident(lruvec, hpage_nr_pages(page)); + workingset_age_nonresident(lruvec, thp_nr_pages(page)); inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file); /* Page was active prior to eviction */ @@ -411,7 +411,7 @@ void workingset_activation(struct page *page) if (!mem_cgroup_disabled() && !memcg) goto out; lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page)); - workingset_age_nonresident(lruvec, hpage_nr_pages(page)); + workingset_age_nonresident(lruvec, thp_nr_pages(page)); out: rcu_read_unlock(); } From 2be1d71841b7ecfb01ce4c59f6e1d082c3c18a8a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:40 -0700 Subject: [PATCH 12/35] mm: add thp_head This is like compound_head() but compiles away when CONFIG_TRANSPARENT_HUGEPAGE is not enabled. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-7-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 229f986d535a..8a8bc46a2432 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -259,6 +259,15 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, return NULL; } +/** + * thp_head - Head page of a transparent huge page. + * @page: Any page (tail, head or regular) found in the page cache. + */ +static inline struct page *thp_head(struct page *page) +{ + return compound_head(page); +} + /** * thp_order - Order of a transparent huge page. * @page: Head page of a transparent huge page. @@ -335,6 +344,12 @@ static inline struct list_head *page_deferred_list(struct page *page) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) +static inline struct page *thp_head(struct page *page) +{ + VM_BUG_ON_PGFLAGS(PageTail(page), page); + return page; +} + static inline unsigned int thp_order(struct page *page) { VM_BUG_ON_PGFLAGS(PageTail(page), page); From ee6c400f5c05459b8c5f2884a176a1287ce2f68f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:43 -0700 Subject: [PATCH 13/35] mm: introduce offset_in_thp Mirroring offset_in_page(), this gives you the offset within this particular page, no matter what size page it is. It optimises down to offset_in_page() if CONFIG_TRANSPARENT_HUGEPAGE is not set. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-8-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index fd0cd4e93029..2e7ec3ee34cf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1594,6 +1594,7 @@ static inline void clear_page_pfmemalloc(struct page *page) extern void pagefault_out_of_memory(void); #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) +#define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1)) /* * Flags passed to show_mem() and show_free_areas() to suppress output in From c734124c5c824511f553f794a514a185dfc0e3e7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 14 Aug 2020 17:30:46 -0700 Subject: [PATCH 14/35] fs: autofs: delete repeated words in comments Drop duplicated words {the, at} in comments. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Acked-by: Ian Kent Link: http://lkml.kernel.org/r/20200811021817.24982-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- fs/autofs/dev-ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c index f3a0f412b43b..75105f45c51a 100644 --- a/fs/autofs/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c @@ -20,7 +20,7 @@ * another mount. This situation arises when starting automount(8) * or other user space daemon which uses direct mounts or offset * mounts (used for autofs lazy mount/umount of nested mount trees), - * which have been left busy at at service shutdown. + * which have been left busy at service shutdown. */ typedef int (*ioctl_fn)(struct file *, struct autofs_sb_info *, @@ -496,7 +496,7 @@ static int autofs_dev_ioctl_askumount(struct file *fp, * located path is the root of a mount we return 1 along with * the super magic of the mount or 0 otherwise. * - * In both cases the the device number (as returned by + * In both cases the device number (as returned by * new_encode_dev()) is also returned. */ static int autofs_dev_ioctl_ismountpoint(struct file *fp, From 88db0aa2421666d2f73486d15b239a4521983d55 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 14 Aug 2020 17:31:07 -0700 Subject: [PATCH 15/35] all arch: remove system call sys_sysctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"), sys_sysctl is actually unavailable: any input can only return an error. We have been warning about people using the sysctl system call for years and believe there are no more users. Even if there are users of this interface if they have not complained or fixed their code by now they probably are not going to, so there is no point in warning them any longer. So completely remove sys_sysctl on all architectures. [nixiaoming@huawei.com: s390: fix build error for sys_call_table_emu] Link: http://lkml.kernel.org/r/20200618141426.16884-1-nixiaoming@huawei.com Signed-off-by: Xiaoming Ni Signed-off-by: Andrew Morton Acked-by: Will Deacon [arm/arm64] Acked-by: "Eric W. Biederman" Cc: Aleksa Sarai Cc: Alexander Shishkin Cc: Al Viro Cc: Andi Kleen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Bin Meng Cc: Borislav Petkov Cc: Brian Gerst Cc: Catalin Marinas Cc: chenzefeng Cc: Christian Borntraeger Cc: Christian Brauner Cc: Chris Zankel Cc: David Howells Cc: David S. Miller Cc: Diego Elio Pettenò Cc: Dmitry Vyukov Cc: Dominik Brodowski Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Iurii Zaikin Cc: Ivan Kokshaysky Cc: James Bottomley Cc: Jens Axboe Cc: Jiri Olsa Cc: Kars de Jong Cc: Kees Cook Cc: Krzysztof Kozlowski Cc: Luis Chamberlain Cc: Marco Elver Cc: Mark Rutland Cc: Martin K. Petersen Cc: Masahiro Yamada Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Miklos Szeredi Cc: Minchan Kim Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Nick Piggin Cc: Oleg Nesterov Cc: Olof Johansson Cc: Paul Burton Cc: "Paul E. McKenney" Cc: Paul Mackerras Cc: Peter Zijlstra (Intel) Cc: Randy Dunlap Cc: Ravi Bangoria Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sami Tolvanen Cc: Sargun Dhillon Cc: Stephen Rothwell Cc: Sudeep Holla Cc: Sven Schnelle Cc: Thiago Jung Bauermann Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Tony Luck Cc: Vasily Gorbik Cc: Vlastimil Babka Cc: Yoshinori Sato Cc: Zhou Yanjie Link: http://lkml.kernel.org/r/20200616030734.87257-1-nixiaoming@huawei.com Signed-off-by: Linus Torvalds --- arch/alpha/kernel/syscalls/syscall.tbl | 2 +- arch/arm/configs/am200epdkit_defconfig | 1 - arch/arm/tools/syscall.tbl | 2 +- arch/arm64/include/asm/unistd32.h | 4 +- arch/ia64/kernel/syscalls/syscall.tbl | 2 +- arch/m68k/kernel/syscalls/syscall.tbl | 2 +- arch/microblaze/kernel/syscalls/syscall.tbl | 2 +- arch/mips/configs/cu1000-neo_defconfig | 1 - arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- arch/mips/kernel/syscalls/syscall_n64.tbl | 2 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- arch/parisc/kernel/syscalls/syscall.tbl | 2 +- arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- arch/s390/kernel/syscalls/syscall.tbl | 2 +- arch/sh/configs/dreamcast_defconfig | 1 - arch/sh/configs/espt_defconfig | 1 - arch/sh/configs/hp6xx_defconfig | 1 - arch/sh/configs/landisk_defconfig | 1 - arch/sh/configs/lboxre2_defconfig | 1 - arch/sh/configs/microdev_defconfig | 1 - arch/sh/configs/migor_defconfig | 1 - arch/sh/configs/r7780mp_defconfig | 1 - arch/sh/configs/r7785rp_defconfig | 1 - arch/sh/configs/rts7751r2d1_defconfig | 1 - arch/sh/configs/rts7751r2dplus_defconfig | 1 - arch/sh/configs/se7206_defconfig | 1 - arch/sh/configs/se7343_defconfig | 1 - arch/sh/configs/se7619_defconfig | 1 - arch/sh/configs/se7705_defconfig | 1 - arch/sh/configs/se7750_defconfig | 1 - arch/sh/configs/se7751_defconfig | 1 - arch/sh/configs/secureedge5410_defconfig | 1 - arch/sh/configs/sh03_defconfig | 1 - arch/sh/configs/sh7710voipgw_defconfig | 1 - arch/sh/configs/sh7757lcr_defconfig | 1 - arch/sh/configs/sh7763rdp_defconfig | 1 - arch/sh/configs/shmin_defconfig | 1 - arch/sh/configs/titan_defconfig | 1 - arch/sh/kernel/syscalls/syscall.tbl | 2 +- arch/sparc/kernel/syscalls/syscall.tbl | 2 +- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- arch/x86/entry/syscalls/syscall_64.tbl | 2 +- arch/xtensa/kernel/syscalls/syscall.tbl | 2 +- include/linux/compat.h | 1 - include/linux/syscalls.h | 2 - include/linux/sysctl.h | 6 +- kernel/Makefile | 2 +- kernel/sys_ni.c | 1 - kernel/sysctl_binary.c | 171 ------------------ .../arch/powerpc/entry/syscalls/syscall.tbl | 2 +- .../perf/arch/s390/entry/syscalls/syscall.tbl | 2 +- .../arch/x86/entry/syscalls/syscall_64.tbl | 2 +- 52 files changed, 24 insertions(+), 227 deletions(-) delete mode 100644 kernel/sysctl_binary.c diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index a28fb211881d..ec8bed9e7b75 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -249,7 +249,7 @@ 316 common mlockall sys_mlockall 317 common munlockall sys_munlockall 318 common sysinfo sys_sysinfo -319 common _sysctl sys_sysctl +319 common _sysctl sys_ni_syscall # 320 was sys_idle 321 common oldumount sys_oldumount 322 common swapon sys_swapon diff --git a/arch/arm/configs/am200epdkit_defconfig b/arch/arm/configs/am200epdkit_defconfig index f56ac394caf1..4e49d6cb2f62 100644 --- a/arch/arm/configs/am200epdkit_defconfig +++ b/arch/arm/configs/am200epdkit_defconfig @@ -3,7 +3,6 @@ CONFIG_LOCALVERSION="gum" CONFIG_SYSVIPC=y CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_EXPERT=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_EPOLL is not set # CONFIG_SHMEM is not set # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 7e8ee4adf269..171077cbf419 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -162,7 +162,7 @@ 146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 17e81bd9a2d3..734860ac7cf9 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -308,8 +308,8 @@ __SYSCALL(__NR_writev, compat_sys_writev) __SYSCALL(__NR_getsid, sys_getsid) #define __NR_fdatasync 148 __SYSCALL(__NR_fdatasync, sys_fdatasync) -#define __NR__sysctl 149 -__SYSCALL(__NR__sysctl, compat_sys_sysctl) + /* 149 was sys_sysctl */ +__SYSCALL(149, sys_ni_syscall) #define __NR_mlock 150 __SYSCALL(__NR_mlock, sys_mlock) #define __NR_munlock 151 diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index ced9c83e47c9..f52a41f4c340 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -135,7 +135,7 @@ 123 common writev sys_writev 124 common pread64 sys_pread64 125 common pwrite64 sys_pwrite64 -126 common _sysctl sys_sysctl +126 common _sysctl sys_ni_syscall 127 common mmap sys_mmap 128 common munmap sys_munmap 129 common mlock sys_mlock diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 1a4822de7292..81fc799d8392 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -156,7 +156,7 @@ 146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index a3f4be8e7238..b4e263916f41 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -156,7 +156,7 @@ 146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/mips/configs/cu1000-neo_defconfig b/arch/mips/configs/cu1000-neo_defconfig index 6b471cdb16cf..e924c817f73d 100644 --- a/arch/mips/configs/cu1000-neo_defconfig +++ b/arch/mips/configs/cu1000-neo_defconfig @@ -17,7 +17,6 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 6b4ee92e3aed..f9df9edb67a4 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -159,7 +159,7 @@ 149 n32 munlockall sys_munlockall 150 n32 vhangup sys_vhangup 151 n32 pivot_root sys_pivot_root -152 n32 _sysctl compat_sys_sysctl +152 n32 _sysctl sys_ni_syscall 153 n32 prctl sys_prctl 154 n32 adjtimex sys_adjtimex_time32 155 n32 setrlimit compat_sys_setrlimit diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 391acbf425a0..557f9954a2b9 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -159,7 +159,7 @@ 149 n64 munlockall sys_munlockall 150 n64 vhangup sys_vhangup 151 n64 pivot_root sys_pivot_root -152 n64 _sysctl sys_sysctl +152 n64 _sysctl sys_ni_syscall 153 n64 prctl sys_prctl 154 n64 adjtimex sys_adjtimex 155 n64 setrlimit sys_setrlimit diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 5727c5187508..195b43cf27c8 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -164,7 +164,7 @@ 150 o32 unused150 sys_ni_syscall 151 o32 getsid sys_getsid 152 o32 fdatasync sys_fdatasync -153 o32 _sysctl sys_sysctl compat_sys_sysctl +153 o32 _sysctl sys_ni_syscall 154 o32 mlock sys_mlock 155 o32 munlock sys_munlock 156 o32 mlockall sys_mlockall diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 292baabefade..def64d221cd4 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -163,7 +163,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl compat_sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index be9f74546068..c2d737ff2e7b 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -197,7 +197,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 nospu _sysctl sys_sysctl compat_sys_sysctl +149 nospu _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index f1fda4375526..10456bc936fb 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -138,7 +138,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync -149 common _sysctl sys_sysctl compat_sys_sysctl +149 common _sysctl - - 150 common mlock sys_mlock sys_mlock 151 common munlock sys_munlock sys_munlock 152 common mlockall sys_mlockall sys_mlockall diff --git a/arch/sh/configs/dreamcast_defconfig b/arch/sh/configs/dreamcast_defconfig index ae067e0b15e3..6a82c7b8ff32 100644 --- a/arch/sh/configs/dreamcast_defconfig +++ b/arch/sh/configs/dreamcast_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_MODULES=y diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig index a5b865a75d22..9a988c347e9d 100644 --- a/arch/sh/configs/espt_defconfig +++ b/arch/sh/configs/espt_defconfig @@ -5,7 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_IPC_NS=y -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig index a92db6694ce2..70e6605d7f7e 100644 --- a/arch/sh/configs/hp6xx_defconfig +++ b/arch/sh/configs/hp6xx_defconfig @@ -3,7 +3,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y # CONFIG_BLK_DEV_BSG is not set CONFIG_CPU_SUBTYPE_SH7709=y diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index 567af752b1bb..ba6ec042606f 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -1,6 +1,5 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig index 10f6d371ce2c..05e4ac6fed5f 100644 --- a/arch/sh/configs/lboxre2_defconfig +++ b/arch/sh/configs/lboxre2_defconfig @@ -1,6 +1,5 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/microdev_defconfig b/arch/sh/configs/microdev_defconfig index ed84d1303acf..c65667d00313 100644 --- a/arch/sh/configs/microdev_defconfig +++ b/arch/sh/configs/microdev_defconfig @@ -2,7 +2,6 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y # CONFIG_BLK_DEV_BSG is not set CONFIG_CPU_SUBTYPE_SH4_202=y diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig index 37e9521a99e5..a24cf8cd2cea 100644 --- a/arch/sh/configs/migor_defconfig +++ b/arch/sh/configs/migor_defconfig @@ -4,7 +4,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index c97ec60cff27..e922659fdadb 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -3,7 +3,6 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set CONFIG_SLAB=y diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index 55fce65eb454..5978866358ec 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -7,7 +7,6 @@ CONFIG_RCU_TRACE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig index 6a3cfe08295f..fc9c22152b08 100644 --- a/arch/sh/configs/rts7751r2d1_defconfig +++ b/arch/sh/configs/rts7751r2d1_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig index 2b3d7d280672..ff3fd6787fd6 100644 --- a/arch/sh/configs/rts7751r2dplus_defconfig +++ b/arch/sh/configs/rts7751r2dplus_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index 21a43f14ffac..ff5bb4489922 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -18,7 +18,6 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y # CONFIG_UID16 is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_ALL=y # CONFIG_ELF_CORE is not set # CONFIG_COMPAT_BRK is not set diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig index 4e794e719a28..5d6c19338ebf 100644 --- a/arch/sh/configs/se7343_defconfig +++ b/arch/sh/configs/se7343_defconfig @@ -2,7 +2,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SHMEM is not set diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig index 3264415a5931..71a672c30716 100644 --- a/arch/sh/configs/se7619_defconfig +++ b/arch/sh/configs/se7619_defconfig @@ -1,7 +1,6 @@ # CONFIG_LOCALVERSION_AUTO is not set CONFIG_LOG_BUF_SHIFT=14 # CONFIG_UID16 is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_ELF_CORE is not set diff --git a/arch/sh/configs/se7705_defconfig b/arch/sh/configs/se7705_defconfig index 4496b94b7d88..ed00a6eeadf5 100644 --- a/arch/sh/configs/se7705_defconfig +++ b/arch/sh/configs/se7705_defconfig @@ -2,7 +2,6 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y diff --git a/arch/sh/configs/se7750_defconfig b/arch/sh/configs/se7750_defconfig index b23f67542728..3f1c13799d79 100644 --- a/arch/sh/configs/se7750_defconfig +++ b/arch/sh/configs/se7750_defconfig @@ -5,7 +5,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/se7751_defconfig b/arch/sh/configs/se7751_defconfig index 162343683937..4a024065bb75 100644 --- a/arch/sh/configs/se7751_defconfig +++ b/arch/sh/configs/se7751_defconfig @@ -3,7 +3,6 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/secureedge5410_defconfig b/arch/sh/configs/secureedge5410_defconfig index 360592d63a2f..8422599cfb04 100644 --- a/arch/sh/configs/secureedge5410_defconfig +++ b/arch/sh/configs/secureedge5410_defconfig @@ -1,7 +1,6 @@ # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_HOTPLUG is not set CONFIG_SLAB=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index 87db9a84b5ec..f0073ed39947 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -3,7 +3,6 @@ CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=m diff --git a/arch/sh/configs/sh7710voipgw_defconfig b/arch/sh/configs/sh7710voipgw_defconfig index 08426913c0e3..0d814770b07f 100644 --- a/arch/sh/configs/sh7710voipgw_defconfig +++ b/arch/sh/configs/sh7710voipgw_defconfig @@ -2,7 +2,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SHMEM is not set diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig index d0933a9b9799..a2700ab165af 100644 --- a/arch/sh/configs/sh7757lcr_defconfig +++ b/arch/sh/configs/sh7757lcr_defconfig @@ -8,7 +8,6 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig index d0a0aa74cecf..26c5fd02c87a 100644 --- a/arch/sh/configs/sh7763rdp_defconfig +++ b/arch/sh/configs/sh7763rdp_defconfig @@ -5,7 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_IPC_NS=y -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig index a27b129b93c5..c0b6f40d01cc 100644 --- a/arch/sh/configs/shmin_defconfig +++ b/arch/sh/configs/shmin_defconfig @@ -1,7 +1,6 @@ # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 # CONFIG_UID16 is not set -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_BUG is not set diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 4ec961ace688..ba887f1351be 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -6,7 +6,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 96848db9659e..ae0a00beea5f 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -156,7 +156,7 @@ 146 common writev sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 common _sysctl sys_sysctl +149 common _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 46024e80ee86..4af114e84f20 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -300,7 +300,7 @@ 249 64 nanosleep sys_nanosleep 250 32 mremap sys_mremap 250 64 mremap sys_64_mremap -251 common _sysctl sys_sysctl compat_sys_sysctl +251 common _sysctl sys_ni_syscall 252 common getsid sys_getsid 253 common fdatasync sys_fdatasync 254 32 nfsservctl sys_ni_syscall sys_nis_syscall diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index e31a75262c9c..9d1102873666 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -160,7 +160,7 @@ 146 i386 writev sys_writev compat_sys_writev 147 i386 getsid sys_getsid 148 i386 fdatasync sys_fdatasync -149 i386 _sysctl sys_sysctl compat_sys_sysctl +149 i386 _sysctl sys_ni_syscall 150 i386 mlock sys_mlock 151 i386 munlock sys_munlock 152 i386 mlockall sys_mlockall diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 9d82078c949a..f30d6ae9a688 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -164,7 +164,7 @@ 153 common vhangup sys_vhangup 154 common modify_ldt sys_modify_ldt 155 common pivot_root sys_pivot_root -156 64 _sysctl sys_sysctl +156 64 _sysctl sys_ni_syscall 157 common prctl sys_prctl 158 common arch_prctl sys_arch_prctl 159 common adjtimex sys_adjtimex diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index d216ccba42f7..6276e3c2d3fc 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -222,7 +222,7 @@ 204 common quotactl sys_quotactl # 205 was old nfsservctl 205 common nfsservctl sys_ni_syscall -206 common _sysctl sys_sysctl +206 common _sysctl sys_ni_syscall 207 common bdflush sys_bdflush 208 common uname sys_newuname 209 common sysinfo sys_sysinfo diff --git a/include/linux/compat.h b/include/linux/compat.h index c4255d8a4a8a..d38c4d7e83bd 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -851,7 +851,6 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32); asmlinkage long compat_sys_recv(int fd, void __user *buf, compat_size_t len, unsigned flags); -asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args); /* obsolete: fs/readdir.c */ asmlinkage long compat_sys_old_readdir(unsigned int fd, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index dc2b827c81e5..75ac7f8ae93c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -47,7 +47,6 @@ struct stat64; struct statfs; struct statfs64; struct statx; -struct __sysctl_args; struct sysinfo; struct timespec; struct __kernel_old_timeval; @@ -1117,7 +1116,6 @@ asmlinkage long sys_send(int, void __user *, size_t, unsigned); asmlinkage long sys_bdflush(int func, long data); asmlinkage long sys_oldumount(char __user *name); asmlinkage long sys_uselib(const char __user *library); -asmlinkage long sys_sysctl(struct __sysctl_args __user *args); asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2); asmlinkage long sys_fork(void); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 50bb7f383a1b..51298a4f4623 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -74,15 +74,13 @@ int proc_do_static_key(struct ctl_table *table, int write, void *buffer, * sysctl names can be mirrored automatically under /proc/sys. The * procname supplied controls /proc naming. * - * The table's mode will be honoured both for sys_sysctl(2) and - * proc-fs access. + * The table's mode will be honoured for proc-fs access. * * Leaf nodes in the sysctl tree will be represented by a single file * under /proc; non-leaf nodes will be represented by directories. A * null procname disables /proc mirroring at this node. * - * sysctl(2) can automatically manage read and write requests through - * the sysctl table. The data and maxlen fields of the ctl_table + * The data and maxlen fields of the ctl_table * struct enable minimal validation of the values being written to be * performed, and the mode field allows minimal authentication. * diff --git a/kernel/Makefile b/kernel/Makefile index b3da548691c9..9a20016d4900 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -5,7 +5,7 @@ obj-y = fork.o exec_domain.o panic.o \ cpu.o exit.o softirq.o resource.o \ - sysctl.o sysctl_binary.o capability.o ptrace.o user.o \ + sysctl.o capability.o ptrace.o user.o \ signal.o sys.o umh.o workqueue.o pid.o task_work.o \ extable.o params.o \ kthread.o sys_ni.o nsproxy.o \ diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 3b69a560a7ac..4d59775ea79c 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -364,7 +364,6 @@ COND_SYSCALL(socketcall); COND_SYSCALL_COMPAT(socketcall); /* compat syscalls for arm64, x86, ... */ -COND_SYSCALL_COMPAT(sysctl); COND_SYSCALL_COMPAT(fanotify_mark); /* x86 */ diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c deleted file mode 100644 index 7d550cc76a3b..000000000000 --- a/kernel/sysctl_binary.c +++ /dev/null @@ -1,171 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include "../fs/xfs/xfs_sysctl.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static ssize_t binary_sysctl(const int *name, int nlen, - void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) -{ - return -ENOSYS; -} - -static void deprecated_sysctl_warning(const int *name, int nlen) -{ - int i; - - /* - * CTL_KERN/KERN_VERSION is used by older glibc and cannot - * ever go away. - */ - if (nlen >= 2 && name[0] == CTL_KERN && name[1] == KERN_VERSION) - return; - - if (printk_ratelimit()) { - printk(KERN_INFO - "warning: process `%s' used the deprecated sysctl " - "system call with ", current->comm); - for (i = 0; i < nlen; i++) - printk(KERN_CONT "%d.", name[i]); - printk(KERN_CONT "\n"); - } - return; -} - -#define WARN_ONCE_HASH_BITS 8 -#define WARN_ONCE_HASH_SIZE (1<nlen. */ - if (nlen < 0 || nlen > CTL_MAXNAME) - return -ENOTDIR; - /* Read in the sysctl name for simplicity */ - for (i = 0; i < nlen; i++) - if (get_user(name[i], args_name + i)) - return -EFAULT; - - warn_on_bintable(name, nlen); - - return binary_sysctl(name, nlen, oldval, oldlen, newval, newlen); -} - -SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args) -{ - struct __sysctl_args tmp; - size_t oldlen = 0; - ssize_t result; - - if (copy_from_user(&tmp, args, sizeof(tmp))) - return -EFAULT; - - if (tmp.oldval && !tmp.oldlenp) - return -EFAULT; - - if (tmp.oldlenp && get_user(oldlen, tmp.oldlenp)) - return -EFAULT; - - result = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, oldlen, - tmp.newval, tmp.newlen); - - if (result >= 0) { - oldlen = result; - result = 0; - } - - if (tmp.oldlenp && put_user(oldlen, tmp.oldlenp)) - return -EFAULT; - - return result; -} - - -#ifdef CONFIG_COMPAT - -struct compat_sysctl_args { - compat_uptr_t name; - int nlen; - compat_uptr_t oldval; - compat_uptr_t oldlenp; - compat_uptr_t newval; - compat_size_t newlen; - compat_ulong_t __unused[4]; -}; - -COMPAT_SYSCALL_DEFINE1(sysctl, struct compat_sysctl_args __user *, args) -{ - struct compat_sysctl_args tmp; - compat_size_t __user *compat_oldlenp; - size_t oldlen = 0; - ssize_t result; - - if (copy_from_user(&tmp, args, sizeof(tmp))) - return -EFAULT; - - if (tmp.oldval && !tmp.oldlenp) - return -EFAULT; - - compat_oldlenp = compat_ptr(tmp.oldlenp); - if (compat_oldlenp && get_user(oldlen, compat_oldlenp)) - return -EFAULT; - - result = do_sysctl(compat_ptr(tmp.name), tmp.nlen, - compat_ptr(tmp.oldval), oldlen, - compat_ptr(tmp.newval), tmp.newlen); - - if (result >= 0) { - oldlen = result; - result = 0; - } - - if (compat_oldlenp && put_user(oldlen, compat_oldlenp)) - return -EFAULT; - - return result; -} - -#endif /* CONFIG_COMPAT */ diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index b190f2eb2611..3ca6fe057a0b 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -193,7 +193,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 nospu _sysctl sys_sysctl compat_sys_sysctl +149 nospu _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 56ae24b6e4be..6a0bbea225db 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -138,7 +138,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync -149 common _sysctl sys_sysctl compat_sys_sysctl +149 common _sysctl - - 150 common mlock sys_mlock compat_sys_mlock 151 common munlock sys_munlock compat_sys_munlock 152 common mlockall sys_mlockall sys_mlockall diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 9d82078c949a..f30d6ae9a688 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -164,7 +164,7 @@ 153 common vhangup sys_vhangup 154 common modify_ldt sys_modify_ldt 155 common pivot_root sys_pivot_root -156 64 _sysctl sys_sysctl +156 64 _sysctl sys_ni_syscall 157 common prctl sys_prctl 158 common arch_prctl sys_arch_prctl 159 common adjtimex sys_adjtimex From 69d0b54d41f915ae45afed6669d34a305cf95ff3 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:14 -0700 Subject: [PATCH 16/35] mm/kmemleak: silence KCSAN splats in checksum Even if KCSAN is disabled for kmemleak, update_checksum() could still call crc32() (which is outside of kmemleak.c) to dereference object->pointer. Thus, the value of object->pointer could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in crc32_le_base / do_raw_spin_lock write to 0xffffb0ea683a7d50 of 4 bytes by task 23575 on cpu 12: do_raw_spin_lock+0x114/0x200 debug_spin_lock_after at kernel/locking/spinlock_debug.c:91 (inlined by) do_raw_spin_lock at kernel/locking/spinlock_debug.c:115 _raw_spin_lock+0x40/0x50 __handle_mm_fault+0xa9e/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffffb0ea683a7d50 of 4 bytes by task 839 on cpu 60: crc32_le_base+0x67/0x350 crc32_le_base+0x67/0x350: crc32_body at lib/crc32.c:106 (inlined by) crc32_le_generic at lib/crc32.c:179 (inlined by) crc32_le at lib/crc32.c:197 kmemleak_scan+0x528/0xd90 update_checksum at mm/kmemleak.c:1172 (inlined by) kmemleak_scan at mm/kmemleak.c:1497 kmemleak_scan_thread+0xcc/0xfa kthread+0x1e0/0x200 ret_from_fork+0x27/0x50 If a shattered value was returned due to a data race, it will be corrected in the next scan. Thus, let KCSAN ignore all reads in the region to silence KCSAN in case the write side is non-atomic. Suggested-by: Marco Elver Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Acked-by: Marco Elver Acked-by: Catalin Marinas Link: http://lkml.kernel.org/r/20200317182754.2180-1-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e362dc3d2028..5e252d91eb14 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1169,8 +1169,10 @@ static bool update_checksum(struct kmemleak_object *object) u32 old_csum = object->checksum; kasan_disable_current(); + kcsan_disable_current(); object->checksum = crc32(0, (void *)object->pointer, object->size); kasan_enable_current(); + kcsan_enable_current(); return object->checksum != old_csum; } From 96bdd2bcc1a79818eac6902d6f9565b720f726aa Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:17 -0700 Subject: [PATCH 17/35] mm/frontswap: mark various intentional data races There are a few information counters that are intentionally not protected against increment races, so just annotate them using the data_race() macro. BUG: KCSAN: data-race in __frontswap_store / __frontswap_store write to 0xffffffff8b7174d8 of 8 bytes by task 6396 on cpu 103: __frontswap_store+0x2d0/0x344 inc_frontswap_failed_stores at mm/frontswap.c:70 (inlined by) __frontswap_store at mm/frontswap.c:280 swap_writepage+0x83/0xf0 pageout+0x33e/0xae0 shrink_page_list+0x1f57/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x170/0x700 __handle_mm_fault+0xc9f/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffffffff8b7174d8 of 8 bytes by task 6405 on cpu 47: __frontswap_store+0x2b9/0x344 inc_frontswap_failed_stores at mm/frontswap.c:70 (inlined by) __frontswap_store at mm/frontswap.c:280 swap_writepage+0x83/0xf0 pageout+0x33e/0xae0 shrink_page_list+0x1f57/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x170/0x700 __handle_mm_fault+0xc9f/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Marco Elver Cc: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/1581114499-5042-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/frontswap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/frontswap.c b/mm/frontswap.c index 9d977b1fc016..2183a56c7874 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -61,16 +61,16 @@ static u64 frontswap_failed_stores; static u64 frontswap_invalidates; static inline void inc_frontswap_loads(void) { - frontswap_loads++; + data_race(frontswap_loads++); } static inline void inc_frontswap_succ_stores(void) { - frontswap_succ_stores++; + data_race(frontswap_succ_stores++); } static inline void inc_frontswap_failed_stores(void) { - frontswap_failed_stores++; + data_race(frontswap_failed_stores++); } static inline void inc_frontswap_invalidates(void) { - frontswap_invalidates++; + data_race(frontswap_invalidates++); } #else static inline void inc_frontswap_loads(void) { } From 7b37e22675dfe8673083aaea84c9e7d6e436abbc Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:20 -0700 Subject: [PATCH 18/35] mm/page_io: mark various intentional data races struct swap_info_struct si.flags could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in scan_swap_map_slots / swap_readpage write to 0xffff9c77b80ac400 of 8 bytes by task 91325 on cpu 16: scan_swap_map_slots+0x6fe/0xb50 scan_swap_map_slots at mm/swapfile.c:887 get_swap_pages+0x39d/0x5c0 get_swap_page+0x377/0x524 add_to_swap+0xe4/0x1c0 shrink_page_list+0x1740/0x2820 shrink_inactive_list+0x316/0x8b0 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x170/0x700 __handle_mm_fault+0xc9f/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffff9c77b80ac400 of 8 bytes by task 5422 on cpu 7: swap_readpage+0x204/0x6a0 swap_readpage at mm/page_io.c:380 read_swap_cache_async+0xa2/0xb0 swapin_readahead+0x6a0/0x890 do_swap_page+0x465/0xeb0 __handle_mm_fault+0xc7a/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 Reported by Kernel Concurrency Sanitizer on: CPU: 7 PID: 5422 Comm: gmain Tainted: G W O L 5.5.0-next-20200204+ #6 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 Other reads, read to 0xffff91ea33eac400 of 8 bytes by task 11276 on cpu 120: __swap_writepage+0x140/0xc20 __swap_writepage at mm/page_io.c:289 read to 0xffff91ea33eac400 of 8 bytes by task 11264 on cpu 16: swap_set_page_dirty+0x44/0x1f4 swap_set_page_dirty at mm/page_io.c:442 The write is under &si->lock, but the reads are done as lockless. Since the reads only check for a specific bit in the flag, it is harmless even if load tearing happens. Thus, just mark them as intentional data races using the data_race() macro. [cai@lca.pw: add a missing annotation] Link: http://lkml.kernel.org/r/1581612585-5812-1-git-send-email-cai@lca.pw Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Marco Elver Link: http://lkml.kernel.org/r/20200207003601.1526-1-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/page_io.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/page_io.c b/mm/page_io.c index 454b70d8cda7..e485a6e8a6cd 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -85,7 +85,7 @@ static void swap_slot_free_notify(struct page *page) return; sis = page_swap_info(page); - if (!(sis->flags & SWP_BLKDEV)) + if (data_race(!(sis->flags & SWP_BLKDEV))) return; /* @@ -302,7 +302,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, struct swap_info_struct *sis = page_swap_info(page); VM_BUG_ON_PAGE(!PageSwapCache(page), page); - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct kiocb kiocb; struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; @@ -393,7 +393,7 @@ int swap_readpage(struct page *page, bool synchronous) goto out; } - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; @@ -455,7 +455,7 @@ int swap_set_page_dirty(struct page *page) { struct swap_info_struct *sis = page_swap_info(page); - if (sis->flags & SWP_FS) { + if (data_race(sis->flags & SWP_FS)) { struct address_space *mapping = sis->swap_file->f_mapping; VM_BUG_ON_PAGE(!PageSwapCache(page), page); From b96a3db2f37485e902794f28130f70dc834796a4 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:24 -0700 Subject: [PATCH 19/35] mm/swap_state: mark various intentional data races swap_cache_info.* could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in lookup_swap_cache / lookup_swap_cache write to 0xffffffff85517318 of 8 bytes by task 94138 on cpu 101: lookup_swap_cache+0x12e/0x460 lookup_swap_cache at mm/swap_state.c:322 do_swap_page+0x112/0xeb0 __handle_mm_fault+0xc7a/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffffffff85517318 of 8 bytes by task 91655 on cpu 100: lookup_swap_cache+0x117/0x460 lookup_swap_cache at mm/swap_state.c:322 shmem_swapin_page+0xc7/0x9e0 shmem_getpage_gfp+0x2ca/0x16c0 shmem_fault+0xef/0x3c0 __do_fault+0x9e/0x220 do_fault+0x4a0/0x920 __handle_mm_fault+0xc69/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 Reported by Kernel Concurrency Sanitizer on: CPU: 100 PID: 91655 Comm: systemd-journal Tainted: G W O L 5.5.0-next-20200204+ #6 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 write to 0xffffffff8d717308 of 8 bytes by task 11365 on cpu 87: __delete_from_swap_cache+0x681/0x8b0 __delete_from_swap_cache at mm/swap_state.c:178 read to 0xffffffff8d717308 of 8 bytes by task 11275 on cpu 53: __delete_from_swap_cache+0x66e/0x8b0 __delete_from_swap_cache at mm/swap_state.c:178 Both the read and write are done as lockless. Since swap_cache_info.* are only used to print out counter information, even if any of them missed a few incremental due to data races, it will be harmless, so just mark it as an intentional data race using the data_race() macro. While at it, fix a checkpatch.pl warning, WARNING: Single statement macros should not use a do {} while (0) loop Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Marco Elver Link: http://lkml.kernel.org/r/20200207003715.1578-1-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/swap_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/swap_state.c b/mm/swap_state.c index d9d4a49f3241..c16eebb81d8b 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -57,8 +57,8 @@ static bool enable_vma_readahead __read_mostly = true; #define GET_SWAP_RA_VAL(vma) \ (atomic_long_read(&(vma)->swap_readahead_info) ? : 4) -#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) -#define ADD_CACHE_INFO(x, nr) do { swap_cache_info.x += (nr); } while (0) +#define INC_CACHE_INFO(x) data_race(swap_cache_info.x++) +#define ADD_CACHE_INFO(x, nr) data_race(swap_cache_info.x += (nr)) static struct { unsigned long add_total; From e630bfac79456d3acd22c9286b50e83aafb7a07c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 14 Aug 2020 17:31:27 -0700 Subject: [PATCH 20/35] mm/filemap.c: fix a data race in filemap_fault() struct file_ra_state ra.mmap_miss could be accessed concurrently during page faults as noticed by KCSAN, BUG: KCSAN: data-race in filemap_fault / filemap_map_pages write to 0xffff9b1700a2c1b4 of 4 bytes by task 3292 on cpu 30: filemap_fault+0x920/0xfc0 do_sync_mmap_readahead at mm/filemap.c:2384 (inlined by) filemap_fault at mm/filemap.c:2486 __xfs_filemap_fault+0x112/0x3e0 [xfs] xfs_filemap_fault+0x74/0x90 [xfs] __do_fault+0x9e/0x220 do_fault+0x4a0/0x920 __handle_mm_fault+0xc69/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffff9b1700a2c1b4 of 4 bytes by task 3313 on cpu 32: filemap_map_pages+0xc2e/0xd80 filemap_map_pages at mm/filemap.c:2625 do_fault+0x3da/0x920 __handle_mm_fault+0xc69/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 Reported by Kernel Concurrency Sanitizer on: CPU: 32 PID: 3313 Comm: systemd-udevd Tainted: G W L 5.5.0-next-20200210+ #1 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 ra.mmap_miss is used to contribute the readahead decisions, a data race could be undesirable. Both the read and write is only under non-exclusive mmap_sem, two concurrent writers could even underflow the counter. Fix the underflow by writing to a local variable before committing a final store to ra.mmap_miss given a small inaccuracy of the counter should be acceptable. Signed-off-by: Kirill A. Shutemov Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Tested-by: Qian Cai Reviewed-by: Matthew Wilcox (Oracle) Cc: Marco Elver Link: http://lkml.kernel.org/r/20200211030134.1847-1-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/filemap.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 653190943aa7..1aaea26556cc 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2468,6 +2468,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) struct address_space *mapping = file->f_mapping; struct file *fpin = NULL; pgoff_t offset = vmf->pgoff; + unsigned int mmap_miss; /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ) @@ -2483,14 +2484,15 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) } /* Avoid banging the cache line if not needed */ - if (ra->mmap_miss < MMAP_LOTSAMISS * 10) - ra->mmap_miss++; + mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss < MMAP_LOTSAMISS * 10) + WRITE_ONCE(ra->mmap_miss, ++mmap_miss); /* * Do we miss much more than hit in this file? If so, * stop bothering with read-ahead. It will only hurt. */ - if (ra->mmap_miss > MMAP_LOTSAMISS) + if (mmap_miss > MMAP_LOTSAMISS) return fpin; /* @@ -2516,13 +2518,15 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, struct file_ra_state *ra = &file->f_ra; struct address_space *mapping = file->f_mapping; struct file *fpin = NULL; + unsigned int mmap_miss; pgoff_t offset = vmf->pgoff; /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages) return fpin; - if (ra->mmap_miss > 0) - ra->mmap_miss--; + mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss) + WRITE_ONCE(ra->mmap_miss, --mmap_miss); if (PageReadahead(page)) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); page_cache_async_readahead(mapping, ra, file, @@ -2688,6 +2692,7 @@ void filemap_map_pages(struct vm_fault *vmf, unsigned long max_idx; XA_STATE(xas, &mapping->i_pages, start_pgoff); struct page *page; + unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); rcu_read_lock(); xas_for_each(&xas, page, end_pgoff) { @@ -2724,8 +2729,8 @@ void filemap_map_pages(struct vm_fault *vmf, if (page->index >= max_idx) goto unlock; - if (file->f_ra.mmap_miss > 0) - file->f_ra.mmap_miss--; + if (mmap_miss > 0) + mmap_miss--; vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT; if (vmf->pte) @@ -2745,6 +2750,7 @@ next: break; } rcu_read_unlock(); + WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss); } EXPORT_SYMBOL(filemap_map_pages); From a449bf58e45abf919b27ba62a9dcbae3ad36e725 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:31 -0700 Subject: [PATCH 21/35] mm/swapfile: fix and annotate various data races swap_info_struct si.highest_bit, si.swap_map[offset] and si.flags could be accessed concurrently separately as noticed by KCSAN, === si.highest_bit === write to 0xffff8d5abccdc4d4 of 4 bytes by task 5353 on cpu 24: swap_range_alloc+0x81/0x130 swap_range_alloc at mm/swapfile.c:681 scan_swap_map_slots+0x371/0xb90 get_swap_pages+0x39d/0x5c0 get_swap_page+0xf2/0x524 add_to_swap+0xe4/0x1c0 shrink_page_list+0x1795/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 read to 0xffff8d5abccdc4d4 of 4 bytes by task 6672 on cpu 70: scan_swap_map_slots+0x4a6/0xb90 scan_swap_map_slots at mm/swapfile.c:892 get_swap_pages+0x39d/0x5c0 get_swap_page+0xf2/0x524 add_to_swap+0xe4/0x1c0 shrink_page_list+0x1795/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 Reported by Kernel Concurrency Sanitizer on: CPU: 70 PID: 6672 Comm: oom01 Tainted: G W L 5.5.0-next-20200205+ #3 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 === si.swap_map[offset] === write to 0xffffbc370c29a64c of 1 bytes by task 6856 on cpu 86: __swap_entry_free_locked+0x8c/0x100 __swap_entry_free_locked at mm/swapfile.c:1209 (discriminator 4) __swap_entry_free.constprop.20+0x69/0xb0 free_swap_and_cache+0x53/0xa0 unmap_page_range+0x7f8/0x1d70 unmap_single_vma+0xcd/0x170 unmap_vmas+0x18b/0x220 exit_mmap+0xee/0x220 mmput+0x10e/0x270 do_exit+0x59b/0xf40 do_group_exit+0x8b/0x180 read to 0xffffbc370c29a64c of 1 bytes by task 6855 on cpu 20: _swap_info_get+0x81/0xa0 _swap_info_get at mm/swapfile.c:1140 free_swap_and_cache+0x40/0xa0 unmap_page_range+0x7f8/0x1d70 unmap_single_vma+0xcd/0x170 unmap_vmas+0x18b/0x220 exit_mmap+0xee/0x220 mmput+0x10e/0x270 do_exit+0x59b/0xf40 do_group_exit+0x8b/0x180 === si.flags === write to 0xffff956c8fc6c400 of 8 bytes by task 6087 on cpu 23: scan_swap_map_slots+0x6fe/0xb50 scan_swap_map_slots at mm/swapfile.c:887 get_swap_pages+0x39d/0x5c0 get_swap_page+0x377/0x524 add_to_swap+0xe4/0x1c0 shrink_page_list+0x1795/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 read to 0xffff956c8fc6c400 of 8 bytes by task 6207 on cpu 63: _swap_info_get+0x41/0xa0 __swap_info_get at mm/swapfile.c:1114 put_swap_page+0x84/0x490 __remove_mapping+0x384/0x5f0 shrink_page_list+0xff1/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 The writes are under si->lock but the reads are not. For si.highest_bit and si.swap_map[offset], data race could trigger logic bugs, so fix them by having WRITE_ONCE() for the writes and READ_ONCE() for the reads except those isolated reads where they compare against zero which a data race would cause no harm. Thus, annotate them as intentional data races using the data_race() macro. For si.flags, the readers are only interested in a single bit where a data race there would cause no issue there. [cai@lca.pw: add a missing annotation for si->flags in memory.c] Link: http://lkml.kernel.org/r/1581612647-5958-1-git-send-email-cai@lca.pw Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Marco Elver Cc: Hugh Dickins Link: http://lkml.kernel.org/r/1581095163-12198-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/memory.c | 4 ++-- mm/swapfile.c | 31 ++++++++++++++++++------------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 228efaca75d3..dbc123535827 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3130,8 +3130,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!page) { struct swap_info_struct *si = swp_swap_info(entry); - if (si->flags & SWP_SYNCHRONOUS_IO && - __swap_count(entry) == 1) { + if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && + __swap_count(entry) == 1) { /* skip swapcache */ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); diff --git a/mm/swapfile.c b/mm/swapfile.c index eb410d3c8de8..12f59e641b5e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -672,7 +672,7 @@ static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset, if (offset == si->lowest_bit) si->lowest_bit += nr_entries; if (end == si->highest_bit) - si->highest_bit -= nr_entries; + WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries); si->inuse_pages += nr_entries; if (si->inuse_pages == si->pages) { si->lowest_bit = si->max; @@ -705,7 +705,7 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset, if (end > si->highest_bit) { bool was_full = !si->highest_bit; - si->highest_bit = end; + WRITE_ONCE(si->highest_bit, end); if (was_full && (si->flags & SWP_WRITEOK)) add_to_avail_list(si); } @@ -870,7 +870,7 @@ checks: else goto done; } - si->swap_map[offset] = usage; + WRITE_ONCE(si->swap_map[offset], usage); inc_cluster_info_page(si, si->cluster_info, offset); unlock_cluster(ci); @@ -929,12 +929,13 @@ done: scan: spin_unlock(&si->lock); - while (++offset <= si->highest_bit) { - if (!si->swap_map[offset]) { + while (++offset <= READ_ONCE(si->highest_bit)) { + if (data_race(!si->swap_map[offset])) { spin_lock(&si->lock); goto checks; } - if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { + if (vm_swap_full() && + READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { spin_lock(&si->lock); goto checks; } @@ -946,11 +947,12 @@ scan: } offset = si->lowest_bit; while (offset < scan_base) { - if (!si->swap_map[offset]) { + if (data_race(!si->swap_map[offset])) { spin_lock(&si->lock); goto checks; } - if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { + if (vm_swap_full() && + READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { spin_lock(&si->lock); goto checks; } @@ -1149,7 +1151,7 @@ static struct swap_info_struct *__swap_info_get(swp_entry_t entry) p = swp_swap_info(entry); if (!p) goto bad_nofile; - if (!(p->flags & SWP_USED)) + if (data_race(!(p->flags & SWP_USED))) goto bad_device; offset = swp_offset(entry); if (offset >= p->max) @@ -1175,7 +1177,7 @@ static struct swap_info_struct *_swap_info_get(swp_entry_t entry) p = __swap_info_get(entry); if (!p) goto out; - if (!p->swap_map[swp_offset(entry)]) + if (data_race(!p->swap_map[swp_offset(entry)])) goto bad_free; return p; @@ -1244,7 +1246,10 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, } usage = count | has_cache; - p->swap_map[offset] = usage ? : SWAP_HAS_CACHE; + if (usage) + WRITE_ONCE(p->swap_map[offset], usage); + else + WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE); return usage; } @@ -1296,7 +1301,7 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry) goto bad_nofile; rcu_read_lock(); - if (!(si->flags & SWP_VALID)) + if (data_race(!(si->flags & SWP_VALID))) goto unlock_out; offset = swp_offset(entry); if (offset >= si->max) @@ -3484,7 +3489,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) } else err = -ENOENT; /* unused swap entry */ - p->swap_map[offset] = count | has_cache; + WRITE_ONCE(p->swap_map[offset], count | has_cache); unlock_out: unlock_cluster_or_swap_info(p, ci); From 6e4bd50f3888fa8fea8bc66a0ad4ad5f1c862961 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:34 -0700 Subject: [PATCH 22/35] mm/page_counter: fix various data races at memsw Commit 3e32cb2e0a12 ("mm: memcontrol: lockless page counters") could had memcg->memsw->watermark and memcg->memsw->failcnt been accessed concurrently as reported by KCSAN, BUG: KCSAN: data-race in page_counter_try_charge / page_counter_try_charge read to 0xffff8fb18c4cd190 of 8 bytes by task 1081 on cpu 59: page_counter_try_charge+0x4d/0x150 mm/page_counter.c:138 try_charge+0x131/0xd50 mm/memcontrol.c:2405 __memcg_kmem_charge_memcg+0x58/0x140 __memcg_kmem_charge+0xcc/0x280 __alloc_pages_nodemask+0x1e1/0x450 alloc_pages_current+0xa6/0x120 pte_alloc_one+0x17/0xd0 __pte_alloc+0x3a/0x1f0 copy_p4d_range+0xc36/0x1990 copy_page_range+0x21d/0x360 dup_mmap+0x5f5/0x7a0 dup_mm+0xa2/0x240 copy_process+0x1b3f/0x3460 _do_fork+0xaa/0xa20 __x64_sys_clone+0x13b/0x170 do_syscall_64+0x91/0xb47 entry_SYSCALL_64_after_hwframe+0x49/0xbe write to 0xffff8fb18c4cd190 of 8 bytes by task 1153 on cpu 120: page_counter_try_charge+0x5b/0x150 mm/page_counter.c:139 try_charge+0x131/0xd50 mm/memcontrol.c:2405 mem_cgroup_try_charge+0x159/0x460 mem_cgroup_try_charge_delay+0x3d/0xa0 wp_page_copy+0x14d/0x930 do_wp_page+0x107/0x7b0 __handle_mm_fault+0xce6/0xd40 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 BUG: KCSAN: data-race in page_counter_try_charge / page_counter_try_charge write to 0xffff88809bbf2158 of 8 bytes by task 11782 on cpu 0: page_counter_try_charge+0x100/0x170 mm/page_counter.c:129 try_charge+0x185/0xbf0 mm/memcontrol.c:2405 __memcg_kmem_charge_memcg+0x4a/0xe0 mm/memcontrol.c:2837 __memcg_kmem_charge+0xcf/0x1b0 mm/memcontrol.c:2877 __alloc_pages_nodemask+0x26c/0x310 mm/page_alloc.c:4780 read to 0xffff88809bbf2158 of 8 bytes by task 11814 on cpu 1: page_counter_try_charge+0xef/0x170 mm/page_counter.c:129 try_charge+0x185/0xbf0 mm/memcontrol.c:2405 __memcg_kmem_charge_memcg+0x4a/0xe0 mm/memcontrol.c:2837 __memcg_kmem_charge+0xcf/0x1b0 mm/memcontrol.c:2877 __alloc_pages_nodemask+0x26c/0x310 mm/page_alloc.c:4780 Since watermark could be compared or set to garbage due to a data race which would change the code logic, fix it by adding a pair of READ_ONCE() and WRITE_ONCE() in those places. The "failcnt" counter is tolerant of some degree of inaccuracy and is only used to report stats, a data race will not be harmful, thus mark it as an intentional data race using the data_race() macro. Fixes: 3e32cb2e0a12 ("mm: memcontrol: lockless page counters") Reported-by: syzbot+f36cfe60b1006a94f9dc@syzkaller.appspotmail.com Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Acked-by: Michal Hocko Cc: David Hildenbrand Cc: Tetsuo Handa Cc: Marco Elver Cc: Dmitry Vyukov Cc: Johannes Weiner Link: http://lkml.kernel.org/r/1581519682-23594-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/page_counter.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mm/page_counter.c b/mm/page_counter.c index b4663844c9b3..afe22ad335cc 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -77,8 +77,8 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) * This is indeed racy, but we can live with some * inaccuracy in the watermark. */ - if (new > c->watermark) - c->watermark = new; + if (new > READ_ONCE(c->watermark)) + WRITE_ONCE(c->watermark, new); } } @@ -119,9 +119,10 @@ bool page_counter_try_charge(struct page_counter *counter, propagate_protected_usage(c, new); /* * This is racy, but we can live with some - * inaccuracy in the failcnt. + * inaccuracy in the failcnt which is only used + * to report stats. */ - c->failcnt++; + data_race(c->failcnt++); *fail = c; goto failed; } @@ -130,8 +131,8 @@ bool page_counter_try_charge(struct page_counter *counter, * Just like with failcnt, we can live with some * inaccuracy in the watermark. */ - if (new > c->watermark) - c->watermark = new; + if (new > READ_ONCE(c->watermark)) + WRITE_ONCE(c->watermark, new); } return true; From e0e3f42fd96cf830c61aa720f0abb4eef41c5ce3 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:37 -0700 Subject: [PATCH 23/35] mm/memcontrol: fix a data race in scan count struct mem_cgroup_per_node mz.lru_zone_size[zone_idx][lru] could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in lruvec_lru_size / mem_cgroup_update_lru_size write to 0xffff9c804ca285f8 of 8 bytes by task 50951 on cpu 12: mem_cgroup_update_lru_size+0x11c/0x1d0 mem_cgroup_update_lru_size at mm/memcontrol.c:1266 isolate_lru_pages+0x6a9/0xf30 shrink_active_list+0x123/0xcc0 shrink_lruvec+0x8fd/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x170/0x700 __handle_mm_fault+0xc9f/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffff9c804ca285f8 of 8 bytes by task 50964 on cpu 95: lruvec_lru_size+0xbb/0x270 mem_cgroup_get_zone_lru_size at include/linux/memcontrol.h:536 (inlined by) lruvec_lru_size at mm/vmscan.c:326 shrink_lruvec+0x1d0/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_current+0xa6/0x120 alloc_slab_page+0x3b1/0x540 allocate_slab+0x70/0x660 new_slab+0x46/0x70 ___slab_alloc+0x4ad/0x7d0 __slab_alloc+0x43/0x70 kmem_cache_alloc+0x2c3/0x420 getname_flags+0x4c/0x230 getname+0x22/0x30 do_sys_openat2+0x205/0x3b0 do_sys_open+0x9a/0xf0 __x64_sys_openat+0x62/0x80 do_syscall_64+0x91/0xb47 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reported by Kernel Concurrency Sanitizer on: CPU: 95 PID: 50964 Comm: cc1 Tainted: G W O L 5.5.0-next-20200204+ #6 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 The write is under lru_lock, but the read is done as lockless. The scan count is used to determine how aggressively the anon and file LRU lists should be scanned. Load tearing could generate an inefficient heuristic, so fix it by adding READ_ONCE() for the read. Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Link: http://lkml.kernel.org/r/20200206034945.2481-1-cai@lca.pw Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 385237e4cb44..d0b036123c6a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -630,7 +630,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, struct mem_cgroup_per_node *mz; mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - return mz->lru_zone_size[zone_idx][lru]; + return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } void mem_cgroup_handle_over_high(void); From a1f459354a0f011a7baf52e5f16cd9ca95f1e6f2 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:41 -0700 Subject: [PATCH 24/35] mm/list_lru: fix a data race in list_lru_count_one struct list_lru_one l.nr_items could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in list_lru_count_one / list_lru_isolate_move write to 0xffffa102789c4510 of 8 bytes by task 823 on cpu 39: list_lru_isolate_move+0xf9/0x130 list_lru_isolate_move at mm/list_lru.c:180 inode_lru_isolate+0x12b/0x2a0 __list_lru_walk_one+0x122/0x3d0 list_lru_walk_one+0x75/0xa0 prune_icache_sb+0x8b/0xc0 super_cache_scan+0x1b8/0x250 do_shrink_slab+0x256/0x6d0 shrink_slab+0x41b/0x4a0 shrink_node+0x35c/0xd80 balance_pgdat+0x652/0xd90 kswapd+0x396/0x8d0 kthread+0x1e0/0x200 ret_from_fork+0x27/0x50 read to 0xffffa102789c4510 of 8 bytes by task 6345 on cpu 56: list_lru_count_one+0x116/0x2f0 list_lru_count_one at mm/list_lru.c:193 super_cache_count+0xe8/0x170 do_shrink_slab+0x95/0x6d0 shrink_slab+0x41b/0x4a0 shrink_node+0x35c/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x170/0x700 __handle_mm_fault+0xc9f/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 Reported by Kernel Concurrency Sanitizer on: CPU: 56 PID: 6345 Comm: oom01 Tainted: G W L 5.5.0-next-20200205+ #4 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 A shattered l.nr_items could affect the shrinker behaviour due to a data race. Fix it by adding READ_ONCE() for the read. Since the writes are aligned and up to word-size, assume those are safe from data races to avoid readability issues of writing WRITE_ONCE(var, var + val). Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Marco Elver Cc: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/1581114679-5488-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/list_lru.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/list_lru.c b/mm/list_lru.c index e825804b3928..5aa6e44bc2ae 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -180,7 +180,7 @@ unsigned long list_lru_count_one(struct list_lru *lru, rcu_read_lock(); l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg)); - count = l->nr_items; + count = READ_ONCE(l->nr_items); rcu_read_unlock(); return count; From abe1de4209f670ca81ba0d96f64fa9285c05a5ad Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:44 -0700 Subject: [PATCH 25/35] mm/mempool: fix a data race in mempool_free() mempool_t pool.curr_nr could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in mempool_free / remove_element write to 0xffffffffa937638c of 4 bytes by task 6359 on cpu 113: remove_element+0x4a/0x1c0 remove_element at mm/mempool.c:132 mempool_alloc+0x102/0x210 (inlined by) mempool_alloc at mm/mempool.c:399 bio_alloc_bioset+0x106/0x2c0 get_swap_bio+0x49/0x230 __swap_writepage+0x680/0xc30 swap_writepage+0x9c/0xf0 pageout+0x33e/0xae0 shrink_page_list+0x1f57/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 read to 0xffffffffa937638c of 4 bytes by interrupt on cpu 64: mempool_free+0x3e/0x150 mempool_free at mm/mempool.c:492 bio_free+0x192/0x280 bio_put+0x91/0xd0 end_swap_bio_write+0x1d8/0x280 bio_endio+0x2c2/0x5b0 dec_pending+0x22b/0x440 [dm_mod] clone_endio+0xe4/0x2c0 [dm_mod] bio_endio+0x2c2/0x5b0 blk_update_request+0x217/0x940 scsi_end_request+0x6b/0x4d0 scsi_io_completion+0xb7/0x7e0 scsi_finish_command+0x223/0x310 scsi_softirq_done+0x1d5/0x210 blk_mq_complete_request+0x224/0x250 scsi_mq_done+0xc2/0x250 pqi_raid_io_complete+0x5a/0x70 [smartpqi] pqi_irq_handler+0x150/0x1410 [smartpqi] __handle_irq_event_percpu+0x90/0x540 handle_irq_event_percpu+0x49/0xd0 handle_irq_event+0x85/0xca handle_edge_irq+0x13f/0x3e0 do_IRQ+0x86/0x190 Since the write is under pool->lock but the read is done as lockless. Even though the commit 5b990546e334 ("mempool: fix and document synchronization and memory barrier usage") introduced the smp_wmb() and smp_rmb() pair to improve the situation, it is adequate to protect it from data races which could lead to a logic bug, so fix it by adding READ_ONCE() for the read. Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Marco Elver Cc: Tejun Heo Cc: Oleg Nesterov Link: http://lkml.kernel.org/r/1581446384-2131-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/mempool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mempool.c b/mm/mempool.c index 85efab3da720..79bff63ecf27 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -489,7 +489,7 @@ void mempool_free(void *element, mempool_t *pool) * ensures that there will be frees which return elements to the * pool waking up the waiters. */ - if (unlikely(pool->curr_nr < pool->min_nr)) { + if (unlikely(READ_ONCE(pool->curr_nr) < pool->min_nr)) { spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr < pool->min_nr)) { add_element(pool, element); From 9c1177b62a8c7f78fee5ede540825b844a1bf0c8 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:47 -0700 Subject: [PATCH 26/35] mm/rmap: annotate a data race at tlb_flush_batched mm->tlb_flush_batched could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in flush_tlb_batched_pending / try_to_unmap_one write to 0xffff93f754880bd0 of 1 bytes by task 822 on cpu 6: try_to_unmap_one+0x59a/0x1ab0 set_tlb_ubc_flush_pending at mm/rmap.c:635 (inlined by) try_to_unmap_one at mm/rmap.c:1538 rmap_walk_anon+0x296/0x650 rmap_walk+0xdf/0x100 try_to_unmap+0x18a/0x2f0 shrink_page_list+0xef6/0x2870 shrink_inactive_list+0x316/0x880 shrink_lruvec+0x8dc/0x1380 shrink_node+0x317/0xd80 balance_pgdat+0x652/0xd90 kswapd+0x396/0x8d0 kthread+0x1e0/0x200 ret_from_fork+0x27/0x50 read to 0xffff93f754880bd0 of 1 bytes by task 6364 on cpu 4: flush_tlb_batched_pending+0x29/0x90 flush_tlb_batched_pending at mm/rmap.c:682 change_p4d_range+0x5dd/0x1030 change_pte_range at mm/mprotect.c:44 (inlined by) change_pmd_range at mm/mprotect.c:212 (inlined by) change_pud_range at mm/mprotect.c:240 (inlined by) change_p4d_range at mm/mprotect.c:260 change_protection+0x222/0x310 change_prot_numa+0x3e/0x60 task_numa_work+0x219/0x350 task_work_run+0xed/0x140 prepare_exit_to_usermode+0x2cc/0x2e0 ret_from_intr+0x32/0x42 Reported by Kernel Concurrency Sanitizer on: CPU: 4 PID: 6364 Comm: mtest01 Tainted: G W L 5.5.0-next-20200210+ #5 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 flush_tlb_batched_pending() is under PTL but the write is not, but mm->tlb_flush_batched is only a bool type, so the value is unlikely to be shattered. Thus, mark it as an intentional data race by using the data race macro. Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Marco Elver Link: http://lkml.kernel.org/r/1581450783-8262-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/rmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/rmap.c b/mm/rmap.c index 4ace1e32f705..83cc459edc40 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -672,7 +672,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) */ void flush_tlb_batched_pending(struct mm_struct *mm) { - if (mm->tlb_flush_batched) { + if (data_race(mm->tlb_flush_batched)) { flush_tlb_mm(mm); /* From 7e0cc01ea181c68f6aa013d8d7e7acbf9b49fda0 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:50 -0700 Subject: [PATCH 27/35] mm/swap.c: annotate data races for lru_rotate_pvecs Read to lru_add_pvec->nr could be interrupted and then write to the same variable. The write has local interrupt disabled, but the plain reads result in data races. However, it is unlikely the compilers could do much damage here given that lru_add_pvec->nr is a "unsigned char" and there is an existing compiler barrier. Thus, annotate the reads using the data_race() macro. The data races were reported by KCSAN, BUG: KCSAN: data-race in lru_add_drain_cpu / rotate_reclaimable_page write to 0xffff9291ebcb8a40 of 1 bytes by interrupt on cpu 23: rotate_reclaimable_page+0x2df/0x490 pagevec_add at include/linux/pagevec.h:81 (inlined by) rotate_reclaimable_page at mm/swap.c:259 end_page_writeback+0x1b5/0x2b0 end_swap_bio_write+0x1d0/0x280 bio_endio+0x297/0x560 dec_pending+0x218/0x430 [dm_mod] clone_endio+0xe4/0x2c0 [dm_mod] bio_endio+0x297/0x560 blk_update_request+0x201/0x920 scsi_end_request+0x6b/0x4a0 scsi_io_completion+0xb7/0x7e0 scsi_finish_command+0x1ed/0x2a0 scsi_softirq_done+0x1c9/0x1d0 blk_done_softirq+0x181/0x1d0 __do_softirq+0xd9/0x57c irq_exit+0xa2/0xc0 do_IRQ+0x8b/0x190 ret_from_intr+0x0/0x42 delay_tsc+0x46/0x80 __const_udelay+0x3c/0x40 __udelay+0x10/0x20 kcsan_setup_watchpoint+0x202/0x3a0 __tsan_read1+0xc2/0x100 lru_add_drain_cpu+0xb8/0x3f0 lru_add_drain+0x25/0x40 shrink_active_list+0xe1/0xc80 shrink_lruvec+0x766/0xb70 shrink_node+0x2d6/0xca0 do_try_to_free_pages+0x1f7/0x9a0 try_to_free_pages+0x252/0x5b0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x16e/0x6f0 __handle_mm_fault+0xcd5/0xd40 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffff9291ebcb8a40 of 1 bytes by task 37761 on cpu 23: lru_add_drain_cpu+0xb8/0x3f0 lru_add_drain_cpu at mm/swap.c:602 lru_add_drain+0x25/0x40 shrink_active_list+0xe1/0xc80 shrink_lruvec+0x766/0xb70 shrink_node+0x2d6/0xca0 do_try_to_free_pages+0x1f7/0x9a0 try_to_free_pages+0x252/0x5b0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x16e/0x6f0 __handle_mm_fault+0xcd5/0xd40 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 2 locks held by oom02/37761: #0: ffff9281e5928808 (&mm->mmap_sem#2){++++}, at: do_page_fault #1: ffffffffb3ade380 (fs_reclaim){+.+.}, at: fs_reclaim_acquire.part irq event stamp: 1949217 trace_hardirqs_on_thunk+0x1a/0x1c __do_softirq+0x2e7/0x57c __do_softirq+0x34c/0x57c irq_exit+0xa2/0xc0 Reported by Kernel Concurrency Sanitizer on: CPU: 23 PID: 37761 Comm: oom02 Not tainted 5.6.0-rc3-next-20200226+ #6 Hardware name: HP ProLiant BL660c Gen9, BIOS I38 10/17/2018 Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Acked-by: Marco Elver Link: http://lkml.kernel.org/r/20200228044018.1263-1-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/swap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/swap.c b/mm/swap.c index d26c22baf7c5..d16d65d9b4e0 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -632,7 +632,8 @@ void lru_add_drain_cpu(int cpu) __pagevec_lru_add(pvec); pvec = &per_cpu(lru_rotate.pvec, cpu); - if (pagevec_count(pvec)) { + /* Disabling interrupts below acts as a compiler barrier. */ + if (data_race(pagevec_count(pvec))) { unsigned long flags; /* No harm done if a racing interrupt already did this */ @@ -793,7 +794,7 @@ void lru_add_drain_all(void) struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) || - pagevec_count(&per_cpu(lru_rotate.pvec, cpu)) || + data_race(pagevec_count(&per_cpu(lru_rotate.pvec, cpu))) || pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) || pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) || pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) || From c403f6a3a792a6601185497c12b0bdf4be880439 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:53 -0700 Subject: [PATCH 28/35] mm: annotate a data race in page_zonenum() BUG: KCSAN: data-race in page_cpupid_xchg_last / put_page write (marked) to 0xfffffc0d48ec1a00 of 8 bytes by task 91442 on cpu 3: page_cpupid_xchg_last+0x51/0x80 page_cpupid_xchg_last at mm/mmzone.c:109 (discriminator 11) wp_page_reuse+0x3e/0xc0 wp_page_reuse at mm/memory.c:2453 do_wp_page+0x472/0x7b0 do_wp_page at mm/memory.c:2798 __handle_mm_fault+0xcb0/0xd00 handle_pte_fault at mm/memory.c:4049 (inlined by) __handle_mm_fault at mm/memory.c:4163 handle_mm_fault+0xfc/0x2f0 handle_mm_fault at mm/memory.c:4200 do_page_fault+0x263/0x6f9 do_user_addr_fault at arch/x86/mm/fault.c:1465 (inlined by) do_page_fault at arch/x86/mm/fault.c:1539 page_fault+0x34/0x40 read to 0xfffffc0d48ec1a00 of 8 bytes by task 94817 on cpu 69: put_page+0x15a/0x1f0 page_zonenum at include/linux/mm.h:923 (inlined by) is_zone_device_page at include/linux/mm.h:929 (inlined by) page_is_devmap_managed at include/linux/mm.h:948 (inlined by) put_page at include/linux/mm.h:1023 wp_page_copy+0x571/0x930 wp_page_copy at mm/memory.c:2615 do_wp_page+0x107/0x7b0 __handle_mm_fault+0xcb0/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 Reported by Kernel Concurrency Sanitizer on: CPU: 69 PID: 94817 Comm: systemd-udevd Tainted: G W O L 5.5.0-next-20200204+ #6 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 A page never changes its zone number. The zone number happens to be stored in the same word as other bits which are modified, but the zone number bits will never be modified by any other write, so it can accept a reload of the zone bits after an intervening write and it don't need to use READ_ONCE(). Thus, annotate this data race using ASSERT_EXCLUSIVE_BITS() to also assert that there are no concurrent writes to it. Suggested-by: Marco Elver Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Paul E. McKenney Cc: David Hildenbrand Cc: Jan Kara Cc: John Hubbard Cc: Ira Weiny Cc: Dan Williams Link: http://lkml.kernel.org/r/1581619089-14472-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e7ec3ee34cf..1983e08f5906 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1067,6 +1067,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); static inline enum zone_type page_zonenum(const struct page *page) { + ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } From 7f897acbe5d57995438c831670b7c400e9c0dc00 Mon Sep 17 00:00:00 2001 From: Romain Naour Date: Fri, 14 Aug 2020 17:31:57 -0700 Subject: [PATCH 29/35] include/asm-generic/vmlinux.lds.h: align ro_after_init Since the patch [1], building the kernel using a toolchain built with binutils 2.33.1 prevents booting a sh4 system under Qemu. Apply the patch provided by Alan Modra [2] that fix alignment of rodata. [1] https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;h=ebd2263ba9a9124d93bbc0ece63d7e0fae89b40e [2] https://www.sourceware.org/ml/binutils/2019-12/msg00112.html Signed-off-by: Romain Naour Signed-off-by: Andrew Morton Cc: Alan Modra Cc: Bin Meng Cc: Chen Zhou Cc: Geert Uytterhoeven Cc: John Paul Adrian Glaubitz Cc: Krzysztof Kozlowski Cc: Kuninori Morimoto Cc: Rich Felker Cc: Sam Ravnborg Cc: Yoshinori Sato Cc: Arnd Bergmann Cc: Link: https://marc.info/?l=linux-sh&m=158429470221261 Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7616ff0b96ec..5430febd34be 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -394,6 +394,7 @@ */ #ifndef RO_AFTER_INIT_DATA #define RO_AFTER_INIT_DATA \ + . = ALIGN(8); \ __start_ro_after_init = .; \ *(.data..ro_after_init) \ JUMP_TABLE_DATA \ From a8e3943b58ac8321d4817cfa08aadcfa59c2bf3f Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 14 Aug 2020 17:32:00 -0700 Subject: [PATCH 30/35] sh: clkfwk: remove r8/r16/r32 SH will get below warning ${LINUX}/drivers/sh/clk/cpg.c: In function 'r8': ${LINUX}/drivers/sh/clk/cpg.c:41:17: warning: passing argument 1 of 'ioread8' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers] return ioread8(addr); ^~~~ In file included from ${LINUX}/arch/sh/include/asm/io.h:21, from ${LINUX}/include/linux/io.h:13, from ${LINUX}/drivers/sh/clk/cpg.c:14: ${LINUX}/include/asm-generic/iomap.h:29:29: note: expected 'void *' but argument is of type 'const void *' extern unsigned int ioread8(void __iomem *); ^~~~~~~~~~~~~~ We don't need "const" for r8/r16/r32. And we don't need r8/r16/r32 themselvs. This patch cleanup these. Signed-off-by: Kuninori Morimoto Signed-off-by: Andrew Morton Cc: Alan Modra Cc: Bin Meng Cc: Chen Zhou Cc: Geert Uytterhoeven Cc: John Paul Adrian Glaubitz Cc: Krzysztof Kozlowski Cc: Rich Felker Cc: Romain Naour Cc: Sam Ravnborg Cc: Yoshinori Sato X-MARC-Message: https://marc.info/?l=linux-renesas-soc&m=157852973916903 Signed-off-by: Linus Torvalds --- drivers/sh/clk/cpg.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/sh/clk/cpg.c b/drivers/sh/clk/cpg.c index eeb028b9cdb3..a5cacfe24a42 100644 --- a/drivers/sh/clk/cpg.c +++ b/drivers/sh/clk/cpg.c @@ -36,36 +36,21 @@ static void sh_clk_write(int value, struct clk *clk) iowrite32(value, clk->mapped_reg); } -static unsigned int r8(const void __iomem *addr) -{ - return ioread8(addr); -} - -static unsigned int r16(const void __iomem *addr) -{ - return ioread16(addr); -} - -static unsigned int r32(const void __iomem *addr) -{ - return ioread32(addr); -} - static int sh_clk_mstp_enable(struct clk *clk) { sh_clk_write(sh_clk_read(clk) & ~(1 << clk->enable_bit), clk); if (clk->status_reg) { - unsigned int (*read)(const void __iomem *addr); + unsigned int (*read)(void __iomem *addr); int i; void __iomem *mapped_status = (phys_addr_t)clk->status_reg - (phys_addr_t)clk->enable_reg + clk->mapped_reg; if (clk->flags & CLK_ENABLE_REG_8BIT) - read = r8; + read = ioread8; else if (clk->flags & CLK_ENABLE_REG_16BIT) - read = r16; + read = ioread16; else - read = r32; + read = ioread32; for (i = 1000; (read(mapped_status) & (1 << clk->enable_bit)) && i; From f9e7ff9c6fc758b6f25674a9a4451db30344ce1e Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 14 Aug 2020 17:32:04 -0700 Subject: [PATCH 31/35] sh: use generic strncpy() Current SH will get below warning at strncpy() In file included from ${LINUX}/arch/sh/include/asm/string.h:3, from ${LINUX}/include/linux/string.h:20, from ${LINUX}/include/linux/bitmap.h:9, from ${LINUX}/include/linux/nodemask.h:95, from ${LINUX}/include/linux/mmzone.h:17, from ${LINUX}/include/linux/gfp.h:6, from ${LINUX}/innclude/linux/slab.h:15, from ${LINUX}/linux/drivers/mmc/host/vub300.c:38: ${LINUX}/drivers/mmc/host/vub300.c: In function 'new_system_port_status': ${LINUX}/arch/sh/include/asm/string_32.h:51:42: warning: array subscript\ 80 is above array bounds of 'char[26]' [-Warray-bounds] : "0" (__dest), "1" (__src), "r" (__src+__n) ~~~~~^~~~ In general, strncpy() should behave like below. char dest[10]; char *src = "12345"; strncpy(dest, src, 10); // dest = {'1', '2', '3', '4', '5', '\0','\0','\0','\0','\0'} But, current SH strnpy() has 2 issues. 1st is it will access to out-of-memory (= src + 10). 2nd is it needs big fixup for it, and maintenance __asm__ code is difficult. To solve these issues, this patch simply uses generic strncpy() instead of architecture specific one. Signed-off-by: Kuninori Morimoto Signed-off-by: Andrew Morton Cc: Alan Modra Cc: Bin Meng Cc: Chen Zhou Cc: Geert Uytterhoeven Cc: John Paul Adrian Glaubitz Cc: Krzysztof Kozlowski Cc: Rich Felker Cc: Romain Naour Cc: Sam Ravnborg Cc: Yoshinori Sato Link: https://marc.info/?l=linux-renesas-soc&m=157664657013309 Signed-off-by: Linus Torvalds --- arch/sh/include/asm/string_32.h | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/arch/sh/include/asm/string_32.h b/arch/sh/include/asm/string_32.h index 3558b1d7123e..be3f9a08cbc9 100644 --- a/arch/sh/include/asm/string_32.h +++ b/arch/sh/include/asm/string_32.h @@ -28,32 +28,6 @@ static inline char *strcpy(char *__dest, const char *__src) return __xdest; } -#define __HAVE_ARCH_STRNCPY -static inline char *strncpy(char *__dest, const char *__src, size_t __n) -{ - register char *__xdest = __dest; - unsigned long __dummy; - - if (__n == 0) - return __xdest; - - __asm__ __volatile__( - "1:\n" - "mov.b @%1+, %2\n\t" - "mov.b %2, @%0\n\t" - "cmp/eq #0, %2\n\t" - "bt/s 2f\n\t" - " cmp/eq %5,%1\n\t" - "bf/s 1b\n\t" - " add #1, %0\n" - "2:" - : "=r" (__dest), "=r" (__src), "=&z" (__dummy) - : "0" (__dest), "1" (__src), "r" (__src+__n) - : "memory", "t"); - - return __xdest; -} - #define __HAVE_ARCH_STRCMP static inline int strcmp(const char *__cs, const char *__ct) { From 8f28ca6bd8211214faf717677bbffe375c2a6072 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 14 Aug 2020 17:32:07 -0700 Subject: [PATCH 32/35] iomap: constify ioreadX() iomem argument (as in generic implementation) Patch series "iomap: Constify ioreadX() iomem argument", v3. The ioread8/16/32() and others have inconsistent interface among the architectures: some taking address as const, some not. It seems there is nothing really stopping all of them to take pointer to const. This patch (of 4): The ioreadX() and ioreadX_rep() helpers have inconsistent interface. On some architectures void *__iomem address argument is a pointer to const, on some not. Implementations of ioreadX() do not modify the memory under the address so they can be converted to a "const" version for const-safety and consistency among architectures. [krzk@kernel.org: sh: clk: fix assignment from incompatible pointer type for ioreadX()] Link: http://lkml.kernel.org/r/20200723082017.24053-1-krzk@kernel.org [akpm@linux-foundation.org: fix drivers/mailbox/bcm-pdc-mailbox.c] Link: http://lkml.kernel.org/r/202007132209.Rxmv4QyS%25lkp@intel.com Suggested-by: Geert Uytterhoeven Signed-off-by: Krzysztof Kozlowski Signed-off-by: Andrew Morton Reviewed-by: Geert Uytterhoeven Reviewed-by: Arnd Bergmann Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Yoshinori Sato Cc: Rich Felker Cc: Kalle Valo Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Dave Jiang Cc: Jon Mason Cc: Allen Hubbe Cc: "Michael S. Tsirkin" Cc: Jason Wang Link: http://lkml.kernel.org/r/20200709072837.5869-1-krzk@kernel.org Link: http://lkml.kernel.org/r/20200709072837.5869-2-krzk@kernel.org Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/core_apecs.h | 6 +-- arch/alpha/include/asm/core_cia.h | 6 +-- arch/alpha/include/asm/core_lca.h | 6 +-- arch/alpha/include/asm/core_marvel.h | 4 +- arch/alpha/include/asm/core_mcpcia.h | 6 +-- arch/alpha/include/asm/core_t2.h | 2 +- arch/alpha/include/asm/io.h | 12 ++--- arch/alpha/include/asm/io_trivial.h | 16 +++--- arch/alpha/include/asm/jensen.h | 2 +- arch/alpha/include/asm/machvec.h | 6 +-- arch/alpha/kernel/core_marvel.c | 2 +- arch/alpha/kernel/io.c | 12 ++--- arch/parisc/include/asm/io.h | 4 +- arch/parisc/lib/iomap.c | 72 +++++++++++++-------------- arch/powerpc/kernel/iomap.c | 28 +++++------ arch/sh/kernel/iomap.c | 22 ++++---- drivers/mailbox/bcm-pdc-mailbox.c | 2 +- drivers/sh/clk/cpg.c | 2 +- include/asm-generic/iomap.h | 28 +++++------ include/linux/io-64-nonatomic-hi-lo.h | 4 +- include/linux/io-64-nonatomic-lo-hi.h | 4 +- lib/iomap.c | 30 +++++------ 22 files changed, 138 insertions(+), 138 deletions(-) diff --git a/arch/alpha/include/asm/core_apecs.h b/arch/alpha/include/asm/core_apecs.h index 0a07055bc0fe..2d9726fc02ef 100644 --- a/arch/alpha/include/asm/core_apecs.h +++ b/arch/alpha/include/asm/core_apecs.h @@ -384,7 +384,7 @@ struct el_apecs_procdata } \ } while (0) -__EXTERN_INLINE unsigned int apecs_ioread8(void __iomem *xaddr) +__EXTERN_INLINE unsigned int apecs_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -420,7 +420,7 @@ __EXTERN_INLINE void apecs_iowrite8(u8 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int apecs_ioread16(void __iomem *xaddr) +__EXTERN_INLINE unsigned int apecs_ioread16(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -456,7 +456,7 @@ __EXTERN_INLINE void apecs_iowrite16(u16 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int apecs_ioread32(void __iomem *xaddr) +__EXTERN_INLINE unsigned int apecs_ioread32(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; if (addr < APECS_DENSE_MEM) diff --git a/arch/alpha/include/asm/core_cia.h b/arch/alpha/include/asm/core_cia.h index c706a7f2b061..cb22991f6761 100644 --- a/arch/alpha/include/asm/core_cia.h +++ b/arch/alpha/include/asm/core_cia.h @@ -342,7 +342,7 @@ struct el_CIA_sysdata_mcheck { #define vuip volatile unsigned int __force * #define vulp volatile unsigned long __force * -__EXTERN_INLINE unsigned int cia_ioread8(void __iomem *xaddr) +__EXTERN_INLINE unsigned int cia_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -374,7 +374,7 @@ __EXTERN_INLINE void cia_iowrite8(u8 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int cia_ioread16(void __iomem *xaddr) +__EXTERN_INLINE unsigned int cia_ioread16(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -404,7 +404,7 @@ __EXTERN_INLINE void cia_iowrite16(u16 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int cia_ioread32(void __iomem *xaddr) +__EXTERN_INLINE unsigned int cia_ioread32(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; if (addr < CIA_DENSE_MEM) diff --git a/arch/alpha/include/asm/core_lca.h b/arch/alpha/include/asm/core_lca.h index 84d5e5b84f4f..ec86314418cb 100644 --- a/arch/alpha/include/asm/core_lca.h +++ b/arch/alpha/include/asm/core_lca.h @@ -230,7 +230,7 @@ union el_lca { } while (0) -__EXTERN_INLINE unsigned int lca_ioread8(void __iomem *xaddr) +__EXTERN_INLINE unsigned int lca_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -266,7 +266,7 @@ __EXTERN_INLINE void lca_iowrite8(u8 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int lca_ioread16(void __iomem *xaddr) +__EXTERN_INLINE unsigned int lca_ioread16(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; unsigned long result, base_and_type; @@ -302,7 +302,7 @@ __EXTERN_INLINE void lca_iowrite16(u16 b, void __iomem *xaddr) *(vuip) ((addr << 5) + base_and_type) = w; } -__EXTERN_INLINE unsigned int lca_ioread32(void __iomem *xaddr) +__EXTERN_INLINE unsigned int lca_ioread32(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; if (addr < LCA_DENSE_MEM) diff --git a/arch/alpha/include/asm/core_marvel.h b/arch/alpha/include/asm/core_marvel.h index cc6fd92d5fa9..b266e02e284b 100644 --- a/arch/alpha/include/asm/core_marvel.h +++ b/arch/alpha/include/asm/core_marvel.h @@ -332,10 +332,10 @@ struct io7 { #define vucp volatile unsigned char __force * #define vusp volatile unsigned short __force * -extern unsigned int marvel_ioread8(void __iomem *); +extern unsigned int marvel_ioread8(const void __iomem *); extern void marvel_iowrite8(u8 b, void __iomem *); -__EXTERN_INLINE unsigned int marvel_ioread16(void __iomem *addr) +__EXTERN_INLINE unsigned int marvel_ioread16(const void __iomem *addr) { return __kernel_ldwu(*(vusp)addr); } diff --git a/arch/alpha/include/asm/core_mcpcia.h b/arch/alpha/include/asm/core_mcpcia.h index b30dc128210d..cb24d1bd6141 100644 --- a/arch/alpha/include/asm/core_mcpcia.h +++ b/arch/alpha/include/asm/core_mcpcia.h @@ -267,7 +267,7 @@ extern inline int __mcpcia_is_mmio(unsigned long addr) return (addr & 0x80000000UL) == 0; } -__EXTERN_INLINE unsigned int mcpcia_ioread8(void __iomem *xaddr) +__EXTERN_INLINE unsigned int mcpcia_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK; unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK; @@ -291,7 +291,7 @@ __EXTERN_INLINE void mcpcia_iowrite8(u8 b, void __iomem *xaddr) *(vuip) ((addr << 5) + hose + 0x00) = w; } -__EXTERN_INLINE unsigned int mcpcia_ioread16(void __iomem *xaddr) +__EXTERN_INLINE unsigned int mcpcia_ioread16(const void __iomem *xaddr) { unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK; unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK; @@ -315,7 +315,7 @@ __EXTERN_INLINE void mcpcia_iowrite16(u16 b, void __iomem *xaddr) *(vuip) ((addr << 5) + hose + 0x08) = w; } -__EXTERN_INLINE unsigned int mcpcia_ioread32(void __iomem *xaddr) +__EXTERN_INLINE unsigned int mcpcia_ioread32(const void __iomem *xaddr) { unsigned long addr = (unsigned long)xaddr; diff --git a/arch/alpha/include/asm/core_t2.h b/arch/alpha/include/asm/core_t2.h index e0b33d09e93a..12bb7addc789 100644 --- a/arch/alpha/include/asm/core_t2.h +++ b/arch/alpha/include/asm/core_t2.h @@ -572,7 +572,7 @@ __EXTERN_INLINE int t2_is_mmio(const volatile void __iomem *addr) it doesn't make sense to merge the pio and mmio routines. */ #define IOPORT(OS, NS) \ -__EXTERN_INLINE unsigned int t2_ioread##NS(void __iomem *xaddr) \ +__EXTERN_INLINE unsigned int t2_ioread##NS(const void __iomem *xaddr) \ { \ if (t2_is_mmio(xaddr)) \ return t2_read##OS(xaddr); \ diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index 640e1a2f57b4..1f6a909d1fa5 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -150,9 +150,9 @@ static inline void generic_##NAME(TYPE b, QUAL void __iomem *addr) \ alpha_mv.mv_##NAME(b, addr); \ } -REMAP1(unsigned int, ioread8, /**/) -REMAP1(unsigned int, ioread16, /**/) -REMAP1(unsigned int, ioread32, /**/) +REMAP1(unsigned int, ioread8, const) +REMAP1(unsigned int, ioread16, const) +REMAP1(unsigned int, ioread32, const) REMAP1(u8, readb, const volatile) REMAP1(u16, readw, const volatile) REMAP1(u32, readl, const volatile) @@ -307,7 +307,7 @@ static inline int __is_mmio(const volatile void __iomem *addr) */ #if IO_CONCAT(__IO_PREFIX,trivial_io_bw) -extern inline unsigned int ioread8(void __iomem *addr) +extern inline unsigned int ioread8(const void __iomem *addr) { unsigned int ret; mb(); @@ -316,7 +316,7 @@ extern inline unsigned int ioread8(void __iomem *addr) return ret; } -extern inline unsigned int ioread16(void __iomem *addr) +extern inline unsigned int ioread16(const void __iomem *addr) { unsigned int ret; mb(); @@ -359,7 +359,7 @@ extern inline void outw(u16 b, unsigned long port) #endif #if IO_CONCAT(__IO_PREFIX,trivial_io_lq) -extern inline unsigned int ioread32(void __iomem *addr) +extern inline unsigned int ioread32(const void __iomem *addr) { unsigned int ret; mb(); diff --git a/arch/alpha/include/asm/io_trivial.h b/arch/alpha/include/asm/io_trivial.h index ba3d8f0cfe0c..a1a29cbe02fa 100644 --- a/arch/alpha/include/asm/io_trivial.h +++ b/arch/alpha/include/asm/io_trivial.h @@ -7,15 +7,15 @@ #if IO_CONCAT(__IO_PREFIX,trivial_io_bw) __EXTERN_INLINE unsigned int -IO_CONCAT(__IO_PREFIX,ioread8)(void __iomem *a) +IO_CONCAT(__IO_PREFIX,ioread8)(const void __iomem *a) { - return __kernel_ldbu(*(volatile u8 __force *)a); + return __kernel_ldbu(*(const volatile u8 __force *)a); } __EXTERN_INLINE unsigned int -IO_CONCAT(__IO_PREFIX,ioread16)(void __iomem *a) +IO_CONCAT(__IO_PREFIX,ioread16)(const void __iomem *a) { - return __kernel_ldwu(*(volatile u16 __force *)a); + return __kernel_ldwu(*(const volatile u16 __force *)a); } __EXTERN_INLINE void @@ -33,9 +33,9 @@ IO_CONCAT(__IO_PREFIX,iowrite16)(u16 b, void __iomem *a) #if IO_CONCAT(__IO_PREFIX,trivial_io_lq) __EXTERN_INLINE unsigned int -IO_CONCAT(__IO_PREFIX,ioread32)(void __iomem *a) +IO_CONCAT(__IO_PREFIX,ioread32)(const void __iomem *a) { - return *(volatile u32 __force *)a; + return *(const volatile u32 __force *)a; } __EXTERN_INLINE void @@ -73,14 +73,14 @@ IO_CONCAT(__IO_PREFIX,writew)(u16 b, volatile void __iomem *a) __EXTERN_INLINE u8 IO_CONCAT(__IO_PREFIX,readb)(const volatile void __iomem *a) { - void __iomem *addr = (void __iomem *)a; + const void __iomem *addr = (const void __iomem *)a; return IO_CONCAT(__IO_PREFIX,ioread8)(addr); } __EXTERN_INLINE u16 IO_CONCAT(__IO_PREFIX,readw)(const volatile void __iomem *a) { - void __iomem *addr = (void __iomem *)a; + const void __iomem *addr = (const void __iomem *)a; return IO_CONCAT(__IO_PREFIX,ioread16)(addr); } diff --git a/arch/alpha/include/asm/jensen.h b/arch/alpha/include/asm/jensen.h index 436dc905b6ad..916895155a88 100644 --- a/arch/alpha/include/asm/jensen.h +++ b/arch/alpha/include/asm/jensen.h @@ -305,7 +305,7 @@ __EXTERN_INLINE int jensen_is_mmio(const volatile void __iomem *addr) that it doesn't make sense to merge them. */ #define IOPORT(OS, NS) \ -__EXTERN_INLINE unsigned int jensen_ioread##NS(void __iomem *xaddr) \ +__EXTERN_INLINE unsigned int jensen_ioread##NS(const void __iomem *xaddr) \ { \ if (jensen_is_mmio(xaddr)) \ return jensen_read##OS(xaddr - 0x100000000ul); \ diff --git a/arch/alpha/include/asm/machvec.h b/arch/alpha/include/asm/machvec.h index a6b73c6d10ee..a4e96e2bec74 100644 --- a/arch/alpha/include/asm/machvec.h +++ b/arch/alpha/include/asm/machvec.h @@ -46,9 +46,9 @@ struct alpha_machine_vector void (*mv_pci_tbi)(struct pci_controller *hose, dma_addr_t start, dma_addr_t end); - unsigned int (*mv_ioread8)(void __iomem *); - unsigned int (*mv_ioread16)(void __iomem *); - unsigned int (*mv_ioread32)(void __iomem *); + unsigned int (*mv_ioread8)(const void __iomem *); + unsigned int (*mv_ioread16)(const void __iomem *); + unsigned int (*mv_ioread32)(const void __iomem *); void (*mv_iowrite8)(u8, void __iomem *); void (*mv_iowrite16)(u16, void __iomem *); diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 4c80d992a659..4485b77f8658 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -806,7 +806,7 @@ void __iomem *marvel_ioportmap (unsigned long addr) } unsigned int -marvel_ioread8(void __iomem *xaddr) +marvel_ioread8(const void __iomem *xaddr) { unsigned long addr = (unsigned long) xaddr; if (__marvel_is_port_kbd(addr)) diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c index 938de13adfbf..838586abb1e0 100644 --- a/arch/alpha/kernel/io.c +++ b/arch/alpha/kernel/io.c @@ -14,7 +14,7 @@ "generic", which bumps through the machine vector. */ unsigned int -ioread8(void __iomem *addr) +ioread8(const void __iomem *addr) { unsigned int ret; mb(); @@ -23,7 +23,7 @@ ioread8(void __iomem *addr) return ret; } -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { unsigned int ret; mb(); @@ -32,7 +32,7 @@ unsigned int ioread16(void __iomem *addr) return ret; } -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { unsigned int ret; mb(); @@ -257,7 +257,7 @@ EXPORT_SYMBOL(readq_relaxed); /* * Read COUNT 8-bit bytes from port PORT into memory starting at SRC. */ -void ioread8_rep(void __iomem *port, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *port, void *dst, unsigned long count) { while ((unsigned long)dst & 0x3) { if (!count) @@ -300,7 +300,7 @@ EXPORT_SYMBOL(insb); * the interfaces seems to be slow: just using the inlined version * of the inw() breaks things. */ -void ioread16_rep(void __iomem *port, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *port, void *dst, unsigned long count) { if (unlikely((unsigned long)dst & 0x3)) { if (!count) @@ -340,7 +340,7 @@ EXPORT_SYMBOL(insw); * but the interfaces seems to be slow: just using the inlined version * of the inl() breaks things. */ -void ioread32_rep(void __iomem *port, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *port, void *dst, unsigned long count) { if (unlikely((unsigned long)dst & 0x3)) { while (count--) { diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h index 116effe26143..45e20d38dc59 100644 --- a/arch/parisc/include/asm/io.h +++ b/arch/parisc/include/asm/io.h @@ -303,8 +303,8 @@ extern void outsl (unsigned long port, const void *src, unsigned long count); #define ioread64be ioread64be #define iowrite64 iowrite64 #define iowrite64be iowrite64be -extern u64 ioread64(void __iomem *addr); -extern u64 ioread64be(void __iomem *addr); +extern u64 ioread64(const void __iomem *addr); +extern u64 ioread64be(const void __iomem *addr); extern void iowrite64(u64 val, void __iomem *addr); extern void iowrite64be(u64 val, void __iomem *addr); diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c index 0195aec657e2..ce400417d54e 100644 --- a/arch/parisc/lib/iomap.c +++ b/arch/parisc/lib/iomap.c @@ -43,13 +43,13 @@ #endif struct iomap_ops { - unsigned int (*read8)(void __iomem *); - unsigned int (*read16)(void __iomem *); - unsigned int (*read16be)(void __iomem *); - unsigned int (*read32)(void __iomem *); - unsigned int (*read32be)(void __iomem *); - u64 (*read64)(void __iomem *); - u64 (*read64be)(void __iomem *); + unsigned int (*read8)(const void __iomem *); + unsigned int (*read16)(const void __iomem *); + unsigned int (*read16be)(const void __iomem *); + unsigned int (*read32)(const void __iomem *); + unsigned int (*read32be)(const void __iomem *); + u64 (*read64)(const void __iomem *); + u64 (*read64be)(const void __iomem *); void (*write8)(u8, void __iomem *); void (*write16)(u16, void __iomem *); void (*write16be)(u16, void __iomem *); @@ -57,9 +57,9 @@ struct iomap_ops { void (*write32be)(u32, void __iomem *); void (*write64)(u64, void __iomem *); void (*write64be)(u64, void __iomem *); - void (*read8r)(void __iomem *, void *, unsigned long); - void (*read16r)(void __iomem *, void *, unsigned long); - void (*read32r)(void __iomem *, void *, unsigned long); + void (*read8r)(const void __iomem *, void *, unsigned long); + void (*read16r)(const void __iomem *, void *, unsigned long); + void (*read32r)(const void __iomem *, void *, unsigned long); void (*write8r)(void __iomem *, const void *, unsigned long); void (*write16r)(void __iomem *, const void *, unsigned long); void (*write32r)(void __iomem *, const void *, unsigned long); @@ -69,17 +69,17 @@ struct iomap_ops { #define ADDR2PORT(addr) ((unsigned long __force)(addr) & 0xffffff) -static unsigned int ioport_read8(void __iomem *addr) +static unsigned int ioport_read8(const void __iomem *addr) { return inb(ADDR2PORT(addr)); } -static unsigned int ioport_read16(void __iomem *addr) +static unsigned int ioport_read16(const void __iomem *addr) { return inw(ADDR2PORT(addr)); } -static unsigned int ioport_read32(void __iomem *addr) +static unsigned int ioport_read32(const void __iomem *addr) { return inl(ADDR2PORT(addr)); } @@ -99,17 +99,17 @@ static void ioport_write32(u32 datum, void __iomem *addr) outl(datum, ADDR2PORT(addr)); } -static void ioport_read8r(void __iomem *addr, void *dst, unsigned long count) +static void ioport_read8r(const void __iomem *addr, void *dst, unsigned long count) { insb(ADDR2PORT(addr), dst, count); } -static void ioport_read16r(void __iomem *addr, void *dst, unsigned long count) +static void ioport_read16r(const void __iomem *addr, void *dst, unsigned long count) { insw(ADDR2PORT(addr), dst, count); } -static void ioport_read32r(void __iomem *addr, void *dst, unsigned long count) +static void ioport_read32r(const void __iomem *addr, void *dst, unsigned long count) { insl(ADDR2PORT(addr), dst, count); } @@ -150,37 +150,37 @@ static const struct iomap_ops ioport_ops = { /* Legacy I/O memory ops */ -static unsigned int iomem_read8(void __iomem *addr) +static unsigned int iomem_read8(const void __iomem *addr) { return readb(addr); } -static unsigned int iomem_read16(void __iomem *addr) +static unsigned int iomem_read16(const void __iomem *addr) { return readw(addr); } -static unsigned int iomem_read16be(void __iomem *addr) +static unsigned int iomem_read16be(const void __iomem *addr) { return __raw_readw(addr); } -static unsigned int iomem_read32(void __iomem *addr) +static unsigned int iomem_read32(const void __iomem *addr) { return readl(addr); } -static unsigned int iomem_read32be(void __iomem *addr) +static unsigned int iomem_read32be(const void __iomem *addr) { return __raw_readl(addr); } -static u64 iomem_read64(void __iomem *addr) +static u64 iomem_read64(const void __iomem *addr) { return readq(addr); } -static u64 iomem_read64be(void __iomem *addr) +static u64 iomem_read64be(const void __iomem *addr) { return __raw_readq(addr); } @@ -220,7 +220,7 @@ static void iomem_write64be(u64 datum, void __iomem *addr) __raw_writel(datum, addr); } -static void iomem_read8r(void __iomem *addr, void *dst, unsigned long count) +static void iomem_read8r(const void __iomem *addr, void *dst, unsigned long count) { while (count--) { *(u8 *)dst = __raw_readb(addr); @@ -228,7 +228,7 @@ static void iomem_read8r(void __iomem *addr, void *dst, unsigned long count) } } -static void iomem_read16r(void __iomem *addr, void *dst, unsigned long count) +static void iomem_read16r(const void __iomem *addr, void *dst, unsigned long count) { while (count--) { *(u16 *)dst = __raw_readw(addr); @@ -236,7 +236,7 @@ static void iomem_read16r(void __iomem *addr, void *dst, unsigned long count) } } -static void iomem_read32r(void __iomem *addr, void *dst, unsigned long count) +static void iomem_read32r(const void __iomem *addr, void *dst, unsigned long count) { while (count--) { *(u32 *)dst = __raw_readl(addr); @@ -297,49 +297,49 @@ static const struct iomap_ops *iomap_ops[8] = { }; -unsigned int ioread8(void __iomem *addr) +unsigned int ioread8(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read8(addr); return *((u8 *)addr); } -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read16(addr); return le16_to_cpup((u16 *)addr); } -unsigned int ioread16be(void __iomem *addr) +unsigned int ioread16be(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read16be(addr); return *((u16 *)addr); } -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read32(addr); return le32_to_cpup((u32 *)addr); } -unsigned int ioread32be(void __iomem *addr) +unsigned int ioread32be(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read32be(addr); return *((u32 *)addr); } -u64 ioread64(void __iomem *addr) +u64 ioread64(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read64(addr); return le64_to_cpup((u64 *)addr); } -u64 ioread64be(void __iomem *addr) +u64 ioread64be(const void __iomem *addr) { if (unlikely(INDIRECT_ADDR(addr))) return iomap_ops[ADDR_TO_REGION(addr)]->read64be(addr); @@ -411,7 +411,7 @@ void iowrite64be(u64 datum, void __iomem *addr) /* Repeating interfaces */ -void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) { if (unlikely(INDIRECT_ADDR(addr))) { iomap_ops[ADDR_TO_REGION(addr)]->read8r(addr, dst, count); @@ -423,7 +423,7 @@ void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) } } -void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) { if (unlikely(INDIRECT_ADDR(addr))) { iomap_ops[ADDR_TO_REGION(addr)]->read16r(addr, dst, count); @@ -435,7 +435,7 @@ void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) } } -void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) { if (unlikely(INDIRECT_ADDR(addr))) { iomap_ops[ADDR_TO_REGION(addr)]->read32r(addr, dst, count); diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 5ac84efc6ede..9fe4fb3b08aa 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -15,23 +15,23 @@ * Here comes the ppc64 implementation of the IOMAP * interfaces. */ -unsigned int ioread8(void __iomem *addr) +unsigned int ioread8(const void __iomem *addr) { return readb(addr); } -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { return readw(addr); } -unsigned int ioread16be(void __iomem *addr) +unsigned int ioread16be(const void __iomem *addr) { return readw_be(addr); } -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { return readl(addr); } -unsigned int ioread32be(void __iomem *addr) +unsigned int ioread32be(const void __iomem *addr) { return readl_be(addr); } @@ -41,27 +41,27 @@ EXPORT_SYMBOL(ioread16be); EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); #ifdef __powerpc64__ -u64 ioread64(void __iomem *addr) +u64 ioread64(const void __iomem *addr) { return readq(addr); } -u64 ioread64_lo_hi(void __iomem *addr) +u64 ioread64_lo_hi(const void __iomem *addr) { return readq(addr); } -u64 ioread64_hi_lo(void __iomem *addr) +u64 ioread64_hi_lo(const void __iomem *addr) { return readq(addr); } -u64 ioread64be(void __iomem *addr) +u64 ioread64be(const void __iomem *addr) { return readq_be(addr); } -u64 ioread64be_lo_hi(void __iomem *addr) +u64 ioread64be_lo_hi(const void __iomem *addr) { return readq_be(addr); } -u64 ioread64be_hi_lo(void __iomem *addr) +u64 ioread64be_hi_lo(const void __iomem *addr) { return readq_be(addr); } @@ -139,15 +139,15 @@ EXPORT_SYMBOL(iowrite64be_hi_lo); * FIXME! We could make these do EEH handling if we really * wanted. Not clear if we do. */ -void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) { readsb(addr, dst, count); } -void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) { readsw(addr, dst, count); } -void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) { readsl(addr, dst, count); } diff --git a/arch/sh/kernel/iomap.c b/arch/sh/kernel/iomap.c index ef9e2c97cbb7..0a0dff4e66de 100644 --- a/arch/sh/kernel/iomap.c +++ b/arch/sh/kernel/iomap.c @@ -8,31 +8,31 @@ #include #include -unsigned int ioread8(void __iomem *addr) +unsigned int ioread8(const void __iomem *addr) { return readb(addr); } EXPORT_SYMBOL(ioread8); -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { return readw(addr); } EXPORT_SYMBOL(ioread16); -unsigned int ioread16be(void __iomem *addr) +unsigned int ioread16be(const void __iomem *addr) { return be16_to_cpu(__raw_readw(addr)); } EXPORT_SYMBOL(ioread16be); -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { return readl(addr); } EXPORT_SYMBOL(ioread32); -unsigned int ioread32be(void __iomem *addr) +unsigned int ioread32be(const void __iomem *addr) { return be32_to_cpu(__raw_readl(addr)); } @@ -74,7 +74,7 @@ EXPORT_SYMBOL(iowrite32be); * convert to CPU byte order. We write in "IO byte * order" (we also don't have IO barriers). */ -static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) +static inline void mmio_insb(const void __iomem *addr, u8 *dst, int count) { while (--count >= 0) { u8 data = __raw_readb(addr); @@ -83,7 +83,7 @@ static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) } } -static inline void mmio_insw(void __iomem *addr, u16 *dst, int count) +static inline void mmio_insw(const void __iomem *addr, u16 *dst, int count) { while (--count >= 0) { u16 data = __raw_readw(addr); @@ -92,7 +92,7 @@ static inline void mmio_insw(void __iomem *addr, u16 *dst, int count) } } -static inline void mmio_insl(void __iomem *addr, u32 *dst, int count) +static inline void mmio_insl(const void __iomem *addr, u32 *dst, int count) { while (--count >= 0) { u32 data = __raw_readl(addr); @@ -125,19 +125,19 @@ static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count) } } -void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) { mmio_insb(addr, dst, count); } EXPORT_SYMBOL(ioread8_rep); -void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) { mmio_insw(addr, dst, count); } EXPORT_SYMBOL(ioread16_rep); -void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) { mmio_insl(addr, dst, count); } diff --git a/drivers/mailbox/bcm-pdc-mailbox.c b/drivers/mailbox/bcm-pdc-mailbox.c index c10a9318a4b7..53945ca5d785 100644 --- a/drivers/mailbox/bcm-pdc-mailbox.c +++ b/drivers/mailbox/bcm-pdc-mailbox.c @@ -679,7 +679,7 @@ pdc_receive(struct pdc_state *pdcs) /* read last_rx_curr from register once */ pdcs->last_rx_curr = - (ioread32(&pdcs->rxregs_64->status0) & + (ioread32((const void __iomem *)&pdcs->rxregs_64->status0) & CRYPTO_D64_RS0_CD_MASK) / RING_ENTRY_SIZE; do { diff --git a/drivers/sh/clk/cpg.c b/drivers/sh/clk/cpg.c index a5cacfe24a42..fd72d9088bdc 100644 --- a/drivers/sh/clk/cpg.c +++ b/drivers/sh/clk/cpg.c @@ -40,7 +40,7 @@ static int sh_clk_mstp_enable(struct clk *clk) { sh_clk_write(sh_clk_read(clk) & ~(1 << clk->enable_bit), clk); if (clk->status_reg) { - unsigned int (*read)(void __iomem *addr); + unsigned int (*read)(const void __iomem *addr); int i; void __iomem *mapped_status = (phys_addr_t)clk->status_reg - (phys_addr_t)clk->enable_reg + clk->mapped_reg; diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index 9d28a5e82f73..649224664969 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -26,14 +26,14 @@ * in the low address range. Architectures for which this is not * true can't use this generic implementation. */ -extern unsigned int ioread8(void __iomem *); -extern unsigned int ioread16(void __iomem *); -extern unsigned int ioread16be(void __iomem *); -extern unsigned int ioread32(void __iomem *); -extern unsigned int ioread32be(void __iomem *); +extern unsigned int ioread8(const void __iomem *); +extern unsigned int ioread16(const void __iomem *); +extern unsigned int ioread16be(const void __iomem *); +extern unsigned int ioread32(const void __iomem *); +extern unsigned int ioread32be(const void __iomem *); #ifdef CONFIG_64BIT -extern u64 ioread64(void __iomem *); -extern u64 ioread64be(void __iomem *); +extern u64 ioread64(const void __iomem *); +extern u64 ioread64be(const void __iomem *); #endif #ifdef readq @@ -41,10 +41,10 @@ extern u64 ioread64be(void __iomem *); #define ioread64_hi_lo ioread64_hi_lo #define ioread64be_lo_hi ioread64be_lo_hi #define ioread64be_hi_lo ioread64be_hi_lo -extern u64 ioread64_lo_hi(void __iomem *addr); -extern u64 ioread64_hi_lo(void __iomem *addr); -extern u64 ioread64be_lo_hi(void __iomem *addr); -extern u64 ioread64be_hi_lo(void __iomem *addr); +extern u64 ioread64_lo_hi(const void __iomem *addr); +extern u64 ioread64_hi_lo(const void __iomem *addr); +extern u64 ioread64be_lo_hi(const void __iomem *addr); +extern u64 ioread64be_hi_lo(const void __iomem *addr); #endif extern void iowrite8(u8, void __iomem *); @@ -79,9 +79,9 @@ extern void iowrite64be_hi_lo(u64 val, void __iomem *addr); * memory across multiple ports, use "memcpy_toio()" * and friends. */ -extern void ioread8_rep(void __iomem *port, void *buf, unsigned long count); -extern void ioread16_rep(void __iomem *port, void *buf, unsigned long count); -extern void ioread32_rep(void __iomem *port, void *buf, unsigned long count); +extern void ioread8_rep(const void __iomem *port, void *buf, unsigned long count); +extern void ioread16_rep(const void __iomem *port, void *buf, unsigned long count); +extern void ioread32_rep(const void __iomem *port, void *buf, unsigned long count); extern void iowrite8_rep(void __iomem *port, const void *buf, unsigned long count); extern void iowrite16_rep(void __iomem *port, const void *buf, unsigned long count); diff --git a/include/linux/io-64-nonatomic-hi-lo.h b/include/linux/io-64-nonatomic-hi-lo.h index ae21b72cce85..f32522bb3aa5 100644 --- a/include/linux/io-64-nonatomic-hi-lo.h +++ b/include/linux/io-64-nonatomic-hi-lo.h @@ -57,7 +57,7 @@ static inline void hi_lo_writeq_relaxed(__u64 val, volatile void __iomem *addr) #ifndef ioread64_hi_lo #define ioread64_hi_lo ioread64_hi_lo -static inline u64 ioread64_hi_lo(void __iomem *addr) +static inline u64 ioread64_hi_lo(const void __iomem *addr) { u32 low, high; @@ -79,7 +79,7 @@ static inline void iowrite64_hi_lo(u64 val, void __iomem *addr) #ifndef ioread64be_hi_lo #define ioread64be_hi_lo ioread64be_hi_lo -static inline u64 ioread64be_hi_lo(void __iomem *addr) +static inline u64 ioread64be_hi_lo(const void __iomem *addr) { u32 low, high; diff --git a/include/linux/io-64-nonatomic-lo-hi.h b/include/linux/io-64-nonatomic-lo-hi.h index faaa842dbdb9..448a21435dba 100644 --- a/include/linux/io-64-nonatomic-lo-hi.h +++ b/include/linux/io-64-nonatomic-lo-hi.h @@ -57,7 +57,7 @@ static inline void lo_hi_writeq_relaxed(__u64 val, volatile void __iomem *addr) #ifndef ioread64_lo_hi #define ioread64_lo_hi ioread64_lo_hi -static inline u64 ioread64_lo_hi(void __iomem *addr) +static inline u64 ioread64_lo_hi(const void __iomem *addr) { u32 low, high; @@ -79,7 +79,7 @@ static inline void iowrite64_lo_hi(u64 val, void __iomem *addr) #ifndef ioread64be_lo_hi #define ioread64be_lo_hi ioread64be_lo_hi -static inline u64 ioread64be_lo_hi(void __iomem *addr) +static inline u64 ioread64be_lo_hi(const void __iomem *addr) { u32 low, high; diff --git a/lib/iomap.c b/lib/iomap.c index e909ab71e995..fbaa3e8f19d6 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -70,27 +70,27 @@ static void bad_io_access(unsigned long port, const char *access) #define mmio_read64be(addr) swab64(readq(addr)) #endif -unsigned int ioread8(void __iomem *addr) +unsigned int ioread8(const void __iomem *addr) { IO_COND(addr, return inb(port), return readb(addr)); return 0xff; } -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { IO_COND(addr, return inw(port), return readw(addr)); return 0xffff; } -unsigned int ioread16be(void __iomem *addr) +unsigned int ioread16be(const void __iomem *addr) { IO_COND(addr, return pio_read16be(port), return mmio_read16be(addr)); return 0xffff; } -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { IO_COND(addr, return inl(port), return readl(addr)); return 0xffffffff; } -unsigned int ioread32be(void __iomem *addr) +unsigned int ioread32be(const void __iomem *addr) { IO_COND(addr, return pio_read32be(port), return mmio_read32be(addr)); return 0xffffffff; @@ -142,26 +142,26 @@ static u64 pio_read64be_hi_lo(unsigned long port) return lo | (hi << 32); } -u64 ioread64_lo_hi(void __iomem *addr) +u64 ioread64_lo_hi(const void __iomem *addr) { IO_COND(addr, return pio_read64_lo_hi(port), return readq(addr)); return 0xffffffffffffffffULL; } -u64 ioread64_hi_lo(void __iomem *addr) +u64 ioread64_hi_lo(const void __iomem *addr) { IO_COND(addr, return pio_read64_hi_lo(port), return readq(addr)); return 0xffffffffffffffffULL; } -u64 ioread64be_lo_hi(void __iomem *addr) +u64 ioread64be_lo_hi(const void __iomem *addr) { IO_COND(addr, return pio_read64be_lo_hi(port), return mmio_read64be(addr)); return 0xffffffffffffffffULL; } -u64 ioread64be_hi_lo(void __iomem *addr) +u64 ioread64be_hi_lo(const void __iomem *addr) { IO_COND(addr, return pio_read64be_hi_lo(port), return mmio_read64be(addr)); @@ -275,7 +275,7 @@ EXPORT_SYMBOL(iowrite64be_hi_lo); * order" (we also don't have IO barriers). */ #ifndef mmio_insb -static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) +static inline void mmio_insb(const void __iomem *addr, u8 *dst, int count) { while (--count >= 0) { u8 data = __raw_readb(addr); @@ -283,7 +283,7 @@ static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) dst++; } } -static inline void mmio_insw(void __iomem *addr, u16 *dst, int count) +static inline void mmio_insw(const void __iomem *addr, u16 *dst, int count) { while (--count >= 0) { u16 data = __raw_readw(addr); @@ -291,7 +291,7 @@ static inline void mmio_insw(void __iomem *addr, u16 *dst, int count) dst++; } } -static inline void mmio_insl(void __iomem *addr, u32 *dst, int count) +static inline void mmio_insl(const void __iomem *addr, u32 *dst, int count) { while (--count >= 0) { u32 data = __raw_readl(addr); @@ -325,15 +325,15 @@ static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count) } #endif -void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) { IO_COND(addr, insb(port,dst,count), mmio_insb(addr, dst, count)); } -void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) { IO_COND(addr, insw(port,dst,count), mmio_insw(addr, dst, count)); } -void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) { IO_COND(addr, insl(port,dst,count), mmio_insl(addr, dst, count)); } From 5ca6ad7dce2f3d05bd67a15031c750c0214b7b33 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 14 Aug 2020 17:32:11 -0700 Subject: [PATCH 33/35] rtl818x: constify ioreadX() iomem argument (as in generic implementation) The ioreadX() helpers have inconsistent interface. On some architectures void *__iomem address argument is a pointer to const, on some not. Implementations of ioreadX() do not modify the memory under the address so they can be converted to a "const" version for const-safety and consistency among architectures. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Andrew Morton Reviewed-by: Geert Uytterhoeven Acked-by: Kalle Valo Cc: Allen Hubbe Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Dave Jiang Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Helge Deller Cc: Ivan Kokshaysky Cc: Jakub Kicinski Cc: "James E.J. Bottomley" Cc: Jason Wang Cc: Jon Mason Cc: Matt Turner Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Paul Mackerras Cc: Richard Henderson Cc: Rich Felker Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200709072837.5869-3-krzk@kernel.org Signed-off-by: Linus Torvalds --- drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8180.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8180.h b/drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8180.h index 7948a2da195a..2ff00800d45b 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8180.h +++ b/drivers/net/wireless/realtek/rtl818x/rtl8180/rtl8180.h @@ -150,17 +150,17 @@ void rtl8180_write_phy(struct ieee80211_hw *dev, u8 addr, u32 data); void rtl8180_set_anaparam(struct rtl8180_priv *priv, u32 anaparam); void rtl8180_set_anaparam2(struct rtl8180_priv *priv, u32 anaparam2); -static inline u8 rtl818x_ioread8(struct rtl8180_priv *priv, u8 __iomem *addr) +static inline u8 rtl818x_ioread8(struct rtl8180_priv *priv, const u8 __iomem *addr) { return ioread8(addr); } -static inline u16 rtl818x_ioread16(struct rtl8180_priv *priv, __le16 __iomem *addr) +static inline u16 rtl818x_ioread16(struct rtl8180_priv *priv, const __le16 __iomem *addr) { return ioread16(addr); } -static inline u32 rtl818x_ioread32(struct rtl8180_priv *priv, __le32 __iomem *addr) +static inline u32 rtl818x_ioread32(struct rtl8180_priv *priv, const __le32 __iomem *addr) { return ioread32(addr); } From 58184e95e9f1f80dc7adca215ccbeabfee081b23 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 14 Aug 2020 17:32:15 -0700 Subject: [PATCH 34/35] ntb: intel: constify ioreadX() iomem argument (as in generic implementation) The ioreadX() helpers have inconsistent interface. On some architectures void *__iomem address argument is a pointer to const, on some not. Implementations of ioreadX() do not modify the memory under the address so they can be converted to a "const" version for const-safety and consistency among architectures. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Andrew Morton Reviewed-by: Geert Uytterhoeven Acked-by: Dave Jiang Cc: Allen Hubbe Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Helge Deller Cc: Ivan Kokshaysky Cc: Jakub Kicinski Cc: "James E.J. Bottomley" Cc: Jason Wang Cc: Jon Mason Cc: Kalle Valo Cc: Matt Turner Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Paul Mackerras Cc: Richard Henderson Cc: Rich Felker Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200709072837.5869-4-krzk@kernel.org Signed-off-by: Linus Torvalds --- drivers/ntb/hw/intel/ntb_hw_gen1.c | 2 +- drivers/ntb/hw/intel/ntb_hw_gen3.h | 2 +- drivers/ntb/hw/intel/ntb_hw_intel.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c index 423f9b8fbbcf..3185efeab487 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen1.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c @@ -1205,7 +1205,7 @@ int intel_ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, int sidx, ndev->peer_reg->spad); } -static u64 xeon_db_ioread(void __iomem *mmio) +static u64 xeon_db_ioread(const void __iomem *mmio) { return (u64)ioread16(mmio); } diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.h b/drivers/ntb/hw/intel/ntb_hw_gen3.h index 2bc5d8356045..dea93989942d 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen3.h +++ b/drivers/ntb/hw/intel/ntb_hw_gen3.h @@ -91,7 +91,7 @@ #define GEN3_DB_TOTAL_SHIFT 33 #define GEN3_SPAD_COUNT 16 -static inline u64 gen3_db_ioread(void __iomem *mmio) +static inline u64 gen3_db_ioread(const void __iomem *mmio) { return ioread64(mmio); } diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h index d61fcd91714b..05e2335c9596 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.h +++ b/drivers/ntb/hw/intel/ntb_hw_intel.h @@ -103,7 +103,7 @@ struct intel_ntb_dev; struct intel_ntb_reg { int (*poll_link)(struct intel_ntb_dev *ndev); int (*link_is_up)(struct intel_ntb_dev *ndev); - u64 (*db_ioread)(void __iomem *mmio); + u64 (*db_ioread)(const void __iomem *mmio); void (*db_iowrite)(u64 db_bits, void __iomem *mmio); unsigned long ntb_ctl; resource_size_t db_size; From fe0580ac5cea1e534ce32a44306161f34ddbc60a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 14 Aug 2020 17:32:20 -0700 Subject: [PATCH 35/35] virtio: pci: constify ioreadX() iomem argument (as in generic implementation) The ioreadX() helpers have inconsistent interface. On some architectures void *__iomem address argument is a pointer to const, on some not. Implementations of ioreadX() do not modify the memory under the address so they can be converted to a "const" version for const-safety and consistency among architectures. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Andrew Morton Reviewed-by: Geert Uytterhoeven Cc: Allen Hubbe Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Dave Jiang Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Helge Deller Cc: Ivan Kokshaysky Cc: Jakub Kicinski Cc: "James E.J. Bottomley" Cc: Jason Wang Cc: Jon Mason Cc: Kalle Valo Cc: Matt Turner Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Paul Mackerras Cc: Richard Henderson Cc: Rich Felker Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200709072837.5869-5-krzk@kernel.org Signed-off-by: Linus Torvalds --- drivers/virtio/virtio_pci_modern.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 9bdc6f68221f..3e14e700b231 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -27,16 +27,16 @@ * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses * for 16-bit fields and 8-bit accesses for 8-bit fields. */ -static inline u8 vp_ioread8(u8 __iomem *addr) +static inline u8 vp_ioread8(const u8 __iomem *addr) { return ioread8(addr); } -static inline u16 vp_ioread16 (__le16 __iomem *addr) +static inline u16 vp_ioread16 (const __le16 __iomem *addr) { return ioread16(addr); } -static inline u32 vp_ioread32(__le32 __iomem *addr) +static inline u32 vp_ioread32(const __le32 __iomem *addr) { return ioread32(addr); }