Merge branch 'akpm' (patches from Andrew)

Merge more updates from Andrew Morton:
 "155 patches.

  Subsystems affected by this patch series: mm (dax, debug, thp,
  readahead, page-poison, util, memory-hotplug, zram, cleanups), misc,
  core-kernel, get_maintainer, MAINTAINERS, lib, bitops, checkpatch,
  binfmt, ramfs, autofs, nilfs, rapidio, panic, relay, kgdb, ubsan,
  romfs, and fault-injection"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (155 commits)
  lib, uaccess: add failure injection to usercopy functions
  lib, include/linux: add usercopy failure capability
  ROMFS: support inode blocks calculation
  ubsan: introduce CONFIG_UBSAN_LOCAL_BOUNDS for Clang
  sched.h: drop in_ubsan field when UBSAN is in trap mode
  scripts/gdb/tasks: add headers and improve spacing format
  scripts/gdb/proc: add struct mount & struct super_block addr in lx-mounts command
  kernel/relay.c: drop unneeded initialization
  panic: dump registers on panic_on_warn
  rapidio: fix the missed put_device() for rio_mport_add_riodev
  rapidio: fix error handling path
  nilfs2: fix some kernel-doc warnings for nilfs2
  autofs: harden ioctl table
  ramfs: fix nommu mmap with gaps in the page cache
  mm: remove the now-unnecessary mmget_still_valid() hack
  mm/gup: take mmap_lock in get_dump_page()
  binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot
  coredump: rework elf/elf_fdpic vma_dump_size() into common helper
  coredump: refactor page range dumping into common helper
  coredump: let dump_emit() bail out on short writes
  ...
This commit is contained in:
Linus Torvalds 2020-10-16 11:31:55 -07:00
commit c4cf498dc0
161 changed files with 2394 additions and 1728 deletions

View File

@ -133,6 +133,7 @@ James Ketrenos <jketreno@io.(none)>
Jan Glauber <jan.glauber@gmail.com> <jang@de.ibm.com>
Jan Glauber <jan.glauber@gmail.com> <jang@linux.vnet.ibm.com>
Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>

View File

@ -1343,6 +1343,7 @@
current integrity status.
failslab=
fail_usercopy=
fail_page_alloc=
fail_make_request=[KNL]
General fault injection mechanism.

View File

@ -475,13 +475,15 @@ or iterations will move the index to the first index in the range.
Each entry will only be returned once, no matter how many indices it
occupies.
Using xas_next() or xas_prev() with a multi-index xa_state
is not supported. Using either of these functions on a multi-index entry
will reveal sibling entries; these should be skipped over by the caller.
Using xas_next() or xas_prev() with a multi-index xa_state is not
supported. Using either of these functions on a multi-index entry will
reveal sibling entries; these should be skipped over by the caller.
Storing ``NULL`` into any index of a multi-index entry will set the entry
at every index to ``NULL`` and dissolve the tie. Splitting a multi-index
entry into entries occupying smaller ranges is not yet supported.
Storing ``NULL`` into any index of a multi-index entry will set the
entry at every index to ``NULL`` and dissolve the tie. A multi-index
entry can be split into entries occupying smaller ranges by calling
xas_split_alloc() without the xa_lock held, followed by taking the lock
and calling xas_split().
Functions and structures
========================

View File

@ -16,6 +16,10 @@ Available fault injection capabilities
injects page allocation failures. (alloc_pages(), get_free_pages(), ...)
- fail_usercopy
injects failures in user memory access functions. (copy_from_user(), get_user(), ...)
- fail_futex
injects futex deadlock and uaddr fault errors.
@ -177,6 +181,7 @@ use the boot option::
failslab=
fail_page_alloc=
fail_usercopy=
fail_make_request=
fail_futex=
mmc_core.fail_request=<interval>,<probability>,<space>,<times>
@ -222,7 +227,7 @@ How to add new fault injection capability
- debugfs entries
failslab, fail_page_alloc, and fail_make_request use this way.
failslab, fail_page_alloc, fail_usercopy, and fail_make_request use this way.
Helper functions:
fault_create_debugfs_attr(name, parent, attr);

View File

@ -9715,7 +9715,7 @@ F: security/keys/encrypted-keys/
KEYS-TRUSTED
M: James Bottomley <jejb@linux.ibm.com>
M: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
M: Jarkko Sakkinen <jarkko@kernel.org>
M: Mimi Zohar <zohar@linux.ibm.com>
L: linux-integrity@vger.kernel.org
L: keyrings@vger.kernel.org
@ -9727,7 +9727,7 @@ F: security/keys/trusted-keys/
KEYS/KEYRINGS
M: David Howells <dhowells@redhat.com>
M: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
M: Jarkko Sakkinen <jarkko@kernel.org>
L: keyrings@vger.kernel.org
S: Maintained
F: Documentation/security/keys/core.rst
@ -17717,7 +17717,7 @@ F: drivers/platform/x86/toshiba-wmi.c
TPM DEVICE DRIVER
M: Peter Huewe <peterhuewe@gmx.de>
M: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
M: Jarkko Sakkinen <jarkko@kernel.org>
R: Jason Gunthorpe <jgg@ziepe.ca>
L: linux-integrity@vger.kernel.org
S: Maintained

View File

@ -537,7 +537,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
if (map_start < map_end)
memmap_init_zone((unsigned long)(map_end - map_start),
args->nid, args->zone, page_to_pfn(map_start),
MEMINIT_EARLY, NULL);
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
return 0;
}
@ -547,7 +547,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone,
{
if (!vmem_map) {
memmap_init_zone(size, nid, zone, start_pfn,
MEMINIT_EARLY, NULL);
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
} else {
struct page *start;
struct memmap_init_callback_data args;

View File

@ -615,7 +615,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
VM_BUG_ON(pfn >> (64 - PAGE_SHIFT));
VM_BUG_ON((pfn << PAGE_SHIFT) & ~PTE_RPN_MASK);
return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot));
return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | _PAGE_PTE);
}
static inline unsigned long pte_pfn(pte_t pte)
@ -651,11 +651,6 @@ static inline pte_t pte_mkexec(pte_t pte)
return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC));
}
/*
 * Return a copy of @pte with _PAGE_PTE set.  The flag is converted with
 * cpu_to_be64() before being OR-ed into the raw pte value.
 */
static inline pte_t pte_mkpte(pte_t pte)
{
	return __pte_raw(cpu_to_be64(_PAGE_PTE) | pte_raw(pte));
}
static inline pte_t pte_mkwrite(pte_t pte)
{
/*
@ -819,6 +814,14 @@ static inline int pte_none(pte_t pte)
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, int percpu)
{
VM_WARN_ON(!(pte_raw(pte) & cpu_to_be64(_PAGE_PTE)));
/*
* Keep the _PAGE_PTE added till we are sure we handle _PAGE_PTE
* in all the callers.
*/
pte = __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
if (radix_enabled())
return radix__set_pte_at(mm, addr, ptep, pte, percpu);
return hash__set_pte_at(mm, addr, ptep, pte, percpu);
@ -866,6 +869,13 @@ static inline bool pte_ci(pte_t pte)
/*
 * Clear the PMD entry at @pmdp by storing an all-zero pmd.
 *
 * Don't use this if we can possibly have a hash page table entry mapping
 * this: with CONFIG_DEBUG_VM and radix disabled, a WARN_ON fires when the
 * entry has both H_PAGE_HASHPTE and _PAGE_PTE set.
 */
static inline void pmd_clear(pmd_t *pmdp)
{
	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled())
		WARN_ON((pmd_val(*pmdp) & (H_PAGE_HASHPTE | _PAGE_PTE)) ==
			(H_PAGE_HASHPTE | _PAGE_PTE));

	*pmdp = __pmd(0);
}
@ -914,6 +924,13 @@ static inline int pmd_bad(pmd_t pmd)
/*
 * Clear the PUD entry at @pudp by storing an all-zero pud.
 *
 * Don't use this if we can possibly have a hash page table entry mapping
 * this: with CONFIG_DEBUG_VM and radix disabled, a WARN_ON fires when the
 * entry has both H_PAGE_HASHPTE and _PAGE_PTE set.
 */
static inline void pud_clear(pud_t *pudp)
{
	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled())
		WARN_ON((pud_val(*pudp) & (H_PAGE_HASHPTE | _PAGE_PTE)) ==
			(H_PAGE_HASHPTE | _PAGE_PTE));

	*pudp = __pud(0);
}

View File

@ -140,11 +140,6 @@ static inline pte_t pte_mkold(pte_t pte)
return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
}
/* Returns @pte unchanged; this variant sets no bit. */
static inline pte_t pte_mkpte(pte_t pte)
{
return pte;
}
static inline pte_t pte_mkspecial(pte_t pte)
{
return __pte(pte_val(pte) | _PAGE_SPECIAL);

View File

@ -184,9 +184,6 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
*/
VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
/* Add the pte bit when trying to set a pte */
pte = pte_mkpte(pte);
/* Note: mm->context.id might not yet have been assigned as
* this context might not have been activated yet when this
* is called.
@ -275,8 +272,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_
*/
VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
pte = pte_mkpte(pte);
pte = set_pte_filter(pte);
val = pte_val(pte);

View File

@ -224,7 +224,7 @@ static int memtrace_online(void)
ent->mem = 0;
}
if (add_memory(ent->nid, ent->start, ent->size)) {
if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) {
pr_err("Failed to add trace memory to node %d\n",
ent->nid);
ret += 1;

View File

@ -606,7 +606,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
block_sz = memory_block_size_bytes();
/* Add the memory */
rc = __add_memory(lmb->nid, lmb->base_addr, block_sz);
rc = __add_memory(lmb->nid, lmb->base_addr, block_sz, MHP_NONE);
if (rc) {
invalidate_lmb_associativity_index(lmb);
return rc;

View File

@ -194,7 +194,8 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
if (node < 0)
node = memory_add_physaddr_to_nid(info->start_addr);
result = __add_memory(node, info->start_addr, info->length);
result = __add_memory(node, info->start_addr, info->length,
MHP_NONE);
/*
* If the memory block has been used by the kernel, add_memory()

View File

@ -432,7 +432,8 @@ static ssize_t probe_store(struct device *dev, struct device_attribute *attr,
nid = memory_add_physaddr_to_nid(phys_addr);
ret = __add_memory(nid, phys_addr,
MIN_MEMORY_BLOCK_SIZE * sections_per_block);
MIN_MEMORY_BLOCK_SIZE * sections_per_block,
MHP_NONE);
if (ret)
goto out;

View File

@ -772,8 +772,8 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
return pfn_to_nid(pfn);
}
static int do_register_memory_block_under_node(int nid,
struct memory_block *mem_blk)
static void do_register_memory_block_under_node(int nid,
struct memory_block *mem_blk)
{
int ret;
@ -786,12 +786,19 @@ static int do_register_memory_block_under_node(int nid,
ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
&mem_blk->dev.kobj,
kobject_name(&mem_blk->dev.kobj));
if (ret)
return ret;
if (ret && ret != -EEXIST)
dev_err_ratelimited(&node_devices[nid]->dev,
"can't create link to %s in sysfs (%d)\n",
kobject_name(&mem_blk->dev.kobj), ret);
return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
ret = sysfs_create_link_nowarn(&mem_blk->dev.kobj,
&node_devices[nid]->dev.kobj,
kobject_name(&node_devices[nid]->dev.kobj));
if (ret && ret != -EEXIST)
dev_err_ratelimited(&mem_blk->dev,
"can't create link to %s in sysfs (%d)\n",
kobject_name(&node_devices[nid]->dev.kobj),
ret);
}
/* register memory section under specified node if it spans that node */
@ -827,7 +834,8 @@ static int register_mem_block_under_node_early(struct memory_block *mem_blk,
if (page_nid != nid)
continue;
return do_register_memory_block_under_node(nid, mem_blk);
do_register_memory_block_under_node(nid, mem_blk);
return 0;
}
/* mem section does not span the specified node */
return 0;
@ -842,7 +850,8 @@ static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
{
int nid = *(int *)arg;
return do_register_memory_block_under_node(nid, mem_blk);
do_register_memory_block_under_node(nid, mem_blk);
return 0;
}
/*
@ -860,8 +869,8 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
}
int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
enum meminit_context context)
void link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
enum meminit_context context)
{
walk_memory_blocks_func_t func;
@ -870,9 +879,9 @@ int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
else
func = register_mem_block_under_node_early;
return walk_memory_blocks(PFN_PHYS(start_pfn),
PFN_PHYS(end_pfn - start_pfn), (void *)&nid,
func);
walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn),
(void *)&nid, func);
return;
}
#ifdef CONFIG_HUGETLBFS

View File

@ -1270,7 +1270,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
zram_slot_unlock(zram, index);
/* Should NEVER happen. Return bio error if it does. */
if (unlikely(ret))
if (WARN_ON(ret))
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
return ret;

View File

@ -35,11 +35,17 @@ static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r)
return 0;
}
/*
 * Driver data attached to the device with dev_set_drvdata() by
 * dev_dax_kmem_probe() and read back in dev_dax_kmem_remove().
 */
struct dax_kmem_data {
/* kstrdup() copy of dev_name(dev); passed as the name to request_mem_region() */
const char *res_name;
/*
 * One slot per dev_dax range (allocated for dev_dax->nr_range entries):
 * the resource returned by request_mem_region() for range i, reset to
 * NULL once that resource has been released and freed.
 */
struct resource *res[];
};
static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
{
struct device *dev = &dev_dax->dev;
struct dax_kmem_data *data;
int rc = -ENOMEM;
int i, mapped = 0;
char *res_name;
int numa_node;
/*
@ -55,14 +61,17 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
return -EINVAL;
}
res_name = kstrdup(dev_name(dev), GFP_KERNEL);
if (!res_name)
data = kzalloc(sizeof(*data) + sizeof(struct resource *) * dev_dax->nr_range, GFP_KERNEL);
if (!data)
return -ENOMEM;
data->res_name = kstrdup(dev_name(dev), GFP_KERNEL);
if (!data->res_name)
goto err_res_name;
for (i = 0; i < dev_dax->nr_range; i++) {
struct resource *res;
struct range range;
int rc;
rc = dax_kmem_range(dev_dax, i, &range);
if (rc) {
@ -72,7 +81,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
}
/* Region is permanently reserved if hotremove fails. */
res = request_mem_region(range.start, range_len(&range), res_name);
res = request_mem_region(range.start, range_len(&range), data->res_name);
if (!res) {
dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n",
i, range.start, range.end);
@ -82,9 +91,10 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
*/
if (mapped)
continue;
kfree(res_name);
return -EBUSY;
rc = -EBUSY;
goto err_request_mem;
}
data->res[i] = res;
/*
* Set flags appropriate for System RAM. Leave ..._BUSY clear
@ -99,23 +109,30 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
* this as RAM automatically.
*/
rc = add_memory_driver_managed(numa_node, range.start,
range_len(&range), kmem_name);
range_len(&range), kmem_name, MHP_NONE);
if (rc) {
dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
i, range.start, range.end);
release_mem_region(range.start, range_len(&range));
release_resource(res);
kfree(res);
data->res[i] = NULL;
if (mapped)
continue;
kfree(res_name);
return rc;
goto err_request_mem;
}
mapped++;
}
dev_set_drvdata(dev, res_name);
dev_set_drvdata(dev, data);
return 0;
err_request_mem:
kfree(data->res_name);
err_res_name:
kfree(data);
return rc;
}
#ifdef CONFIG_MEMORY_HOTREMOVE
@ -123,7 +140,7 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
{
int i, success = 0;
struct device *dev = &dev_dax->dev;
const char *res_name = dev_get_drvdata(dev);
struct dax_kmem_data *data = dev_get_drvdata(dev);
/*
* We have one shot for removing memory, if some memory blocks were not
@ -142,7 +159,9 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
rc = remove_memory(dev_dax->target_node, range.start,
range_len(&range));
if (rc == 0) {
release_mem_region(range.start, range_len(&range));
release_resource(data->res[i]);
kfree(data->res[i]);
data->res[i] = NULL;
success++;
continue;
}
@ -153,7 +172,8 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
}
if (success >= dev_dax->nr_range) {
kfree(res_name);
kfree(data->res_name);
kfree(data);
dev_set_drvdata(dev, NULL);
}

View File

@ -726,7 +726,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
ret = add_memory(nid, PFN_PHYS((start_pfn)),
(HA_CHUNK << PAGE_SHIFT));
(HA_CHUNK << PAGE_SHIFT), MEMHP_MERGE_RESOURCE);
if (ret) {
pr_err("hot_add memory failed error is %d\n", ret);

View File

@ -845,8 +845,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
* will only be one mm, so no big deal.
*/
mmap_read_lock(mm);
if (!mmget_still_valid(mm))
goto skip_mm;
mutex_lock(&ufile->umap_lock);
list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
list) {
@ -865,7 +863,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
}
}
mutex_unlock(&ufile->umap_lock);
skip_mm:
mmap_read_unlock(mm);
mmput(mm);
}

View File

@ -871,15 +871,16 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
rmcd_error("pin_user_pages_fast err=%ld",
pinned);
nr_pages = 0;
} else
} else {
rmcd_error("pinned %ld out of %ld pages",
pinned, nr_pages);
/*
* Set nr_pages up to mean "how many pages to unpin, in
* the error handler:
*/
nr_pages = pinned;
}
ret = -EFAULT;
/*
* Set nr_pages up to mean "how many pages to unpin, in
* the error handler:
*/
nr_pages = pinned;
goto err_pg;
}
@ -1679,6 +1680,7 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv,
struct rio_dev *rdev;
struct rio_switch *rswitch = NULL;
struct rio_mport *mport;
struct device *dev;
size_t size;
u32 rval;
u32 swpinfo = 0;
@ -1693,8 +1695,10 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv,
rmcd_debug(RDEV, "name:%s ct:0x%x did:0x%x hc:0x%x", dev_info.name,
dev_info.comptag, dev_info.destid, dev_info.hopcount);
if (bus_find_device_by_name(&rio_bus_type, NULL, dev_info.name)) {
dev = bus_find_device_by_name(&rio_bus_type, NULL, dev_info.name);
if (dev) {
rmcd_debug(RDEV, "device %s already exists", dev_info.name);
put_device(dev);
return -EEXIST;
}

View File

@ -406,7 +406,7 @@ static void __init add_memory_merged(u16 rn)
if (!size)
goto skip_add;
for (addr = start; addr < start + size; addr += block_size)
add_memory(0, addr, block_size);
add_memory(0, addr, block_size, MHP_NONE);
skip_add:
first_rn = rn;
num = 1;

View File

@ -1480,31 +1480,29 @@ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
} else {
mmap_read_lock(mm);
}
if (mmget_still_valid(mm)) {
if (try) {
if (!mutex_trylock(&vdev->vma_lock)) {
mmap_read_unlock(mm);
mmput(mm);
return 0;
}
} else {
mutex_lock(&vdev->vma_lock);
if (try) {
if (!mutex_trylock(&vdev->vma_lock)) {
mmap_read_unlock(mm);
mmput(mm);
return 0;
}
list_for_each_entry_safe(mmap_vma, tmp,
&vdev->vma_list, vma_next) {
struct vm_area_struct *vma = mmap_vma->vma;
if (vma->vm_mm != mm)
continue;
list_del(&mmap_vma->vma_next);
kfree(mmap_vma);
zap_vma_ptes(vma, vma->vm_start,
vma->vm_end - vma->vm_start);
}
mutex_unlock(&vdev->vma_lock);
} else {
mutex_lock(&vdev->vma_lock);
}
list_for_each_entry_safe(mmap_vma, tmp,
&vdev->vma_list, vma_next) {
struct vm_area_struct *vma = mmap_vma->vma;
if (vma->vm_mm != mm)
continue;
list_del(&mmap_vma->vma_next);
kfree(mmap_vma);
zap_vma_ptes(vma, vma->vm_start,
vma->vm_end - vma->vm_start);
}
mutex_unlock(&vdev->vma_lock);
mmap_read_unlock(mm);
mmput(mm);
}

View File

@ -424,7 +424,8 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id)
dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id);
return add_memory_driver_managed(nid, addr, memory_block_size_bytes(),
vm->resource_name);
vm->resource_name,
MEMHP_MERGE_RESOURCE);
}
/*

View File

@ -331,7 +331,7 @@ static enum bp_state reserve_additional_memory(void)
mutex_unlock(&balloon_mutex);
/* add_memory_resource() requires the device_hotplug lock */
lock_device_hotplug();
rc = add_memory_resource(nid, resource);
rc = add_memory_resource(nid, resource, MEMHP_MERGE_RESOURCE);
unlock_device_hotplug();
mutex_lock(&balloon_mutex);

View File

@ -8,6 +8,7 @@
#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/magic.h>
#include <linux/nospec.h>
#include "autofs_i.h"
@ -563,7 +564,7 @@ out:
static ioctl_fn lookup_dev_ioctl(unsigned int cmd)
{
static ioctl_fn _ioctls[] = {
static const ioctl_fn _ioctls[] = {
autofs_dev_ioctl_version,
autofs_dev_ioctl_protover,
autofs_dev_ioctl_protosubver,
@ -581,7 +582,10 @@ static ioctl_fn lookup_dev_ioctl(unsigned int cmd)
};
unsigned int idx = cmd_idx(cmd);
return (idx >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[idx];
if (idx >= ARRAY_SIZE(_ioctls))
return NULL;
idx = array_index_nospec(idx, ARRAY_SIZE(_ioctls));
return _ioctls[idx];
}
/* ioctl dispatcher */

View File

@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
@ -421,6 +422,26 @@ static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
return 0;
}
/*
 * Scan the @nr program headers in @cmds and return the largest p_align
 * found on a PT_LOAD entry, rounded with ELF_PAGEALIGN().  Entries whose
 * p_align is not a power of two are ignored.  The caller only applies the
 * result when it is non-zero.
 */
static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
{
	unsigned long largest = 0;
	int idx;

	for (idx = 0; idx < nr; idx++) {
		unsigned long p_align;

		if (cmds[idx].p_type != PT_LOAD)
			continue;

		p_align = cmds[idx].p_align;

		/* skip non-power of two alignments as invalid */
		if (!is_power_of_2(p_align))
			continue;

		if (p_align > largest)
			largest = p_align;
	}

	/* round the winning alignment up to a page multiple */
	return ELF_PAGEALIGN(largest);
}
/**
* load_elf_phdrs() - load ELF program headers
* @elf_ex: ELF header of the binary whose program headers should be loaded
@ -1008,6 +1029,7 @@ out_free_interp:
int elf_prot, elf_flags;
unsigned long k, vaddr;
unsigned long total_size = 0;
unsigned long alignment;
if (elf_ppnt->p_type != PT_LOAD)
continue;
@ -1086,6 +1108,9 @@ out_free_interp:
load_bias = ELF_ET_DYN_BASE;
if (current->flags & PF_RANDOMIZE)
load_bias += arch_mmap_rnd();
alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
if (alignment)
load_bias &= ~(alignment - 1);
elf_flags |= MAP_FIXED;
} else
load_bias = 0;
@ -1389,126 +1414,6 @@ out:
* Jeremy Fitzhardinge <jeremy@sw.oz.au>
*/
/*
 * always_dump_vma() identifies special kernel mappings that should be part
 * of every core dump, so that the dump can be interpreted later without
 * needing the same kernel and hardware configuration to work out what PC
 * values meant.  These special mappings include the vDSO, vsyscall, and
 * other architecture specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
	/*
	 * Two cases are checked first, in this order:
	 *  - the vsyscall (gate) mapping;
	 *  - any vma whose vm_ops provides a .name op that returns a name.
	 *    All such vmas are assumed to always be dumped; if this changes,
	 *    a new vm_ops field can easily be added.
	 */
	if (vma == get_gate_vma(vma->vm_mm) ||
	    (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma)))
		return true;

	/*
	 * arch_vma_name() returns non-NULL for special architecture mappings,
	 * such as vDSO sections.
	 */
	return !!arch_vma_name(vma);
}
/*
 * Decide what to dump of a segment, part, all or none.
 *
 * Returns the number of bytes of @vma to write out: the full length of the
 * mapping, PAGE_SIZE when only a leading ELF header page is wanted, or 0.
 * @mm_flags holds the MMF_DUMP_* filter bits tested through FILTER().
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
				   unsigned long mm_flags)
{
#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
	const unsigned long whole = vma->vm_end - vma->vm_start;

	/* always dump the vdso and vsyscall sections */
	if (always_dump_vma(vma))
		return whole;

	if (vma->vm_flags & VM_DONTDUMP)
		return 0;

	/* support for DAX */
	if (vma_is_dax(vma)) {
		if (vma->vm_flags & VM_SHARED)
			return FILTER(DAX_SHARED) ? whole : 0;
		return FILTER(DAX_PRIVATE) ? whole : 0;
	}

	/* Hugetlb memory check */
	if (is_vm_hugetlb_page(vma)) {
		if (vma->vm_flags & VM_SHARED)
			return FILTER(HUGETLB_SHARED) ? whole : 0;
		return FILTER(HUGETLB_PRIVATE) ? whole : 0;
	}

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & VM_IO)
		return 0;

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (file_inode(vma->vm_file)->i_nlink == 0)
			return FILTER(ANON_SHARED) ? whole : 0;
		return FILTER(MAPPED_SHARED) ? whole : 0;
	}

	/* Dump segments that have been written to. */
	if (vma->anon_vma && FILTER(ANON_PRIVATE))
		return whole;
	if (!vma->vm_file)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		return whole;

	/*
	 * If this looks like the beginning of a DSO or executable mapping,
	 * check for an ELF header.  If we find one, dump the first page to
	 * aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) &&
	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
		u32 __user *header = (u32 __user *) vma->vm_start;
		u32 word;
		/*
		 * Doing it this way gets the constant folded by GCC.
		 */
		union {
			u32 cmp;
			char elfmag[SELFMAG];
		} magic;

		BUILD_BUG_ON(SELFMAG != sizeof word);
		magic.elfmag[EI_MAG0] = ELFMAG0;
		magic.elfmag[EI_MAG1] = ELFMAG1;
		magic.elfmag[EI_MAG2] = ELFMAG2;
		magic.elfmag[EI_MAG3] = ELFMAG3;

		/* a faulting read is treated as "no ELF magic here" */
		if (unlikely(get_user(word, header)))
			word = 0;
		if (word == magic.cmp)
			return PAGE_SIZE;
	}

#undef	FILTER

	return 0;
}
/* An ELF note in memory */
struct memelfnote
{
@ -2220,32 +2125,6 @@ static void free_note_info(struct elf_note_info *info)
#endif
/*
 * Return the first vma to visit for @tsk: the head of tsk->mm->mmap when
 * that list is non-empty, otherwise @gate_vma.
 */
static struct vm_area_struct *first_vma(struct task_struct *tsk,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *head = tsk->mm->mmap;

	return head ? head : gate_vma;
}
/*
 * Iteration helper for walking a vma list.  It makes sure the caller visits
 * `gate_vma' before the walk terminates: once the regular list runs out,
 * gate_vma is returned, and after gate_vma itself the walk ends with NULL.
 */
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
				       struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *succ = this_vma->vm_next;

	if (succ)
		return succ;

	return this_vma == gate_vma ? NULL : gate_vma;
}
static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
elf_addr_t e_shoff, int segs)
{
@ -2272,9 +2151,8 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
static int elf_core_dump(struct coredump_params *cprm)
{
int has_dumped = 0;
int segs, i;
size_t vma_data_size = 0;
struct vm_area_struct *vma, *gate_vma;
int vma_count, segs, i;
size_t vma_data_size;
struct elfhdr elf;
loff_t offset = 0, dataoff;
struct elf_note_info info = { };
@ -2282,30 +2160,16 @@ static int elf_core_dump(struct coredump_params *cprm)
struct elf_shdr *shdr4extnum = NULL;
Elf_Half e_phnum;
elf_addr_t e_shoff;
elf_addr_t *vma_filesz = NULL;
struct core_vma_metadata *vma_meta;
if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
return 0;
/*
* We no longer stop all VM operations.
*
* This is because those processes that could possibly change map_count
* or the mmap / vma pages are now blocked in do_exit on current
* finishing this core dump.
*
* Only ptrace can touch these memory addresses, but it doesn't change
* the map_count or the pages allocated. So no possibility of crashing
* exists while dumping the mm->vm_next areas to the core file.
*/
/*
* The number of segs is recorded in the ELF header as a 16-bit value.
* Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
*/
segs = current->mm->map_count;
segs += elf_core_extra_phdrs();
gate_vma = get_gate_vma(current->mm);
if (gate_vma != NULL)
segs++;
segs = vma_count + elf_core_extra_phdrs();
/* for notes section */
segs++;
@ -2343,24 +2207,6 @@ static int elf_core_dump(struct coredump_params *cprm)
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
/*
* Zero vma process will get ZERO_SIZE_PTR here.
* Let coredump continue for register state at least.
*/
vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)),
GFP_KERNEL);
if (!vma_filesz)
goto end_coredump;
for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
vma = next_vma(vma, gate_vma)) {
unsigned long dump_size;
dump_size = vma_dump_size(vma, cprm->mm_flags);
vma_filesz[i++] = dump_size;
vma_data_size += dump_size;
}
offset += vma_data_size;
offset += elf_core_extra_data_size();
e_shoff = offset;
@ -2381,21 +2227,23 @@ static int elf_core_dump(struct coredump_params *cprm)
goto end_coredump;
/* Write program headers for segments dump */
for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
vma = next_vma(vma, gate_vma)) {
for (i = 0; i < vma_count; i++) {
struct core_vma_metadata *meta = vma_meta + i;
struct elf_phdr phdr;
phdr.p_type = PT_LOAD;
phdr.p_offset = offset;
phdr.p_vaddr = vma->vm_start;
phdr.p_vaddr = meta->start;
phdr.p_paddr = 0;
phdr.p_filesz = vma_filesz[i++];
phdr.p_memsz = vma->vm_end - vma->vm_start;
phdr.p_filesz = meta->dump_size;
phdr.p_memsz = meta->end - meta->start;
offset += phdr.p_filesz;
phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
if (vma->vm_flags & VM_WRITE)
phdr.p_flags = 0;
if (meta->flags & VM_READ)
phdr.p_flags |= PF_R;
if (meta->flags & VM_WRITE)
phdr.p_flags |= PF_W;
if (vma->vm_flags & VM_EXEC)
if (meta->flags & VM_EXEC)
phdr.p_flags |= PF_X;
phdr.p_align = ELF_EXEC_PAGESIZE;
@ -2417,28 +2265,11 @@ static int elf_core_dump(struct coredump_params *cprm)
if (!dump_skip(cprm, dataoff - cprm->pos))
goto end_coredump;
for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
vma = next_vma(vma, gate_vma)) {
unsigned long addr;
unsigned long end;
for (i = 0; i < vma_count; i++) {
struct core_vma_metadata *meta = vma_meta + i;
end = vma->vm_start + vma_filesz[i++];
for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
struct page *page;
int stop;
page = get_dump_page(addr);
if (page) {
void *kaddr = kmap(page);
stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
kunmap(page);
put_page(page);
} else
stop = !dump_skip(cprm, PAGE_SIZE);
if (stop)
goto end_coredump;
}
if (!dump_user_range(cprm, meta->start, meta->dump_size))
goto end_coredump;
}
dump_truncate(cprm);
@ -2453,7 +2284,7 @@ static int elf_core_dump(struct coredump_params *cprm)
end_coredump:
free_note_info(&info);
kfree(shdr4extnum);
kvfree(vma_filesz);
kvfree(vma_meta);
kfree(phdr4note);
return has_dumped;
}

View File

@ -1215,76 +1215,6 @@ struct elf_prstatus_fdpic
int pr_fpvalid; /* True if math co-processor being used. */
};
/*
 * Decide whether a segment is worth dumping; default is yes to be
 * sure (missing info is worse than too much; etc).
 * Personally I'd include everything, and use the coredump limit...
 *
 * I think we should skip something. But I am not sure how. H.J.
 *
 * Returns non-zero when @vma should be dumped under the MMF_DUMP_* bits in
 * @mm_flags, and 0 otherwise.  Each decision is reported through kdcore().
 */
static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
{
	int dump_ok;

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & VM_IO) {
		kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags);
		return 0;
	}

	/* If we may not read the contents, don't allow us to dump
	 * them either. "dump_write()" can't handle it anyway.
	 */
	if (!(vma->vm_flags & VM_READ)) {
		kdcore("%08lx: %08lx: no (!read)", vma->vm_start, vma->vm_flags);
		return 0;
	}

	/* support for DAX */
	if (vma_is_dax(vma)) {
		if (vma->vm_flags & VM_SHARED) {
			dump_ok = test_bit(MMF_DUMP_DAX_SHARED, &mm_flags);
			kdcore("%08lx: %08lx: %s (DAX shared)", vma->vm_start,
			       vma->vm_flags, dump_ok ? "yes" : "no");
			return dump_ok;
		}

		dump_ok = test_bit(MMF_DUMP_DAX_PRIVATE, &mm_flags);
		kdcore("%08lx: %08lx: %s (DAX private)", vma->vm_start,
		       vma->vm_flags, dump_ok ? "yes" : "no");
		return dump_ok;
	}

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (file_inode(vma->vm_file)->i_nlink == 0)
			dump_ok = test_bit(MMF_DUMP_ANON_SHARED, &mm_flags);
		else
			dump_ok = test_bit(MMF_DUMP_MAPPED_SHARED, &mm_flags);

		kdcore("%08lx: %08lx: %s (share)", vma->vm_start,
		       vma->vm_flags, dump_ok ? "yes" : "no");
		return dump_ok;
	}

#ifdef CONFIG_MMU
	/* By default, if it hasn't been written to, don't write it out */
	if (!vma->anon_vma) {
		dump_ok = test_bit(MMF_DUMP_MAPPED_PRIVATE, &mm_flags);
		kdcore("%08lx: %08lx: %s (!anon)", vma->vm_start,
		       vma->vm_flags, dump_ok ? "yes" : "no");
		return dump_ok;
	}
#endif

	/* everything left is governed by the ANON_PRIVATE filter bit */
	dump_ok = test_bit(MMF_DUMP_ANON_PRIVATE, &mm_flags);
	kdcore("%08lx: %08lx: %s", vma->vm_start, vma->vm_flags,
	       dump_ok ? "yes" : "no");
	return dump_ok;
}
/* An ELF note in memory */
struct memelfnote
{
@ -1524,54 +1454,21 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
/*
* dump the segments for an MMU process
*/
static bool elf_fdpic_dump_segments(struct coredump_params *cprm)
static bool elf_fdpic_dump_segments(struct coredump_params *cprm,
struct core_vma_metadata *vma_meta,
int vma_count)
{
struct vm_area_struct *vma;
int i;
for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
#ifdef CONFIG_MMU
unsigned long addr;
#endif
for (i = 0; i < vma_count; i++) {
struct core_vma_metadata *meta = vma_meta + i;
if (!maydump(vma, cprm->mm_flags))
continue;
#ifdef CONFIG_MMU
for (addr = vma->vm_start; addr < vma->vm_end;
addr += PAGE_SIZE) {
bool res;
struct page *page = get_dump_page(addr);
if (page) {
void *kaddr = kmap(page);
res = dump_emit(cprm, kaddr, PAGE_SIZE);
kunmap(page);
put_page(page);
} else {
res = dump_skip(cprm, PAGE_SIZE);
}
if (!res)
return false;
}
#else
if (!dump_emit(cprm, (void *) vma->vm_start,
vma->vm_end - vma->vm_start))
if (!dump_user_range(cprm, meta->start, meta->dump_size))
return false;
#endif
}
return true;
}
static size_t elf_core_vma_data_size(unsigned long mm_flags)
{
struct vm_area_struct *vma;
size_t size = 0;
for (vma = current->mm->mmap; vma; vma = vma->vm_next)
if (maydump(vma, mm_flags))
size += vma->vm_end - vma->vm_start;
return size;
}
/*
* Actual dumper
*
@ -1582,9 +1479,8 @@ static size_t elf_core_vma_data_size(unsigned long mm_flags)
static int elf_fdpic_core_dump(struct coredump_params *cprm)
{
int has_dumped = 0;
int segs;
int vma_count, segs;
int i;
struct vm_area_struct *vma;
struct elfhdr *elf = NULL;
loff_t offset = 0, dataoff;
struct memelfnote psinfo_note, auxv_note;
@ -1598,18 +1494,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
elf_addr_t e_shoff;
struct core_thread *ct;
struct elf_thread_status *tmp;
/*
* We no longer stop all VM operations.
*
* This is because those proceses that could possibly change map_count
* or the mmap / vma pages are now blocked in do_exit on current
* finishing this core dump.
*
* Only ptrace can touch these memory addresses, but it doesn't change
* the map_count or the pages allocated. So no possibility of crashing
* exists while dumping the mm->vm_next areas to the core file.
*/
struct core_vma_metadata *vma_meta = NULL;
size_t vma_data_size;
/* alloc memory for large data structures: too large to be on stack */
elf = kmalloc(sizeof(*elf), GFP_KERNEL);
@ -1619,6 +1505,9 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
if (!psinfo)
goto end_coredump;
if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
goto end_coredump;
for (ct = current->mm->core_state->dumper.next;
ct; ct = ct->next) {
tmp = elf_dump_thread_status(cprm->siginfo->si_signo,
@ -1638,8 +1527,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
tmp->next = thread_list;
thread_list = tmp;
segs = current->mm->map_count;
segs += elf_core_extra_phdrs();
segs = vma_count + elf_core_extra_phdrs();
/* for notes section */
segs++;
@ -1684,7 +1572,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
/* Page-align dumped data */
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
offset += elf_core_vma_data_size(cprm->mm_flags);
offset += vma_data_size;
offset += elf_core_extra_data_size();
e_shoff = offset;
@ -1704,23 +1592,26 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
goto end_coredump;
/* write program headers for segments dump */
for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
for (i = 0; i < vma_count; i++) {
struct core_vma_metadata *meta = vma_meta + i;
struct elf_phdr phdr;
size_t sz;
sz = vma->vm_end - vma->vm_start;
sz = meta->end - meta->start;
phdr.p_type = PT_LOAD;
phdr.p_offset = offset;
phdr.p_vaddr = vma->vm_start;
phdr.p_vaddr = meta->start;
phdr.p_paddr = 0;
phdr.p_filesz = maydump(vma, cprm->mm_flags) ? sz : 0;
phdr.p_filesz = meta->dump_size;
phdr.p_memsz = sz;
offset += phdr.p_filesz;
phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
if (vma->vm_flags & VM_WRITE)
phdr.p_flags = 0;
if (meta->flags & VM_READ)
phdr.p_flags |= PF_R;
if (meta->flags & VM_WRITE)
phdr.p_flags |= PF_W;
if (vma->vm_flags & VM_EXEC)
if (meta->flags & VM_EXEC)
phdr.p_flags |= PF_X;
phdr.p_align = ELF_EXEC_PAGESIZE;
@ -1752,7 +1643,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
if (!dump_skip(cprm, dataoff - cprm->pos))
goto end_coredump;
if (!elf_fdpic_dump_segments(cprm))
if (!elf_fdpic_dump_segments(cprm, vma_meta, vma_count))
goto end_coredump;
if (!elf_core_write_extra_data(cprm))
@ -1776,6 +1667,7 @@ end_coredump:
thread_list = thread_list->next;
kfree(tmp);
}
kvfree(vma_meta);
kfree(phdr4note);
kfree(elf);
kfree(psinfo);

View File

@ -1168,7 +1168,7 @@ EXPORT_SYMBOL(configfs_depend_item);
/*
* Release the dependent linkage. This is much simpler than
* configfs_depend_item() because we know that that the client driver is
* configfs_depend_item() because we know that the client driver is
* pinned, thus the subsystem is pinned, and therefore configfs is pinned.
*/
void configfs_undepend_item(struct config_item *target)

View File

@ -267,7 +267,7 @@ flush_write_buffer(struct file *file, struct configfs_buffer *buffer, size_t cou
* There is no easy way for us to know if userspace is only doing a partial
* write, so we don't support them. We expect the entire buffer to come
* on the first write.
* Hint: if you're writing a value, first read the file, modify only the
* Hint: if you're writing a value, first read the file, modify only
* the value you're changing, then write entire buffer back.
*/

View File

@ -840,17 +840,17 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
ssize_t n;
if (cprm->written + nr > cprm->limit)
return 0;
while (nr) {
if (dump_interrupted())
return 0;
n = __kernel_write(file, addr, nr, &pos);
if (n <= 0)
return 0;
file->f_pos = pos;
cprm->written += n;
cprm->pos += n;
nr -= n;
}
if (dump_interrupted())
return 0;
n = __kernel_write(file, addr, nr, &pos);
if (n != nr)
return 0;
file->f_pos = pos;
cprm->written += n;
cprm->pos += n;
return 1;
}
EXPORT_SYMBOL(dump_emit);
@ -876,6 +876,40 @@ int dump_skip(struct coredump_params *cprm, size_t nr)
}
EXPORT_SYMBOL(dump_skip);
#ifdef CONFIG_ELF_CORE
/*
 * Emit the user address range [start, start + len) into the core file,
 * stepping one PAGE_SIZE at a time.
 *
 * Returns 1 when every page was emitted or skipped successfully, 0 as soon
 * as dump_emit() or dump_skip() reports a failure.
 */
int dump_user_range(struct coredump_params *cprm, unsigned long start,
		    unsigned long len)
{
	const unsigned long end = start + len;
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE) {
		struct page *page;
		int ok;

		/*
		 * get_dump_page() returns NULL for an empty page table entry
		 * that would otherwise have been filled with the zero page.
		 * That spares us from allocating page tables for address
		 * ranges that were never used, and lets the hole be skipped
		 * so the resulting core file can stay sparse.
		 */
		page = get_dump_page(addr);
		if (!page) {
			ok = dump_skip(cprm, PAGE_SIZE);
		} else {
			void *kaddr = kmap(page);

			ok = dump_emit(cprm, kaddr, PAGE_SIZE);
			kunmap(page);
			put_page(page);
		}

		if (!ok)
			return 0;
	}

	return 1;
}
#endif
int dump_align(struct coredump_params *cprm, int align)
{
unsigned mod = cprm->pos & (align - 1);
@ -902,3 +936,183 @@ void dump_truncate(struct coredump_params *cprm)
}
}
EXPORT_SYMBOL(dump_truncate);
/*
* The purpose of always_dump_vma() is to make sure that special kernel mappings
* that are useful for post-mortem analysis are included in every core dump.
* In that way we ensure that the core dump is fully interpretable later
* without matching up the same kernel and hardware config to see what PC values
* meant. These special mappings include - vDSO, vsyscall, and other
* architecture specific mappings
*/
static bool always_dump_vma(struct vm_area_struct *vma)
{
/* Any vsyscall mappings? */
if (vma == get_gate_vma(vma->vm_mm))
return true;
/*
* Assume that all vmas with a .name op should always be dumped.
* If this changes, a new vm_ops field can easily be added.
*/
if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
return true;
/*
* arch_vma_name() returns non-NULL for special architecture mappings,
* such as vDSO sections.
*/
if (arch_vma_name(vma))
return true;
return false;
}
/*
* Decide how much of @vma's contents should be included in a core dump.
*/
static unsigned long vma_dump_size(struct vm_area_struct *vma,
unsigned long mm_flags)
{
#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
/* always dump the vdso and vsyscall sections */
if (always_dump_vma(vma))
goto whole;
if (vma->vm_flags & VM_DONTDUMP)
return 0;
/* support for DAX */
if (vma_is_dax(vma)) {
if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
goto whole;
if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
goto whole;
return 0;
}
/* Hugetlb memory check */
if (is_vm_hugetlb_page(vma)) {
if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
goto whole;
if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
goto whole;
return 0;
}
/* Do not dump I/O mapped devices or special mappings */
if (vma->vm_flags & VM_IO)
return 0;
/* By default, dump shared memory if mapped from an anonymous file. */
if (vma->vm_flags & VM_SHARED) {
if (file_inode(vma->vm_file)->i_nlink == 0 ?
FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
goto whole;
return 0;
}
/* Dump segments that have been written to. */
if ((!IS_ENABLED(CONFIG_MMU) || vma->anon_vma) && FILTER(ANON_PRIVATE))
goto whole;
if (vma->vm_file == NULL)
return 0;
if (FILTER(MAPPED_PRIVATE))
goto whole;
/*
* If this is the beginning of an executable file mapping,
* dump the first page to aid in determining what was mapped here.
*/
if (FILTER(ELF_HEADERS) &&
vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ) &&
(READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
return PAGE_SIZE;
#undef FILTER
return 0;
whole:
return vma->vm_end - vma->vm_start;
}
/*
 * Starting point for a walk over @tsk's VMA list that also visits
 * @gate_vma: the head of tsk->mm->mmap if the list is non-empty,
 * otherwise @gate_vma itself.
 */
static struct vm_area_struct *first_vma(struct task_struct *tsk,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *head = tsk->mm->mmap;

	return head ? head : gate_vma;
}
/*
 * Step function for walking a vma list. It makes sure the caller gets to
 * visit `gate_vma' before the walk terminates: once the vm_next chain runs
 * out, `gate_vma' is handed out, and the step after that returns NULL.
 */
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
				       struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *succ = this_vma->vm_next;

	if (succ)
		return succ;

	/* End of the chain: stop if we just visited the gate vma. */
	return (this_vma == gate_vma) ? NULL : gate_vma;
}
/*
 * Under the mmap_lock, take a snapshot of relevant information about the task's
 * VMAs.
 *
 * @cprm:              coredump parameters; only cprm->mm_flags is read here.
 * @vma_count:         out: number of entries in the snapshot array.
 * @vma_meta:          out: kvmalloc'ed array of per-VMA metadata.
 * @vma_data_size_ptr: out: sum of dump_size over all entries (success only).
 *
 * Returns 0 on success, in which case the caller owns *vma_meta and must
 * release it with kvfree(). On failure returns -EINTR (the killable lock
 * acquisition failed), -ENOMEM (array allocation failed) or -EFAULT (the
 * number of VMAs walked did not match the expected count); no memory is
 * left allocated by this function on any of those paths.
 */
int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
		      struct core_vma_metadata **vma_meta,
		      size_t *vma_data_size_ptr)
{
	struct vm_area_struct *vma, *gate_vma;
	struct mm_struct *mm = current->mm;
	int i;
	size_t vma_data_size = 0;

	/*
	 * Once the stack expansion code is fixed to not change VMA bounds
	 * under mmap_lock in read mode, this can be changed to take the
	 * mmap_lock in read mode.
	 */
	if (mmap_write_lock_killable(mm))
		return -EINTR;

	/* The gate VMA is not counted in map_count, so reserve a slot for it. */
	gate_vma = get_gate_vma(mm);
	*vma_count = mm->map_count + (gate_vma ? 1 : 0);

	*vma_meta = kvmalloc_array(*vma_count, sizeof(**vma_meta), GFP_KERNEL);
	if (!*vma_meta) {
		mmap_write_unlock(mm);
		return -ENOMEM;
	}

	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma), i++) {
		struct core_vma_metadata *m = (*vma_meta) + i;

		m->start = vma->vm_start;
		m->end = vma->vm_end;
		m->flags = vma->vm_flags;
		m->dump_size = vma_dump_size(vma, cprm->mm_flags);

		vma_data_size += m->dump_size;
	}

	mmap_write_unlock(mm);

	if (WARN_ON(i != *vma_count)) {
		/*
		 * Do not leak the snapshot array on this error path. Clear
		 * the caller's pointer as well, so that a caller which
		 * kvfree()s *vma_meta in its own cleanup does not free it
		 * a second time.
		 */
		kvfree(*vma_meta);
		*vma_meta = NULL;
		return -EFAULT;
	}

	*vma_data_size_ptr = vma_data_size;
	return 0;
}

View File

@ -349,6 +349,7 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode,
pgoff_t index,
unsigned long num_ra_pages)
{
DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index);
struct page *page;
index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
@ -358,8 +359,7 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode,
if (page)
put_page(page);
else if (num_ra_pages > 1)
page_cache_readahead_unbounded(inode->i_mapping, NULL,
index, num_ra_pages, 0);
page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
page = read_mapping_page(inode->i_mapping, index, NULL);
}
return page;

View File

@ -228,6 +228,7 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
pgoff_t index,
unsigned long num_ra_pages)
{
DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index);
struct page *page;
index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
@ -237,8 +238,7 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
if (page)
put_page(page);
else if (num_ra_pages > 1)
page_cache_readahead_unbounded(inode->i_mapping, NULL,
index, num_ra_pages, 0);
page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
page = read_mapping_page(inode->i_mapping, index, NULL);
}
return page;

View File

@ -181,6 +181,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
mapping->a_ops = &empty_aops;
mapping->host = inode;
mapping->flags = 0;
if (sb->s_type->fs_flags & FS_THP_SUPPORT)
__set_bit(AS_THP_SUPPORT, &mapping->flags);
mapping->wb_err = 0;
atomic_set(&mapping->i_mmap_writable, 0);
#ifdef CONFIG_READ_ONLY_THP_FOR_FS

View File

@ -355,7 +355,7 @@ void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap,
/**
* nilfs_bmap_assign - assign a new block number to a block
* @bmap: bmap
* @bhp: pointer to buffer head
* @bh: pointer to buffer head
* @blocknr: block number
* @binfo: block information
*

View File

@ -889,7 +889,7 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
* nilfs_cpfile_change_cpmode - change checkpoint mode
* @cpfile: inode of checkpoint file
* @cno: checkpoint number
* @status: mode of checkpoint
* @mode: mode of checkpoint
*
* Description: nilfs_cpfile_change_cpmode() changes the mode of the checkpoint
* specified by @cno. The mode @mode is NILFS_CHECKPOINT or NILFS_SNAPSHOT.
@ -930,12 +930,12 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
/**
* nilfs_cpfile_get_stat - get checkpoint statistics
* @cpfile: inode of checkpoint file
* @stat: pointer to a structure of checkpoint statistics
* @cpstat: pointer to a structure of checkpoint statistics
*
* Description: nilfs_cpfile_get_stat() returns information about checkpoints.
*
* Return Value: On success, 0 is returned, and checkpoints information is
* stored in the place pointed by @stat. On error, one of the following
* stored in the place pointed by @cpstat. On error, one of the following
* negative error codes is returned.
*
* %-EIO - I/O error.

View File

@ -69,7 +69,6 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode,
/**
* nilfs_forget_buffer - discard dirty state
* @inode: owner inode of the buffer
* @bh: buffer head of the buffer to be discarded
*/
void nilfs_forget_buffer(struct buffer_head *bh)

View File

@ -546,13 +546,13 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
/**
* nilfs_sufile_get_stat - get segment usage statistics
* @sufile: inode of segment usage file
* @stat: pointer to a structure of segment usage statistics
* @sustat: pointer to a structure of segment usage statistics
*
* Description: nilfs_sufile_get_stat() returns information about segment
* usage.
*
* Return Value: On success, 0 is returned, and segment usage information is
* stored in the place pointed by @stat. On error, one of the following
* stored in the place pointed by @sustat. On error, one of the following
* negative error codes is returned.
*
* %-EIO - I/O error.

View File

@ -1244,24 +1244,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
count = -EINTR;
goto out_mm;
}
/*
* Avoid to modify vma->vm_flags
* without locked ops while the
* coredump reads the vm_flags.
*/
if (!mmget_still_valid(mm)) {
/*
* Silently return "count"
* like if get_task_mm()
* failed. FIXME: should this
* function have returned
* -ESRCH if get_task_mm()
* failed like if
* get_proc_task() fails?
*/
mmap_write_unlock(mm);
goto out_mm;
}
for (vma = mm->mmap; vma; vma = vma->vm_next) {
vma->vm_flags &= ~VM_SOFTDIRTY;
vma_set_page_prot(vma);

View File

@ -224,7 +224,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
if (!pages)
goto out_free;
nr = find_get_pages(inode->i_mapping, &pgoff, lpages, pages);
nr = find_get_pages_contig(inode->i_mapping, pgoff, lpages, pages);
if (nr != lpages)
goto out_free_pages; /* leave if some pages were missing */

View File

@ -356,6 +356,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
}
i->i_mode = mode;
i->i_blocks = (i->i_size + 511) >> 9;
unlock_new_inode(i);
return i;

View File

@ -601,8 +601,6 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
/* the various vma->vm_userfaultfd_ctx still points to it */
mmap_write_lock(mm);
/* no task can run (and in turn coredump) yet */
VM_WARN_ON(!mmget_still_valid(mm));
for (vma = mm->mmap; vma; vma = vma->vm_next)
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@ -842,7 +840,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
/* len == 0 means wake all */
struct userfaultfd_wake_range range = { .len = 0, };
unsigned long new_flags;
bool still_valid;
WRITE_ONCE(ctx->released, true);
@ -858,7 +855,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
* taking the mmap_lock for writing.
*/
mmap_write_lock(mm);
still_valid = mmget_still_valid(mm);
prev = NULL;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
cond_resched();
@ -869,17 +865,15 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
continue;
}
new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
if (still_valid) {
prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
new_flags, vma->anon_vma,
vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
NULL_VM_UFFD_CTX);
if (prev)
vma = prev;
else
prev = vma;
}
prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
new_flags, vma->anon_vma,
vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
NULL_VM_UFFD_CTX);
if (prev)
vma = prev;
else
prev = vma;
vma->vm_flags = new_flags;
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
}
@ -1309,8 +1303,6 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
goto out;
mmap_write_lock(mm);
if (!mmget_still_valid(mm))
goto out_unlock;
vma = find_vma_prev(mm, start, &prev);
if (!vma)
goto out_unlock;
@ -1511,8 +1503,6 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
goto out;
mmap_write_lock(mm);
if (!mmget_still_valid(mm))
goto out_unlock;
vma = find_vma_prev(mm, start, &prev);
if (!vma)
goto out_unlock;

View File

@ -188,12 +188,10 @@ static inline unsigned fls_long(unsigned long l)
static inline int get_count_order(unsigned int count)
{
int order;
if (count == 0)
return -1;
order = fls(count) - 1;
if (count & (count - 1))
order++;
return order;
return fls(--count);
}
/**
@ -206,10 +204,7 @@ static inline int get_count_order_long(unsigned long l)
{
if (l == 0UL)
return -1;
else if (l & (l - 1UL))
return (int)fls_long(l);
else
return (int)fls_long(l) - 1;
return (int)fls_long(--l);
}
/**

View File

@ -8,6 +8,7 @@
#include <linux/genhd.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/minmax.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/pagemap.h>

View File

@ -7,10 +7,14 @@
#ifndef __LINUX_BVEC_ITER_H
#define __LINUX_BVEC_ITER_H
#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/errno.h>
#include <linux/limits.h>
#include <linux/minmax.h>
#include <linux/mm.h>
#include <linux/types.h>
struct page;
/**
* struct bio_vec - a contiguous range of physical memory addresses

View File

@ -7,6 +7,12 @@
#include <linux/fs.h>
#include <asm/siginfo.h>
/*
 * Per-VMA information recorded for a core dump, so the dumper can work
 * from these saved values instead of the VMA itself.
 */
struct core_vma_metadata {
	unsigned long start;		/* start address of the mapping */
	unsigned long end;		/* end address of the mapping */
	unsigned long flags;		/* VM_* flags of the mapping */
	unsigned long dump_size;	/* number of bytes to write out */
};
/*
* These are the only things you should do on a core-file: use only these
* functions to write out all the necessary info.
@ -16,6 +22,11 @@ extern int dump_skip(struct coredump_params *cprm, size_t nr);
extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
extern int dump_align(struct coredump_params *cprm, int align);
extern void dump_truncate(struct coredump_params *cprm);
int dump_user_range(struct coredump_params *cprm, unsigned long start,
unsigned long len);
int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
struct core_vma_metadata **vma_meta,
size_t *vma_data_size_ptr);
#ifdef CONFIG_COREDUMP
extern void do_coredump(const kernel_siginfo_t *siginfo);
#else

View File

@ -0,0 +1,22 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_FAULT_INJECT_USERCOPY_H__
#define __LINUX_FAULT_INJECT_USERCOPY_H__
/*
* This header provides a wrapper for injecting failures to user space memory
* access functions.
*/
#include <linux/types.h>
#ifdef CONFIG_FAULT_INJECTION_USERCOPY
/*
 * Only declared here; the implementation lives elsewhere.
 * NOTE(review): presumably returns true when the calling usercopy function
 * should fail artificially - confirm against the implementation.
 */
bool should_fail_usercopy(void);
#else
/* Usercopy fault injection is compiled out: never request a failure. */
static inline bool should_fail_usercopy(void) { return false; }
#endif /* CONFIG_FAULT_INJECTION_USERCOPY */
#endif /* __LINUX_FAULT_INJECT_USERCOPY_H__ */

View File

@ -2209,6 +2209,7 @@ struct file_system_type {
#define FS_HAS_SUBTYPE 4
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
#define FS_THP_SUPPORT 8192 /* Remove once all fs converted */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
@ -2696,33 +2697,6 @@ static inline errseq_t file_sample_sb_err(struct file *file)
return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
}
static inline int filemap_nr_thps(struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
return atomic_read(&mapping->nr_thps);
#else
return 0;
#endif
}
static inline void filemap_nr_thps_inc(struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
atomic_inc(&mapping->nr_thps);
#else
WARN_ON_ONCE(1);
#endif
}
static inline void filemap_nr_thps_dec(struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
atomic_dec(&mapping->nr_thps);
#else
WARN_ON_ONCE(1);
#endif
}
extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
int datasync);
extern int vfs_fsync(struct file *file, int datasync);

View File

@ -263,7 +263,8 @@ void ida_destroy(struct ida *ida);
*
* Allocate an ID between 0 and %INT_MAX, inclusive.
*
* Context: Any context.
* Context: Any context. It is safe to call this function without
* locking in your code.
* Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
* or %-ENOSPC if there are no free IDs.
*/
@ -280,7 +281,8 @@ static inline int ida_alloc(struct ida *ida, gfp_t gfp)
*
* Allocate an ID between @min and %INT_MAX, inclusive.
*
* Context: Any context.
* Context: Any context. It is safe to call this function without
* locking in your code.
* Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
* or %-ENOSPC if there are no free IDs.
*/
@ -297,7 +299,8 @@ static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp)
*
* Allocate an ID between 0 and @max, inclusive.
*
* Context: Any context.
* Context: Any context. It is safe to call this function without
* locking in your code.
* Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
* or %-ENOSPC if there are no free IDs.
*/
@ -311,6 +314,10 @@ static inline void ida_init(struct ida *ida)
xa_init_flags(&ida->xa, IDA_INIT_FLAGS);
}
/*
* ida_simple_get() and ida_simple_remove() are deprecated. Use
* ida_alloc() and ida_free() instead respectively.
*/
#define ida_simple_get(ida, start, end, gfp) \
ida_alloc_range(ida, start, (end) - 1, gfp)
#define ida_simple_remove(ida, id) ida_free(ida, id)

View File

@ -58,6 +58,10 @@ struct resource {
#define IORESOURCE_EXT_TYPE_BITS 0x01000000 /* Resource extended types */
#define IORESOURCE_SYSRAM 0x01000000 /* System RAM (modifier) */
/* IORESOURCE_SYSRAM specific bits. */
#define IORESOURCE_SYSRAM_DRIVER_MANAGED 0x02000000 /* Always detected via a driver. */
#define IORESOURCE_SYSRAM_MERGEABLE 0x04000000 /* Resource can be merged. */
#define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */
#define IORESOURCE_DISABLED 0x10000000
@ -103,7 +107,6 @@ struct resource {
#define IORESOURCE_MEM_32BIT (3<<3)
#define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */
#define IORESOURCE_MEM_EXPANSIONROM (1<<6)
#define IORESOURCE_MEM_DRIVER_MANAGED (1<<7)
/* PnP I/O specific bits (IORESOURCE_BITS) */
#define IORESOURCE_IO_16BIT_ADDR (1<<0)
@ -248,8 +251,10 @@ extern struct resource * __request_region(struct resource *,
extern void __release_region(struct resource *, resource_size_t,
resource_size_t);
#ifdef CONFIG_MEMORY_HOTREMOVE
extern int release_mem_region_adjustable(struct resource *, resource_size_t,
resource_size_t);
extern void release_mem_region_adjustable(resource_size_t, resource_size_t);
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
extern void merge_system_ram_resource(struct resource *res);
#endif
/* Wrappers for managed devices */

View File

@ -3,8 +3,9 @@
#define _LINUX_JIFFIES_H
#include <linux/cache.h>
#include <linux/limits.h>
#include <linux/math64.h>
#include <linux/kernel.h>
#include <linux/minmax.h>
#include <linux/types.h>
#include <linux/time.h>
#include <linux/timex.h>

View File

@ -11,6 +11,7 @@
#include <linux/compiler.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/minmax.h>
#include <linux/typecheck.h>
#include <linux/printk.h>
#include <linux/build_bug.h>
@ -833,155 +834,6 @@ ftrace_vprintk(const char *fmt, va_list ap)
static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
#endif /* CONFIG_TRACING */
/*
* min()/max()/clamp() macros must accomplish three things:
*
* - avoid multiple evaluations of the arguments (so side-effects like
* "x++" happen only once) when non-constant.
* - perform strict type-checking (to generate warnings instead of
* nasty runtime surprises). See the "unnecessary" pointer comparison
* in __typecheck().
* - retain result as a constant expressions when called with only
* constant expressions (to avoid tripping VLA warnings in stack
* allocation usage).
*/
#define __typecheck(x, y) \
(!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
/*
* This returns a constant expression while determining if an argument is
* a constant expression, most importantly without evaluating the argument.
* Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
*/
#define __is_constexpr(x) \
(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
#define __no_side_effects(x, y) \
(__is_constexpr(x) && __is_constexpr(y))
#define __safe_cmp(x, y) \
(__typecheck(x, y) && __no_side_effects(x, y))
#define __cmp(x, y, op) ((x) op (y) ? (x) : (y))
#define __cmp_once(x, y, unique_x, unique_y, op) ({ \
typeof(x) unique_x = (x); \
typeof(y) unique_y = (y); \
__cmp(unique_x, unique_y, op); })
#define __careful_cmp(x, y, op) \
__builtin_choose_expr(__safe_cmp(x, y), \
__cmp(x, y, op), \
__cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
/**
* min - return minimum of two values of the same or compatible types
* @x: first value
* @y: second value
*/
#define min(x, y) __careful_cmp(x, y, <)
/**
* max - return maximum of two values of the same or compatible types
* @x: first value
* @y: second value
*/
#define max(x, y) __careful_cmp(x, y, >)
/**
* min3 - return minimum of three values
* @x: first value
* @y: second value
* @z: third value
*/
#define min3(x, y, z) min((typeof(x))min(x, y), z)
/**
* max3 - return maximum of three values
* @x: first value
* @y: second value
* @z: third value
*/
#define max3(x, y, z) max((typeof(x))max(x, y), z)
/**
* min_not_zero - return the minimum that is _not_ zero, unless both are zero
* @x: value1
* @y: value2
*/
#define min_not_zero(x, y) ({ \
typeof(x) __x = (x); \
typeof(y) __y = (y); \
__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
/**
* clamp - return a value clamped to a given range with strict typechecking
* @val: current value
* @lo: lowest allowable value
* @hi: highest allowable value
*
* This macro does strict typechecking of @lo/@hi to make sure they are of the
* same type as @val. See the unnecessary pointer comparisons.
*/
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
/*
* ..and if you can't take the strict
* types, you can specify one yourself.
*
* Or not use min/max/clamp at all, of course.
*/
/**
* min_t - return minimum of two values, using the specified type
* @type: data type to use
* @x: first value
* @y: second value
*/
#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <)
/**
* max_t - return maximum of two values, using the specified type
* @type: data type to use
* @x: first value
* @y: second value
*/
#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >)
/**
* clamp_t - return a value clamped to a given range using a given type
* @type: the type of variable to use
* @val: current value
* @lo: minimum allowable value
* @hi: maximum allowable value
*
* This macro does no typechecking and uses temporary variables of type
* @type to make all the comparisons.
*/
#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
/**
* clamp_val - return a value clamped to a given range using val's type
* @val: current value
* @lo: minimum allowable value
* @hi: maximum allowable value
*
* This macro does no typechecking and uses temporary variables of whatever
* type the input argument @val is. This is useful when @val is an unsigned
* type and @lo and @hi are literals that will otherwise be assigned a signed
* integer type.
*/
#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
/**
* swap - swap values of @a and @b
* @a: first value
* @b: second value
*/
#define swap(a, b) \
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
/* This counts to 12. Any more, it will return 13th argument. */
#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)

View File

@ -609,6 +609,15 @@ static inline void list_splice_tail_init(struct list_head *list,
pos != (head); \
pos = n, n = pos->prev)
/**
 * list_entry_is_head - test if the entry points to the head of the list
 * @pos:	the type * to use as a cursor.
 * @head:	the head for your list.
 * @member:	the name of the list_head within the struct.
 *
 * Evaluates to true when the list_head embedded in @pos as @member is
 * @head itself. @pos is parenthesized so that any pointer expression
 * (for example "p + 1"), not just a plain identifier, may be passed.
 */
#define list_entry_is_head(pos, head, member)				\
	(&(pos)->member == (head))
/**
* list_for_each_entry - iterate over list of given type
* @pos: the type * to use as a loop cursor.
@ -617,7 +626,7 @@ static inline void list_splice_tail_init(struct list_head *list,
*/
#define list_for_each_entry(pos, head, member) \
for (pos = list_first_entry(head, typeof(*pos), member); \
&pos->member != (head); \
!list_entry_is_head(pos, head, member); \
pos = list_next_entry(pos, member))
/**
@ -628,7 +637,7 @@ static inline void list_splice_tail_init(struct list_head *list,
*/
#define list_for_each_entry_reverse(pos, head, member) \
for (pos = list_last_entry(head, typeof(*pos), member); \
&pos->member != (head); \
!list_entry_is_head(pos, head, member); \
pos = list_prev_entry(pos, member))
/**
@ -653,7 +662,7 @@ static inline void list_splice_tail_init(struct list_head *list,
*/
#define list_for_each_entry_continue(pos, head, member) \
for (pos = list_next_entry(pos, member); \
&pos->member != (head); \
!list_entry_is_head(pos, head, member); \
pos = list_next_entry(pos, member))
/**
@ -667,7 +676,7 @@ static inline void list_splice_tail_init(struct list_head *list,
*/
#define list_for_each_entry_continue_reverse(pos, head, member) \
for (pos = list_prev_entry(pos, member); \
&pos->member != (head); \
!list_entry_is_head(pos, head, member); \
pos = list_prev_entry(pos, member))
/**
@ -679,7 +688,7 @@ static inline void list_splice_tail_init(struct list_head *list,
* Iterate over list of given type, continuing from current position.
*/
#define list_for_each_entry_from(pos, head, member) \
for (; &pos->member != (head); \
for (; !list_entry_is_head(pos, head, member); \
pos = list_next_entry(pos, member))
/**
@ -692,7 +701,7 @@ static inline void list_splice_tail_init(struct list_head *list,
* Iterate backwards over list of given type, continuing from current position.
*/
#define list_for_each_entry_from_reverse(pos, head, member) \
for (; &pos->member != (head); \
for (; !list_entry_is_head(pos, head, member); \
pos = list_prev_entry(pos, member))
/**
@ -705,7 +714,7 @@ static inline void list_splice_tail_init(struct list_head *list,
#define list_for_each_entry_safe(pos, n, head, member) \
for (pos = list_first_entry(head, typeof(*pos), member), \
n = list_next_entry(pos, member); \
&pos->member != (head); \
!list_entry_is_head(pos, head, member); \
pos = n, n = list_next_entry(n, member))
/**
@ -721,7 +730,7 @@ static inline void list_splice_tail_init(struct list_head *list,
#define list_for_each_entry_safe_continue(pos, n, head, member) \
for (pos = list_next_entry(pos, member), \
n = list_next_entry(pos, member); \
&pos->member != (head); \
!list_entry_is_head(pos, head, member); \
pos = n, n = list_next_entry(n, member))
/**
@ -736,7 +745,7 @@ static inline void list_splice_tail_init(struct list_head *list,
*/
#define list_for_each_entry_safe_from(pos, n, head, member) \
for (n = list_next_entry(pos, member); \
&pos->member != (head); \
!list_entry_is_head(pos, head, member); \
pos = n, n = list_next_entry(n, member))
/**
@ -752,7 +761,7 @@ static inline void list_splice_tail_init(struct list_head *list,
#define list_for_each_entry_safe_reverse(pos, n, head, member) \
for (pos = list_last_entry(head, typeof(*pos), member), \
n = list_prev_entry(pos, member); \
&pos->member != (head); \
!list_entry_is_head(pos, head, member); \
pos = n, n = list_prev_entry(n, member))
/**

View File

@ -57,6 +57,19 @@ enum {
MMOP_ONLINE_MOVABLE,
};
/* Flags for add_memory() and friends to specify memory hotplug details. */
typedef int __bitwise mhp_t;
/* No special request */
#define MHP_NONE ((__force mhp_t)0)
/*
* Allow merging of the added System RAM resource with adjacent,
* mergeable resources. After a successful call to add_memory_resource()
* with this flag set, the resource pointer must no longer be used as it
* might be stale, or the resource might have changed.
*/
#define MEMHP_MERGE_RESOURCE ((__force mhp_t)BIT(0))
/*
* Extended parameters for memory hotplug:
* altmap: alternative allocator for memmap array (optional)
@ -103,8 +116,8 @@ extern int online_pages(unsigned long pfn, unsigned long nr_pages,
int online_type, int nid);
extern struct zone *test_pages_in_a_zone(unsigned long start_pfn,
unsigned long end_pfn);
extern unsigned long __offline_isolated_pages(unsigned long start_pfn,
unsigned long end_pfn);
extern void __offline_isolated_pages(unsigned long start_pfn,
unsigned long end_pfn);
typedef void (*online_page_callback_t)(struct page *page, unsigned int order);
@ -247,13 +260,6 @@ static inline void zone_span_writelock(struct zone *zone) {}
static inline void zone_span_writeunlock(struct zone *zone) {}
static inline void zone_seqlock_init(struct zone *zone) {}
static inline int mhp_notimplemented(const char *func)
{
printk(KERN_WARNING "%s() called, with CONFIG_MEMORY_HOTPLUG disabled\n", func);
dump_stack();
return -ENOSYS;
}
static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
{
}
@ -344,14 +350,18 @@ static inline void __remove_memory(int nid, u64 start, u64 size) {}
extern void set_zone_contiguous(struct zone *zone);
extern void clear_zone_contiguous(struct zone *zone);
#ifdef CONFIG_MEMORY_HOTPLUG
extern void __ref free_area_init_core_hotplug(int nid);
extern int __add_memory(int nid, u64 start, u64 size);
extern int add_memory(int nid, u64 start, u64 size);
extern int add_memory_resource(int nid, struct resource *resource);
extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
extern int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
extern int add_memory_resource(int nid, struct resource *resource,
mhp_t mhp_flags);
extern int add_memory_driver_managed(int nid, u64 start, u64 size,
const char *resource_name);
const char *resource_name,
mhp_t mhp_flags);
extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages, struct vmem_altmap *altmap);
unsigned long nr_pages,
struct vmem_altmap *altmap, int migratetype);
extern void remove_pfn_range_from_zone(struct zone *zone,
unsigned long start_pfn,
unsigned long nr_pages);
@ -363,8 +373,8 @@ extern void sparse_remove_section(struct mem_section *ms,
unsigned long map_offset, struct vmem_altmap *altmap);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
unsigned long pnum);
extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
int online_type);
extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
unsigned long nr_pages);
#endif /* CONFIG_MEMORY_HOTPLUG */
#endif /* __LINUX_MEMORY_HOTPLUG_H */

153
include/linux/minmax.h Normal file
View File

@ -0,0 +1,153 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MINMAX_H
#define _LINUX_MINMAX_H
/*
* min()/max()/clamp() macros must accomplish three things:
*
* - avoid multiple evaluations of the arguments (so side-effects like
* "x++" happen only once) when non-constant.
* - perform strict type-checking (to generate warnings instead of
* nasty runtime surprises). See the "unnecessary" pointer comparison
* in __typecheck().
* - retain result as a constant expression when called with only
* constant expressions (to avoid tripping VLA warnings in stack
* allocation usage).
*/
/*
* Type-compatibility check for two expressions. The pointer comparison is
* only an operand of sizeof, so it is never evaluated; the compiler still
* has to type it and diagnoses a comparison of incompatible pointer types
* when typeof(x) and typeof(y) do not match. The value of the macro itself
* is always 1.
*/
#define __typecheck(x, y) \
(!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
/*
* This returns a constant expression while determining if an argument is
* a constant expression, most importantly without evaluating the argument.
* Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
*
* How it works: when x is an integer constant expression,
* (void *)((long)(x) * 0l) is a null pointer constant, so the conditional
* expression takes the type of its other arm (int *) and the sizeof operand
* has type int. Otherwise the first arm is an ordinary void *, the
* conditional has type void *, and the sizeof operand is void (size 1 as a
* GNU extension), which differs from sizeof(int).
*/
#define __is_constexpr(x) \
(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
/* True when both arguments are constant expressions. */
#define __no_side_effects(x, y) \
(__is_constexpr(x) && __is_constexpr(y))
/*
* True when the arguments may be expanded more than once. __typecheck()
* always yields 1; it is part of the condition only for its diagnostic.
*/
#define __safe_cmp(x, y) \
(__typecheck(x, y) && __no_side_effects(x, y))
/* Plain comparison; expands (and may evaluate) the chosen argument twice. */
#define __cmp(x, y, op) ((x) op (y) ? (x) : (y))
/*
* Comparison that copies each argument into a local named by the caller
* (unique_x / unique_y) inside a statement expression, so each argument is
* evaluated exactly once.
*/
#define __cmp_once(x, y, unique_x, unique_y, op) ({ \
typeof(x) unique_x = (x); \
typeof(y) unique_y = (y); \
__cmp(unique_x, unique_y, op); })
/*
* Select the implementation at compile time: the plain __cmp() when both
* arguments are constant expressions (keeping the result a constant
* expression), otherwise __cmp_once() with temporaries named through
* __UNIQUE_ID().
*/
#define __careful_cmp(x, y, op) \
__builtin_choose_expr(__safe_cmp(x, y), \
__cmp(x, y, op), \
__cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
/**
* min - return minimum of two values of the same or compatible types
* @x: first value
* @y: second value
*/
#define min(x, y) __careful_cmp(x, y, <)
/**
* max - return maximum of two values of the same or compatible types
* @x: first value
* @y: second value
*/
#define max(x, y) __careful_cmp(x, y, >)
/**
* min3 - return minimum of three values
* @x: first value
* @y: second value
* @z: third value
*/
#define min3(x, y, z) min((typeof(x))min(x, y), z)
/**
* max3 - return maximum of three values
* @x: first value
* @y: second value
* @z: third value
*/
#define max3(x, y, z) max((typeof(x))max(x, y), z)
/**
* min_not_zero - return the minimum that is _not_ zero, unless both are zero
* @x: value1
* @y: value2
*
* Each argument is evaluated once into a local copy. If @x is zero the
* result is @y (which may itself be zero); if only @y is zero the result is
* @x; otherwise the result is min() of the two copies.
*
* The copies are named __x and __y, so an argument that itself refers to an
* identifier with one of those names would be shadowed by the temporary.
*/
#define min_not_zero(x, y) ({ \
typeof(x) __x = (x); \
typeof(y) __y = (y); \
__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
/**
* clamp - return a value clamped to a given range with strict typechecking
* @val: current value
* @lo: lowest allowable value
* @hi: highest allowable value
*
* This macro does strict typechecking of @lo/@hi to make sure they are of the
* same type as @val. See the unnecessary pointer comparisons.
*
* The result of the inner max() is cast back to typeof(@val) before it is
* compared against @hi. The macro does not verify that @lo <= @hi; if @lo is
* greater than @hi the expansion yields @hi.
*/
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
/*
* ..and if you can't take the strict
* types, you can specify one yourself.
*
* Or not use min/max/clamp at all, of course.
*/
/**
* min_t - return minimum of two values, using the specified type
* @type: data type to use
* @x: first value
* @y: second value
*/
#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <)
/**
* max_t - return maximum of two values, using the specified type
* @type: data type to use
* @x: first value
* @y: second value
*/
#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >)
/**
* clamp_t - return a value clamped to a given range using a given type
* @type: the type of variable to use
* @val: current value
* @lo: minimum allowable value
* @hi: maximum allowable value
*
* This macro does no typechecking and uses temporary variables of type
* @type to make all the comparisons.
*/
#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
/**
* clamp_val - return a value clamped to a given range using val's type
* @val: current value
* @lo: minimum allowable value
* @hi: maximum allowable value
*
* This macro does no typechecking and uses temporary variables of whatever
* type the input argument @val is. This is useful when @val is an unsigned
* type and @lo and @hi are literals that will otherwise be assigned a signed
* integer type.
*/
#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
/**
 * swap - exchange the contents of two lvalues
 * @a: first lvalue
 * @b: second lvalue
 *
 * @a is saved in a temporary of its own type, then @b is assigned to @a and
 * the saved value to @b. Both arguments are expanded more than once, so
 * they must not have side effects.
 */
#define swap(a, b) do {				\
	typeof(a) __tmp = (a);			\
	(a) = (b);				\
	(b) = __tmp;				\
} while (0)
#endif /* _LINUX_MINMAX_H */

View File

@ -2440,7 +2440,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
extern void set_dma_reserve(unsigned long new_dma_reserve);
extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
enum meminit_context, struct vmem_altmap *);
enum meminit_context, struct vmem_altmap *, int migratetype);
extern void setup_per_zone_wmarks(void);
extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
@ -3025,8 +3025,6 @@ extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
extern void memory_failure_queue_kick(int cpu);
extern int unpoison_memory(unsigned long pfn);
extern int get_hwpoison_page(struct page *page);
#define put_hwpoison_page(page) put_page(page)
extern int sysctl_memory_failure_early_kill;
extern int sysctl_memory_failure_recovery;
extern void shake_page(struct page *p, int access);
@ -3066,6 +3064,7 @@ enum mf_action_page_type {
MF_MSG_BUDDY,
MF_MSG_BUDDY_2ND,
MF_MSG_DAX,
MF_MSG_UNSPLIT_THP,
MF_MSG_UNKNOWN,
};

View File

@ -266,6 +266,8 @@ static inline bool is_active_lru(enum lru_list lru)
return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
}
#define ANON_AND_FILE 2
enum lruvec_flags {
LRUVEC_CONGESTED, /* lruvec has many dirty pages
* backed by a congested BDI
@ -283,8 +285,8 @@ struct lruvec {
unsigned long file_cost;
/* Non-resident age, driven by LRU movement */
atomic_long_t nonresident_age;
/* Refaults at the time of last reclaim cycle, anon=0, file=1 */
unsigned long refaults[2];
/* Refaults at the time of last reclaim cycle */
unsigned long refaults[ANON_AND_FILE];
/* Various lruvec state flags (enum lruvec_flags) */
unsigned long flags;
#ifdef CONFIG_MEMCG
@ -441,6 +443,8 @@ enum zone_type {
#ifndef __GENERATING_BOUNDS_H
#define ASYNC_AND_SYNC 2
struct zone {
/* Read-mostly fields */
@ -560,8 +564,8 @@ struct zone {
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
/* pfn where compaction free scanner should start */
unsigned long compact_cached_free_pfn;
/* pfn where async and sync compaction migration scanner should start */
unsigned long compact_cached_migrate_pfn[2];
/* pfn where compaction migration scanner should start */
unsigned long compact_cached_migrate_pfn[ASYNC_AND_SYNC];
unsigned long compact_init_migrate_pfn;
unsigned long compact_init_free_pfn;
#endif
@ -1416,7 +1420,6 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr)
#define pfn_to_nid(pfn) (0)
#endif
#define early_pfn_valid(pfn) pfn_valid(pfn)
void sparse_init(void);
#else
#define sparse_init() do {} while (0)
@ -1436,10 +1439,6 @@ struct mminit_pfnnid_cache {
int last_nid;
};
#ifndef early_pfn_valid
#define early_pfn_valid(pfn) (1)
#endif
/*
* If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
* need to check pfn validity within that MAX_ORDER_NR_PAGES block.

View File

@ -99,15 +99,14 @@ extern struct node *node_devices[];
typedef void (*node_registration_func_t)(struct node *);
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA)
int link_mem_sections(int nid, unsigned long start_pfn,
unsigned long end_pfn,
enum meminit_context context);
void link_mem_sections(int nid, unsigned long start_pfn,
unsigned long end_pfn,
enum meminit_context context);
#else
static inline int link_mem_sections(int nid, unsigned long start_pfn,
unsigned long end_pfn,
enum meminit_context context)
static inline void link_mem_sections(int nid, unsigned long start_pfn,
unsigned long end_pfn,
enum meminit_context context)
{
return 0;
}
#endif
@ -130,8 +129,7 @@ static inline int register_one_node(int nid)
if (error)
return error;
/* link memory sections under this node */
error = link_mem_sections(nid, start_pfn, end_pfn,
MEMINIT_EARLY);
link_mem_sections(nid, start_pfn, end_pfn, MEMINIT_EARLY);
}
return error;

View File

@ -90,9 +90,9 @@
* for such situations. See below and CPUMASK_ALLOC also.
*/
#include <linux/kernel.h>
#include <linux/threads.h>
#include <linux/bitmap.h>
#include <linux/minmax.h>
#include <linux/numa.h>
typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;

View File

@ -431,13 +431,9 @@ PAGEFLAG_FALSE(Uncached)
PAGEFLAG(HWPoison, hwpoison, PF_ANY)
TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
#define __PG_HWPOISON (1UL << PG_hwpoison)
extern bool set_hwpoison_free_buddy_page(struct page *page);
extern bool take_page_off_buddy(struct page *page);
#else
PAGEFLAG_FALSE(HWPoison)
static inline bool set_hwpoison_free_buddy_page(struct page *page)
{
return 0;
}
#define __PG_HWPOISON 0
#endif

View File

@ -11,7 +11,7 @@ extern struct page_ext_operations page_owner_ops;
extern void __reset_page_owner(struct page *page, unsigned int order);
extern void __set_page_owner(struct page *page,
unsigned int order, gfp_t gfp_mask);
extern void __split_page_owner(struct page *page, unsigned int order);
extern void __split_page_owner(struct page *page, unsigned int nr);
extern void __copy_page_owner(struct page *oldpage, struct page *newpage);
extern void __set_page_owner_migrate_reason(struct page *page, int reason);
extern void __dump_page_owner(struct page *page);
@ -31,10 +31,10 @@ static inline void set_page_owner(struct page *page,
__set_page_owner(page, order, gfp_mask);
}
static inline void split_page_owner(struct page *page, unsigned int order)
static inline void split_page_owner(struct page *page, unsigned int nr)
{
if (static_branch_unlikely(&page_owner_inited))
__split_page_owner(page, order);
__split_page_owner(page, nr);
}
static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
{

View File

@ -29,6 +29,7 @@ enum mapping_flags {
AS_EXITING = 4, /* final truncate in progress */
/* writeback related tags are not used */
AS_NO_WRITEBACK_TAGS = 5,
AS_THP_SUPPORT = 6, /* THPs supported */
};
/**
@ -120,6 +121,40 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
m->gfp_mask = mask;
}
/*
* Report whether the AS_THP_SUPPORT bit is set in @mapping->flags.
*/
static inline bool mapping_thp_support(struct address_space *mapping)
{
return test_bit(AS_THP_SUPPORT, &mapping->flags);
}
/*
 * Read the mapping's nr_thps counter. Kernels built without
 * CONFIG_READ_ONLY_THP_FOR_FS always report 0.
 */
static inline int filemap_nr_thps(struct address_space *mapping)
{
	int nr = 0;

#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	nr = atomic_read(&mapping->nr_thps);
#endif
	return nr;
}
/*
 * Bump the mapping's nr_thps counter. The counter is left untouched for
 * mappings flagged AS_THP_SUPPORT. Without CONFIG_READ_ONLY_THP_FOR_FS
 * no caller is expected, so the call warns once.
 */
static inline void filemap_nr_thps_inc(struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	if (mapping_thp_support(mapping))
		return;

	atomic_inc(&mapping->nr_thps);
#else
	WARN_ON_ONCE(1);
#endif
}
/*
 * Drop the mapping's nr_thps counter by one. The counter is left untouched
 * for mappings flagged AS_THP_SUPPORT. Without CONFIG_READ_ONLY_THP_FOR_FS
 * no caller is expected, so the call warns once.
 */
static inline void filemap_nr_thps_dec(struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	if (mapping_thp_support(mapping))
		return;

	atomic_dec(&mapping->nr_thps);
#else
	WARN_ON_ONCE(1);
#endif
}
void release_pages(struct page **pages, int nr);
/*
@ -726,17 +761,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
void delete_from_page_cache_batch(struct address_space *mapping,
struct pagevec *pvec);
#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
void page_cache_sync_readahead(struct address_space *, struct file_ra_state *,
struct file *, pgoff_t index, unsigned long req_count);
void page_cache_async_readahead(struct address_space *, struct file_ra_state *,
struct file *, struct page *, pgoff_t index,
unsigned long req_count);
void page_cache_readahead_unbounded(struct address_space *, struct file *,
pgoff_t index, unsigned long nr_to_read,
unsigned long lookahead_count);
/*
* Like add_to_page_cache_locked, but used to add newly allocated pages:
* the page is new, so we can just run __SetPageLocked() against it.
@ -777,6 +801,67 @@ struct readahead_control {
unsigned int _batch_count;
};
/*
* Declare a struct readahead_control named @rac with its file, mapping and
* _index members set from @f, @m and @i. Members not named in the
* initializer are zero-initialized.
*/
#define DEFINE_READAHEAD(rac, f, m, i) \
struct readahead_control rac = { \
.file = f, \
.mapping = m, \
._index = i, \
}
#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
void page_cache_ra_unbounded(struct readahead_control *,
unsigned long nr_to_read, unsigned long lookahead_count);
void page_cache_sync_ra(struct readahead_control *, struct file_ra_state *,
unsigned long req_count);
void page_cache_async_ra(struct readahead_control *, struct file_ra_state *,
struct page *, unsigned long req_count);
/**
* page_cache_sync_readahead - generic file readahead
* @mapping: address_space which holds the pagecache and I/O vectors
* @ra: file_ra_state which holds the readahead state
* @file: Used by the filesystem for authentication.
* @index: Index of first page to be read.
* @req_count: Total number of pages being read by the caller.
*
* page_cache_sync_readahead() should be called when a cache miss happened:
* it will submit the read. The readahead logic may decide to piggyback more
* pages onto the read request if access patterns suggest it will improve
* performance.
*/
static inline
void page_cache_sync_readahead(struct address_space *mapping,
		struct file_ra_state *ra, struct file *file, pgoff_t index,
		unsigned long req_count)
{
	/* Pack file, mapping and start index into one request descriptor. */
	DEFINE_READAHEAD(rac, file, mapping, index);

	page_cache_sync_ra(&rac, ra, req_count);
}
/**
* page_cache_async_readahead - file readahead for marked pages
* @mapping: address_space which holds the pagecache and I/O vectors
* @ra: file_ra_state which holds the readahead state
* @file: Used by the filesystem for authentication.
* @page: The page at @index which triggered the readahead call.
* @index: Index of first page to be read.
* @req_count: Total number of pages being read by the caller.
*
* page_cache_async_readahead() should be called when a page is used which
* is marked as PageReadahead; this is a marker to suggest that the application
* has used up enough of the readahead window that we should start pulling in
* more pages.
*/
static inline
void page_cache_async_readahead(struct address_space *mapping,
		struct file_ra_state *ra, struct file *file,
		struct page *page, pgoff_t index, unsigned long req_count)
{
	/* Pack file, mapping and start index into one request descriptor. */
	DEFINE_READAHEAD(rac, file, mapping, index);

	page_cache_async_ra(&rac, ra, page, req_count);
}
/**
* readahead_page - Get the next page to read.
* @rac: The current readahead request.

View File

@ -1013,7 +1013,7 @@ struct task_struct {
struct held_lock held_locks[MAX_LOCK_DEPTH];
#endif
#ifdef CONFIG_UBSAN
#if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP)
unsigned int in_ubsan;
#endif

View File

@ -49,31 +49,6 @@ static inline void mmdrop(struct mm_struct *mm)
__mmdrop(mm);
}
/*
* This has to be called after a get_task_mm()/mmget_not_zero()
* followed by taking the mmap_lock for writing before modifying the
* vmas or anything the coredump pretends not to change from under it.
*
* It also has to be called when mmgrab() is used in the context of
* the process, but then the mm_count refcount is transferred outside
* the context of the process to run down_write() on that pinned mm.
*
* NOTE: find_extend_vma() called from GUP context is the only place
* that can modify the "mm" (notably the vm_start/end) under mmap_lock
* for reading and outside the context of the process, so it is also
* the only case that holds the mmap_lock for reading that must call
* this function. Generally if the mmap_lock is hold for reading
* there's no need of this check after get_task_mm()/mmget_not_zero().
*
* This function can be obsoleted and the check can be removed, after
* the coredump code will hold the mmap_lock for writing before
* invoking the ->core_dump methods.
*/
static inline bool mmget_still_valid(struct mm_struct *mm)
{
return likely(!mm->core_state);
}
/**
* mmget() - Pin the address space associated with a &struct mm_struct.
* @mm: The address space to pin.

View File

@ -2,7 +2,9 @@
#ifndef __LINUX_UACCESS_H__
#define __LINUX_UACCESS_H__
#include <linux/fault-inject-usercopy.h>
#include <linux/instrumented.h>
#include <linux/minmax.h>
#include <linux/sched.h>
#include <linux/thread_info.h>
@ -83,6 +85,8 @@ static __always_inline __must_check unsigned long
__copy_from_user(void *to, const void __user *from, unsigned long n)
{
might_fault();
if (should_fail_usercopy())
return n;
instrument_copy_from_user(to, from, n);
check_object_size(to, n, false);
return raw_copy_from_user(to, from, n);
@ -104,6 +108,8 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
static __always_inline __must_check unsigned long
__copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
{
if (should_fail_usercopy())
return n;
instrument_copy_to_user(to, from, n);
check_object_size(from, n, true);
return raw_copy_to_user(to, from, n);
@ -113,6 +119,8 @@ static __always_inline __must_check unsigned long
__copy_to_user(void __user *to, const void *from, unsigned long n)
{
might_fault();
if (should_fail_usercopy())
return n;
instrument_copy_to_user(to, from, n);
check_object_size(from, n, true);
return raw_copy_to_user(to, from, n);
@ -124,7 +132,7 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
{
unsigned long res = n;
might_fault();
if (likely(access_ok(from, n))) {
if (!should_fail_usercopy() && likely(access_ok(from, n))) {
instrument_copy_from_user(to, from, n);
res = raw_copy_from_user(to, from, n);
}
@ -142,6 +150,8 @@ static inline __must_check unsigned long
_copy_to_user(void __user *to, const void *from, unsigned long n)
{
might_fault();
if (should_fail_usercopy())
return n;
if (access_ok(to, n)) {
instrument_copy_to_user(to, from, n);
n = raw_copy_to_user(to, from, n);

View File

@ -28,7 +28,7 @@ struct reclaim_stat {
unsigned nr_writeback;
unsigned nr_immediate;
unsigned nr_pageout;
unsigned nr_activate[2];
unsigned nr_activate[ANON_AND_FILE];
unsigned nr_ref_keep;
unsigned nr_unmap_fail;
unsigned nr_lazyfree_fail;

View File

@ -1505,6 +1505,28 @@ void xas_pause(struct xa_state *);
void xas_create_range(struct xa_state *);
/*
* Entry-order helpers. The real implementations are only declared when
* CONFIG_XARRAY_MULTI is set; otherwise the inline stubs below are used.
*/
#ifdef CONFIG_XARRAY_MULTI
int xa_get_order(struct xarray *, unsigned long index);
void xas_split(struct xa_state *, void *entry, unsigned int order);
void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t);
#else
/* Stub: always reports order 0. */
static inline int xa_get_order(struct xarray *xa, unsigned long index)
{
return 0;
}
/* Stub: @order is ignored and @entry is simply stored at the current state. */
static inline void xas_split(struct xa_state *xas, void *entry,
unsigned int order)
{
xas_store(xas, entry);
}
/* Stub: does nothing. */
static inline void xas_split_alloc(struct xa_state *xas, void *entry,
unsigned int order, gfp_t gfp)
{
}
#endif
/**
* xas_reload() - Refetch an entry from the xarray.
* @xas: XArray operation state.

View File

@ -361,6 +361,7 @@ TRACE_EVENT(aer_event,
EM ( MF_MSG_POISONED_HUGE, "huge page already hardware poisoned" ) \
EM ( MF_MSG_HUGE, "huge page" ) \
EM ( MF_MSG_FREE_HUGE, "free huge page" ) \
EM ( MF_MSG_NON_PMD_HUGE, "non-pmd-sized huge page" ) \
EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" ) \
EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" ) \
EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" ) \
@ -373,6 +374,8 @@ TRACE_EVENT(aer_event,
EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" ) \
EM ( MF_MSG_BUDDY, "free buddy page" ) \
EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" ) \
EM ( MF_MSG_DAX, "dax page" ) \
EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \
EMe ( MF_MSG_UNKNOWN, "unknown page" )
/*

View File

@ -25,7 +25,7 @@
* Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
* XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
*
* Fixed a nasty interaction with with sys_umount(). If the accointing
* Fixed a nasty interaction with sys_umount(). If the accounting
* was suspended we failed to stop it on umount(). Messy.
* Another one: remount to readonly didn't stop accounting.
* Question: what should we do if we have CAP_SYS_ADMIN but not
@ -263,12 +263,12 @@ static DEFINE_MUTEX(acct_on_mutex);
* sys_acct - enable/disable process accounting
* @name: file name for accounting records or NULL to shutdown accounting
*
* Returns 0 for success or negative errno values for failure.
*
* sys_acct() is the only system call needed to implement process
* accounting. It takes the name of the file where accounting records
* should be written. If the filename is NULL, accounting will be
* shutdown.
*
* Returns: 0 for success or negative errno values for failure.
*/
SYSCALL_DEFINE1(acct, const char __user *, name)
{
@ -586,9 +586,7 @@ static void slow_acct_process(struct pid_namespace *ns)
}
/**
* acct_process
*
* handles process accounting for an exiting task
* acct_process - handles process accounting for an exiting task
*/
void acct_process(void)
{

View File

@ -390,7 +390,7 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
* The top cpuset doesn't have any online cpu as a
* consequence of a race between cpuset_hotplug_work
* and cpu hotplug notifier. But we know the top
* cpuset's effective_cpus is on its way to to be
* cpuset's effective_cpus is on its way to be
* identical to cpu_online_mask.
*/
cpumask_copy(pmask, cpu_online_mask);

View File

@ -16,7 +16,7 @@
#include "direct.h"
/*
* Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it
* Most architectures use ZONE_DMA for the first 16 Megabytes, but some use
* it for entirely different regions. In that case the arch code needs to
* override the variable below for dma-direct to work properly.
*/

View File

@ -556,7 +556,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
get_file(file);
if (tmp->vm_flags & VM_DENYWRITE)
atomic_dec(&inode->i_writecount);
put_write_access(inode);
i_mmap_lock_write(mapping);
if (tmp->vm_flags & VM_SHARED)
mapping_allow_writable(mapping);
@ -2189,7 +2189,7 @@ static __latent_entropy struct task_struct *copy_process(
/*
* Ensure that the cgroup subsystem policies allow the new process to be
* forked. It should be noted the the new process's css_set can be changed
* forked. It should be noted that the new process's css_set can be changed
* between here and cgroup_post_fork() if an organisation operation is in
* progress.
*/

View File

@ -916,7 +916,7 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
* [10] Found | Found | task | !=taskTID | 0/1 | Invalid
*
* [1] Indicates that the kernel can acquire the futex atomically. We
* came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
* came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
*
* [2] Valid, if TID does not belong to a kernel thread. If no matching
* thread is found then it indicates that the owner TID has died.

View File

@ -604,7 +604,7 @@ int irq_timings_alloc(int irq)
/*
* Some platforms can have the same private interrupt per cpu,
* so this function may be be called several times with the
* so this function may be called several times with the
* same interrupt number. Just bail out in case the per cpu
* stat structure is already allocated.
*/

View File

@ -19,7 +19,7 @@
#include <linux/cpu.h>
#include <asm/sections.h>
/* mutex to protect coming/going of the the jump_label table */
/* mutex to protect coming/going of the jump_label table */
static DEFINE_MUTEX(jump_label_mutex);
void jump_label_lock(void)

View File

@ -32,7 +32,7 @@
* 1. different addresses but with the same encoded address race;
* 2. and both map onto the same watchpoint slots;
*
* Both these are assumed to be very unlikely. However, in case it still happens
* Both these are assumed to be very unlikely. However, in case it still
* happens, the report logic will filter out the false positive (see report.c).
*/
#define WATCHPOINT_ADDR_BITS (BITS_PER_LONG-1 - WATCHPOINT_SIZE_BITS)

View File

@ -109,7 +109,7 @@ EXPORT_SYMBOL_GPL(kexec_crash_loaded);
* defined more restrictively in <asm/kexec.h>.
*
* The code for the transition from the current kernel to the
* the new kernel is placed in the control_code_buffer, whose size
* new kernel is placed in the control_code_buffer, whose size
* is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single
* page of memory is necessary, but some architectures require more.
* Because this memory must be identity mapped in the transition from

View File

@ -521,7 +521,7 @@ static int locate_mem_hole_callback(struct resource *res, void *arg)
/* Returning 0 will take to next memory range */
/* Don't use memory that will be detected and handled by a driver. */
if (res->flags & IORESOURCE_MEM_DRIVER_MANAGED)
if (res->flags & IORESOURCE_SYSRAM_DRIVER_MANAGED)
return 0;
if (sz < kbuf->memsz)

View File

@ -775,7 +775,7 @@ EXPORT_SYMBOL(kthread_create_worker);
/**
* kthread_create_worker_on_cpu - create a kthread worker and bind it
* it to a given CPU and the associated NUMA node.
* to a given CPU and the associated NUMA node.
* @cpu: CPU number
* @flags: flags modifying the default behavior of the worker
* @namefmt: printf-style name for the kthread worker (task).

View File

@ -55,7 +55,7 @@ EXPORT_SYMBOL_GPL(klp_get_state);
*
* The function can be called only during transition when a new
* livepatch is being enabled or when such a transition is reverted.
* It is typically called only from from pre/post (un)patch
* It is typically called only from pre/post (un)patch
* callbacks.
*
* Return: pointer to the latest struct klp_state from already

View File

@ -589,6 +589,11 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
if (args)
vprintk(args->fmt, args->args);
print_modules();
if (regs)
show_regs(regs);
if (panic_on_warn) {
/*
* This thread may hit another WARN() in the panic path.
@ -600,12 +605,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
panic("panic_on_warn set ...\n");
}
print_modules();
if (regs)
show_regs(regs);
else
dump_stack();
dump_stack();
print_irqtrace_events(current);

View File

@ -233,7 +233,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
* to pid_ns->child_reaper. Thus pidns->child_reaper needs to
* stay valid until they all go away.
*
* The code relies on the the pid_ns->child_reaper ignoring
* The code relies on the pid_ns->child_reaper ignoring
* SIGCHILD to cause those EXIT_ZOMBIE processes to be
* autoreaped if reparented.
*

View File

@ -735,7 +735,7 @@ zone_found:
*/
/*
* If the zone we wish to scan is the the current zone and the
* If the zone we wish to scan is the current zone and the
* pfn falls into the current node then we do not need to walk
* the tree.
*/

View File

@ -2,8 +2,9 @@
/*
* Range add and subtract
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/minmax.h>
#include <linux/printk.h>
#include <linux/sort.h>
#include <linux/string.h>
#include <linux/range.h>

View File

@ -1002,7 +1002,7 @@ static int relay_file_read_avail(struct rchan_buf *buf)
size_t subbuf_size = buf->chan->subbuf_size;
size_t n_subbufs = buf->chan->n_subbufs;
size_t produced = buf->subbufs_produced;
size_t consumed = buf->subbufs_consumed;
size_t consumed;
relay_file_read_consume(buf, 0, 0);

View File

@ -1240,7 +1240,6 @@ EXPORT_SYMBOL(__release_region);
#ifdef CONFIG_MEMORY_HOTREMOVE
/**
* release_mem_region_adjustable - release a previously reserved memory region
* @parent: parent resource descriptor
* @start: resource start address
* @size: resource region size
*
@ -1258,21 +1257,28 @@ EXPORT_SYMBOL(__release_region);
* assumes that all children remain in the lower address entry for
* simplicity. Enhance this logic when necessary.
*/
int release_mem_region_adjustable(struct resource *parent,
resource_size_t start, resource_size_t size)
void release_mem_region_adjustable(resource_size_t start, resource_size_t size)
{
struct resource *parent = &iomem_resource;
struct resource *new_res = NULL;
bool alloc_nofail = false;
struct resource **p;
struct resource *res;
struct resource *new_res;
resource_size_t end;
int ret = -EINVAL;
end = start + size - 1;
if ((start < parent->start) || (end > parent->end))
return ret;
if (WARN_ON_ONCE((start < parent->start) || (end > parent->end)))
return;
/* The alloc_resource() result gets checked later */
new_res = alloc_resource(GFP_KERNEL);
/*
* We free up quite a lot of memory on memory hotunplug (esp., memmap),
* just before releasing the region. This is highly unlikely to
* fail - let's play safe and make it never fail as the caller cannot
* perform any error handling (e.g., trying to re-add memory will fail
* similarly).
*/
retry:
new_res = alloc_resource(GFP_KERNEL | (alloc_nofail ? __GFP_NOFAIL : 0));
p = &parent->child;
write_lock(&resource_lock);
@ -1298,7 +1304,6 @@ int release_mem_region_adjustable(struct resource *parent,
* so if we are dealing with them, let us just back off here.
*/
if (!(res->flags & IORESOURCE_SYSRAM)) {
ret = 0;
break;
}
@ -1315,20 +1320,23 @@ int release_mem_region_adjustable(struct resource *parent,
/* free the whole entry */
*p = res->sibling;
free_resource(res);
ret = 0;
} else if (res->start == start && res->end != end) {
/* adjust the start */
ret = __adjust_resource(res, end + 1,
res->end - end);
WARN_ON_ONCE(__adjust_resource(res, end + 1,
res->end - end));
} else if (res->start != start && res->end == end) {
/* adjust the end */
ret = __adjust_resource(res, res->start,
start - res->start);
WARN_ON_ONCE(__adjust_resource(res, res->start,
start - res->start));
} else {
/* split into two entries */
/* split into two entries - we need a new resource */
if (!new_res) {
ret = -ENOMEM;
break;
new_res = alloc_resource(GFP_ATOMIC);
if (!new_res) {
alloc_nofail = true;
write_unlock(&resource_lock);
goto retry;
}
}
new_res->name = res->name;
new_res->start = end + 1;
@ -1339,9 +1347,8 @@ int release_mem_region_adjustable(struct resource *parent,
new_res->sibling = res->sibling;
new_res->child = NULL;
ret = __adjust_resource(res, res->start,
start - res->start);
if (ret)
if (WARN_ON_ONCE(__adjust_resource(res, res->start,
start - res->start)))
break;
res->sibling = new_res;
new_res = NULL;
@ -1352,10 +1359,69 @@ int release_mem_region_adjustable(struct resource *parent,
write_unlock(&resource_lock);
free_resource(new_res);
return ret;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Decide whether two System RAM resources can be folded into one: r2 must
 * start directly after r1 ends, both must carry identical flags, name and
 * descriptor, and neither may have child resources.
 */
static bool system_ram_resources_mergeable(struct resource *r1,
					   struct resource *r2)
{
	/* We assume either r1 or r2 is IORESOURCE_SYSRAM_MERGEABLE. */
	if (r1->child || r2->child)
		return false;
	if (r1->flags != r2->flags)
		return false;
	if (r1->name != r2->name || r1->desc != r2->desc)
		return false;

	/* Only directly adjacent ranges (r1 first, r2 second) qualify. */
	return r1->end + 1 == r2->start;
}
/*
 * merge_system_ram_resource - flag a System RAM resource as mergeable and
 *	coalesce it with adjacent, mergeable resources
 * @res: resource descriptor
 *
 * Meant for memory hotplug, where a driver adds many contiguous system ram
 * resources (e.g., via add_memory*()) and the exact resource boundaries are
 * of no interest (they might be relevant, e.g., for DIMMs). Merging is only
 * attempted between resources that are marked mergeable, share the same
 * parent and have no children. All mergeable resources must be immutable
 * while the request is processed.
 *
 * Note:
 * - After this call the caller must no longer use any pointer to a resource
 *   that is marked mergeable - the resource might have been freed, leaving
 *   the pointer stale!
 * - release_mem_region_adjustable() will split on demand on memory hotunplug
 */
void merge_system_ram_resource(struct resource *res)
{
	const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
	struct resource *next;
	struct resource *prev;

	/* Only busy System RAM resources are eligible. */
	if (WARN_ON_ONCE((res->flags & flags) != flags))
		return;

	write_lock(&resource_lock);
	res->flags |= IORESOURCE_SYSRAM_MERGEABLE;

	/* Absorb the following sibling into res when the two are mergeable. */
	next = res->sibling;
	if (next && system_ram_resources_mergeable(res, next)) {
		res->end = next->end;
		res->sibling = next->sibling;
		free_resource(next);
	}

	/* Walk the parent's child list to find the sibling preceding res. */
	for (prev = res->parent->child; prev; prev = prev->sibling) {
		if (prev->sibling == res)
			break;
	}

	/* Fold res into its predecessor when mergeable; res is freed then. */
	if (prev && system_ram_resources_mergeable(prev, res)) {
		prev->end = res->end;
		prev->sibling = res->sibling;
		free_resource(res);
	}
	write_unlock(&resource_lock);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
/*
* Managed region resource
*/

View File

@ -741,7 +741,7 @@ EXPORT_SYMBOL(on_each_cpu_mask);
* for all the required CPUs to finish. This may include the local
* processor.
* @cond_func: A callback function that is passed a cpu id and
* the the info parameter. The function is called
* the info parameter. The function is called
* with preemption disabled. The function should
* return a boolean value indicating whether to IPI
* the specified CPU.

View File

@ -2034,7 +2034,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
* VMAs already unmapped and kernel uses these members for statistics
* output in procfs mostly, except
*
* - @start_brk/@brk which are used in do_brk but kernel lookups
* - @start_brk/@brk which are used in do_brk_flags but kernel lookups
* for VMAs when updating these members so anything wrong written
* here cause kernel to swear at userspace program but won't lead
* to any problem in kernel itself

View File

@ -515,7 +515,7 @@ EXPORT_SYMBOL(from_kgid_munged);
*
* When there is no mapping defined for the user-namespace projid
* pair INVALID_PROJID is returned. Callers are expected to test
* for and handle handle INVALID_PROJID being returned. INVALID_PROJID
* for and handle INVALID_PROJID being returned. INVALID_PROJID
* may be tested for using projid_valid().
*/
kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)

View File

@ -1768,6 +1768,13 @@ config FAIL_PAGE_ALLOC
help
Provide fault-injection capability for alloc_pages().
config FAULT_INJECTION_USERCOPY
bool "Fault injection capability for usercopy functions"
depends on FAULT_INJECTION
help
Provides fault-injection capability to inject failures
in usercopy functions (copy_from_user(), get_user(), ...).
config FAIL_MAKE_REQUEST
bool "Fault-injection capability for disk IO"
depends on FAULT_INJECTION && BLOCK

View File

@ -47,6 +47,20 @@ config UBSAN_BOUNDS
to the {str,mem}*cpy() family of functions (that is addressed
by CONFIG_FORTIFY_SOURCE).
config UBSAN_LOCAL_BOUNDS
bool "Perform array local bounds checking"
depends on UBSAN_TRAP
depends on CC_IS_CLANG
depends on !UBSAN_KCOV_BROKEN
help
This option enables -fsanitize=local-bounds which traps when an
exception/error is detected. Therefore, it should be enabled only
if trapping is expected.
Enabling this option detects errors due to accesses through a
pointer that is derived from an object of a statically-known size,
where an added offset (which may not be known statically) is
out-of-bounds.
config UBSAN_MISC
bool "Enable all other Undefined Behavior sanity checks"
default UBSAN

View File

@ -210,6 +210,7 @@ obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
obj-$(CONFIG_FAULT_INJECTION_USERCOPY) += fault-inject-usercopy.o
obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o

View File

@ -23,7 +23,7 @@
/**
* DOC: bitmap introduction
*
* bitmaps provide an array of bits, implemented using an an
* bitmaps provide an array of bits, implemented using an
* array of unsigned longs. The number of valid bits in a
* given bitmap does _not_ need to be an exact multiple of
* BITS_PER_LONG.

View File

@ -331,7 +331,7 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
return crc;
}
#if CRC_LE_BITS == 1
#if CRC_BE_BITS == 1
u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
{
return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);

View File

@ -390,7 +390,7 @@ static int INIT get_next_block(struct bunzip_data *bd)
j = (bd->inbufBits >> bd->inbufBitCount)&
((1 << hufGroup->maxLen)-1);
got_huff_bits:
/* Figure how how many bits are in next symbol and
/* Figure how many bits are in next symbol and
* unget extras */
i = hufGroup->minLen;
while (j > limit[i])

View File

@ -60,8 +60,8 @@ void dql_completed(struct dql *dql, unsigned int count)
* A decrease is only considered if the queue has been busy in
* the whole interval (the check above).
*
* If there is slack, the amount of execess data queued above
* the the amount needed to prevent starvation, the queue limit
* If there is slack, the amount of excess data queued above
* the amount needed to prevent starvation, the queue limit
* can be decreased. To avoid hysteresis we consider the
* minimum amount of slack found over several iterations of the
* completion routine.

View File

@ -42,7 +42,7 @@ enum cpio_fields {
/**
* cpio_data find_cpio_data - Search for files in an uncompressed cpio
* @path: The directory to search for, including a slash at the end
* @data: Pointer to the the cpio archive or a header inside
* @data: Pointer to the cpio archive or a header inside
* @len: Remaining length of the cpio based on data pointer
* @nextoff: When a matching file is found, this is the offset from the
* beginning of the cpio to the beginning of the next file, not the

View File

@ -0,0 +1,39 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/fault-inject.h>
#include <linux/fault-inject-usercopy.h>
/*
 * Fault-injection state for usercopy failures. The single fault_attr member
 * starts out as FAULT_ATTR_INITIALIZER and is the object every function in
 * this file operates on.
 */
static struct {
struct fault_attr attr;
} fail_usercopy = {
.attr = FAULT_ATTR_INITIALIZER,
};
/*
 * Handler registered for the "fail_usercopy=" option: hands the option
 * string @str to setup_fault_attr() to configure fail_usercopy.attr and
 * returns that function's result unchanged.
 */
static int __init setup_fail_usercopy(char *str)
{
return setup_fault_attr(&fail_usercopy.attr, str);
}
__setup("fail_usercopy=", setup_fail_usercopy);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
/*
 * Late initcall that exposes fail_usercopy.attr through a "fail_usercopy"
 * debugfs entry created by fault_create_debugfs_attr(). Returns 0 on
 * success, or the error encoded in the returned pointer on failure.
 */
static int __init fail_usercopy_debugfs(void)
{
	struct dentry *dir = fault_create_debugfs_attr("fail_usercopy", NULL,
							&fail_usercopy.attr);

	return IS_ERR(dir) ? PTR_ERR(dir) : 0;
}
late_initcall(fail_usercopy_debugfs);
#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
/*
 * should_fail_usercopy - query whether a usercopy failure should be injected
 *
 * Returns the result of should_fail() evaluated against fail_usercopy.attr,
 * passing 1 as its second argument. Exported (GPL-only) for use outside this
 * file.
 */
bool should_fail_usercopy(void)
{
return should_fail(&fail_usercopy.attr, 1);
}
EXPORT_SYMBOL_GPL(should_fail_usercopy);

View File

@ -16,6 +16,7 @@
#include <linux/bitmap.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/minmax.h>
#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \
!defined(find_next_bit_le) || !defined(find_next_zero_bit_le) || \

View File

@ -7,6 +7,7 @@
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/minmax.h>
#include <linux/export.h>
#include <asm/unaligned.h>

View File

@ -372,7 +372,8 @@ EXPORT_SYMBOL(idr_replace);
* Allocate an ID between @min and @max, inclusive. The allocated ID will
* not exceed %INT_MAX, even if @max is larger.
*
* Context: Any context.
* Context: Any context. It is safe to call this function without
* locking in your code.
* Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
* or %-ENOSPC if there are no free IDs.
*/
@ -479,7 +480,8 @@ EXPORT_SYMBOL(ida_alloc_range);
* @ida: IDA handle.
* @id: Previously allocated ID.
*
* Context: Any context.
* Context: Any context. It is safe to call this function without
* locking in your code.
*/
void ida_free(struct ida *ida, unsigned int id)
{
@ -531,7 +533,8 @@ EXPORT_SYMBOL(ida_free);
* or freed. If the IDA is already empty, there is no need to call this
* function.
*
* Context: Any context.
* Context: Any context. It is safe to call this function without
* locking in your code.
*/
void ida_destroy(struct ida *ida)
{

View File

@ -2,6 +2,7 @@
#include <crypto/hash.h>
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/fault-inject-usercopy.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
@ -140,6 +141,8 @@
static int copyout(void __user *to, const void *from, size_t n)
{
if (should_fail_usercopy())
return n;
if (access_ok(to, n)) {
instrument_copy_to_user(to, from, n);
n = raw_copy_to_user(to, from, n);
@ -149,6 +152,8 @@ static int copyout(void __user *to, const void *from, size_t n)
static int copyin(void *to, const void __user *from, size_t n)
{
if (should_fail_usercopy())
return n;
if (access_ok(from, n)) {
instrument_copy_from_user(to, from, n);
n = raw_copy_from_user(to, from, n);

Some files were not shown because too many files have changed in this diff Show More