exec: pass stack rlimit into mm layout functions

Patch series "exec: Pin stack limit during exec".

Attempts to solve problems with the stack limit changing during exec
continue to be frustrated[1][2].  In addition to the specific issues
around the Stack Clash family of flaws, Andy Lutomirski pointed out[3]
other places during exec where the stack limit is used and is assumed to
be unchanging.  Given the many places it gets used and the fact that it
can be manipulated/raced via setrlimit() and prlimit(), I think the only
way to handle this is to move away from the "current" view of the stack
limit and instead attach it to the bprm, and plumb this down into the
functions that need to know the stack limits.  This series implements
the approach.

[1] 04e35f4495 ("exec: avoid RLIMIT_STACK races with prlimit()")
[2] 779f4e1c6c ("Revert "exec: avoid RLIMIT_STACK races with prlimit()"")
[3] to security@kernel.org, "Subject: existing rlimit races?"

This patch (of 3):

Since it is possible that the stack rlimit can change externally during
exec (either via another thread calling setrlimit() or another process
calling prlimit()), provide a way to pass the rlimit down into the
per-architecture mm layout functions so that the rlimit can stay in the
bprm structure instead of sitting in the signal structure until exec is
finalized.

Link: http://lkml.kernel.org/r/1518638796-20819-2-git-send-email-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Hugh Dickins <hughd@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Greg KH <greg@kroah.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
Cc: Brad Spengler <spender@grsecurity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Kees Cook 2018-04-10 16:34:53 -07:00 committed by Linus Torvalds
parent d64d01a155
commit 8f2af155b5
11 changed files with 81 additions and 58 deletions

View File

@ -21,20 +21,20 @@
#define MIN_GAP (128*1024*1024UL)
#define MAX_GAP ((TASK_SIZE)/6*5)
static int mmap_is_legacy(void)
static int mmap_is_legacy(struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
}
static unsigned long mmap_base(unsigned long rnd)
static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{
unsigned long gap = rlimit(RLIMIT_STACK);
unsigned long gap = rlim_stack->rlim_cur;
if (gap < MIN_GAP)
gap = MIN_GAP;
@ -180,18 +180,18 @@ unsigned long arch_mmap_rnd(void)
return rnd << PAGE_SHIFT;
}
void arch_pick_mmap_layout(struct mm_struct *mm)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
if (current->flags & PF_RANDOMIZE)
random_factor = arch_mmap_rnd();
if (mmap_is_legacy()) {
if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(random_factor);
mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}

View File

@ -38,12 +38,12 @@
#define MIN_GAP (SZ_128M)
#define MAX_GAP (STACK_TOP/6*5)
static int mmap_is_legacy(void)
static int mmap_is_legacy(struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
@ -62,9 +62,9 @@ unsigned long arch_mmap_rnd(void)
return rnd << PAGE_SHIFT;
}
static unsigned long mmap_base(unsigned long rnd)
static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{
unsigned long gap = rlimit(RLIMIT_STACK);
unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
/* Values close to RLIM_INFINITY can overflow. */
@ -83,7 +83,7 @@ static unsigned long mmap_base(unsigned long rnd)
* This function, called very early during the creation of a new process VM
* image, sets up which VM layout function to use:
*/
void arch_pick_mmap_layout(struct mm_struct *mm)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
@ -94,11 +94,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* Fall back to the standard layout if the personality bit is set, or
* if the expected stack growth is unlimited:
*/
if (mmap_is_legacy()) {
if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(random_factor);
mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}

View File

@ -24,20 +24,20 @@ EXPORT_SYMBOL(shm_align_mask);
#define MIN_GAP (128*1024*1024UL)
#define MAX_GAP ((TASK_SIZE)/6*5)
static int mmap_is_legacy(void)
static int mmap_is_legacy(struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
}
static unsigned long mmap_base(unsigned long rnd)
static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{
unsigned long gap = rlimit(RLIMIT_STACK);
unsigned long gap = rlim_stack->rlim_cur;
if (gap < MIN_GAP)
gap = MIN_GAP;
@ -158,18 +158,18 @@ unsigned long arch_mmap_rnd(void)
return rnd << PAGE_SHIFT;
}
void arch_pick_mmap_layout(struct mm_struct *mm)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
if (current->flags & PF_RANDOMIZE)
random_factor = arch_mmap_rnd();
if (mmap_is_legacy()) {
if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(random_factor);
mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}

View File

@ -70,12 +70,18 @@ static inline unsigned long COLOR_ALIGN(unsigned long addr,
* Top of mmap area (just below the process stack).
*/
static unsigned long mmap_upper_limit(void)
/*
* When called from arch_get_unmapped_area(), rlim_stack will be NULL,
* indicating that "current" should be used instead of a passed-in
* value from the exec bprm as done with arch_pick_mmap_layout().
*/
static unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
{
unsigned long stack_base;
/* Limit stack size - see setup_arg_pages() in fs/exec.c */
stack_base = rlimit_max(RLIMIT_STACK);
stack_base = rlim_stack ? rlim_stack->rlim_max
: rlimit_max(RLIMIT_STACK);
if (stack_base > STACK_SIZE_MAX)
stack_base = STACK_SIZE_MAX;
@ -127,7 +133,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_legacy_base;
info.high_limit = mmap_upper_limit();
info.high_limit = mmap_upper_limit(NULL);
info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0;
info.align_offset = shared_align_offset(last_mmap, pgoff);
addr = vm_unmapped_area(&info);
@ -250,10 +256,10 @@ static unsigned long mmap_legacy_base(void)
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
void arch_pick_mmap_layout(struct mm_struct *mm)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
mm->mmap_legacy_base = mmap_legacy_base();
mm->mmap_base = mmap_upper_limit();
mm->mmap_base = mmap_upper_limit(rlim_stack);
if (mmap_is_legacy()) {
mm->mmap_base = mm->mmap_legacy_base;

View File

@ -39,12 +39,12 @@
#define MIN_GAP (128*1024*1024)
#define MAX_GAP (TASK_SIZE/6*5)
static inline int mmap_is_legacy(void)
static inline int mmap_is_legacy(struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
@ -76,9 +76,10 @@ static inline unsigned long stack_maxrandom_size(void)
return (1<<30);
}
static inline unsigned long mmap_base(unsigned long rnd)
static inline unsigned long mmap_base(unsigned long rnd,
struct rlimit *rlim_stack)
{
unsigned long gap = rlimit(RLIMIT_STACK);
unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
/* Values close to RLIM_INFINITY can overflow. */
@ -196,26 +197,28 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
}
static void radix__arch_pick_mmap_layout(struct mm_struct *mm,
unsigned long random_factor)
unsigned long random_factor,
struct rlimit *rlim_stack)
{
if (mmap_is_legacy()) {
if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = radix__arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(random_factor);
mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown;
}
}
#else
/* dummy */
extern void radix__arch_pick_mmap_layout(struct mm_struct *mm,
unsigned long random_factor);
unsigned long random_factor,
struct rlimit *rlim_stack);
#endif
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
void arch_pick_mmap_layout(struct mm_struct *mm)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
@ -223,16 +226,17 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
random_factor = arch_mmap_rnd();
if (radix_enabled())
return radix__arch_pick_mmap_layout(mm, random_factor);
return radix__arch_pick_mmap_layout(mm, random_factor,
rlim_stack);
/*
* Fall back to the standard layout if the personality
* bit is set, or if the expected stack growth is unlimited:
*/
if (mmap_is_legacy()) {
if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(random_factor);
mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}

View File

@ -37,11 +37,11 @@ static unsigned long stack_maxrandom_size(void)
#define MIN_GAP (32*1024*1024)
#define MAX_GAP (STACK_TOP/6*5)
static inline int mmap_is_legacy(void)
static inline int mmap_is_legacy(struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
}
@ -56,9 +56,10 @@ static unsigned long mmap_base_legacy(unsigned long rnd)
return TASK_UNMAPPED_BASE + rnd;
}
static inline unsigned long mmap_base(unsigned long rnd)
static inline unsigned long mmap_base(unsigned long rnd,
struct rlimit *rlim_stack)
{
unsigned long gap = rlimit(RLIMIT_STACK);
unsigned long gap = rlim_stack->rlim_cur;
if (gap < MIN_GAP)
gap = MIN_GAP;
@ -184,7 +185,7 @@ check_asce_limit:
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
void arch_pick_mmap_layout(struct mm_struct *mm)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
@ -195,11 +196,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* Fall back to the standard layout if the personality
* bit is set, or if the expected stack growth is unlimited:
*/
if (mmap_is_legacy()) {
if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = mmap_base_legacy(random_factor);
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(random_factor);
mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}

View File

@ -276,7 +276,7 @@ static unsigned long mmap_rnd(void)
return rnd << PAGE_SHIFT;
}
void arch_pick_mmap_layout(struct mm_struct *mm)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = mmap_rnd();
unsigned long gap;
@ -285,7 +285,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* Fall back to the standard layout if the personality
* bit is set, or if the expected stack growth is unlimited:
*/
gap = rlimit(RLIMIT_STACK);
gap = rlim_stack->rlim_cur;
if (!test_thread_flag(TIF_32BIT) ||
(current->personality & ADDR_COMPAT_LAYOUT) ||
gap == RLIM_INFINITY ||

View File

@ -90,9 +90,10 @@ unsigned long arch_mmap_rnd(void)
return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
}
static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
static unsigned long mmap_base(unsigned long rnd, unsigned long task_size,
struct rlimit *rlim_stack)
{
unsigned long gap = rlimit(RLIMIT_STACK);
unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
unsigned long gap_min, gap_max;
@ -126,16 +127,17 @@ static unsigned long mmap_legacy_base(unsigned long rnd,
* process VM image, sets up which VM layout function to use:
*/
static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
unsigned long random_factor, unsigned long task_size)
unsigned long random_factor, unsigned long task_size,
struct rlimit *rlim_stack)
{
*legacy_base = mmap_legacy_base(random_factor, task_size);
if (mmap_is_legacy())
*base = *legacy_base;
else
*base = mmap_base(random_factor, task_size);
*base = mmap_base(random_factor, task_size, rlim_stack);
}
void arch_pick_mmap_layout(struct mm_struct *mm)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
if (mmap_is_legacy())
mm->get_unmapped_area = arch_get_unmapped_area;
@ -143,7 +145,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
arch_rnd(mmap64_rnd_bits), task_size_64bit(0));
arch_rnd(mmap64_rnd_bits), task_size_64bit(0),
rlim_stack);
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
/*
@ -153,7 +156,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* mmap_base, the compat syscall uses mmap_compat_base.
*/
arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
arch_rnd(mmap32_rnd_bits), task_size_32bit());
arch_rnd(mmap32_rnd_bits), task_size_32bit(),
rlim_stack);
#endif
}

View File

@ -1323,6 +1323,8 @@ EXPORT_SYMBOL(would_dump);
void setup_new_exec(struct linux_binprm * bprm)
{
struct rlimit rlim_stack;
/*
* Once here, prepare_binprm() will not be called any more, so
* the final state of setuid/setgid/fscaps can be merged into the
@ -1345,7 +1347,11 @@ void setup_new_exec(struct linux_binprm * bprm)
current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
}
arch_pick_mmap_layout(current->mm);
task_lock(current->group_leader);
rlim_stack = current->signal->rlim[RLIMIT_STACK];
task_unlock(current->group_leader);
arch_pick_mmap_layout(current->mm, &rlim_stack);
current->sas_ss_sp = current->sas_ss_size = 0;

View File

@ -104,7 +104,8 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
#endif /* CONFIG_MEMCG */
#ifdef CONFIG_MMU
extern void arch_pick_mmap_layout(struct mm_struct *mm);
extern void arch_pick_mmap_layout(struct mm_struct *mm,
struct rlimit *rlim_stack);
extern unsigned long
arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
unsigned long, unsigned long);
@ -113,7 +114,8 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags);
#else
static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
static inline void arch_pick_mmap_layout(struct mm_struct *mm,
struct rlimit *rlim_stack) {}
#endif
static inline bool in_vfork(struct task_struct *tsk)

View File

@ -287,7 +287,7 @@ int vma_is_stack_for_current(struct vm_area_struct *vma)
}
#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
void arch_pick_mmap_layout(struct mm_struct *mm)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area;