4c5ea0a9cd
The following scenario is possible: CPU 1 CPU 2 static_key_slow_inc() atomic_inc_not_zero() -> key.enabled == 0, no increment jump_label_lock() atomic_inc_return() -> key.enabled == 1 now static_key_slow_inc() atomic_inc_not_zero() -> key.enabled == 1, inc to 2 return ** static key is wrong! jump_label_update() jump_label_unlock() Testing the static key at the point marked by (**) will follow the wrong path for jumps that have not been patched yet. This can actually happen when creating many KVM virtual machines with userspace LAPIC emulation; just run several copies of the following program: #include <fcntl.h> #include <unistd.h> #include <sys/ioctl.h> #include <linux/kvm.h> int main(void) { for (;;) { int kvmfd = open("/dev/kvm", O_RDONLY); int vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0); close(ioctl(vmfd, KVM_CREATE_VCPU, 1)); close(vmfd); close(kvmfd); } return 0; } Every KVM_CREATE_VCPU ioctl will attempt a static_key_slow_inc() call. The static key's purpose is to skip NULL pointer checks and indeed one of the processes eventually dereferences NULL. As explained in the commit that introduced the bug:706249c222
("locking/static_keys: Rework update logic") jump_label_update() needs key.enabled to be true. The solution adopted here is to temporarily make key.enabled == -1, and go down the slow path when key.enabled <= 0. Reported-by: Dmitry Vyukov <dvyukov@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: <stable@vger.kernel.org> # v4.3+ Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Fixes: 706249c222
("locking/static_keys: Rework update logic") Link: http://lkml.kernel.org/r/1466527937-69798-1-git-send-email-pbonzini@redhat.com [ Small stylistic edits to the changelog and the code. ] Signed-off-by: Ingo Molnar <mingo@kernel.org>
553 lines
14 KiB
C
553 lines
14 KiB
C
/*
|
|
* jump label support
|
|
*
|
|
* Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
|
|
* Copyright (C) 2011 Peter Zijlstra
|
|
*
|
|
*/
|
|
#include <linux/memory.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/module.h>
|
|
#include <linux/list.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/sort.h>
|
|
#include <linux/err.h>
|
|
#include <linux/static_key.h>
|
|
#include <linux/jump_label_ratelimit.h>
|
|
|
|
#ifdef HAVE_JUMP_LABEL
|
|
|
|
/* mutex to protect coming/going of the jump_label table */
|
|
/*
 * Locked by jump_label_lock() and released by jump_label_unlock();
 * __static_key_slow_dec() also takes it via atomic_dec_and_mutex_lock().
 */
static DEFINE_MUTEX(jump_label_mutex);
|
|
|
|
void jump_label_lock(void)
|
|
{
|
|
mutex_lock(&jump_label_mutex);
|
|
}
|
|
|
|
void jump_label_unlock(void)
|
|
{
|
|
mutex_unlock(&jump_label_mutex);
|
|
}
|
|
|
|
static int jump_label_cmp(const void *a, const void *b)
|
|
{
|
|
const struct jump_entry *jea = a;
|
|
const struct jump_entry *jeb = b;
|
|
|
|
if (jea->key < jeb->key)
|
|
return -1;
|
|
|
|
if (jea->key > jeb->key)
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
|
|
{
|
|
unsigned long size;
|
|
|
|
size = (((unsigned long)stop - (unsigned long)start)
|
|
/ sizeof(struct jump_entry));
|
|
sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL);
|
|
}
|
|
|
|
/* Forward declaration; the definition follows the CONFIG_MODULES helpers it calls. */
static void jump_label_update(struct static_key *key);
|
|
|
|
void static_key_slow_inc(struct static_key *key)
|
|
{
|
|
int v, v1;
|
|
|
|
STATIC_KEY_CHECK_USE();
|
|
|
|
/*
|
|
* Careful if we get concurrent static_key_slow_inc() calls;
|
|
* later calls must wait for the first one to _finish_ the
|
|
* jump_label_update() process. At the same time, however,
|
|
* the jump_label_update() call below wants to see
|
|
* static_key_enabled(&key) for jumps to be updated properly.
|
|
*
|
|
* So give a special meaning to negative key->enabled: it sends
|
|
* static_key_slow_inc() down the slow path, and it is non-zero
|
|
* so it counts as "enabled" in jump_label_update(). Note that
|
|
* atomic_inc_unless_negative() checks >= 0, so roll our own.
|
|
*/
|
|
for (v = atomic_read(&key->enabled); v > 0; v = v1) {
|
|
v1 = atomic_cmpxchg(&key->enabled, v, v + 1);
|
|
if (likely(v1 == v))
|
|
return;
|
|
}
|
|
|
|
jump_label_lock();
|
|
if (atomic_read(&key->enabled) == 0) {
|
|
atomic_set(&key->enabled, -1);
|
|
jump_label_update(key);
|
|
atomic_set(&key->enabled, 1);
|
|
} else {
|
|
atomic_inc(&key->enabled);
|
|
}
|
|
jump_label_unlock();
|
|
}
|
|
EXPORT_SYMBOL_GPL(static_key_slow_inc);
|
|
|
|
static void __static_key_slow_dec(struct static_key *key,
|
|
unsigned long rate_limit, struct delayed_work *work)
|
|
{
|
|
/*
|
|
* The negative count check is valid even when a negative
|
|
* key->enabled is in use by static_key_slow_inc(); a
|
|
* __static_key_slow_dec() before the first static_key_slow_inc()
|
|
* returns is unbalanced, because all other static_key_slow_inc()
|
|
* instances block while the update is in progress.
|
|
*/
|
|
if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
|
|
WARN(atomic_read(&key->enabled) < 0,
|
|
"jump label: negative count!\n");
|
|
return;
|
|
}
|
|
|
|
if (rate_limit) {
|
|
atomic_inc(&key->enabled);
|
|
schedule_delayed_work(work, rate_limit);
|
|
} else {
|
|
jump_label_update(key);
|
|
}
|
|
jump_label_unlock();
|
|
}
|
|
|
|
static void jump_label_update_timeout(struct work_struct *work)
|
|
{
|
|
struct static_key_deferred *key =
|
|
container_of(work, struct static_key_deferred, work.work);
|
|
__static_key_slow_dec(&key->key, 0, NULL);
|
|
}
|
|
|
|
void static_key_slow_dec(struct static_key *key)
|
|
{
|
|
STATIC_KEY_CHECK_USE();
|
|
__static_key_slow_dec(key, 0, NULL);
|
|
}
|
|
EXPORT_SYMBOL_GPL(static_key_slow_dec);
|
|
|
|
void static_key_slow_dec_deferred(struct static_key_deferred *key)
|
|
{
|
|
STATIC_KEY_CHECK_USE();
|
|
__static_key_slow_dec(&key->key, key->timeout, &key->work);
|
|
}
|
|
EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred);
|
|
|
|
void jump_label_rate_limit(struct static_key_deferred *key,
|
|
unsigned long rl)
|
|
{
|
|
STATIC_KEY_CHECK_USE();
|
|
key->timeout = rl;
|
|
INIT_DELAYED_WORK(&key->work, jump_label_update_timeout);
|
|
}
|
|
EXPORT_SYMBOL_GPL(jump_label_rate_limit);
|
|
|
|
static int addr_conflict(struct jump_entry *entry, void *start, void *end)
|
|
{
|
|
if (entry->code <= (unsigned long)end &&
|
|
entry->code + JUMP_LABEL_NOP_SIZE > (unsigned long)start)
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int __jump_label_text_reserved(struct jump_entry *iter_start,
|
|
struct jump_entry *iter_stop, void *start, void *end)
|
|
{
|
|
struct jump_entry *iter;
|
|
|
|
iter = iter_start;
|
|
while (iter < iter_stop) {
|
|
if (addr_conflict(iter, start, end))
|
|
return 1;
|
|
iter++;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Update code which is definitely not currently executing.
|
|
* Architectures which need heavyweight synchronization to modify
|
|
* running code can override this to make the non-live update case
|
|
* cheaper.
|
|
*/
|
|
void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry *entry,
|
|
enum jump_label_type type)
|
|
{
|
|
arch_jump_label_transform(entry, type);
|
|
}
|
|
|
|
static inline struct jump_entry *static_key_entries(struct static_key *key)
|
|
{
|
|
return (struct jump_entry *)((unsigned long)key->entries & ~JUMP_TYPE_MASK);
|
|
}
|
|
|
|
static inline bool static_key_type(struct static_key *key)
|
|
{
|
|
return (unsigned long)key->entries & JUMP_TYPE_MASK;
|
|
}
|
|
|
|
static inline struct static_key *jump_entry_key(struct jump_entry *entry)
|
|
{
|
|
return (struct static_key *)((unsigned long)entry->key & ~1UL);
|
|
}
|
|
|
|
static bool jump_entry_branch(struct jump_entry *entry)
|
|
{
|
|
return (unsigned long)entry->key & 1UL;
|
|
}
|
|
|
|
static enum jump_label_type jump_label_type(struct jump_entry *entry)
|
|
{
|
|
struct static_key *key = jump_entry_key(entry);
|
|
bool enabled = static_key_enabled(key);
|
|
bool branch = jump_entry_branch(entry);
|
|
|
|
/* See the comment in linux/jump_label.h */
|
|
return enabled ^ branch;
|
|
}
|
|
|
|
static void __jump_label_update(struct static_key *key,
|
|
struct jump_entry *entry,
|
|
struct jump_entry *stop)
|
|
{
|
|
for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
|
|
/*
|
|
* entry->code set to 0 invalidates module init text sections
|
|
* kernel_text_address() verifies we are not in core kernel
|
|
* init code, see jump_label_invalidate_module_init().
|
|
*/
|
|
if (entry->code && kernel_text_address(entry->code))
|
|
arch_jump_label_transform(entry, jump_label_type(entry));
|
|
}
|
|
}
|
|
|
|
/*
 * Boot-time setup of the built-in jump table.
 *
 * With jump_label_mutex held, the table between __start___jump_table and
 * __stop___jump_table is sorted by key. Sites whose current type is
 * JUMP_LABEL_NOP are rewritten, and each key is pointed at its first
 * entry. static_key_initialized is then set.
 */
void __init jump_label_init(void)
{
	struct jump_entry *table_start = __start___jump_table;
	struct jump_entry *table_stop = __stop___jump_table;
	struct static_key *last_key = NULL;
	struct jump_entry *entry;

	jump_label_lock();
	jump_label_sort_entries(table_start, table_stop);

	for (entry = table_start; entry < table_stop; entry++) {
		struct static_key *owner;

		/* rewrite NOPs */
		if (jump_label_type(entry) == JUMP_LABEL_NOP)
			arch_jump_label_transform_static(entry, JUMP_LABEL_NOP);

		/* Only the first entry of each run of equal keys is recorded. */
		owner = jump_entry_key(entry);
		if (owner == last_key)
			continue;
		last_key = owner;

		/*
		 * Point owner->entries at this entry. Adding instead of
		 * assigning preserves JUMP_LABEL_TRUE_BRANCH, which is kept
		 * in the low bits of the same word.
		 */
		*((unsigned long *)&owner->entries) += (unsigned long)entry;
#ifdef CONFIG_MODULES
		owner->next = NULL;
#endif
	}
	static_key_initialized = true;
	jump_label_unlock();
}
|
|
|
|
#ifdef CONFIG_MODULES
|
|
|
|
static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
|
|
{
|
|
struct static_key *key = jump_entry_key(entry);
|
|
bool type = static_key_type(key);
|
|
bool branch = jump_entry_branch(entry);
|
|
|
|
/* See the comment in linux/jump_label.h */
|
|
return type ^ branch;
|
|
}
|
|
|
|
/*
 * Per-(key, module) record created by jump_label_add_module() for a key
 * that a module uses but does not itself contain. Records are chained
 * from key->next.
 */
struct static_key_mod {
	struct static_key_mod *next;	/* next record on the same key's list */
	struct jump_entry *entries;	/* first entry for the key in @mod's table */
	struct module *mod;		/* module owning @entries */
};
|
|
|
|
static int __jump_label_mod_text_reserved(void *start, void *end)
|
|
{
|
|
struct module *mod;
|
|
|
|
mod = __module_text_address((unsigned long)start);
|
|
if (!mod)
|
|
return 0;
|
|
|
|
WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod);
|
|
|
|
return __jump_label_text_reserved(mod->jump_entries,
|
|
mod->jump_entries + mod->num_jump_entries,
|
|
start, end);
|
|
}
|
|
|
|
static void __jump_label_mod_update(struct static_key *key)
|
|
{
|
|
struct static_key_mod *mod;
|
|
|
|
for (mod = key->next; mod; mod = mod->next) {
|
|
struct module *m = mod->mod;
|
|
|
|
__jump_label_update(key, mod->entries,
|
|
m->jump_entries + m->num_jump_entries);
|
|
}
|
|
}
|
|
|
|
/***
|
|
* apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop()
|
|
* @mod: module to patch
|
|
*
|
|
* Allow for run-time selection of the optimal nops. Before the module
|
|
* loads patch these with arch_get_jump_label_nop(), which is specified by
|
|
* the arch specific jump label code.
|
|
*/
|
|
void jump_label_apply_nops(struct module *mod)
|
|
{
|
|
struct jump_entry *iter_start = mod->jump_entries;
|
|
struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
|
|
struct jump_entry *iter;
|
|
|
|
/* if the module doesn't have jump label entries, just return */
|
|
if (iter_start == iter_stop)
|
|
return;
|
|
|
|
for (iter = iter_start; iter < iter_stop; iter++) {
|
|
/* Only write NOPs for arch_branch_static(). */
|
|
if (jump_label_init_type(iter) == JUMP_LABEL_NOP)
|
|
arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
|
|
}
|
|
}
|
|
|
|
static int jump_label_add_module(struct module *mod)
|
|
{
|
|
struct jump_entry *iter_start = mod->jump_entries;
|
|
struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
|
|
struct jump_entry *iter;
|
|
struct static_key *key = NULL;
|
|
struct static_key_mod *jlm;
|
|
|
|
/* if the module doesn't have jump label entries, just return */
|
|
if (iter_start == iter_stop)
|
|
return 0;
|
|
|
|
jump_label_sort_entries(iter_start, iter_stop);
|
|
|
|
for (iter = iter_start; iter < iter_stop; iter++) {
|
|
struct static_key *iterk;
|
|
|
|
iterk = jump_entry_key(iter);
|
|
if (iterk == key)
|
|
continue;
|
|
|
|
key = iterk;
|
|
if (within_module(iter->key, mod)) {
|
|
/*
|
|
* Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH.
|
|
*/
|
|
*((unsigned long *)&key->entries) += (unsigned long)iter;
|
|
key->next = NULL;
|
|
continue;
|
|
}
|
|
jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL);
|
|
if (!jlm)
|
|
return -ENOMEM;
|
|
jlm->mod = mod;
|
|
jlm->entries = iter;
|
|
jlm->next = key->next;
|
|
key->next = jlm;
|
|
|
|
/* Only update if we've changed from our initial state */
|
|
if (jump_label_type(iter) != jump_label_init_type(iter))
|
|
__jump_label_update(key, iter, iter_stop);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void jump_label_del_module(struct module *mod)
|
|
{
|
|
struct jump_entry *iter_start = mod->jump_entries;
|
|
struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
|
|
struct jump_entry *iter;
|
|
struct static_key *key = NULL;
|
|
struct static_key_mod *jlm, **prev;
|
|
|
|
for (iter = iter_start; iter < iter_stop; iter++) {
|
|
if (jump_entry_key(iter) == key)
|
|
continue;
|
|
|
|
key = jump_entry_key(iter);
|
|
|
|
if (within_module(iter->key, mod))
|
|
continue;
|
|
|
|
prev = &key->next;
|
|
jlm = key->next;
|
|
|
|
while (jlm && jlm->mod != mod) {
|
|
prev = &jlm->next;
|
|
jlm = jlm->next;
|
|
}
|
|
|
|
if (jlm) {
|
|
*prev = jlm->next;
|
|
kfree(jlm);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void jump_label_invalidate_module_init(struct module *mod)
|
|
{
|
|
struct jump_entry *iter_start = mod->jump_entries;
|
|
struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
|
|
struct jump_entry *iter;
|
|
|
|
for (iter = iter_start; iter < iter_stop; iter++) {
|
|
if (within_module_init(iter->code, mod))
|
|
iter->code = 0;
|
|
}
|
|
}
|
|
|
|
static int
|
|
jump_label_module_notify(struct notifier_block *self, unsigned long val,
|
|
void *data)
|
|
{
|
|
struct module *mod = data;
|
|
int ret = 0;
|
|
|
|
switch (val) {
|
|
case MODULE_STATE_COMING:
|
|
jump_label_lock();
|
|
ret = jump_label_add_module(mod);
|
|
if (ret)
|
|
jump_label_del_module(mod);
|
|
jump_label_unlock();
|
|
break;
|
|
case MODULE_STATE_GOING:
|
|
jump_label_lock();
|
|
jump_label_del_module(mod);
|
|
jump_label_unlock();
|
|
break;
|
|
case MODULE_STATE_LIVE:
|
|
jump_label_lock();
|
|
jump_label_invalidate_module_init(mod);
|
|
jump_label_unlock();
|
|
break;
|
|
}
|
|
|
|
return notifier_from_errno(ret);
|
|
}
|
|
|
|
struct notifier_block jump_label_module_nb = {
|
|
.notifier_call = jump_label_module_notify,
|
|
.priority = 1, /* higher than tracepoints */
|
|
};
|
|
|
|
static __init int jump_label_init_module(void)
|
|
{
|
|
return register_module_notifier(&jump_label_module_nb);
|
|
}
|
|
early_initcall(jump_label_init_module);
|
|
|
|
#endif /* CONFIG_MODULES */
|
|
|
|
/***
|
|
* jump_label_text_reserved - check if addr range is reserved
|
|
* @start: start text addr
|
|
* @end: end text addr
|
|
*
|
|
* checks if the text addr located between @start and @end
|
|
* overlaps with any of the jump label patch addresses. Code
|
|
* that wants to modify kernel text should first verify that
|
|
* it does not overlap with any of the jump label addresses.
|
|
* Caller must hold jump_label_mutex.
|
|
*
|
|
* returns 1 if there is an overlap, 0 otherwise
|
|
*/
|
|
int jump_label_text_reserved(void *start, void *end)
|
|
{
|
|
int ret = __jump_label_text_reserved(__start___jump_table,
|
|
__stop___jump_table, start, end);
|
|
|
|
if (ret)
|
|
return ret;
|
|
|
|
#ifdef CONFIG_MODULES
|
|
ret = __jump_label_mod_text_reserved(start, end);
|
|
#endif
|
|
return ret;
|
|
}
|
|
|
|
static void jump_label_update(struct static_key *key)
|
|
{
|
|
struct jump_entry *stop = __stop___jump_table;
|
|
struct jump_entry *entry = static_key_entries(key);
|
|
#ifdef CONFIG_MODULES
|
|
struct module *mod;
|
|
|
|
__jump_label_mod_update(key);
|
|
|
|
preempt_disable();
|
|
mod = __module_address((unsigned long)key);
|
|
if (mod)
|
|
stop = mod->jump_entries + mod->num_jump_entries;
|
|
preempt_enable();
|
|
#endif
|
|
/* if there are no users, entry can be NULL */
|
|
if (entry)
|
|
__jump_label_update(key, entry, stop);
|
|
}
|
|
|
|
#ifdef CONFIG_STATIC_KEYS_SELFTEST
static DEFINE_STATIC_KEY_TRUE(sk_true);
static DEFINE_STATIC_KEY_FALSE(sk_false);

/*
 * Late-initcall self test. Two passes check that a default-true and a
 * default-false key report the expected state through
 * static_key_enabled(), static_branch_likely() and
 * static_branch_unlikely(), both in their initial state and after being
 * flipped. Each pass restores the initial state at its end, so the
 * second pass also covers the return transition. Always returns 0;
 * failures are reported through WARN_ON().
 */
static __init int jump_label_test(void)
{
	int pass;

	for (pass = 0; pass < 2; pass++) {
		/* Initial state: sk_true enabled, sk_false disabled. */
		WARN_ON(static_key_enabled(&sk_true.key) != true);
		WARN_ON(static_key_enabled(&sk_false.key) != false);

		WARN_ON(!static_branch_likely(&sk_true));
		WARN_ON(!static_branch_unlikely(&sk_true));
		WARN_ON(static_branch_likely(&sk_false));
		WARN_ON(static_branch_unlikely(&sk_false));

		/* Flip both keys and expect the opposite answers. */
		static_branch_disable(&sk_true);
		static_branch_enable(&sk_false);

		WARN_ON(static_key_enabled(&sk_true.key) == true);
		WARN_ON(static_key_enabled(&sk_false.key) == false);

		WARN_ON(static_branch_likely(&sk_true));
		WARN_ON(static_branch_unlikely(&sk_true));
		WARN_ON(!static_branch_likely(&sk_false));
		WARN_ON(!static_branch_unlikely(&sk_false));

		/* Restore the initial state for the next pass. */
		static_branch_enable(&sk_true);
		static_branch_disable(&sk_false);
	}

	return 0;
}
late_initcall(jump_label_test);
#endif /* CONFIG_STATIC_KEYS_SELFTEST */
|
|
|
|
#endif /* HAVE_JUMP_LABEL */
|