diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt index ea8d7b4e53f0..32a25fad0c1b 100644 --- a/Documentation/static-keys.txt +++ b/Documentation/static-keys.txt @@ -155,7 +155,9 @@ or: There are a few functions and macros that architectures must implement in order to take advantage of this optimization. If there is no architecture support, we -simply fall back to a traditional, load, test, and jump sequence. +simply fall back to a traditional, load, test, and jump sequence. Also, the +struct jump_entry table must be at least 4-byte aligned because the +static_key->entry field makes use of the two least significant bits. * select HAVE_ARCH_JUMP_LABEL, see: arch/x86/Kconfig diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 91c30cba984e..627e697e5d25 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -105,29 +105,36 @@ struct ftrace_branch_data { }; }; +struct ftrace_likely_data { + struct ftrace_branch_data data; + unsigned long constant; +}; + /* * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code * to disable branch tracing on a per file basis. */ #if defined(CONFIG_TRACE_BRANCH_PROFILING) \ && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__) -void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); +void ftrace_likely_update(struct ftrace_likely_data *f, int val, + int expect, int is_constant); #define likely_notrace(x) __builtin_expect(!!(x), 1) #define unlikely_notrace(x) __builtin_expect(!!(x), 0) -#define __branch_check__(x, expect) ({ \ +#define __branch_check__(x, expect, is_constant) ({ \ int ______r; \ - static struct ftrace_branch_data \ + static struct ftrace_likely_data \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_annotated_branch"))) \ ______f = { \ - .func = __func__, \ - .file = __FILE__, \ - .line = __LINE__, \ + .data.func = __func__, \ + .data.file = __FILE__, \ + .data.line = __LINE__, \ }; \ - ______r = likely_notrace(x); \ - ftrace_likely_update(&______f, ______r, expect); \ + ______r = __builtin_expect(!!(x), expect); \ + ftrace_likely_update(&______f, ______r, \ + expect, is_constant); \ ______r; \ }) @@ -137,10 +144,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); * written by Daniel Walker. */ # ifndef likely -# define likely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 1)) +# define likely(x) (__branch_check__(x, 1, __builtin_constant_p(x))) # endif # ifndef unlikely -# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0)) +# define unlikely(x) (__branch_check__(x, 0, __builtin_constant_p(x))) # endif #ifdef CONFIG_PROFILE_ALL_BRANCHES diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index b63d6b7b0db0..8e06d758ee48 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -89,11 +89,17 @@ extern bool static_key_initialized; struct static_key { atomic_t enabled; -/* Set lsb bit to 1 if branch is default true, 0 ot */ - struct jump_entry *entries; -#ifdef CONFIG_MODULES - struct static_key_mod *next; -#endif +/* + * bit 0 => 1 if key is initially true + * 0 if initially false + * bit 1 => 1 if points to struct static_key_mod + * 0 if points to struct jump_entry + */ + union { + unsigned long type; + struct jump_entry *entries; + struct static_key_mod *next; + }; }; #else @@ -118,9 +124,10 @@ struct module; #ifdef HAVE_JUMP_LABEL -#define JUMP_TYPE_FALSE 0UL -#define JUMP_TYPE_TRUE 1UL -#define JUMP_TYPE_MASK 1UL +#define JUMP_TYPE_FALSE 0UL +#define JUMP_TYPE_TRUE 1UL +#define JUMP_TYPE_LINKED 2UL +#define JUMP_TYPE_MASK 3UL static __always_inline bool static_key_false(struct static_key *key) { diff --git a/include/linux/timer.h b/include/linux/timer.h index 5a209b84fd9e..c7bdf895179c 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -61,6 +61,8 @@ struct timer_list { #define TIMER_ARRAYSHIFT 22 #define TIMER_ARRAYMASK 0xFFC00000 +#define TIMER_TRACE_FLAGMASK (TIMER_MIGRATING | TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE) + #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 1bca99dbb98f..80787eafba99 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -36,6 +36,13 @@ DEFINE_EVENT(timer_class, timer_init, TP_ARGS(timer) ); +#define decode_timer_flags(flags) \ + __print_flags(flags, "|", \ + { TIMER_MIGRATING, "M" }, \ + { TIMER_DEFERRABLE, "D" }, \ + { TIMER_PINNED, "P" }, \ + { TIMER_IRQSAFE, "I" }) + /** * timer_start - called when the timer is started * @timer: pointer to struct timer_list @@ -65,9 +72,12 @@ TRACE_EVENT(timer_start, __entry->flags = flags; ), - TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] flags=0x%08x", + TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] cpu=%u idx=%u flags=%s", __entry->timer, __entry->function, __entry->expires, - (long)__entry->expires - __entry->now, __entry->flags) + (long)__entry->expires - __entry->now, + __entry->flags & TIMER_CPUMASK, + __entry->flags >> TIMER_ARRAYSHIFT, + decode_timer_flags(__entry->flags & TIMER_TRACE_FLAGMASK)) ); /** diff --git a/kernel/jump_label.c b/kernel/jump_label.c index a9b8cf500591..6c9cb208ac48 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -236,12 +236,28 @@ void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry static inline struct jump_entry *static_key_entries(struct static_key *key) { - return (struct jump_entry *)((unsigned long)key->entries & ~JUMP_TYPE_MASK); + WARN_ON_ONCE(key->type & JUMP_TYPE_LINKED); + return (struct jump_entry *)(key->type & ~JUMP_TYPE_MASK); } static inline bool static_key_type(struct static_key *key) { - return (unsigned long)key->entries & JUMP_TYPE_MASK; + return key->type & JUMP_TYPE_TRUE; +} + +static inline bool static_key_linked(struct static_key *key) +{ + return key->type & JUMP_TYPE_LINKED; +} + +static inline void static_key_clear_linked(struct static_key *key) +{ + key->type &= ~JUMP_TYPE_LINKED; +} + +static inline void static_key_set_linked(struct static_key *key) +{ + key->type |= JUMP_TYPE_LINKED; } static inline struct static_key *jump_entry_key(struct jump_entry *entry) @@ -254,6 +270,26 @@ static bool jump_entry_branch(struct jump_entry *entry) return (unsigned long)entry->key & 1UL; } +/*** + * A 'struct static_key' uses a union such that it either points directly + * to a table of 'struct jump_entry' or to a linked list of modules which in + * turn point to 'struct jump_entry' tables. + * + * The two lower bits of the pointer are used to keep track of which pointer + * type is in use and to store the initial branch direction, we use an access + * function which preserves these bits. + */ +static void static_key_set_entries(struct static_key *key, + struct jump_entry *entries) +{ + unsigned long type; + + WARN_ON_ONCE((unsigned long)entries & JUMP_TYPE_MASK); + type = key->type & JUMP_TYPE_MASK; + key->entries = entries; + key->type |= type; +} + static enum jump_label_type jump_label_type(struct jump_entry *entry) { struct static_key *key = jump_entry_key(entry); @@ -313,13 +349,7 @@ void __init jump_label_init(void) continue; key = iterk; - /* - * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. - */ - *((unsigned long *)&key->entries) += (unsigned long)iter; -#ifdef CONFIG_MODULES - key->next = NULL; -#endif + static_key_set_entries(key, iter); } static_key_initialized = true; jump_label_unlock(); @@ -343,6 +373,29 @@ struct static_key_mod { struct module *mod; }; +static inline struct static_key_mod *static_key_mod(struct static_key *key) +{ + WARN_ON_ONCE(!(key->type & JUMP_TYPE_LINKED)); + return (struct static_key_mod *)(key->type & ~JUMP_TYPE_MASK); +} + +/*** + * key->type and key->next are the same via union. + * This sets key->next and preserves the type bits. + * + * See additional comments above static_key_set_entries(). + */ +static void static_key_set_mod(struct static_key *key, + struct static_key_mod *mod) +{ + unsigned long type; + + WARN_ON_ONCE((unsigned long)mod & JUMP_TYPE_MASK); + type = key->type & JUMP_TYPE_MASK; + key->next = mod; + key->type |= type; +} + static int __jump_label_mod_text_reserved(void *start, void *end) { struct module *mod; @@ -365,11 +418,23 @@ static void __jump_label_mod_update(struct static_key *key) { struct static_key_mod *mod; - for (mod = key->next; mod; mod = mod->next) { - struct module *m = mod->mod; + for (mod = static_key_mod(key); mod; mod = mod->next) { + struct jump_entry *stop; + struct module *m; - __jump_label_update(key, mod->entries, - m->jump_entries + m->num_jump_entries); + /* + * NULL if the static_key is defined in a module + * that does not use it + */ + if (!mod->entries) + continue; + + m = mod->mod; + if (!m) + stop = __stop___jump_table; + else + stop = m->jump_entries + m->num_jump_entries; + __jump_label_update(key, mod->entries, stop); } } @@ -404,7 +469,7 @@ static int jump_label_add_module(struct module *mod) struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; struct jump_entry *iter; struct static_key *key = NULL; - struct static_key_mod *jlm; + struct static_key_mod *jlm, *jlm2; /* if the module doesn't have jump label entries, just return */ if (iter_start == iter_stop) @@ -421,20 +486,32 @@ static int jump_label_add_module(struct module *mod) key = iterk; if (within_module(iter->key, mod)) { - /* - * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. - */ - *((unsigned long *)&key->entries) += (unsigned long)iter; - key->next = NULL; + static_key_set_entries(key, iter); continue; } jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL); if (!jlm) return -ENOMEM; + if (!static_key_linked(key)) { + jlm2 = kzalloc(sizeof(struct static_key_mod), + GFP_KERNEL); + if (!jlm2) { + kfree(jlm); + return -ENOMEM; + } + preempt_disable(); + jlm2->mod = __module_address((unsigned long)key); + preempt_enable(); + jlm2->entries = static_key_entries(key); + jlm2->next = NULL; + static_key_set_mod(key, jlm2); + static_key_set_linked(key); + } jlm->mod = mod; jlm->entries = iter; - jlm->next = key->next; - key->next = jlm; + jlm->next = static_key_mod(key); + static_key_set_mod(key, jlm); + static_key_set_linked(key); /* Only update if we've changed from our initial state */ if (jump_label_type(iter) != jump_label_init_type(iter)) @@ -461,16 +538,34 @@ static void jump_label_del_module(struct module *mod) if (within_module(iter->key, mod)) continue; + /* No memory during module load */ + if (WARN_ON(!static_key_linked(key))) + continue; + prev = &key->next; - jlm = key->next; + jlm = static_key_mod(key); while (jlm && jlm->mod != mod) { prev = &jlm->next; jlm = jlm->next; } - if (jlm) { + /* No memory during module load */ + if (WARN_ON(!jlm)) + continue; + + if (prev == &key->next) + static_key_set_mod(key, jlm->next); + else *prev = jlm->next; + + kfree(jlm); + + jlm = static_key_mod(key); + /* if only one etry is left, fold it back into the static_key */ + if (jlm->next == NULL) { + static_key_set_entries(key, jlm->entries); + static_key_clear_linked(key); kfree(jlm); } } @@ -499,8 +594,10 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, case MODULE_STATE_COMING: jump_label_lock(); ret = jump_label_add_module(mod); - if (ret) + if (ret) { + WARN(1, "Failed to allocatote memory: jump_label may not work properly.\n"); jump_label_del_module(mod); + } jump_label_unlock(); break; case MODULE_STATE_GOING: @@ -561,11 +658,14 @@ int jump_label_text_reserved(void *start, void *end) static void jump_label_update(struct static_key *key) { struct jump_entry *stop = __stop___jump_table; - struct jump_entry *entry = static_key_entries(key); + struct jump_entry *entry; #ifdef CONFIG_MODULES struct module *mod; - __jump_label_mod_update(key); + if (static_key_linked(key)) { + __jump_label_mod_update(key); + return; + } preempt_disable(); mod = __module_address((unsigned long)key); @@ -573,6 +673,7 @@ static void jump_label_update(struct static_key *key) stop = mod->jump_entries + mod->num_jump_entries; preempt_enable(); #endif + entry = static_key_entries(key); /* if there are no users, entry can be NULL */ if (entry) __jump_label_update(key, entry, stop); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index eb230f06ba41..0c0609326391 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1110,13 +1110,6 @@ struct ftrace_func_entry { unsigned long ip; }; -struct ftrace_hash { - unsigned long size_bits; - struct hlist_head *buckets; - unsigned long count; - struct rcu_head rcu; -}; - /* * We make these constant because no one should touch them, * but they are used as the default "empty hash", to avoid allocating @@ -1192,26 +1185,24 @@ struct ftrace_page { static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; -static bool __always_inline ftrace_hash_empty(struct ftrace_hash *hash) +static __always_inline unsigned long +ftrace_hash_key(struct ftrace_hash *hash, unsigned long ip) { - return !hash || !hash->count; + if (hash->size_bits > 0) + return hash_long(ip, hash->size_bits); + + return 0; } -static struct ftrace_func_entry * -ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) +/* Only use this function if ftrace_hash_empty() has already been tested */ +static __always_inline struct ftrace_func_entry * +__ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) { unsigned long key; struct ftrace_func_entry *entry; struct hlist_head *hhd; - if (ftrace_hash_empty(hash)) - return NULL; - - if (hash->size_bits > 0) - key = hash_long(ip, hash->size_bits); - else - key = 0; - + key = ftrace_hash_key(hash, ip); hhd = &hash->buckets[key]; hlist_for_each_entry_rcu_notrace(entry, hhd, hlist) { @@ -1221,17 +1212,32 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) return NULL; } +/** + * ftrace_lookup_ip - Test to see if an ip exists in an ftrace_hash + * @hash: The hash to look at + * @ip: The instruction pointer to test + * + * Search a given @hash to see if a given instruction pointer (@ip) + * exists in it. + * + * Returns the entry that holds the @ip if found. NULL otherwise. + */ +struct ftrace_func_entry * +ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) +{ + if (ftrace_hash_empty(hash)) + return NULL; + + return __ftrace_lookup_ip(hash, ip); +} + static void __add_hash_entry(struct ftrace_hash *hash, struct ftrace_func_entry *entry) { struct hlist_head *hhd; unsigned long key; - if (hash->size_bits) - key = hash_long(entry->ip, hash->size_bits); - else - key = 0; - + key = ftrace_hash_key(hash, entry->ip); hhd = &hash->buckets[key]; hlist_add_head(&entry->hlist, hhd); hash->count++; @@ -1383,9 +1389,8 @@ ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, int filter_hash); static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, struct ftrace_hash *new_hash); -static int -ftrace_hash_move(struct ftrace_ops *ops, int enable, - struct ftrace_hash **dst, struct ftrace_hash *src) +static struct ftrace_hash * +__ftrace_hash_move(struct ftrace_hash *src) { struct ftrace_func_entry *entry; struct hlist_node *tn; @@ -1393,21 +1398,13 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, struct ftrace_hash *new_hash; int size = src->count; int bits = 0; - int ret; int i; - /* Reject setting notrace hash on IPMODIFY ftrace_ops */ - if (ops->flags & FTRACE_OPS_FL_IPMODIFY && !enable) - return -EINVAL; - /* - * If the new source is empty, just free dst and assign it - * the empty_hash. + * If the new source is empty, just return the empty_hash. */ - if (!src->count) { - new_hash = EMPTY_HASH; - goto update; - } + if (!src->count) + return EMPTY_HASH; /* * Make the hash size about 1/2 the # found @@ -1421,7 +1418,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, new_hash = alloc_ftrace_hash(bits); if (!new_hash) - return -ENOMEM; + return NULL; size = 1 << src->size_bits; for (i = 0; i < size; i++) { @@ -1432,7 +1429,24 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, } } -update: + return new_hash; +} + +static int +ftrace_hash_move(struct ftrace_ops *ops, int enable, + struct ftrace_hash **dst, struct ftrace_hash *src) +{ + struct ftrace_hash *new_hash; + int ret; + + /* Reject setting notrace hash on IPMODIFY ftrace_ops */ + if (ops->flags & FTRACE_OPS_FL_IPMODIFY && !enable) + return -EINVAL; + + new_hash = __ftrace_hash_move(src); + if (!new_hash) + return -ENOMEM; + /* Make sure this can be applied if it is IPMODIFY ftrace_ops */ if (enable) { /* IPMODIFY should be updated only when filter_hash updating */ @@ -1466,9 +1480,9 @@ static bool hash_contains_ip(unsigned long ip, * notrace hash is considered not in the notrace hash. */ return (ftrace_hash_empty(hash->filter_hash) || - ftrace_lookup_ip(hash->filter_hash, ip)) && + __ftrace_lookup_ip(hash->filter_hash, ip)) && (ftrace_hash_empty(hash->notrace_hash) || - !ftrace_lookup_ip(hash->notrace_hash, ip)); + !__ftrace_lookup_ip(hash->notrace_hash, ip)); } /* @@ -2880,7 +2894,7 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) /* The function must be in the filter */ if (!ftrace_hash_empty(ops->func_hash->filter_hash) && - !ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip)) + !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip)) return 0; /* If in notrace hash, we ignore it too */ @@ -4382,7 +4396,7 @@ __setup("ftrace_filter=", set_ftrace_filter); #ifdef CONFIG_FUNCTION_GRAPH_TRACER static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata; -static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer); +static int ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer); static unsigned long save_global_trampoline; static unsigned long save_global_flags; @@ -4405,18 +4419,17 @@ static void __init set_ftrace_early_graph(char *buf, int enable) { int ret; char *func; - unsigned long *table = ftrace_graph_funcs; - int *count = &ftrace_graph_count; + struct ftrace_hash *hash; - if (!enable) { - table = ftrace_graph_notrace_funcs; - count = &ftrace_graph_notrace_count; - } + if (enable) + hash = ftrace_graph_hash; + else + hash = ftrace_graph_notrace_hash; while (buf) { func = strsep(&buf, ","); /* we allow only one expression at a time */ - ret = ftrace_set_func(table, count, FTRACE_GRAPH_MAX_FUNCS, func); + ret = ftrace_graph_set_hash(hash, func); if (ret) printk(KERN_DEBUG "ftrace: function %s not " "traceable\n", func); @@ -4540,26 +4553,55 @@ static const struct file_operations ftrace_notrace_fops = { static DEFINE_MUTEX(graph_lock); -int ftrace_graph_count; -int ftrace_graph_notrace_count; -unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; -unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; +struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH; +struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH; + +enum graph_filter_type { + GRAPH_FILTER_NOTRACE = 0, + GRAPH_FILTER_FUNCTION, +}; + +#define FTRACE_GRAPH_EMPTY ((void *)1) struct ftrace_graph_data { - unsigned long *table; - size_t size; - int *count; - const struct seq_operations *seq_ops; + struct ftrace_hash *hash; + struct ftrace_func_entry *entry; + int idx; /* for hash table iteration */ + enum graph_filter_type type; + struct ftrace_hash *new_hash; + const struct seq_operations *seq_ops; + struct trace_parser parser; }; static void * __g_next(struct seq_file *m, loff_t *pos) { struct ftrace_graph_data *fgd = m->private; + struct ftrace_func_entry *entry = fgd->entry; + struct hlist_head *head; + int i, idx = fgd->idx; - if (*pos >= *fgd->count) + if (*pos >= fgd->hash->count) return NULL; - return &fgd->table[*pos]; + + if (entry) { + hlist_for_each_entry_continue(entry, hlist) { + fgd->entry = entry; + return entry; + } + + idx++; + } + + for (i = idx; i < 1 << fgd->hash->size_bits; i++) { + head = &fgd->hash->buckets[i]; + hlist_for_each_entry(entry, head, hlist) { + fgd->entry = entry; + fgd->idx = i; + return entry; + } + } + return NULL; } static void * @@ -4575,10 +4617,19 @@ static void *g_start(struct seq_file *m, loff_t *pos) mutex_lock(&graph_lock); - /* Nothing, tell g_show to print all functions are enabled */ - if (!*fgd->count && !*pos) - return (void *)1; + if (fgd->type == GRAPH_FILTER_FUNCTION) + fgd->hash = rcu_dereference_protected(ftrace_graph_hash, + lockdep_is_held(&graph_lock)); + else + fgd->hash = rcu_dereference_protected(ftrace_graph_notrace_hash, + lockdep_is_held(&graph_lock)); + /* Nothing, tell g_show to print all functions are enabled */ + if (ftrace_hash_empty(fgd->hash) && !*pos) + return FTRACE_GRAPH_EMPTY; + + fgd->idx = 0; + fgd->entry = NULL; return __g_next(m, pos); } @@ -4589,22 +4640,22 @@ static void g_stop(struct seq_file *m, void *p) static int g_show(struct seq_file *m, void *v) { - unsigned long *ptr = v; + struct ftrace_func_entry *entry = v; - if (!ptr) + if (!entry) return 0; - if (ptr == (unsigned long *)1) { + if (entry == FTRACE_GRAPH_EMPTY) { struct ftrace_graph_data *fgd = m->private; - if (fgd->table == ftrace_graph_funcs) + if (fgd->type == GRAPH_FILTER_FUNCTION) seq_puts(m, "#### all functions enabled ####\n"); else seq_puts(m, "#### no functions disabled ####\n"); return 0; } - seq_printf(m, "%ps\n", (void *)*ptr); + seq_printf(m, "%ps\n", (void *)entry->ip); return 0; } @@ -4621,24 +4672,51 @@ __ftrace_graph_open(struct inode *inode, struct file *file, struct ftrace_graph_data *fgd) { int ret = 0; + struct ftrace_hash *new_hash = NULL; - mutex_lock(&graph_lock); - if ((file->f_mode & FMODE_WRITE) && - (file->f_flags & O_TRUNC)) { - *fgd->count = 0; - memset(fgd->table, 0, fgd->size * sizeof(*fgd->table)); + if (file->f_mode & FMODE_WRITE) { + const int size_bits = FTRACE_HASH_DEFAULT_BITS; + + if (trace_parser_get_init(&fgd->parser, FTRACE_BUFF_MAX)) + return -ENOMEM; + + if (file->f_flags & O_TRUNC) + new_hash = alloc_ftrace_hash(size_bits); + else + new_hash = alloc_and_copy_ftrace_hash(size_bits, + fgd->hash); + if (!new_hash) { + ret = -ENOMEM; + goto out; + } } - mutex_unlock(&graph_lock); if (file->f_mode & FMODE_READ) { - ret = seq_open(file, fgd->seq_ops); + ret = seq_open(file, &ftrace_graph_seq_ops); if (!ret) { struct seq_file *m = file->private_data; m->private = fgd; + } else { + /* Failed */ + free_ftrace_hash(new_hash); + new_hash = NULL; } } else file->private_data = fgd; +out: + if (ret < 0 && file->f_mode & FMODE_WRITE) + trace_parser_put(&fgd->parser); + + fgd->new_hash = new_hash; + + /* + * All uses of fgd->hash must be taken with the graph_lock + * held. The graph_lock is going to be released, so force + * fgd->hash to be reinitialized when it is taken again. + */ + fgd->hash = NULL; + return ret; } @@ -4646,6 +4724,7 @@ static int ftrace_graph_open(struct inode *inode, struct file *file) { struct ftrace_graph_data *fgd; + int ret; if (unlikely(ftrace_disabled)) return -ENODEV; @@ -4654,18 +4733,26 @@ ftrace_graph_open(struct inode *inode, struct file *file) if (fgd == NULL) return -ENOMEM; - fgd->table = ftrace_graph_funcs; - fgd->size = FTRACE_GRAPH_MAX_FUNCS; - fgd->count = &ftrace_graph_count; + mutex_lock(&graph_lock); + + fgd->hash = rcu_dereference_protected(ftrace_graph_hash, + lockdep_is_held(&graph_lock)); + fgd->type = GRAPH_FILTER_FUNCTION; fgd->seq_ops = &ftrace_graph_seq_ops; - return __ftrace_graph_open(inode, file, fgd); + ret = __ftrace_graph_open(inode, file, fgd); + if (ret < 0) + kfree(fgd); + + mutex_unlock(&graph_lock); + return ret; } static int ftrace_graph_notrace_open(struct inode *inode, struct file *file) { struct ftrace_graph_data *fgd; + int ret; if (unlikely(ftrace_disabled)) return -ENODEV; @@ -4674,45 +4761,97 @@ ftrace_graph_notrace_open(struct inode *inode, struct file *file) if (fgd == NULL) return -ENOMEM; - fgd->table = ftrace_graph_notrace_funcs; - fgd->size = FTRACE_GRAPH_MAX_FUNCS; - fgd->count = &ftrace_graph_notrace_count; + mutex_lock(&graph_lock); + + fgd->hash = rcu_dereference_protected(ftrace_graph_notrace_hash, + lockdep_is_held(&graph_lock)); + fgd->type = GRAPH_FILTER_NOTRACE; fgd->seq_ops = &ftrace_graph_seq_ops; - return __ftrace_graph_open(inode, file, fgd); + ret = __ftrace_graph_open(inode, file, fgd); + if (ret < 0) + kfree(fgd); + + mutex_unlock(&graph_lock); + return ret; } static int ftrace_graph_release(struct inode *inode, struct file *file) { + struct ftrace_graph_data *fgd; + struct ftrace_hash *old_hash, *new_hash; + struct trace_parser *parser; + int ret = 0; + if (file->f_mode & FMODE_READ) { struct seq_file *m = file->private_data; - kfree(m->private); + fgd = m->private; seq_release(inode, file); } else { - kfree(file->private_data); + fgd = file->private_data; } - return 0; + + if (file->f_mode & FMODE_WRITE) { + + parser = &fgd->parser; + + if (trace_parser_loaded((parser))) { + parser->buffer[parser->idx] = 0; + ret = ftrace_graph_set_hash(fgd->new_hash, + parser->buffer); + } + + trace_parser_put(parser); + + new_hash = __ftrace_hash_move(fgd->new_hash); + if (!new_hash) { + ret = -ENOMEM; + goto out; + } + + mutex_lock(&graph_lock); + + if (fgd->type == GRAPH_FILTER_FUNCTION) { + old_hash = rcu_dereference_protected(ftrace_graph_hash, + lockdep_is_held(&graph_lock)); + rcu_assign_pointer(ftrace_graph_hash, new_hash); + } else { + old_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, + lockdep_is_held(&graph_lock)); + rcu_assign_pointer(ftrace_graph_notrace_hash, new_hash); + } + + mutex_unlock(&graph_lock); + + /* Wait till all users are no longer using the old hash */ + synchronize_sched(); + + free_ftrace_hash(old_hash); + } + + out: + kfree(fgd->new_hash); + kfree(fgd); + + return ret; } static int -ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer) +ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) { struct ftrace_glob func_g; struct dyn_ftrace *rec; struct ftrace_page *pg; + struct ftrace_func_entry *entry; int fail = 1; int not; - bool exists; - int i; /* decode regex */ func_g.type = filter_parse_regex(buffer, strlen(buffer), &func_g.search, ¬); - if (!not && *idx >= size) - return -EBUSY; func_g.len = strlen(func_g.search); @@ -4729,26 +4868,18 @@ ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer) continue; if (ftrace_match_record(rec, &func_g, NULL, 0)) { - /* if it is in the array */ - exists = false; - for (i = 0; i < *idx; i++) { - if (array[i] == rec->ip) { - exists = true; - break; - } - } + entry = ftrace_lookup_ip(hash, rec->ip); if (!not) { fail = 0; - if (!exists) { - array[(*idx)++] = rec->ip; - if (*idx >= size) - goto out; - } + + if (entry) + continue; + if (add_hash_entry(hash, rec->ip) < 0) + goto out; } else { - if (exists) { - array[i] = array[--(*idx)]; - array[*idx] = 0; + if (entry) { + free_hash_entry(hash, entry); fail = 0; } } @@ -4767,35 +4898,34 @@ static ssize_t ftrace_graph_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct trace_parser parser; ssize_t read, ret = 0; struct ftrace_graph_data *fgd = file->private_data; + struct trace_parser *parser; if (!cnt) return 0; - if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) - return -ENOMEM; + /* Read mode uses seq functions */ + if (file->f_mode & FMODE_READ) { + struct seq_file *m = file->private_data; + fgd = m->private; + } - read = trace_get_user(&parser, ubuf, cnt, ppos); + parser = &fgd->parser; - if (read >= 0 && trace_parser_loaded((&parser))) { - parser.buffer[parser.idx] = 0; + read = trace_get_user(parser, ubuf, cnt, ppos); - mutex_lock(&graph_lock); + if (read >= 0 && trace_parser_loaded(parser) && + !trace_parser_cont(parser)) { - /* we allow only one expression at a time */ - ret = ftrace_set_func(fgd->table, fgd->count, fgd->size, - parser.buffer); - - mutex_unlock(&graph_lock); + ret = ftrace_graph_set_hash(fgd->new_hash, + parser->buffer); + trace_parser_clear(parser); } if (!ret) ret = read; - trace_parser_put(&parser); - return ret; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 310f0ea0d1a2..707445ceb7ef 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -260,16 +260,8 @@ unsigned long long ns2usecs(u64 nsec) TRACE_ITER_EVENT_FORK /* - * The global_trace is the descriptor that holds the tracing - * buffers for the live tracing. For each CPU, it contains - * a link list of pages that will store trace entries. The - * page descriptor of the pages in the memory is used to hold - * the link list by linking the lru item in the page descriptor - * to each of the pages in the buffer per CPU. - * - * For each active CPU there is a data field that holds the - * pages for the buffer for that CPU. Each CPU has the same number - * of pages allocated for its buffer. + * The global_trace is the descriptor that holds the top-level tracing + * buffers for the live tracing. */ static struct trace_array global_trace = { .trace_flags = TRACE_DEFAULT_FLAGS, @@ -1193,6 +1185,7 @@ int trace_parser_get_init(struct trace_parser *parser, int size) void trace_parser_put(struct trace_parser *parser) { kfree(parser->buffer); + parser->buffer = NULL; } /* diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1ea51ab53edf..ae1cce91fead 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -753,6 +753,21 @@ enum print_line_t print_trace_line(struct trace_iterator *iter); extern char trace_find_mark(unsigned long long duration); +struct ftrace_hash { + unsigned long size_bits; + struct hlist_head *buckets; + unsigned long count; + struct rcu_head rcu; +}; + +struct ftrace_func_entry * +ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip); + +static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash) +{ + return !hash || !hash->count; +} + /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -787,53 +802,50 @@ extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned long flags, int pc); - #ifdef CONFIG_DYNAMIC_FTRACE -/* TODO: make this variable */ -#define FTRACE_GRAPH_MAX_FUNCS 32 -extern int ftrace_graph_count; -extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; -extern int ftrace_graph_notrace_count; -extern unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS]; +extern struct ftrace_hash *ftrace_graph_hash; +extern struct ftrace_hash *ftrace_graph_notrace_hash; static inline int ftrace_graph_addr(unsigned long addr) { - int i; + int ret = 0; - if (!ftrace_graph_count) - return 1; + preempt_disable_notrace(); - for (i = 0; i < ftrace_graph_count; i++) { - if (addr == ftrace_graph_funcs[i]) { - /* - * If no irqs are to be traced, but a set_graph_function - * is set, and called by an interrupt handler, we still - * want to trace it. - */ - if (in_irq()) - trace_recursion_set(TRACE_IRQ_BIT); - else - trace_recursion_clear(TRACE_IRQ_BIT); - return 1; - } + if (ftrace_hash_empty(ftrace_graph_hash)) { + ret = 1; + goto out; } - return 0; + if (ftrace_lookup_ip(ftrace_graph_hash, addr)) { + /* + * If no irqs are to be traced, but a set_graph_function + * is set, and called by an interrupt handler, we still + * want to trace it. + */ + if (in_irq()) + trace_recursion_set(TRACE_IRQ_BIT); + else + trace_recursion_clear(TRACE_IRQ_BIT); + ret = 1; + } + +out: + preempt_enable_notrace(); + return ret; } static inline int ftrace_graph_notrace_addr(unsigned long addr) { - int i; + int ret = 0; - if (!ftrace_graph_notrace_count) - return 0; + preempt_disable_notrace(); - for (i = 0; i < ftrace_graph_notrace_count; i++) { - if (addr == ftrace_graph_notrace_funcs[i]) - return 1; - } + if (ftrace_lookup_ip(ftrace_graph_notrace_hash, addr)) + ret = 1; - return 0; + preempt_enable_notrace(); + return ret; } #else static inline int ftrace_graph_addr(unsigned long addr) @@ -1300,7 +1312,8 @@ static inline bool is_string_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_DYN_STRING || field->filter_type == FILTER_STATIC_STRING || - field->filter_type == FILTER_PTR_STRING; + field->filter_type == FILTER_PTR_STRING || + field->filter_type == FILTER_COMM; } static inline bool is_function_field(struct ftrace_event_field *field) diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c index e3b488825ae3..e49fbe901cfc 100644 --- a/kernel/trace/trace_benchmark.c +++ b/kernel/trace/trace_benchmark.c @@ -175,9 +175,9 @@ int trace_benchmark_reg(void) bm_event_thread = kthread_run(benchmark_event_kthread, NULL, "event_benchmark"); - if (!bm_event_thread) { + if (IS_ERR(bm_event_thread)) { pr_warning("trace benchmark failed to create kernel thread\n"); - return -ENOMEM; + return PTR_ERR(bm_event_thread); } return 0; diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 75489de546b6..4d8fdf3184dc 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -27,7 +27,7 @@ static DEFINE_MUTEX(branch_tracing_mutex); static struct trace_array *branch_tracer; static void -probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) +probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) { struct trace_event_call *call = &event_branch; struct trace_array *tr = branch_tracer; @@ -68,16 +68,17 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) entry = ring_buffer_event_data(event); /* Strip off the path, only save the file */ - p = f->file + strlen(f->file); - while (p >= f->file && *p != '/') + p = f->data.file + strlen(f->data.file); + while (p >= f->data.file && *p != '/') p--; p++; - strncpy(entry->func, f->func, TRACE_FUNC_SIZE); + strncpy(entry->func, f->data.func, TRACE_FUNC_SIZE); strncpy(entry->file, p, TRACE_FILE_SIZE); entry->func[TRACE_FUNC_SIZE] = 0; entry->file[TRACE_FILE_SIZE] = 0; - entry->line = f->line; + entry->constant = f->constant; + entry->line = f->data.line; entry->correct = val == expect; if (!call_filter_check_discard(call, entry, buffer, event)) @@ -89,7 +90,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) } static inline -void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) +void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect) { if (!branch_tracing_enabled) return; @@ -195,13 +196,19 @@ core_initcall(init_branch_tracer); #else static inline -void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) +void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect) { } #endif /* CONFIG_BRANCH_TRACER */ -void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect) +void ftrace_likely_update(struct ftrace_likely_data *f, int val, + int expect, int is_constant) { + /* A constant is always correct */ + if (is_constant) { + f->constant++; + val = expect; + } /* * I would love to have a trace point here instead, but the * trace point code is so inundated with unlikely and likely @@ -212,9 +219,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect) /* FIXME: Make this atomic! */ if (val == expect) - f->correct++; + f->data.correct++; else - f->incorrect++; + f->data.incorrect++; } EXPORT_SYMBOL(ftrace_likely_update); @@ -245,29 +252,60 @@ static inline long get_incorrect_percent(struct ftrace_branch_data *p) return percent; } -static int branch_stat_show(struct seq_file *m, void *v) +static const char *branch_stat_process_file(struct ftrace_branch_data *p) { - struct ftrace_branch_data *p = v; const char *f; - long percent; /* Only print the file, not the path */ f = p->file + strlen(p->file); while (f >= p->file && *f != '/') f--; - f++; + return ++f; +} + +static void branch_stat_show(struct seq_file *m, + struct ftrace_branch_data *p, const char *f) +{ + long percent; /* * The miss is overlayed on correct, and hit on incorrect. */ percent = get_incorrect_percent(p); - seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect); if (percent < 0) seq_puts(m, " X "); else seq_printf(m, "%3ld ", percent); + seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line); +} + +static int branch_stat_show_normal(struct seq_file *m, + struct ftrace_branch_data *p, const char *f) +{ + seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect); + branch_stat_show(m, p, f); + return 0; +} + +static int annotate_branch_stat_show(struct seq_file *m, void *v) +{ + struct ftrace_likely_data *p = v; + const char *f; + int l; + + f = branch_stat_process_file(&p->data); + + if (!p->constant) + return branch_stat_show_normal(m, &p->data, f); + + l = snprintf(NULL, 0, "/%lu", p->constant); + l = l > 8 ? 0 : 8 - l; + + seq_printf(m, "%8lu/%lu %*lu ", + p->data.correct, p->constant, l, p->data.incorrect); + branch_stat_show(m, &p->data, f); return 0; } @@ -279,7 +317,7 @@ static void *annotated_branch_stat_start(struct tracer_stat *trace) static void * annotated_branch_stat_next(void *v, int idx) { - struct ftrace_branch_data *p = v; + struct ftrace_likely_data *p = v; ++p; @@ -328,7 +366,7 @@ static struct tracer_stat annotated_branch_stats = { .stat_next = annotated_branch_stat_next, .stat_cmp = annotated_branch_stat_cmp, .stat_headers = annotated_branch_stat_headers, - .stat_show = branch_stat_show + .stat_show = annotate_branch_stat_show }; __init static int init_annotated_branch_stats(void) @@ -379,12 +417,21 @@ all_branch_stat_next(void *v, int idx) return p; } +static int all_branch_stat_show(struct seq_file *m, void *v) +{ + struct ftrace_branch_data *p = v; + const char *f; + + f = branch_stat_process_file(p); + return branch_stat_show_normal(m, p, f); +} + static struct tracer_stat all_branch_stats = { .name = "branch_all", .stat_start = all_branch_stat_start, .stat_next = all_branch_stat_next, .stat_headers = all_branch_stat_headers, - .stat_show = branch_stat_show + .stat_show = all_branch_stat_show }; __init static int all_annotated_branch_stats(void) diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index eb7396b7e7c3..c203ac4df791 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -328,11 +328,13 @@ FTRACE_ENTRY(branch, trace_branch, __array( char, func, TRACE_FUNC_SIZE+1 ) __array( char, file, TRACE_FILE_SIZE+1 ) __field( char, correct ) + __field( char, constant ) ), - F_printk("%u:%s:%s (%u)", + F_printk("%u:%s:%s (%u)%s", __entry->line, - __entry->func, __entry->file, __entry->correct), + __entry->func, __entry->file, __entry->correct, + __entry->constant ? " CONSTANT" : ""), FILTER_OTHER ); diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index af344a1bf0d0..1199fe1d8eba 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -322,10 +322,7 @@ static void move_to_next_cpu(bool initmask) * need to ensure nothing else might be running (and thus preempting). * Obviously this should never be used in production environments. * - * Currently this runs on which ever CPU it was scheduled on, but most - * real-world hardware latency situations occur across several CPUs, - * but we might later generalize this if we find there are any actualy - * systems with alternate SMI delivery or other hardware latencies. + * Executes one loop interaction on each CPU in tracing_cpumask sysfs file. */ static int kthread_fn(void *data) { diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7ad9e53ad174..eadd96ef772f 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -16,6 +16,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) "trace_kprobe: " fmt #include #include diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 8c0553d9afd3..52478f033f88 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -21,6 +21,7 @@ * Copyright (C) IBM Corporation, 2010-2011 * Author: Srikar Dronamraju */ +#define pr_fmt(fmt) "trace_probe: " fmt #include "trace_probe.h" @@ -647,7 +648,7 @@ ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, int (*createfn)(int, char **)) { - char *kbuf, *tmp; + char *kbuf, *buf, *tmp; int ret = 0; size_t done = 0; size_t size; @@ -667,27 +668,38 @@ ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer, goto out; } kbuf[size] = '\0'; - tmp = strchr(kbuf, '\n'); + buf = kbuf; + do { + tmp = strchr(buf, '\n'); + if (tmp) { + *tmp = '\0'; + size = tmp - buf + 1; + } else { + size = strlen(buf); + if (done + size < count) { + if (buf != kbuf) + break; + /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */ + pr_warn("Line length is too long: Should be less than %d\n", + WRITE_BUFSIZE - 2); + ret = -EINVAL; + goto out; + } + } + done += size; - if (tmp) { - *tmp = '\0'; - size = tmp - kbuf + 1; - } else if (done + size < count) { - pr_warn("Line length is too long: Should be less than %d\n", - WRITE_BUFSIZE); - ret = -EINVAL; - goto out; - } - done += size; - /* Remove comments */ - tmp = strchr(kbuf, '#'); + /* Remove comments */ + tmp = strchr(buf, '#'); - if (tmp) - *tmp = '\0'; + if (tmp) + *tmp = '\0'; - ret = traceprobe_command(kbuf, createfn); - if (ret) - goto out; + ret = traceprobe_command(buf, createfn); + if (ret) + goto out; + buf += size; + + } while (done < count); } ret = done; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 0913693caf6e..f4379e772171 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -17,6 +17,7 @@ * Copyright (C) IBM Corporation, 2010-2012 * Author: Srikar Dronamraju */ +#define pr_fmt(fmt) "trace_kprobe: " fmt #include #include @@ -431,7 +432,8 @@ static int create_trace_uprobe(int argc, char **argv) pr_info("Probe point is not specified.\n"); return -EINVAL; } - arg = strchr(argv[1], ':'); + /* Find the last occurrence, in case the path contains ':' too. */ + arg = strrchr(argv[1], ':'); if (!arg) { ret = -EINVAL; goto fail_address_parse;