/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM sched

#if !defined(LTTNG_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
#define LTTNG_TRACE_SCHED_H

#include <probes/lttng-tracepoint-event.h>
#include <linux/sched.h>
#include <linux/pid_namespace.h>
#include <linux/binfmts.h>
#include <linux/version.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0))
#include <linux/sched/rt.h>
#endif

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0))
#define lttng_proc_inum ns.inum
#else
#define lttng_proc_inum proc_inum
#endif

#define LTTNG_MAX_PID_NS_LEVEL 32

#ifndef _TRACE_SCHED_DEF_
#define _TRACE_SCHED_DEF_

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0))

static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
	unsigned int state;

#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */

	/*
	 * Preemption ignores task state, therefore preempted tasks are always
	 * RUNNING (we will not have dequeued if state != RUNNING).
	 */
	if (preempt)
		return TASK_REPORT_MAX;

	/*
	 * task_state_index() uses fls() and returns a value from 0-8 range.
	 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
	 * it for left shift operation to get the correct task->state
	 * mapping.
	 */
	state = task_state_index(p);

	return state ? (1 << (state - 1)) : state;
}
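
/*
 * Worked example of the mapping above, assuming the mainline
 * <linux/sched.h> state bits (TASK_INTERRUPTIBLE == 0x1,
 * TASK_UNINTERRUPTIBLE == 0x2, TASK_REPORT_MAX == 0x100):
 *
 *   task_state_index(p) == 0 (TASK_RUNNING)         -> recorded as 0
 *   task_state_index(p) == 1 (TASK_INTERRUPTIBLE)   -> 1 << 0 == 0x1
 *   task_state_index(p) == 2 (TASK_UNINTERRUPTIBLE) -> 1 << 1 == 0x2
 *   preempted task                                  -> TASK_REPORT_MAX (0x100)
 *
 * so the prev_state field of sched_switch keeps the same bit layout as
 * the task state flags, mirroring the upstream sched_switch event.
 */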

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0))

static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
	unsigned int state;

#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */

	/*
	 * Preemption ignores task state, therefore preempted tasks are always
	 * RUNNING (we will not have dequeued if state != RUNNING).
	 */
	if (preempt)
		return TASK_REPORT_MAX;

	/*
	 * __get_task_state() uses fls() and returns a value from 0-8 range.
	 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
	 * it for left shift operation to get the correct task->state
	 * mapping.
	 */
	state = __get_task_state(p);

	return state ? (1 << (state - 1)) : state;
}

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))

static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
	/*
	 * Preemption ignores task state, therefore preempted tasks are always RUNNING
	 * (we will not have dequeued if state != RUNNING).
	 */
	return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
}

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0))

static inline long __trace_sched_switch_state(struct task_struct *p)
{
	long state = p->state;

#ifdef CONFIG_PREEMPT
#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
	/*
	 * For all intents and purposes a preempted task is a running task.
	 */
	if (preempt_count() & PREEMPT_ACTIVE)
		state = TASK_RUNNING | TASK_STATE_MAX;
#endif /* CONFIG_PREEMPT */

	return state;
}

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0))

static inline long __trace_sched_switch_state(struct task_struct *p)
{
	long state = p->state;

#ifdef CONFIG_PREEMPT
	/*
	 * For all intents and purposes a preempted task is a running task.
	 */
	if (task_preempt_count(p) & PREEMPT_ACTIVE)
		state = TASK_RUNNING | TASK_STATE_MAX;
#endif

	return state;
}

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0))

static inline long __trace_sched_switch_state(struct task_struct *p)
{
	long state = p->state;

#ifdef CONFIG_PREEMPT
	/*
	 * For all intents and purposes a preempted task is a running task.
	 */
	if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
		state = TASK_RUNNING | TASK_STATE_MAX;
#endif

	return state;
}

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))

static inline long __trace_sched_switch_state(struct task_struct *p)
{
	long state = p->state;

#ifdef CONFIG_PREEMPT
	/*
	 * For all intents and purposes a preempted task is a running task.
	 */
	if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
		state = TASK_RUNNING;
#endif

	return state;
}

#endif

#endif /* _TRACE_SCHED_DEF_ */

/*
 * Tracepoint for calling kthread_stop, performed to end a kthread:
 */
LTTNG_TRACEPOINT_EVENT(sched_kthread_stop,

	TP_PROTO(struct task_struct *t),

	TP_ARGS(t),

	TP_FIELDS(
		ctf_array_text(char, comm, t->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, t->pid)
	)
)

/*
 * Tracepoint for the return value of the kthread stopping:
 */
LTTNG_TRACEPOINT_EVENT(sched_kthread_stop_ret,

	TP_PROTO(int ret),

	TP_ARGS(ret),

	TP_FIELDS(
		ctf_integer(int, ret, ret)
	)
)

/*
 * Tracepoint for waking up a task:
 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) || \
	LTTNG_RT_KERNEL_RANGE(4,1,10,11, 4,2,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,18,27,26, 3,19,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,14,61,63, 3,15,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,12,54,73, 3,13,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,10,97,106, 3,11,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,4,110,139, 3,5,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,2,77,111, 3,3,0,0))
LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
		ctf_integer(int, target_cpu, task_cpu(p))
	)
)
#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */
LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template,

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
	TP_PROTO(struct task_struct *p, int success),

	TP_ARGS(p, success),
#else
	TP_PROTO(struct rq *rq, struct task_struct *p, int success),

	TP_ARGS(rq, p, success),
#endif

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
		ctf_integer(int, success, success)
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32))
		ctf_integer(int, target_cpu, task_cpu(p))
#endif
	)
)
#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */
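
/*
 * Note on the prio fields recorded here and below (a sketch assuming the
 * mainline priority layout, where MAX_RT_PRIO is 100): the kernel keeps
 * p->prio in the 0-139 range, 0-99 for real-time tasks and 100-139 for
 * normal tasks. Subtracting MAX_RT_PRIO therefore makes a nice-0 task
 * (prio 120) appear as 20 in the trace, while real-time tasks show up as
 * negative values.
 */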

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) || \
	LTTNG_RT_KERNEL_RANGE(4,1,10,11, 4,2,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,18,27,26, 3,19,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,14,61,63, 3,15,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,12,54,73, 3,13,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,10,97,106, 3,11,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,4,110,139, 3,5,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,2,77,111, 3,3,0,0))

/*
 * Tracepoint called when waking a task; this tracepoint is guaranteed to be
 * called from the waking context.
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_waking,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
 * It is not always called from the waking context.
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint for waking up a new task:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))

LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
	TP_PROTO(struct task_struct *p, int success),
	TP_ARGS(p, success))

/*
 * Tracepoint for waking up a new task:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
	TP_PROTO(struct task_struct *p, int success),
	TP_ARGS(p, success))

#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */

LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
	TP_ARGS(rq, p, success))

/*
 * Tracepoint for waking up a new task:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
	TP_ARGS(rq, p, success))

#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */

/*
 * Tracepoint for task switches, performed by the scheduler:
 */
LTTNG_TRACEPOINT_EVENT(sched_switch,

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))
	TP_PROTO(bool preempt,
		 struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(preempt, prev, next),
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
	TP_PROTO(struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(prev, next),
#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
	TP_PROTO(struct rq *rq, struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(rq, prev, next),
#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */

	TP_FIELDS(
		ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, prev_tid, prev->pid)
		ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))
		ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev))
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
		ctf_integer(long, prev_state, __trace_sched_switch_state(prev))
#else
		ctf_integer(long, prev_state, prev->state)
#endif
		ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, next_tid, next->pid)
		ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
	)
)

/*
 * Tracepoint for a task being migrated:
 */
LTTNG_TRACEPOINT_EVENT(sched_migrate_task,

	TP_PROTO(struct task_struct *p, int dest_cpu),

	TP_ARGS(p, dest_cpu),

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
		ctf_integer(int, orig_cpu, task_cpu(p))
		ctf_integer(int, dest_cpu, dest_cpu)
	)
)

LTTNG_TRACEPOINT_EVENT_CLASS(sched_process_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
	)
)

/*
 * Tracepoint for freeing a task:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_free,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint for a task exiting:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_exit,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint for waiting on task to unschedule:
 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_wait_task,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))
#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_wait_task,
	TP_PROTO(struct rq *rq, struct task_struct *p),
	TP_ARGS(rq, p))
#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */

/*
 * Tracepoint for a waiting task:
 */
LTTNG_TRACEPOINT_EVENT(sched_process_wait,

	TP_PROTO(struct pid *pid),

	TP_ARGS(pid),

	TP_FIELDS(
		ctf_array_text(char, comm, current->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, pid_nr(pid))
		ctf_integer(int, prio, current->prio - MAX_RT_PRIO)
	)
)

/*
 * Tracepoint for do_fork.
 * Saving both TID and PID information, especially for the child, allows
 * trace analyzers to distinguish between creation of a new process and
 * creation of a new thread. Newly created processes will have child_tid
 * == child_pid, while creation of a thread yields child_tid != child_pid.
 */
LTTNG_TRACEPOINT_EVENT_CODE(sched_process_fork,

	TP_PROTO(struct task_struct *parent, struct task_struct *child),

	TP_ARGS(parent, child),

	TP_locvar(
		pid_t vtids[LTTNG_MAX_PID_NS_LEVEL];
		unsigned int ns_level;
	),

	TP_code_pre(
		if (child) {
			struct pid *child_pid;
			unsigned int i;

			child_pid = task_pid(child);
			tp_locvar->ns_level =
				min_t(unsigned int, child_pid->level + 1,
					LTTNG_MAX_PID_NS_LEVEL);
			for (i = 0; i < tp_locvar->ns_level; i++)
				tp_locvar->vtids[i] = child_pid->numbers[i].nr;
		}
	),

	TP_FIELDS(
		ctf_array_text(char, parent_comm, parent->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, parent_tid, parent->pid)
		ctf_integer(pid_t, parent_pid, parent->tgid)
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
		ctf_integer(unsigned int, parent_ns_inum,
			({
				unsigned int parent_ns_inum = 0;

				if (parent) {
					struct pid_namespace *pid_ns;

					pid_ns = task_active_pid_ns(parent);
					if (pid_ns)
						parent_ns_inum =
							pid_ns->lttng_proc_inum;
				}
				parent_ns_inum;
			}))
#endif
		ctf_array_text(char, child_comm, child->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, child_tid, child->pid)
		ctf_sequence(pid_t, vtids, tp_locvar->vtids, u8, tp_locvar->ns_level)
		ctf_integer(pid_t, child_pid, child->tgid)
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
		ctf_integer(unsigned int, child_ns_inum,
			({
				unsigned int child_ns_inum = 0;

				if (child) {
					struct pid_namespace *pid_ns;

					pid_ns = task_active_pid_ns(child);
					if (pid_ns)
						child_ns_inum =
							pid_ns->lttng_proc_inum;
				}
				child_ns_inum;
			}))
#endif
	),

	TP_code_post()
)
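
/*
 * Illustrative example of the vtids sequence recorded above, assuming
 * the mainline struct pid layout where numbers[0] is the PID seen from
 * the initial namespace and numbers[level] the PID seen from the task's
 * own namespace: a child forked inside one level of PID namespace
 * nesting has child_pid->level == 1, so ns_level becomes 2 and the
 * trace records vtids[0] (PID in the init namespace, equal to
 * child_tid) and vtids[1] (PID inside the container's namespace). A
 * child in the initial namespace records a single entry.
 */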

#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33))
/*
 * Tracepoint for sending a signal:
 */
LTTNG_TRACEPOINT_EVENT(sched_signal_send,

	TP_PROTO(int sig, struct task_struct *p),

	TP_ARGS(sig, p),

	TP_FIELDS(
		ctf_integer(int, sig, sig)
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
	)
)
#endif

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,0))
/*
 * Tracepoint for exec:
 */
LTTNG_TRACEPOINT_EVENT(sched_process_exec,

	TP_PROTO(struct task_struct *p, pid_t old_pid,
		 struct linux_binprm *bprm),

	TP_ARGS(p, old_pid, bprm),

	TP_FIELDS(
		ctf_string(filename, bprm->filename)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(pid_t, old_tid, old_pid)
	)
)
#endif

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32))
/*
 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE;
 * adding sched_stat support to SCHED_FIFO/RR would be welcome.
 */
LTTNG_TRACEPOINT_EVENT_CLASS(sched_stat_template,

	TP_PROTO(struct task_struct *tsk, u64 delay),

	TP_ARGS(tsk, delay),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(u64, delay, delay)
	)
)
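
/*
 * The delay values handed to the sched_stat_* instances below come from
 * the scheduler's statistics and, on mainline kernels, are expressed in
 * nanoseconds, as are the runtime and vruntime values recorded by
 * sched_stat_runtime further down.
 */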

/*
 * Tracepoint for accounting wait time (time the task is runnable
 * but not actually running due to scheduler contention).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_wait,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))

/*
 * Tracepoint for accounting sleep time (time the task is not runnable,
 * including iowait, see below).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_sleep,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))

/*
 * Tracepoint for accounting iowait time (time the task is not runnable
 * due to waiting on IO to complete).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_iowait,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0))
/*
 * Tracepoint for accounting blocked time (time the task is in
 * uninterruptible sleep).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_blocked,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))
#endif

/*
 * Tracepoint for accounting runtime (time the task is executing
 * on a CPU).
 */
LTTNG_TRACEPOINT_EVENT(sched_stat_runtime,

	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),

	TP_ARGS(tsk, runtime, vruntime),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(u64, runtime, runtime)
		ctf_integer(u64, vruntime, vruntime)
	)
)
#endif

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) || \
	LTTNG_RT_KERNEL_RANGE(4,9,27,18, 4,10,0,0) || \
	LTTNG_RT_KERNEL_RANGE(4,11,5,1, 4,12,0,0))
/*
 * Tracepoint for showing priority inheritance modifying a task's
 * priority.
 */
LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),

	TP_ARGS(tsk, pi_task),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(int, oldprio, tsk->prio - MAX_RT_PRIO)
		ctf_integer(int, newprio, pi_task ?
				pi_task->prio - MAX_RT_PRIO :
				tsk->prio - MAX_RT_PRIO)
	)
)
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37))
/*
 * Tracepoint for showing priority inheritance modifying a task's
 * priority.
 */
LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, int newprio),

	TP_ARGS(tsk, newprio),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(int, oldprio, tsk->prio - MAX_RT_PRIO)
		ctf_integer(int, newprio, newprio - MAX_RT_PRIO)
	)
)
#endif

#endif /* LTTNG_TRACE_SCHED_H */

/* This part must be outside protection */
#include <probes/define_trace.h>