[CLASSES]: Improve cacheline boundary printing
Now we show more cacheline boundaries by looking at when we cross cachelines, not just when there is an exact cacheline boundary. A (long) example to clarify: [acme@newtoy net-2.6]$ pahole ../OUTPUT/qemu/net-2.6/fs/built-in.o task_struct /* include2/asm/system.h:11 */ struct task_struct { volatile long int state; /* 0 4 */ struct thread_info * thread_info; /* 4 4 */ atomic_t usage; /* 8 4 */ long unsigned int flags; /* 12 4 */ long unsigned int ptrace; /* 16 4 */ int lock_depth; /* 20 4 */ int load_weight; /* 24 4 */ int prio; /* 28 4 */ /* ----- cacheline 1 boundary ----- */ int static_prio; /* 32 4 */ int normal_prio; /* 36 4 */ struct list_head run_list; /* 40 8 */ struct prio_array * array; /* 48 4 */ short unsigned int ioprio; /* 52 2 */ /* XXX 2 bytes hole, try to pack */ long unsigned int sleep_avg; /* 56 4 */ long long unsigned int timestamp; /* 60 8 */ /* ----- cacheline 2 boundary was 2 bytes ago ----- */ long long unsigned int last_ran; /* 68 8 */ long long unsigned int sched_time; /* 76 8 */ enum sleep_type sleep_type; /* 84 4 */ long unsigned int policy; /* 88 4 */ cpumask_t cpus_allowed; /* 92 4 */ unsigned int time_slice; /* 96 4 */ /* ----- cacheline 3 boundary was 2 bytes ago ----- */ unsigned int first_time_slice; /* 100 4 */ struct list_head tasks; /* 104 8 */ struct list_head ptrace_children; /* 112 8 */ struct list_head ptrace_list; /* 120 8 */ struct mm_struct * mm; /* 128 4 */ /* ----- cacheline 4 boundary was 2 bytes ago ----- */ struct mm_struct * active_mm; /* 132 4 */ struct linux_binfmt * binfmt; /* 136 4 */ long int exit_state; /* 140 4 */ int exit_code; /* 144 4 */ int exit_signal; /* 148 4 */ int pdeath_signal; /* 152 4 */ long unsigned int personality; /* 156 4 */ unsigned int did_exec:1; /* 160 4 */ /* ----- cacheline 5 boundary was 2 bytes ago ----- */ pid_t pid; /* 164 4 */ pid_t tgid; /* 168 4 */ struct task_struct * real_parent; /* 172 4 */ struct task_struct * parent; /* 176 4 */ struct list_head children; /* 180 8 */ struct 
list_head sibling; /* 188 8 */ /* ----- cacheline 6 boundary was 2 bytes ago ----- */ struct task_struct * group_leader; /* 196 4 */ struct pid_link pids[3]; /* 200 36 */ /* ----- cacheline 7 boundary was 10 bytes ago ----- */ struct list_head thread_group; /* 236 8 */ struct completion * vfork_done; /* 244 4 */ int * set_child_tid; /* 248 4 */ int * clear_child_tid; /* 252 4 */ long unsigned int rt_priority; /* 256 4 */ /* ----- cacheline 8 boundary was 2 bytes ago ----- */ cputime_t utime; /* 260 4 */ cputime_t stime; /* 264 4 */ long unsigned int nvcsw; /* 268 4 */ long unsigned int nivcsw; /* 272 4 */ struct timespec start_time; /* 276 8 */ long unsigned int min_flt; /* 284 4 */ long unsigned int maj_flt; /* 288 4 */ /* ----- cacheline 9 boundary was 2 bytes ago ----- */ cputime_t it_prof_expires; /* 292 4 */ cputime_t it_virt_expires; /* 296 4 */ long long unsigned int it_sched_expires; /* 300 8 */ struct list_head cpu_timers[3]; /* 308 24 */ /* ----- cacheline 10 boundary was 10 bytes ago ----- */ uid_t uid; /* 332 4 */ uid_t euid; /* 336 4 */ uid_t suid; /* 340 4 */ uid_t fsuid; /* 344 4 */ gid_t gid; /* 348 4 */ gid_t egid; /* 352 4 */ /* ----- cacheline 11 boundary was 2 bytes ago ----- */ gid_t sgid; /* 356 4 */ gid_t fsgid; /* 360 4 */ struct group_info * group_info; /* 364 4 */ kernel_cap_t cap_effective; /* 368 4 */ kernel_cap_t cap_inheritable; /* 372 4 */ kernel_cap_t cap_permitted; /* 376 4 */ unsigned int keep_capabilities:1; /* 380 4 */ struct user_struct * user; /* 384 4 */ /* ----- cacheline 12 boundary was 2 bytes ago ----- */ struct key * request_key_auth; /* 388 4 */ struct key * thread_keyring; /* 392 4 */ unsigned char jit_keyring; /* 396 1 */ unsigned char fpu_counter; /* 397 1 */ /* XXX 2 bytes hole, try to pack */ int oomkilladj; /* 400 4 */ char comm[16]; /* 404 16 */ /* ----- cacheline 13 boundary ----- */ int link_count; /* 420 4 */ int total_link_count; /* 424 4 */ struct sysv_sem sysvsem; /* 428 4 */ struct thread_struct thread; /* 
432 656 */ /* ----- cacheline 33 boundary was 28 bytes ago ----- */ struct fs_struct * fs; /* 1088 4 */ /* ----- cacheline 34 boundary ----- */ struct files_struct * files; /* 1092 4 */ struct nsproxy * nsproxy; /* 1096 4 */ struct signal_struct * signal; /* 1100 4 */ struct sighand_struct * sighand; /* 1104 4 */ sigset_t blocked; /* 1108 8 */ sigset_t real_blocked; /* 1116 8 */ /* ----- cacheline 35 boundary ----- */ sigset_t saved_sigmask; /* 1124 8 */ struct sigpending pending; /* 1132 16 */ long unsigned int sas_ss_sp; /* 1148 4 */ size_t sas_ss_size; /* 1152 4 */ /* ----- cacheline 36 boundary ----- */ int (*notifier)(); /* 1156 4 */ void * notifier_data; /* 1160 4 */ sigset_t * notifier_mask; /* 1164 4 */ void * security; /* 1168 4 */ struct audit_context * audit_context; /* 1172 4 */ seccomp_t seccomp; /* 1176 0 */ u32 parent_exec_id; /* 1176 4 */ u32 self_exec_id; /* 1180 4 */ spinlock_t alloc_lock; /* 1184 40 */ /* ----- cacheline 38 boundary was 4 bytes ago ----- */ spinlock_t pi_lock; /* 1224 40 */ /* ----- cacheline 39 boundary was 12 bytes ago ----- */ struct plist_head pi_waiters; /* 1264 20 */ /* ----- cacheline 40 boundary ----- */ struct rt_mutex_waiter * pi_blocked_on; /* 1284 4 */ struct mutex_waiter * blocked_on; /* 1288 4 */ unsigned int irq_events; /* 1292 4 */ int hardirqs_enabled; /* 1296 4 */ long unsigned int hardirq_enable_ip; /* 1300 4 */ unsigned int hardirq_enable_event; /* 1304 4 */ long unsigned int hardirq_disable_ip; /* 1308 4 */ unsigned int hardirq_disable_event; /* 1312 4 */ /* ----- cacheline 41 boundary ----- */ int softirqs_enabled; /* 1316 4 */ long unsigned int softirq_disable_ip; /* 1320 4 */ unsigned int softirq_disable_event; /* 1324 4 */ long unsigned int softirq_enable_ip; /* 1328 4 */ unsigned int softirq_enable_event; /* 1332 4 */ int hardirq_context; /* 1336 4 */ int softirq_context; /* 1340 4 */ u64 curr_chain_key; /* 1344 8 */ /* ----- cacheline 42 boundary was 4 bytes ago ----- */ int lockdep_depth; /* 1352 4 */ 
struct held_lock held_locks[30]; /* 1356 1200 */ /* ----- cacheline 79 boundary was 24 bytes ago ----- */ unsigned int lockdep_recursion; /* 2556 4 */ void * journal_info; /* 2560 4 */ /* ----- cacheline 80 boundary ----- */ struct reclaim_state * reclaim_state; /* 2564 4 */ struct backing_dev_info * backing_dev_info; /* 2568 4 */ struct io_context * io_context; /* 2572 4 */ long unsigned int ptrace_message; /* 2576 4 */ siginfo_t * last_siginfo; /* 2580 4 */ wait_queue_t * io_wait; /* 2584 4 */ u64 rchar; /* 2588 8 */ /* ----- cacheline 81 boundary ----- */ u64 wchar; /* 2596 8 */ u64 syscr; /* 2604 8 */ u64 syscw; /* 2612 8 */ struct robust_list_head * robust_list; /* 2620 4 */ struct list_head pi_state_list; /* 2624 8 */ /* ----- cacheline 82 boundary was 4 bytes ago ----- */ struct futex_pi_state * pi_state_cache; /* 2632 4 */ atomic_t fs_excl; /* 2636 4 */ struct rcu_head rcu; /* 2640 8 */ struct pipe_inode_info * splice_pipe; /* 2648 4 */ }; /* size: 2656, sum members: 2648, holes: 2, sum holes: 4, padding: 4, cachelines: 83 */ [acme@newtoy net-2.6]$ See the "cacheline 82 boundary was 4 bytes ago" type comments? They show cases where members cross cachelines (in this case a 32-byte cacheline was used, the default when one doesn't specify one with --cacheline in the pahole cmd line). This should help in reorganizing struct layouts to avoid bringing in two cachelines when, say, an 'int' member is accessed, i.e. better aligning the members to avoid cacheline thrashing. One seemingly interesting idea, now that we have this infrastructure, is to write code to suggest reorganizing a struct to avoid these cacheline boundary crossings, kill holes, etc. This could be combined with static analysis on binaries, or with using sparse to look at member usage patterns, i.e. 
if io_wait is almost always used after wchar in task_struct, we would move them to be on the same cacheline. Of course, there are cases where data dependencies may be an obstacle; we'd need blacklists to help the tool avoid moving members in these cases. Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
This commit is contained in:
parent
91c91fc822
commit
8d19574248
20
classes.c
20
classes.c
|
@ -910,16 +910,26 @@ static void class__print_struct(struct class *self)
|
|||
struct class_member *pos;
|
||||
char name[128];
|
||||
uint64_t last_size = 0, size;
|
||||
unsigned int last_cacheline;
|
||||
unsigned int last_cacheline = 0;
|
||||
int last_bit_size = 0;
|
||||
int last_offset = -1;
|
||||
|
||||
printf("%s {\n", class__name(self, name, sizeof(name)));
|
||||
list_for_each_entry(pos, &self->members, tag.node) {
|
||||
if (sum > 0 && last_size > 0 && sum % cacheline_size == 0)
|
||||
printf(" /* ---------- cacheline "
|
||||
"%lu boundary ---------- */\n",
|
||||
sum / cacheline_size);
|
||||
const unsigned int cacheline = sum / cacheline_size;
|
||||
|
||||
if (cacheline > last_cacheline) {
|
||||
const unsigned int cacheline_pos = sum % cacheline_size;
|
||||
if (cacheline_pos == 0)
|
||||
printf(" /* ----- cacheline "
|
||||
"%u boundary ----- */\n",
|
||||
cacheline);
|
||||
else
|
||||
printf(" /* ----- cacheline "
|
||||
"%u boundary was %u bytes ago ----- */\n",
|
||||
cacheline, cacheline_pos);
|
||||
last_cacheline = cacheline;
|
||||
}
|
||||
fputs(" ", stdout);
|
||||
size = class_member__print(pos);
|
||||
putchar('\n');
|
||||
|
|
Loading…
Reference in New Issue