[CLASSES]: Improve cacheline boundary printing

Now we show more cacheline boundaries by looking at when we cross cachelines,
not just when there is an exact cacheline boundary. A (long) example to
clarify:

[acme@newtoy net-2.6]$ pahole ../OUTPUT/qemu/net-2.6/fs/built-in.o task_struct
/* include2/asm/system.h:11 */
struct task_struct {
        volatile long int          state;                /*     0     4 */
        struct thread_info *       thread_info;          /*     4     4 */
        atomic_t                   usage;                /*     8     4 */
        long unsigned int          flags;                /*    12     4 */
        long unsigned int          ptrace;               /*    16     4 */
        int                        lock_depth;           /*    20     4 */
        int                        load_weight;          /*    24     4 */
        int                        prio;                 /*    28     4 */
        /* ----- cacheline 1 boundary ----- */
        int                        static_prio;          /*    32     4 */
        int                        normal_prio;          /*    36     4 */
        struct list_head           run_list;             /*    40     8 */
        struct prio_array *        array;                /*    48     4 */
        short unsigned int         ioprio;               /*    52     2 */

        /* XXX 2 bytes hole, try to pack */

        long unsigned int          sleep_avg;            /*    56     4 */
        long long unsigned int     timestamp;            /*    60     8 */
        /* ----- cacheline 2 boundary was 2 bytes ago ----- */
        long long unsigned int     last_ran;             /*    68     8 */
        long long unsigned int     sched_time;           /*    76     8 */
        enum sleep_type            sleep_type;           /*    84     4 */
        long unsigned int          policy;               /*    88     4 */
        cpumask_t                  cpus_allowed;         /*    92     4 */
        unsigned int               time_slice;           /*    96     4 */
        /* ----- cacheline 3 boundary was 2 bytes ago ----- */
        unsigned int               first_time_slice;     /*   100     4 */
        struct list_head           tasks;                /*   104     8 */
        struct list_head           ptrace_children;      /*   112     8 */
        struct list_head           ptrace_list;          /*   120     8 */
        struct mm_struct *         mm;                   /*   128     4 */
        /* ----- cacheline 4 boundary was 2 bytes ago ----- */
        struct mm_struct *         active_mm;            /*   132     4 */
        struct linux_binfmt *      binfmt;               /*   136     4 */
        long int                   exit_state;           /*   140     4 */
        int                        exit_code;            /*   144     4 */
        int                        exit_signal;          /*   148     4 */
        int                        pdeath_signal;        /*   152     4 */
        long unsigned int          personality;          /*   156     4 */
        unsigned int               did_exec:1;           /*   160     4 */
        /* ----- cacheline 5 boundary was 2 bytes ago ----- */
        pid_t                      pid;                  /*   164     4 */
        pid_t                      tgid;                 /*   168     4 */
        struct task_struct *       real_parent;          /*   172     4 */
        struct task_struct *       parent;               /*   176     4 */
        struct list_head           children;             /*   180     8 */
        struct list_head           sibling;              /*   188     8 */
        /* ----- cacheline 6 boundary was 2 bytes ago ----- */
        struct task_struct *       group_leader;         /*   196     4 */
        struct pid_link            pids[3];              /*   200    36 */
        /* ----- cacheline 7 boundary was 10 bytes ago ----- */
        struct list_head           thread_group;         /*   236     8 */
        struct completion *        vfork_done;           /*   244     4 */
        int *                      set_child_tid;        /*   248     4 */
        int *                      clear_child_tid;      /*   252     4 */
        long unsigned int          rt_priority;          /*   256     4 */
        /* ----- cacheline 8 boundary was 2 bytes ago ----- */
        cputime_t                  utime;                /*   260     4 */
        cputime_t                  stime;                /*   264     4 */
        long unsigned int          nvcsw;                /*   268     4 */
        long unsigned int          nivcsw;               /*   272     4 */
        struct timespec            start_time;           /*   276     8 */
        long unsigned int          min_flt;              /*   284     4 */
        long unsigned int          maj_flt;              /*   288     4 */
        /* ----- cacheline 9 boundary was 2 bytes ago ----- */
        cputime_t                  it_prof_expires;      /*   292     4 */
        cputime_t                  it_virt_expires;      /*   296     4 */
        long long unsigned int     it_sched_expires;     /*   300     8 */
        struct list_head           cpu_timers[3];        /*   308    24 */
        /* ----- cacheline 10 boundary was 10 bytes ago ----- */
        uid_t                      uid;                  /*   332     4 */
        uid_t                      euid;                 /*   336     4 */
        uid_t                      suid;                 /*   340     4 */
        uid_t                      fsuid;                /*   344     4 */
        gid_t                      gid;                  /*   348     4 */
        gid_t                      egid;                 /*   352     4 */
        /* ----- cacheline 11 boundary was 2 bytes ago ----- */
        gid_t                      sgid;                 /*   356     4 */
        gid_t                      fsgid;                /*   360     4 */
        struct group_info *        group_info;           /*   364     4 */
        kernel_cap_t               cap_effective;        /*   368     4 */
        kernel_cap_t               cap_inheritable;      /*   372     4 */
        kernel_cap_t               cap_permitted;        /*   376     4 */
        unsigned int               keep_capabilities:1;  /*   380     4 */
        struct user_struct *       user;                 /*   384     4 */
        /* ----- cacheline 12 boundary was 2 bytes ago ----- */
        struct key *               request_key_auth;     /*   388     4 */
        struct key *               thread_keyring;       /*   392     4 */
        unsigned char              jit_keyring;          /*   396     1 */
        unsigned char              fpu_counter;          /*   397     1 */

        /* XXX 2 bytes hole, try to pack */

        int                        oomkilladj;           /*   400     4 */
        char                       comm[16];             /*   404    16 */
        /* ----- cacheline 13 boundary ----- */
        int                        link_count;           /*   420     4 */
        int                        total_link_count;     /*   424     4 */
        struct sysv_sem            sysvsem;              /*   428     4 */
        struct thread_struct       thread;               /*   432   656 */
        /* ----- cacheline 33 boundary was 28 bytes ago ----- */
        struct fs_struct *         fs;                   /*  1088     4 */
        /* ----- cacheline 34 boundary ----- */
        struct files_struct *      files;                /*  1092     4 */
        struct nsproxy *           nsproxy;              /*  1096     4 */
        struct signal_struct *     signal;               /*  1100     4 */
        struct sighand_struct *    sighand;              /*  1104     4 */
        sigset_t                   blocked;              /*  1108     8 */
        sigset_t                   real_blocked;         /*  1116     8 */
        /* ----- cacheline 35 boundary ----- */
        sigset_t                   saved_sigmask;        /*  1124     8 */
        struct sigpending          pending;              /*  1132    16 */
        long unsigned int          sas_ss_sp;            /*  1148     4 */
        size_t                     sas_ss_size;          /*  1152     4 */
        /* ----- cacheline 36 boundary ----- */
        int                        (*notifier)();        /*  1156     4 */
        void *                     notifier_data;        /*  1160     4 */
        sigset_t *                 notifier_mask;        /*  1164     4 */
        void *                     security;             /*  1168     4 */
        struct audit_context *     audit_context;        /*  1172     4 */
        seccomp_t                  seccomp;              /*  1176     0 */
        u32                        parent_exec_id;       /*  1176     4 */
        u32                        self_exec_id;         /*  1180     4 */
        spinlock_t                 alloc_lock;           /*  1184    40 */
        /* ----- cacheline 38 boundary was 4 bytes ago ----- */
        spinlock_t                 pi_lock;              /*  1224    40 */
        /* ----- cacheline 39 boundary was 12 bytes ago ----- */
        struct plist_head          pi_waiters;           /*  1264    20 */
        /* ----- cacheline 40 boundary ----- */
        struct rt_mutex_waiter *   pi_blocked_on;        /*  1284     4 */
        struct mutex_waiter *      blocked_on;           /*  1288     4 */
        unsigned int               irq_events;           /*  1292     4 */
        int                        hardirqs_enabled;     /*  1296     4 */
        long unsigned int          hardirq_enable_ip;    /*  1300     4 */
        unsigned int               hardirq_enable_event; /*  1304     4 */
        long unsigned int          hardirq_disable_ip;   /*  1308     4 */
        unsigned int               hardirq_disable_event; /*  1312     4 */
        /* ----- cacheline 41 boundary ----- */
        int                        softirqs_enabled;     /*  1316     4 */
        long unsigned int          softirq_disable_ip;   /*  1320     4 */
        unsigned int               softirq_disable_event; /*  1324     4 */
        long unsigned int          softirq_enable_ip;    /*  1328     4 */
        unsigned int               softirq_enable_event; /*  1332     4 */
        int                        hardirq_context;      /*  1336     4 */
        int                        softirq_context;      /*  1340     4 */
        u64                        curr_chain_key;       /*  1344     8 */
        /* ----- cacheline 42 boundary was 4 bytes ago ----- */
        int                        lockdep_depth;        /*  1352     4 */
        struct held_lock           held_locks[30];       /*  1356  1200 */
        /* ----- cacheline 79 boundary was 24 bytes ago ----- */
        unsigned int               lockdep_recursion;    /*  2556     4 */
        void *                     journal_info;         /*  2560     4 */
        /* ----- cacheline 80 boundary ----- */
        struct reclaim_state *     reclaim_state;        /*  2564     4 */
        struct backing_dev_info *  backing_dev_info;     /*  2568     4 */
        struct io_context *        io_context;           /*  2572     4 */
        long unsigned int          ptrace_message;       /*  2576     4 */
        siginfo_t *                last_siginfo;         /*  2580     4 */
        wait_queue_t *             io_wait;              /*  2584     4 */
        u64                        rchar;                /*  2588     8 */
        /* ----- cacheline 81 boundary ----- */
        u64                        wchar;                /*  2596     8 */
        u64                        syscr;                /*  2604     8 */
        u64                        syscw;                /*  2612     8 */
        struct robust_list_head *  robust_list;          /*  2620     4 */
        struct list_head           pi_state_list;        /*  2624     8 */
        /* ----- cacheline 82 boundary was 4 bytes ago ----- */
        struct futex_pi_state *    pi_state_cache;       /*  2632     4 */
        atomic_t                   fs_excl;              /*  2636     4 */
        struct rcu_head            rcu;                  /*  2640     8 */
        struct pipe_inode_info *   splice_pipe;          /*  2648     4 */
}; /* size: 2656, sum members: 2648, holes: 2, sum holes: 4, padding: 4, cachelines: 83 */

[acme@newtoy net-2.6]$

See the "cacheline 82 boundary was 4 bytes ago" type comments? They show cases
where members cross cachelines (in this case a 32-byte cacheline was used, the
default when one doesn't specify one with --cacheline on the pahole command line).

This should help in reorganizing struct layouts to avoid bringing in two
cachelines when, say, an 'int' member is accessed, i.e. better aligning the
members to avoid cacheline thrashing.

One seemingly interesting idea, now that we have this infrastructure, is to write
code to suggest reorganizing a struct to avoid these cacheline boundary
crossings, kill holes, etc. This could be combined with static analysis on
binaries, or with using sparse to look at member usage patterns: e.g. if io_wait
is almost always used after wchar in task_struct, we would move them to be on
the same cacheline. Of course there are cases where data dependency may be an
obstacle; we'd need blacklists to help the tool avoid moving these cases.

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
This commit is contained in:
Arnaldo Carvalho de Melo 2006-12-07 04:20:42 -02:00
parent 91c91fc822
commit 8d19574248
1 changed files with 15 additions and 5 deletions

View File

@ -910,16 +910,26 @@ static void class__print_struct(struct class *self)
struct class_member *pos;
char name[128];
uint64_t last_size = 0, size;
unsigned int last_cacheline;
unsigned int last_cacheline = 0;
int last_bit_size = 0;
int last_offset = -1;
printf("%s {\n", class__name(self, name, sizeof(name)));
list_for_each_entry(pos, &self->members, tag.node) {
if (sum > 0 && last_size > 0 && sum % cacheline_size == 0)
printf(" /* ---------- cacheline "
"%lu boundary ---------- */\n",
sum / cacheline_size);
const unsigned int cacheline = sum / cacheline_size;
if (cacheline > last_cacheline) {
const unsigned int cacheline_pos = sum % cacheline_size;
if (cacheline_pos == 0)
printf(" /* ----- cacheline "
"%u boundary ----- */\n",
cacheline);
else
printf(" /* ----- cacheline "
"%u boundary was %u bytes ago ----- */\n",
cacheline, cacheline_pos);
last_cacheline = cacheline;
}
fputs(" ", stdout);
size = class_member__print(pos);
putchar('\n');