Commit Graph

1256 Commits

Author SHA1 Message Date
Arnaldo Carvalho de Melo
dc6b9437a3 emit: Handle structs with DW_AT_alignment=1 meaning __packed__
In the following struct the ceph_entity_addr entries all appear marked with
__attribute__((__aligned__(8))), which, for the first two members of this type,
'peer_addr' and 'peer_addr_for_me', doesn't cause the regenerated struct to
differ in layout from the original layout put in place by the compiler as per
the original source code.

But the third member of this type, 'actual_peer_addr', ends up at a different
offset, even in a different cacheline. Here is how it looks in the code generated
from the original source code, at offset 561:

          char                       in_banner[30];        /*   472    30 */
          struct ceph_msg_connect out_connect;             /*   502    33 */
          /* --- cacheline 8 boundary (512 bytes) was 23 bytes ago --- */
          struct ceph_msg_connect_reply in_reply;          /*   535    26 */
          struct ceph_entity_addr actual_peer_addr __attribute__((__aligned__(1))); /*   561   136 */
          /* --- cacheline 10 boundary (640 bytes) was 57 bytes ago --- */
          struct ceph_msg_header out_hdr;                  /*   697    53 */

          /* XXX 2 bytes hole, try to pack */

          /* --- cacheline 11 boundary (704 bytes) was 48 bytes ago --- */

And here is how it looks like when built from the regenerated source code, at
offset 568:

  $ pfunct --compile /home/acme/git/build/v5.1-rc4+/fs/ceph/super.o > ceph.c
  $ gcc -g -c  ceph.c
  $ pahole -C ceph_connection ceph.o | head -46
  struct ceph_connection {
          void *                     private;              /*     0     8 */
          const struct ceph_connection_operations  * ops;  /*     8     8 */
          struct ceph_messenger *    msgr;                 /*    16     8 */
          atomic_t                   sock_state;           /*    24     4 */

          /* XXX 4 bytes hole, try to pack */

          struct socket *            sock;                 /*    32     8 */
          struct ceph_entity_addr peer_addr __attribute__((__aligned__(8))); /*    40   136 */
          /* --- cacheline 2 boundary (128 bytes) was 48 bytes ago --- */
          struct ceph_entity_addr peer_addr_for_me __attribute__((__aligned__(8))); /*   176   136 */
          /* --- cacheline 4 boundary (256 bytes) was 56 bytes ago --- */
          long unsigned int          flags;                /*   312     8 */
          /* --- cacheline 5 boundary (320 bytes) --- */
          long unsigned int          state;                /*   320     8 */
          const char  *              error_msg;            /*   328     8 */
          struct ceph_entity_name peer_name;               /*   336     9 */

          /* XXX 7 bytes hole, try to pack */

          u64                        peer_features;        /*   352     8 */
          u32                        connect_seq;          /*   360     4 */
          u32                        peer_global_seq;      /*   364     4 */
          struct ceph_auth_handshake * auth;               /*   368     8 */
          int                        auth_retry;           /*   376     4 */

          /* XXX 4 bytes hole, try to pack */

          /* --- cacheline 6 boundary (384 bytes) --- */
          struct mutex       mutex;                        /*   384    32 */
          struct list_head   out_queue;                    /*   416    16 */
          struct list_head   out_sent;                     /*   432    16 */
          /* --- cacheline 7 boundary (448 bytes) --- */
          u64                        out_seq;              /*   448     8 */
          u64                        in_seq;               /*   456     8 */
          u64                        in_seq_acked;         /*   464     8 */
          char                       in_banner[30];        /*   472    30 */
          struct ceph_msg_connect out_connect;             /*   502    33 */
          /* --- cacheline 8 boundary (512 bytes) was 23 bytes ago --- */
          struct ceph_msg_connect_reply in_reply;          /*   535    26 */

          /* XXX 7 bytes hole, try to pack */

          struct ceph_entity_addr actual_peer_addr __attribute__((__aligned__(8))); /*   568   136 */
          /* --- cacheline 11 boundary (704 bytes) --- */
    $

That happens because 'struct ceph_entity_addr' has that
__attribute__((__aligned__(8))) in the regenerated source code, above. Now look
at how it gets regenerated:

  $ pahole -C ceph_entity_addr ceph.o
  struct ceph_entity_addr {
          __le32                     type;                 /*     0     4 */
          __le32                     nonce;                /*     4     4 */
          struct __kernel_sockaddr_storage in_addr __attribute__((__aligned__(8))); /*     8   128 */

          /* size: 136, cachelines: 3, members: 3 */
          /* forced alignments: 1 */
          /* last cacheline: 8 bytes */
  } __attribute__((__aligned__(8)));
  $

While when looking at the original DWARF:

  $ pahole -C ceph_entity_addr /home/acme/git/build/v5.1-rc4+/fs/ceph/super.o
  struct ceph_entity_addr {
          __le32                     type;                 /*     0     4 */
          __le32                     nonce;                /*     4     4 */
          struct __kernel_sockaddr_storage in_addr __attribute__((__aligned__(1))); /*     8   128 */

          /* size: 136, cachelines: 3, members: 3 */
          /* forced alignments: 1 */
          /* last cacheline: 8 bytes */
  } __attribute__((__aligned__(1)));
  $

The confusion may further come from the fact that 'struct __kernel_sockaddr_storage' has,
in the regenerated source code, the __attribute__((__aligned__(8))):

  $ pahole -C __kernel_sockaddr_storage ceph.o
  struct __kernel_sockaddr_storage {
          __kernel_sa_family_t       ss_family;            /*     0     2 */
          char                       __data[126];          /*     2   126 */

          /* size: 128, cachelines: 2, members: 2 */
  } __attribute__((__aligned__(8)));
  $

Which is the same as in the original DWARF:

  $ pahole -C __kernel_sockaddr_storage /home/acme/git/build/v5.1-rc4+/fs/ceph/super.o
  struct __kernel_sockaddr_storage {
          __kernel_sa_family_t       ss_family;            /*     0     2 */
          char                       __data[126];          /*     2   126 */

          /* size: 128, cachelines: 2, members: 2 */
  } __attribute__((__aligned__(8)));
  $

Looking at the original source code for 'struct ceph_entity_addr'
helps here, as it reads:

  include/linux/ceph/msgr.h, line 63:

  /*
   * entity_addr -- network address
   */
  struct ceph_entity_addr {
          __le32 type;
          __le32 nonce;  /* unique id for process (e.g. pid) */
          struct sockaddr_storage in_addr;
  } __attribute__ ((packed));

So the original code has no __attribute__((__aligned__(1))); let's look at
what the compiler generates for 'struct ceph_entity_addr':

  $ readelf -wi /home/acme/git/build/v5.1-rc4+/fs/ceph/super.o | grep ceph_entity_addr -A7
      <193a6>   DW_AT_name        : (indirect string, offset: 0x1586): ceph_entity_addr
      <193aa>   DW_AT_byte_size   : 136
      <193ab>   DW_AT_alignment   : 1
      <193ac>   DW_AT_decl_file   : 296
      <193ae>   DW_AT_decl_line   : 63
      <193af>   DW_AT_decl_column : 8
      <193b0>   DW_AT_sibling     : <0x193e0>
   <2><193b4>: Abbrev Number: 5 (DW_TAG_member)
  $

So the natural alignment for 'struct ceph_entity_addr' ends up being the
natural alignment for 'struct __kernel_sockaddr_storage', which is 8, but
since 'struct ceph_entity_addr' was marked in the original source code as __packed__,
the compiler added the DW_AT_alignment: 1 to override that.

The heuristic in pahole, so far, took that __attribute__((__aligned__(1)))
literally:

  $ pahole -C ceph_entity_addr /home/acme/git/build/v5.1-rc4+/fs/ceph/super.o
  struct ceph_entity_addr {
          __le32                     type;                 /*     0     4 */
          __le32                     nonce;                /*     4     4 */
          struct __kernel_sockaddr_storage in_addr __attribute__((__aligned__(1))); /*     8   128 */

          /* size: 136, cachelines: 3, members: 3 */
          /* forced alignments: 1 */
          /* last cacheline: 8 bytes */
  } __attribute__((__aligned__(1)));
  $

which ends up making the regenerated source code (with the __aligned__(1))
generate a different layout, as the __aligned__(8) in one of its members
overrode that __aligned__(1).

Take this into account: when faced with a structure whose natural alignment
is not one and that has a DW_AT_alignment: 1, take it to mean it really is __packed__.

Doing that makes the regenerated source code match the original structure
layouts, i.e. after the patch we get:

  $ pahole -C ceph_entity_addr /home/acme/git/build/v5.1-rc4+/fs/ceph/super.o
  struct ceph_entity_addr {
          __le32                     type;                 /*     0     4 */
          __le32                     nonce;                /*     4     4 */
          struct __kernel_sockaddr_storage in_addr __attribute__((__aligned__(1))); /*     8   128 */

          /* size: 136, cachelines: 3, members: 3 */
          /* forced alignments: 1 */
          /* last cacheline: 8 bytes */
  } __attribute__((__packed__));
  $

And that member in 'struct ceph_connection', in the original, continues to read:

  $ pahole -C ceph_connection /home/acme/git/build/v5.1-rc4+/fs/ceph/super.o | grep -w actual_peer_addr -B4 -A6
          char                       in_banner[30];        /*   472    30 */
          struct ceph_msg_connect out_connect;             /*   502    33 */
          /* --- cacheline 8 boundary (512 bytes) was 23 bytes ago --- */
          struct ceph_msg_connect_reply in_reply;          /*   535    26 */
          struct ceph_entity_addr actual_peer_addr __attribute__((__aligned__(1))); /*   561   136 */
          /* --- cacheline 10 boundary (640 bytes) was 57 bytes ago --- */
          struct ceph_msg_header out_hdr;                  /*   697    53 */

          /* XXX 2 bytes hole, try to pack */

          /* --- cacheline 11 boundary (704 bytes) was 48 bytes ago --- */
  $

While the DWARF generated from the regenerated source code reads:

  $ pfunct --compile /home/acme/git/build/v5.1-rc4+/fs/ceph/super.o > ceph.c
  $ gcc -g -c  ceph.c
  $ pahole -C ceph_connection ceph.o | grep -w actual_peer_addr -B4 -A6
          char                       in_banner[30];        /*   472    30 */
          struct ceph_msg_connect out_connect;             /*   502    33 */
          /* --- cacheline 8 boundary (512 bytes) was 23 bytes ago --- */
          struct ceph_msg_connect_reply in_reply;          /*   535    26 */
          struct ceph_entity_addr actual_peer_addr __attribute__((__aligned__(1))); /*   561   136 */
          /* --- cacheline 10 boundary (640 bytes) was 57 bytes ago --- */
          struct ceph_msg_header out_hdr;                  /*   697    53 */

          /* XXX 2 bytes hole, try to pack */

          /* --- cacheline 11 boundary (704 bytes) was 48 bytes ago --- */
  $

I.e. it now matches.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-15 13:25:48 -03:00
Arnaldo Carvalho de Melo
f78633cfb9 core: Infer __packed__ for union struct members
I.e. check that all the structs that are embedded in a union have their natural
alignment satisfied by the size of the array they are contained in.

Before this change we ended up not marking union struct members that didn't have
natural alignment violations as __packed__ even though they had to be, in order to
be in a struct that didn't satisfy their natural alignment requirements, which would
violate them when said union was in an array, i.e. the second entry would have
the non __packed__ union struct member in a bad position.

E.g. Before:

  $ pahole -C ceph_osd_op /home/acme/git/build/v5.1-rc4+/net/ceph/osd_client.o
  struct ceph_osd_op {
          __le16                     op;                   /*     0     2 */
          __le32                     flags;                /*     2     4 */
          union {
                  struct {
                          __le64     offset;               /*     6     8 */
                          __le64     length;               /*    14     8 */
                          __le64     truncate_size;        /*    22     8 */
                          __le32     truncate_seq;         /*    30     4 */
                  } __attribute__((__packed__)) extent;    /*     6    28 */
                  struct {
                          __le32     name_len;             /*     6     4 */
                          __le32     value_len;            /*    10     4 */
                          __u8       cmp_op;               /*    14     1 */
                          __u8       cmp_mode;             /*    15     1 */
                  } __attribute__((__packed__)) xattr;     /*     6    10 */
                  struct {
                          __u8       class_len;            /*     6     1 */
                          __u8       method_len;           /*     7     1 */
                          __u8       argc;                 /*     8     1 */
                          __le32     indata_len;           /*     9     4 */
                  } __attribute__((__packed__)) cls;       /*     6     7 */
                  struct {
                          __le64     cookie;               /*     6     8 */
                          __le64     count;                /*    14     8 */
                  } pgls;                                  /*     6    16 */
                  struct {
                          __le64     snapid;               /*     6     8 */
                  } snap;                                  /*     6     8 */
                  struct {
                          __le64     cookie;               /*     6     8 */
                          __le64     ver;                  /*    14     8 */
                          __u8       op;                   /*    22     1 */
                          __le32     gen;                  /*    23     4 */
                  } __attribute__((__packed__)) watch;     /*     6    21 */
                  struct {
                          __le64     cookie;               /*     6     8 */
                  } notify;                                /*     6     8 */
                  struct {
                          __le64     offset;               /*     6     8 */
                          __le64     length;               /*    14     8 */
                          __le64     src_offset;           /*    22     8 */
                  } clonerange;                            /*     6    24 */
                  struct {
                          __le64     expected_object_size; /*     6     8 */
                          __le64     expected_write_size;  /*    14     8 */
                  } alloc_hint;                            /*     6    16 */
                  struct {
                          __le64     snapid;               /*     6     8 */
                          __le64     src_version;          /*    14     8 */
                          __u8       flags;                /*    22     1 */
                          __le32     src_fadvise_flags;    /*    23     4 */
                  } __attribute__((__packed__)) copy_from; /*     6    21 */
          };                                               /*     6    28 */
          __le32                     payload_len;          /*    34     4 */

          /* size: 38, cachelines: 1, members: 4 */
          /* last cacheline: 38 bytes */
  } __attribute__((__packed__));

After:

  $ pahole -C ceph_osd_op /home/acme/git/build/v5.1-rc4+/net/ceph/osd_client.o
  struct ceph_osd_op {
          __le16                     op;                   /*     0     2 */
          __le32                     flags;                /*     2     4 */
          union {
                  struct {
                          __le64     offset;               /*     6     8 */
                          __le64     length;               /*    14     8 */
                          __le64     truncate_size;        /*    22     8 */
                          __le32     truncate_seq;         /*    30     4 */
                  } __attribute__((__packed__)) extent;    /*     6    28 */
                  struct {
                          __le32     name_len;             /*     6     4 */
                          __le32     value_len;            /*    10     4 */
                          __u8       cmp_op;               /*    14     1 */
                          __u8       cmp_mode;             /*    15     1 */
                  } __attribute__((__packed__)) xattr;     /*     6    10 */
                  struct {
                          __u8       class_len;            /*     6     1 */
                          __u8       method_len;           /*     7     1 */
                          __u8       argc;                 /*     8     1 */
                          __le32     indata_len;           /*     9     4 */
                  } __attribute__((__packed__)) cls;       /*     6     7 */
                  struct {
                          __le64     cookie;               /*     6     8 */
                          __le64     count;                /*    14     8 */
                  } __attribute__((__packed__)) pgls;      /*     6    16 */
                  struct {
                          __le64     snapid;               /*     6     8 */
                  } __attribute__((__packed__)) snap;      /*     6     8 */
                  struct {
                          __le64     cookie;               /*     6     8 */
                          __le64     ver;                  /*    14     8 */
                          __u8       op;                   /*    22     1 */
                          __le32     gen;                  /*    23     4 */
                  } __attribute__((__packed__)) watch;     /*     6    21 */
                  struct {
                          __le64     cookie;               /*     6     8 */
                  } __attribute__((__packed__)) notify;    /*     6     8 */
                  struct {
                          __le64     offset;               /*     6     8 */
                          __le64     length;               /*    14     8 */
                          __le64     src_offset;           /*    22     8 */
                  } __attribute__((__packed__)) clonerange; /*     6    24 */
                  struct {
                          __le64     expected_object_size; /*     6     8 */
                          __le64     expected_write_size;  /*    14     8 */
                  } __attribute__((__packed__)) alloc_hint; /*     6    16 */
                  struct {
                          __le64     snapid;               /*     6     8 */
                          __le64     src_version;          /*    14     8 */
                          __u8       flags;                /*    22     1 */
                          __le32     src_fadvise_flags;    /*    23     4 */
                  } __attribute__((__packed__)) copy_from; /*     6    21 */
          };                                               /*     6    28 */
          __le32                     payload_len;          /*    34     4 */

          /* size: 38, cachelines: 1, members: 4 */
          /* last cacheline: 38 bytes */
  } __attribute__((__packed__));
  $

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-12 17:08:41 -03:00
Arnaldo Carvalho de Melo
75c52de9c6 core: Move packed_attribute_inferred from 'class' to 'type' class
We need to infer the attributes of union members too, so move it
there.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-12 12:39:50 -03:00
Arnaldo Carvalho de Melo
1bb4527220 fprintf: Fixup const pointers
Before:

  $ pahole -C nft_ctx /home/acme/git/build/v5.1-rc4+/net/netfilter/nft_set_rbtree.o
  struct nft_ctx {
          struct net *               net;                  /*     0     8 */
          struct nft_table *         table;                /*     8     8 */
          struct nft_chain *         chain;                /*    16     8 */
          const const struct nlattr  *  * nla;             /*    24     8 */
          u32                        portid;               /*    32     4 */
          u32                        seq;                  /*    36     4 */
          u8                         family;               /*    40     1 */
          u8                         level;                /*    41     1 */
          bool                       report;               /*    42     1 */

          /* size: 48, cachelines: 1, members: 9 */
          /* padding: 5 */
          /* last cacheline: 48 bytes */
  };
  $

Original:

  struct nft_ctx {
          struct net                      *net;
          struct nft_table                *table;
          struct nft_chain                *chain;
          const struct nlattr * const     *nla;
          u32                             portid;
          u32                             seq;
          u8                              family;
          u8                              level;
          bool                            report;
  };

DWARF tags:

 <1><12c8a>: Abbrev Number: 12 (DW_TAG_structure_type)
    <12c8b>   DW_AT_name        : (indirect string, offset: 0xcc6f): nlattr
    <12c8f>   DW_AT_byte_size   : 4
    <12c93>   DW_AT_sibling     : <0x12cb2>

 <1><12cb2>: Abbrev Number: 17 (DW_TAG_const_type)
    <12cb3>   DW_AT_type        : <0x12c8a>

 <1><12cf9>: Abbrev Number: 4 (DW_TAG_pointer_type)
    <12cfa>   DW_AT_byte_size   : 8
    <12cfb>   DW_AT_type        : <0x12cb2>

 <1><12cff>: Abbrev Number: 17 (DW_TAG_const_type)
    <12d00>   DW_AT_type        : <0x12cf9>

 <1><1d54b>: Abbrev Number: 4 (DW_TAG_pointer_type)
    <1d54c>   DW_AT_byte_size   : 8
    <1d54d>   DW_AT_type        : <0x12cff>

 <2><1e52e>: Abbrev Number: 14 (DW_TAG_member)
    <1e52f>   DW_AT_name        : nla
    <1e536>   DW_AT_type        : <0x1d54b>
    <1e53a>   DW_AT_data_member_location: 24

Fixed now:

  $ pahole -C nft_ctx /home/acme/git/build/v5.1-rc4+/net/netfilter/nft_set_rbtree.o
  struct nft_ctx {
          struct net *               net;                  /*     0     8 */
          struct nft_table *         table;                /*     8     8 */
          struct nft_chain *         chain;                /*    16     8 */
          const struct nlattr  * const * nla;              /*    24     8 */
          u32                        portid;               /*    32     4 */
          u32                        seq;                  /*    36     4 */
          u8                         family;               /*    40     1 */
          u8                         level;                /*    41     1 */
          bool                       report;               /*    42     1 */

          /* size: 48, cachelines: 1, members: 9 */
          /* padding: 5 */
          /* last cacheline: 48 bytes */
  };
  $

So, one more full circled:

  $ fullcircle /home/acme/git/build/v5.1-rc4+/net/netfilter/nft_set_rbtree.o
  $

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-12 12:07:46 -03:00
Arnaldo Carvalho de Melo
dc3d441961 core: Improve the natural alignment calculation
We need to take more than just arrays into account when figuring out the
natural alignment of struct members, looking recursively at types till
we get to basic types and pointers.

Before this patch the 'new' struct field in the 'v' union was considered
__packed__, when in fact it is not, as the natural alignment for the
'stateid_t' typedef is 4, so it can start at offset 36 (or 4 considering
just its container struct), see below:

  $ pahole -IC nfsd4_lock /home/acme/git/build/v5.1-rc4+/fs/nfsd/nfs4xdr.o
  /* Used at: /home/acme/git/linux/fs/nfsd/nfs4xdr.c */
  /* <1717a> /home/acme/git/linux/fs/nfsd/xdr4.h:156 */
  struct nfsd4_lock {
          u32                        lk_type;              /*     0     4 */
          u32                        lk_reclaim;           /*     4     4 */
          u64                        lk_offset;            /*     8     8 */
          u64                        lk_length;            /*    16     8 */
          u32                        lk_is_new;            /*    24     4 */

          /* XXX 4 bytes hole, try to pack */

          union {
                  struct {
                          u32        open_seqid;           /*    32     4 */
                          stateid_t  open_stateid;         /*    36    16 */
                          u32        lock_seqid;           /*    52     4 */
                          clientid_t clientid;             /*    56     8 */
                          /* --- cacheline 1 boundary (64 bytes) --- */
                          struct xdr_netobj owner;         /*    64    16 */
                  } __attribute__((__packed__)) new;       /*    32    48 */
                  struct {
                          stateid_t  lock_stateid;         /*    32    16 */
                          u32        lock_seqid;           /*    48     4 */
                  } __attribute__((__packed__)) old;       /*    32    20 */
          } v;                                             /*    32    48 */
          /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
          union {
                  struct {
                          stateid_t  stateid;              /*    80    16 */
                  } ok;                                    /*    80    16 */
                  struct nfsd4_lock_denied denied;         /*    80    48 */
          } u;                                             /*    80    48 */

          /* size: 128, cachelines: 2, members: 7 */
          /* sum members: 124, holes: 1, sum holes: 4 */
  };
  $

Asking for -rEIC, i.e. relative offsets and expanded types, we can see inside
that opaque stateid_t type:

                struct {
                        /* typedef u32 -> __u32 */ unsigned int open_seqid;              /*     0     4 */
                        /* typedef stateid_t */ struct {
                                /* typedef u32 -> __u32 */ unsigned int si_generation;   /*     0     4 */
                                /* typedef stateid_opaque_t */ struct {
                                        /* typedef clientid_t */ struct {
                                                /* typedef u32 -> __u32 */ unsigned int   cl_boot; /*     0     4 */
                                                /* typedef u32 -> __u32 */ unsigned int   cl_id; /*     4     4 */
                                        } so_clid; /*     0     8 */
                                        /* typedef u32 -> __u32 */ unsigned int so_id;   /*     8     4 */
                                } si_opaque; /*     4    12 */
                        } open_stateid; /*     4    16 */

With the algorithm implemented in this patch we get it correctly as not
packed:

  $ pahole -IC nfsd4_lock /home/acme/git/build/v5.1-rc4+/fs/nfsd/nfs4xdr.o
  /* Used at: /home/acme/git/linux/fs/nfsd/nfs4xdr.c */
  /* <1717a> /home/acme/git/linux/fs/nfsd/xdr4.h:156 */
  struct nfsd4_lock {
          u32                        lk_type;              /*     0     4 */
          u32                        lk_reclaim;           /*     4     4 */
          u64                        lk_offset;            /*     8     8 */
          u64                        lk_length;            /*    16     8 */
          u32                        lk_is_new;            /*    24     4 */

          /* XXX 4 bytes hole, try to pack */

          union {
                  struct {
                          u32        open_seqid;           /*    32     4 */
                          stateid_t  open_stateid;         /*    36    16 */
                          u32        lock_seqid;           /*    52     4 */
                          clientid_t clientid;             /*    56     8 */
                          /* --- cacheline 1 boundary (64 bytes) --- */
                          struct xdr_netobj owner;         /*    64    16 */
                  } new;                                   /*    32    48 */
                  struct {
                          stateid_t  lock_stateid;         /*    32    16 */
                          u32        lock_seqid;           /*    48     4 */
                  } old;                                   /*    32    20 */
          } v;                                             /*    32    48 */
          /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
          union {
                  struct {
                          stateid_t  stateid;              /*    80    16 */
                  } ok;                                    /*    80    16 */
                  struct nfsd4_lock_denied denied;         /*    80    48 */
          } u;                                             /*    80    48 */

          /* size: 128, cachelines: 2, members: 7 */
          /* sum members: 124, holes: 1, sum holes: 4 */
  };

Fixes: f2641ce169 ("core: Take arrays into account when inferring if a struct is packed")
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-11 16:43:59 -03:00
Arnaldo Carvalho de Melo
ac32e5e908 codiff: Fix comparison of multi-cu against single-cu files
When the first arg, the old object file has multiple compile units, i.e.
multiple objects that were then linked into one, and the second just one
.o, or equivalent, i.e. a .BTF file, then codiff shouldn't try to
find the types in the single CU in each of the old CUs.

Think about a .BTF file generated from a multi-CU DWARF binary: it will
contain all the types in all of the DWARF CUs, so if we go on trying to
find all the BTF types in each of the CUs, we'll fail.

It only makes sense to go on the DWARF CUs looking for the type on the
.BTF section and then compare them.

Fixes: 6b1e43f2c1 ("codiff: When comparing against a file with just one CU don't bother finding by name")
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-11 16:38:51 -03:00
Arnaldo Carvalho de Melo
f2641ce169 core: Take arrays into account when inferring if a struct is packed
Before:

  $ pahole -C qrwlock /home/acme/git/build/v5.1-rc4+/fs/ceph/dir.o
  struct qrwlock {
  	union {
  		atomic_t           cnts;                 /*     0     4 */
  		struct {
  			u8         wlocked;              /*     0     1 */
  			u8         __lstate[3];          /*     1     3 */
  		} __attribute__((__packed__));           /*     0     4 */
  	};                                               /*     0     4 */
  	arch_spinlock_t            wait_lock;            /*     4     4 */

  	/* size: 8, cachelines: 1, members: 2 */
  	/* last cacheline: 8 bytes */
  };

I.e. __lstate's class_member->byte_size is 3, causing the misinference that
that struct was packed. It is naturally aligned: we need to look at the size of
the array's entries to figure out its natural alignment.

After:

  $ pahole -C qrwlock /home/acme/git/build/v5.1-rc4+/fs/ceph/dir.o
  struct qrwlock {
  	union {
  		atomic_t           cnts;                 /*     0     4 */
  		struct {
  			u8         wlocked;              /*     0     1 */
  			u8         __lstate[3];          /*     1     3 */
  		};                                       /*     0     4 */
  	};                                               /*     0     4 */
  	arch_spinlock_t            wait_lock;            /*     4     4 */

  	/* size: 8, cachelines: 1, members: 2 */
  	/* last cacheline: 8 bytes */
  };
  $

To further test:

  $ cat packed_array_struct.c
  struct sarray {
  	short	  array[3];
  	long long first;
  } __attribute__((__packed__));

  void foo(struct sarray *s) {}
  $ gcc -g -c packed_array_struct.c
  $ pahole packed_array_struct.o
  struct sarray {
  	short int                  array[3];             /*     0     6 */
  	long long int              first;                /*     6     8 */

  	/* size: 14, cachelines: 1, members: 2 */
  	/* last cacheline: 14 bytes */
  } __attribute__((__packed__));
  $ cat packed_array_struct.c
  struct sarray {
  	short	  array[3];
  	long long first;
  };

  void foo(struct sarray *s) {}
  $ gcc -g -c packed_array_struct.c
  $ pahole packed_array_struct.o
  struct sarray {
  	short int                  array[3];             /*     0     6 */

  	/* XXX 2 bytes hole, try to pack */

  	long long int              first;                /*     8     8 */

  	/* size: 16, cachelines: 1, members: 2 */
  	/* sum members: 14, holes: 1, sum holes: 2 */
  	/* last cacheline: 16 bytes */
  };
  $

One more test:

  $ cat packed_array_struct.c
  struct sarray {
  	short     a;
  	short	  array[3];
  	long long b;
  };

  void foo(struct sarray *s) {}
  $

Before this patch:

  $ gcc -g -c packed_array_struct.c
  $ pahole packed_array_struct.o
  struct sarray {
  	short int                  a;                    /*     0     2 */
  	short int                  array[3];             /*     2     6 */
  	long long int              b;                    /*     8     8 */

  	/* size: 16, cachelines: 1, members: 3 */
  	/* last cacheline: 16 bytes */
  } __attribute__((__packed__));

After:

  $ pahole packed_array_struct.o
  struct sarray {
  	short int                  a;                    /*     0     2 */
  	short int                  array[3];             /*     2     6 */
  	long long int              b;                    /*     8     8 */

  	/* size: 16, cachelines: 1, members: 3 */
  	/* last cacheline: 16 bytes */
  };
  $

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-11 13:25:45 -03:00
Arnaldo Carvalho de Melo
85c9936963 fprintf: Do not add explicit padding when struct has __aligned__ attr
Fixes: 13e5b9fc00 ("fprintf: Add unnamed bitfield padding at the end to rebuild original type")
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-11 13:23:40 -03:00
Arnaldo Carvalho de Melo
b5e8fab596 emit: Cover void ** as a function parameter
Like in:

  static int
  cifs_setlease(struct file *file, long arg, struct file_lock **lease, void **priv)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-11 11:19:32 -03:00
Arnaldo Carvalho de Melo
28a3bc7add fprintf: Support packed enums
Check if the size is different than sizeof(int), which should be good
enough for now for both 64-bit and 32-bit targets.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-11 10:49:56 -03:00
Arnaldo Carvalho de Melo
f77a442f09 fprintf: Do not print the __aligned__ attribute if asked
I.e. honour conf_fprintf.suppress_aligned_attribute, noticed with
btfdiff, as BTF doesn't carry the alignment attribute, so can't
regenerate it, we need to suppress it so as to compare the output of
DWARF with that of the equivalent BTF.

Fixes: b42d77b0bb ("fprintf: Print __attribute__((__aligned__(N))) for structs/classes")
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-10 18:34:16 -03:00
Arnaldo Carvalho de Melo
ea583dac52 fprintf: Print zero sized flat arrays as [], not [0]
To match the case when we really have just one dimension, so the
--flat-arrays should show for zero sized arrays, [], not [0]:

Noticed with btfdiff.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-10 18:25:37 -03:00
Arnaldo Carvalho de Melo
f909f13dd7 fprintf: Fixup handling of unnamed bitfields
We were only handling holes inside bitfields as a request to change the
byte_offset, which is not the case when instead of 'int foo:0;' we have
'int foo:6;' to ask for an explicit 6 bit hole inside a bitfield, like
in:

Before this patch:

  $ pahole -F btf -C kvm_mmu_page_role /home/acme/git/build/v5.1-rc4+/arch/x86/kvm/hyperv.o
  union kvm_mmu_page_role {
          u32                        word;               /*     0     4 */
          struct {
                  unsigned int       level:4;            /*     0: 0  4 */
                  unsigned int       gpte_is_8_bytes:1;  /*     0: 4  4 */
                  unsigned int       quadrant:2;         /*     0: 5  4 */
                  unsigned int       direct:1;           /*     0: 7  4 */
                  unsigned int       access:3;           /*     0: 8  4 */
                  unsigned int       invalid:1;          /*     0:11  4 */
                  unsigned int       nxe:1;              /*     0:12  4 */
                  unsigned int       cr0_wp:1;           /*     0:13  4 */
                  unsigned int       smep_andnot_wp:1;   /*     0:14  4 */
                  unsigned int       smap_andnot_wp:1;   /*     0:15  4 */
                  unsigned int       ad_disabled:1;      /*     0:16  4 */
                  unsigned int       guest_mode:1;       /*     0:17  4 */

                  /* XXX 6 bits hole, try to pack */

                  /* Force alignment to the next boundary: */
                  unsigned int       :0;

                  unsigned int       smm:8;              /*     0:24  4 */
          };                                             /*     0     4 */
  };
  $

After:

  $ pahole -F btf -C kvm_mmu_page_role /home/acme/git/build/v5.1-rc4+/arch/x86/kvm/hyperv.o
  union kvm_mmu_page_role {
          u32                        word;               /*     0     4 */
          struct {
                  unsigned int       level:4;            /*     0: 0  4 */
                  unsigned int       gpte_is_8_bytes:1;  /*     0: 4  4 */
                  unsigned int       quadrant:2;         /*     0: 5  4 */
                  unsigned int       direct:1;           /*     0: 7  4 */
                  unsigned int       access:3;           /*     0: 8  4 */
                  unsigned int       invalid:1;          /*     0:11  4 */
                  unsigned int       nxe:1;              /*     0:12  4 */
                  unsigned int       cr0_wp:1;           /*     0:13  4 */
                  unsigned int       smep_andnot_wp:1;   /*     0:14  4 */
                  unsigned int       smap_andnot_wp:1;   /*     0:15  4 */
                  unsigned int       ad_disabled:1;      /*     0:16  4 */
                  unsigned int       guest_mode:1;       /*     0:17  4 */

                  /* XXX 6 bits hole, try to pack */
                  unsigned int       :6;

                  unsigned int       smm:8;              /*     0:24  4 */
          };                                             /*     0     4 */
  };

Cc: Alexei Starovoitov <ast@fb.com>
Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Fixes: a104eb1ea1 ("fprintf: Notice explicit bitfield alignment modifications")
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-10 18:11:55 -03:00
Arnaldo Carvalho de Melo
3247a777dc core: Infer if a struct is packed by the offsets/natural alignments
As neither DWARF nor BTF provides explicit attributes, we need to look at the
natural alignments, a byte is always aligned, etc.

This probably fails with things like __attribute__((__aligned__(power-of-two))),
but with it most of the kernel data structures are full circled, i.e.
'pfunct --compile' regenerates source code from debug info that when
compiled generates debug info that ends up matching the original sources.

  $ cat a.c
  #define __packed __attribute__((__packed__))

  struct filename {
  	const char  *              name;
  	const char  *              uptr;
  	int                        refcnt;
  };

  void m(struct filename *f) {}
  $ gcc -g -c a.c
  $ pahole a.o
  struct filename {
  	const char  *              name;                 /*     0     8 */
  	const char  *              uptr;                 /*     8     8 */
  	int                        refcnt;               /*    16     4 */

  	/* size: 24, cachelines: 1, members: 3 */
  	/* padding: 4 */
  	/* last cacheline: 24 bytes */
  };
  $ cat a.c
  #define __packed __attribute__((__packed__))

  struct filename {
  	const char  *              name;
  	const char  *              uptr;
  	int                        refcnt;
  } __packed;

  void m(struct filename *f) {}
  $ gcc -g -c a.c
  $ pahole a.o
  struct filename {
  	const char  *              name;                 /*     0     8 */
  	const char  *              uptr;                 /*     8     8 */
  	int                        refcnt;               /*    16     4 */

  	/* size: 20, cachelines: 1, members: 3 */
  	/* last cacheline: 20 bytes */
  } __attribute__((__packed__));
  $ cat a.c
  #define __packed __attribute__((__packed__))

  struct filename {
  	const char  *              name;
  	int                        refcnt;
  	const char  *              uptr;
  };

  void m(struct filename *f) {}
  $ gcc -g -c a.c
  $ pahole a.o
  struct filename {
  	const char  *              name;                 /*     0     8 */
  	int                        refcnt;               /*     8     4 */

  	/* XXX 4 bytes hole, try to pack */

  	const char  *              uptr;                 /*    16     8 */

  	/* size: 24, cachelines: 1, members: 3 */
  	/* sum members: 20, holes: 1, sum holes: 4 */
  	/* last cacheline: 24 bytes */
  };
  $ cat a.c
  #define __packed __attribute__((__packed__))

  struct filename {
  	const char  *              name;
  	int                        refcnt;
  	const char  *              uptr;
  } __packed;

  void m(struct filename *f) {}
  $ gcc -g -c a.c
  $ pahole a.o
  struct filename {
  	const char  *              name;                 /*     0     8 */
  	int                        refcnt;               /*     8     4 */
  	const char  *              uptr;                 /*    12     8 */

  	/* size: 20, cachelines: 1, members: 3 */
  	/* last cacheline: 20 bytes */
  } __attribute__((__packed__));
  $ cat a.c
  #define __packed __attribute__((__packed__))

  struct filename {
  	const char  *              name;
  	const char  *              uptr;
  	unsigned char              refcnt;
  };

  void m(struct filename *f) {}
  $ gcc -g -c a.c
  $ pahole a.o
  struct filename {
  	const char  *              name;                 /*     0     8 */
  	const char  *              uptr;                 /*     8     8 */
  	unsigned char              refcnt;               /*    16     1 */

  	/* size: 24, cachelines: 1, members: 3 */
  	/* padding: 7 */
  	/* last cacheline: 24 bytes */
  };
  $ cat a.c
  #define __packed __attribute__((__packed__))

  struct filename {
  	const char  *              name;
  	const char  *              uptr;
  	unsigned char              refcnt;
  } __packed;

  void m(struct filename *f) {}
  $ gcc -g -c a.c
  $ pahole a.o
  struct filename {
  	const char  *              name;                 /*     0     8 */
  	const char  *              uptr;                 /*     8     8 */
  	unsigned char              refcnt;               /*    16     1 */

  	/* size: 17, cachelines: 1, members: 3 */
  	/* last cacheline: 17 bytes */
  } __attribute__((__packed__));
  $ cat a.c
  #define __packed __attribute__((__packed__))

  struct filename {
  	const char  *              name;
  	unsigned char              refcnt;
  	const char  *              uptr;
  };

  void m(struct filename *f) {}
  $ gcc -g -c a.c
  $ pahole a.o
  struct filename {
  	const char  *              name;                 /*     0     8 */
  	unsigned char              refcnt;               /*     8     1 */

  	/* XXX 7 bytes hole, try to pack */

  	const char  *              uptr;                 /*    16     8 */

  	/* size: 24, cachelines: 1, members: 3 */
  	/* sum members: 17, holes: 1, sum holes: 7 */
  	/* last cacheline: 24 bytes */
  };
  $ cat a.c
  #define __packed __attribute__((__packed__))

  struct filename {
  	const char  *              name;
  	unsigned char              refcnt;
  	const char  *              uptr;
  } __packed;

  void m(struct filename *f) {}
  $ gcc -g -c a.c
  $ pahole a.o
  struct filename {
  	const char  *              name;                 /*     0     8 */
  	unsigned char              refcnt;               /*     8     1 */
  	const char  *              uptr;                 /*     9     8 */

  	/* size: 17, cachelines: 1, members: 3 */
  	/* last cacheline: 17 bytes */
  } __attribute__((__packed__));
  $

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-10 17:41:59 -03:00
Arnaldo Carvalho de Melo
13e5b9fc00 fprintf: Add unnamed bitfield padding at the end to rebuild original type
Just like the 'struct timex' in the linux kernel UAPI, that is now
correctly reconstructed as:

  $ pahole -IC timex /home/acme/git/build/v5.0-rc2+/kernel/time/posix-clock.o | tail -32
          __kernel_long_t            ppsfreq;              /*    96     8 */
          __kernel_long_t            jitter;               /*   104     8 */
          int                        shift;                /*   112     4 */

          /* XXX 4 bytes hole, try to pack */

          __kernel_long_t            stabil;               /*   120     8 */
          /* --- cacheline 2 boundary (128 bytes) --- */
          __kernel_long_t            jitcnt;               /*   128     8 */
          __kernel_long_t            calcnt;               /*   136     8 */
          __kernel_long_t            errcnt;               /*   144     8 */
          __kernel_long_t            stbcnt;               /*   152     8 */
          int                        tai;                  /*   160     4 */

          /* Force padding: */
          int                        :32;
          int                        :32;
          int                        :32;
          int                        :32;
          int                        :32;
          int                        :32;
          int                        :32;
          int                        :32;
          int                        :32;
          int                        :32;
          int                        :32;

          /* size: 208, cachelines: 4, members: 20 */
          /* sum members: 152, holes: 3, sum holes: 12 */
          /* padding: 44 */
          /* last cacheline: 16 bytes */
  };
  $

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-10 11:15:42 -03:00
Arnaldo Carvalho de Melo
ccd67bdb20 fprintf: Print "const" for class members more early, in type__fprintf()
We want to reach array__fprintf() from here, with the class_member
name, as __tag__name() isn't handling arrays properly.

I.e. to print an array when we have its name we can't use __tag__name().

This also stops printing 0 for zero sized arrays and throws away the
extra DW_TAG_const_type that comes with zero sized arrays, where we
have:

   class_member type: DW_TAG_const_type 1
   DW_TAG_const_type 1: DW_TAG_array_type 2
   DW_TAG_array_type 2: 0 entries, type: DW_TAG_const_type 3
   DW_TAG_const_type 3: real type of the zero sized array

For instance, after this patch we get a sane reconstruction of this
type:

  $ pahole -C filename /home/acme/git/build/v5.0-rc2+/ipc/mqueue.o
  struct filename {
          const char  *              name;                 /*     0     8 */
          const char  *              uptr;                 /*     8     8 */
          int                        refcnt;               /*    16     4 */

          /* XXX 4 bytes hole, try to pack */

          struct audit_names *       aname;                /*    24     8 */
          const char                 iname[];              /*    32     0 */

          /* size: 32, cachelines: 1, members: 5 */
          /* sum members: 28, holes: 1, sum holes: 4 */
          /* last cacheline: 32 bytes */
  };
  $

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-09 18:13:58 -03:00
Arnaldo Carvalho de Melo
b42d77b0bb fprintf: Print __attribute__((__aligned__(N))) for structs/classes
For instance:

  $ pahole -C kern_ipc_perm /home/acme/git/build/v5.0-rc2+/ipc/util.o
  struct kern_ipc_perm {
        spinlock_t                 lock;                 /*     0     4 */
        bool                       deleted;              /*     4     1 */

        /* XXX 3 bytes hole, try to pack */

        int                        id;                   /*     8     4 */
        key_t                      key;                  /*    12     4 */
        kuid_t                     uid;                  /*    16     4 */
        kgid_t                     gid;                  /*    20     4 */
        kuid_t                     cuid;                 /*    24     4 */
        kgid_t                     cgid;                 /*    28     4 */
        umode_t                    mode;                 /*    32     2 */

        /* XXX 6 bytes hole, try to pack */

        long unsigned int          seq;                  /*    40     8 */
        void *                     security;             /*    48     8 */
        struct rhash_head          khtnode;              /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        struct callback_head       rcu __attribute__((__aligned__(8))); /*    64    16 */
        refcount_t                 refcount;             /*    80     4 */

        /* size: 128, cachelines: 2, members: 14 */
        /* sum members: 75, holes: 2, sum holes: 9 */
        /* padding: 44 */
        /* forced alignments: 1 */
  } __attribute__((__aligned__(64)));
  $

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-09 17:03:54 -03:00
Arnaldo Carvalho de Melo
1c9c1d6bbd dwarf_loader: Store DW_AT_alignment if available in DW_TAG_{structure,union,class}_type
That is not just for DW_TAG_class_member :-)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-09 16:57:52 -03:00
Arnaldo Carvalho de Melo
41c55858da codiff: Add --quiet option
To avoid printing anything when there are no differences to show.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-09 16:46:49 -03:00
Arnaldo Carvalho de Melo
a104eb1ea1 fprintf: Notice explicit bitfield alignment modifications
I.e. when we find that the last member has a bit_hole, i.e. it is part
of a bitfield, and the current field has a bitfield_size, i.e. it _also_
is part of a bitfield, the only explanation is that they were
artificially put in different base types, i.e. like in these fields
in the linux kernel 'struct task_struct', here reconstructed by pahole:

  $ pahole -C task_struct ~/git/build/v5.1-rc2+/kernel/sched/core.o | grep :0 -B9 -A12
          unsigned int               personality;          /*  1128     4 */
          unsigned int               sched_reset_on_fork:1; /*  1132: 0  4 */
          unsigned int               sched_contributes_to_load:1; /*  1132: 1  4 */
          unsigned int               sched_migrated:1;     /*  1132: 2  4 */
          unsigned int               sched_remote_wakeup:1; /*  1132: 3  4 */

          /* XXX 28 bits hole, try to pack */

          /* Force alignment to the next boundary: */
          unsigned int               :0;

          unsigned int               in_execve:1;          /*  1136: 0  4 */
          unsigned int               in_iowait:1;          /*  1136: 1  4 */
          unsigned int               restore_sigmask:1;    /*  1136: 2  4 */
          unsigned int               in_user_fault:1;      /*  1136: 3  4 */
          unsigned int               no_cgroup_migration:1; /*  1136: 4  4 */
          unsigned int               use_memdelay:1;       /*  1136: 5  4 */

          /* XXX 26 bits hole, try to pack */
          /* XXX 4 bytes hole, try to pack */

          long unsigned int          atomic_flags;         /*  1144     8 */
  $

This matches the original definition in the original kernel sources, and
furthermore, the following sequence proves that with this and DW_AT_alignment,
we can go full circle, i.e.:

1. from an object file reconstruct the source code for all the types that
   appear in function signatures, if pointers, then they will be fully defined,
   not just forward declared:

  $ pfunct --compile=sched_change_group ~/git/build/v5.1-rc2+/kernel/sched/core.o | egrep -w 'sched_change_group|task_struct {' -B10 -A5

          /* --- cacheline 3 boundary (192 bytes) --- */
          struct fpu                 fpu __attribute__((__aligned__(64))); /*   192  4160 */

          /* size: 4352, cachelines: 68, members: 21 */
          /* sum members: 4316, holes: 2, sum holes: 32 */
          /* sum bitfield members: 2 bits, bit holes: 1, sum bit holes: 30 bits */
          /* forced alignments: 1, forced holes: 1, sum forced holes: 28 */
  };

  struct task_struct {
          struct thread_info         thread_info;          /*     0    16 */

          /* XXX last struct has 4 bytes of padding */

          volatile long int          state;                /*    16     8 */
  --
          /* --- cacheline 104 boundary (6656 bytes) --- */
          struct thread_struct       thread __attribute__((__aligned__(64))); /*  6656  4352 */

          /* size: 11008, cachelines: 172, members: 207 */
          /* sum members: 10902, holes: 16, sum holes: 98 */
          /* sum bitfield members: 10 bits, bit holes: 2, sum bit holes: 54 bits */
          /* paddings: 3, sum paddings: 14 */
          /* forced alignments: 6, forced holes: 1, sum forced holes: 40 */
  };

  void sched_change_group(struct task_struct * tsk, int type)
  {
  }
  $

2. Build the regenerated skeleton function + its types:

  $ pfunct --compile=sched_change_group ~/git/build/v5.1-rc2+/kernel/sched/core.o > sched_change_group.c
  $ gcc -g -c sched_change_group.c
  $ file sched_change_group.o
  sched_change_group.o: ELF 64-bit LSB relocatable, x86-64, version 1 (SYSV), with debug_info, not stripped
  $

3. Now lets see if the original 'struct task_struct' printed by pahole, matches
   the output printed by pahole for the DWARF info generated for the regenerated
   'struct task_struct' source code in sched_change_group.c:

  $ pahole -C task_struct sched_change_group.o | tail

          /* --- cacheline 104 boundary (6656 bytes) --- */
          struct thread_struct       thread __attribute__((__aligned__(64))); /*  6656  4352 */

          /* size: 11008, cachelines: 172, members: 207 */
          /* sum members: 10902, holes: 16, sum holes: 98 */
          /* sum bitfield members: 10 bits, bit holes: 2, sum bit holes: 54 bits */
          /* paddings: 3, sum paddings: 14 */
          /* forced alignments: 6, forced holes: 1, sum forced holes: 40 */
  };
  $ pahole -C task_struct ~/git/build/v5.1-rc2+/kernel/sched/core.o | tail

          /* --- cacheline 104 boundary (6656 bytes) --- */
          struct thread_struct       thread __attribute__((__aligned__(64))); /*  6656  4352 */

          /* size: 11008, cachelines: 172, members: 207 */
          /* sum members: 10902, holes: 16, sum holes: 98 */
          /* sum bitfield members: 10 bits, bit holes: 2, sum bit holes: 54 bits */
          /* paddings: 3, sum paddings: 14 */
          /* forced alignments: 6, forced holes: 1, sum forced holes: 40 */
  };
  $

  Furthermore:

  $ pahole -C task_struct ~/git/build/v5.1-rc2+/kernel/sched/core.o > /tmp/original
  $ pahole -C task_struct sched_change_group.o > /tmp/regenerated
  $ diff -u /tmp/original /tmp/regenerated
  $

So one of the most complex data structures in the Linux kernel seems to be under control,
and it uses zero sized unnamed bitfields and __attribute__((aligned(N))), a DWARF5 goodie,
time to go tag v1.13!

Cc: Alexei Starovoitov <ast@fb.com>
Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-09 15:44:42 -03:00
Arnaldo Carvalho de Melo
75f32a24c7 codiff: Improve the comparison of anonymous struct members
I.e. 'union {};', 'struct {};' members were always appearing as having
been removed, as we normally do lookup by member name, to find out if
its offset, size, type, etc changed.

For unnamed members, try a different heuristic, i.e. look for the nth
anonymous member, this way we're just trying to compare the first
unnamed member of, say, struct OLD with the first unnamed member of
struct NEW, etc.

For OLD == NEW, this works well, for OLD != NEW because some non
anonymous field got added, removed or moved around, ditto, and when the
number of unnamed fields gets decreased, then we can mix things up, and
compare the previously first in A with the previously first in B.

For the current intended use case of:

1) compile a .c file into a .o file with debugging info, say FILE.o

2) use 'pfunct --compile FILE.o > regenerated-FILE.c'

3) compile regenerated-FILE.c into regenerated-FILE.o with debugging info

4) codiff --struct FILE.o regenerated-FILE.o and find out if they match

This gets us moving forward as we'll spot differences with this algo.

For the future we can use a few more heuristics or stop using search by
name members, instead traversing both structs in tandem, spotting the
differences by comparing the fields that way.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-08 15:09:33 -03:00
Arnaldo Carvalho de Melo
6b1e43f2c1 codiff: When comparing against a file with just one CU don't bother finding by name
I.e. when we have two object files with debugging info, and one of them
has just one CU, then compare all the CUs in the other file to this
unique CU.

Case in hand: encode BTF in a file, then the BTF info has everything in
just one "compile unit", so when looking at the types in the DWARF
originals, we should just compare its types to what is in the single BTF
"compile unit".

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-08 15:07:17 -03:00
Arnaldo Carvalho de Melo
15a754f224 core: Add nr_entries member to 'struct cus'
Will be used when considering comparing multiple CU entries in a struct
cus to the sole compile unit in a second file, like when comparing the
types in a multi-CU DWARF file like vmlinux against the combined,
deduplicated entries in that vmlinux .BTF section.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-08 11:54:06 -03:00
Arnaldo Carvalho de Melo
99750f244c pfunct: Generate a valid return type for the --compile bodies
Before:

  $ pfunct --compile examples/tcp.o > tcp.pahole.c
  $ clang --target=bpf -c -g tcp.pahole.c |& tail
  tcp.pahole.c:6154:1: warning: control reaches end of non-void function [-Wreturn-type]
  }
  ^
  tcp.pahole.c:6158:1: warning: control reaches end of non-void function [-Wreturn-type]
  }
  ^
  tcp.pahole.c:6170:1: warning: control reaches end of non-void function [-Wreturn-type]
  }
  ^
  192 warnings generated.
  $ head -6170 tcp.pahole.c | tail -3
  inline int arch_atomic_read(const atomic_t  * v)
  {
  }
  $

After:

  $ pfunct --compile examples/tcp.o > tcp.pahole.c
  $ clang --target=bpf -c -g tcp.pahole.c
  $ grep -A3 -w arch_atomic_read tcp.pahole.c
  inline int arch_atomic_read(const atomic_t  * v)
  {
	  return 0;
  }
  $

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-05 16:33:43 -03:00
Arnaldo Carvalho de Melo
881aabd6fc reorganize: Introduce class__for_each_member_from_safe()
Reducing boilerplate, keeping consistent with the other member traversal
helpers.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-05 16:16:16 -03:00
Arnaldo Carvalho de Melo
1b2e3389f3 reorganize: Introduce class__for_each_member_reverse()
Reducing boilerplate, keeping consistent with the other member traversal
helpers.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-05 16:11:42 -03:00
Arnaldo Carvalho de Melo
10fef2916d reorganize: Introduce class__for_each_member_continue()
To get a lot of boilerplate behind a nice helper.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-05 16:07:30 -03:00
Arnaldo Carvalho de Melo
e7a56ee8cc reorganize: Introduce class__for_each_member_from()
To get a lot of boilerplate behind a nice helper.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-05 16:05:28 -03:00
Arnaldo Carvalho de Melo
9a79bb6ced tag: Introduce tag__is_pointer_to()
To shorten the check if a tag is a pointer to a particular type.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-05 15:28:55 -03:00
Arnaldo Carvalho de Melo
45ad545944 tag: Introduce tag__is_pointer()
For the usual idiom to ask if a tag is a pointer, removing a bit of
DWARFism and shortening the operation.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-05 15:21:55 -03:00
Arnaldo Carvalho de Melo
89ce57a02e pdwtags: Find holes in structs
The pdwtags prints all tags, so call class__find_holes() for structs so
that we don't print BFAs.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-05 11:34:33 -03:00
Arnaldo Carvalho de Melo
ce6f393bc9 fprintf: Fixup the printing of const parameters
The last problem with 'pfunct --compile' at least for tcp.o:

Before:

  $ pfunct --compile examples/tcp.o > tcp.pahole.c
  $ gcc -c tcp.pahole.c -g
  tcp.pahole.c:1808:48: error: unknown type name ‘u8const’; did you mean ‘const’?
   inline void tcp_set_ca_state(struct sock * sk, u8const ca_state)
                                                  ^~~~~~~
                                                  const
  tcp.pahole.c:5346:56: error: unknown type name ‘intconst’; did you mean ‘const’?
   inline void skb_set_tail_pointer(struct sk_buff * skb, intconst offset)
                                                          ^~~~~~~~
                                                          const
  tcp.pahole.c:5914:37: error: unknown type name ‘gfp_tconst’; did you mean ‘gfp_t’?
   inline bool gfpflags_allow_blocking(gfp_tconst gfp_flags)
                                       ^~~~~~~~~~
                                       gfp_t
  tcp.pahole.c:5926:24: error: unknown type name ‘ktime_tconst’; did you mean ‘ktime_t’?
   inline s64 ktime_to_ns(ktime_tconst kt)
                          ^~~~~~~~~~~~
                          ktime_t
  tcp.pahole.c:5939:54: warning: ‘struct timespec64const’ declared inside parameter list will not be visible outside of this definition or declaration
   inline struct timespec timespec64_to_timespec(struct timespec64const ts64)
                                                        ^~~~~~~~~~~~~~~
  tcp.pahole.c:5939:70: error: parameter 1 (‘ts64’) has incomplete type
   inline struct timespec timespec64_to_timespec(struct timespec64const ts64)
                                                 ~~~~~~~~~~~~~~~~~~~~~~~^~~~
  $

After:

  $ pfunct --compile examples/tcp.o > tcp.pahole.c
  $ gcc -c tcp.pahole.c -g

Because:

  $ grep -A2 tcp_set_ca_state tcp.pahole.c
  inline void tcp_set_ca_state(struct sock * sk, const u8 ca_state)
  {
  }
  $

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-04 21:59:23 -03:00
Arnaldo Carvalho de Melo
7aec7dd6c2 pfunct: Do not reconstruct external functions
I.e. those with DW_AT_external set, to avoid regenerating multiple times
things like __compiletime_assert_1504:

 <5><2fc41>: Abbrev Number: 100 (DW_TAG_subprogram)
    <2fc42>   DW_AT_external    : 1
    <2fc42>   DW_AT_name        : (indirect string, offset: 0x1751f): __compiletime_assert_1504
    <2fc46>   DW_AT_decl_file   : 1
    <2fc47>   DW_AT_decl_line   : 1504
    <2fc49>   DW_AT_decl_column : 2
    <2fc4a>   DW_AT_prototyped  : 1
    <2fc4a>   DW_AT_declaration : 1
 <5><2fc4a>: Abbrev Number: 0
 <4><2fc4b>: Abbrev Number: 0
 <3><2fc4c>: Abbrev Number: 0
 <2><2fc4d>: Abbrev Number: 34 (DW_TAG_lexical_block)
 <3><2fc4e>: Abbrev Number: 12 (DW_TAG_variable)
    <2fc4f>   DW_AT_name        : (indirect string, offset: 0xbcc6): ____ptr
    <2fc53>   DW_AT_decl_file   : 1
    <2fc54>   DW_AT_decl_line   : 1504
    <2fc56>   DW_AT_decl_column : 2
    <2fc57>   DW_AT_type        : <0x6441>
 <3><2fc5b>: Abbrev Number: 34 (DW_TAG_lexical_block)
 <4><2fc5c>: Abbrev Number: 12 (DW_TAG_variable)
    <2fc5d>   DW_AT_name        : (indirect string, offset: 0xeb74): __mptr
    <2fc61>   DW_AT_decl_file   : 1
    <2fc62>   DW_AT_decl_line   : 1504
    <2fc64>   DW_AT_decl_column : 2
    <2fc65>   DW_AT_type        : <0x5a2>
 <4><2fc69>: Abbrev Number: 34 (DW_TAG_lexical_block)
 <5><2fc6a>: Abbrev Number: 100 (DW_TAG_subprogram)
    <2fc6b>   DW_AT_external    : 1
    <2fc6b>   DW_AT_name        : (indirect string, offset: 0x1751f): __compiletime_assert_1504
    <2fc6f>   DW_AT_decl_file   : 1
    <2fc70>   DW_AT_decl_line   : 1504
    <2fc72>   DW_AT_decl_column : 2
    <2fc73>   DW_AT_prototyped  : 1
    <2fc73>   DW_AT_declaration : 1

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-04 21:59:23 -03:00
Arnaldo Carvalho de Melo
163b873f81 pfunct: Do not reconstruct inline expansions of functions
I.e. those that point back to the inline function via
DW_AT_abstract_origin.

For instance:

 <1><34b65>: Abbrev Number: 156 (DW_TAG_subprogram)
    <34b67>   DW_AT_external    : 1
    <34b67>   DW_AT_name        : (indirect string, offset: 0x2404): tcp_enter_memory_pressure
    <34b6b>   DW_AT_decl_file   : 1
    <34b6c>   DW_AT_decl_line   : 324
    <34b6e>   DW_AT_decl_column : 6
    <34b6f>   DW_AT_prototyped  : 1
    <34b6f>   DW_AT_inline      : 1     (inlined)

  <SNIP>

   <1><37f45>: Abbrev Number: 149 (DW_TAG_subprogram)
    <37f47>   DW_AT_abstract_origin: <0x34b65>
    <37f4b>   DW_AT_low_pc      : 0x1000
    <37f53>   DW_AT_high_pc     : 0x48
    <37f5b>   DW_AT_frame_base  : 1 byte block: 9c      (DW_OP_call_frame_cfa)
    <37f5d>   DW_AT_GNU_all_call_sites: 1
    <37f5d>   DW_AT_sibling     : <0x38032>

Generated by:

  Compilation Unit @ offset 0x0:
   Length:        0x3b40b (32-bit)
   Version:       4
   Abbrev Offset: 0x0
   Pointer Size:  8
 <0><b>: Abbrev Number: 215 (DW_TAG_compile_unit)
    <d>   DW_AT_producer    : (indirect string, offset: 0xb0bc): GNU C89 8.2.1 20181215 (Red Hat 8.2.1-6) -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -mno-80387 -mno-fp-ret-in-387 -mpreferred-stack-boundary=3 -mskip-rax-setup -mtune=generic -mno-red-zone -mcmodel=kernel -mindirect-branch=thunk-extern -mindirect-branch-register -mrecord-mcount -mfentry -march=x86-64 -g -O2 -std=gnu90 -p -fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE -falign-jumps=1 -falign-loops=1 -fno-asynchronous-unwind-tables -fno-delete-null-pointer-checks -fstack-protector-strong -fno-var-tracking-assignments -fno-strict-overflow -fno-merge-all-constants -fmerge-constants -fstack-check=no -fconserve-stack --param allow-store-data-races=0
    <11>   DW_AT_language    : 1        (ANSI C)
    <12>   DW_AT_name        : (indirect string, offset: 0x10daa): /home/acme/git/linux/net/ipv4/tcp.c
    <16>   DW_AT_comp_dir    : (indirect string, offset: 0x1d8c5): /home/acme/git/build/v5.0+

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-04 21:59:23 -03:00
Arnaldo Carvalho de Melo
ea83b780ec pfunct: Handle unnamed struct typedefs
If we first reconstruct all the types needed for a typedef to then
reconstruct it, we may end up with the unnamed struct correctly
reconstructed and then this:

  tcp.pahole.c:1987:17: warning: useless storage class specifier in empty declaration
   typedef struct  read_descriptor_t;

I.e.:

  typedef struct {
          size_t                     written;              /*     0     8 */
          size_t                     count;                /*     8     8 */
          union {
                  char *             buf;                  /*    16     8 */
                  void *             data;                 /*    16     8 */
          } arg;                                           /*    16     8 */
          int                        error;                /*    24     4 */

          /* size: 32, cachelines: 1, members: 4 */
          /* padding: 4 */
          /* last cacheline: 32 bytes */
  } read_descriptor_t;
  typedef struct  read_descriptor_t;

So special case it.

XXX

I'll revisit this, looks suboptimal, we manage to get this right
when reconstructing nameless struct typedefs found as the types for
members of structs or unions...

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-04 21:59:23 -03:00
Arnaldo Carvalho de Melo
e7ebc05d12 emit: Unwind the definitions for typedefs in type__emit_definitions()
For instance, with the existing code we ended up with:

  typedef __kernel_size_t size_t;
  size_t tcp_opt_stats_get_size(void)
  {
  }

Which lacks unwinding what a __kernel_size_t is, i.e.
type__emit_definitions() was only emitting definitions for the members
of structs and unions, do it for typedefs too, and then we end up with
what we need, which is:

  typedef long unsigned int __kernel_ulong_t;
  typedef __kernel_ulong_t __kernel_size_t;
  typedef __kernel_size_t size_t;
  size_t tcp_opt_stats_get_size(void)
  {
  }

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-04 21:59:23 -03:00
Arnaldo Carvalho de Melo
093135b0bf pfunct: Do not emit a type multiple times
We need to check if tag__type(type)->definition_emitted is set before
asking for that type to be emitted, otherwise we get type redefinition
errors when trying to compile the output from pahole --expand_types.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-04 21:59:23 -03:00
Arnaldo Carvalho de Melo
3ce2c52166 pfunct: Ask for generating compilable output that generates DWARF for types
I.e. --compile is similar to --expand_types but also makes sure the
functions have empty bodies, which ends up making at least gcc
generate the DWARF info for the types referenced by the function
arguments and return types.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-04 21:59:23 -03:00
Arnaldo Carvalho de Melo
e7a786540d pfunct: Make --expand_types/-b without -f expand types for all functions
Previously we wouldn't expand types if the user didn't provide a
function name, make it expand the types for all function arguments if
no function name is provided.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-04 21:59:20 -03:00
Arnaldo Carvalho de Melo
9b2eadf97b pfunct: Follow const, restrict, volatile in --expand_types
Take:

  int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)

We were not generating the 'struct tcp_md5sig_key' forward decl or
struct definition, stopping at 'const', which made this uncompilable.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-04 15:45:26 -03:00
Arnaldo Carvalho de Melo
f3f86f2f89 pfunct: Reconstruct function return types for --expand_types
Not just for the function arguments, so that we are able to get
something closer to buildable.

So far we got it buildable when the return types were reconstructed
because they also appeared in one of the function arguments or
in structs used by them.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-04 15:39:26 -03:00
Arnaldo Carvalho de Melo
a7d9c58cb8 fprintf: Add missing closing parens to the align attribute
Noticed while trying to use pfunct's -b option, that will show a
function prototype + the types it uses in its function signature, i.e.:

  $ pfunct -b -f tcp_sendmsg tcp.o
  typedef long long unsigned int __u64;
  typedef __u64 __addrpair;

  typedef unsigned int __u32;
  typedef __u32 __be32;

  typedef short unsigned int __u16;

  typedef __u32 __portpair;

  typedef __u16 __be16;

  struct hlist_node {
  	struct hlist_node *        next;                 /*     0     8 */
  	struct hlist_node * *      pprev;                /*     8     8 */

  	/* size: 16, cachelines: 1, members: 2 */
  	/* last cacheline: 16 bytes */
  };

  <SNIP tons of types>

  struct sock {
  	struct sock_common         __sk_common;          /*     0   136 */
  	/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
  	socket_lock_t              sk_lock;              /*   136    32 */
  	atomic_t                   sk_drops;             /*   168     4 */
  	/* --- cacheline 10 boundary (640 bytes) --- */
<SNIP the rest of the 'struct sock' members>
  	struct sock_cgroup_data    sk_cgrp_data;         /*   640     8 */
  	struct mem_cgroup *        sk_memcg;             /*   648     8 */
  	void                       (*sk_state_change)(struct sock *); /*   656     8 */
  	void                       (*sk_data_ready)(struct sock *); /*   664     8 */
  	void                       (*sk_write_space)(struct sock *); /*   672     8 */
  	void                       (*sk_error_report)(struct sock *); /*   680     8 */
  	int                        (*sk_backlog_rcv)(struct sock *, struct sk_buff *); /*   688     8 */
  	void                       (*sk_destruct)(struct sock *); /*   696     8 */
  	/* --- cacheline 11 boundary (704 bytes) --- */
  	struct sock_reuseport *    sk_reuseport_cb;      /*   704     8 */
  	struct callback_head       sk_rcu __attribute__((__aligned__(8))); /*   712    16 */

  	/* size: 728, cachelines: 12, members: 84 */
  	/* sum members: 715, holes: 4, sum holes: 8 */
  	/* sum bitfield members: 40 bits (5 bytes) */
  	/* paddings: 1, sum paddings: 4 */
  	/* forced alignments: 1 */
  	/* last cacheline: 24 bytes */
  };

<SNIP some more types>

  struct kiocb;

  struct msghdr {
  	void *                     msg_name;             /*     0     8 */
  	int                        msg_namelen;          /*     8     4 */

  	/* XXX 4 bytes hole, try to pack */

  	struct iov_iter            msg_iter;             /*    16    40 */
  	void *                     msg_control;          /*    56     8 */
  	/* --- cacheline 1 boundary (64 bytes) --- */
  	__kernel_size_t            msg_controllen;       /*    64     8 */
  	unsigned int               msg_flags;            /*    72     4 */

  	/* XXX 4 bytes hole, try to pack */

  	struct kiocb *             msg_iocb;             /*    80     8 */

  	/* size: 88, cachelines: 2, members: 7 */
  	/* sum members: 80, holes: 2, sum holes: 8 */
  	/* last cacheline: 24 bytes */
  };

  typedef __kernel_size_t size_t;

  int tcp_sendmsg(struct sock * sk, struct msghdr * msg, size_t size);
  $

So if we then redirect the output to a file and if we make it an empty
function instead of a prototype, i.e. if we make the last line above
become this:

  int tcp_sendmsg(struct sock * sk, struct msghdr * msg, size_t size) {}

then build with gcc -g to have it build as a .o with DWARF info, then we
should be able to see if the struct rebuilt from DWARF matches the
original struct used to generate the DWARF, going full circle:

  $ pfunct -b -f tcp_sendmsg tcp.o > tcp_sendmsg_types.c
  $ gcc -c tcp_sendmsg_types.c -g
  $ file tcp_sendmsg_types.o
  tcp_sendmsg_types.o: ELF 64-bit LSB relocatable, x86-64, version 1 (SYSV), with debug_info, not stripped
  $ pahole -E -C sock tcp_sendmsg_types.o > tcp_sendmsg_types.o.pahole
  $ pahole -E -C sock tcp.o > tcp.o.pahole
  $ diff -u tcp_sendmsg_types.o.pahole tcp.o.pahole
  $ wc -l tcp_sendmsg_types.o.pahole
  420 tcp_sendmsg_types.o.pahole
  $

So all the types that come from sock are expanded and all its details
are reconstructed in the same way for both cases.

  $ pahole -C sock tcp.o | tail
	struct sock_reuseport *    sk_reuseport_cb;      /*   704     8 */
	struct callback_head       sk_rcu __attribute__((__aligned__(8))); /*   712    16 */

	/* size: 728, cachelines: 12, members: 84 */
	/* sum members: 715, holes: 4, sum holes: 8 */
	/* sum bitfield members: 40 bits (5 bytes) */
	/* paddings: 1, sum paddings: 4 */
	/* forced alignments: 1 */
	/* last cacheline: 24 bytes */
  };
  $ pahole -C sock tcp_sendmsg_types.o | tail
	struct sock_reuseport *    sk_reuseport_cb;      /*   704     8 */
	struct callback_head       sk_rcu __attribute__((__aligned__(8))); /*   712    16 */

	/* size: 728, cachelines: 12, members: 84 */
	/* sum members: 715, holes: 4, sum holes: 8 */
	/* sum bitfield members: 40 bits (5 bytes) */
	/* paddings: 1, sum paddings: 4 */
	/* forced alignments: 1 */
	/* last cacheline: 24 bytes */
  };
  $

Reported-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-04 10:48:17 -03:00
Arnaldo Carvalho de Melo
d83d9f578f dwarf_loader: Handle DW_TAG_label in inline expansions
Just add it to the current lexblock.

This removes the warnings that started to appear with the fix in the
previous patch, i.e. these:

      die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3aefafe> not handled!
      die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3aeff8a> not handled!
      die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af02e0> not handled!

Reported-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-03 21:34:01 -03:00
Arnaldo Carvalho de Melo
73e545b144 dwarf_loader: Handle unsupported_tag in die__process_inline_expansion
It calls die__process_tag() that can return &unsupported_tag, for
instance for DW_TAG_label, as reported by Jiri, so handle that, which
ends up causing these warnings to appear in pahole:

  struct userfaultfd_wake_range {
  	long unsigned int          start;                /*     0     8 */
  	long unsigned int          len;                  /*     8     8 */

  	/* size: 16, cachelines: 1, members: 2 */
  	/* last cacheline: 16 bytes */
  };
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3aefafe> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3aeff8a> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af02e0> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af1903> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af19bf> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af358d> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af3e87> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af4268> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af46ec> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af4bd3> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af4f8c> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af551e> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af5815> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af5c15> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af5cad> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af79e0> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af7b34> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af7df3> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af806e> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3af812c> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3afa8da> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3afaba5> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3afb22e> not handled!
  die__process_inline_expansion: DW_TAG_label (0xa) @ <0x3afb45e> not handled!
  struct kioctx_table {
  	struct callback_head       rcu __attribute__((__aligned__(8)); /*     0    16 */
  	unsigned int               nr;                   /*    16     4 */

  	/* XXX 4 bytes hole, try to pack */

  	struct kioctx *            table[0];             /*    24     0 */

  	/* size: 24, cachelines: 1, members: 3 */
  	/* sum members: 20, holes: 1, sum holes: 4 */
  	/* forced alignments: 1 */
  	/* last cacheline: 24 bytes */
  };
  ^C
  [acme@quaco pahole]$

But at least no segfault takes place. Next csets should take care of it
more properly.

Reported-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-03 21:23:08 -03:00
Andrii Nakryiko
fe590758cb class__find_holes: Zero out bit_hole/hole on member
pahole --reorganize is calling class__find_holes() multiple times on the
same struct to re-calculate holes. If it so happens that the member that
ends up last in the struct after the reorg previously had a hole, we are
not going to clear it out, which will lead to weird output and a BFA
(BRAIN FART ALERT), like this:

$ pahole -F btf --reorganize -C netns_frags ~/tmp/vmlinux-default
struct netns_frags {
        long int                   high_thresh;          /*     0     8 */
        long int                   low_thresh;           /*     8     8 */
        int                        timeout;              /*    16     4 */
        int                        max_dist;             /*    20     4 */
        struct inet_frags *        f;                    /*    24     8 */
        atomic_long_t              mem;                  /*    32     8 */

        /* XXX 24 bytes hole, try to pack */

        /* --- cacheline 1 boundary (64 bytes) --- */
        struct rhashtable          rhashtable;           /*    64   136 */

        /* XXX 56 bytes hole, try to pack */

        /* size: 200, cachelines: 4, members: 7 */
        /* sum members: 176, holes: 1, sum holes: 80 */
        /* last cacheline: 8 bytes */

        /* BRAIN FART ALERT! 200 bytes != 176 (member bytes) + 0 (member bits) + 80 (byte holes) + 0 (bit holes), diff = -448 bits */
};   /* saved 120 bytes and 1 cacheline! */

After this change:
$ pahole -F btf --reorganize -C netns_frags ~/tmp/vmlinux-default
struct netns_frags {
        long int                   high_thresh;          /*     0     8 */
        long int                   low_thresh;           /*     8     8 */
        int                        timeout;              /*    16     4 */
        int                        max_dist;             /*    20     4 */
        struct inet_frags *        f;                    /*    24     8 */
        atomic_long_t              mem;                  /*    32     8 */

        /* XXX 24 bytes hole, try to pack */

        /* --- cacheline 1 boundary (64 bytes) --- */
        struct rhashtable          rhashtable;           /*    64   136 */

        /* size: 200, cachelines: 4, members: 7 */
        /* sum members: 176, holes: 1, sum holes: 24 */
        /* last cacheline: 8 bytes */
};   /* saved 120 bytes and 1 cacheline! */

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: dwarves@vger.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-03 21:10:05 -03:00
Arnaldo Carvalho de Melo
863c2af6e9 reorganize: Disable the bitfield coalescing/moving steps
We need to fix some bugs introduced recently, till then, disable steps
that try to demote the base type of bitfields and those that
move/combine bitfields to save space.

We'll revisit those later, bringing them back to the reorg codebase.

Acked-by: Andrii Nakryiko <andriin@fb.com>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Wielaard <mark@klomp.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-03 18:35:43 -03:00
Arnaldo Carvalho de Melo
b95961db69 fprintf: Show statistics about holes due to forced alignments
$ pahole -C task_struct | tail

	/* --- cacheline 104 boundary (6656 bytes) --- */
	struct thread_struct       thread __attribute__((__aligned__(64)); /*  6656  4352 */

	/* size: 11008, cachelines: 172, members: 207 */
	/* sum members: 10902, holes: 16, sum holes: 98 */
	/* sum bitfield members: 10 bits, bit holes: 2, sum bit holes: 54 bits */
	/* paddings: 3, sum paddings: 14 */
	/* forced alignments: 6, forced holes: 1, sum forced holes: 40 */
  };
  $ pahole -C inet_timewait_death_row
  struct inet_timewait_death_row {
	atomic_t                   tw_count;             /*     0     4 */

	/* XXX 60 bytes hole, try to pack */

	/* --- cacheline 1 boundary (64 bytes) --- */
	struct inet_hashinfo *     hashinfo __attribute__((__aligned__(64)); /*    64     8 */
	int                        sysctl_max_tw_buckets; /*    72     4 */

	/* size: 128, cachelines: 2, members: 3 */
	/* sum members: 16, holes: 1, sum holes: 60 */
	/* padding: 52 */
	/* forced alignments: 1, forced holes: 1, sum forced holes: 60 */
  };
  $

Cc: Alexei Starovoitov <ast@fb.com>
Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Wielaard <mark@klomp.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-03 18:10:16 -03:00
Arnaldo Carvalho de Melo
ec772f21f6 fprintf: Show the number of forced alignments in a class
For instance, for task_struct:

  $ pahole -C task_struct | tail

	/* --- cacheline 104 boundary (6656 bytes) --- */
	struct thread_struct       thread __attribute__((__aligned__(64)); /*  6656  4352 */

	/* size: 11008, cachelines: 172, members: 207 */
	/* sum members: 10902, holes: 16, sum holes: 98 */
	/* sum bitfield members: 10 bits, bit holes: 2, sum bit holes: 54 bits */
	/* paddings: 3, sum paddings: 14 */
	/* forced alignments: 6 */
  };
  $

Cc: Alexei Starovoitov <ast@fb.com>
Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Wielaard <mark@klomp.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-03 18:10:16 -03:00
Arnaldo Carvalho de Melo
52d1c75ea4 btfdiff: Use --suppress_aligned_attribute with -F dwarf
Now that we collect DWARF5's DW_AT_alignment, btfdiff shows, for instance:

  $ btfdiff examples/tcp.o
  <SNIP>
  @@ -13450,7 +13450,7 @@ struct ip6_flowlabel {
   	struct in6_addr            dst;                  /*    16    16 */
   	struct ipv6_txoptions *    opt;                  /*    32     8 */
   	long unsigned int          linger;               /*    40     8 */
  -	struct callback_head       rcu __attribute__((__aligned__(8)); /*    48    16 */
  +	struct callback_head       rcu;                  /*    48    16 */
   	/* --- cacheline 1 boundary (64 bytes) --- */
   	u8                         share;                /*    64     1 */

  @@ -13616,7 +13616,7 @@ struct fib6_node {
   	__u16                      fn_flags;             /*    42     2 */
   	int                        fn_sernum;            /*    44     4 */
   	struct fib6_info *         rr_ptr;               /*    48     8 */
  -	struct callback_head       rcu __attribute__((__aligned__(8)); /*    56    16 */
  +	struct callback_head       rcu;                  /*    56    16 */

   	/* size: 72, cachelines: 2, members: 10 */
   	/* last cacheline: 8 bytes */
  $

So ask for those attributes to be suppressed when comparing BTF and
DWARF output.

Cc: Alexei Starovoitov <ast@fb.com>
Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Wielaard <mark@klomp.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-03 18:10:16 -03:00
Arnaldo Carvalho de Melo
6cd6a6bd87 dwarves_fprintf: Allow suppressing the __attribute__((__aligned__(N)))
So that we can use it in things like btfdiff.

Cc: Alexei Starovoitov <ast@fb.com>
Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Wielaard <mark@klomp.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-03 18:10:16 -03:00